git @ Cat's Eye Technologies: yastasoti / commit 4312d8b
--delay-between-requests argument. Chris Pressey, 1 year, 10 months ago
1 changed file with 16 additions and 18 deletions.
@@ -62,13 +62,12 @@
     return response
 
 
-delay_between_fetches = 0
-
 class LinkTraverser(object):
-    def __init__(self, links, article_root=None, ignore_urls=None):
+    def __init__(self, links, article_root=None, ignore_urls=None, delay_between_requests=0.0):
         self.links = links
         self.article_root = article_root
         self.ignore_urls = ignore_urls or []
+        self.delay_between_requests = delay_between_requests
 
     def handle_link(self, url):
         raise NotImplementedError
@@ -105,8 +104,8 @@
                     'url': url,
                     'link': link,
                 })
-            if delay_between_fetches > 0:
-                sleep(delay_between_fetches)
+            if self.delay_between_requests > 0.0:
+                sleep(self.delay_between_requests)
         return failures
 
 
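Taken together, the two hunks above move the throttle from a module-level global (delay_between_fetches) into per-instance state on the traverser. A minimal sketch of that pattern, assuming a hypothetical ThrottledTraverser stand-in and example URLs rather than the yastasoti code itself:

    from time import sleep

    class ThrottledTraverser(object):  # hypothetical stand-in for LinkTraverser
        def __init__(self, links, delay_between_requests=0.0):
            # the delay is now per-instance state, not a module-level global
            self.links = links
            self.delay_between_requests = delay_between_requests

        def handle_link(self, url):
            print('fetching', url)  # a real subclass would issue the HTTP request here

        def traverse(self):
            failures = []
            for url in self.links:
                self.handle_link(url)
                # pause between successive requests, but only when a delay was requested
                if self.delay_between_requests > 0.0:
                    sleep(self.delay_between_requests)
            return failures

    ThrottledTraverser(['http://example.com/a', 'http://example.com/b'],
                       delay_between_requests=0.5).traverse()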
@@ -153,6 +152,9 @@
     argparser.add_argument('--ignore-urls', metavar='URLS', type=str, default=None,
         help='Comma-separated list of link targets that should not even try to be fetched'
     )
+    argparser.add_argument('--delay-between-requests', metavar='SECONDS', type=float, default=0.0,
+        help='Delay (in seconds, fractions allowed) between successive network requests'
+    )
 
     options = argparser.parse_args(sys.argv[1:])
 
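The new flag is declared with type=float, so fractional delays such as 0.25 seconds are accepted, and the 0.0 default preserves the old no-delay behaviour. A small self-contained sketch of how argparse handles it (this parser is built from scratch for illustration, it is not the yastasoti argument parser):

    import argparse

    argparser = argparse.ArgumentParser()
    argparser.add_argument('--delay-between-requests', metavar='SECONDS', type=float, default=0.0,
        help='Delay (in seconds, fractions allowed) between successive network requests'
    )

    # argparse converts the raw string with float() and exposes it as
    # options.delay_between_requests (dashes become underscores).
    options = argparser.parse_args(['--delay-between-requests', '0.25'])
    assert options.delay_between_requests == 0.25

    # with no flag given, the 0.0 default means "do not sleep at all"
    assert argparser.parse_args([]).delay_between_requests == 0.0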
@@ -165,25 +167,21 @@
             data = json.loads(f.read())
         links.extend(data)
 
-    if options.ignore_urls is None:
-        options.ignore_urls = []
-    else:
-        options.ignore_urls = options.ignore_urls.split(',')
+    common_kwargs = dict(
+        article_root=options.article_root,
+        ignore_urls=[] if options.ignore_urls is None else options.ignore_urls.split(','),
+        delay_between_requests=options.delay_between_requests,
+    )
 
     if options.archive_links_to:
-        traverser = LinkArchiver(links, options.archive_links_to,
-            article_root=options.article_root,
-            missing_only=options.archive_missing_only,
-            ignore_urls=options.ignore_urls,
+        traverser = LinkArchiver(
+            links, options.archive_links_to, missing_only=options.archive_missing_only,
+            **common_kwargs
         )
     else:
-        traverser = LinkChecker(links,
-            article_root=options.article_root,
-            ignore_urls=options.ignore_urls,
-        )
+        traverser = LinkChecker(links, **common_kwargs)
 
     result = traverser.traverse()
-
     sys.stdout.write(json.dumps(result, indent=4, sort_keys=True))
 
 
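The final hunk folds the options shared by both traversers into a single common_kwargs dict, so the new delay (and the ignore_urls handling) only has to be written once. A minimal sketch of that keyword-splatting pattern, with hypothetical stand-ins for LinkTraverser, LinkChecker and LinkArchiver rather than the real classes:

    class Traverser(object):  # stands in for LinkTraverser
        def __init__(self, links, article_root=None, ignore_urls=None, delay_between_requests=0.0):
            self.links = links
            self.article_root = article_root
            self.ignore_urls = ignore_urls or []
            self.delay_between_requests = delay_between_requests

    class Checker(Traverser):  # stands in for LinkChecker
        pass

    class Archiver(Traverser):  # stands in for LinkArchiver
        def __init__(self, links, dest_dir, missing_only=False, **kwargs):
            super(Archiver, self).__init__(links, **kwargs)
            self.dest_dir = dest_dir
            self.missing_only = missing_only

    # options accepted by both constructors are collected once...
    common_kwargs = dict(
        article_root=None,
        ignore_urls=['http://example.com/skip'],
        delay_between_requests=0.25,
    )

    # ...and splatted into whichever traverser is selected.
    archive = True
    if archive:
        traverser = Archiver(['http://example.com/'], 'archive/',
                             missing_only=True, **common_kwargs)
    else:
        traverser = Checker(['http://example.com/'], **common_kwargs)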