diff --git a/script/yastasoti b/script/yastasoti index b64b35f..d4f5457 100755 --- a/script/yastasoti +++ b/script/yastasoti @@ -90,11 +90,12 @@ self.delay_between_requests = delay_between_requests self.fragile = fragile - def handle_link(self, url): - """Given a URL, process that URL. Should either return None, meaning + def handle_link(self, link): + """Given a dict containing a URL under the key `url` (and possibly + other information), process that URL. Should either return None, meaning it declined to process this URL (for whatever reason), or should return a dict representing the response from processing the URL, which should - contain the following keys: + contain (at least) the following keys: status_code: an integer. 6xx can be used to indicate internal error. @@ -128,7 +129,7 @@ raise ValueError('Local file "{}" does not exist'.format(filename)) continue else: - response = self.handle_link(url) + response = self.handle_link(link) if response is None: continue except Exception as e: @@ -153,7 +154,8 @@ class LinkChecker(LinkTraverser): - def handle_link(self, url): + def handle_link(self, link): + url = link['url'] logger.info(u"checking {}".format(url).encode('utf-8')) response = requests.head(url, allow_redirects=True, headers={ 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:62.0) Gecko/20100101 Firefox/62.0', @@ -176,7 +178,8 @@ return self.router[key] raise NotImplementedError("archive router could not resolve {}".format(url)) - def handle_link(self, url): + def handle_link(self, link): + url = link['url'] dirname, filename = url_to_dirname_and_filename(url) dest_dir = self.select_dest_dir(url) if dest_dir == '/dev/null':