git @ Cat's Eye Technologies yastasoti / a296c32
handle_link() now takes the entire link dictionary. (Chris Pressey, 6 years ago)
1 changed file with 9 additions and 6 deletions. [Raw diff] [Collapse all] [Expand all]
8989 self.delay_between_requests = delay_between_requests
9090 self.fragile = fragile
9191
92 def handle_link(self, url):
93 """Given a URL, process that URL. Should either return None, meaning
92 def handle_link(self, link):
93 """Given a dict containing a URL under the key `url` (and possibly
94 other information), process that URL. Should either return None, meaning
9495 it declined to process this URL (for whatever reason), or should return
9596 a dict representing the response from processing the URL, which should
96 contain the following keys:
97 contain (at least) the following keys:
9798
9899 status_code: an integer. 6xx can be used to indicate internal error.
99100
127128 raise ValueError('Local file "{}" does not exist'.format(filename))
128129 continue
129130 else:
130 response = self.handle_link(url)
131 response = self.handle_link(link)
131132 if response is None:
132133 continue
133134 except Exception as e:
152153
153154
154155 class LinkChecker(LinkTraverser):
155 def handle_link(self, url):
156 def handle_link(self, link):
157 url = link['url']
156158 logger.info(u"checking {}".format(url).encode('utf-8'))
157159 response = requests.head(url, allow_redirects=True, headers={
158160 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:62.0) Gecko/20100101 Firefox/62.0',
175177 return self.router[key]
176178 raise NotImplementedError("archive router could not resolve {}".format(url))
177179
178 def handle_link(self, url):
180 def handle_link(self, link):
181 url = link['url']
179182 dirname, filename = url_to_dirname_and_filename(url)
180183 dest_dir = self.select_dest_dir(url)
181184 if dest_dir == '/dev/null':