Accumulate all results, dump only the failures at the end.
Chris Pressey
4 years ago
86 | 86 | raise NotImplementedError |
87 | 87 | |
88 | 88 | def traverse(self): |
89 | failures = [] | |
89 | self.results = [] | |
90 | 90 | for link in tqdm(self.links, total=len(self.links)): |
91 | 91 | url = link['url'] |
92 | 92 | if url in self.ignore_urls: |
108 | 108 | response = self.handle_link(url) |
109 | 109 | if response is None: |
110 | 110 | continue |
111 | status = response['status_code'] | |
112 | 111 | except Exception as e: |
113 | 112 | if self.fragile: |
114 | 113 | raise |
115 | status = str(e) | |
116 | if status not in (200, 301, 302, 303): | |
117 | failures.append({ | |
118 | 'status': status, | |
119 | 'url': url, | |
120 | 'link': link, | |
121 | }) | |
114 | response = { | |
115 | "status_code": 600, | |
116 | "error": "{}: {}".format(e.__class__.__name__, e) |
117 | } | |
118 | self.results.append({ | |
119 | 'response': response, | |
120 | 'url': url, | |
121 | 'link': link, | |
122 | }) | |
122 | 123 | if self.delay_between_requests > 0.0: |
123 | 124 | sleep(self.delay_between_requests) |
124 | return failures | |
125 | ||
126 | def failures(self): | |
127 | for result in self.results: | |
128 | if result['response']['status_code'] != 200: | |
129 | yield result | |
125 | 130 | |
126 | 131 | |
127 | 132 | class LinkChecker(LinkTraverser): |
219 | 224 | links, options.archive_to, missing_only=options.archive_missing_only, |
220 | 225 | **common_kwargs |
221 | 226 | ) |
222 | elif True: | |
227 | elif False: | |
223 | 228 | traverser = WgetLinkChecker(links, **common_kwargs) |
224 | 229 | else: |
225 | 230 | traverser = LinkChecker(links, **common_kwargs) |
226 | 231 | |
227 | result = traverser.traverse() | |
232 | traverser.traverse() | |
233 | result = list(traverser.failures()) | |
228 | 234 | sys.stdout.write(json.dumps(result, indent=4, sort_keys=True)) |
229 | 235 | |
230 | 236 |