Checkpoint an experimental thing, badly.
Chris Pressey
6 years ago
3 | 3 | import hashlib |
4 | 4 | import json |
5 | 5 | import os |
6 | #from subprocess import check_call | |
6 | from subprocess import check_call, CalledProcessError | |
7 | 7 | import sys |
8 | 8 | from time import sleep, localtime, strftime |
9 | 9 | import urllib |
97 | 97 | continue |
98 | 98 | status = response.status_code |
99 | 99 | except Exception as e: |
100 | # TODO: raise | |
100 | 101 | status = str(e) |
101 | 102 | if status not in (200, 301, 302, 303): |
102 | 103 | failures.append({ |
114 | 115 | return requests.head(url) |
115 | 116 | |
116 | 117 | |
class WgetLinkChecker(LinkTraverser):
    """Link traverser that checks each URL by running ``wget --spider`` on it.

    The outcome of every check is printed to standard output; no response
    object is produced for the caller.
    """

    def handle_link(self, url):
        """Run ``wget --spider`` against *url* and print whether it succeeded.

        Prints ``OK <url>`` when wget exits with status zero, or
        ``BAD <returncode> <url>`` when it exits non-zero.  Always returns
        ``None``.
        """
        command = ['wget', '--spider', url]
        try:
            check_call(command)
        except CalledProcessError as failure:
            # A non-zero exit from wget is reported, not propagated.
            print('BAD {} {}'.format(failure.returncode, url))
        else:
            print('OK', url)
        return None
126 | ||
127 | ||
117 | 128 | class LinkArchiver(LinkTraverser): |
118 | 129 | def __init__(self, links, dest_dir, missing_only=False, **kwargs): |
119 | 130 | super().__init__(links, **kwargs) |
178 | 189 | links, options.archive_links_to, missing_only=options.archive_missing_only, |
179 | 190 | **common_kwargs |
180 | 191 | ) |
192 | elif True: | |
193 | traverser = WgetLinkChecker(links, **common_kwargs) | |
181 | 194 | else: |
182 | 195 | traverser = LinkChecker(links, **common_kwargs) |
183 | 196 |