The return value of handle_link() is not a Response object.
Chris Pressey
4 years ago
71 | 71 | self.fragile = fragile |
72 | 72 | |
73 | 73 | def handle_link(self, url): |
74 | """Given a URL, process that URL. Should either return None, meaning | |
75 | it declined to process this URL (for whatever reason), or should return | |
76 | a dict representing the response from processing the URL, which should | |
77 | contain the following keys: | |
78 | ||
79 | status_code: an integer. 6xx can be used to indicate internal error. | |
80 | ||
81 | """ | |
74 | 82 | raise NotImplementedError |
75 | 83 | |
76 | 84 | def traverse(self): |
96 | 104 | response = self.handle_link(url) |
97 | 105 | if response is None: |
98 | 106 | continue |
99 | status = response.status_code | |
107 | status = response['status_code'] | |
100 | 108 | except Exception as e: |
101 | 109 | if self.fragile: |
102 | 110 | raise |
114 | 122 | |
115 | 123 | class LinkChecker(LinkTraverser): |
116 | 124 | def handle_link(self, url): |
117 | return requests.head(url) | |
125 | response = requests.head(url) | |
126 | return { | |
127 | 'status_code': response.status_code | |
128 | } | |
118 | 129 | |
119 | 130 | |
120 | 131 | class WgetLinkChecker(LinkTraverser): |
121 | 132 | def handle_link(self, url): |
122 | 133 | try: |
134 | # TODO turn on redirects | |
123 | 135 | check_call(['wget', '--spider', url]) |
124 | 136 | print('OK', url) |
137 | # TODO extract from headers | |
138 | return { | |
139 | 'status_code': 200 | |
140 | } | |
125 | 141 | except CalledProcessError as e: |
126 | 142 | print('BAD {} {}'.format(e.returncode, url)) |
127 | return None | |
143 | return { | |
144 | 'status_code': 600 | |
145 | } | |
128 | 146 | |
129 | 147 | |
130 | 148 | class LinkArchiver(LinkTraverser): |
141 | 159 | if self.missing_only and os.path.exists(os.path.join(dirname, filename)): |
142 | 160 | return None |
143 | 161 | response = download(url, dirname, filename) |
144 | return response | |
162 | return { | |
163 | 'status_code': response.status_code | |
164 | } | |
145 | 165 | |
146 | 166 | |
147 | 167 | def main(args): |