85 | 85 |
|
86 | 86 |
|
87 | 87 |
class LinkTraverser(object):
|
88 | |
def __init__(self, links, article_root=None, ignore_urls=None, delay_between_requests=0.0, fragile=False):
|
|
88 |
def __init__(self, links, extant_path=None, ignore_urls=None, delay_between_requests=0.0, fragile=False):
|
89 | 89 |
self.links = links
|
90 | |
self.article_root = article_root
|
|
90 |
self.extant_path = extant_path or []
|
91 | 91 |
self.ignore_urls = ignore_urls or []
|
92 | 92 |
self.delay_between_requests = delay_between_requests
|
93 | 93 |
self.fragile = fragile
|
|
126 | 126 |
else:
|
127 | 127 |
filename = url
|
128 | 128 |
filename = unquote(filename)
|
129 | |
filename = os.path.join(self.article_root, filename)
|
130 | |
if not os.path.exists(filename):
|
131 | |
raise ValueError('Local file "{}" does not exist'.format(filename))
|
|
129 |
found = False
|
|
130 |
for extant_dirname in self.extant_path:
|
|
131 |
extant_filename = os.path.join(extant_dirname, filename)
|
|
132 |
if os.path.exists(extant_filename):
|
|
133 |
found = True
|
|
134 |
break
|
|
135 |
if not found:
|
|
136 |
raise ValueError('Local file "{}" not found in extant-path'.format(filename))
|
132 | 137 |
continue
|
133 | 138 |
else:
|
134 | 139 |
response = self.handle_link(link)
|
|
223 | 228 |
argparser.add_argument('--archive-via', metavar='ROUTERFILE', type=str, default=None,
|
224 | 229 |
help='Download links and save each in the directory given in the router file'
|
225 | 230 |
)
|
226 | |
argparser.add_argument('--article-root', metavar='DIRNAME', type=str, default='.',
|
227 | |
help='Directory in which local files found when checking or archiving links should be located'
|
228 | |
)
|
229 | 231 |
argparser.add_argument('--delay-between-requests', metavar='SECONDS', type=float, default=0.0,
|
230 | 232 |
help='Delay (in seconds, fractions allowed) between successive network requests'
|
|
233 |
)
|
|
234 |
argparser.add_argument('--extant-path', metavar='DIRNAMES', type=str, default=None,
|
|
235 |
help='When finding a relative link that would resolve to local file, check that '
|
|
236 |
'that file exists in in this search path, which should be given as a '
|
|
237 |
'comma-delimited list of directory names'
|
231 | 238 |
)
|
232 | 239 |
argparser.add_argument('--fragile', action='store_true', default=False,
|
233 | 240 |
help='Exit on first error of any nature when processing links'
|
|
254 | 261 |
links.extend(data)
|
255 | 262 |
|
256 | 263 |
common_kwargs = dict(
|
257 | |
article_root=options.article_root,
|
|
264 |
extant_path=None if options.extant_path is None else options.extant_path.split(','),
|
258 | 265 |
ignore_urls=[] if options.ignore_urls is None else options.ignore_urls.split(','),
|
259 | 266 |
delay_between_requests=options.delay_between_requests,
|
260 | 267 |
fragile=options.fragile,
|