Add --archive-missing-only option.
Chris Pressey
7 years ago
136 | 136 |
delay_between_fetches = 0
|
137 | 137 |
|
138 | 138 |
|
139 | |
def archive_links(documents, article_root, dest_dir):
|
|
139 |
def archive_links(documents, article_root=None, dest_dir=None, missing_only=False):
|
140 | 140 |
"""If dest_dir is None, links will only be checked for existence, not downloaded."""
|
141 | 141 |
links = extract_links_from_documents(documents)
|
142 | 142 |
|
|
160 | 160 |
dirname = os.path.join(dest_dir, dirname)
|
161 | 161 |
if not os.path.exists(dirname):
|
162 | 162 |
os.makedirs(dirname)
|
|
163 |
if missing_only and os.path.exists(os.path.join(dirname, filename)):
|
|
164 |
continue
|
163 | 165 |
response = download(url, dirname, filename)
|
164 | 166 |
else:
|
165 | 167 |
response = requests.head(url)
|
42 | 42 |
)
|
43 | 43 |
argparser.add_argument('--archive-links-to', metavar='DIRNAME', type=str, default=None,
|
44 | 44 |
help='Download a copy of all web objects linked to from the entries'
|
|
45 |
)
|
|
46 |
argparser.add_argument('--archive-missing-only', action='store_true',
|
|
47 |
help='When archiving links, only download the link if it is not already archived'
|
45 | 48 |
)
|
46 | 49 |
argparser.add_argument('--check-links', action='store_true',
|
47 | 50 |
help='Check if web objects linked to from the entries exist'
|
|
151 | 154 |
|
152 | 155 |
if options.check_links or options.archive_links_to is not None:
|
153 | 156 |
from feedmark.checkers import archive_links
|
154 | |
result = archive_links(documents, options.article_root, options.archive_links_to)
|
|
157 |
result = archive_links(
|
|
158 |
documents,
|
|
159 |
article_root=options.article_root,
|
|
160 |
dest_dir=options.archive_links_to,
|
|
161 |
missing_only=options.archive_missing_only,
|
|
162 |
)
|
155 | 163 |
write(json.dumps(result, indent=4, sort_keys=True))
|
156 | 164 |
|
157 | 165 |
schema = None
|