git @ Cat's Eye Technologies Feedmark / 0131c66
Add --archive-missing-only option. (Chris Pressey, 7 years ago)
2 changed file(s) with 12 addition(s) and 2 deletion(s). [Raw diff] [Collapse all] [Expand all]
136136 delay_between_fetches = 0
137137
138138
139 def archive_links(documents, article_root, dest_dir):
139 def archive_links(documents, article_root=None, dest_dir=None, missing_only=False):
140140 """If dest_dir is None, links will only be checked for existence, not downloaded."""
141141 links = extract_links_from_documents(documents)
142142
160160 dirname = os.path.join(dest_dir, dirname)
161161 if not os.path.exists(dirname):
162162 os.makedirs(dirname)
163 if missing_only and os.path.exists(os.path.join(dirname, filename)):
164 continue
163165 response = download(url, dirname, filename)
164166 else:
165167 response = requests.head(url)
4242 )
4343 argparser.add_argument('--archive-links-to', metavar='DIRNAME', type=str, default=None,
4444 help='Download a copy of all web objects linked to from the entries'
45 )
46 argparser.add_argument('--archive-missing-only', action='store_true',
47 help='When archiving links, only download the link if it is not already archived'
4548 )
4649 argparser.add_argument('--check-links', action='store_true',
4750 help='Check if web objects linked to from the entries exist'
151154
152155 if options.check_links or options.archive_links_to is not None:
153156 from feedmark.checkers import archive_links
154 result = archive_links(documents, options.article_root, options.archive_links_to)
157 result = archive_links(
158 documents,
159 article_root=options.article_root,
160 dest_dir=options.archive_links_to,
161 missing_only=options.archive_missing_only,
162 )
155163 write(json.dumps(result, indent=4, sort_keys=True))
156164
157165 schema = None