#!/usr/bin/env python3
# SPDX-FileCopyrightText: Chris Pressey, the original author of this work, has dedicated it to the public domain.
# For more information, please refer to <https://unlicense.org/>
# SPDX-License-Identifier: Unlicense
import json
import re
import os
from time import sleep
from feedmark.checkers import Schema
from feedmark.formats.markdown import feedmark_markdownize
from feedmark.loader import read_document_from
import requests
# Property names under which borrow links are stored in each section.
_BORROW_KEYS = ("borrow", "borrow-with-print-disabilities")

# A Markdown link "[site](url)" at the start of a property value.  Anything
# following the link (e.g. a trailing comment) is ignored.
_BORROW_LINK_RE = re.compile(r'^\[(.+?)\]\((.+?)\)')


def _parse_borrow_link(borrowlink):
    """Return the URL of an archive.org Markdown link, validating its form."""
    match = _BORROW_LINK_RE.match(borrowlink)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("not a Markdown link: {}".format(borrowlink))
    site, url = match.groups()
    if site != 'archive.org':
        raise NotImplementedError("not an archive.org link: {}".format(borrowlink))
    return url


def _is_in_library(borrowlink, url, timeout):
    """Fetch `url` and report whether the page mentions "/details/inlibrary"."""
    print("fetching", url, "...")
    response = requests.get(url, timeout=timeout)
    # Do not classify a book based on the contents of an HTTP error page.
    response.raise_for_status()
    archive_page = response.text
    if "/details/printdisabled" not in archive_page:
        raise NotImplementedError("can't find printdisabled: {}".format(borrowlink))
    return "/details/inlibrary" in archive_page


def process_books_document(filename, *, delay=5, timeout=30):
    """Re-sort the borrow links of a Feedmark book document, in place.

    Every link found under the "borrow" and "borrow-with-print-disabilities"
    properties of each section is fetched.  A link whose page contains
    "/details/inlibrary" is filed under "borrow"; any other link is filed
    under "borrow-with-print-disabilities".  A property that ends up with no
    links is left out of the section.  The document is then rendered back to
    Markdown against the schema in "schema/Book.md" (a path relative to the
    current working directory) and written over `filename`.

    Args:
        filename: path of the Feedmark document to read and overwrite.
        delay: seconds to sleep after each successfully checked link.
        timeout: seconds to wait for each HTTP request before giving up.

    Raises:
        ValueError: a link is not of the form "[site](url)".
        NotImplementedError: a link's site is not "archive.org", or the
            fetched page does not contain "/details/printdisabled".
        requests.exceptions.RequestException: the HTTP request failed,
            timed out, or returned an error status.
    """
    book_schema = Schema(read_document_from(os.path.join("schema", "Book.md")))
    document = read_document_from(filename)

    for section in document.sections:
        properties = section.properties
        borrowlinks = (
            properties.get("borrow", [])
            + properties.get("borrow-with-print-disabilities", [])
        )

        # Drop the old properties; they are rebuilt below from live data.
        for key in _BORROW_KEYS:
            if key in properties:
                del properties[key]

        new_links = {key: [] for key in _BORROW_KEYS}
        for borrowlink in borrowlinks:
            url = _parse_borrow_link(borrowlink)
            inlibrary = _is_in_library(borrowlink, url, timeout)
            # Pause between requests so the remote site is not hammered.
            sleep(delay)
            if inlibrary:
                new_links["borrow"].append(borrowlink)
            else:
                new_links["borrow-with-print-disabilities"].append(borrowlink)

        # Only re-create a property when it has at least one link.
        for key in _BORROW_KEYS:
            if new_links[key]:
                properties[key] = new_links[key]

    s = feedmark_markdownize(document, schema=book_schema)
    # NOTE(review): assumes the document is UTF-8 text, independent of the
    # platform's locale encoding -- confirm against feedmark's loader.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(s)
def main(args):
    """Process each book document named in `args`, one after another.

    For every filename a "### <filename> ###" banner is printed, the document
    is handed to `process_books_document`, and a blank separator is printed.
    """
    banner = "### {} ###\n"
    for path in args:
        print(banner.format(path))
        process_books_document(path)
        print("\n")
# Script entry point: treat every command-line argument as a document path.
if __name__ == "__main__":
    from sys import argv

    main(argv[1:])