script/update_book_borrowability.py - The-Glosscubator (master)

Tree @master (Download .tar.gz)

update_book_borrowability.py @master — raw · history · blame

#!/usr/bin/env python3

# SPDX-FileCopyrightText: Chris Pressey, the original author of this work, has dedicated it to the public domain.
# For more information, please refer to <https://unlicense.org/>
# SPDX-License-Identifier: Unlicense


import json
import re
import os
from time import sleep

from feedmark.checkers import Schema
from feedmark.formats.markdown import feedmark_markdownize
from feedmark.loader import read_document_from
import requests


def _parse_borrow_link(borrowlink):
    """Return the ``(site, url)`` pair from a ``[site](url) comments`` link.

    Raises ValueError if *borrowlink* does not start with a Markdown link,
    instead of failing later with an AttributeError on a ``None`` match.
    """
    # Trailing comment text is deliberately not captured: the caller re-files
    # the whole original string, so any comment travels along with the link.
    match = re.match(r'^\[(.+?)\]\((.+?)\)', borrowlink)
    if match is None:
        raise ValueError("not a Markdown link: {}".format(borrowlink))
    return match.group(1), match.group(2)


def process_books_document(filename, *, delay=5, timeout=60):
    """Re-file the borrow links of every section in a Feedmark document.

    The links found under the ``borrow`` and ``borrow-with-print-disabilities``
    properties of each section are pooled, each linked page is fetched, and
    the link is put back under ``borrow`` when the page text contains
    ``/details/inlibrary`` and under ``borrow-with-print-disabilities``
    otherwise.  The document is then rendered with ``feedmark_markdownize``
    (using the schema read from ``schema/Book.md``, a path relative to the
    current working directory) and written back over *filename*.

    Nothing is written if any link fails; the exception propagates.

    :param filename: path of the Feedmark document to rewrite in place.
    :param delay: seconds to sleep after each fetch (defaults to 5).
    :param timeout: seconds to wait for each HTTP response before giving up.
    :raises ValueError: a borrow entry is not a Markdown link.
    :raises NotImplementedError: a link's site label is not ``archive.org``,
        or the fetched page does not contain ``/details/printdisabled``.
    :raises requests.RequestException: the fetch timed out or failed, or the
        server answered with an HTTP error status.
    """
    keys = ("borrow", "borrow-with-print-disabilities")
    book_schema = Schema(read_document_from(os.path.join("schema", "Book.md")))
    document = read_document_from(filename)
    for section in document.sections:
        borrowlinks = section.properties.get("borrow", []) + section.properties.get("borrow-with-print-disabilities", [])
        # Clear both properties up front; they are rebuilt from scratch below.
        for key in keys:
            if key in section.properties:
                del section.properties[key]

        new_links = {key: [] for key in keys}

        for borrowlink in borrowlinks:
            site, url = _parse_borrow_link(borrowlink)
            if site != 'archive.org':
                raise NotImplementedError("not an archive.org link: {}".format(borrowlink))

            print("fetching", url, "...")
            # A timeout keeps one stalled connection from hanging the whole
            # run, and checking the status stops an error page from being
            # misreported as a page that merely lacks the printdisabled marker.
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            archive_page = response.text
            inlibrary = "/details/inlibrary" in archive_page
            printdisabled = "/details/printdisabled" in archive_page

            if not printdisabled:
                raise NotImplementedError("can't find printdisabled: {}".format(borrowlink))
            # Pause between requests so the remote site is not hammered.
            sleep(delay)

            if inlibrary:
                new_links["borrow"].append(borrowlink)
            else:
                new_links["borrow-with-print-disabilities"].append(borrowlink)

        # Only re-create a property when it has at least one link.
        for key in keys:
            if new_links[key]:
                section.properties[key] = new_links[key]

    s = feedmark_markdownize(document, schema=book_schema)
    # Write UTF-8 explicitly instead of the platform default so non-ASCII text
    # survives the round trip (assumes the feedmark loader reads UTF-8 --
    # TODO confirm).
    with open(filename, "w", encoding="utf-8") as f:
        f.write(s)


def main(args):
    """Process each Feedmark document named in *args*, one after another.

    A ``### <filename> ###`` banner is printed before each document is
    handed to ``process_books_document`` and a blank gap is printed after.
    """
    banner_template = "### {} ###\n"
    for document_path in args:
        banner = banner_template.format(document_path)
        print(banner)
        process_books_document(document_path)
        print("\n")


# Script entry point: every command-line argument after the program name is
# treated as the path of a document to process.
if __name__ == "__main__":
    import sys

    cli_filenames = sys.argv[1:]
    main(cli_filenames)