script/convert_webpages.py - The-Glosscubator (master)

Tree @master (Download .tar.gz)

convert_webpages.py @master — raw · history · blame

#!/usr/bin/env python3

# SPDX-FileCopyrightText: Chris Pressey, the original author of this work, has dedicated it to the public domain.
# For more information, please refer to <https://unlicense.org/>
# SPDX-License-Identifier: Unlicense

import json
import re
import os

from feedmark.loader import read_document_from


# NOTE: first version -- not yet ready for use.


def load_webpages_from_file(filename):
    """Parse the bookmark entries that follow the ``- - - -`` separator.

    Every line up to and including the first one that equals ``- - - -``
    (after stripping surrounding whitespace) is skipped.  Each remaining
    non-blank line must be either a Markdown link ``[title](url)`` or a
    ``### heading``.

    Args:
        filename: path of the Markdown file to read.

    Returns:
        A non-empty list of tuples in file order, each either
        ``('link', title, url)`` or ``('heading', text)``.

    Raises:
        NotImplementedError: if no lines follow the separator (which is
            also what happens when the file has no separator at all), or
            if a line is neither blank, a link, nor a heading.
        AssertionError: if only blank lines follow the separator.
    """
    # Compiled once, outside the per-line loop.
    link_pattern = re.compile(r'^\s*\[(.*?)\]\((.*?)\)\s*$')
    heading_pattern = re.compile(r'^\#\#\#\s*(.*?)\s*$')

    bookmarks = []
    with open(filename, "r") as f:
        # Consume the preamble; the loop leaves the file positioned just
        # after the separator (or at EOF when there is none).
        for line in f:
            if line.strip() == '- - - -':
                break
        remaining_lines = list(f)
        if not remaining_lines:
            # Error messages name the file: this function is not given a
            # topic, only the path.
            raise NotImplementedError("No seperator in {}".format(filename))
        for line in remaining_lines:
            line = line.strip()
            if not line:
                # Blank (or whitespace-only) lines carry no information.
                continue
            match = link_pattern.match(line)
            if match:
                bookmarks.append(('link', match.group(1), match.group(2)))
                continue
            match = heading_pattern.match(line)
            if match:
                bookmarks.append(('heading', match.group(1)))
                continue
            raise NotImplementedError("yuck: {}: {}".format(filename, line))
    # Raised explicitly rather than via ``assert`` so that the check is not
    # stripped when Python runs with -O.
    if not bookmarks:
        raise AssertionError("empty bookmarks in {}".format(filename))
    return bookmarks


def write_webpages_to_feedmark_file(filename, title, webpages):
    """Write bookmark entries to ``filename`` as a Markdown document.

    The output consists of ``title`` underlined with ``=`` characters, an
    HTML comment holding copyright and licence tags, and then one ``###``
    section per entry.  Each section has a single property line (``url``
    for links, ``is-heading: true`` for headings) and a body of ``.``.

    Args:
        filename: path of the file to create or overwrite.
        title: document title, written as the first line.
        webpages: iterable of tuples, each either ``('link', name, url)``
            or ``('heading', text)``.

    Raises:
        NotImplementedError: if an entry's first element is neither
            ``'link'`` nor ``'heading'``.
    """
    # The tag prefix is substituted at run time instead of being spelled
    # out in the template -- presumably so that tools scanning this script
    # do not read the template as the script's own licence header
    # (TODO confirm).
    tag = 'SPDX'
    license_comment = """
<!--
{}-FileCopyrightText: Chris Pressey, the original author of this work, has dedicated it to the public domain.

{}-License-Identifier: CC0-1.0
-->

""".format(tag, tag)

    with open(filename, 'w') as out:
        out.write(title + "\n")
        out.write("=" * len(title) + "\n")
        out.write(license_comment)
        for entry in webpages:
            kind = entry[0]
            # Reject unknown kinds before anything is written for the entry.
            if kind not in ('link', 'heading'):
                raise NotImplementedError("ouch")
            out.write("### {}\n\n".format(entry[1]))
            if kind == 'link':
                out.write("*   url: {}\n\n".format(entry[2]))
            else:
                out.write("*   is-heading: true\n\n")
            out.write(".\n\n")


def main(args):
    """Rewrite one topic's ``src/Webpages.md`` in place.

    The topic name is taken from ``args[0]`` and looked up relative to the
    current directory.  If ``<topic>/src`` is not a directory, a
    "skipping" message is printed and nothing else happens.  Otherwise the
    bookmarks are loaded from ``<topic>/src/Webpages.md`` and written back
    to that same path, with the topic name as the title.

    Args:
        args: command-line arguments without the program name; the first
            element is the topic directory name.
    """
    topic = args[0]
    src_dir = os.path.join(".", topic, "src")
    if os.path.isdir(src_dir):
        target = os.path.join(src_dir, "Webpages.md")
        # Input and output are the same file: read everything first, then
        # overwrite.
        entries = load_webpages_from_file(target)
        write_webpages_to_feedmark_file(target, topic, entries)
    else:
        print("skipping", topic)


# Script entry point: hand the command-line arguments (minus the program
# name) to main().
if __name__ == "__main__":
    from sys import argv
    main(argv[1:])