#!/usr/bin/env python3

# SPDX-FileCopyrightText: Chris Pressey, the original author of this work, has dedicated it to the public domain.
# For more information, please refer to <https://unlicense.org/>
# SPDX-License-Identifier: Unlicense

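"""Regenerate refdex.json and clean up the article documents.

Combines the miscellaneous refdexes with entries collected from the
sections of each article, writes the result to refdex.json, then uses it
to rewrite the reference links in every article and re-emit each article
with feedmark.
"""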
import json

from feedmark.checkers import Schema
from feedmark.formats.markdown import feedmark_markdownize
from feedmark.loader import read_document_from, read_refdex_from


def document_name(title):
    if title == 'News':
        return "NEWS.md"
    else:
        return "article/{}.md".format(title)


def rewrite_documents(articles, refdex):
    for title, schema in articles:
        if title in ('News',):
            continue
        print("{}...".format(title))
        filename = document_name(title)
        document = read_document_from(filename)
        document.rewrite_reference_links(refdex)

        if schema:
            schema_document = read_document_from("schema/{}.md".format(schema))
            schema = Schema(schema_document)
            results = schema.check_documents([document])
            if results:
                WARN_ONLY = False
                formatted_results = json.dumps(results, indent=4, sort_keys=True)
                if WARN_ONLY:
                    print("WARNING: {}".format(formatted_results))
                else:
                    raise ValueError(formatted_results)

        text = feedmark_markdownize(document, schema=schema)
        # Open in text mode with an explicit UTF-8 encoding; under Python 3
        # the old encode-and-fall-back dance would raise TypeError anyway.
        with open(document.filename, 'w', encoding='utf-8') as f:
            f.write(text)


def accumulate_article_refdex(articles, refdex):
    for title, schema in articles:
        if title in ("News", "HTML5 Installations",):
            continue
        print("{}...".format(title))
        filename = document_name(title)
        document = read_document_from(filename)
        for section in document.sections:
            refdex[section.title] = {
                'filename': document.filename,
                'anchor': section.anchor
            }


def regenerate_refdex(articles):
    INPUT_REFDEXES = [
        'misc-refdex/games-refdex.json',
        'misc-refdex/texts-refdex.json',
        'misc-refdex/retrocomputing-refdex.json',
        'misc-refdex/languages-refdex.json',
        'misc-refdex/misc-refdex.json',
        'misc-refdex/dossier-refdex.json',
        'misc-refdex/glosscubator-refdex.json',
        'misc-refdex/sgon-refdex.json',
    ]
    refdex = read_refdex_from(INPUT_REFDEXES)
    accumulate_article_refdex(articles, refdex)
    text = json.dumps(refdex, indent=4, sort_keys=True)
    # As above, write the string directly in text mode with an explicit
    # UTF-8 encoding rather than trying to write encoded bytes.
    with open('refdex.json', 'w', encoding='utf-8') as f:
        f.write(text)


if __name__ == '__main__':
    articles_doc = read_document_from('article/README.md')
    articles_map = {
        section.title: {
            "title": section.title,
            "image_url": section.properties.get("image_url"),
            "schema": section.properties.get("schema"),
        }
        for section in articles_doc.sections
    }
    articles = [(title, properties['schema']) for title, properties in articles_map.items()]

    regenerate_refdex(articles)
    refdex = read_refdex_from(['refdex.json'], input_refdex_filename_prefix='../')
    rewrite_documents(articles, refdex)