git @ Cat's Eye Technologies Feedmark / master src / feedmark / models.py
master

Tree @master (Download .tar.gz)

models.py @masterraw · history · blame

# Copyright (c) 2019-2024 Chris Pressey, Cat's Eye Technologies
# This file is distributed under an MIT license.  See LICENSES/ directory.
# SPDX-License-Identifier: LicenseRef-MIT-X-Feedmark

from datetime import datetime
from collections import OrderedDict
import re

from marko.block import BlankLine, LinkRefDefs
from marko.element import Element
from marko.inline import Image, Link

from .formats.markdown import markdown_to_html5, markdown_to_html5_deep
from .renderer import CleanMarkdownRenderer
from .utils import quote


def rewrite_link_ref_defs(refdex, link_ref_defs):
    """Return a new ``LinkRefDefs`` with URLs rewritten according to `refdex`.

    Each definition in `link_ref_defs` is re-added under the alphabetically
    first of its unnormalized labels; a label that has already been emitted
    is skipped.  When that label has an entry in `refdex`, the URL is
    replaced by one derived from the entry; otherwise the original URL is
    kept.  Titles are carried over unchanged.

    Raises ValueError if a matching refdex entry has neither a
    filename(s)/anchor pair nor a "url" key.
    """
    from marko.block import LinkRefDefs

    def resolve(entry):
        # Work out the URL that a single refdex entry points at.
        has_anchor = "anchor" in entry
        if has_anchor and "filename" in entry:
            target = entry["filename"]
        elif has_anchor and "filenames" in entry:
            # pick the last one, for compatibility with single-refdex style
            target = entry["filenames"][-1]
        elif "url" in entry:
            return entry["url"]
        else:
            raise ValueError("Badly formed refdex entry: {}".format(entry))
        return "{}#{}".format(
            quote(target.encode("utf-8")),
            quote(entry["anchor"].encode("utf-8")),
        )

    rewritten = LinkRefDefs()
    emitted = set()
    for label, (url, title) in link_ref_defs.items():
        label = sorted(link_ref_defs.unnormalized_labels_for(label))[0]
        if label in emitted:
            continue
        emitted.add(label)
        if label in refdex:
            url = resolve(refdex[label])
        rewritten.add(label, url, title)
    return rewritten


class PropertyCollection:
    """Mixin giving an object an ordered mapping of named properties.

    Two kinds of property are understood:

    * ``:`` -- single-valued; giving the same key a second time is an error.
    * ``@`` -- multi-valued; every value given is appended to a list.
    """

    def __init__(self, *args, **kwargs):
        # Extra positional/keyword arguments are accepted but not used here.
        self.properties = OrderedDict()

    def add_property(self, kind, key, value):
        """Record `value` under `key` according to the property `kind`.

        Raises KeyError when a ":" property is given twice, and
        NotImplementedError for any kind other than ":" or "@".
        """
        if kind == "@":
            values = self.properties.setdefault(key, [])
            values.append(value)
            return
        if kind != ":":
            raise NotImplementedError(kind)
        if key in self.properties:
            raise KeyError("{} already given".format(key))
        self.properties[key] = value

    def parse_property(self, listitem_text):
        """Split list-item text into a ``(kind, key, value)`` triple.

        The "@" separator is tried before ":", so text containing both is
        read as an "@" property.  Raises ValueError if neither is present.
        """
        attempts = (
            ("@", r"^(.*?)\s*\@\s*(.*?)\s*$"),
            (":", r"^(.*?)\s*\:\s*(.*?)\s*$"),
        )
        for kind, pattern in attempts:
            found = re.match(pattern, listitem_text)
            if found:
                key, val = found.groups()
                return (kind, key, val)
        raise ValueError('Expected property, found "{}"'.format(listitem_text))

    def add_property_listitem(self, element):
        """Render `element`, parse it as a property, and store the result.

        Relies on ``self.renderer``, which this mixin does not set itself.
        """
        rendered = self.renderer.render(element)
        kind, key, value = self.parse_property(rendered.strip())
        self.add_property(kind, key, value)


class Document(PropertyCollection):
    """A document made of a title, properties, a preamble and sections.

    Header-comment and preamble content is stored as parsed elements and
    rendered to Markdown on demand through ``self.renderer``.
    """

    def __init__(self, *args, renderer=None, **kwargs):
        # The base initializer creates the (ordered) ``self.properties``.
        super().__init__(*args, **kwargs)

        self.renderer = renderer or CleanMarkdownRenderer()

        self.title = "Untitled"
        # Read by to_json_data(); nothing in this class assigns a real value,
        # so it is presumably set by whatever loads the document -- TODO confirm.
        self.filename = None

        self._header_comment: list[Element] = []
        self._preamble: list[Element] = []
        self.sections: list["Section"] = []
        self.link_ref_defs = LinkRefDefs()

    def __str__(self):
        # Format the title as text; encoding it first would embed a bytes
        # repr (b'...') in the result on Python 3.
        return "document '{}'".format(self.title)

    def set_title(self, title):
        """Replace the document title."""
        self.title = title

    @property
    def header_comment(self):
        """The header comment rendered as Markdown, with a trailing newline."""
        rendered = [self.renderer.render(element) for element in self._header_comment]
        return "".join(rendered) + "\n"

    def has_header_comment(self) -> bool:
        """Return True if any header-comment element has been added."""
        return bool(self._header_comment)

    def add_to_header_comment(self, element):
        """Append a parsed element to the header comment."""
        self._header_comment.append(element)

    @property
    def preamble(self):
        """The preamble rendered as Markdown, with a trailing newline.

        Blank-line elements that come before the first non-blank element
        are dropped; later blank lines are rendered normally.
        """
        rendered = []
        skipping_blanks = True
        for element in self._preamble:
            if skipping_blanks and isinstance(element, BlankLine):
                continue
            skipping_blanks = False
            rendered.append(self.renderer.render(element))
        return "".join(rendered) + "\n"

    def add_to_preamble(self, element):
        """Append a parsed element to the preamble."""
        self._preamble.append(element)

    def has_preamble(self) -> bool:
        """Return True if any preamble element has been added."""
        return bool(self._preamble)

    def rewrite_link_ref_defs(self, refdex):
        """Replace this document's link reference definitions with versions
        rewritten against `refdex` by the module-level function of the
        same name."""
        self.link_ref_defs = rewrite_link_ref_defs(refdex, self.link_ref_defs)

    def global_link_ref_defs(self):
        """Return the document's link reference definitions."""
        return self.link_ref_defs

    def to_json_data(self, htmlize=False, ordered=False, link_ref_defs=None):
        """Return a JSON-serializable dict describing the document.

        htmlize -- convert the preamble and properties to HTML5.
        ordered -- emit properties as a list of ``[key, value]`` pairs
            (preserving order) instead of a plain dict.
        link_ref_defs -- link reference definitions used when htmlizing;
            if falsy, the document's own are used.  The same value is
            passed on to every section.
        """
        preamble = self.preamble.strip()
        properties = self.properties

        if htmlize:
            if not link_ref_defs:
                link_ref_defs = self.global_link_ref_defs()
            preamble = markdown_to_html5(preamble, link_ref_defs=link_ref_defs)
            properties = markdown_to_html5_deep(
                self.properties, link_ref_defs=link_ref_defs
            )

        if ordered:
            properties = [[key, value] for key, value in properties.items()]
        else:
            properties = dict(properties)

        return {
            "filename": self.filename,
            "title": self.title,
            "properties": properties,
            "preamble": preamble,
            "sections": [
                section.to_json_data(
                    htmlize=htmlize, ordered=ordered, link_ref_defs=link_ref_defs
                )
                for section in self.sections
            ],
        }


class Section(PropertyCollection):
    """One titled section: body elements, properties and image records.

    Body content is stored as parsed elements and rendered to Markdown on
    demand through ``self.renderer``.
    """

    def __init__(self, title, *args, renderer=None, **kwargs):
        # The base initializer creates the (ordered) ``self.properties``.
        super().__init__(*args, **kwargs)

        self.renderer = renderer or CleanMarkdownRenderer()
        # Not assigned anywhere in this class; presumably set by whoever
        # attaches the section to a document -- TODO confirm.
        self.document = None
        self.title = title
        self._body: list[Element] = []
        self.images = []

    def __str__(self):
        # Format the title as text; encoding it first would embed a bytes
        # repr (b'...') in the result on Python 3.
        s = "section '{}'".format(self.title)
        if self.document:
            s += " of " + str(self.document)
        return s

    def _image_description(self, image):
        """Render the first child of `image` as stripped text.

        Returns "" when the image has no children (e.g. empty alt text)
        instead of failing on the index.
        """
        if not image.children:
            return ""
        return self.renderer.render(image.children[0]).strip()

    def collect_image_children(self, element):
        """Yield a record for each image found directly under `element`.

        A bare image yields ``description`` and ``source``; a link whose
        first child is an image additionally yields ``link``.
        """
        for child in element.children:
            if isinstance(child, Image):
                yield {
                    "description": self._image_description(child),
                    "source": child.dest,
                }
            # Guard against a link with no children before indexing into it.
            if (
                isinstance(child, Link)
                and child.children
                and isinstance(child.children[0], Image)
            ):
                image = child.children[0]
                yield {
                    "description": self._image_description(image),
                    "source": image.dest,
                    "link": child.dest,
                }

    def add_image_paragraph(self, element):
        """Append the image records found in `element` to ``self.images``."""
        self.images.extend(self.collect_image_children(element))

    @property
    def body(self):
        """The body rendered as Markdown.

        Leading newlines are removed and any run of trailing newlines is
        collapsed to exactly one.
        """
        md = "".join(self.renderer.render(element) for element in self._body)
        md += "\n"
        md = re.sub(r"^\n+", "", md)
        md = re.sub(r"\n+$", "\n", md)
        return md

    def add_to_body(self, element):
        """Append a parsed element to the body."""
        self._body.append(element)

    @property
    def publication_date(self):
        """The ``date`` property parsed into a ``datetime``.

        Raises KeyError if there is no ``date`` property, and
        NotImplementedError if its value matches none of the known formats.
        """
        try:
            date_text = self.properties["date"]
        except KeyError:
            raise KeyError("could not find 'date' on {}".format(self))

        date_formats = (
            "%b %d %Y %H:%M:%S",
            "%a, %d %b %Y %H:%M:%S GMT",
        )
        for date_format in date_formats:
            try:
                return datetime.strptime(date_text, date_format)
            except ValueError:
                # Not this format; try the next one.
                continue
        raise NotImplementedError(
            "unrecognized date format: {!r}".format(date_text)
        )

    @property
    def anchor(self):
        """An anchor string derived from the title.

        The title is stripped and lower-cased, trailing non-word characters
        are removed, every character other than word characters,
        whitespace, ``/``, ``.``, ``'`` and ``-`` is dropped, and each
        remaining run of whitespace, ``/``, ``.``, ``'`` or ``-`` becomes a
        single hyphen.
        """
        title = self.title.strip().lower()
        title = re.sub(r"[^\w]+$", "", title)
        title = re.sub(r"[^\w\s\/\.\'-]", "", title)
        return re.sub(r"[\s\/\.\'-]+", "-", title)

    def to_json_data(self, htmlize=False, ordered=False, link_ref_defs=None):
        """Return a JSON-serializable dict describing the section.

        htmlize -- convert the body and properties to HTML5.
        ordered -- emit properties as a list of ``[key, value]`` pairs
            (preserving order) instead of a plain dict.
        link_ref_defs -- link reference definitions passed to the HTML
            conversion; only used when `htmlize` is true.
        """
        body = self.body
        properties = self.properties

        if htmlize:
            body = markdown_to_html5(body, link_ref_defs=link_ref_defs)
            properties = markdown_to_html5_deep(
                self.properties, link_ref_defs=link_ref_defs
            )

        if ordered:
            properties = [[key, value] for key, value in properties.items()]
        else:
            properties = dict(properties)

        return {
            "title": self.title,
            "anchor": self.anchor,
            "images": self.images,
            "properties": properties,
            "body": body,
        }