git @ Cat's Eye Technologies Feedmark / master src / feedmark / parser.py
master

Tree @master (Download .tar.gz)

parser.py @master raw · history · blame

# Copyright (c) 2019-2024 Chris Pressey, Cat's Eye Technologies
# This file is distributed under an MIT license.  See LICENSES/ directory.
# SPDX-License-Identifier: LicenseRef-MIT-X-Feedmark

from datetime import datetime
from collections import OrderedDict
import re

from marko.block import (
    SetextHeading,
    Heading,
    HTMLBlock,
    BlankLine,
    List,
    Paragraph,
    LinkRefDef,
)
from marko.inline import Image, Link, RawText, Literal
from marko.parser import Parser as MarkoParser

from .models import Document, Section
from .formats.markdown import markdown_to_html5, markdown_to_html5_deep
from .utils import quote


def strip_square_brackets(s):
    """Return `s` without its enclosing square brackets, if it has them.

    The brackets are removed only when `s` both starts with "[" and ends
    with "]"; any other string is returned unchanged.
    """
    is_bracketed = s.startswith("[") and s.endswith("]")
    if not is_bracketed:
        return s
    return s[1:-1]


def has_image_child(child):
    """Return True if `child` is an image, or a link that starts with one.

    `child` is an inline element.  It qualifies when it is an `Image`
    itself, or when it is a `Link` whose first child is an `Image`
    (i.e. a clickable image).  Anything else yields False.
    """
    if isinstance(child, Image):
        return True
    if not isinstance(child, Link):
        return False
    # A link can have no children at all (e.g. empty link text, `[](url)`);
    # indexing into it unguarded would raise IndexError instead of simply
    # reporting that no image is present.
    contents = child.children
    return bool(contents) and isinstance(contents[0], Image)


def is_image_paragraph(child):
    """Return True if `child` is a `Paragraph` whose first child is an image.

    "Image" here is whatever `has_image_child` accepts for the paragraph's
    first inline element.  Non-paragraph elements always yield False.
    """
    if not isinstance(child, Paragraph):
        return False
    leading_inline = child.children[0]
    return has_image_child(leading_inline)


def obtain_heading_text(element):
    """Reassemble the plain text of a heading from its inline children.

    `RawText` children contribute their text as-is.  `Literal` children
    contribute a backslash followed by their text.  Children of any other
    type contribute nothing.
    """

    def fragment(node):
        # Text contributed by a single inline child of the heading.
        if isinstance(node, RawText):
            return node.children
        if isinstance(node, Literal):
            return "\\" + node.children
        return ""

    return "".join(fragment(node) for node in element.children)


class Parser:
    """Builds a `Document` (with its `Section`s) from Markdown text."""

    def __init__(self):
        """Create a parser; it takes no configuration."""
        pass

    def parse(self, markdown_text):
        """Parse `markdown_text` and return the resulting `Document`.

        The text is parsed with marko and the top-level block elements are
        then dispatched, in order, as follows:

        *   A level-1 heading sets the document title.
        *   A level-3 heading starts a new `Section`, which is appended to
            `document.sections`.
        *   Any other heading is added to the current section's body, or to
            the document preamble if no section has been started.
        *   An HTML block seen before the first section is added to the
            document's header comment.
        *   The first list seen while properties are still being read
            supplies the property list items of the current section (or of
            the document, if no section has been started).
        *   Link reference definitions are always recorded on the document.
        *   Image paragraphs at the start of a section are recorded as that
            section's images.
        *   Everything else is added to the current section's body, or to
            the document preamble.  A paragraph ends the region in which
            images and properties are recognized.
        """
        marko_parser = MarkoParser()
        marko_document = marko_parser.parse(markdown_text)

        document = Document()
        section = None
        # These flags are reset each time a new section begins, and are
        # cleared once the images/properties region has been passed.
        reading_images = True
        reading_properties = True

        for child in marko_document.children:
            if isinstance(child, (Heading, SetextHeading)):
                title_text = obtain_heading_text(child)
                if child.level == 1:
                    document.set_title(title_text)
                elif child.level == 3:
                    section = Section(title_text)
                    reading_images = True
                    reading_properties = True
                    document.sections.append(section)
                    section.document = document
                elif section:
                    section.add_to_body(child)
                else:
                    document.add_to_preamble(child)
            elif isinstance(child, HTMLBlock) and not section:
                document.add_to_header_comment(child)
            elif reading_properties and isinstance(child, List):
                reading_images = False
                reading_properties = False
                for listitem in child.children:
                    if not section:
                        document.add_property_listitem(listitem)
                    else:
                        section.add_property_listitem(listitem)
            elif isinstance(child, LinkRefDef):
                # LinkRefDef elements always go in the main document, not sections.
                document.link_ref_defs.add(
                    strip_square_brackets(child.label.text), child.dest, child.title
                )
            elif reading_images and section and is_image_paragraph(child):
                # Only a section can hold image paragraphs.  Requiring a
                # current section here means an image paragraph that appears
                # before the first section falls through to the preamble
                # below, rather than failing on `section` being None.
                section.add_image_paragraph(child)
            else:
                if isinstance(child, Paragraph):
                    reading_images = False
                    reading_properties = False
                if section:
                    section.add_to_body(child)
                else:
                    document.add_to_preamble(child)

        return document