#!/usr/bin/env python3
# Copyright (c) 2019-2024 Chris Pressey, Cat's Eye Technologies
# This file is distributed under an MIT license. See LICENSES/ directory.
# SPDX-License-Identifier: LicenseRef-MIT-X-Feedmark
# This script demonstrates how the modules from the feedmark package
# can be used in scripts. It also requires the beautifulsoup4 package
# to be installed.
import json
import sys
from bs4 import BeautifulSoup
from feedmark.formats.markdown import markdown_to_html5
from feedmark.main import read_document_from
def extract_links(html_text):
    """Return the ``href`` value of every ``<a>`` element in *html_text*.

    The text is parsed with BeautifulSoup's ``html.parser`` backend and the
    hrefs are returned in the order ``find_all`` reports the anchors.

    Anchors that carry no ``href`` attribute (for example a bare
    ``<a name="top">`` target) are skipped, so the returned list never
    contains ``None``.  An ``href`` that is present but empty is kept.
    """
    soup = BeautifulSoup(html_text, "html.parser")
    hrefs = (anchor.get("href") for anchor in soup.find_all("a"))
    # Tag.get() yields None when the attribute is absent; such anchors are
    # not links, and a None here would end up as a null "url" in the output.
    return [href for href in hrefs if href is not None]
def extract_links_from_documents(documents):
    """Collect a flat list of link records from *documents*.

    Each record is a dict that always has a ``"url"`` key, plus
    ``"document"`` (and ``"section"`` when the link came from a section)
    and, where one is known, ``"name"``.  For every document, records are
    gathered in this order:

    1. its link reference definitions,
    2. then, per section: the section's images, the links found in each
       property value, and finally the links found in the section body.

    Property values and section bodies are rendered with
    ``markdown_to_html5`` and scanned with ``extract_links``.
    """
    collected = []

    def make_link(url, document=None, section=None, **kwargs):
        # Build one record; a truthy ``document`` takes precedence over
        # ``section``, and extra keyword arguments are merged in last.
        record = {"url": url}
        if document:
            record["document"] = document.title
        elif section:
            record["section"] = section.title
            record["document"] = section.document.title
        record.update(kwargs)
        return record

    def extend_links(section, md):
        # Render the markdown fragment and record every link found in it
        # against the given section.
        for url in extract_links(markdown_to_html5(md)):
            collected.append(make_link(url, section=section))

    for document in documents:
        ref_defs = document.link_ref_defs
        for label, (url, _title) in ref_defs.items():
            # Report the alphabetically first of the labels as written in
            # the source, rather than the key used by the mapping.
            shown_name = sorted(ref_defs.unnormalized_labels_for(label))[0]
            collected.append(make_link(url, document=document, name=shown_name))

        for section in document.sections:
            for image_name, image_url in section.images:
                collected.append(
                    make_link(image_url, section=section, name=image_name)
                )

            for _key, value in section.properties.items():
                # A property holds either one value or a list of values;
                # treat both uniformly as a list of fragments.
                fragments = value if isinstance(value, list) else [value]
                for fragment in fragments:
                    extend_links(section, fragment)

            extend_links(section, section.body)

    return collected
def main(args):
    """Read the files named in *args* and print their links as JSON.

    Every filename is loaded with ``read_document_from``; the links
    extracted from the resulting documents are written to standard output
    as JSON, indented by four spaces with keys sorted.
    """
    documents = [read_document_from(filename) for filename in args]
    report = json.dumps(
        extract_links_from_documents(documents),
        indent=4,
        sort_keys=True,
    )
    sys.stdout.write(report)
# Script entry point: every command-line argument after the program name
# is treated as a filename and handed to main().
if __name__ == "__main__":
    cli_filenames = sys.argv[1:]
    main(cli_filenames)