git @ Cat's Eye Technologies NaNoGenMo-Entries-2019 / master The Insidiously Rainbow-Wigged Dr. Fu-Manchu / tirwdfm.py
master

Tree @master (Download .tar.gz)

tirwdfm.py @master raw · history · blame

#!/usr/bin/env python3.5
# encoding: UTF-8

import random
import sys
import re
from bs4 import BeautifulSoup, NavigableString


def collapse_paragraph(t):
    """Return ``t`` normalised for phrase matching.

    The characters ``" ' . , ! ? ; :`` are deleted, every em dash is turned
    into a space, the text is upper-cased, and each run of whitespace
    (spaces, tabs, newlines) is collapsed to a single space with leading and
    trailing whitespace dropped.

    Collapsing *all* whitespace matters because paragraph text taken from
    hard-wrapped HTML contains newlines; splitting on a literal space only
    would leave "he\\nsaid" as "HE\\nSAID", which can never match a phrase
    such as "HE SAID".
    """
    # One pass: map the em dash to a space and delete the punctuation marks.
    table = str.maketrans('—', ' ', '"\'.,!?;:')
    return ' '.join(t.translate(table).upper().split())


def paragraph_modification(t):
    """Choose the wig-adjusting sentence to append to paragraph ``t``.

    Returns one of the three "... adjusted ... rainbow-coloured novelty wig."
    sentences (first, third-feminine or third-masculine person), or ``None``
    when the paragraph should be left alone.

    A paragraph is left alone when it has an odd number of double quotes,
    when it ends with ``:``, ``—"`` or ``swooned.``, when it contains the
    phrase ``put in the wine``, or when no speaker phrase is found in it.

    Speaker phrases are looked up as plain substrings of the text returned
    by ``collapse_paragraph``; the "I" phrases are tried first, then the
    "SHE" phrases, then the "HE"/named-character phrases.
    """
    # Guard clauses: paragraphs that must never be modified.
    if t.count('"') % 2 != 0:
        return None
    if t.endswith(':') or t.endswith('—"') or t.endswith('swooned.'):
        return None
    if 'put in the wine' in t:
        return None

    normalised = collapse_paragraph(t)

    speech_verbs = (
        "SAID", "ASKED", "RAPPED", "LAUGHED", "REPLIED", "CONFESSED", "CRIED", "DEMANDED", "CONTINUED",
        "ADDED", "WHISPERED", "DIRECTED", "SUGGESTED", "HISSED", "MUTTERED", "RETURNED", "JERKED",
        "PRONOUNCED", "EXPLAINED", "GROANED",
    )

    def subject_then_verb(subject):
        # e.g. "SMITH SAID", "SMITH ASKED", ...
        return [subject + " " + verb for verb in speech_verbs]

    def verb_then_subject(subject):
        # e.g. "SAID SMITH", "ASKED SMITH", ...
        return [verb + " " + subject for verb in speech_verbs]

    # Ordered rules: the first group with any phrase present wins.
    rules = (
        (
            'I adjusted my rainbow-coloured novelty wig.',
            subject_then_verb("I"),
        ),
        (
            'She adjusted her rainbow-coloured novelty wig.',
            subject_then_verb("SHE"),
        ),
        (
            'He adjusted his rainbow-coloured novelty wig.',
            subject_then_verb("HE") +
            subject_then_verb("SMITH") +
            verb_then_subject("SMITH") +
            verb_then_subject("MY FRIEND") +
            subject_then_verb("FU-MANCHU") +
            verb_then_subject("FU-MANCHU") +
            subject_then_verb("WEYMOUTH") +
            verb_then_subject("WEYMOUTH") +
            subject_then_verb("THE DETECTIVE") +
            verb_then_subject("THE DETECTIVE"),
        ),
    )

    for sentence, phrases in rules:
        if any(phrase in normalised for phrase in phrases):
            return sentence

    return None


def main(args):
    """Print the rainbow-wigged version of a novel as an HTML document.

    ``args[0]`` is the path of the source HTML file. Every line from the
    ``<A NAME="chap01"></A>`` anchor onwards is kept, wrapped in a minimal
    HTML document (with a new title and ``<h1>``) and parsed with
    BeautifulSoup. Each element directly inside ``<body>`` is then written
    to standard output as ``<tag>text</tag>``, where ``text`` is the
    element's plain text; any markup nested inside the element is therefore
    flattened away.

    For ``<p>`` elements, ``paragraph_modification`` may supply a sentence
    that is appended to the paragraph. A sentence is only appended when it
    differs from the previously appended one and the previous insertion was
    not in the immediately preceding paragraph.

    Raises ``IndexError`` if ``args`` is empty.
    """
    # Function-scope import so this block does not rely on the file header.
    import html

    filename = args[0]
    title = 'The Insidiously Rainbow-Wigged Dr. Fu-Manchu'

    lines = []
    accum = False
    # NOTE(review): the file is decoded with the platform default encoding;
    # confirm that matches the encoding of the source HTML.
    with open(filename, 'r') as f:
        for line in f:
            line = line.rstrip()
            if line == '<A NAME="chap01"></A>':
                accum = True
            if accum:
                lines.append(line)

    document_head = (
        '<!DOCTYPE html><html><head><title>{}</title></head><body>'.format(title)
    )
    text = (
        document_head +
        '<h1>{}</h1>'.format(title) +
        '\n'.join(lines) +
        '</body></html>'
    )
    soup = BeautifulSoup(text, 'html.parser')

    # Emit the document opening so the closing tags printed at the end are
    # balanced and the <title> actually reaches the output.
    print(document_head)

    # (paragraph number, sentence) of each insertion made so far. The
    # sentinel entry guarantees the first candidate passes both checks below.
    history = [(-1, "zzz")]
    p_num = 0
    for child in soup.body.children:
        # Skip bare text nodes between the elements.
        if isinstance(child, NavigableString):
            continue
        raw_text = child.get_text()
        if 'The Project Gutenberg EBook' in raw_text:
            continue
        if 'End of Project Gutenberg' in raw_text:
            continue
        paragraph_text = raw_text.strip()
        paragraph_text = paragraph_text.replace("fashionable hat", "fashionable hairpiece")
        if child.name.lower() == 'p':
            p_num += 1

            modification = paragraph_modification(paragraph_text)
            if modification is not None:
                (last_p_num, last_modification) = history[-1]

                if last_modification != modification and (p_num - last_p_num > 1):
                    history.append((p_num, modification))
                    paragraph_text += '  ' + modification

        # get_text() returns decoded text, so '&', '<' and '>' must be
        # escaped again before the text is wrapped in tags.
        print("<{}>{}</{}>".format(
            child.name, html.escape(paragraph_text, quote=False), child.name
        ))
    print('</body></html>')


# Script entry point: pass everything after the program name to main().
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(cli_args)