#!/usr/bin/env python3.6

"""

Script to orchestrate building.

"""

from argparse import ArgumentParser
import sys
import os
from subprocess import run


OPTIONS = None


def zrun(targ, command, **kwargs):
    """Run a shell command, skipping it if its target file already exists.

    Both `targ` and `command` are format strings filled in from `kwargs`;
    each command's standard error is appended to {bucket}/stderr.log.
    """
    if targ is not None:
        targ = targ.format(**kwargs)
        if os.path.exists(targ):
            # Target has already been built; treat this step as up to date.
            return
    command += ' 2>>{bucket}/stderr.log'
    c = command.format(**kwargs)
    sys.stderr.write("*** {}\n".format(c))
    run(c, shell=True, check=True)


def process_pipeline(bucket, random_seed):
    try:
        os.mkdir(bucket)
    except FileExistsError:
        pass

    context = dict(
        bucket=bucket,
    )
    zrun(None, 'rm -f {bucket}/stderr.log', **context)
    if OPTIONS.clean_gen:
        zrun(None, 'rm -f {bucket}/out1.txt {bucket}/out2.txt {bucket}/out3.txt {bucket}/novel.md {bucket}/novel.html', **context)

    instances = []
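    # For each input text: extract a raw token stream, run it through the
    # chain of xform-* cleanup filters (eliminate, dedup, full-stop,
    # capitalization, end-punctuation, apostrophe and quote fixes), then tag
    # each token with its predecessor (when --markov-order=2) and with the
    # state machine, yielding one *_state_tagged.txt stream per input file.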
    for num, filename in enumerate(OPTIONS.filenames):
        print(filename)
        context = dict(
            filename=filename,
            bucket=bucket,
            num=str(num),
            fsm=OPTIONS.state_machine,
        )
        zrun('{bucket}/tokens_{num}_raw.txt', './extract-tokenstream.py "{filename}" > {bucket}/tokens_{num}_raw.txt', **context)
        zrun('{bucket}/tokens_{num}_elim.txt', './xform-eliminate.py "{filename}" < {bucket}/tokens_{num}_raw.txt > {bucket}/tokens_{num}_elim.txt', **context)
        zrun('{bucket}/tokens_{num}_deduped.txt', './xform-dedup.py < {bucket}/tokens_{num}_elim.txt > {bucket}/tokens_{num}_deduped.txt', **context)
        zrun('{bucket}/tokens_{num}_sentences.txt', './xform-fix-fullstops.py < {bucket}/tokens_{num}_deduped.txt > {bucket}/tokens_{num}_sentences.txt', **context)
        zrun('{bucket}/tokens_{num}_capitalized.txt', './xform-fix-capitalization.py < {bucket}/tokens_{num}_sentences.txt > {bucket}/tokens_{num}_capitalized.txt', **context)
        zrun('{bucket}/tokens_{num}_end-punctuation.txt', './xform-fix-end-punctuation.py < {bucket}/tokens_{num}_capitalized.txt > {bucket}/tokens_{num}_end-punctuation.txt', **context)
        zrun('{bucket}/tokens_{num}_contractions.txt', './xform-fix-apostrophes.py < {bucket}/tokens_{num}_end-punctuation.txt > {bucket}/tokens_{num}_contractions.txt', **context)
        zrun('{bucket}/tokens_{num}_singlequotes.txt', './xform-eliminate-singlequotes.py < {bucket}/tokens_{num}_contractions.txt > {bucket}/tokens_{num}_singlequotes.txt', **context)
        zrun('{bucket}/tokens_{num}_straightquotes.txt', './xform-fix-straightquotes.py < {bucket}/tokens_{num}_singlequotes.txt > {bucket}/tokens_{num}_straightquotes.txt', **context)
        zrun('{bucket}/tokens_{num}_clean.txt', './xform-fix-doublequotes.py < {bucket}/tokens_{num}_straightquotes.txt > {bucket}/tokens_{num}_clean.txt', **context)
        if OPTIONS.markov_order == 2:
            zrun('{bucket}/tokens_{num}_prev1_tagged.txt', './xform-tag-with-prev-tokens.py < {bucket}/tokens_{num}_clean.txt > {bucket}/tokens_{num}_prev1_tagged.txt', **context)
        elif OPTIONS.markov_order == 1:
            zrun('{bucket}/tokens_{num}_prev1_tagged.txt', 'cat < {bucket}/tokens_{num}_clean.txt > {bucket}/tokens_{num}_prev1_tagged.txt', **context)
        else:
            raise NotImplementedError(OPTIONS.markov_order)
        zrun('{bucket}/tokens_{num}_state_tagged.txt', './xform-tag-with-state.py --state-machine={fsm} < {bucket}/tokens_{num}_prev1_tagged.txt > {bucket}/tokens_{num}_state_tagged.txt', **context)
        instances.append(num)

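    # Combine all tagged token streams into a single Markov model, generate
    # the novel from it, strip the tags, and render Markdown and HTML output.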
    context = dict(
        bucket=bucket,
        instreams=' '.join(['{bucket}/tokens_{i}_state_tagged.txt'.format(bucket=bucket, i=i) for i in instances]),
        random_seed=str(random_seed),
        fsm=OPTIONS.state_machine,
        title=OPTIONS.title,
    )
    zrun('{bucket}/model.json', 'cat {instreams} | ./create-model.py > {bucket}/model.json', **context)
    zrun('{bucket}/out1.txt', './gen-from-model.py --chapter-count=30 --paragraph-count=35 --title="{title}" --random-seed={random_seed} --state-machine={fsm} {bucket}/model.json > {bucket}/out1.txt', **context)
    zrun('{bucket}/out2.txt', './xform-untag.py < {bucket}/out1.txt > {bucket}/out2.txt', **context)
    zrun('{bucket}/out3.txt', './xform-remove-short-sentences.py < {bucket}/out2.txt > {bucket}/out3.txt', **context)
    zrun('{bucket}/novel.md', './render-markdown.py < {bucket}/out3.txt > {bucket}/novel.md', **context)
    zrun(None, 'wc -w {bucket}/novel.md', **context)
    zrun('{bucket}/novel.html', 'markdown_py < {bucket}/novel.md > {bucket}/novel.html', **context)
    zrun(None, 'firefox {bucket}/novel.html &', **context)


def main(args):
    global OPTIONS

    argparser = ArgumentParser()
    argparser.add_argument('dirname', metavar='DIRNAME', type=str)
    argparser.add_argument('filenames', metavar='FILENAME', type=str, nargs='+')

    argparser.add_argument("--title", type=str, default="Generated Novel")
    argparser.add_argument("--state-machine", type=str, default='dialogue')
    argparser.add_argument("--random-seed", type=str, default='9009')
    argparser.add_argument("--clean-gen", action='store_true')
    argparser.add_argument("--markov-order", type=int, default=1)

    OPTIONS = argparser.parse_args(args)

    if OPTIONS.random_seed == 'random':
        import random
        random_seed = random.randint(0, 1000000)
        sys.stderr.write(">>> Random seed is: {}\n".format(random_seed))
    else:
        random_seed = int(OPTIONS.random_seed)

    process_pipeline(OPTIONS.dirname, random_seed)


if __name__ == '__main__':
    main(sys.argv[1:])