git @ Cat's Eye Technologies NaNoGenMo-Entries-2019 / master Anne of Green Garbles / xform-tag-with-state.py
master

Tree @master (Download .tar.gz)

xform-tag-with-state.py @masterraw · history · blame

#!/usr/bin/env python3.6

"""

*   input format: tagged tokenstream
*   output format: tagged tokenstream

Takes a stream of tokens and outputs a tokenstream where every
token is decorated with what the new state of the state machine
would be after accepting the token.

As a side effect, it checks whether the structure is
consistent (a prerequisite for creating a stateful stochastic
model).

"""

from argparse import ArgumentParser
import re
import sys

import fsm

# Parsed command-line options; None until main() assigns the argparse result.
OPTIONS = None


def parse_entry(entry):
    """Split one tagged-token line into its word and its tags.

    A line consists of zero or more space-separated ``key=value`` fields
    followed by the word itself as the final field.

    :param entry: one line of the tagged tokenstream (surrounding
        whitespace, including the trailing newline, is ignored).
    :returns: a ``(word, tags)`` tuple where ``tags`` maps each key to its
        value, both as strings.  An empty line yields ``('', {})``.
    :raises ValueError: if a tag field does not contain ``=``.
    """
    components = entry.strip().split(' ')
    word = components[-1]
    tags = {}
    for pair in components[:-1]:
        if not pair:
            # Consecutive spaces produce empty fields; they carry no tag.
            continue
        # Split on the first '=' only, so values may themselves contain '='.
        pieces = pair.split('=', 1)
        if len(pieces) != 2:
            raise ValueError('malformed tag (expected key=value): {!r}'.format(pair))
        tags[pieces[0]] = pieces[1]
    return word, tags


def format_entry(word, tags):
    """Render a word and its tags as one tagged-token line (no newline).

    Each tag is written as ``key=value``; the rendered tags are sorted,
    joined with single spaces, and followed by a space and the word.

    :param word: the token itself, placed last on the line.
    :param tags: mapping of tag names to values.
    :returns: the formatted line as a string.
    """
    rendered = ['{}={}'.format(key, value) for key, value in tags.items()]
    rendered.sort()
    tag_text = ' '.join(rendered)
    return '{} {}'.format(tag_text, word)


def main(args):
    """Annotate every token read from stdin with a ``state`` tag.

    Parses ``args`` (stores the result in the module-level ``OPTIONS``),
    looks up the callable registered under ``--state-machine`` in
    ``fsm.STATE_MACHINES``, and then, for each input line, calls it with
    the current state, the zero-based line number and the word.  The value
    it returns becomes the new state and is written into the entry's
    ``state`` tag before the entry is printed to stdout.

    :param args: command-line arguments, excluding the program name.
    """
    global OPTIONS

    parser = ArgumentParser()
    parser.add_argument("--state-machine", type=str, default='dialogue')
    OPTIONS = parser.parse_args(args)

    step = fsm.STATE_MACHINES[OPTIONS.state_machine]
    current = 'default'

    line_no = 0
    for line in sys.stdin:
        word, tags = parse_entry(line)
        # The state attached to a token is the state *after* accepting it.
        current = step(current, line_no, word)
        tags['state'] = current
        sys.stdout.write(format_entry(word, tags) + '\n')
        line_no += 1


# Run as a script: pass the command-line arguments (minus the program name).
if __name__ == '__main__':
    main(sys.argv[1:])