git @ Cat's Eye Technologies NaNoGenLab / master poetic-inventory / poetic-inventory.py
master

Tree @master (Download .tar.gz)

poetic-inventory.py @master raw · history · blame

#!/usr/bin/env python

import random
import re
import sys

# tqdm (a third-party progress-bar wrapper for iterables) is optional: when it
# cannot be imported, substitute a pass-through so the loops that wrap their
# iterables in tqdm(...) still run, just without progress output.
try:
    from tqdm import tqdm
except ImportError:
    def tqdm(x):
        """Stand-in for ``tqdm.tqdm``: return ``x`` unchanged."""
        return x


def sentencify(words):
    """Return the strings in ``words`` joined by single spaces."""
    separator = ' '
    return separator.join(words)


def phrase_is_beginner(phrase):
    """Tell whether ``phrase`` looks like the start of a sentence.

    A phrase counts as a beginner when the first character of its first
    word is upper case.  If that character cannot be inspected at all
    (for instance the phrase is empty, or its first word is the empty
    string), the phrase is reported as a beginner.
    """
    try:
        first_char = phrase[0][0]
        starts_upper = first_char.isupper()
    except Exception:
        # Nothing to look at: treat the phrase as a beginner.
        starts_upper = True
    return starts_upper


def phrase_is_ender(phrase):
    """Tell whether ``phrase`` looks like the end of a sentence.

    A phrase counts as an ender unless its last word finishes with a
    comma or a semicolon.  If the last word cannot be inspected at all
    (for instance the phrase is empty), the phrase is reported as an
    ender.
    """
    try:
        last_word = phrase[-1]
        continues = last_word.endswith((',', ';'))
    except Exception:
        # Nothing to look at: treat the phrase as an ender.
        return True
    return not continues


def main(argv):
    """Print a "poetic inventory" built from the given text files.

    ``argv`` is a ``sys.argv``-style list: ``argv[0]`` is ignored and every
    later entry is the path of a text file to read.

    The text is split into whitespace-separated words (a ``--`` inside a
    word also splits it), one leading and one trailing quote mark are
    removed from each word, and the words are grouped into phrases.  A
    phrase ends at a word finishing with ``.``, ``!``, ``?``, ``,`` or ``;``
    (the titles ``Mr.``, ``Mrs.`` and ``Dr.`` do not end a phrase).

    Only phrases that are neither sentence beginners nor sentence enders
    are considered, and of those only the ones whose first word is ``a`` or
    ``an`` are written to standard output, in sorted order.  Phrases ending
    in a comma are run together on one line, separated by spaces; a phrase
    ending in a semicolon has it replaced by a full stop, closes the line
    and is followed by a blank line.  The first phrase of each such line is
    capitalized.  The output always finishes with ``for that is all.``.

    Output goes through ``sys.stdout.write`` rather than the ``print``
    statement so that this function parses under both Python 2 and
    Python 3; the text produced is the same as the print statements gave.
    """
    filenames = argv[1:]

    # Collect every word of every file, splitting on whitespace and '--'.
    words = []
    for filename in tqdm(filenames):
        with open(filename, 'r') as f:
            for line in f:
                for bit in line.split():
                    words.extend(bit.split('--'))

    # Group the words into phrases, cutting after punctuation.
    phrases = []
    phrase = []
    for word in tqdm(words):
        if word.startswith(('"', "'")):
            word = word[1:]
        if word.endswith(('"', "'")):
            word = word[:-1]
        phrase.append(word)
        if (word not in ('Mr.', 'Mrs.', 'Dr.') and
                word.endswith(('.', '!', '?', ',', ';'))):
            phrases.append(phrase)
            phrase = []
    # Words left over after the last punctuation mark form a final phrase.
    if phrase:
        phrases.append(phrase)

    # Keep the mid-sentence fragments that start with an indefinite article.
    # A phrase that is not a beginner is known to have a non-empty first
    # word, so p[0] is safe here.  Selecting before sorting yields the same
    # order as sorting everything first, because sorted() is stable.
    inventory = sorted(
        p for p in phrases
        if not phrase_is_beginner(p) and not phrase_is_ender(p) and
        p[0] in ('a', 'an')
    )

    pending = []  # rendered phrases waiting to be written as one line
    cap_next = True
    for p in inventory:
        if cap_next:
            p[0] = p[0].capitalize()
            cap_next = False
        if p[-1].endswith(';'):
            # A semicolon closes the current line as a sentence.
            p[-1] = p[-1][:-1] + '.'
            pending.append(sentencify(p))
            sys.stdout.write(' '.join(pending) + '\n\n')
            pending = []
            cap_next = True
        else:
            pending.append(sentencify(p))

    # The closing words share a line with any phrases still pending.
    pending.append('for that is all.')
    sys.stdout.write(' '.join(pending) + '\n')


# Run only when executed as a script.  The whole argument vector is passed
# on; main() itself skips argv[0] and treats the rest as input filenames.
if __name__ == '__main__':
    main(sys.argv)