git @ Cat's Eye Technologies T-Rext / master bin / t-rext
master

Tree @master (Download .tar.gz)

t-rext @master · raw · history · blame

#!/usr/bin/env python3

# SPDX-FileCopyrightText: (c) 2024 Chris Pressey, Cat's Eye Technologies
# This file is distributed under an MIT license.  For details, see LICENSES/ dir.
# SPDX-License-Identifier: LicenseRef-MIT-X-T-Rext

"""Usage: t-rext [OPTIONS] FILES

Adjusts spacing (and other factors) to make a generated text more presentable.
"""

from os.path import realpath, dirname, join
import sys

# Put the 'src' directory that sits next to this script's own directory
# (located via realpath, so symlinks to the script are resolved first) at the
# front of the module search path.  Presumably this is where the t_rext
# package imported below lives -- confirm against the repository layout.
sys.path.insert(0, join(dirname(realpath(sys.argv[0])), '..', 'src'))

# ----------------------------------------------------------------- #

from argparse import ArgumentParser
import codecs
import sys

from t_rext.processors import (
    CapitalizationProcessor,
    EllipsisFixer,
    TidyPunctuationLineFilter,
    TidyStartOfLineProcessor,
    QuoteOrienterLineFilter,
    LinesToParagraphsRegrouper,
)


def process(f, options):
    """Run the text from `f` through the processor chain and print the result.

    Each item yielded by LinesToParagraphsRegrouper(f) is passed, in order,
    through QuoteOrienterLineFilter, TidyStartOfLineProcessor,
    TidyPunctuationLineFilter, CapitalizationProcessor and EllipsisFixer.
    Every line produced by the end of that chain is written to standard
    output followed by a newline.

    Arguments:
    f -- source of input text (main() passes an open text file or sys.stdin)
    options -- parsed command-line options; not currently consulted here
    """
    for para in LinesToParagraphsRegrouper(f):
        proc = QuoteOrienterLineFilter(para)
        proc = TidyStartOfLineProcessor(proc)
        proc = TidyPunctuationLineFilter(proc)
        proc = CapitalizationProcessor(proc)
        proc = EllipsisFixer(proc)
        for line in proc:
            try:
                sys.stdout.write(line)
            except UnicodeEncodeError:
                # stdout's encoding cannot represent this line.  A text
                # stream only accepts str, so handing it the encoded bytes
                # would raise TypeError; emit the UTF-8 bytes through the
                # underlying binary buffer instead.
                raw = getattr(sys.stdout, 'buffer', None)
                if raw is None:
                    # No binary layer to fall back on; surface the error.
                    raise
                # Flush pending text first so output stays in order.
                sys.stdout.flush()
                raw.write(line.encode('utf-8'))
            sys.stdout.write('\n')


def main(args):
    """Parse the command line and process every named input in turn.

    Arguments:
    args -- list of command-line argument strings, without the program name

    Whatever the parser does not recognise (parse_known_args) is treated as
    a list of filenames.  The filename "-" means standard input; any other
    name is opened as a UTF-8 text file.  Each input is passed to process().
    """
    # __doc__ is None when docstrings are stripped (python -OO); in that case
    # let argparse generate its own usage text instead of crashing on .strip().
    usage = __doc__.strip() if __doc__ else None
    argparser = ArgumentParser(usage=usage)

    argparser.add_argument('--version', action='version', version="%(prog)s 0.4")

    (options, args) = argparser.parse_known_args(args)

    for filename in args:
        if filename == "-":
            process(sys.stdin, options)
        else:
            # Built-in open() replaces codecs.open(), which is deprecated
            # since Python 3.14; it also gives files the same text-mode line
            # handling that sys.stdin gets.
            with open(filename, 'r', encoding='UTF-8') as f:
                process(f, options)


# Run main() only when this file is executed as a script, handing it the
# command-line arguments with the program name removed.
if __name__ == '__main__':
    main(sys.argv[1:])