git @ Cat's Eye Technologies T-Rext / 307b7bc
Add a TidyStartOfLineProcessor. Chris Pressey 3 years ago
2 changed file(s) with 23 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
1717
1818 from t_rext.processors import (
1919 TidyPunctuationLineFilter,
20 TidyStartOfLineProcessor,
2021 QuoteOrienterLineFilter,
2122 LinesToParagraphsRegrouper,
2223 )
2930 with codecs.open(filename, 'r', encoding='UTF-8') as f:
3031 for para in LinesToParagraphsRegrouper(f):
3132 for line in TidyPunctuationLineFilter(
32 QuoteOrienterLineFilter(para),
33 TidyStartOfLineProcessor(
34 QuoteOrienterLineFilter(para),
35 )
3336 ):
3437 sys.stdout.write(line)
3538 sys.stdout.write('\n')
166166 )
167167
168168
class PrefixRewriteProcessor(LineProcessor):
    """Rewrite fixed prefixes at the start of each line.

    Subclasses supply ``SUBSTITUTIONS``, a sequence of
    ``(subject, replacement)`` pairs.  Every line taken from
    ``self.iterable`` has its trailing whitespace stripped; then each
    pair is tried in order, and whenever the line (as rewritten so far)
    starts with ``subject``, that prefix is swapped for ``replacement``.
    Pairs are not mutually exclusive: a later pair can match the result
    of an earlier one.
    """

    # No rewrites by default, so the base class only strips trailing
    # whitespace from each line.
    SUBSTITUTIONS = ()

    def __iter__(self):
        for raw in self.iterable:
            text = raw.rstrip()
            for prefix, substitute in self.SUBSTITUTIONS:
                if not text.startswith(prefix):
                    continue
                # Keep everything after the matched prefix unchanged.
                text = substitute + text[len(prefix):]
            yield text
179
180
class TidyStartOfLineProcessor(PrefixRewriteProcessor):
    """Remove a stray ``'. '`` or ``', '`` from the start of a line.

    Only the substitution table is defined here; the prefix matching
    itself is inherited from ``PrefixRewriteProcessor``.
    """

    # (prefix to match, text to put in its place) -- both prefixes are
    # simply deleted.
    SUBSTITUTIONS = (('. ', ''), (', ', ''))
186
187
169188 class QuoteOrienterLineFilter(LineProcessor):
170189 """Note that this expects to work on a single paragraph
171190 only. (If you give it more than one paragraph, it will