Add a TidyStartOfLineProcessor.
Chris Pressey
3 years ago
17 | 17 |
|
18 | 18 |
from t_rext.processors import (
|
19 | 19 |
TidyPunctuationLineFilter,
|
|
20 |
TidyStartOfLineProcessor,
|
20 | 21 |
QuoteOrienterLineFilter,
|
21 | 22 |
LinesToParagraphsRegrouper,
|
22 | 23 |
)
|
|
29 | 30 |
with codecs.open(filename, 'r', encoding='UTF-8') as f:
|
30 | 31 |
for para in LinesToParagraphsRegrouper(f):
|
31 | 32 |
for line in TidyPunctuationLineFilter(
|
32 | |
QuoteOrienterLineFilter(para),
|
|
33 |
TidyStartOfLineProcessor(
|
|
34 |
QuoteOrienterLineFilter(para),
|
|
35 |
)
|
33 | 36 |
):
|
34 | 37 |
sys.stdout.write(line)
|
35 | 38 |
sys.stdout.write('\n')
|
166 | 166 |
)
|
167 | 167 |
|
168 | 168 |
|
|
169 |
class PrefixRewriteProcessor(LineProcessor):
    """Line processor that rewrites prefixes found at the start of lines.

    Subclasses supply ``SUBSTITUTIONS``, a sequence of
    ``(subject, replacement)`` pairs.  Every line taken from
    ``self.iterable`` has its trailing whitespace stripped and is then
    checked against each pair in order: when the line begins with
    ``subject``, that prefix is swapped for ``replacement``.  Matching
    does not stop at the first hit, so a later pair is tested against
    the result of the earlier rewrites.
    """

    # No rewrites by default; subclasses override this.
    SUBSTITUTIONS = ()

    def __iter__(self):
        """Yield each input line, right-stripped and prefix-rewritten."""
        for raw in self.iterable:
            text = raw.rstrip()
            for prefix, substitute in self.SUBSTITUTIONS:
                if not text.startswith(prefix):
                    continue
                # Swap the matched prefix, keeping the rest of the line.
                text = substitute + text[len(prefix):]
            yield text
|
|
179 |
|
|
180 |
|
|
181 |
class TidyStartOfLineProcessor(PrefixRewriteProcessor):
    """Remove stray punctuation that begins a line.

    A line starting with a period or a comma followed by a space
    (``'. '`` or ``', '``) has that prefix deleted.
    """

    # Each pair is (prefix to look for, text to put in its place).
    SUBSTITUTIONS = (('. ', ''), (', ', ''))
|
|
186 |
|
|
187 |
|
169 | 188 |
class QuoteOrienterLineFilter(LineProcessor):
|
170 | 189 |
"""Note that this expects to work on a single paragraph
|
171 | 190 |
only. (If you give it more than one paragraph, it will
|