38 | 38 |
def clean(self, lines, name=''):
    """Yield every input line with its trailing whitespace removed.

    ``lines`` is any iterable of strings.  ``str.rstrip()`` with no
    argument strips all trailing whitespace, so a line terminator at the
    end of a line is removed as well.

    ``name`` is accepted as part of the cleaner call signature but is not
    used by this method.
    """
    for line in lines:
        yield line.rstrip()
|
|
41 |
|
|
42 |
|
|
43 |
class IllustrationCleaner(AbstractBaseCleaner):
    """Cleaner that drops lines consisting only of an ``[Illustration...]`` tag."""

    # Compiled once when the class is created instead of handing the pattern
    # string to ``re.match`` again for every single input line.
    _ILLUSTRATION_LINE = re.compile(r'^\s*\[Illustration.*?\]\s*$')

    def clean(self, lines, name=''):
        """Yield the lines that are not a stand-alone illustration tag.

        A line is dropped when, apart from optional surrounding whitespace,
        it is entirely a bracketed tag starting with ``[Illustration`` and
        ending with ``]``.  All other lines are yielded unchanged.

        ``name`` is accepted as part of the cleaner call signature but is
        not used by this method.
        """
        is_illustration = self._ILLUSTRATION_LINE.match
        for line in lines:
            if not is_illustration(line):
                yield line
|
41 | 50 |
|
42 | 51 |
|
43 | 52 |
class SentinelCleaner(AbstractBaseCleaner):
|
|
131 | 140 |
|
132 | 141 |
def main(argv):
|
133 | 142 |
optparser = OptionParser(__doc__.strip())
|
|
143 |
optparser.add_option("--strip-illustrations", default=False,
|
|
144 |
action='store_true',
|
|
145 |
help="also try to remove [Illustration: foo]'s")
|
134 | 146 |
optparser.add_option("--output-dir", default=None, metavar='DIR',
|
135 | 147 |
help="if given, save the resulting files to this "
|
136 | 148 |
"directory (under their original names)"
|
|
144 | 156 |
options.output_dir, os.path.basename(filename)
|
145 | 157 |
)
|
146 | 158 |
out = open(out_filename, 'w')
|
147 | |
cleaner = MultiCleaner((
|
|
159 |
cleaners = [
|
148 | 160 |
TrailingWhitespaceCleaner(),
|
149 | 161 |
GutenbergCleaner(),
|
150 | |
ProducedByCleaner()
|
151 | |
))
|
|
162 |
]
|
|
163 |
if options.strip_illustrations:
|
|
164 |
cleaners.append(IllustrationCleaner())
|
|
165 |
cleaners.append(ProducedByCleaner())
|
|
166 |
cleaner = MultiCleaner(cleaners)
|
152 | 167 |
with open(filename, 'r') as f:
|
153 | 168 |
for line in cleaner.clean(f, name=filename):
|
154 | 169 |
out.write(line + '\n')
|