Treat `,` and `--` as clause separators.
Chris Pressey
10 years ago
63 | 63 |
Future work
|
64 | 64 |
-----------
|
65 | 65 |
|
66 | |
Treat `,` and `--` as clause separators.
|
67 | |
|
68 | |
Keep tweaking the score. Don't swap "that" and "what".
|
|
66 |
Keep tweaking the score. Don't swap "that" and "what". Also problematic:
|
|
67 |
"and" (using it or resulting in it, e.g. swapping off the "h" in "hand".)
|
|
68 |
Should dock the scores for those.
|
103 | 103 |
|
104 | 104 |
|
105 | 105 |
def adjust_case(new, orig):
|
106 | |
if all([x.isupper() for x in orig]):
|
|
106 |
if all([x.isupper() for x in orig if x.isalpha()]):
|
107 | 107 |
return new.upper()
|
108 | 108 |
if orig[0].isupper():
|
109 | 109 |
return new.capitalize()
|
|
130 | 130 |
for filename in filenames:
|
131 | 131 |
with open(filename, 'r') as f:
|
132 | 132 |
for line in SentinelCleaner(f, pre=options.pre).lines():
|
133 | |
line = line.replace('--', ' -- ')
|
|
133 |
line = line.replace('--', '-- ')
|
134 | 134 |
words.extend(line.split())
|
135 | 135 |
if line == '' and words[-1] is not PARAGRAPH_BREAK:
|
136 | 136 |
words.append(PARAGRAPH_BREAK)
|
|
149 | 149 |
if word.endswith(('"', "'")):
|
150 | 150 |
word = word[:-1]
|
151 | 151 |
sentence.append(word)
|
152 | |
if word not in ('Mr.', 'Mrs.', 'Dr.') and word.endswith(('.', '!', '?', ';', ':')):
|
|
152 |
if (word not in ('Mr.', 'Mrs.', 'Dr.') and
|
|
153 |
word.endswith(('.', '!', '?', ';', ':', ',', '--'))):
|
153 | 154 |
sentences.append(sentence)
|
154 | 155 |
sentence = []
|
155 | 156 |
|