diff --git a/advanced-spoonerizer/README.md b/advanced-spoonerizer/README.md index 43280d4..0d238de 100644 --- a/advanced-spoonerizer/README.md +++ b/advanced-spoonerizer/README.md @@ -64,6 +64,6 @@ Future work ----------- -Treat `,` and `--` and clause seperators. - -Keep tweaking the score. Don't swap "that" and "what". +Keep tweaking the score. Don't swap "that" and "what". Also problematic: +"and" (using it or resulting in it, e.g. swapping off the "h" in "hand".) +Should dock the scores for those. diff --git a/advanced-spoonerizer/advanced-spoonerizer.py b/advanced-spoonerizer/advanced-spoonerizer.py index 32f5182..7683333 100755 --- a/advanced-spoonerizer/advanced-spoonerizer.py +++ b/advanced-spoonerizer/advanced-spoonerizer.py @@ -104,7 +104,7 @@ def adjust_case(new, orig): - if all([x.isupper() for x in orig]): + if all([x.isupper() for x in orig if x.isalpha()]): return new.upper() if orig[0].isupper(): return new.capitalize() @@ -131,7 +131,7 @@ for filename in filenames: with open(filename, 'r') as f: for line in SentinelCleaner(f, pre=options.pre).lines(): - line = line.replace('--', ' -- ') + line = line.replace('--', '-- ') words.extend(line.split()) if line == '' and words[-1] is not PARAGRAPH_BREAK: words.append(PARAGRAPH_BREAK) @@ -150,7 +150,8 @@ if word.endswith(('"', "'")): word = word[:-1] sentence.append(word) - if word not in ('Mr.', 'Mrs.', 'Dr.') and word.endswith(('.', '!', '?', ';', ':')): + if (word not in ('Mr.', 'Mrs.', 'Dr.') and + word.endswith(('.', '!', '?', ';', ':', ',', '--'))): sentences.append(sentence) sentence = []