git @ Cat's Eye Technologies NaNoGenLab / a09d1c1
Success, for some very quick-and-dirty definition of success. Chris Pressey 10 years ago
2 changed file(s) with 106 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
0 quick-and-dirty-markov
1 ======================
2
3 Hypothesis
4 ----------
5
6 I can write a Markov chain processor thing from scratch during my evening
7 commute, having not written one for umpteen years, and having never really
8 studied them formally.
9
10 Apparatus
11 ---------
12
13 * Python 2.7.6 (probably works with older versions too)
14 * An input text
15
16 Method
17 ------
18
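19 * Extract the text of each input file with `GutenbergCleaner`, split it into words (on whitespace and on `--`), trim surrounding punctuation and lowercase each word, count how often each word follows each other word, then start from a random word and repeatedly pick the next word at random, weighted by those counts, for 100 more words.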
20
21 Observations
22 ------------
23
24 Sample output
25
26 > croquettes adorned the genealogies of her from here and there was a bantering flirt of clarice pendomer suggested patricia's bedroom he made much better in the honor the alphabet what between these fogies so often do up the remainder of course she did not especially anxious to serve my wife now musgrave could conceivably have been a grim routine perished just because i liked you are like that the immunity musgrave had long since his sister was a buried his saber hacked upon a fashion and praise or do appeared to a person of her had no whit the others are
0 #!/usr/bin/env python
1
2 import random
3 import re
4 import sys
5
6 from gutenberg import GutenbergCleaner
7
try:
    from tqdm import tqdm
except ImportError:
    def tqdm(x, *args, **kwargs):
        """Stand-in used when the optional ``tqdm`` package is missing.

        Returns the iterable *x* unchanged.  Extra positional and keyword
        arguments (e.g. ``desc=`` or ``total=``) are accepted and ignored so
        that a call written for the real ``tqdm`` does not fail only on
        machines where the package is not installed.
        """
        return x


# Debug switch; nothing in the visible part of this script reads it yet.
DEBUG = False
16
17
def _clean_word(word):
    """Trim one layer of surrounding punctuation from *word* and lowercase it.

    Order matters: a trailing sentence mark is removed first, then one
    opening and one closing quote/bracket, then a sentence mark that was
    hidden inside the closing quote (as in ``word."`` or ``"word,"``).
    """
    sentence_marks = ('.', '!', '?', ';', ',')
    if word.endswith(sentence_marks):
        word = word[:-1]
    if word.startswith(('"', "'", '(')):
        word = word[1:]
    if word.endswith(('"', "'", ')')):
        word = word[:-1]
    if word.endswith(sentence_marks):
        word = word[:-1]
    return word.lower()


def _build_chain(words):
    """Map each word to a ``{successor: count}`` dict over the *words* list.

    The final word of the sequence gets no entry unless it also occurs
    earlier, because nothing follows it.
    """
    wordmap = {}
    last = None
    for word in words:
        if last is not None:
            followers = wordmap.setdefault(last, {})
            followers[word] = followers.get(word, 0) + 1
        last = word
    return wordmap


def _pick_successor(followers):
    """Pick a key of the non-empty *followers* dict, weighted by its count."""
    # Draw against the sum of the successor counts themselves.  Drawing
    # against the word's overall frequency overshoots for the corpus-final
    # word, whose last occurrence has no successor.
    num = random.randint(1, sum(followers.values()))
    acc = 0
    for key, value in followers.items():
        acc += value
        if acc >= num:
            return key


def main(argv):
    """Print a first-order Markov chain ramble built from the given files.

    ``argv`` is the full argument vector; ``argv[1:]`` are the input
    filenames.  Each file is passed through ``GutenbergCleaner`` (its
    ``extract_text()`` result is assumed to be a single string -- confirm
    against the ``gutenberg`` module).  The text is split into words on
    whitespace and on ``--``, each word is cleaned and lowercased, and a
    successor-count table is built.  One random starting word plus 100
    further words are written to stdout, separated by spaces.

    With no filenames, or when the inputs contain no words, nothing is
    written and the function returns ``None``.
    """
    filenames = argv[1:]

    words = []
    for filename in filenames:
        with open(filename, 'r') as f:
            text = GutenbergCleaner(f).extract_text()
        for line in text.split('\n'):
            for bit in line.split():
                words.extend(bit.split('--'))

    # Splitting "foo--" or cleaning a bare quote yields an empty string;
    # drop those so the chain never contains (or prints) an empty "word".
    words = [word for word in (_clean_word(raw) for raw in words) if word]
    if not words:
        return

    wordmap = _build_chain(words)

    # Sorted so that output is reproducible when the random module is seeded.
    word = random.choice(sorted(set(words)))
    sys.stdout.write(word + ' ')

    restart_words = sorted(wordmap)
    for _ in range(100):
        followers = wordmap.get(word)
        if followers:
            word = _pick_successor(followers)
        elif restart_words:
            # Dead end: the current word only ever appears as the very last
            # word of the input.  Jump to a random word that has successors.
            word = random.choice(restart_words)
        else:
            # Single-word corpus: there is nothing to chain to.
            break
        sys.stdout.write(word + ' ')
75
76
# Script entry point: pass the raw argument vector (program name included)
# to main() only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main(sys.argv)