-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathPoesy.py
146 lines (125 loc) · 4.29 KB
/
Poesy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import random
import pronouncing
# Create reverse N-grams from a list of tokens
def reverseNgrams(tokens, n):
ngrams = []
for i in range(len(tokens)-1, 0+n-2, -1):
ngram = []
for j in range(i, i-n, -1):
ngram.append(tokens[j])
ngrams.append(ngram)
return ngrams
# Organize N-grams in a frequency lookup table (N-layer nested dictionaries)
def setupModel(ngrams):
lookup = {}
n = len(ngrams[0])
for ngram in ngrams:
ptr = lookup
for i in range(0, n):
if i == n-1:
ptr.setdefault(ngram[i], 0)
ptr[ngram[i]] += 1
else:
try: ptr = ptr[ngram[i]]
except KeyError:
ptr.setdefault(ngram[i], {})
ptr = ptr[ngram[i]]
return lookup
# Loads all first words into an array for efficiency
def getFirstWords(corpus):
firstWords = []
for first in corpus:
firstWords.append(first)
return firstWords
# Randomly chooses a word from the corpus
def findFirst(corpus, firstWords):
pick = random.randrange(0, len(firstWords)-1)
return firstWords[pick]
# Randomly chooses a second word from the corpus based on the first
def findSecond(first, corpus, firstWords):
words = []
try:
for second in corpus[first]:
words.append(second)
except KeyError:
return findFirst(corpus, firstWords)
if len(words) == 0: return findFirst(corpus, firstWords)
elif len(words) == 1: return words[0]
pick = random.randrange(0, len(words)-1)
return words[pick]
# Randomly chooses a third word from the corpus based on the first and second
def findThird(first, second, corpus, firstWords):
words = []
try:
for third in corpus[first][second]:
words.append(third)
except KeyError:
return findSecond(second, corpus, firstWords)
if len(words) == 0: return findSecond(second, corpus, firstWords)
elif len(words) == 1: return words[0]
pick = random.randrange(0, len(words)-1)
return words[pick]
# Builds sentences word by word
def addWord(sentence, first, second, corpus, firstWords):
third = findThird(first, second, corpus, firstWords)
sentence.append(third)
first, second = second, third
return first, second
# Randomly chooses a rhyming word from the corpus
def findRhyme(word, corpus):
rhymes = pronouncing.rhymes(word)
while (True):
if len(rhymes) == 0:
return None
elif len(rhymes) == 1:
try:
corpus[rhymes[0]]
return rhymes[0]
except KeyError:
return None
else:
pick = random.randrange(0, len(rhymes)-1)
try:
corpus[rhymes[pick]]
return rhymes[pick]
except KeyError:
rhymes.remove(rhymes[pick])
continue
# Implementation of a couplet - AABB CCDD EEFF GGHH
def generateCouplet(corpus, lines, wordsPerLine):
firstWords = getFirstWords(corpus)
poem = []
for i in xrange(lines):
line = []
if i % 2 == 0:
while (True):
A = findFirst(corpus, firstWords)
AA = findRhyme(A, corpus)
if AA != None:
break
first = A
second = findSecond(first, corpus, firstWords)
line += [first, second]
if i % 2 == 1:
first = AA
second = findSecond(first, corpus, firstWords)
line += [first, second]
for j in xrange(wordsPerLine-2):
first, second = addWord(line, first, second, corpus, firstWords)
poem.append(line[::-1])
return poem
# Handles capitalization lost from pre-processing
def poemProcessing(poem):
for line in poem:
line[0] = line[0][0].upper()+line[0][1:]
for index, word in enumerate(line):
if word[0:2] == "i'":
line[index] = word[0:2].upper()+word[2:]
if word == 'i':
line[index] = word.upper()
# Prints poem line by line
def printPoem(poem):
for line in poem:
for word in line:
print word,
print