-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpunchy.py
127 lines (104 loc) · 3.53 KB
/
punchy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import conceptnet as cn
import datamuse as dm
import helpers as h
h = reload(h)
#import stanford as stan
import random
from nltk.corpus import wordnet as wn
from operator import itemgetter
# Module-level caches: get_bg/get_result re-query ConceptNet/Datamuse only
# when the (topic, parents) pair changes.
bg_cache = {}  # maps topic + "".join(parents) -> weighted background word list
res_key = ""  # cache key (topic + "".join(parents)) for the last get_result query
res_cache = [] #TODO change to dict
# TODO see how get_nouns affects single words
#doesnt help much, but slows down a lot
def filterNoun(list_in):
    """Reduce each phrase in list_in to its nouns via the Stanford tagger.

    Returns a deduplicated list of nouns.
    NOTE(review): relies on `stan`, whose import is commented out at the
    top of the file -- calling this as-is raises NameError. Currently
    unused (get_words uses oldFilterNoun instead).
    """
    nouns = set()
    for phrase in list_in:
        nouns.update(stan.get_nouns(phrase))
    return list(nouns)
def oldFilterNoun(list_in):
    """Extract the nouns from each phrase in list_in.

    Splits each phrase into words, keeps the noun-tagged ones (h.pos with
    tag 'n'), and lemmatizes each via WordNet where possible.
    Returns a deduplicated list.
    """
    filtered = set()
    for phrase in list_in:
        for w in h.pos(phrase.split(), 'n'):
            # wn.morphy returns None when it cannot lemmatize; call it once
            # instead of twice (the original called it for the test AND the add)
            lemma = wn.morphy(w)
            filtered.add(lemma if lemma is not None else w)
    return list(filtered)
def removeMatch(l, topic, parents):
    """In place, drop banned words and duplicates from list l.

    Banned words are the stripped topic, the stripped parents, and a small
    hard-coded blacklist. Each kept word is added to the ban set as we go,
    so the surviving list is also deduplicated (first occurrence wins).
    """
    banned = set([h.strip_tag(topic), 'lust', 'philia', 'rape', 'xiii'])
    for p in parents:
        banned.add(h.strip_tag(p))
        #dontMatch.add(h.baseWord(h.toNoun(p)))
    kept = []
    for word in l:
        if word in banned:
            continue
        kept.append(word)
        banned.add(word)  # dedup: later copies of this word are dropped
    l[:] = kept
def pickOne(l):
    """Weighted random selection from l, a list of (word, weight) tuples.

    Returns the chosen word (first element of the picked tuple).
    """
    return l[h.weighted_choice(l)][0]
def get_words(topic, parents, cn_relations, w2v_relations=None, w2v=None, isIncoming=True):
    """Collect candidate nouns related to topic via its parents.

    Sources, per parent:
      * ConceptNet edges for each relation in cn_relations (incoming or
        outgoing depending on isIncoming),
      * Datamuse trigger words restricted to the topic (incoming only,
        nouns only),
      * optionally, word2vec verb->noun analogies (h.get_nouns_from_verb).

    w2v_relations and w2v now default to None; the w2v step is skipped
    when either is missing/falsy, so callers that only want the
    ConceptNet/Datamuse candidates (e.g. get_result) no longer crash
    with a TypeError. Existing positional callers are unaffected.

    Returns a list of nouns tagged as '<word>_NN'.
    """
    res = []
    stripped_topic = h.strip_tag(topic)
    for p in parents:
        stripped_parent = h.strip_tag(p)
        for rel in cn_relations:
            if isIncoming:
                res += (x[1] for x in cn.getIncoming(stripped_parent, rel))
            else:
                res += (x[1] for x in cn.getOutgoing(stripped_parent, rel))
        # TODO: find good datamuse relationships for outgoing edges.
        if isIncoming:
            res += (x[0] for x in dm.query(dm.related('trg',stripped_parent),dm.topics(stripped_topic), dm.metadata('p'))
                    if ('tags' in x[1] and x[1]['tags'][0] == 'n')) # get only nouns
        if w2v_relations and w2v is not None:
            res += h.get_nouns_from_verb(stripped_parent, w2v_relations, w2v)
    #res = filterNoun(res)
    res = oldFilterNoun(res)
    removeMatch(res, topic, parents)
    return [x+'_NN' for x in res]  # should hopefully all be nouns at this point...
def get_bg(topic, parents, w2v, juxtapose = False):
global bg_cache
k = topic + "".join(parents)
if k not in bg_cache:
cn_relations = ["HasSubevent", "Causes", "HasPrerequisite", "UsedFor"]
w2v_relations = [('eat', 'hunger'), ('drink', 'thirst'), ('shoot', 'hatred'), ('laugh', 'happiness'), ('sleep', 'fatigue'), ('scream', 'fear')]
picked_bg = get_words(topic, parents, cn_relations, w2v_relations, w2v)
relations = parents + [topic]
picked_bg = h.w2vWeightsListNew(picked_bg, relations, w2v)
if len(picked_bg) > 20:
#print picked_bg[-len(picked_bg)/5:]
picked_bg = picked_bg[:-len(picked_bg)/5]
bg_key = topic + "".join(parents)
bg_list = picked_bg
if not bg_list:
print "bg_list empty"
return None
bg_cache[k] = bg_list
else:
bg_list = bg_cache[k]
#print(bg_list)
return h.strip_tag(pickOne(bg_list))
def get_result(topic, parents, w2v, juxtapose = False):
    """Pick one result word for topic given its parent concepts.

    Like get_bg but without the w2v verb->noun analogies: candidates come
    from ConceptNet/Datamuse only, are sorted by total similarity to the
    parents+topic, trimmed of the weakest fifth when >20, cached in
    res_cache/res_key, then re-weighted and sampled.

    Fixed two crashes in the original:
      * get_words was called with only 4 positional args (missing w2v),
        which always raised TypeError; get_words now accepts a falsy
        w2v_relations and we pass w2v explicitly.
      * `relations` was only assigned inside the cache-miss branch but
        used unconditionally below, so every cache hit raised NameError;
        it is now computed up front.

    NOTE(review): `juxtapose` is unused; also h.w2vweightslist here vs
    h.w2vWeightsListNew in get_bg -- confirm which helper is intended.
    Returns a word, or None if no candidates were found.
    """
    global res_key, res_cache
    relations = parents + [topic]
    k = topic + "".join(parents)
    if res_key != k or len(res_cache) == 0:
        cn_relations = ["HasSubevent", "Causes", "HasPrerequisite", "UsedFor"]
        picked_results = get_words(topic, parents, cn_relations, False, w2v)
        picked_results = sorted(picked_results, key=lambda w: h.total_similarity(w, relations, w2v), reverse=True)
        if len(picked_results) > 20:
            # drop the weakest fifth (Python 2 integer division)
            picked_results = picked_results[:-len(picked_results)/5]
        res_key = k
        res_cache = picked_results
    res_cache = h.w2vweightslist(res_cache, relations, w2v)
    if not res_cache:
        return None
    return pickOne(res_cache)