-
Notifications
You must be signed in to change notification settings - Fork 1
/
check_distance_scores.py
68 lines (57 loc) · 1.81 KB
/
check_distance_scores.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import gensim
import networkx as nx
from scipy.spatial.distance import cosine
from datetime import timedelta
from interactions import InteractionsUtil as IU
from util import json_load
# Inputs shared by every run below, all read from data/enron/: the
# interactions (JSON), a saved gensim LDA model and a saved gensim dictionary.
interactions = json_load('data/enron/interactions.json')
lda_model = gensim.models.ldamodel.LdaModel.load('data/enron/model-50-50.lda')
dictionary = gensim.corpora.dictionary.Dictionary.load('data/enron/dict.pkl')

# One entry per run; each dict is passed as `distance_weights` when the meta
# graph is built.  The three settings are: a 0.2/0.8 mix of 'topics' and
# 'bow', 'topics' only, and 'bow' only.
# NOTE(review): 'bow' presumably means bag-of-words -- confirm in interactions.
different_weights = [
    {'topics': 0.2, 'bow': 0.8},
    {'topics': 1.0},
    {'bow': 1.0},
]
# How many of the highest out-degree subjects to list for each weighting.
NUM_TOP_SUBJECTS = 10
# Zero-based rank (by out-degree) of the node whose neighbours are inspected.
PROBE_RANK = 5
# How many neighbours with the smallest edge attribute 'c' are printed.
NUM_NEIGHBORS = 5


def print_message(node, graph=None):
    """Print sender, time, subject and the start of the body of *node*.

    Args:
        node: a node of ``graph``; its attribute dict must provide
            'sender_id', 'datetime', 'subject' and 'body'.
        graph: graph to read the attributes from.  Defaults to the
            module-level ``g`` that the loop below rebinds on each iteration.
    """
    if graph is None:
        graph = g
    # NOTE(review): ``Graph.node`` is the networkx 1.x-2.3 spelling; it is
    # kept because the graph is built by project code -- confirm the pinned
    # networkx version before switching to ``graph.nodes[node]``.
    attrs = graph.node[node]
    print('Sender: {}\nTime: {}\nSubject: {}\nBody: {}\n'.format(
        attrs['sender_id'],
        attrs['datetime'],
        attrs['subject'],
        attrs['body'][:1000]  # only the first 1000 characters of the body
    ))


# Loop-invariant, so computed once: four weeks expressed in seconds.
preprune_secs = timedelta(weeks=4).total_seconds()

for weights in different_weights:
    # Build one meta graph per weighting; only `distance_weights` varies.
    g = IU.get_topic_meta_graph(
        interactions,
        lda_model=lda_model,
        dictionary=dictionary,
        undirected=False,
        given_topics=False,
        decompose_interactions=False,
        dist_func=cosine,
        preprune_secs=preprune_secs,
        apply_pagerank=False,
        distance_weights=weights
    )
    print('weights: {}\n'.format(weights))

    # dict() makes this work whether out_degree() returns a dict
    # (networkx 1.x) or a view of (node, degree) pairs (networkx 2.x).
    out_degrees = dict(g.out_degree(g.nodes()))
    sorted_nodes = sorted(out_degrees, key=out_degrees.get, reverse=True)

    # Slice before looking up subjects: `map(...)[:10]` fails on Python 3
    # (map objects are not subscriptable) and touched every node needlessly.
    print('\n'.join(
        g.node[n]['subject'] for n in sorted_nodes[:NUM_TOP_SUBJECTS]
    ))

    if len(sorted_nodes) > PROBE_RANK:
        node = sorted_nodes[PROBE_RANK]
        print('Node')
        print_message(node)

        # Neighbours of `node` ordered by ascending edge attribute 'c'.
        out_edges = g[node]
        top_k_nodes = sorted(
            g.neighbors(node),
            key=lambda nb: out_edges[nb]['c']
        )[:NUM_NEIGHBORS]
        for i, n in enumerate(top_k_nodes):
            print('{}:'.format(i))
            print_message(n)
    else:
        # Previously an IndexError; report and move on to the next weighting.
        print('Graph has only {} node(s); skipping neighbour listing'.format(
            len(sorted_nodes)
        ))

    # Separator between the reports of two weightings.
    print('*' * 100)