-
Notifications
You must be signed in to change notification settings - Fork 1
/
dump_events_to_json.py
97 lines (75 loc) · 3.04 KB
/
dump_events_to_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os
import sys
from sklearn.cluster import DBSCAN
from sklearn.feature_extraction.text import TfidfVectorizer
from events import detect_events_given_path
from meta_graph import convert_to_original_graph
from clustering import greedy_clustering_on_graph
from util import json_dump
from viz_util import to_d3_graph
from experiment_util import get_output_path
from dag_util import get_roots
def run(candidate_tree_path,
        k,
        id2people,
        id2interaction,
        dirname=None,
        to_original_graph=False):
    """Detect events, enrich their nodes and dump them as d3 graphs.

    Events are obtained from ``detect_events_given_path``; every node of
    every event is then annotated in place with sender/recipient objects
    and message content before the events are converted with
    ``to_d3_graph`` and written with ``json_dump``.

    Args:
        candidate_tree_path: path handed to ``detect_events_given_path``
            and to ``get_output_path`` (which derives the output file).
        k: forwarded to ``detect_events_given_path`` (presumably the
            number of events to detect -- confirm against ``events``).
        id2people: mapping indexed by each node's ``sender_id`` and by
            every entry of its ``recipient_ids``.
        id2interaction: mapping indexed by node; each value must provide
            ``'subject'`` and ``'body'`` and support ``.get`` for the
            optional ``'retweet_count'`` / ``'favorite_count'`` fields.
        dirname: output directory; created when given and missing.
        to_original_graph: when true, each event is passed through
            ``convert_to_original_graph`` before the d3 conversion.

    Raises:
        KeyError: if a sender/recipient id or a node is missing from the
            lookup mappings (assuming they are plain dicts).
        IndexError: if ``get_roots`` returns an empty sequence.
    """
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)

    output_path = get_output_path(candidate_tree_path, dirname)
    events = detect_events_given_path(candidate_tree_path, k)

    # Attach people and message content to every node of every event.
    for e in events:
        root = get_roots(e)[0]
        for n in e.nodes_iter():
            # One lookup of the node's attribute dict instead of one per
            # assignment; mutations still land on the graph itself.
            attrs = e.node[n]
            attrs['sender'] = id2people[attrs['sender_id']]
            attrs['recipients'] = [id2people[id_]
                                   for id_ in attrs['recipient_ids']]

            interaction = id2interaction[n]
            attrs['subject'] = interaction['subject']
            attrs['body'] = interaction['body']
            # Optional fields: stored as None when absent.
            for f in ('retweet_count', 'favorite_count'):
                attrs[f] = interaction.get(f)

            attrs['root'] = (n == root)
            # Stringified so the value can be serialised to JSON.
            attrs['datetime'] = str(attrs['datetime'])

    if to_original_graph:
        # List comprehension rather than map(): eager on Python 2 and 3.
        events = [convert_to_original_graph(e) for e in events]

    d3_events = [to_d3_graph(e) for e in events]
    json_dump(d3_events, output_path)
if __name__ == '__main__':
    import argparse
    from util import load_id2obj_dict

    # Command-line entry point: parse the options, validate the flag
    # combination, load the two lookup tables and delegate to run().
    cli = argparse.ArgumentParser('Dump events to json')
    cli.add_argument('--candidate_tree_path', '-p', required=True)
    cli.add_argument('--dirname', '-d', required=True)
    for path_flag in ('--people_path', '--interactions_path'):
        cli.add_argument(path_flag, required=True)
    cli.add_argument('-k', type=int, default=10)
    for bool_flag in ('--to_original_graph', '--undirected'):
        cli.add_argument(bool_flag, action='store_true', default=False)
    opts = cli.parse_args()

    # The two switches are mutually exclusive.
    if opts.undirected and opts.to_original_graph:
        print('ERROR: to_original_graph not allowed for undirected')
        sys.exit(-1)

    # People are keyed by 'id', interactions by 'message_id'.
    people_by_id = load_id2obj_dict(opts.people_path, 'id')
    interactions_by_id = load_id2obj_dict(opts.interactions_path,
                                          'message_id')

    run(opts.candidate_tree_path,
        opts.k,
        people_by_id,
        interactions_by_id,
        dirname=opts.dirname,
        to_original_graph=opts.to_original_graph)