-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathz_005.py
34 lines (26 loc) · 898 Bytes
/
z_005.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
import csv
from collections import defaultdict
def default_list() -> list[int]:
    """Return a fresh ``[0, 0]`` counter pair.

    Used as the ``defaultdict`` factory for the degree table, where slot 0
    holds the in-degree and slot 1 holds the out-degree. A new list is built
    on every call so that entries never share (and accidentally co-mutate)
    the same counter object.
    """
    return [0, 0]
# Load the directed edge list (columns ``Source`` and ``Target``) and the node
# table (column ``Name``) from the polished data directory.
df = pd.read_csv(os.path.join("polished_data", "people_edges_list.csv"))
names = pd.read_csv(os.path.join("polished_data", "people_nodes.csv"))
# NOTE(review): this set is not read anywhere in the visible file; it is kept
# so the module-level names stay the same -- confirm before removing.
people_names = set(names.Name.array)

# degrees[name] is a two-slot counter: [in_degree, out_degree].
degrees: defaultdict[str, list] = defaultdict(default_list)
for edge in tqdm(df.itertuples(), total=df.shape[0]):
    # An edge Source -> Target adds one outgoing edge to its source ...
    degrees[edge.Source][1] += 1
    # ... and one incoming edge to its target.
    degrees[edge.Target][0] += 1

# Materialise the node names once and reuse them for both degree columns.
node_names = names.Name.to_numpy()

# One row per entry of the node table. Names that never appear in an edge get
# the factory default of 0 for both degrees. Rows are written in descending
# in-degree order, without the DataFrame index.
pd.DataFrame(
    {
        "Name": names.Name.array,
        "In-degree": [degrees[name][0] for name in node_names],
        "Out-degree": [degrees[name][1] for name in node_names],
    }
).sort_values(by="In-degree", ascending=False).to_csv(
    os.path.join("polished_data", "people_degrees.csv"),
    index=False,
    quoting=csv.QUOTE_MINIMAL,
)