-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot_author_network_diagram.py
170 lines (153 loc) · 6.23 KB
/
plot_author_network_diagram.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import glob
import os
import re
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib import patches
def parse_bibtex(filename):
    """Given a bibtex filename, return a list whose elements are
    dictionaries, one per entry in the file. The keys of each
    dictionary are fields present in the bibtex entry.

    The text is cut at every '@' character, so each chunk runs from one
    '@' up to the next (or to the end of the file); anything before the
    first '@' is ignored. Inside a chunk, only fields written on a
    single line as ``name = {value}`` (whitespace before the name and
    around the '=') are picked up. If a field name occurs more than
    once in a chunk, the last occurrence wins.
    """
    # Read through a context manager so the file handle is always
    # closed, even if reading raises.
    with open(filename) as f:
        string = f.read()

    # Offset of every '@', plus the end of the text so that the final
    # entry has an upper bound too.
    entry_begins = [m.start() for m in re.finditer("@", string)]
    entry_begins.append(len(string))

    E = []
    for begin, end in zip(entry_begins[:-1], entry_begins[1:]):
        entry = string[begin:end]
        # '.' does not match a newline, so the greedy '(.*)' stops at
        # the last '}' found on the field's own line.
        E.append(dict(re.findall(r'\s+(\w+)\s+=\s+\{(.*)\}', entry)))
    return E
def sanitize_authors(e):
    """Return the author names of the bibtex entry `e` as a list.

    The 'author' field has a fixed set of LaTeX markup fragments
    deleted from it and is then split on the phrase ' and '. The entry
    must contain an 'author' key.
    """
    # Deleted in this order: "\textbf{" has to go before the bare
    # braces, otherwise its own brace would already be gone and the
    # command name would be left behind.
    unwanted = (
        "\\textbf{",
        "{",
        "}",
        "\\~",
        "\\'",
        "\\`",
        '\\"',
        '\\v',
        '\\c',
        '\xa0B',
    )
    names = e['author']
    for fragment in unwanted:
        names = names.replace(fragment, "")
    return names.split(" and ")
# Collect the entries of every bibtex file found in the bib/ directory
# into one flat list
E = [
    record
    for bibfile in glob.glob("bib/*.bib")
    for record in parse_bibtex(bibfile)
]
# Authors we want in the visualization and, position by position, the
# affiliation of each one. Authors sharing a last name will cause
# trouble because last names are used as lookup keys below.
rubisco = [
    "Forrest Hoffman", "William Riley", "James Randerson",
    "Kuang-Yu Chang", "Chi Chen", "Nathan Collier", "Weiwei Fu",
    "Trevor Keenan", "Gretchen Keppel-Aleks", "Charles Koven",
    "Jitendra Kumar", "David Lawrence", "Yue Li", "Yi Liu",
    "Morgan Loechli", "Jiafu Mao", "Zelalem Mekonnen", "Umakant Mishra",
    "Keith Moore", "Mingquan Mu", "Robinson Negron-Juarez",
    "Bharat Sharma", "Xiaoying Shi", "Zheng Shi", "Jinyun Tang",
    "Yaoping Wang", "Li Xu", "Min Xu", "Qing Zhu",
]
affiliation = [
    "ORNL", "LBNL", "UCI",
    "LBNL", "LBNL", "ORNL", "UCI",
    "LBNL", "UM", "LBNL",
    "ORNL", "NCAR", "UCI", "UCI",
    "UM", "ORNL", "LBNL", "SNL",
    "UCI", "UCI", "LBNL",
    "ORNL", "ORNL", "UCI", "LBNL",
    "ORNL", "UCI", "ORNL", "LBNL",
]
# The last name is whatever follows the final space of the full name
lastname = [full.rsplit(" ", 1)[-1] for full in rubisco]

# One color per distinct affiliation, assigned in alphabetical order
# from the tab10 palette
cm = plt.get_cmap("tab10")
colors = {
    name: cm.colors[slot]
    for slot, name in enumerate(sorted(set(affiliation)))
}
# We generate a dictionary of sets to check aliases: the keys are the
# last names of our project group and each set holds all acceptable
# spellings of that author, populated from all the bib files. You will
# need to go into the generated file and hand edit the sets to remove
# aliases that do not belong with the last name.
if not os.path.isfile("author_alias.py"):
    print("Generating author alias file...")
    alias = {}
    for e in E:
        if 'author' not in e:
            continue
        for full in sanitize_authors(e):
            last = full.split(" ")[-1]
            if last not in lastname:
                continue
            alias.setdefault(last, set()).add(full)
    # The script later runs `from author_alias import alias,aff`, so the
    # generated module must define `aff` as well or that import fails on
    # a fresh run. `aff` maps each collected last name to its
    # affiliation; when two project members share a last name the one
    # listed later wins, so check those entries by hand too.
    aff = {
        last: where
        for last, where in zip(lastname, affiliation)
        if last in alias
    }
    with open('author_alias.py', 'w') as f:
        f.write("alias = %s\n" % alias)
        f.write("aff = %s\n" % aff)
    print("Edit 'author_alias.py' to remove incorrect alias names")
# Walk the bib entries, counting papers per author and, for every pair
# of authors, the number of entries on which both appear.
from author_alias import alias,aff
rubisco = list(alias.keys())
affiliation = [aff[key] for key in rubisco]
n_people = len(rubisco)
papers = np.zeros(n_people, dtype=int)
connect = np.zeros((n_people, n_people), dtype=int)
for e in E:
    if 'author' not in e:
        continue
    # One item per (author, key) combination whose alias set contains
    # the author's name as written in this entry
    edges = [
        key
        for name in sanitize_authors(e)
        for key in alias
        if name in alias[key]
    ]
    for x in edges:
        a = rubisco.index(x)
        papers[a] += 1
        for y in edges:
            b = rubisco.index(y)
            # Only the strictly lower triangle (a > b) is filled, so
            # each pair is counted once per entry
            if a > b:
                connect[a, b] += 1
# Tabulate the authors, ordered by affiliation and then paper count
df = pd.DataFrame(
    {'rubisco': rubisco, 'affiliation': affiliation, 'papers': papers},
    columns=['rubisco', 'affiliation', 'papers'],
)
df = df.sort_values(['affiliation', 'papers'], ignore_index=True)
affiliations = list(df.affiliation.unique())

fig, ax = plt.subplots(figsize=(10, 10), tight_layout=True)

# Place every author on the unit circle. Each affiliation group is
# shifted by two extra slots relative to the previous one, which leaves
# a gap between groups.
angles = []
n_slots = len(df) + 2*len(affiliations)
for i, r in df.iterrows():
    ang = (i + 2*affiliations.index(r.affiliation))/n_slots*2*np.pi
    angles.append(ang/np.pi*180)
    x = np.cos(ang)
    y = np.sin(ang)
    if x >= 0:
        # Right half: text reads outward, paper count first
        label = "%2d %s" % (r.papers, r.rubisco)
        align = 'left'
        turn = ang/np.pi*180
    else:
        # Left half: rotate by a further half turn so the text is not
        # upside down, and put the paper count last
        label = "%s %2d" % (r.rubisco, r.papers)
        align = 'right'
        turn = (ang + np.pi)/np.pi*180
    ax.text(x, y, label, size=16,
            va='center', ha=align,
            rotation_mode='anchor',
            rotation=turn)
# Store each author's angle (in degrees) next to their table row
df['angles'] = angles

# Angular padding used for the arcs and the affiliation labels.
# NOTE(review): the denominator here differs from the one used to place
# the author labels (len(df)+2*len(affiliations)) -- confirm intended.
dang = 360/(len(df) + len(affiliations))
for a in affiliations:
    dfa = df[df.affiliation == a]
    first_deg = dfa.angles.values[0]
    last_deg = dfa.angles.values[-1]

    # Colored arc spanning the group, padded on both ends
    t0 = first_deg - 1.*dang
    tf = last_deg + 0.5*dang
    ax.add_patch(patches.Arc((0, 0), 2*0.95, 2*0.95, linewidth=8,
                             theta1=t0, theta2=tf, color=colors[a]))

    # Affiliation name, placed just before the first author of the group
    ang = (first_deg - 0.6*dang)/180*np.pi
    x = np.cos(ang)
    y = np.sin(ang)
    if x >= 0:
        align = 'left'
        turn = ang/np.pi*180
    else:
        # Left half of the circle: add a half turn to keep text upright
        align = 'right'
        turn = (ang + np.pi)/np.pi*180
    ax.text(x, y, "%s" % (a), size=18, color=colors[a], weight='bold',
            va='center', ha=align,
            rotation_mode='anchor',
            rotation=turn)
# Draw one straight line per pair of authors that share at least one
# paper; the line width grows with the number of shared papers.
#
# The per-author angle and affiliation are looked up once here rather
# than by filtering the DataFrame for every (a, b) pair, and the largest
# count is computed once rather than for every line drawn.
angle_of = dict(zip(df.rubisco, df.angles))
affiliation_of = dict(zip(df.rubisco, df.affiliation))
# Guard the empty case: max() of a zero-size array raises
most_shared = connect.max() if connect.size else 0
r = 0.91  # radius at which the lines end, just inside the arcs
for a, n in enumerate(rubisco):
    for b, m in enumerate(rubisco):
        if connect[a, b] == 0:
            continue
        same_group = affiliation_of[n] == affiliation_of[m]
        # Lines within one affiliation are grey and drawn underneath
        # the black lines that cross affiliations
        color = '0.5' if same_group else 'k'
        zo = -2 if same_group else -1
        xa, ya = r*np.cos(angle_of[n]/180*np.pi), r*np.sin(angle_of[n]/180*np.pi)
        xb, yb = r*np.cos(angle_of[m]/180*np.pi), r*np.sin(angle_of[m]/180*np.pi)
        plt.plot([xa, xb], [ya, yb], '-', color=color,
                 lw=0.1 + connect[a, b]/most_shared*8,
                 solid_capstyle='round', zorder=zo)
# Use a square view slightly larger than the unit circle so the rotated
# labels fit, hide the axes and write the figure to disk
half_width = 1.25
ax.set_xlim(-half_width, half_width)
ax.set_ylim(-half_width, half_width)
ax.set_axis_off()
plt.savefig('author_network.pdf')
plt.close()