-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathIntAct.py
93 lines (56 loc) · 2.16 KB
/
IntAct.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
__author__ = 'chuqiao'
import pandas as pd
import matplotlib.pyplot as plt
# Load the interaction table and keep the two interactor ID columns plus the
# creation date; rows with a NaN in any of the three are discarded.
data = pd.read_csv("full.csv")
wanted_columns = ['#ID(s) interactor A', 'ID(s) interactor B', 'Creation date']
df = data[wanted_columns]
df = df.dropna()
# Reduce each date to the text before its first '/' (the year).
df['Creation date'] = df['Creation date'].map(lambda x: str(x).split('/')[0])
# For every year, count the distinct IDs that occur as interactor A or as
# interactor B; an ID seen in both roles within a year is counted once.
# NOTE(review): an ID that occurs in several years is counted in each of
# them -- confirm that this is what the "New" figure is meant to show.
new = {
    year: len(set(rows['#ID(s) interactor A']) | set(rows['ID(s) interactor B']))
    for year, rows in df.groupby('Creation date')
}
# dict whose printed form lists its entries in sorted-key order
class SortedDisplayDict(dict):
    """A ``dict`` whose ``str()`` shows the entries in ascending key order.

    Only ``__str__`` is overridden; storage, iteration order and ``repr()``
    are those of a plain ``dict``.
    """

    def __str__(self):
        """Return ``{key: value, ...}`` with keys sorted and items shown via ``repr``.

        Raises:
            TypeError: on Python 3, if the keys cannot be ordered against
                each other (raised by ``sorted``).
        """
        return "{" + ", ".join("%r: %r" % (key, self[key]) for key in sorted(self)) + "}"
# Wrap the per-year counts; SortedDisplayDict only changes how the dict is
# printed, the items handed to pandas are the same.
onew = SortedDisplayDict(new)
# One row per dict key (the key becomes the index label), rows ordered by
# index, and the single value column labelled "New".
newdf = pd.DataFrame.from_dict(onew, orient="index").sort_index()
newdf.columns = ["New"]
# "Current" for a row is the running total of the "New" counts of all rows
# before it (0 for the first row). The inclusive cumulative sum minus the
# row's own value gives exactly that, without a Python-level loop.
newdf['Current UniProt accession numbers'] = newdf['New'].cumsum() - newdf['New']
# set index name to Year
newdf.index.name = 'Year'
# Draw one stacked bar per row of newdf: the "New" count with the "Current"
# running total stacked on it, each series in its own custom colour.
my_colors = ['#999CFC', '#FD9A9B']
plot_columns = ['New', 'Current UniProt accession numbers']
newdf[plot_columns].plot(
    kind='bar',
    stacked=True,
    width=1.0,
    color=my_colors,
    edgecolor="none",
    title="Current and New UniProt accession numbers",
)
# datacursor(hover=True)
plt.show()