-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpheno.py
35 lines (32 loc) · 909 Bytes
/
pheno.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import pandas as pd
import click as ck
import numpy as np
# Trait columns for which no phenotype file is generated.
_EXCLUDED_DRUGS = frozenset(
    {'Linezolid', 'Teicoplanin', 'Tigecycline', 'Vancomycin'})


def _split_by_trait(ids, values):
    """Partition sample IDs into (positive, negative) lists by trait value.

    A sample is positive when its trait value is truthy and negative when it
    is falsy. Samples whose trait value is missing (NaN/None/NA) are left out
    of both lists: NaN is truthy in Python, so a plain truth test would
    silently count untested samples as positive.
    """
    pos, neg = [], []
    for sid, value in zip(ids, values):
        if pd.isna(value):
            continue
        (pos if value else neg).append(sid)
    return pos, neg


def _downsample(ids, n):
    """Return ``n`` randomly chosen entries of ``ids`` as a shuffled array."""
    arr = np.array(ids)
    np.random.shuffle(arr)
    return arr[:n]


@ck.command()
def main():
    """Write one class-balanced phenotype file per drug.

    Reads ``data/traits.csv`` and, for every column after the first one
    (except the excluded drugs), writes ``data/<drug>.phe``. Each output line
    is ``<ID>\\t<ID>\\t<code>`` where the code is 2 for samples with a truthy
    trait value and 1 for samples with a falsy one. The larger group is
    randomly downsampled so both groups have the same size; samples with a
    missing trait value are skipped.

    Excluded drug columns that are not present in the CSV are ignored.
    """
    df = pd.read_csv('data/traits.csv')
    # Sample IDs are read from the 'ID' column; the first column is skipped
    # when collecting drugs (presumably it is that ID column -- confirm).
    drugs = [c for c in df.columns[1:] if c not in _EXCLUDED_DRUGS]
    for d in drugs:
        # Read the columns directly instead of using iterrows(), which
        # upcasts every row to a common dtype and can turn integer IDs into
        # floats (written as e.g. "12.0").
        pos, neg = _split_by_trait(df['ID'], df[d])
        # Balance the classes by truncating both groups to the smaller size.
        n = min(len(pos), len(neg))
        pos = _downsample(pos, n)
        neg = _downsample(neg, n)
        # NOTE(review): the ID/ID/1-2 layout looks like a PLINK-style
        # case/control phenotype file -- confirm with the downstream tool.
        with open(f'data/{d}.phe', 'w') as f:
            f.writelines(f'{sid}\t{sid}\t2\n' for sid in pos)
            f.writelines(f'{sid}\t{sid}\t1\n' for sid in neg)
# Run the command only when this file is executed as a script.
if __name__ == '__main__':
    main()