-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
60 lines (50 loc) · 2.72 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import argparse
from datahandlers.dataset_handler import DRPGeneralDataset, DRPDADataset
from datahandlers.custom_preprocess_rules import NormalizationMinMax, Standardization
# Build the GDSC general dataset and produce its five 'pair_fold' splits.
# The three CSV paths are passed in the original order; judging by the file
# names they are expression data, drug descriptors and fold-annotated response
# labels (presumably -- confirm against DRPGeneralDataset.load_from_csv).
GDSC = DRPGeneralDataset()
GDSC.load_from_csv(
    'GDSC',
    'data/DRP2022_preprocessed/sanger/sanger_broad_ccl_log2tpm.csv',
    'data/DRP2022_preprocessed/drug_features/gdsc_drug_descriptors.csv',
    'data/DRP2022_preprocessed/drug_response/gdsc_tuple_labels_folds.csv',
)
for i in range(5):
    # Request fold i of the 'pair_fold' split. A new Standardization() is
    # created for every fold so no preprocessing object is shared between
    # folds; save=True is forwarded to get_fold unchanged.
    train, test = GDSC.get_fold('pair_fold', i, preprocess=Standardization(), save=True)
    # Report the size of each split for this fold.
    print(len(train), len(test))
# Build the CTRP general dataset and produce its five 'pair_fold' splits.
# The three CSV paths are passed in the original order; judging by the file
# names they are expression data, drug descriptors and fold-annotated response
# labels (presumably -- confirm against DRPGeneralDataset.load_from_csv).
CTRP = DRPGeneralDataset()
CTRP.load_from_csv(
    'CTRP',
    'data/DRP2022_preprocessed/depmap/ccle_log2tpm.csv',
    'data/DRP2022_preprocessed/drug_features/ctrp_drug_descriptors.csv',
    'data/DRP2022_preprocessed/drug_response/ctrp_tuple_labels_folds.csv',
)
for i in range(5):
    # Request fold i of the 'pair_fold' split. A new Standardization() is
    # created for every fold so no preprocessing object is shared between
    # folds; save=True is forwarded to get_fold unchanged.
    train, test = CTRP.get_fold('pair_fold', i, preprocess=Standardization(), save=True)
    # Report the size of each split for this fold.
    print(len(train), len(test))
# Build the GDSC DRPDADataset and produce its five 'pair_fold' splits.
# NOTE: this rebinds the module-level name GDSC to a DRPDADataset instance.
# Besides the GDSC expression / descriptor / label files, the CTRP expression
# and descriptor files are passed as the last two arguments (presumably the
# second domain for domain adaptation -- confirm against
# DRPDADataset.load_from_csv). Labels come from the 'resolved_commons' folder.
GDSC = DRPDADataset()
GDSC.load_from_csv(
    'GDSC',
    'data/DRP2022_preprocessed/sanger/sanger_broad_ccl_log2tpm.csv',
    'data/DRP2022_preprocessed/drug_features/gdsc_drug_descriptors.csv',
    'data/DRP2022_preprocessed/drug_response/resolved_commons/gdsc_tuple_labels_folds.csv',
    'data/DRP2022_preprocessed/depmap/ccle_log2tpm.csv',
    'data/DRP2022_preprocessed/drug_features/ctrp_drug_descriptors.csv',
)
for i in range(5):
    # Request fold i of the 'pair_fold' split. A new Standardization() is
    # created for every fold so no preprocessing object is shared between
    # folds; save=True is forwarded to get_fold unchanged.
    train, test = GDSC.get_fold('pair_fold', i, preprocess=Standardization(), save=True)
    # Report the size of each split for this fold.
    print(len(train), len(test))
# Build the CTRP DRPDADataset and produce its five 'pair_fold' splits.
# NOTE: this rebinds the module-level name CTRP to a DRPDADataset instance.
# Besides the CTRP expression / descriptor / label files, the GDSC expression
# and descriptor files are passed as the last two arguments (presumably the
# second domain for domain adaptation -- confirm against
# DRPDADataset.load_from_csv). Labels come from the 'resolved_commons' folder.
CTRP = DRPDADataset()
CTRP.load_from_csv(
    'CTRP',
    'data/DRP2022_preprocessed/depmap/ccle_log2tpm.csv',
    'data/DRP2022_preprocessed/drug_features/ctrp_drug_descriptors.csv',
    'data/DRP2022_preprocessed/drug_response/resolved_commons/ctrp_tuple_labels_folds.csv',
    'data/DRP2022_preprocessed/sanger/sanger_broad_ccl_log2tpm.csv',
    'data/DRP2022_preprocessed/drug_features/gdsc_drug_descriptors.csv',
)
for i in range(5):
    # Request fold i of the 'pair_fold' split. A new Standardization() is
    # created for every fold so no preprocessing object is shared between
    # folds; save=True is forwarded to get_fold unchanged.
    # NOTE (from the original author): df[:, 147] holds -3.4028e+38 values
    # that need to be changed to zeros. Which frame "df" refers to is not
    # visible here -- TODO confirm and handle before relying on these folds.
    train, test = CTRP.get_fold('pair_fold', i, preprocess=Standardization(), save=True)
    # Report the size of each split for this fold.
    print(len(train), len(test))
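# NOTE: disabled CLI entry point, kept for reference. It calls main(), which is
# not defined in the visible part of this file, so it cannot be re-enabled as-is.
# if __name__ == '__main__':
#     parser = argparse.ArgumentParser()
#     parser.add_argument('--type', help='The type (general, da)')
#     parser.add_argument('--source', help='The source (GDSC, CTRP)')
#     _args = parser.parse_args()
#
#     main(_args)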