From 58c454db0d5560ee0121b74846986f5cf2b05d95 Mon Sep 17 00:00:00 2001 From: "he6@llnl.gov" Date: Mon, 16 Dec 2024 10:54:58 -0800 Subject: [PATCH] Updated transformer test to correctly test the standard deviation and to use a new json file specific to that test --- .../balancing_trans/jsons/all_transforms.json | 29 +++++++++++++++++++ .../test_balancing_transformer.py | 10 +++++-- 2 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 atomsci/ddm/test/integrative/balancing_trans/jsons/all_transforms.json diff --git a/atomsci/ddm/test/integrative/balancing_trans/jsons/all_transforms.json b/atomsci/ddm/test/integrative/balancing_trans/jsons/all_transforms.json new file mode 100644 index 00000000..40b7c5a6 --- /dev/null +++ b/atomsci/ddm/test/integrative/balancing_trans/jsons/all_transforms.json @@ -0,0 +1,29 @@ +{ + "dataset_key" : "replaced", + "datastore" : "False", + "uncertainty": "False", + "splitter": "scaffold", + "split_valid_frac": "0.20", + "split_test_frac": "0.20", + "split_strategy": "train_valid_test", + "prediction_type": "classification", + "model_choice_score_type": "roc_auc", + "response_cols" : "active", + "id_col": "compound_id", + "smiles_col" : "rdkit_smiles", + "result_dir": "replaced", + "system": "LC", + "transformers": "True", + "model_type": "NN", + "featurizer": "computed_descriptors", + "descriptor_type": "rdkit_raw", + "weight_transform_type": "balancing", + "learning_rate": ".0007", + "layer_sizes": "20,10", + "dropouts": "0.3,0.3", + "save_results": "False", + "max_epochs": "2", + "early_stopping_patience": "2", + "verbose": "False", + "seed":"0" + } \ No newline at end of file diff --git a/atomsci/ddm/test/integrative/balancing_trans/test_balancing_transformer.py b/atomsci/ddm/test/integrative/balancing_trans/test_balancing_transformer.py index b26b7b7c..31114379 100644 --- a/atomsci/ddm/test/integrative/balancing_trans/test_balancing_transformer.py +++ b/atomsci/ddm/test/integrative/balancing_trans/test_balancing_transformer.py @@ -34,7 +34,11 @@ def test_balancing_transformer(): def test_all_transformers(): res_dir = tempfile.mkdtemp() dskey = os.path.join(res_dir, 'special_test_dset.csv') - params = params_w_balan(dskey, res_dir) + params = read_params( + make_relative_to_file('jsons/all_transforms.json'), + dskey, + res_dir + ) make_test_datasets.make_test_dataset_and_split(dskey, params['descriptor_type']) params['previously_featurized'] = True @@ -77,7 +81,7 @@ def test_all_transformers(): # untransformed mean is 10 expected transformed mean is (10 - 0) / 2 assert abs(np.mean(trans_valid_dset.X) - 5) < 1e-4 # untransformed std is 5 expected transformed std is 5/2 - assert abs(np.std(trans_valid_dset.X) - (2.5)) + assert abs(np.std(trans_valid_dset.X) - (2.5)) < 1e-4 # validation has a 50/50 split. Majority class * 4 should equal oversampled minority class valid_weights = trans_valid_dset.w (valid_weight1, valid_weight2), (valid_count1, valid_count2) = np.unique(valid_weights, return_counts=True) @@ -132,5 +136,5 @@ def params_w_balan(dset_key, res_dir): return params if __name__ == '__main__': - #test_all_transformers() + test_all_transformers() test_balancing_transformer() \ No newline at end of file