Add AD index tests for each feature type (ECFP, mordred_filtered, rdkit_raw, graphconv)

ATOMScience-org · stewarthe6 · Jan 30, 2025 · Jan 17, 2025 · Jan 17, 2025 · Jan 17, 2025
commit 13e9cd41ec792ae540a956ebdfb6344c3e513b1f
diff --git a/atomsci/ddm/test/integrative/ad_index/test_ad_index.py b/atomsci/ddm/test/integrative/ad_index/test_ad_index.py
@@ -18,7 +18,7 @@ def clean():
             os.remove("./output/"+f)
 
 def test():
-    """Test full model pipeline: Curate data, fit model, and predict property for new compounds"""
+    """Test AD index calculation: for each feature set, curate data, fit a model, and predict the property for new compounds."""
 
     # Clean
     # -----
@@ -33,31 +33,37 @@ def test():
     python_path = sys.executable
     hp_params["script_dir"] = script_dir
     hp_params["python_path"] = python_path
+
+    for feat in ['ECFP','mordred_filtered','rdkit_raw','graphconv']:
+        if feat in ['ECFP','graphconv']:
+            hp_params['featurizer']=feat
+        else:
+            hp_params['featurizer']='computed_descriptors'
+            hp_params['descriptor_type']=feat
+        params = parse.wrapper(hp_params)
+        if not os.path.isfile(params.dataset_key):
+            params.dataset_key = os.path.join(params.script_dir, params.dataset_key)
 
-    params = parse.wrapper(hp_params)
-    if not os.path.isfile(params.dataset_key):
-        params.dataset_key = os.path.join(params.script_dir, params.dataset_key)
+        train_df = pd.read_csv(params.dataset_key)
 
-    train_df = pd.read_csv(params.dataset_key)
+        print(f"Train an RF models with {feat}")
+        pl = mp.ModelPipeline(params)
+        pl.train_model()
 
-    print("Train a RF models with ECFP")
-    pl = mp.ModelPipeline(params)
-    pl.train_model()
+        print("Calculate AD index with the just trained model.")
+        pred_df_mp = pl.predict_on_dataframe(train_df[:10], contains_responses=True, AD_method="z_score")
 
-    print("Calculate AD index with the just trained model.")
-    pred_df_mp = pl.predict_on_dataframe(train_df[:10], contains_responses=True, AD_method="z_score")
+        assert("AD_index" in pred_df_mp.columns.values), 'Error: No AD_index column pred_df_mp'
 
-    assert("AD_index" in pred_df_mp.columns.values), 'Error: No AD_index column pred_df_mp'
-
-    print("Calculate AD index with the saved model tarball file.")
-    pred_df_file = pfm.predict_from_model_file(model_path=pl.params.model_tarball_path,
-                                         input_df=train_df[:10],
-                                         id_col="compound_id",
-                                         smiles_col="base_rdkit_smiles",
-                                         response_col="pKi_mean",
-                                         dont_standardize=True,
-                                         AD_method="z_score")
-    assert("AD_index" in pred_df_file.columns.values), 'Error: No AD_index column in pred_df_file'
+        print("Calculate AD index with the saved model tarball file.")
+        pred_df_file = pfm.predict_from_model_file(model_path=pl.params.model_tarball_path,
+                                            input_df=train_df[:10],
+                                            id_col="compound_id",
+                                            smiles_col="base_rdkit_smiles",
+                                            response_col="pKi_mean",
+                                            dont_standardize=True,
+                                            AD_method="z_score")
+        assert("AD_index" in pred_df_file.columns.values), 'Error: No AD_index column in pred_df_file'
 
 if __name__ == '__main__':
     test()