Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug ad index mordred #390

Merged
merged 6 commits into from
Jan 30, 2025
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
add tests for each feature type
  • Loading branch information
paulsonak committed Jan 17, 2025
commit 13e9cd41ec792ae540a956ebdfb6344c3e513b1f
48 changes: 27 additions & 21 deletions atomsci/ddm/test/integrative/ad_index/test_ad_index.py
Original file line number Diff line number Diff line change
@@ -18,7 +18,7 @@ def clean():
os.remove("./output/"+f)

def test():
"""Test full model pipeline: Curate data, fit model, and predict property for new compounds"""
"""Test AD index calculation: Curate data, fit model, and predict property for new compounds for each feature set"""

# Clean
# -----
@@ -33,31 +33,37 @@ def test():
python_path = sys.executable
hp_params["script_dir"] = script_dir
hp_params["python_path"] = python_path

for feat in ['ECFP','mordred_filtered','rdkit_raw','graphconv']:
if feat in ['ECFP','graphconv']:
hp_params['featurizer']=feat
else:
hp_params['featurizer']='computed_descriptors'
hp_params['descriptor_type']=feat
params = parse.wrapper(hp_params)
if not os.path.isfile(params.dataset_key):
params.dataset_key = os.path.join(params.script_dir, params.dataset_key)

params = parse.wrapper(hp_params)
if not os.path.isfile(params.dataset_key):
params.dataset_key = os.path.join(params.script_dir, params.dataset_key)
train_df = pd.read_csv(params.dataset_key)

train_df = pd.read_csv(params.dataset_key)
print(f"Train an RF models with {feat}")
pl = mp.ModelPipeline(params)
pl.train_model()

print("Train a RF models with ECFP")
pl = mp.ModelPipeline(params)
pl.train_model()
print("Calculate AD index with the just trained model.")
pred_df_mp = pl.predict_on_dataframe(train_df[:10], contains_responses=True, AD_method="z_score")

print("Calculate AD index with the just trained model.")
pred_df_mp = pl.predict_on_dataframe(train_df[:10], contains_responses=True, AD_method="z_score")
assert("AD_index" in pred_df_mp.columns.values), 'Error: No AD_index column pred_df_mp'

assert("AD_index" in pred_df_mp.columns.values), 'Error: No AD_index column pred_df_mp'

print("Calculate AD index with the saved model tarball file.")
pred_df_file = pfm.predict_from_model_file(model_path=pl.params.model_tarball_path,
input_df=train_df[:10],
id_col="compound_id",
smiles_col="base_rdkit_smiles",
response_col="pKi_mean",
dont_standardize=True,
AD_method="z_score")
assert("AD_index" in pred_df_file.columns.values), 'Error: No AD_index column in pred_df_file'
print("Calculate AD index with the saved model tarball file.")
pred_df_file = pfm.predict_from_model_file(model_path=pl.params.model_tarball_path,
input_df=train_df[:10],
id_col="compound_id",
smiles_col="base_rdkit_smiles",
response_col="pKi_mean",
dont_standardize=True,
AD_method="z_score")
assert("AD_index" in pred_df_file.columns.values), 'Error: No AD_index column in pred_df_file'

if __name__ == '__main__':
test()
Loading