Commit

trying to consolidate results - does not work yet
JudithBernett committed Oct 2, 2024
1 parent e496f39 commit 637918e
Showing 12 changed files with 179 additions and 38 deletions.
30 changes: 30 additions & 0 deletions bin/consolidate_results.py
@@ -0,0 +1,30 @@
#!/usr/bin/env python

import argparse


def get_parser():
    parser = argparse.ArgumentParser(description="Consolidate results for SingleDrugModels")
    parser.add_argument("--test_mode", type=str, required=True, help="Test mode (LPO, LCO, LDO)")
    parser.add_argument("--model_names", type=str, nargs="+", required=True, help="All model names")
    parser.add_argument("--pred_files", type=str, nargs="+", required=True, help="All prediction files")
    parser.add_argument("--n_cv_splits", type=int, required=True, help="Number of CV splits")
    parser.add_argument("--cross_study_datasets", type=str, nargs="+", help="All cross-study datasets")
    parser.add_argument("--randomizations", type=str, nargs="+", required=True, help="All randomizations")
    parser.add_argument("--n_trials_robustness", type=int, required=True, help="Number of trials")
    return parser


def main():
    parser = get_parser()
    args = parser.parse_args()
    print(args)


if __name__ == "__main__":
    main()
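
main() currently only echoes the parsed arguments, so the interface can be sanity-checked by driving the parser directly. A minimal smoke test, assuming bin/ is on the PYTHONPATH; every argument value below is invented:

# Hypothetical smoke test for get_parser(); all values are made up.
from consolidate_results import get_parser

args = get_parser().parse_args([
    "--test_mode", "LPO",
    "--model_names", "ModelA", "ModelB",  # nargs="+" collects a list
    "--pred_files", "predictions_split_0.csv", "predictions_split_1.csv",
    "--n_cv_splits", "5",  # type=int converts the string
    "--randomizations", "RandTestA",
    "--n_trials_robustness", "10",
])
assert args.model_names == ["ModelA", "ModelB"]
assert args.n_cv_splits == 5
print(args)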
7 changes: 4 additions & 3 deletions bin/make_model_channel.py
@@ -10,6 +10,7 @@ def get_parser():
    parser = argparse.ArgumentParser(description="Split data into CV splits")
    parser.add_argument("--models", type=str, required=True, help="List of models")
    parser.add_argument("--data", type=str, required=True, help="Path to response data")
+    parser.add_argument("--file_name", type=str, required=True, help="Name of the file")
    return parser


@@ -21,9 +22,9 @@ def main():
    dataset_name = response_data.dataset_name
    models = [MODEL_FACTORY[model] for model in models]
    all_models = make_model_list(models, response_data)
-    with open(f'models_{dataset_name}.txt', 'w', encoding='utf-8') as f:
-        for model in all_models:
-            f.write(f"{model}\n")
+    with open(f'{args.file_name}_{dataset_name}.txt', 'w', encoding='utf-8') as f:
+        for model, model_class in all_models.items():
+            f.write(f"{model_class},{model}\n")


if __name__ == "__main__":
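
The rewritten loop pairs each model instance with its class in the output file. A minimal sketch of the new format, assuming make_model_list now returns a dict mapping instance names to class names (all names below are invented):

# Illustration only; the real keys and values come from drevalpy's make_model_list.
all_models = {"ModelA.drug_1": "ModelA", "ModelB": "ModelB"}  # hypothetical
file_name, dataset_name = "models", "GDSC1"                   # hypothetical
with open(f"{file_name}_{dataset_name}.txt", "w", encoding="utf-8") as f:
    for model, model_class in all_models.items():
        f.write(f"{model_class},{model}\n")
# models_GDSC1.txt then contains:
#   ModelA,ModelA.drug_1
#   ModelB,ModelB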
67 changes: 58 additions & 9 deletions bin/train_and_predict_final.py
@@ -1,5 +1,6 @@
#!/usr/bin/env python

+import os
import json
import sys
import argparse
import pickle
@@ -12,6 +13,7 @@
from drevalpy.models import MODEL_FACTORY
from drevalpy.experiment import (get_model_name_and_drug_id,
                                 get_datasets_from_cv_split,
+                                generate_data_saving_path,
                                 train_and_predict,
                                 randomize_train_predict,
                                 robustness_train_predict,
@@ -66,7 +68,7 @@ def prep_data(arguments):
    best_hpams = best_hpam_dict[f"{arguments.model_name}_{arguments.split_id}"]["best_hpam_combi"]

    response_transform = get_response_transformation(arguments.response_transformation)
-    return model, best_hpams, train_dataset, test_dataset, es_dataset, response_transform
+    return model, drug_id, best_hpams, train_dataset, test_dataset, es_dataset, response_transform


def compute_randomization(
@@ -80,9 +82,12 @@
    split_id: str,
    randomization_type: str = "permutation",
    response_transformation=Optional[TransformerMixin],
+    randomization_test_path: str = ""
):
-    randomization_test_file = f'randomization_{randomization_test_view["test_name"]}_{split_id}.csv'
+    randomization_test_file = os.path.join(
+        randomization_test_path,
+        f'randomization_{randomization_test_view["test_name"]}_{split_id}.csv'
+    )
    randomize_train_predict(
        view=randomization_test_view["view"],
        test_name=randomization_test_view["test_name"],
@@ -108,9 +113,12 @@ def compute_robustness(
    split_id: str,
    trial: int,
    response_transformation=Optional[TransformerMixin],
+    rob_path: str = ""
):
-    robustness_test_file = f"robustness_{trial}_{split_id}.csv"
+    robustness_test_file = os.path.join(
+        rob_path,
+        f"robustness_{trial}_{split_id}.csv",
+    )
    robustness_train_predict(
        trial=trial,
        trial_file=robustness_test_file,
@@ -132,6 +140,7 @@ def compute_cross(
    path_data,
    early_stopping_dataset,
    response_transformation,
+    path_out,
    split_index
):
    split_index = split_index.split("split_")[1]
@@ -147,17 +156,39 @@
        early_stopping_dataset=(
            early_stopping_dataset if model.early_stopping else None
        ),
        response_transformation=response_transformation,
-        predictions_path='',
+        path_out=path_out,
        split_index=split_index,
    )


if __name__ == "__main__":
    arg_parser = get_parser()
    args = arg_parser.parse_args()
-    selected_model, hpam_combi, train_set, test_set, es_set, transformation = prep_data(args)
+    selected_model, drug_id, hpam_combi, train_set, test_set, es_set, transformation = prep_data(
+        args)

    if args.mode == "full":
+        predictions_path = generate_data_saving_path(
+            model_name=selected_model.model_name,
+            drug_id=drug_id,
+            result_path='',
+            suffix='predictions',
+        )
+        hpam_path = generate_data_saving_path(
+            model_name=selected_model.model_name,
+            drug_id=drug_id,
+            result_path='',
+            suffix='best_hpams',
+        )
+        hpam_path = os.path.join(hpam_path, f"best_hpams_{args.split_id}.json")
+        # save the best hyperparameters as json
+        with open(
+            hpam_path,
+            "w",
+            encoding="utf-8",
+        ) as f:
+            json.dump(hpam_combi, f)

        test_set = train_and_predict(
            model=selected_model,
            hpams=hpam_combi,
@@ -167,7 +198,10 @@ def compute_cross(
            early_stopping_dataset=es_set,
            response_transformation=transformation,
        )
-        prediction_dataset = f"predictions_{args.split_id}.csv"
+        prediction_dataset = os.path.join(
+            predictions_path,
+            f"predictions_{args.split_id}.csv",
+        )
        test_set.save(prediction_dataset)
        for ds in args.cross_study_datasets:
            if ds == "NONE.csv":
@@ -180,11 +214,18 @@ def compute_cross(
                path_data=args.path_data,
                early_stopping_dataset=es_set,
                response_transformation=transformation,
+                path_out=os.path.dirname(predictions_path),
                split_index=args.split_id
            )
    elif args.mode == "randomization":
        with open(args.randomization_views_path, "r") as f:
            rand_test_view = yaml.safe_load(f)
+        rand_path = generate_data_saving_path(
+            model_name=selected_model.model_name,
+            drug_id=drug_id,
+            result_path='',
+            suffix='randomization',
+        )
        compute_randomization(
            randomization_test_view=rand_test_view,
            model=selected_model,
@@ -196,8 +237,15 @@ def compute_cross(
            split_id=args.split_id,
            randomization_type=args.randomization_type,
            response_transformation=transformation,
+            randomization_test_path=rand_path,
        )
    elif args.mode == "robustness":
+        rob_path = generate_data_saving_path(
+            model_name=selected_model.model_name,
+            drug_id=drug_id,
+            result_path='',
+            suffix='robustness',
+        )
        compute_robustness(
            model=selected_model,
            hpam_set=hpam_combi,
@@ -208,6 +256,7 @@ def compute_cross(
            split_id=args.split_id,
            trial=args.robustness_trial,
            response_transformation=transformation,
+            rob_path=rob_path
        )
    else:
        raise ValueError(f"Invalid mode: {args.mode}. Choose full, randomization, or robustness.")
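
All three modes now derive their output locations from generate_data_saving_path before writing. A sketch of the call, mirroring the keyword arguments used above; the values are illustrative and the exact directory layout is determined by drevalpy:

import os
from drevalpy.experiment import generate_data_saving_path

predictions_path = generate_data_saving_path(
    model_name="ModelA",  # hypothetical model name
    drug_id="drug_1",     # drug id as returned by get_model_name_and_drug_id
    result_path="",       # relative to the process working directory
    suffix="predictions",
)
print(os.path.join(predictions_path, "predictions_split_0.csv"))
# e.g. ModelA/drug_1/predictions/predictions_split_0.csv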
26 changes: 26 additions & 0 deletions modules/local/consolidate_results/main.nf
@@ -0,0 +1,26 @@
process CONSOLIDATE_RESULTS {
    tag "Consolidate"
    label 'process_single'
    publishDir "${params.outdir}/${params.run_id}/${test_mode}", mode: 'copy'

    input:
    tuple val(test_mode), val(model_names), val(pred_files)
    val(rand_modes)

    output:
    tuple val(test_mode), val(model_names), val(pred_files)

    script:
    """
    consolidate_results.py \\
        --test_mode ${test_mode} \\
        --model_names "${model_names}" \\
        --pred_files "${pred_files}" \\
        --n_cv_splits ${params.n_cv_splits} \\
        ${params.cross_study_datasets != '' ? '--cross_study_datasets ' + params.cross_study_datasets.replace(',', ' ') : ''} \\
        --randomizations ${rand_modes} \\
        --n_trials_robustness ${params.n_trials_robustness}
    """
}
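
One caveat: --model_names and --pred_files are declared with nargs="+" in consolidate_results.py, but the process passes each Nextflow list as a single quoted token, which Groovy renders like "[a, b]". A hypothetical normalization helper of the kind the script may eventually need:

# Hypothetical helper: split a Groovy-rendered list string into items.
def split_nf_list(value):
    return [item.strip() for item in value.strip("[]").split(",") if item.strip()]

assert split_nf_list("[ModelA, ModelB]") == ["ModelA", "ModelB"]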
2 changes: 1 addition & 1 deletion modules/local/evaluate_final/main.nf
@@ -9,7 +9,7 @@ process EVALUATE_FINAL {
    // 'biocontainers/python:3.8.3' }"

    input:
-    tuple val(test_mode), val(model_name), path(pred_file)
+    tuple val(test_mode), val(model_names), path(pred_file)

    output:
    path('*.csv'), emit: ch_individual_results
6 changes: 4 additions & 2 deletions modules/local/make_model_channel/main.nf
@@ -8,15 +8,17 @@ process MAKE_MODEL_CHANNEL {
    // 'biocontainers/python:3.8.3' }"
    input:
    tuple val(models), path(response_data)
+    val(name)

    output:
-    path 'models*.txt', emit: all_models
+    path '{models,baselines}*.txt', emit: all_models

    script:
    """
    make_model_channel.py \\
        --models "${models}" \\
-        --data ${response_data}
+        --data ${response_data} \\
+        --file_name ${name}
    """

}
7 changes: 4 additions & 3 deletions modules/local/predict_full/main.nf
@@ -1,7 +1,7 @@
process PREDICT_FULL {
    tag "${test_mode}_${model_name}_${split_id}"
    label 'process_single'
-    publishDir "${params.outdir}/${params.run_id}/${test_mode}/${model_name}/predictions", mode: 'copy'
+    publishDir "${params.outdir}/${params.run_id}/${test_mode}", mode: 'copy'

    //conda "conda-forge::python=3.8.3"
    //container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
@@ -13,8 +13,9 @@ process PREDICT_FULL {
    val(path_data)

    output:
-    tuple val(test_mode), val(model_name), path('predictions_*.csv'), emit: ch_vis
-    path('cross_study/cross_study*.csv'), emit: ch_cross, optional: true
+    tuple val(test_mode), val(model_name), path('**predictions*.csv'), emit: ch_vis
+    path('cross_study/cross_study*.csv'), emit: ch_cross, optional: true
+    path('**best_hpams*.json'), emit: ch_hpams

    script:
    """
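
The ch_vis pattern changes from the flat predictions_*.csv to the recursive **predictions*.csv because predictions are now written into per-model subdirectories. A small pathlib illustration of the difference, with made-up directory names (Nextflow's glob syntax differs slightly from Python's):

from pathlib import Path

base = Path("work")
nested = base / "ModelA" / "drug_1" / "predictions"
nested.mkdir(parents=True, exist_ok=True)
(nested / "predictions_split_0.csv").touch()

print(list(base.glob("predictions_*.csv")))    # [] - the flat pattern misses nested files
print(list(base.glob("**/predictions*.csv")))  # finds the nested file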
4 changes: 2 additions & 2 deletions modules/local/randomization_test/main.nf
@@ -1,7 +1,7 @@
process RANDOMIZATION_TEST {
    tag "${test_mode}_${model_name}_${randomization_type}"
    label 'process_single'
-    publishDir "${params.outdir}/${params.run_id}/${test_mode}/${model_name}/randomization_test"
+    publishDir "${params.outdir}/${params.run_id}/${test_mode}", mode: 'copy'

    //conda "conda-forge::python=3.8.3"
    //container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
@@ -14,7 +14,7 @@ process RANDOMIZATION_TEST {
    val(response_transformation)

    output:
-    tuple val(test_mode), val(model_name), path('randomization_*.csv'), emit: ch_vis
+    tuple val(test_mode), val(model_name), path('**randomization*.csv'), emit: ch_vis

    script:
    """
4 changes: 2 additions & 2 deletions modules/local/robustness_test/main.nf
@@ -1,7 +1,7 @@
process ROBUSTNESS_TEST {
    tag "${model_name}_${robustness_iteration}"
    label 'process_single'
-    publishDir "${params.outdir}/${params.run_id}/${test_mode}/${model_name}/robustness_test"
+    publishDir "${params.outdir}/${params.run_id}/${test_mode}", mode: 'copy'

    //conda "conda-forge::python=3.8.3"
    //container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
@@ -14,7 +14,7 @@ process ROBUSTNESS_TEST {
    val(response_transformation)

    output:
-    tuple val(test_mode), val(model_name), path('robustness_*.csv'), emit: ch_vis
+    tuple val(test_mode), val(model_name), path('**robustness*.csv'), emit: ch_vis

    script:
    """
(3 of the 12 changed files are not shown above.)
