Skip to content

Commit

Permalink
Merge pull request #18 from cchauve/intermediate_results
Browse files Browse the repository at this point in the history
Intermediate results
  • Loading branch information
cchauve authored Apr 27, 2023
2 parents a042a62 + 7b494e5 commit ee815c7
Show file tree
Hide file tree
Showing 11 changed files with 438 additions and 93 deletions.
134 changes: 74 additions & 60 deletions example/README.md

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion example/anopheles_X_GeneRax.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ data:
NT_ext: &data_alignments_NT_ext '_NT.fasta'
AA_ext: &data_alignments_AA_ext '_AA.fasta'
ext: &data_alignments_ext !ref [*data_alignments_NT_ext]
gene_trees:
path: &data_gene_trees_path !join [*dir_data, 'gene_trees_X_3.txt']
reconciliations:
path: &data_reconciliations_path !join [*dir_data, 'reconciliations_X_3.txt']
ext: &data_reconciliations_ext '.recphyloxml'
Expand Down Expand Up @@ -234,7 +236,8 @@ tools:
- !concat [!join [*run_dir_scripts, 'GeneRax_reformat.py'], ' \']
- !concat [' ', *generax_input_file, ' \']
- !concat [' ', *generax_results_dir, ' \']
- !concat [' ', *data_reconciliations_ext]
- !concat [' ', *data_reconciliations_ext, ' \']
- !concat [' ', *data_gene_trees_path]
stats:
names:
- &generax_stats_file_species_name !concat [*generax_name, '_species', *log_ext_stat]
Expand Down
2 changes: 2 additions & 0 deletions example/anopheles_X_GeneRax_header.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ data:
NT_ext: &data_alignments_NT_ext '_NT.fasta'
AA_ext: &data_alignments_AA_ext '_AA.fasta'
ext: &data_alignments_ext !ref [*data_alignments_NT_ext]
gene_trees:
path: &data_gene_trees_path !join [*dir_data, 'gene_trees_X_3.txt']
reconciliations:
path: &data_reconciliations_path !join [*dir_data, 'reconciliations_X_3.txt']
ext: &data_reconciliations_ext '.recphyloxml'
Expand Down
47 changes: 35 additions & 12 deletions example/anopheles_X_ecceTERA.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ data:
gene_trees:
path: &data_gene_trees_path !join [*run_dir_root, 'example', 'anopheles_X_3_ALE', 'data', 'gene_trees_X_3.txt']
# Paths to computed output files (generated data, no reconciliation)
reconciliations:
path: &data_reconciliations_path !join [*dir_data, 'reconciliations_X_3.txt']
ext: &data_reconciliations_ext '.recphyloxml'
adjacencies:
path: &data_adjacencies_path !join [*dir_data, 'adjacencies_X_3.txt']
ext: &data_adjacencies_ext '_adjacencies.txt'
Expand Down Expand Up @@ -121,8 +124,8 @@ log:
tools:
# DO NOT EDIT
# DeCoSTAR: computing ancestral adjacencies
DeCoSTAR:
name: &decostar_name 'DeCoSTAR'
ecceTERA_DeCoSTAR:
name: &decostar_name 'ecceTERA_DeCoSTAR'
input:
dirs:
- &decostar_input_dir !join [*dir_aux, *decostar_name]
Expand Down Expand Up @@ -156,10 +159,12 @@ tools:
files:
- &decostar_slurm_results_species_adjacencies !join [*decostar_results_dir, *decostar_slurm_results_species_adjacencies_name]
other:
- &decostar_slurm_results_adjacencies_file !join [*decostar_results_dir, 'adjacencies.txt']
- &decostar_slurm_results_genes_file_1 !join [*decostar_results_dir, 'genes.txt']
- &decostar_slurm_results_genes_file_2 !join [*decostar_results_dir, 'genes_reformatted.txt']
- &decostar_slurm_results_species_file !join [*decostar_results_dir, 'species.txt']
- &decostar_slurm_results_adjacencies_file !join [*decostar_results_dir, 'adjacencies.txt']
- &decostar_slurm_results_genes_file_1 !join [*decostar_results_dir, 'genes.txt']
- &decostar_slurm_results_genes_file_2 !join [*decostar_results_dir, 'genes_reformatted.txt']
- &decostar_slurm_results_species_file !join [*decostar_results_dir, 'species.txt']
- &decostar_slurm_results_species_tree_file !join [*decostar_results_dir, 'speciesTree.phyloxml']
- &decostar_slurm_results_reconciliations_file !join [*decostar_results_dir, 'reconciliations.xml']
cmd:
- !concat [*decostar_exec, ' \']
# Input files
Expand Down Expand Up @@ -204,27 +209,45 @@ tools:
- !concat [' ', *decostar_slurm_results_adjacencies_file, ' \']
- !concat [' ', *decostar_slurm_results_genes_file_2, ' \']
- !concat [' ', *decostar_results_dir]
- ' '
- !concat [!join [*run_dir_scripts, 'DeCoSTAR_ecceTERA_reformat.py'], ' \']
- !concat [' ', *decostar_slurm_results_genes_file_1, ' \']
- !concat [' ', *decostar_slurm_results_genes_file_2, ' \']
- !concat [' ', *decostar_slurm_results_species_tree_file, ' \']
- !concat [' ', *decostar_slurm_results_reconciliations_file, ' \']
- !concat [' ', *decostar_results_dir, ' \']
- !concat [' ', *data_reconciliations_ext, ' \']
- !concat [' ', *data_reconciliations_path]
stats:
names:
- &decostar_stats_file_species_name !concat [*decostar_name, '_species', *log_ext_stat]
- &decostar_stats_file_components_name !concat [*decostar_name, '_components', *log_ext_stat]
- &decostar_stats_file_adj_species_name !concat [*decostar_name, '_adjacencies_species', *log_ext_stat]
- &decostar_stats_file_adj_components_name !concat [*decostar_name, '_adjacencies_components', *log_ext_stat]
- &decostar_stats_file_rec_species_name !concat [*decostar_name, '_reconciliations_species', *log_ext_stat]
- &decostar_stats_file_rec_families_name !concat [*decostar_name, '_reconciliations_families', *log_ext_stat]
files:
- &decostar_stats_file_species !join [*dir_stats, *decostar_name, *decostar_stats_file_species_name]
- &decostar_stats_file_components !join [*dir_stats, *decostar_name, *decostar_stats_file_components_name]
- &decostar_stats_file_adj_species !join [*dir_stats, *decostar_name, *decostar_stats_file_adj_species_name]
- &decostar_stats_file_adj_components !join [*dir_stats, *decostar_name, *decostar_stats_file_adj_components_name]
- &decostar_stats_file_rec_species !join [*dir_stats, *decostar_name, *decostar_stats_file_rec_species_name]
- &decostar_stats_file_rec_families !join [*dir_stats, *decostar_name, *decostar_stats_file_rec_families_name]
script:
- !join [*run_dir_scripts, 'recPhyloXML_statistics.py']
- !ref [*data_reconciliations_path]
- !ref [*decostar_stats_file_rec_species]
- !ref [*decostar_stats_file_rec_families]
- !concat [';']
- !join [*run_dir_scripts, 'DeCoSTAR_statistics.py']
- !ref [*data_species_path]
- !ref [*decostar_slurm_results_genes_file_2]
- !ref [*data_adjacencies_path]
- !ref [*decostar_stats_thresholds]
- !ref [*decostar_stats_file_species]
- !ref [*decostar_stats_file_adj_species]
- !concat [';']
- !join [*run_dir_scripts, 'gene_orders_utils.py']
- 'stats'
- !ref [*decostar_slurm_results_genes_file_2]
- !ref [*data_adjacencies_path]
- !ref [*decostar_results_dir]
- !ref [*decostar_stats_file_components]
- !ref [*decostar_stats_file_adj_components]
# DO NOT EDIT
# SPPDCJ_ILP: Writing the SPPDCJ ILP file
SPPDCJ_ILP:
Expand Down
3 changes: 3 additions & 0 deletions example/anopheles_X_ecceTERA_header.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ data:
gene_trees:
path: &data_gene_trees_path !join [*run_dir_root, 'example', 'anopheles_X_3_ALE', 'data', 'gene_trees_X_3.txt']
# Paths to computed output files (generated data, no reconciliation)
reconciliations:
path: &data_reconciliations_path !join [*dir_data, 'reconciliations_X_3.txt']
ext: &data_reconciliations_ext '.recphyloxml'
adjacencies:
path: &data_adjacencies_path !join [*dir_data, 'adjacencies_X_3.txt']
ext: &data_adjacencies_ext '_adjacencies.txt'
Expand Down
3 changes: 2 additions & 1 deletion parameters/GeneRax.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@
- !concat [!join [*run_dir_scripts, 'GeneRax_reformat.py'], ' \']
- !concat [' ', *generax_input_file, ' \']
- !concat [' ', *generax_results_dir, ' \']
- !concat [' ', *data_reconciliations_ext]
- !concat [' ', *data_reconciliations_ext, ' \']
- !concat [' ', *data_gene_trees_path]
stats:
names:
- &generax_stats_file_species_name !concat [*generax_name, '_species', *log_ext_stat]
Expand Down
1 change: 1 addition & 0 deletions parameters/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ This directory contains the files required to create an AGO pipeline YAML parame
- `MACSE.yaml`: paramaters file block to run MACSE; **do not edit**.
- `ALE.yaml`: paramaters file block to run ALE; **do not edit**.
- `DeCoSTAR.yaml`: paramaters file block to run DeCoSTAR; **do not edit**.
- `ecceTERA_DeCoSTAR.yaml`: paramaters file block to run ecceTERA+DeCoSTAR; **do not edit**.
- `GeneRax.yaml`: paramaters file block to run GeneRax; **do not edit**.
- `IQ-TREE.yaml`: paramaters file block to run IQ-TREE; **do not edit**.
- `SPPDCJ.yaml`: paramaters file block to run spp_dcj; **do not edit**.
Expand Down
126 changes: 126 additions & 0 deletions parameters/ecceTERA_DeCoSTAR.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# DO NOT EDIT
# DeCoSTAR: computing ancestral adjacencies
ecceTERA_DeCoSTAR:
name: &decostar_name 'ecceTERA_DeCoSTAR'
input:
dirs:
- &decostar_input_dir !join [*dir_aux, *decostar_name]
files:
- &decostar_input_file_trees !join [*decostar_input_dir, 'gene_trees.txt']
- &decostar_input_file_adjacencies !join [*decostar_input_dir, 'adjacencies.txt']
script:
- !join [*run_dir_scripts, 'DeCoSTAR_create_input_files.py']
- !ref [*data_gene_orders_path]
- !ref [*decostar_input_path]
- !ref [*data_families_path]
- !ref [*decostar_input_file_adjacencies]
- !ref [*decostar_input_file_trees]
output:
file: &decostar_output_file !ref [*data_adjacencies_path]
slurm:
options: !ref [*decostar_slurm_options]
modules: !ref [*decostar_modules]
array:
results:
file: &decostar_slurm_array_results_file !ref [*data_species_path]
field: &decostar_slurm_array_results_field 1
var: &decostar_slurm_array_results_var 'SPECIES'
name: &decostar_slurm_array_results_name !concat ['${', *decostar_slurm_array_results_var, '}']
ext: &decostar_slurm_array_results_ext !ref [*data_adjacencies_ext]
results:
dirs:
- &decostar_results_dir !join [*dir_results, *decostar_name]
names:
- &decostar_slurm_results_species_adjacencies_name !concat [*decostar_slurm_array_results_name, *decostar_slurm_array_results_ext]
files:
- &decostar_slurm_results_species_adjacencies !join [*decostar_results_dir, *decostar_slurm_results_species_adjacencies_name]
other:
- &decostar_slurm_results_adjacencies_file !join [*decostar_results_dir, 'adjacencies.txt']
- &decostar_slurm_results_genes_file_1 !join [*decostar_results_dir, 'genes.txt']
- &decostar_slurm_results_genes_file_2 !join [*decostar_results_dir, 'genes_reformatted.txt']
- &decostar_slurm_results_species_file !join [*decostar_results_dir, 'species.txt']
- &decostar_slurm_results_species_tree_file !join [*decostar_results_dir, 'speciesTree.phyloxml']
- &decostar_slurm_results_reconciliations_file !join [*decostar_results_dir, 'reconciliations.xml']
cmd:
- !concat [*decostar_exec, ' \']
# Input files
- !concat [' species.file=', *data_species_tree_path, ' \']
- !concat [' adjacencies.file=', *decostar_input_file_adjacencies, ' \']
- !concat [' gene.distribution.file=', *decostar_input_file_trees, ' \']
# Output
- !concat [' output.dir=', *decostar_results_dir, ' \']
- !concat [' write.newick=', *decostar_write_newick, ' \']
- !concat [' write.adjacency.trees=', *decostar_write_adjacency_trees, ' \']
# Model
- !concat [' already.reconciled=', *decostar_already_reconciled, ' \']
- !concat [' rooted=', *decostar_rooted, ' \']
- !concat [' nb.sample=', *decostar_nb_sample, ' \']
- !concat [' dupli.cost=', *decostar_dupli_cost, ' \']
- !concat [' loss.cost=', *decostar_loss_cost, ' \']
- !concat [' AGain.cost=', *decostar_again_cost, ' \']
- !concat [' ABreak.cost=', *decostar_abreak_cost, ' \']
- !concat [' Loss.aware=', *decostar_loss_aware, ' \']
- !concat [' Loss.iteration=', *decostar_loss_iteration, ' \']
- !concat [' C1.Advantage=', *decostar_c1_advantage, ' \']
- !concat [' all.pair.equivalence.class=', *decostar_all_pairs, ' \']
- !concat [' always.AGain=', *decostar_always_again, ' \']
- !concat [' absence.penalty=', *decostar_absence_penalty, ' \']
- !concat [' all.pair.equivalence.class=', *decostar_all_pairs, ' \']
- !concat [' boltzmann.temperature=', *decostar_boltzmann_temperature, ' \']
- ' write.adjacencies=true \'
- ' write.genes=true \'
- ' use.boltzmann=true \'
- ' char.sep="|" \'
- ' with.transfer=false \'
- ' verbose=2'
- ' '
- !concat [!join [*run_dir_scripts, 'DeCoSTAR_reformat.py'], ' \']
- !concat [' ', *data_species_path, ' \']
- !concat [' ', *decostar_slurm_results_species_file, ' \']
- !concat [' ', *decostar_already_reconciled, ' \']
- !concat [' ', *data_families_path, ' \']
- !concat [' ', *decostar_input_path, ' \']
- !concat [' ', *decostar_input_file_trees, ' \']
- !concat [' ', *decostar_slurm_results_genes_file_1, ' \']
- !concat [' ', *decostar_slurm_results_adjacencies_file, ' \']
- !concat [' ', *decostar_slurm_results_genes_file_2, ' \']
- !concat [' ', *decostar_results_dir]
- ' '
- !concat [!join [*run_dir_scripts, 'DeCoSTAR_ecceTERA_reformat.py'], ' \']
- !concat [' ', *decostar_slurm_results_genes_file_1, ' \']
- !concat [' ', *decostar_slurm_results_genes_file_2, ' \']
- !concat [' ', *decostar_slurm_results_species_tree_file, ' \']
- !concat [' ', *decostar_slurm_results_reconciliations_file, ' \']
- !concat [' ', *decostar_results_dir, ' \']
- !concat [' ', *data_reconciliations_ext, ' \']
- !concat [' ', *data_reconciliations_path]
stats:
names:
- &decostar_stats_file_adj_species_name !concat [*decostar_name, '_adjacencies_species', *log_ext_stat]
- &decostar_stats_file_adj_components_name !concat [*decostar_name, '_adjacencies_components', *log_ext_stat]
- &decostar_stats_file_rec_species_name !concat [*decostar_name, '_reconciliations_species', *log_ext_stat]
- &decostar_stats_file_rec_families_name !concat [*decostar_name, '_reconciliations_families', *log_ext_stat]
files:
- &decostar_stats_file_adj_species !join [*dir_stats, *decostar_name, *decostar_stats_file_adj_species_name]
- &decostar_stats_file_adj_components !join [*dir_stats, *decostar_name, *decostar_stats_file_adj_components_name]
- &decostar_stats_file_rec_species !join [*dir_stats, *decostar_name, *decostar_stats_file_rec_species_name]
- &decostar_stats_file_rec_families !join [*dir_stats, *decostar_name, *decostar_stats_file_rec_families_name]
script:
- !join [*run_dir_scripts, 'recPhyloXML_statistics.py']
- !ref [*data_reconciliations_path]
- !ref [*decostar_stats_file_rec_species]
- !ref [*decostar_stats_file_rec_families]
- !concat [';']
- !join [*run_dir_scripts, 'DeCoSTAR_statistics.py']
- !ref [*data_species_path]
- !ref [*decostar_slurm_results_genes_file_2]
- !ref [*data_adjacencies_path]
- !ref [*decostar_stats_thresholds]
- !ref [*decostar_stats_file_adj_species]
- !concat [';']
- !join [*run_dir_scripts, 'gene_orders_utils.py']
- 'stats'
- !ref [*decostar_slurm_results_genes_file_2]
- !ref [*data_adjacencies_path]
- !ref [*decostar_results_dir]
- !ref [*decostar_stats_file_adj_components]
Loading

0 comments on commit ee815c7

Please sign in to comment.