From ef48042d9ac5d59d5de10db114756e6142b9a606 Mon Sep 17 00:00:00 2001 From: BIOPZ-Katsantoni Maria Date: Tue, 20 Feb 2024 18:10:31 +0100 Subject: [PATCH] fix: keep order of samples within snakemake --- .../test.slurm.sh | 12 ++++++------ workflow/rules/common.smk | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_integration_workflow_with_conda/test.slurm.sh b/tests/test_integration_workflow_with_conda/test.slurm.sh index 1c5616e..9b72556 100755 --- a/tests/test_integration_workflow_with_conda/test.slurm.sh +++ b/tests/test_integration_workflow_with_conda/test.slurm.sh @@ -38,8 +38,8 @@ snakemake \ --report="snakemake_report.html" # Check md5 sum of some output files -find results/ -type f -name \*\.gz -exec gunzip '{}' \; -find results/ -type f -name \*\.zip -exec sh -c 'unzip -o {} -d $(dirname {})' \; +find results/homo_sapiens/ -type f -name \*\.gz -exec gunzip '{}' \; +find results/homo_sapiens/ -type f -name \*\.zip -exec sh -c 'unzip -o {} -d $(dirname {})' \; md5sum --check "expected_output.md5" # Check whether STAR produces expected alignments @@ -49,7 +49,7 @@ md5sum --check "expected_output.md5" echo "Verifying STAR output" result=$(bedtools intersect -F 1 -v -bed \ -a ../input_files/synthetic.mate_1.bed \ - -b results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/map_genome/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.Aligned.sortedByCoord.out.bam \ + -b results/homo_sapiens/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/map_genome/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.Aligned.sortedByCoord.out.bam \ | wc -l) if [ $result != "0" ]; then echo "Alignments for mate 1 reads are not consistent with ground truth" @@ -57,7 +57,7 @@ if [ $result != "0" ]; then fi result=$(bedtools intersect -F 1 -v -bed \ -a <(cat ../input_files/synthetic.mate_1.bed ../input_files/synthetic.mate_2.bed) \ - -b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.Aligned.sortedByCoord.out.bam \ + -b results/homo_sapiens/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.Aligned.sortedByCoord.out.bam \ | wc -l) if [ $result != "0" ]; then echo "Alignments for mate 1 reads are not consistent with ground truth" @@ -67,8 +67,8 @@ fi # Check whether Salmon assigns reads to expected genes echo "Verifying Salmon output" diff \ - <(cat results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \ + <(cat results/homo_sapiens/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \ <(cat ../input_files/synthetic.mate_1.bed | cut -f7 | sort | uniq -c | sort -k2nr | awk '{printf($2"\t"$1"\n")}') diff \ - <(cat results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \ + <(cat results/homo_sapiens/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \ <(cat ../input_files/synthetic.mate_1.bed | cut -f7 | sort | uniq -c | sort -k2nr | awk '{printf($2"\t"$1"\n")}') diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index cb4031d..63dd06e 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -20,7 +20,7 @@ def get_sample(column_id, search_id=None, search_value=None): def get_all_samples(search_id=None, search_value=None): return list( - set(samples_table.index[samples_table[search_id] == search_value].values) + pd.unique(samples_table.index[samples_table[search_id] == search_value].values) )