From 0d5fddfcd7b321aa027a465e086f6d7ce946feb2 Mon Sep 17 00:00:00 2001 From: Zachary Nolen Date: Mon, 18 Mar 2024 17:40:10 +0100 Subject: [PATCH] General improvements to report --- workflow/Snakefile | 6 ++++ workflow/rules/0.2_ref_filt.smk | 16 +++++++---- workflow/rules/1.0_preprocessing.smk | 27 +++++++++++++++--- workflow/rules/2.1_sample_qc.smk | 13 ++++++--- workflow/rules/2.2_dna_damage.smk | 11 +++++--- workflow/rules/4.2_linkage_decay.smk | 2 +- workflow/rules/5.0_relatedness.smk | 4 +-- workflow/rules/6.0_pca.smk | 2 +- workflow/rules/6.1_admixture.smk | 4 +-- workflow/rules/7.1_thetas.smk | 6 ++-- workflow/rules/7.2_fst.smk | 2 +- workflow/rules/7.3_heterozygosity.smk | 6 ++-- workflow/rules/8.0_inbreeding.smk | 4 +-- workflow/rules/common.smk | 40 +++++++++++++++++++++++++-- 14 files changed, 107 insertions(+), 36 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index ae9a6b30..5d8a092e 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -78,6 +78,12 @@ if config["subsample_redo_filts"]: ) +if len(pipebams) > 0: + all_outputs.append( + "results/datasets/{dataset}/qc/fastp-trimming/fastp_all.{ref}_mqc.html" + ) + + if config["analyses"]["qualimap"]: all_outputs.append( "results/datasets/{dataset}/qc/qualimap/qualimap_all.{ref}_mqc.html", diff --git a/workflow/rules/0.2_ref_filt.smk b/workflow/rules/0.2_ref_filt.smk index 523d3bb3..669f9dd2 100644 --- a/workflow/rules/0.2_ref_filt.smk +++ b/workflow/rules/0.2_ref_filt.smk @@ -440,9 +440,13 @@ if config["analyses"]["extreme_depth"]: summ="results/datasets/{dataset}/filters/depth/{dataset}.{ref}_{population}{dp}_depth.summary", plot=report( "results/datasets/{dataset}/plots/depth_dist/{dataset}.{ref}_{population}{dp}_depth.svg", - category="Quality Control", - subcategory="Depth distributions and filters", - labels={"Subset": "{population}", "Type": "Histogram"}, + category="00 Quality Control", + subcategory="2 Depth distributions and filters", + labels=lambda w: { + "Subset": "{population}", + **dp_report(w), + "Type": "Histogram", + }, ), log: "logs/{dataset}/filters/depth/{dataset}.{ref}_{population}{dp}_depth_extremes.log", @@ -733,9 +737,9 @@ rule filter_summary_table: output: report( "results/datasets/{dataset}/filters/combined/{dataset}.{ref}{dp}_{sites}-filts.html", - category="Quality Control", - subcategory="Filtering Summary", - labels={"Filter": "{sites}", "Type": "Table"}, + category="00 Quality Control", + subcategory="3 Filtering Summary", + labels=lambda w: {"Filter": "{sites}", **dp_report(w), "Type": "Table"}, ), log: "logs/{dataset}/filters/combine/{dataset}.{ref}{dp}_{sites}-filts_tsv2html.log", diff --git a/workflow/rules/1.0_preprocessing.smk b/workflow/rules/1.0_preprocessing.smk index 67572dce..f4e669ca 100644 --- a/workflow/rules/1.0_preprocessing.smk +++ b/workflow/rules/1.0_preprocessing.smk @@ -29,8 +29,8 @@ rule fastp_mergedout: merged=temp("results/preprocessing/fastp/{sample}_{unit}_{lib}.merged.fastq.gz"), html=report( "results/preprocessing/qc/fastp/{sample}_{unit}_{lib}_merged.html", - category="Quality Control", - subcategory="Trimming Reports", + category="00 Quality Control", + subcategory="1 Trimming Reports", labels={ "Sample": "{sample}", "Unit": "{unit}", @@ -66,8 +66,8 @@ rule fastp_pairedout: ), html=report( "results/preprocessing/qc/fastp/{sample}_{unit}_{lib}_paired.html", - category="Quality Control", - subcategory="Trimming Reports", + category="00 Quality Control", + subcategory="1 Trimming Reports", labels={ "Sample": "{sample}", "Unit": "{unit}", @@ -89,6 +89,25 @@ rule fastp_pairedout: "v2.5.0/bio/fastp" +rule fastp_multiqc: + input: + multiqc_input_fastp, + output: + report( + "results/datasets/{dataset}/qc/fastp-trimming/fastp_all.{ref}_mqc.html", + category="00 Quality Control", + subcategory="1 Trimming Reports", + labels={"Type": "MultiQC Report"}, + ), + log: + "logs/preprocessing/fastp/{dataset}.{ref}_mqc.log", + params: + extra="", + use_input_files_only=True, + wrapper: + "v3.5.0/bio/multiqc" + + # rule fastp_pairedout: # """Process modern reads with fastp, trimming adapters and low quality bases""" # input: diff --git a/workflow/rules/2.1_sample_qc.smk b/workflow/rules/2.1_sample_qc.smk index ea2f79a7..35bc7ccb 100644 --- a/workflow/rules/2.1_sample_qc.smk +++ b/workflow/rules/2.1_sample_qc.smk @@ -71,7 +71,12 @@ rule qualimap_multiqc: input: multiqc_input_qualimap, output: - "results/datasets/{dataset}/qc/qualimap/qualimap_all.{ref}_mqc.html", + report( + "results/datasets/{dataset}/qc/qualimap/qualimap_all.{ref}_mqc.html", + category="00 Quality Control", + subcategory="5 Qualimap", + labels={"Type": "MultiQC Report"}, + ), log: "logs/mapping/qualimap/{dataset}.{ref}_mqc.log", params: @@ -331,9 +336,9 @@ rule sample_qc_summary: output: report( "results/datasets/{dataset}/qc/{dataset}.{ref}_all{dp}.sampleqc.html", - category="Quality Control", - subcategory="Sample coverage and endogenous content", - labels={"Type": "Table"}, + category="00 Quality Control", + subcategory="6 Sample depth and endogenous content", + labels=lambda w: {**dp_report(w), "Type": "Table"}, ), log: "logs/{dataset}/combine_sample_qc/{dataset}.{ref}{dp}_tsv2html.log", diff --git a/workflow/rules/2.2_dna_damage.smk b/workflow/rules/2.2_dna_damage.smk index a2427d9f..6c2ff7be 100644 --- a/workflow/rules/2.2_dna_damage.smk +++ b/workflow/rules/2.2_dna_damage.smk @@ -67,9 +67,7 @@ rule mapDamage2_rescaling: len="results/mapping/qc/mapdamage/{sample}.{ref}/Length_plot.pdf", lg_dist="results/mapping/qc/mapdamage/{sample}.{ref}/lgdistribution.txt", misincorp="results/mapping/qc/mapdamage/{sample}.{ref}/misincorporation.txt", - rescaled_bam=temp( - "results/mapping/bams/{sample}.{ref}.rmdup.realn.clip.rescaled.bam" - ), + rescaled_bam="results/mapping/bams/{sample}.{ref}.rmdup.realn.clip.rescaled.bam", log: "logs/mapping/mapdamage/{sample}.{ref}.log", benchmark: @@ -87,7 +85,12 @@ rule dna_damage_multiqc: input: multiqc_input_dnadmg, output: - "results/datasets/{dataset}/qc/dna-damage-mqc/dna-damage_all.{ref}_mqc.html", + report( + "results/datasets/{dataset}/qc/dna-damage-mqc/dna-damage_all.{ref}_mqc.html", + category="00 Quality Control", + subcategory="4 DNA Damage", + labels={"Type": "MultiQC Report"}, + ), log: "logs/mapping/dnadamage/{dataset}.{ref}_dnadmg-mqc.log", params: diff --git a/workflow/rules/4.2_linkage_decay.smk b/workflow/rules/4.2_linkage_decay.smk index f1bfc011..efbcfde3 100644 --- a/workflow/rules/4.2_linkage_decay.smk +++ b/workflow/rules/4.2_linkage_decay.smk @@ -28,7 +28,7 @@ rule fit_LD_decay: output: plot=report( "results/datasets/{dataset}/plots/LD_decay/{dataset}.{ref}_{population}{dp}_{sites}-filts.LDdecay.svg", - category="Linkage Disequilibrium Decay", + category="01 Linkage Disequilibrium Decay", subcategory="{sites}", labels=lambda w: { "Population": "{population}", diff --git a/workflow/rules/5.0_relatedness.smk b/workflow/rules/5.0_relatedness.smk index c5d9b893..bdb9318e 100644 --- a/workflow/rules/5.0_relatedness.smk +++ b/workflow/rules/5.0_relatedness.smk @@ -143,7 +143,7 @@ rule kinship_table_html: output: report( "results/datasets/{dataset}/analyses/kinship/ibsrelate_{type}/{dataset}.{ref}_all{dp}_{sites}-filts.kinship.html", - category="Relatedness", + category="02 Relatedness", subcategory="IBSrelate - {type}", labels=lambda w: {"Filter": "{sites}", **dp_report(w), "Type": "Table"}, ), @@ -200,7 +200,7 @@ rule ngsrelate_summary: output: report( "results/datasets/{dataset}/analyses/kinship/ngsrelate/{dataset}.{ref}_all{dp}_{sites}-filts_relate.html", - category="Relatedness", + category="02 Relatedness", subcategory="NgsRelate", labels=lambda w: {"Filter": "{sites}", **dp_report(w), "Type": "Table"}, ), diff --git a/workflow/rules/6.0_pca.smk b/workflow/rules/6.0_pca.smk index efa0584d..346eef5d 100644 --- a/workflow/rules/6.0_pca.smk +++ b/workflow/rules/6.0_pca.smk @@ -64,7 +64,7 @@ rule plot_pca: output: report( "results/datasets/{dataset}/plots/pca/{dataset}.{ref}_{population}{dp}_{sites}-filts_pc{xpc}-{ypc}.svg", - category="PCA", + category="03.1 PCA", labels=lambda w: { "Filter": "{sites}", **dp_report(w), diff --git a/workflow/rules/6.1_admixture.smk b/workflow/rules/6.1_admixture.smk index d5de2106..e398eea5 100644 --- a/workflow/rules/6.1_admixture.smk +++ b/workflow/rules/6.1_admixture.smk @@ -48,7 +48,7 @@ rule plot_admix: output: report( "results/datasets/{dataset}/plots/ngsadmix/{dataset}.{ref}_{population}{dp}_{sites}-filts_K{kvalue}.svg", - category="Admixture", + category="03.2 Admixture", subcategory="NGSadmix", labels=lambda w: { "Filter": "{sites}", @@ -105,7 +105,7 @@ rule plot_evalAdmix: output: report( "results/datasets/{dataset}/plots/evaladmix/{dataset}.{ref}_{population}{dp}_{sites}-filts_K{kvalue}_evaladmix.html", - category="Admixture", + category="03.2 Admixture", subcategory="evalAdmix", labels=lambda w: { "Filter": "{sites}", diff --git a/workflow/rules/7.1_thetas.smk b/workflow/rules/7.1_thetas.smk index 81b483ed..85b5b77e 100644 --- a/workflow/rules/7.1_thetas.smk +++ b/workflow/rules/7.1_thetas.smk @@ -72,7 +72,7 @@ rule plot_thetas: output: watterson=report( "results/datasets/{dataset}/plots/thetas/{dataset}.{ref}_all{dp}_{sites}-filts.window_{win}_{step}.density.watterson.pdf", - category="Watterson's Theta", + category="04.1 Watterson's Theta", labels=lambda w: { "Filter": "{sites}", **dp_report(w), @@ -83,7 +83,7 @@ rule plot_thetas: ), pi=report( "results/datasets/{dataset}/plots/thetas/{dataset}.{ref}_all{dp}_{sites}-filts.window_{win}_{step}.density.pi.pdf", - category="Nucleotide Diversity (Pi)", + category="04.2 Nucleotide Diversity (Pi)", labels=lambda w: { "Filter": "{sites}", **dp_report(w), @@ -94,7 +94,7 @@ rule plot_thetas: ), tajima=report( "results/datasets/{dataset}/plots/thetas/{dataset}.{ref}_all{dp}_{sites}-filts.window_{win}_{step}.density.tajima.pdf", - category="Tajima's D", + category="04.3 Tajima's D", labels=lambda w: { "Filter": "{sites}", **dp_report(w), diff --git a/workflow/rules/7.2_fst.smk b/workflow/rules/7.2_fst.smk index b08f2eb5..fc296ab4 100644 --- a/workflow/rules/7.2_fst.smk +++ b/workflow/rules/7.2_fst.smk @@ -153,7 +153,7 @@ rule plot_fst: output: report( "results/datasets/{dataset}/plots/fst/{dataset}.{ref}_{unit}pairs{dp}_{sites}-filts.fst.global.pdf", - category="Fst", + category="05 Fst", subcategory="Global", labels=lambda w: { "Filter": "{sites}", diff --git a/workflow/rules/7.3_heterozygosity.smk b/workflow/rules/7.3_heterozygosity.smk index 0283f08f..2c3e6fba 100644 --- a/workflow/rules/7.3_heterozygosity.smk +++ b/workflow/rules/7.3_heterozygosity.smk @@ -20,7 +20,7 @@ rule heterozygosity: table="results/datasets/{dataset}/analyses/heterozygosity/{dataset}.{ref}_all{dp}_{sites}-filts_heterozygosity.tsv", popplot=report( "results/datasets/{dataset}/plots/heterozygosity/{dataset}.{ref}_all{dp}_{sites}-filts_heterozygosity.populations.svg", - category="Heterozygosity", + category="04.4 Heterozygosity", labels=lambda w: { "Filter": "{sites}", **dp_report(w), @@ -29,7 +29,7 @@ rule heterozygosity: ), indplot=report( "results/datasets/{dataset}/plots/heterozygosity/{dataset}.{ref}_all{dp}_{sites}-filts_heterozygosity.individuals.svg", - category="Heterozygosity", + category="04.4 Heterozygosity", labels=lambda w: { "Filter": "{sites}", **dp_report(w), @@ -55,7 +55,7 @@ rule heterozygosity_table: output: report( "results/datasets/{dataset}/analyses/heterozygosity/{dataset}.{ref}_all{dp}_{sites}-filts_heterozygosity.html", - category="Heterozygosity", + category="04.4 Heterozygosity", labels=lambda w: {"Filter": "{sites}", **dp_report(w), "Type": "Table"}, ), log: diff --git a/workflow/rules/8.0_inbreeding.smk b/workflow/rules/8.0_inbreeding.smk index b075f4e1..2cb3b63e 100644 --- a/workflow/rules/8.0_inbreeding.smk +++ b/workflow/rules/8.0_inbreeding.smk @@ -76,7 +76,7 @@ rule plot_froh: output: barplot=report( "results/datasets/{dataset}/plots/inbreeding/{dataset}.{ref}_all{dp}_{sites}-filts.froh_bins.svg", - category="Inbreeding", + category="06 Inbreeding", labels=lambda w: { "Filter": "{sites}", **dp_report(w), @@ -85,7 +85,7 @@ rule plot_froh: ), scatter=report( "results/datasets/{dataset}/plots/inbreeding/{dataset}.{ref}_all{dp}_{sites}-filts.cumroh_nroh.svg", - category="Inbreeding", + category="06 Inbreeding", labels=lambda w: { "Filter": "{sites}", **dp_report(w), diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 43c9f988..6a16d564 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -171,6 +171,40 @@ def get_raw_fastq(wildcards): } +## Get correct reports to compile a fastp multiqc report +def multiqc_input_fastp(wildcards): + reports = [] + # Check if pipeline is actually processing any fastq files + if len(pipebams) > 0: + # subset units to samples that are starting at fastq + pipeunits = units[units["sample"].isin(pipebams)] + # join with sample list to know 'historical' or 'modern' sample context + pipeunits = pd.merge(pipeunits, samples, left_on="sample", right_index=True) + # add historical, merged fastq to report + histunits = pipeunits[pipeunits["time"] == "historical"] + reports.extend( + expand( + "results/preprocessing/qc/fastp/{sample}_{unit}_{lib}_merged.json", + zip, + sample=histunits["sample"].tolist(), + unit=pipeunits["unit"].tolist(), + lib=pipeunits["lib"].tolist(), + ) + ) + # add modern, paired fastq to report + modunits = pipeunits[pipeunits["time"] == "modern"] + reports.extend( + expand( + "results/preprocessing/qc/fastp/{sample}_{unit}_{lib}_paired.json", + zip, + sample=modunits["sample"].tolist(), + unit=modunits["unit"].tolist(), + lib=modunits["lib"].tolist(), + ) + ) + return reports + + # Reference @@ -721,8 +755,8 @@ def unit_report(wildcards): def theta_report(wildcards): stat = wildcards.stat if stat == "watterson": - return "Watterson's Theta" + return "04.1 Watterson's Theta" elif stat == "pi": - return "Nucleotide Diversity (Pi)" + return "04.2 Nucleotide Diversity (Pi)" elif stat == "tajima": - return "Tajima's D" + return "04.3 Tajima's D"