Skip to content

Commit

Permalink
General improvements to report
Browse files Browse the repository at this point in the history
  • Loading branch information
zjnolen committed Mar 18, 2024
1 parent 0a9acec commit 0d5fddf
Show file tree
Hide file tree
Showing 14 changed files with 107 additions and 36 deletions.
6 changes: 6 additions & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ if config["subsample_redo_filts"]:
)


# Request the aggregated fastp MultiQC report only when `pipebams` is
# non-empty; with an empty `pipebams` the target is not added to all_outputs.
if len(pipebams) > 0:
    all_outputs.append(
        "results/datasets/{dataset}/qc/fastp-trimming/fastp_all.{ref}_mqc.html"
    )


if config["analyses"]["qualimap"]:
all_outputs.append(
"results/datasets/{dataset}/qc/qualimap/qualimap_all.{ref}_mqc.html",
Expand Down
16 changes: 10 additions & 6 deletions workflow/rules/0.2_ref_filt.smk
Original file line number Diff line number Diff line change
Expand Up @@ -440,9 +440,13 @@ if config["analyses"]["extreme_depth"]:
summ="results/datasets/{dataset}/filters/depth/{dataset}.{ref}_{population}{dp}_depth.summary",
plot=report(
"results/datasets/{dataset}/plots/depth_dist/{dataset}.{ref}_{population}{dp}_depth.svg",
category="Quality Control",
subcategory="Depth distributions and filters",
labels={"Subset": "{population}", "Type": "Histogram"},
category="00 Quality Control",
subcategory="2 Depth distributions and filters",
labels=lambda w: {
"Subset": "{population}",
**dp_report(w),
"Type": "Histogram",
},
),
log:
"logs/{dataset}/filters/depth/{dataset}.{ref}_{population}{dp}_depth_extremes.log",
Expand Down Expand Up @@ -733,9 +737,9 @@ rule filter_summary_table:
output:
report(
"results/datasets/{dataset}/filters/combined/{dataset}.{ref}{dp}_{sites}-filts.html",
category="Quality Control",
subcategory="Filtering Summary",
labels={"Filter": "{sites}", "Type": "Table"},
category="00 Quality Control",
subcategory="3 Filtering Summary",
labels=lambda w: {"Filter": "{sites}", **dp_report(w), "Type": "Table"},
),
log:
"logs/{dataset}/filters/combine/{dataset}.{ref}{dp}_{sites}-filts_tsv2html.log",
Expand Down
27 changes: 23 additions & 4 deletions workflow/rules/1.0_preprocessing.smk
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ rule fastp_mergedout:
merged=temp("results/preprocessing/fastp/{sample}_{unit}_{lib}.merged.fastq.gz"),
html=report(
"results/preprocessing/qc/fastp/{sample}_{unit}_{lib}_merged.html",
category="Quality Control",
subcategory="Trimming Reports",
category="00 Quality Control",
subcategory="1 Trimming Reports",
labels={
"Sample": "{sample}",
"Unit": "{unit}",
Expand Down Expand Up @@ -66,8 +66,8 @@ rule fastp_pairedout:
),
html=report(
"results/preprocessing/qc/fastp/{sample}_{unit}_{lib}_paired.html",
category="Quality Control",
subcategory="Trimming Reports",
category="00 Quality Control",
subcategory="1 Trimming Reports",
labels={
"Sample": "{sample}",
"Unit": "{unit}",
Expand All @@ -89,6 +89,25 @@ rule fastp_pairedout:
"v2.5.0/bio/fastp"


# Aggregate the fastp reports returned by `multiqc_input_fastp` into a single
# MultiQC HTML report per {dataset}/{ref}, and register that report in the
# Snakemake report under "00 Quality Control" > "1 Trimming Reports".
rule fastp_multiqc:
    input:
        # Input function: resolves the list of fastp report files to summarize.
        multiqc_input_fastp,
    output:
        report(
            "results/datasets/{dataset}/qc/fastp-trimming/fastp_all.{ref}_mqc.html",
            category="00 Quality Control",
            subcategory="1 Trimming Reports",
            labels={"Type": "MultiQC Report"},
        ),
    log:
        "logs/preprocessing/fastp/{dataset}.{ref}_mqc.log",
    params:
        # No additional MultiQC command line options are passed.
        extra="",
        # NOTE(review): presumably tells the wrapper to hand MultiQC the listed
        # input files rather than their parent directories - confirm against
        # the wrapper documentation.
        use_input_files_only=True,
    wrapper:
        "v3.5.0/bio/multiqc"


# rule fastp_pairedout:
# """Process modern reads with fastp, trimming adapters and low quality bases"""
# input:
Expand Down
13 changes: 9 additions & 4 deletions workflow/rules/2.1_sample_qc.smk
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,12 @@ rule qualimap_multiqc:
input:
multiqc_input_qualimap,
output:
"results/datasets/{dataset}/qc/qualimap/qualimap_all.{ref}_mqc.html",
report(
"results/datasets/{dataset}/qc/qualimap/qualimap_all.{ref}_mqc.html",
category="00 Quality Control",
subcategory="5 Qualimap",
labels={"Type": "MultiQC Report"},
),
log:
"logs/mapping/qualimap/{dataset}.{ref}_mqc.log",
params:
Expand Down Expand Up @@ -331,9 +336,9 @@ rule sample_qc_summary:
output:
report(
"results/datasets/{dataset}/qc/{dataset}.{ref}_all{dp}.sampleqc.html",
category="Quality Control",
subcategory="Sample coverage and endogenous content",
labels={"Type": "Table"},
category="00 Quality Control",
subcategory="6 Sample depth and endogenous content",
labels=lambda w: {**dp_report(w), "Type": "Table"},
),
log:
"logs/{dataset}/combine_sample_qc/{dataset}.{ref}{dp}_tsv2html.log",
Expand Down
11 changes: 7 additions & 4 deletions workflow/rules/2.2_dna_damage.smk
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,7 @@ rule mapDamage2_rescaling:
len="results/mapping/qc/mapdamage/{sample}.{ref}/Length_plot.pdf",
lg_dist="results/mapping/qc/mapdamage/{sample}.{ref}/lgdistribution.txt",
misincorp="results/mapping/qc/mapdamage/{sample}.{ref}/misincorporation.txt",
rescaled_bam=temp(
"results/mapping/bams/{sample}.{ref}.rmdup.realn.clip.rescaled.bam"
),
rescaled_bam="results/mapping/bams/{sample}.{ref}.rmdup.realn.clip.rescaled.bam",
log:
"logs/mapping/mapdamage/{sample}.{ref}.log",
benchmark:
Expand All @@ -87,7 +85,12 @@ rule dna_damage_multiqc:
input:
multiqc_input_dnadmg,
output:
"results/datasets/{dataset}/qc/dna-damage-mqc/dna-damage_all.{ref}_mqc.html",
report(
"results/datasets/{dataset}/qc/dna-damage-mqc/dna-damage_all.{ref}_mqc.html",
category="00 Quality Control",
subcategory="4 DNA Damage",
labels={"Type": "MultiQC Report"},
),
log:
"logs/mapping/dnadamage/{dataset}.{ref}_dnadmg-mqc.log",
params:
Expand Down
2 changes: 1 addition & 1 deletion workflow/rules/4.2_linkage_decay.smk
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ rule fit_LD_decay:
output:
plot=report(
"results/datasets/{dataset}/plots/LD_decay/{dataset}.{ref}_{population}{dp}_{sites}-filts.LDdecay.svg",
category="Linkage Disequilibrium Decay",
category="01 Linkage Disequilibrium Decay",
subcategory="{sites}",
labels=lambda w: {
"Population": "{population}",
Expand Down
4 changes: 2 additions & 2 deletions workflow/rules/5.0_relatedness.smk
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ rule kinship_table_html:
output:
report(
"results/datasets/{dataset}/analyses/kinship/ibsrelate_{type}/{dataset}.{ref}_all{dp}_{sites}-filts.kinship.html",
category="Relatedness",
category="02 Relatedness",
subcategory="IBSrelate - {type}",
labels=lambda w: {"Filter": "{sites}", **dp_report(w), "Type": "Table"},
),
Expand Down Expand Up @@ -200,7 +200,7 @@ rule ngsrelate_summary:
output:
report(
"results/datasets/{dataset}/analyses/kinship/ngsrelate/{dataset}.{ref}_all{dp}_{sites}-filts_relate.html",
category="Relatedness",
category="02 Relatedness",
subcategory="NgsRelate",
labels=lambda w: {"Filter": "{sites}", **dp_report(w), "Type": "Table"},
),
Expand Down
2 changes: 1 addition & 1 deletion workflow/rules/6.0_pca.smk
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ rule plot_pca:
output:
report(
"results/datasets/{dataset}/plots/pca/{dataset}.{ref}_{population}{dp}_{sites}-filts_pc{xpc}-{ypc}.svg",
category="PCA",
category="03.1 PCA",
labels=lambda w: {
"Filter": "{sites}",
**dp_report(w),
Expand Down
4 changes: 2 additions & 2 deletions workflow/rules/6.1_admixture.smk
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ rule plot_admix:
output:
report(
"results/datasets/{dataset}/plots/ngsadmix/{dataset}.{ref}_{population}{dp}_{sites}-filts_K{kvalue}.svg",
category="Admixture",
category="03.2 Admixture",
subcategory="NGSadmix",
labels=lambda w: {
"Filter": "{sites}",
Expand Down Expand Up @@ -105,7 +105,7 @@ rule plot_evalAdmix:
output:
report(
"results/datasets/{dataset}/plots/evaladmix/{dataset}.{ref}_{population}{dp}_{sites}-filts_K{kvalue}_evaladmix.html",
category="Admixture",
category="03.2 Admixture",
subcategory="evalAdmix",
labels=lambda w: {
"Filter": "{sites}",
Expand Down
6 changes: 3 additions & 3 deletions workflow/rules/7.1_thetas.smk
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ rule plot_thetas:
output:
watterson=report(
"results/datasets/{dataset}/plots/thetas/{dataset}.{ref}_all{dp}_{sites}-filts.window_{win}_{step}.density.watterson.pdf",
category="Watterson's Theta",
category="04.1 Watterson's Theta",
labels=lambda w: {
"Filter": "{sites}",
**dp_report(w),
Expand All @@ -83,7 +83,7 @@ rule plot_thetas:
),
pi=report(
"results/datasets/{dataset}/plots/thetas/{dataset}.{ref}_all{dp}_{sites}-filts.window_{win}_{step}.density.pi.pdf",
category="Nucleotide Diversity (Pi)",
category="04.2 Nucleotide Diversity (Pi)",
labels=lambda w: {
"Filter": "{sites}",
**dp_report(w),
Expand All @@ -94,7 +94,7 @@ rule plot_thetas:
),
tajima=report(
"results/datasets/{dataset}/plots/thetas/{dataset}.{ref}_all{dp}_{sites}-filts.window_{win}_{step}.density.tajima.pdf",
category="Tajima's D",
category="04.3 Tajima's D",
labels=lambda w: {
"Filter": "{sites}",
**dp_report(w),
Expand Down
2 changes: 1 addition & 1 deletion workflow/rules/7.2_fst.smk
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ rule plot_fst:
output:
report(
"results/datasets/{dataset}/plots/fst/{dataset}.{ref}_{unit}pairs{dp}_{sites}-filts.fst.global.pdf",
category="Fst",
category="05 Fst",
subcategory="Global",
labels=lambda w: {
"Filter": "{sites}",
Expand Down
6 changes: 3 additions & 3 deletions workflow/rules/7.3_heterozygosity.smk
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ rule heterozygosity:
table="results/datasets/{dataset}/analyses/heterozygosity/{dataset}.{ref}_all{dp}_{sites}-filts_heterozygosity.tsv",
popplot=report(
"results/datasets/{dataset}/plots/heterozygosity/{dataset}.{ref}_all{dp}_{sites}-filts_heterozygosity.populations.svg",
category="Heterozygosity",
category="04.4 Heterozygosity",
labels=lambda w: {
"Filter": "{sites}",
**dp_report(w),
Expand All @@ -29,7 +29,7 @@ rule heterozygosity:
),
indplot=report(
"results/datasets/{dataset}/plots/heterozygosity/{dataset}.{ref}_all{dp}_{sites}-filts_heterozygosity.individuals.svg",
category="Heterozygosity",
category="04.4 Heterozygosity",
labels=lambda w: {
"Filter": "{sites}",
**dp_report(w),
Expand All @@ -55,7 +55,7 @@ rule heterozygosity_table:
output:
report(
"results/datasets/{dataset}/analyses/heterozygosity/{dataset}.{ref}_all{dp}_{sites}-filts_heterozygosity.html",
category="Heterozygosity",
category="04.4 Heterozygosity",
labels=lambda w: {"Filter": "{sites}", **dp_report(w), "Type": "Table"},
),
log:
Expand Down
4 changes: 2 additions & 2 deletions workflow/rules/8.0_inbreeding.smk
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ rule plot_froh:
output:
barplot=report(
"results/datasets/{dataset}/plots/inbreeding/{dataset}.{ref}_all{dp}_{sites}-filts.froh_bins.svg",
category="Inbreeding",
category="06 Inbreeding",
labels=lambda w: {
"Filter": "{sites}",
**dp_report(w),
Expand All @@ -85,7 +85,7 @@ rule plot_froh:
),
scatter=report(
"results/datasets/{dataset}/plots/inbreeding/{dataset}.{ref}_all{dp}_{sites}-filts.cumroh_nroh.svg",
category="Inbreeding",
category="06 Inbreeding",
labels=lambda w: {
"Filter": "{sites}",
**dp_report(w),
Expand Down
40 changes: 37 additions & 3 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,40 @@ def get_raw_fastq(wildcards):
}


## Get correct reports to compile a fastp multiqc report
def multiqc_input_fastp(wildcards):
    """Collect the fastp JSON reports that feed the fastp MultiQC report.

    Units belonging to samples in ``pipebams`` are joined with the sample
    table, then split by their ``time`` value: "historical" units contribute
    their ``*_merged.json`` report and "modern" units their ``*_paired.json``
    report.

    Args:
        wildcards: Snakemake wildcards of the requesting rule (not used here,
            but required by the input-function signature).

    Returns:
        A list of fastp JSON report paths, historical reports first. The list
        is empty when ``pipebams`` is empty.
    """
    reports = []
    # Check if pipeline is actually processing any fastq files
    if len(pipebams) > 0:
        # subset units to samples that are starting at fastq
        pipeunits = units[units["sample"].isin(pipebams)]
        # join with sample list to know 'historical' or 'modern' sample context
        pipeunits = pd.merge(pipeunits, samples, left_on="sample", right_index=True)
        report_templates = (
            # historical reads are collapsed, so fastp reports them as merged
            (
                "historical",
                "results/preprocessing/qc/fastp/{sample}_{unit}_{lib}_merged.json",
            ),
            # modern reads stay paired
            (
                "modern",
                "results/preprocessing/qc/fastp/{sample}_{unit}_{lib}_paired.json",
            ),
        )
        for time, template in report_templates:
            subset = pipeunits[pipeunits["time"] == time]
            # sample, unit and lib must all come from the SAME subset: zip
            # pairs them positionally, so mixing in columns from the
            # unfiltered table yields sample/unit/lib triples that do not
            # exist.
            reports.extend(
                expand(
                    template,
                    zip,
                    sample=subset["sample"].tolist(),
                    unit=subset["unit"].tolist(),
                    lib=subset["lib"].tolist(),
                )
            )
    return reports


# Reference


Expand Down Expand Up @@ -721,8 +755,8 @@ def unit_report(wildcards):
def theta_report(wildcards):
    """Return the report category label for a theta statistic.

    The numeric prefixes match the ``category=`` strings used for the
    corresponding plots so that all outputs for one statistic share a
    category and the report lists categories in a fixed order.

    Args:
        wildcards: Object with a ``stat`` attribute, expected to be one of
            "watterson", "pi" or "tajima".

    Returns:
        The category label for ``wildcards.stat``, or ``None`` when the
        statistic is not one of the three known values.
    """
    categories = {
        "watterson": "04.1 Watterson's Theta",
        "pi": "04.2 Nucleotide Diversity (Pi)",
        "tajima": "04.3 Tajima's D",
    }
    return categories.get(wildcards.stat)

0 comments on commit 0d5fddf

Please sign in to comment.