diff --git a/README.md b/README.md index 6f4995b..ea0ed27 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,14 @@ analysis of whole-genome sequence data for Mycobacteria tuberculosis complex sam many samples at once, on an HPC cluster system. It also integrates additional QC analysis of the input data using [fastp](https://github.com/OpenGene/fastp), and of the generated alignments using [Qualimap](https://github.com/scchess/Qualimap). +```mermaid +flowchart TD + reads --> fastp + fastp -- trimmed_reads --> tbprofiler + tbprofiler -- vcf --> snpit + tbprofiler -- bam --> qualimap_bamqc +``` + ## Usage ``` @@ -31,9 +39,11 @@ The following files will be produced for each sample: ``` . └── sample-01 + ├── sample-01_TIMESTAMP_provenance.yml ├── sample-01_fastp.csv ├── sample-01_fastp.json ├── sample-01_qualimap_alignment_qc.csv + ├── sample-01_snpit.tsv ├── sample-01_tbprofiler.bam ├── sample-01_tbprofiler.bam.bai ├── sample-01_tbprofiler_full_report.csv @@ -41,5 +51,6 @@ The following files will be produced for each sample: ├── sample-01_tbprofiler_lineage.csv ├── sample-01_tbprofiler_resistance.csv ├── sample-01_tbprofiler_summary.csv - └── sample-01_tbprofiler.vcf -``` \ No newline at end of file + ├── sample-01_tbprofiler_targets.vcf + └── sample-01_tbprofiler_whole_genome.vcf +``` diff --git a/environments/snpit.yml b/environments/snpit.yml new file mode 100644 index 0000000..c3fbbee --- /dev/null +++ b/environments/snpit.yml @@ -0,0 +1,12 @@ +name: snpit +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - python + - cython + - pip + - pysam=0.15.2 + - pip: + - git+https://github.com/philipwfowler/snpit.git diff --git a/main.nf b/main.nf index 1ab8e4c..aab9de5 100644 --- a/main.nf +++ b/main.nf @@ -4,15 +4,16 @@ import java.time.LocalDateTime nextflow.enable.dsl = 2 -include { fastp } from './modules/tbprofiler.nf' -include { tbprofiler } from './modules/tbprofiler.nf' +include { fastp } from './modules/tbprofiler.nf' +include { tbprofiler } from './modules/tbprofiler.nf' include { rename_ref_in_alignment } from './modules/tbprofiler.nf' -include { rename_ref_in_variants } from './modules/tbprofiler.nf' -include { qualimap_bamqc } from './modules/tbprofiler.nf' -include { pipeline_provenance } from './modules/provenance.nf' -include { collect_provenance } from './modules/provenance.nf' +include { rename_ref_in_variants as rename_ref_in_targets_variants } from './modules/tbprofiler.nf' +include { rename_ref_in_variants as rename_ref_in_whole_genome_variants } from './modules/tbprofiler.nf' +include { qualimap_bamqc } from './modules/tbprofiler.nf' +include { pipeline_provenance } from './modules/provenance.nf' +include { collect_provenance } from './modules/provenance.nf' -// include { snp_it } from './modules/tbprofiler.nf' +include { snpit } from './modules/tbprofiler.nf' workflow { @@ -30,17 +31,27 @@ workflow { main: fastp(ch_fastq) + tbprofiler(fastp.out.reads) + if (params.rename_ref) { rename_ref_in_alignment(tbprofiler.out.alignment) - rename_ref_in_variants(tbprofiler.out.variants) + rename_ref_in_targets_variants(tbprofiler.out.targets_vcf) + rename_ref_in_whole_genome_variants(tbprofiler.out.whole_genome_vcf) qualimap_bamqc(rename_ref_in_alignment.out) } else { qualimap_bamqc(tbprofiler.out.alignment) } - // snp_it(ch_vcf) + + if (params.rename_ref) { + snpit(rename_ref_in_whole_genome_variants.out) + } else { + snpit(tbprofiler.out.whole_genome_vcf) + } + ch_provenance = fastp.out.provenance ch_provenance = ch_provenance.join(tbprofiler.out.provenance).map{ it -> [it[0], [it[1], it[2]]] } + ch_provenance = ch_provenance.join(snpit.out.provenance).map{ it -> [it[0], it[1] << it[2]] } ch_provenance = ch_provenance.join(ch_fastq.map{ it -> it[0] }.combine(ch_pipeline_provenance)).map{ it -> [it[0], it[1] ] } diff --git a/modules/tbprofiler.nf b/modules/tbprofiler.nf index a8e5444..3f5defb 100644 --- a/modules/tbprofiler.nf +++ b/modules/tbprofiler.nf @@ -44,7 +44,8 @@ process tbprofiler { output: tuple val(sample_id), path("${sample_id}_tbprofiler*.{json,csv}"), emit: reports tuple val(sample_id), path("${sample_id}_tbprofiler*.{bam,bam.bai}"), emit: alignment - tuple val(sample_id), path("${sample_id}_tbprofiler*.vcf"), emit: variants + tuple val(sample_id), path("${sample_id}_tbprofiler_targets.vcf"), emit: targets_vcf + tuple val(sample_id), path("${sample_id}_tbprofiler_whole_genome.vcf"), emit: whole_genome_vcf tuple val(sample_id), path("${sample_id}_tbprofiler_provenance.yml"), emit: provenance script: @@ -61,13 +62,18 @@ process tbprofiler { --read1 ${reads_1} \ --read2 ${reads_2} \ --prefix ${sample_id} \ - --csv + --csv \ + --call_whole_genome mv bam/${sample_id}.bam ./${sample_id}_tbprofiler.bam mv bam/${sample_id}.bam.bai ./${sample_id}_tbprofiler.bam.bai - mv vcf/${sample_id}.targets.csq.vcf.gz ./${sample_id}_tbprofiler.vcf.gz - gunzip ./${sample_id}_tbprofiler.vcf.gz + mv vcf/${sample_id}.targets.csq.vcf.gz ./${sample_id}_tbprofiler_targets.vcf.gz + gunzip ./${sample_id}_tbprofiler_targets.vcf.gz + + mv vcf/${sample_id}.vcf.gz ./${sample_id}_tbprofiler_whole_genome.vcf.gz + gunzip ./${sample_id}_tbprofiler_whole_genome.vcf.gz + cp results/${sample_id}.results.csv ${sample_id}_tbprofiler_full_report.csv cp results/${sample_id}.results.json ${sample_id}_tbprofiler_full_report.json @@ -137,20 +143,27 @@ process qualimap_bamqc { } -process snp_it { +process snpit { tag { sample_id } - publishDir "${params.outdir}", mode: 'copy', pattern: "${sample_id}_snpit.txt" + conda "$baseDir/environments/snpit.yml" + + publishDir params.versioned_outdir ? "${params.outdir}/${sample_id}/${params.pipeline_short_name}-v${params.pipeline_minor_version}-output" : "${params.outdir}/${sample_id}", mode: 'copy', pattern: "${sample_id}_snpit.tsv" input: - file(vcf) + tuple val(sample_id), path(vcf) output: - tuple val(sample_id), path("${sample_id}_snpit.txt") + tuple val(sample_id), path("${sample_id}_snpit.tsv") + tuple val(sample_id), path("${sample_id}_snpit_provenance.yml"), emit: provenance script: """ - snpit-run.py --input ${vcf} > ${sample_id}_snpit.txt + snpit --input ${vcf} > ${sample_id}_snpit.tsv + + printf -- "- process_name: snpit\\n" > ${sample_id}_snpit_provenance.yml + printf -- " tool_name: snpit\\n" >> ${sample_id}_snpit_provenance.yml + printf -- " tool_version: \$(snpit --version 2>&1)\\n" >> ${sample_id}_snpit_provenance.yml """ } diff --git a/nextflow.config b/nextflow.config index d3210fd..b60f556 100644 --- a/nextflow.config +++ b/nextflow.config @@ -4,7 +4,7 @@ manifest { description = 'BCCDC-PHL TBProfiler Nextflow Wrapper' mainScript = 'main.nf' nextflowVersion = '>=20.01.0' - version = '0.1.0' + version = '0.2.0' } params {