Skip to content

Commit

Permalink
Add snpit (#9)
Browse files Browse the repository at this point in the history
* Add snpit

* Capture whole genome variants, pass to snpit

* typo

* Add provenance for snpit

* whitespace

* Update README
  • Loading branch information
dfornika authored Mar 8, 2023
1 parent 96ab207 commit 0751cd4
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 21 deletions.
15 changes: 13 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ analysis of whole-genome sequence data for Mycobacterium tuberculosis complex sam
many samples at once, on an HPC cluster system. It also integrates additional QC analysis of the input data using [fastp](https://github.com/OpenGene/fastp),
and of the generated alignments using [Qualimap](https://github.com/scchess/Qualimap).

```mermaid
flowchart TD
reads --> fastp
fastp -- trimmed_reads --> tbprofiler
tbprofiler -- vcf --> snpit
tbprofiler -- bam --> qualimap_bamqc
```

## Usage

```
Expand All @@ -31,15 +39,18 @@ The following files will be produced for each sample:
```
.
└── sample-01
├── sample-01_TIMESTAMP_provenance.yml
├── sample-01_fastp.csv
├── sample-01_fastp.json
├── sample-01_qualimap_alignment_qc.csv
├── sample-01_snpit.tsv
├── sample-01_tbprofiler.bam
├── sample-01_tbprofiler.bam.bai
├── sample-01_tbprofiler_full_report.csv
├── sample-01_tbprofiler_full_report.json
├── sample-01_tbprofiler_lineage.csv
├── sample-01_tbprofiler_resistance.csv
├── sample-01_tbprofiler_summary.csv
└── sample-01_tbprofiler.vcf
```
├── sample-01_tbprofiler_targets.vcf
└── sample-01_tbprofiler_whole_genome.vcf
```
12 changes: 12 additions & 0 deletions environments/snpit.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Conda environment used by the `snpit` process (referenced from modules/tbprofiler.nf).
name: snpit
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- python
- cython
- pip
# pysam is the only dependency in this file pinned to an exact version.
- pysam=0.15.2
- pip:
# NOTE(review): this git URL carries no tag or commit ref, so pip installs whatever the repository's default branch currently points to — consider pinning for reproducible builds.
- git+https://github.com/philipwfowler/snpit.git
29 changes: 20 additions & 9 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,16 @@ import java.time.LocalDateTime

nextflow.enable.dsl = 2

include { fastp } from './modules/tbprofiler.nf'
include { tbprofiler } from './modules/tbprofiler.nf'
include { fastp } from './modules/tbprofiler.nf'
include { tbprofiler } from './modules/tbprofiler.nf'
include { rename_ref_in_alignment } from './modules/tbprofiler.nf'
include { rename_ref_in_variants } from './modules/tbprofiler.nf'
include { qualimap_bamqc } from './modules/tbprofiler.nf'
include { pipeline_provenance } from './modules/provenance.nf'
include { collect_provenance } from './modules/provenance.nf'
include { rename_ref_in_variants as rename_ref_in_targets_variants } from './modules/tbprofiler.nf'
include { rename_ref_in_variants as rename_ref_in_whole_genome_variants } from './modules/tbprofiler.nf'
include { qualimap_bamqc } from './modules/tbprofiler.nf'
include { pipeline_provenance } from './modules/provenance.nf'
include { collect_provenance } from './modules/provenance.nf'

// include { snp_it } from './modules/tbprofiler.nf'
include { snpit } from './modules/tbprofiler.nf'

workflow {

Expand All @@ -30,17 +31,27 @@ workflow {

main:
fastp(ch_fastq)

tbprofiler(fastp.out.reads)

if (params.rename_ref) {
rename_ref_in_alignment(tbprofiler.out.alignment)
rename_ref_in_variants(tbprofiler.out.variants)
rename_ref_in_targets_variants(tbprofiler.out.targets_vcf)
rename_ref_in_whole_genome_variants(tbprofiler.out.whole_genome_vcf)
qualimap_bamqc(rename_ref_in_alignment.out)
} else {
qualimap_bamqc(tbprofiler.out.alignment)
}
// snp_it(ch_vcf)

if (params.rename_ref) {
snpit(rename_ref_in_whole_genome_variants.out)
} else {
snpit(tbprofiler.out.whole_genome_vcf)
}

ch_provenance = fastp.out.provenance
ch_provenance = ch_provenance.join(tbprofiler.out.provenance).map{ it -> [it[0], [it[1], it[2]]] }
ch_provenance = ch_provenance.join(snpit.out.provenance).map{ it -> [it[0], it[1] << it[2]] }

ch_provenance = ch_provenance.join(ch_fastq.map{ it -> it[0] }.combine(ch_pipeline_provenance)).map{ it -> [it[0], it[1] ] }

Expand Down
31 changes: 22 additions & 9 deletions modules/tbprofiler.nf
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ process tbprofiler {
output:
tuple val(sample_id), path("${sample_id}_tbprofiler*.{json,csv}"), emit: reports
tuple val(sample_id), path("${sample_id}_tbprofiler*.{bam,bam.bai}"), emit: alignment
tuple val(sample_id), path("${sample_id}_tbprofiler*.vcf"), emit: variants
tuple val(sample_id), path("${sample_id}_tbprofiler_targets.vcf"), emit: targets_vcf
tuple val(sample_id), path("${sample_id}_tbprofiler_whole_genome.vcf"), emit: whole_genome_vcf
tuple val(sample_id), path("${sample_id}_tbprofiler_provenance.yml"), emit: provenance

script:
Expand All @@ -61,13 +62,18 @@ process tbprofiler {
--read1 ${reads_1} \
--read2 ${reads_2} \
--prefix ${sample_id} \
--csv
--csv \
--call_whole_genome
mv bam/${sample_id}.bam ./${sample_id}_tbprofiler.bam
mv bam/${sample_id}.bam.bai ./${sample_id}_tbprofiler.bam.bai
mv vcf/${sample_id}.targets.csq.vcf.gz ./${sample_id}_tbprofiler.vcf.gz
gunzip ./${sample_id}_tbprofiler.vcf.gz
mv vcf/${sample_id}.targets.csq.vcf.gz ./${sample_id}_tbprofiler_targets.vcf.gz
gunzip ./${sample_id}_tbprofiler_targets.vcf.gz
mv vcf/${sample_id}.vcf.gz ./${sample_id}_tbprofiler_whole_genome.vcf.gz
gunzip ./${sample_id}_tbprofiler_whole_genome.vcf.gz
cp results/${sample_id}.results.csv ${sample_id}_tbprofiler_full_report.csv
cp results/${sample_id}.results.json ${sample_id}_tbprofiler_full_report.json
Expand Down Expand Up @@ -137,20 +143,27 @@ process qualimap_bamqc {
}


process snp_it {
process snpit {

tag { sample_id }

publishDir "${params.outdir}", mode: 'copy', pattern: "${sample_id}_snpit.txt"
conda "$baseDir/environments/snpit.yml"

publishDir params.versioned_outdir ? "${params.outdir}/${sample_id}/${params.pipeline_short_name}-v${params.pipeline_minor_version}-output" : "${params.outdir}/${sample_id}", mode: 'copy', pattern: "${sample_id}_snpit.tsv"

input:
file(vcf)
tuple val(sample_id), path(vcf)

output:
tuple val(sample_id), path("${sample_id}_snpit.txt")
tuple val(sample_id), path("${sample_id}_snpit.tsv")
tuple val(sample_id), path("${sample_id}_snpit_provenance.yml"), emit: provenance

script:
"""
snpit-run.py --input ${vcf} > ${sample_id}_snpit.txt
snpit --input ${vcf} > ${sample_id}_snpit.tsv
printf -- "- process_name: snpit\\n" > ${sample_id}_snpit_provenance.yml
printf -- " tool_name: snpit\\n" >> ${sample_id}_snpit_provenance.yml
printf -- " tool_version: \$(snpit --version 2>&1)\\n" >> ${sample_id}_snpit_provenance.yml
"""
}
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ manifest {
description = 'BCCDC-PHL TBProfiler Nextflow Wrapper'
mainScript = 'main.nf'
nextflowVersion = '>=20.01.0'
version = '0.1.0'
version = '0.2.0'
}

params {
Expand Down

0 comments on commit 0751cd4

Please sign in to comment.