Commit

Merge branch 'develop' into 'master'
Release v1.4.0

See merge request tron/tron-bam-preprocessing!16
Pablo Riesgo Ferreiro committed May 26, 2021
2 parents 6c2b617 + 12230be commit 8ce785f
Showing 5 changed files with 45 additions and 28 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -15,4 +15,4 @@ test:
nextflow main.nf -profile test,conda --output output/test5 --skip_metrics
nextflow main.nf -profile test,conda --output output/test6 --intervals false
nextflow main.nf -profile test,conda --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt
nextflow main.nf -profile test,conda --output output/test8 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10
nextflow main.nf -profile test,conda --output output/test8 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10 --remove_duplicates false
3 changes: 2 additions & 1 deletion README.md
@@ -16,7 +16,7 @@ GATK has been providing a well known best practices document on BAM preprocessin

We aim to provide a single implementation of the BAM preprocessing pipeline that can be used across different situations. For this purpose there are some required steps and some optional steps. It is implemented as a Nextflow pipeline to simplify parallelization of execution on a cluster. The default configuration uses the hg19 reference genome; if another reference is needed, the appropriate resources must be provided. The reference genome resources for hg19 were downloaded from https://software.broadinstitute.org/gatk/download/bundle
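For illustration only, a different reference could be supplied by overriding the resource parameters. In the sketch below the --reference parameter name and the hg38 file paths are assumptions not taken from this change, while --dbsnp, --known_indels1, --known_indels2 and --intervals do appear in this change's test profile:

nextflow main.nf \
  --reference /refs/hg38/hg38.fa \
  --dbsnp /refs/hg38/dbsnp.hg38.vcf \
  --known_indels1 /refs/hg38/known_indels_1.hg38.vcf \
  --known_indels2 /refs/hg38/known_indels_2.hg38.vcf \
  --intervals /refs/hg38/targets.intervals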

The input is a tab-separated values file where each line corresponds to one input BAM. The output is another tab-separated values file with the absolute paths of the preprocessed and indexed BAMs.
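For illustration only, a minimal input table could look like the example below; the three tab-separated columns assumed here (sample name, sample type, BAM path) are not spelled out in this excerpt:

sample_1	tumor	/data/sample_1.tumor.bam
sample_1	normal	/data/sample_1.normal.bam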

## Implementation

@@ -74,6 +74,7 @@ Optional input:
* --skip_bqsr: optionally skip BQSR (default: false)
* --skip_realignment: optionally skip realignment (default: false)
* --skip_deduplication: optionally skip deduplication (default: false)
* --remove_duplicates: removes duplicate reads from the output BAM instead of only flagging them (default: true)
* --skip_metrics: optionally skip metrics (default: false)
* --output: the folder where the output is published (default: ./output)
* --platform: the platform to be added to the BAM header. Valid values: [ILLUMINA, SOLID, LS454, HELICOS and PACBIO] (default: ILLUMINA)
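As a usage sketch only, the optional flags above can be combined in a single invocation. The --input_files parameter name is an assumption, since the required inputs are not listed in this excerpt; the remaining flags are taken from this change:

nextflow main.nf \
  --input_files my_bams.tsv \
  --output output/run1 \
  --skip_metrics \
  --remove_duplicates false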
2 changes: 1 addition & 1 deletion environment.yml
@@ -1,6 +1,6 @@
# You can use this file to create a conda environment for this pipeline:
# conda env create -f environment.yml
name: tronflow-bam-preprocessing-1.3.1
name: tronflow-bam-preprocessing-1.4.0
channels:
- conda-forge
- bioconda
38 changes: 32 additions & 6 deletions main.nf
@@ -13,6 +13,7 @@ params.hs_metrics_per_base_coverage = false
params.skip_bqsr = false
params.skip_realignment = false
params.skip_deduplication = false
params.remove_duplicates = true
params.skip_metrics = false
params.output = false
params.platform = "ILLUMINA"
@@ -30,6 +31,8 @@ params.bqsr_cpus = 3
params.bqsr_memory = "4g"
params.metrics_cpus = 1
params.metrics_memory = "8g"
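// resources for the standalone BAM indexing step that runs when deduplication is skipped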
params.index_cpus = 1
params.index_memory = "8g"



@@ -83,9 +86,10 @@ process prepareBam {
output:
set val(name),
val("${bam.baseName}"),
val(type), file("${bam.baseName}.prepared.bam"),
file("${bam.baseName}.prepared.bai") into prepared_bams
val(type), file("${bam.baseName}.prepared.bam") into prepared_bams

script:
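// when deduplication follows, keep the BAM queryname-sorted (MarkDuplicatesSpark expects queryname order); otherwise sort by coordinate so it can be indexed directly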
order = params.skip_deduplication ? "--SORT_ORDER coordinate": "--SORT_ORDER queryname"
"""
mkdir tmp
@@ -109,8 +113,7 @@ process prepareBam {
--RGSM ${type} \
--RGLB 1 \
--RGPL ${params.platform} \
--SORT_ORDER coordinate \
--CREATE_INDEX true
${order}
"""
}

@@ -126,7 +129,7 @@ if (!params.skip_deduplication) {
publishDir "${publish_dir}/${name}/metrics", mode: "copy", pattern: "*.dedup_metrics"

input:
set name, bam_name, type, file(bam), file(bai) from prepared_bams
set name, bam_name, type, file(bam) from prepared_bams

output:
set val(name), val(bam_name), val(type),
@@ -136,6 +139,7 @@

script:
dedup_metrics = params.skip_metrics ? "": "--metrics-file ${bam.baseName}.dedup_metrics"
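// map the pipeline parameter onto MarkDuplicatesSpark's --remove-all-duplicates flag: true drops duplicate reads from the output BAM, false only flags them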
remove_duplicates = params.remove_duplicates ? "--remove-all-duplicates true" : "--remove-all-duplicates false"
"""
mkdir tmp
@@ -144,12 +148,34 @@
--input ${bam} \
--output ${bam.baseName}.dedup.bam \
--conf 'spark.executor.cores=${task.cpus}' \
${remove_duplicates} \
${dedup_metrics}
"""
}
}
else {
prepared_bams.into{ deduplicated_bams; deduplicated_bams_for_metrics; deduplicated_bams_for_hs_metrics}
process indexBam {
cpus "${params.index_cpus}"
memory "${params.index_memory}"
tag "${name}"

input:
set name, bam_name, type, file(bam) from prepared_bams

output:
set val(name), val(bam_name), val(type),
file("${bam}"), file("${bam.baseName}.bai") into deduplicated_bams,
deduplicated_bams_for_metrics, deduplicated_bams_for_hs_metrics

script:
"""
mkdir tmp
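# index the prepared BAM so that downstream steps receive a BAM/BAI pair even when deduplication is skipped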
gatk BuildBamIndex \
--java-options '-Xmx8g -Djava.io.tmpdir=tmp' \
--INPUT ${bam}
"""
}
}

if (! params.skip_metrics) {
28 changes: 9 additions & 19 deletions nextflow.config
@@ -23,10 +23,16 @@ profiles {
params.bqsr_memory = "3g"
params.metrics_cpus = 1
params.metrics_memory = "3g"
params.index_cpus = 1
params.index_memory = "3g"
params.known_indels1 = "$baseDir/test_data/1000G_phase1.indels.hg19.sites.minimal.vcf"
params.known_indels2 = "$baseDir/test_data/Mills_and_1000G_gold_standard.indels.hg19.sites.sorted.minimal.vcf"
params.intervals = "$baseDir/test_data/minimal_intervals.intervals"
params.dbsnp = "$baseDir/test_data/dbsnp_138.hg19.minimal.vcf"
timeline.enabled = false
report.enabled = false
trace.enabled = false
dag.enabled = false
}
}

@@ -40,29 +46,12 @@ process.shell = ['/bin/bash', '-euo', 'pipefail']

cleanup = true

timeline {
enabled = true
//file = "${params.output}/execution_timeline.html"
}
report {
enabled = true
//file = "${params.output}/execution_report.html"
}
trace {
enabled = true
//file = "${params.output}/execution_trace.txt"
}
dag {
enabled = true
//file = "${params.output}/pipeline_dag.svg"
}

VERSION = '1.3.1'
VERSION = '1.4.0'
DOI = 'https://zenodo.org/badge/latestdoi/358400957'

manifest {
name = 'TRON-Bioinformatics/tronflow-bam-preprocessing'
author = 'Pablo Riesgo-Ferreiro, Özlem Muslu'
author = 'Pablo Riesgo-Ferreiro, Özlem Muslu, Luisa Bresadola'
homePage = 'https://github.com/TRON-Bioinformatics/tronflow-bam-preprocessing'
description = 'Picard and GATK BAM preprocessing pipeline'
mainScript = 'main.nf'
@@ -99,6 +88,7 @@ Optional input:
* --skip_bqsr: optionally skip BQSR (default: false)
* --skip_realignment: optionally skip realignment (default: false)
* --skip_deduplication: optionally skip deduplication (default: false)
* --remove_duplicates: removes duplicate reads from the output BAM instead of only flagging them (default: true)
* --skip_metrics: optionally skip metrics (default: false)
* --output: the folder where the output is published (default: ./output)
* --platform: the platform to be added to the BAM header. Valid values: [ILLUMINA, SOLID, LS454, HELICOS and PACBIO] (default: ILLUMINA)
