Skip to content

Commit

Permalink
Merge branch 'develop' into 'master'
Browse files Browse the repository at this point in the history
Release v1.4.1

See merge request tron/tron-bam-preprocessing!18
  • Loading branch information
Pablo Riesgo Ferreiro committed Jun 1, 2021
2 parents 8ce785f + 9fe9862 commit 2a6e306
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 23 deletions.
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ stages:
test:
stage: test
script:
- make clean test
- make
43 changes: 38 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,18 +1,51 @@
# Default goal: remove previous outputs (clean), run the pipeline under the
# test profile (test), then verify that the expected output files exist (check).
# The prerequisites are listed in the order they must run; that order is only
# guaranteed by a serial make, so do not invoke this goal with -j.
#
# None of these targets creates a file of its own name. Declaring them phony
# keeps a stray file or directory called e.g. `test` or `check` from making
# the target look up to date and silently skipping its recipe.
.PHONY: all clean test check
all : clean test check

# Remove the artefacts of a previous run from the working directory: the
# output/ tree, the report/timeline/trace/dag files (including any suffixed
# copies matched by the trailing *), and every .nextflow* file or directory.
clean:
	rm -rf output
	rm -f report.html* timeline.html* trace.txt* dag.dot*
	rm -f .nextflow.log*
	rm -rf .nextflow*

# Run the pipeline end to end: print the help text, then launch main.nf with
# the `test,conda` profiles under several option combinations, writing the
# results under output/testN.
# NOTE(review): the two --skip_metrics runs below both write to output/test5,
# so the second run reuses the directory of the first - confirm this is intended.
test: NF_TEST := nextflow main.nf -profile test,conda
test:
	nextflow main.nf --help
	$(NF_TEST) --output output/test1
	$(NF_TEST) --skip_bqsr --output output/test2
	$(NF_TEST) --skip_realignment --output output/test3
	$(NF_TEST) --skip_deduplication --output output/test4
	$(NF_TEST) --output output/test5 --skip_metrics
	$(NF_TEST) --output output/test5 --skip_metrics --known_indels1 false --known_indels2 false
	$(NF_TEST) --output output/test6 --intervals false
	$(NF_TEST) --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt
	$(NF_TEST) --output output/test8 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10 --remove_duplicates false

# Verify the results of the `test` target: for each of the eight test runs,
# the preprocessed BAM and its BAI index must exist and be non-empty (test -s)
# for both samples. The first missing or empty file aborts make with exit
# status 1 and a message naming the test run it belongs to.
check:
	test -s output/test1/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
	test -s output/test1/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
	test -s output/test1/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
	test -s output/test1/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
	test -s output/test2/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 2 output file!"; exit 1; }
	test -s output/test2/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 2 output file!"; exit 1; }
	test -s output/test2/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 2 output file!"; exit 1; }
	test -s output/test2/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 2 output file!"; exit 1; }
	test -s output/test3/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 3 output file!"; exit 1; }
	test -s output/test3/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 3 output file!"; exit 1; }
	test -s output/test3/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 3 output file!"; exit 1; }
	test -s output/test3/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 3 output file!"; exit 1; }
	test -s output/test4/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 4 output file!"; exit 1; }
	test -s output/test4/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 4 output file!"; exit 1; }
	test -s output/test4/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 4 output file!"; exit 1; }
	test -s output/test4/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 4 output file!"; exit 1; }
	test -s output/test5/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 5 output file!"; exit 1; }
	test -s output/test5/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 5 output file!"; exit 1; }
	test -s output/test5/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 5 output file!"; exit 1; }
	test -s output/test5/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 5 output file!"; exit 1; }
	test -s output/test6/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 6 output file!"; exit 1; }
	test -s output/test6/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 6 output file!"; exit 1; }
	test -s output/test6/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 6 output file!"; exit 1; }
	test -s output/test6/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 6 output file!"; exit 1; }
	test -s output/test7/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 7 output file!"; exit 1; }
	test -s output/test7/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 7 output file!"; exit 1; }
	test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 7 output file!"; exit 1; }
	test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 7 output file!"; exit 1; }
	test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 8 output file!"; exit 1; }
	test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 8 output file!"; exit 1; }
	test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 8 output file!"; exit 1; }
	test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 8 output file!"; exit 1; }
15 changes: 11 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# TRONflow BAM preprocessing pipeline
# TronFlow BAM preprocessing pipeline

[![DOI](https://zenodo.org/badge/358400957.svg)](https://zenodo.org/badge/latestdoi/358400957)

Nextflow pipeline for the preprocessing of BAM files based on Picard and GATK.
Nextflow (Di Tommaso, 2017) pipeline for the preprocessing of BAM files based on Picard and GATK (DePristo, 2011).


## Background
Expand Down Expand Up @@ -30,7 +30,7 @@ Steps:
* **Base Quality Score Recalibration (BQSR)** (optional). It aims at correcting systematic errors made by the sequencer when assigning base call quality scores. As these scores are used by variant callers, recalibration improves variant calling in some situations. Implemented in GATK4
* **Metrics** (optional). A number of metrics are obtained over the BAM file with Picard's CollectMetrics (eg: duplication, insert size, alignment, etc.).

![Pipeline](bam_preprocessing2.png)
![Pipeline](figures/bam_preprocessing2.png)

## References

Expand All @@ -45,7 +45,8 @@ This can be built from a BED file using Picard's BedToIntervalList (https://gatk
## How to run it

```
$ nextflow run tron-bioinformatics/tronflow-bam-preprocessing -r v1.3.1 --help
$ nextflow run tron-bioinformatics/tronflow-bam-preprocessing --help
N E X T F L O W ~ version 19.07.0
Launching `main.nf` [intergalactic_shannon] - revision: e707c77d7b
Expand Down Expand Up @@ -100,3 +101,9 @@ Optional output:
* Realignment intervals
* Metrics
```


## References

* DePristo M, Banks E, Poplin R, Garimella K, Maguire J, Hartl C, Philippakis A, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell T, Kernytsky A, Sivachenko A, Cibulskis K, Gabriel S, Altshuler D, Daly M. (2011). A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nat Genet, 43:491-498. DOI: 10.1038/ng.806.
* Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316–319. DOI: 10.1038/nbt.3820.
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# You can use this file to create a conda environment for this pipeline:
# conda env create -f environment.yml
name: tronflow-bam-preprocessing-1.4.0
name: tronflow-bam-preprocessing
channels:
- conda-forge
- bioconda
Expand Down
File renamed without changes
16 changes: 7 additions & 9 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ params.prepare_bam_memory = "8g"
params.mark_duplicates_cpus = 16
params.mark_duplicates_memory = "64g"
params.realignment_around_indels_cpus = 2
params.realignment_around_indels_memory = "32g"
params.realignment_around_indels_memory = "31g"
params.bqsr_cpus = 3
params.bqsr_memory = "4g"
params.metrics_cpus = 1
Expand Down Expand Up @@ -270,18 +270,17 @@ if (!params.skip_realignment) {
file("${bam.baseName}.RA.intervals") into realignment_intervals

script:
known_indels = "" + params.known_indels1 ? " --known ${params.known_indels1}" : "" +
params.known_indels2 ? " --known ${params.known_indels2}" : ""
known_alleles = "" + params.known_indels1 ? " --knownAlleles ${params.known_indels1}" : "" +
params.known_indels2 ? " --knownAlleles ${params.known_indels2}" : ""
known_indels1 = params.known_indels1 ? " --known ${params.known_indels1}" : ""
known_indels2 = params.known_indels2 ? " --known ${params.known_indels2}" : ""
known_alleles1 = params.known_indels1 ? " --knownAlleles ${params.known_indels1}" : ""
known_alleles2 = params.known_indels2 ? " --knownAlleles ${params.known_indels2}" : ""
"""
mkdir tmp
gatk3 -Xmx${params.realignment_around_indels_memory} -Djava.io.tmpdir=tmp -T RealignerTargetCreator \
--input_file ${bam} \
--out ${bam.baseName}.RA.intervals \
--reference_sequence ${params.reference} \
${known_indels}
--reference_sequence ${params.reference} ${known_indels1} ${known_indels2}
gatk3 -Xmx${params.realignment_around_indels_memory} -Djava.io.tmpdir=tmp -T IndelRealigner \
--input_file ${bam} \
Expand All @@ -290,8 +289,7 @@ if (!params.skip_realignment) {
--targetIntervals ${bam.baseName}.RA.intervals \
--consensusDeterminationModel USE_SW \
--LODThresholdForCleaning 0.4 \
--maxReadsInMemory 600000 \
${known_alleles}
--maxReadsInMemory 600000 ${known_alleles1} ${known_alleles2}
"""
}
}
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail']

cleanup = true

VERSION = '1.4.0'
VERSION = '1.4.1'
DOI = 'https://zenodo.org/badge/latestdoi/358400957'

manifest {
Expand Down
4 changes: 2 additions & 2 deletions test_data/test_input.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
TESTX_S1_L001 tumor test_data/TESTX_S1_L001.bam
TESTX_S1_L002 normal test_data/TESTX_S1_L002.bam
sample1 tumor test_data/TESTX_S1_L001.bam
sample2 normal test_data/TESTX_S1_L002.bam

0 comments on commit 2a6e306

Please sign in to comment.