Skip to content

Commit

Permalink
Merge branch 'develop' into 'master'
Browse files Browse the repository at this point in the history
Release v1.5.0

See merge request tron/tron-bam-preprocessing!19
  • Loading branch information
Pablo Riesgo Ferreiro committed Jun 1, 2021
2 parents 2a6e306 + 4fcdcfb commit 8aef905
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 41 deletions.
67 changes: 35 additions & 32 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,41 +11,44 @@ test:
nextflow main.nf -profile test,conda --skip_bqsr --output output/test2
nextflow main.nf -profile test,conda --skip_realignment --output output/test3
nextflow main.nf -profile test,conda --skip_deduplication --output output/test4
nextflow main.nf -profile test,conda --output output/test5 --skip_metrics --known_indels1 false --known_indels2 false
nextflow main.nf -profile test,conda --output output/test6 --intervals false
nextflow main.nf -profile test,conda --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt
nextflow main.nf -profile test,conda --output output/test8 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10 --remove_duplicates false
nextflow main.nf -profile test,conda --output output/test5 --skip_deduplication --skip_bqsr --skip_metrics --known_indels1 false --known_indels2 false
nextflow main.nf -profile test,conda --output output/test6 --intervals false --skip_deduplication --skip_bqsr --skip_realignment
nextflow main.nf -profile test,conda --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --skip_bqsr --skip_realignment
nextflow main.nf -profile test,conda --output output/test8 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10 --remove_duplicates false --skip_bqsr --skip_realignment
nextflow main.nf -profile test,conda --output output/test9 --skip_deduplication --skip_bqsr --skip_realignment --input_files false --input_bam test_data/TESTX_S1_L001.bam

check:
test -s output/test1/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test1/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test1/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test1/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test2/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test2/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test2/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test2/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test3/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test3/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test3/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test3/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test4/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test4/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test4/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test4/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test5/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test5/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test5/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test5/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test6/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test6/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test6/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test6/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test7/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test7/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test2/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 2 output file!"; exit 1; }
test -s output/test2/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 2 output file!"; exit 1; }
test -s output/test2/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 2 output file!"; exit 1; }
test -s output/test2/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 2 output file!"; exit 1; }
test -s output/test3/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 3 output file!"; exit 1; }
test -s output/test3/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 3 output file!"; exit 1; }
test -s output/test3/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 3 output file!"; exit 1; }
test -s output/test3/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 3 output file!"; exit 1; }
test -s output/test4/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 4 output file!"; exit 1; }
test -s output/test4/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 4 output file!"; exit 1; }
test -s output/test4/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 4 output file!"; exit 1; }
test -s output/test4/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 4 output file!"; exit 1; }
test -s output/test5/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 5 output file!"; exit 1; }
test -s output/test5/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 5 output file!"; exit 1; }
test -s output/test5/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 5 output file!"; exit 1; }
test -s output/test5/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 5 output file!"; exit 1; }
test -s output/test6/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 6 output file!"; exit 1; }
test -s output/test6/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 6 output file!"; exit 1; }
test -s output/test6/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 6 output file!"; exit 1; }
test -s output/test6/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 6 output file!"; exit 1; }
test -s output/test7/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 7 output file!"; exit 1; }
test -s output/test7/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 7 output file!"; exit 1; }
test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 7 output file!"; exit 1; }
test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 7 output file!"; exit 1; }
test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 8 output file!"; exit 1; }
test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 8 output file!"; exit 1; }
test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 8 output file!"; exit 1; }
test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 8 output file!"; exit 1; }
test -s output/test9/TESTX_S1_L001/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 9 output file!"; exit 1; }
test -s output/test9/TESTX_S1_L001/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 9 output file!"; exit 1; }
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ Usage:
main.nf --input_files input_files
Input:
* --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file
* --input_bam: the path to a single BAM (this option is not compatible with --input_files)
* --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file (this option is not compatible with --input_bam)
Sample type will be added to the BAM header @SN sample name
The input file does not have header!
Example input file:
Expand All @@ -64,6 +65,7 @@ Input:
* --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict)
Optional input:
* --input_name: the name of the sample. Only used when --input_bam is provided (default: normal)
* --dbsnp: path to the dbSNP VCF (required to perform BQSR)
* --known_indels1: path to a VCF of known indels (optional to perform realignment around indels)
* --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels)
Expand All @@ -86,7 +88,7 @@ Computational resources:
* --mark_duplicates_cpus: (default: 16)
* --mark_duplicates_memory: (default: 64g)
* --realignment_around_indels_cpus: (default: 2)
* --realignment_around_indels_memory: (default: 32g)
* --realignment_around_indels_memory: (default: 31g)
* --bqsr_cpus: (default: 3)
* --bqsr_memory: (default: 4g)
* --metrics_cpus: (default: 1)
Expand Down
18 changes: 14 additions & 4 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
publish_dir = 'output'
params.help= false
params.input_files = false
params.input_name = "normal"
params.input_bam = false
params.reference = false
params.dbsnp = false
params.known_indels1 = false
Expand Down Expand Up @@ -59,15 +61,23 @@ if (params.output) {
publish_dir = params.output
}

// checks required inputs
if (params.input_files) {
if (! params.input_files && ! params.input_bam) {
exit 1, "Neither --input_files or --input_bam are provided!"
}
else if (params.input_files && params.input_bam) {
exit 1, "Both --input_files and --input_bam are provided! Please, provide only one."
}
else if (params.input_files) {
Channel
.fromPath(params.input_files)
.splitCsv(header: ['name', 'type', 'bam'], sep: "\t")
.map{ row-> tuple(row.name, row.type, file(row.bam)) }
.set { input_files }
} else {
exit 1, "Input file not specified!"
} else if (params.input_bam && params.input_name) {
input_bam = file(params.input_bam)
Channel
.fromList([tuple(input_bam.name.take(input_bam.name.lastIndexOf('.')), params.input_name, input_bam)])
.set { input_files }
}

/*
Expand Down
8 changes: 5 additions & 3 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail']

cleanup = true

VERSION = '1.4.1'
VERSION = '1.5.0'
DOI = 'https://zenodo.org/badge/latestdoi/358400957'

manifest {
Expand All @@ -67,7 +67,8 @@ Usage:
main.nf --input_files input_files
Input:
* --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file
* --input_bam: the path to a single BAM (this option is not compatible with --input_files)
* --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file (this option is not compatible with --input_bam)
Sample type will be added to the BAM header @SN sample name
The input file does not have header!
Example input file:
Expand All @@ -77,6 +78,7 @@ Input:
* --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict)
Optional input:
* --input_name: the name of the sample. Only used when --input_bam is provided (default: normal)
* --dbsnp: path to the dbSNP VCF (required to perform BQSR)
* --known_indels1: path to a VCF of known indels (optional to perform realignment around indels)
* --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels)
Expand All @@ -99,7 +101,7 @@ Computational resources:
* --mark_duplicates_cpus: (default: 16)
* --mark_duplicates_memory: (default: 64g)
* --realignment_around_indels_cpus: (default: 2)
* --realignment_around_indels_memory: (default: 32g)
* --realignment_around_indels_memory: (default: 31g)
* --bqsr_cpus: (default: 3)
* --bqsr_memory: (default: 4g)
* --metrics_cpus: (default: 1)
Expand Down

0 comments on commit 8aef905

Please sign in to comment.