Merge branch 'develop' into 'master'

Release v1.5.0 See merge request tron/tron-bam-preprocessing!19
TRON-Bioinformatics · Jun 1, 2021 · 8aef905 · 8aef905
2 parents 2a6e306 + 4fcdcfb
commit 8aef905
Show file tree

Hide file tree

Showing 4 changed files with 58 additions and 41 deletions.
diff --git a/Makefile b/Makefile
@@ -11,41 +11,44 @@ test:
 	nextflow main.nf -profile test,conda --skip_bqsr --output output/test2
 	nextflow main.nf -profile test,conda --skip_realignment --output output/test3
 	nextflow main.nf -profile test,conda --skip_deduplication --output output/test4
-	nextflow main.nf -profile test,conda --output output/test5 --skip_metrics --known_indels1 false --known_indels2 false
-	nextflow main.nf -profile test,conda --output output/test6 --intervals false
-	nextflow main.nf -profile test,conda --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt
-	nextflow main.nf -profile test,conda --output output/test8 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10 --remove_duplicates false
+	nextflow main.nf -profile test,conda --output output/test5 --skip_deduplication --skip_bqsr --skip_metrics --known_indels1 false --known_indels2 false
+	nextflow main.nf -profile test,conda --output output/test6 --intervals false --skip_deduplication --skip_bqsr --skip_realignment
+	nextflow main.nf -profile test,conda --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --skip_bqsr --skip_realignment
+	nextflow main.nf -profile test,conda --output output/test8 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10 --remove_duplicates false --skip_bqsr --skip_realignment
+	nextflow main.nf -profile test,conda --output output/test9 --skip_deduplication --skip_bqsr --skip_realignment  --input_files false --input_bam test_data/TESTX_S1_L001.bam
 
 check:
 	test -s output/test1/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
 	test -s output/test1/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
 	test -s output/test1/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
 	test -s output/test1/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test2/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test2/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test2/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test2/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test3/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test3/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test3/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test3/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test4/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test4/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test4/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test4/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test5/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test5/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test5/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test5/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test6/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test6/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test6/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test6/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test7/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test7/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
+	test -s output/test2/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 2 output file!"; exit 1; }
+	test -s output/test2/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 2 output file!"; exit 1; }
+	test -s output/test2/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 2 output file!"; exit 1; }
+	test -s output/test2/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 2 output file!"; exit 1; }
+	test -s output/test3/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 3 output file!"; exit 1; }
+	test -s output/test3/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 3 output file!"; exit 1; }
+	test -s output/test3/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 3 output file!"; exit 1; }
+	test -s output/test3/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 3 output file!"; exit 1; }
+	test -s output/test4/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 4 output file!"; exit 1; }
+	test -s output/test4/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 4 output file!"; exit 1; }
+	test -s output/test4/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 4 output file!"; exit 1; }
+	test -s output/test4/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 4 output file!"; exit 1; }
+	test -s output/test5/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 5 output file!"; exit 1; }
+	test -s output/test5/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 5 output file!"; exit 1; }
+	test -s output/test5/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 5 output file!"; exit 1; }
+	test -s output/test5/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 5 output file!"; exit 1; }
+	test -s output/test6/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 6 output file!"; exit 1; }
+	test -s output/test6/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 6 output file!"; exit 1; }
+	test -s output/test6/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 6 output file!"; exit 1; }
+	test -s output/test6/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 6 output file!"; exit 1; }
+	test -s output/test7/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 7 output file!"; exit 1; }
+	test -s output/test7/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 7 output file!"; exit 1; }
+	test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 7 output file!"; exit 1; }
+	test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 7 output file!"; exit 1; }
+	test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 8 output file!"; exit 1; }
+	test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 8 output file!"; exit 1; }
+	test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 8 output file!"; exit 1; }
+	test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 8 output file!"; exit 1; }
+	test -s output/test9/TESTX_S1_L001/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 9 output file!"; exit 1; }
+	test -s output/test9/TESTX_S1_L001/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 9 output file!"; exit 1; }
diff --git a/README.md b/README.md
@@ -54,7 +54,8 @@ Usage:
     main.nf --input_files input_files
 
 Input:
-    * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file
+    * --input_bam: the path to a single BAM (this option is not compatible with --input_files)
+    * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file (this option is not compatible with --input_bam)
     Sample type will be added to the BAM header @SN sample name
     The input file does not have header!
     Example input file:
@@ -64,6 +65,7 @@ Input:
     * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict)
 
 Optional input:
+    * --input_name: the name of the sample. Only used when --input_bam is provided (default: normal)
     * --dbsnp: path to the dbSNP VCF (required to perform BQSR)
     * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels)
     * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels)
@@ -86,7 +88,7 @@ Computational resources:
     * --mark_duplicates_cpus: (default: 16)
     * --mark_duplicates_memory: (default: 64g)
     * --realignment_around_indels_cpus: (default: 2)
-    * --realignment_around_indels_memory: (default: 32g)
+    * --realignment_around_indels_memory: (default: 31g)
     * --bqsr_cpus: (default: 3)
     * --bqsr_memory: (default: 4g)
     * --metrics_cpus: (default: 1)

diff --git a/main.nf b/main.nf
@@ -3,6 +3,8 @@
 publish_dir = 'output'
 params.help= false
 params.input_files = false
+params.input_name = "normal"
+params.input_bam = false
 params.reference = false
 params.dbsnp = false
 params.known_indels1 = false
@@ -59,15 +61,23 @@ if (params.output) {
   publish_dir = params.output
 }
 
-// checks required inputs
-if (params.input_files) {
+if (! params.input_files && ! params.input_bam) {
+  exit 1, "Neither --input_files or --input_bam are provided!"
+}
+else if (params.input_files && params.input_bam) {
+  exit 1, "Both --input_files and --input_bam are provided! Please, provide only one."
+}
+else if (params.input_files) {
   Channel
     .fromPath(params.input_files)
     .splitCsv(header: ['name', 'type', 'bam'], sep: "\t")
     .map{ row-> tuple(row.name, row.type, file(row.bam)) }
     .set { input_files }
-} else {
-  exit 1, "Input file not specified!"
+} else if (params.input_bam && params.input_name) {
+  input_bam = file(params.input_bam)
+  Channel
+    .fromList([tuple(input_bam.name.take(input_bam.name.lastIndexOf('.')), params.input_name, input_bam)])
+    .set { input_files }
 }
 
 /*

diff --git a/nextflow.config b/nextflow.config
@@ -46,7 +46,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail']
 
 cleanup = true
 
-VERSION = '1.4.1'
+VERSION = '1.5.0'
 DOI = 'https://zenodo.org/badge/latestdoi/358400957'
 
 manifest {
@@ -67,7 +67,8 @@ Usage:
     main.nf --input_files input_files
 
 Input:
-    * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file
+    * --input_bam: the path to a single BAM (this option is not compatible with --input_files)
+    * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file (this option is not compatible with --input_bam)
     Sample type will be added to the BAM header @SN sample name
     The input file does not have header!
     Example input file:
@@ -77,6 +78,7 @@ Input:
     * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict)
 
 Optional input:
+    * --input_name: the name of the sample. Only used when --input_bam is provided (default: normal)
     * --dbsnp: path to the dbSNP VCF (required to perform BQSR)
     * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels)
     * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels)
@@ -99,7 +101,7 @@ Computational resources:
     * --mark_duplicates_cpus: (default: 16)
     * --mark_duplicates_memory: (default: 64g)
     * --realignment_around_indels_cpus: (default: 2)
-    * --realignment_around_indels_memory: (default: 32g)
+    * --realignment_around_indels_memory: (default: 31g)
     * --bqsr_cpus: (default: 3)
     * --bqsr_memory: (default: 4g)
     * --metrics_cpus: (default: 1)