From 4fcdcfb2896bbe2c0e3e51474fd025744cf4767c Mon Sep 17 00:00:00 2001
From: priesgof <priesgoferreiro@gmail.com>
Date: Tue, 1 Jun 2021 19:47:30 +0200
Subject: [PATCH] add support for a single file

---
 Makefile        | 67 ++++++++++++++++++++++++++-----------------------
 README.md       |  6 +++--
 main.nf         | 18 ++++++++++---
 nextflow.config |  8 +++---
 4 files changed, 58 insertions(+), 41 deletions(-)

diff --git a/Makefile b/Makefile
index 61615f1..2edf503 100644
--- a/Makefile
+++ b/Makefile
@@ -11,41 +11,44 @@ test:
 	nextflow main.nf -profile test,conda --skip_bqsr --output output/test2
 	nextflow main.nf -profile test,conda --skip_realignment --output output/test3
 	nextflow main.nf -profile test,conda --skip_deduplication --output output/test4
-	nextflow main.nf -profile test,conda --output output/test5 --skip_metrics --known_indels1 false --known_indels2 false
-	nextflow main.nf -profile test,conda --output output/test6 --intervals false
-	nextflow main.nf -profile test,conda --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt
-	nextflow main.nf -profile test,conda --output output/test8 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10 --remove_duplicates false
+	nextflow main.nf -profile test,conda --output output/test5 --skip_deduplication --skip_bqsr --skip_metrics --known_indels1 false --known_indels2 false
+	nextflow main.nf -profile test,conda --output output/test6 --intervals false --skip_deduplication --skip_bqsr --skip_realignment
+	nextflow main.nf -profile test,conda --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --skip_bqsr --skip_realignment
+	nextflow main.nf -profile test,conda --output output/test8 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10 --remove_duplicates false --skip_bqsr --skip_realignment
+	nextflow main.nf -profile test,conda --output output/test9 --skip_deduplication --skip_bqsr --skip_realignment  --input_files false --input_bam test_data/TESTX_S1_L001.bam
 
 check:
 	test -s output/test1/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
 	test -s output/test1/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
 	test -s output/test1/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
 	test -s output/test1/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test2/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test2/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test2/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test2/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test3/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test3/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test3/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test3/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test4/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test4/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test4/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test4/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test5/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test5/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test5/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test5/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test6/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test6/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test6/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test6/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test7/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test7/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; }
-	test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; }
\ No newline at end of file
+	test -s output/test2/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 2 output file!"; exit 1; }
+	test -s output/test2/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 2 output file!"; exit 1; }
+	test -s output/test2/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 2 output file!"; exit 1; }
+	test -s output/test2/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 2 output file!"; exit 1; }
+	test -s output/test3/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 3 output file!"; exit 1; }
+	test -s output/test3/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 3 output file!"; exit 1; }
+	test -s output/test3/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 3 output file!"; exit 1; }
+	test -s output/test3/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 3 output file!"; exit 1; }
+	test -s output/test4/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 4 output file!"; exit 1; }
+	test -s output/test4/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 4 output file!"; exit 1; }
+	test -s output/test4/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 4 output file!"; exit 1; }
+	test -s output/test4/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 4 output file!"; exit 1; }
+	test -s output/test5/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 5 output file!"; exit 1; }
+	test -s output/test5/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 5 output file!"; exit 1; }
+	test -s output/test5/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 5 output file!"; exit 1; }
+	test -s output/test5/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 5 output file!"; exit 1; }
+	test -s output/test6/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 6 output file!"; exit 1; }
+	test -s output/test6/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 6 output file!"; exit 1; }
+	test -s output/test6/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 6 output file!"; exit 1; }
+	test -s output/test6/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 6 output file!"; exit 1; }
+	test -s output/test7/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 7 output file!"; exit 1; }
+	test -s output/test7/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 7 output file!"; exit 1; }
+	test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 7 output file!"; exit 1; }
+	test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 7 output file!"; exit 1; }
+	test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 8 output file!"; exit 1; }
+	test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 8 output file!"; exit 1; }
+	test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 8 output file!"; exit 1; }
+	test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 8 output file!"; exit 1; }
+	test -s output/test9/TESTX_S1_L001/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 9 output file!"; exit 1; }
+	test -s output/test9/TESTX_S1_L001/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 9 output file!"; exit 1; }
\ No newline at end of file
diff --git a/README.md b/README.md
index 6611599..65292cb 100644
--- a/README.md
+++ b/README.md
@@ -54,7 +54,8 @@ Usage:
     main.nf --input_files input_files
 
 Input:
-    * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file
+    * --input_bam: the path to a single BAM file (this option is not compatible with --input_files)
+    * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file (this option is not compatible with --input_bam)
     Sample type will be added to the BAM header @SN sample name
     The input file does not have header!
     Example input file:
@@ -64,6 +65,7 @@ Input:
     * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict)
 
 Optional input:
+    * --input_name: the name of the sample; it is used in place of the sample type column of --input_files. Only used when --input_bam is provided (default: normal)
     * --dbsnp: path to the dbSNP VCF (required to perform BQSR)
     * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels)
     * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels)
@@ -86,7 +88,7 @@ Computational resources:
     * --mark_duplicates_cpus: (default: 16)
     * --mark_duplicates_memory: (default: 64g)
     * --realignment_around_indels_cpus: (default: 2)
-    * --realignment_around_indels_memory: (default: 32g)
+    * --realignment_around_indels_memory: (default: 31g)
     * --bqsr_cpus: (default: 3)
     * --bqsr_memory: (default: 4g)
     * --metrics_cpus: (default: 1)
diff --git a/main.nf b/main.nf
index 71fcc0c..154ea01 100755
--- a/main.nf
+++ b/main.nf
@@ -3,6 +3,8 @@
 publish_dir = 'output'
 params.help= false
 params.input_files = false
+params.input_name = "normal"
+params.input_bam = false
 params.reference = false
 params.dbsnp = false
 params.known_indels1 = false
@@ -59,15 +61,23 @@ if (params.output) {
   publish_dir = params.output
 }
 
-// checks required inputs
-if (params.input_files) {
+if (! params.input_files && ! params.input_bam) {  // exactly one input mode must be selected
+  exit 1, "Neither --input_files nor --input_bam is provided!"
+}
+else if (params.input_files && params.input_bam) {
+  exit 1, "Both --input_files and --input_bam are provided! Please, provide only one."
+}
+else if (params.input_files) {  // table mode: one (name, type, bam) tuple per row of the TSV
   Channel
     .fromPath(params.input_files)
     .splitCsv(header: ['name', 'type', 'bam'], sep: "\t")
     .map{ row-> tuple(row.name, row.type, file(row.bam)) }
     .set { input_files }
-} else {
-  exit 1, "Input file not specified!"
+} else {  // single-BAM mode: name is the BAM base name, --input_name fills the type slot
+  if (! params.input_name) exit 1, "--input_name must not be empty when --input_bam is provided!"
+  input_bam = file(params.input_bam)
+  Channel.fromList([tuple(input_bam.baseName, params.input_name, input_bam)])  // baseName also copes with names lacking an extension
+    .set { input_files }
 }
 
 /*
diff --git a/nextflow.config b/nextflow.config
index bc3c25f..dbc239d 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -46,7 +46,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail']
 
 cleanup = true
 
-VERSION = '1.4.1'
+VERSION = '1.5.0'
 DOI = 'https://zenodo.org/badge/latestdoi/358400957'
 
 manifest {
@@ -67,7 +67,8 @@ Usage:
     main.nf --input_files input_files
 
 Input:
-    * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file
+    * --input_bam: the path to a single BAM (this option is not compatible with --input_files)
+    * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file (this option is not compatible with --input_bam)
     Sample type will be added to the BAM header @SN sample name
     The input file does not have header!
     Example input file:
@@ -77,6 +78,7 @@ Input:
     * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict)
 
 Optional input:
+    * --input_name: the name of the sample. Only used when --input_bam is provided (default: normal)
     * --dbsnp: path to the dbSNP VCF (required to perform BQSR)
     * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels)
     * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels)
@@ -99,7 +101,7 @@ Computational resources:
     * --mark_duplicates_cpus: (default: 16)
     * --mark_duplicates_memory: (default: 64g)
     * --realignment_around_indels_cpus: (default: 2)
-    * --realignment_around_indels_memory: (default: 32g)
+    * --realignment_around_indels_memory: (default: 31g)
     * --bqsr_cpus: (default: 3)
     * --bqsr_memory: (default: 4g)
     * --metrics_cpus: (default: 1)