From 6ba6f28fdf8e6003fe30a5416693b64bb8e3b6de Mon Sep 17 00:00:00 2001
From: priesgof <priesgoferreiro@gmail.com>
Date: Thu, 6 May 2021 10:42:17 +0200
Subject: [PATCH 1/5] forces the Java version in conda env to OpenJDK8 + fix
 issue with duplication metrics

---
 Makefile        | 14 +++++++-------
 README.md       | 18 +++++++++++-------
 environment.yml |  3 ++-
 main.nf         |  2 +-
 nextflow.config |  2 +-
 5 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/Makefile b/Makefile
index eee69aa..0390522 100644
--- a/Makefile
+++ b/Makefile
@@ -8,10 +8,10 @@ clean:
 	rm -rf .nextflow*
 
 test:
-	#nextflow main.nf -profile test,conda --output output/test1
-	#nextflow main.nf -profile test,conda --skip_bqsr --output output/test2
-	#nextflow main.nf -profile test,conda --skip_realignment --output output/test3
-	#nextflow main.nf -profile test,conda --skip_deduplication --output output/test4
-	#nextflow main.nf -profile test,conda --output output/test5 --skip_metrics
-	#nextflow main.nf -profile test,conda --output output/test6 --intervals false
-	nextflow main.nf -profile test,conda --output output/test6 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt
+	nextflow main.nf -profile test,conda --output output/test1
+	nextflow main.nf -profile test,conda --skip_bqsr --output output/test2
+	nextflow main.nf -profile test,conda --skip_realignment --output output/test3
+	nextflow main.nf -profile test,conda --skip_deduplication --output output/test4
+	nextflow main.nf -profile test,conda --output output/test5 --skip_metrics
+	nextflow main.nf -profile test,conda --output output/test6 --intervals false
+	nextflow main.nf -profile test,conda --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt
diff --git a/README.md b/README.md
index 08501fe..322187d 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
 # TRONflow BAM preprocessing pipeline
 
+[![DOI](https://zenodo.org/badge/358400957.svg)](https://zenodo.org/badge/latestdoi/358400957)
+
 Nextflow pipeline for the preprocessing of BAM files based on Picard and GATK.
 
 
@@ -32,7 +34,9 @@ Steps:
 
 ## References
 
-The bam preprocessing workflow use some required references (`--reference`, `--dbsnp`, `--known_indels1` and `--known_indels2`).
+The bam preprocessing workflow requires the human reference genome (`--reference`).
+Base Quality Score Recalibration (BQSR) requires dbSNP to avoid extracting error metrics from polymorphic sites (`--dbsnp`).
+Realignment around indels requires a set of known indels (`--known_indels1` and `--known_indels2`).
 These resources can be fetched from the GATK bundle https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle.
 
 Optionally, in order to run Picard's CollectHsMetrics an intervals file will need to be provided (`--intervals`). 
@@ -41,7 +45,7 @@ This can be built from a BED file using Picard's BedToIntervalList (https://gatk
 ## How to run it
 
 ```
-$ nextflow run tron-bioinformatics/tronflow-bam-preprocessing -r v1.1.0 --help
+$ nextflow run tron-bioinformatics/tronflow-bam-preprocessing -r v1.2.0 --help
 N E X T F L O W  ~  version 19.07.0
 Launching `main.nf` [intergalactic_shannon] - revision: e707c77d7b
 Usage:
@@ -55,13 +59,13 @@ Input:
     name1       tumor   tumor.1.bam
     name1       normal  normal.1.bam
     name2       tumor   tumor.2.bam
+    * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict)
  
 Optional input:
-    * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict)
-    * --dbsnp: path to the dbSNP VCF
-    * --known_indels1: path to a VCF of known indels
-    * --known_indels2: path to a second VCF of known indels
-    **NOTE**: if any of the above parameters is not provided, default hg19 resources under 
+    * --dbsnp: path to the dbSNP VCF (required to perform BQSR)
+    * --known_indels1: path to a VCF of known indels (required to perform realignment around indels)
+    * --known_indels2: path to a second VCF of known indels (required to perform realignment around indels)
+    **NOTE**: if any of the reference parameters is not provided, default hg19 resources under 
     /projects/data/gatk_bundle/hg19/ will be used
     
     * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None)
diff --git a/environment.yml b/environment.yml
index 1d7a1eb..edf71ba 100644
--- a/environment.yml
+++ b/environment.yml
@@ -1,10 +1,11 @@
 # You can use this file to create a conda environment for this pipeline:
 #   conda env create -f environment.yml
-name: tronflow-bam-preprocessing-1.1.0
+name: tronflow-bam-preprocessing-1.2.0
 channels:
   - conda-forge
   - bioconda
   - defaults
 dependencies:
+  - openjdk=8.0.282
   - bioconda::gatk4=4.2.0.0
   - bioconda::gatk=3.8
\ No newline at end of file
diff --git a/main.nf b/main.nf
index 1c663ff..9fd9953 100755
--- a/main.nf
+++ b/main.nf
@@ -168,7 +168,7 @@ if (!params.skip_deduplication) {
 	    	file("${bam.baseName}.dedup_metrics") optional true into deduplication_metrics
 
         script:
-        dedup_metrics = params.skip_metrics ? "--metrics-file ${bam.baseName}.dedup_metrics" : ""
+        dedup_metrics = params.skip_metrics ? "": "--metrics-file ${bam.baseName}.dedup_metrics"
 	    """
 	    mkdir tmp
 
diff --git a/nextflow.config b/nextflow.config
index 50d514a..d21c00a 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -62,5 +62,5 @@ manifest {
   description = 'Picard and GATK BAM preprocessing pipeline'
   mainScript = 'main.nf'
   nextflowVersion = '>=19.10.0'
-  version = '1.1.0'
+  version = '1.2.0'
 }

From 6df36bea9e912fca1f7056e75c713b7b8376d4d9 Mon Sep 17 00:00:00 2001
From: priesgof <priesgoferreiro@gmail.com>
Date: Thu, 6 May 2021 10:44:07 +0200
Subject: [PATCH 2/5] rename test dataset

---
 ...ESTX_H7YRLADXX_S1_L001.bam => TESTX_S1_L001.bam} | Bin
 ...ESTX_H7YRLADXX_S1_L002.bam => TESTX_S1_L002.bam} | Bin
 test_data/test_input.txt                            |   4 ++--
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename test_data/{TESTX_H7YRLADXX_S1_L001.bam => TESTX_S1_L001.bam} (100%)
 rename test_data/{TESTX_H7YRLADXX_S1_L002.bam => TESTX_S1_L002.bam} (100%)

diff --git a/test_data/TESTX_H7YRLADXX_S1_L001.bam b/test_data/TESTX_S1_L001.bam
similarity index 100%
rename from test_data/TESTX_H7YRLADXX_S1_L001.bam
rename to test_data/TESTX_S1_L001.bam
diff --git a/test_data/TESTX_H7YRLADXX_S1_L002.bam b/test_data/TESTX_S1_L002.bam
similarity index 100%
rename from test_data/TESTX_H7YRLADXX_S1_L002.bam
rename to test_data/TESTX_S1_L002.bam
diff --git a/test_data/test_input.txt b/test_data/test_input.txt
index 1826ec8..44dce6a 100644
--- a/test_data/test_input.txt
+++ b/test_data/test_input.txt
@@ -1,2 +1,2 @@
-TESTX_H7YRLADXX_S1_L001	tumor	test_data/TESTX_H7YRLADXX_S1_L001.bam
-TESTX_H7YRLADXX_S1_L002	normal	test_data/TESTX_H7YRLADXX_S1_L002.bam
+TESTX_S1_L001	tumor	test_data/TESTX_S1_L001.bam
+TESTX_S1_L002	normal	test_data/TESTX_S1_L002.bam

From 05d7e9326e43cb2fbc755b3dd8c308c43d4662ef Mon Sep 17 00:00:00 2001
From: priesgof <priesgoferreiro@gmail.com>
Date: Thu, 6 May 2021 11:05:06 +0200
Subject: [PATCH 3/5] make realignment around indels not requiring indels
 resource + remove default references

---
 README.md       | 19 ++++++------
 main.nf         | 80 +++++++++++++------------------------------------
 nextflow.config | 59 +++++++++++++++++++++++++++++++++++-
 3 files changed, 89 insertions(+), 69 deletions(-)

diff --git a/README.md b/README.md
index 322187d..cca1345 100644
--- a/README.md
+++ b/README.md
@@ -48,9 +48,10 @@ This can be built from a BED file using Picard's BedToIntervalList (https://gatk
 $ nextflow run tron-bioinformatics/tronflow-bam-preprocessing -r v1.2.0 --help
 N E X T F L O W  ~  version 19.07.0
 Launching `main.nf` [intergalactic_shannon] - revision: e707c77d7b
+
 Usage:
     main.nf --input_files input_files
- 
+
 Input:
     * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file
     Sample type will be added to the BAM header @SN sample name
@@ -60,14 +61,14 @@ Input:
     name1       normal  normal.1.bam
     name2       tumor   tumor.2.bam
     * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict)
- 
+
 Optional input:
     * --dbsnp: path to the dbSNP VCF (required to perform BQSR)
-    * --known_indels1: path to a VCF of known indels (required to perform realignment around indels)
-    * --known_indels2: path to a second VCF of known indels (required to perform realignment around indels)
-    **NOTE**: if any of the reference parameters is not provided, default hg19 resources under 
+    * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels)
+    * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels)
+    **NOTE**: if any of the reference parameters is not provided, default hg19 resources under
     /projects/data/gatk_bundle/hg19/ will be used
-    
+
     * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None)
     * --hs_metrics_target_coverage: name of output file for target HS metrics (default: None)
     * --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None)
@@ -77,7 +78,7 @@ Optional input:
     * --skip_metrics: optionally skip metrics (default: false)
     * --output: the folder where to publish output (default: ./output)
     * --platform: the platform to be added to the BAM header. Valid values: [ILLUMINA, SOLID, LS454, HELICOS and PACBIO] (default: ILLUMINA)
-    
+
 Computational resources:
     * --prepare_bam_cpus: (default: 3)
     * --prepare_bam_memory: (default: 8g)
@@ -87,11 +88,11 @@ Computational resources:
     * --realignment_around_indels_memory: (default: 32g)
     * --bqsr_cpus: (default: 3)
     * --bqsr_memory: (default: 4g)
- 
+
  Output:
     * Preprocessed and indexed BAMs
     * Tab-separated values file with the absolute paths to the preprocessed BAMs, preprocessed_bams.txt
- 
+
 Optional output:
     * Recalibration report
     * Realignment intervals
diff --git a/main.nf b/main.nf
index 9fd9953..e061606 100755
--- a/main.nf
+++ b/main.nf
@@ -3,10 +3,10 @@
 publish_dir = 'output'
 params.help= false
 params.input_files = false
-params.reference = "/projects/data/gatk_bundle/hg19/ucsc.hg19.fasta"
-params.dbsnp = "/projects/data/gatk_bundle/hg19/dbsnp_138.hg19.vcf"
-params.known_indels1 = "/projects/data/gatk_bundle/hg19/1000G_phase1.indels.hg19.sites.vcf"
-params.known_indels2 = "/projects/data/gatk_bundle/hg19/Mills_and_1000G_gold_standard.indels.hg19.sites.sorted.vcf"
+params.reference = false
+params.dbsnp = false
+params.known_indels1 = false
+params.known_indels2 = false
 params.intervals = false
 params.hs_metrics_target_coverage = false
 params.hs_metrics_per_base_coverage = false
@@ -29,56 +29,7 @@ params.bqsr_memory = "4g"
 
 
 def helpMessage() {
-    log.info"""
-Usage:
-    main.nf --input_files input_files
-
-Input:
-    * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file
-    Sample type will be added to the BAM header @SN sample name
-    The input file does not have header!
-    Example input file:
-    name1       tumor   tumor.1.bam
-    name1       normal  normal.1.bam
-    name2       tumor   tumor.2.bam
-
-Optional input:
-    * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict)
-    * --dbsnp: path to the dbSNP VCF
-    * --known_indels1: path to a VCF of known indels
-    * --known_indels2: path to a second VCF of known indels
-    **NOTE**: if any of the above parameters is not provided, default hg19 resources under
-    /projects/data/gatk_bundle/hg19/ will be used
-
-    * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None)
-    * --hs_metrics_target_coverage: name of output file for target HS metrics (default: None)
-    * --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None)
-    * --skip_bqsr: optionally skip BQSR (default: false)
-    * --skip_realignment: optionally skip realignment (default: false)
-    * --skip_deduplication: optionally skip deduplication (default: false)
-    * --skip_metrics: optionally skip metrics (default: false)
-    * --output: the folder where to publish output (default: ./output)
-    * --platform: the platform to be added to the BAM header. Valid values: [ILLUMINA, SOLID, LS454, HELICOS and PACBIO] (default: ILLUMINA)
-
-Computational resources:
-    * --prepare_bam_cpus: (default: 3)
-    * --prepare_bam_memory: (default: 8g)
-    * --mark_duplicates_cpus: (default: 16)
-    * --mark_duplicates_memory: (default: 64g)
-    * --realignment_around_indels_cpus: (default: 2)
-    * --realignment_around_indels_memory: (default: 32g)
-    * --bqsr_cpus: (default: 3)
-    * --bqsr_memory: (default: 4g)
-
- Output:
-    * Preprocessed and indexed BAMs
-    * Tab-separated values file with the absolute paths to the preprocessed BAMs, preprocessed_bams.txt
-
-Optional output:
-    * Recalibration report
-    * Realignment intervals
-    * Metrics
-    """
+    log.info params.help_message
 }
 
 if (params.help) {
@@ -86,6 +37,14 @@ if (params.help) {
     exit 0
 }
 
+if (!params.reference) {
+    exit -1, "--reference is required"
+}
+
+if (!params.skip_bqsr && !params.dbsnp) {
+    exit -1, "--dbsnp is required to perform BQSR"
+}
+
 if (params.output) {
   publish_dir = params.output
 }
@@ -272,6 +231,11 @@ if (!params.skip_realignment) {
 	    	set val(name), val(bam_name), val(type), file("${bam.baseName}.realigned.bam"), file("${bam.baseName}.realigned.bai") into realigned_bams
 	    	file("${bam.baseName}.RA.intervals") into realignment_intervals
 
+        script:
+        known_indels = (params.known_indels1 ? " --known ${params.known_indels1}" : "") +
+            (params.known_indels2 ? " --known ${params.known_indels2}" : "")  // each ternary parenthesised: '+' binds tighter than '?:' in Groovy
+        known_alleles = (params.known_indels1 ? " --knownAlleles ${params.known_indels1}" : "") +
+            (params.known_indels2 ? " --knownAlleles ${params.known_indels2}" : "")  // empty string when neither known-indels VCF is provided
 	    """
 	    mkdir tmp
 
@@ -279,19 +243,17 @@ if (!params.skip_realignment) {
 	    --input_file ${bam} \
 	    --out ${bam.baseName}.RA.intervals \
 	    --reference_sequence ${params.reference} \
-	    --known ${params.known_indels1} \
-	    --known ${params.known_indels2}
+	    ${known_indels}
 
 	    gatk3 -Xmx${params.realignment_around_indels_memory} -Djava.io.tmpdir=tmp -T IndelRealigner \
 	    --input_file ${bam} \
 	    --out ${bam.baseName}.realigned.bam \
 	    --reference_sequence ${params.reference} \
 	    --targetIntervals ${bam.baseName}.RA.intervals \
-	    --knownAlleles ${params.known_indels1} \
-	    --knownAlleles ${params.known_indels2} \
 	    --consensusDeterminationModel USE_SW \
 	    --LODThresholdForCleaning 0.4 \
-	    --maxReadsInMemory 600000
+	    --maxReadsInMemory 600000 \
+	    ${known_alleles}
 	    """
 	}
 }
diff --git a/nextflow.config b/nextflow.config
index d21c00a..dec870d 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -55,6 +55,9 @@ dag {
   //file = "${params.output}/pipeline_dag.svg"
 }
 
+VERSION = '1.2.0'
+DOI = 'https://zenodo.org/badge/latestdoi/358400957'
+
 manifest {
   name = 'TRON-Bioinformatics/tronflow-bam-preprocessing'
   author = 'Pablo Riesgo Ferreiro'
@@ -62,5 +65,59 @@ manifest {
   description = 'Picard and GATK BAM preprocessing pipeline'
   mainScript = 'main.nf'
   nextflowVersion = '>=19.10.0'
-  version = '1.2.0'
+  version = VERSION
+  doi = DOI
 }
+
+params.help_message = """
+TronFlow bam preprocessing v${VERSION} ${DOI}
+
+Usage:
+    main.nf --input_files input_files
+
+Input:
+    * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file
+    Sample type will be added to the BAM header @SN sample name
+    The input file does not have header!
+    Example input file:
+    name1       tumor   tumor.1.bam
+    name1       normal  normal.1.bam
+    name2       tumor   tumor.2.bam
+    * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict)
+
+Optional input:
+    * --dbsnp: path to the dbSNP VCF (required to perform BQSR)
+    * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels)
+    * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels)
+    **NOTE**: if any of the reference parameters is not provided, default hg19 resources under
+    /projects/data/gatk_bundle/hg19/ will be used
+
+    * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None)
+    * --hs_metrics_target_coverage: name of output file for target HS metrics (default: None)
+    * --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None)
+    * --skip_bqsr: optionally skip BQSR (default: false)
+    * --skip_realignment: optionally skip realignment (default: false)
+    * --skip_deduplication: optionally skip deduplication (default: false)
+    * --skip_metrics: optionally skip metrics (default: false)
+    * --output: the folder where to publish output (default: ./output)
+    * --platform: the platform to be added to the BAM header. Valid values: [ILLUMINA, SOLID, LS454, HELICOS and PACBIO] (default: ILLUMINA)
+
+Computational resources:
+    * --prepare_bam_cpus: (default: 3)
+    * --prepare_bam_memory: (default: 8g)
+    * --mark_duplicates_cpus: (default: 16)
+    * --mark_duplicates_memory: (default: 64g)
+    * --realignment_around_indels_cpus: (default: 2)
+    * --realignment_around_indels_memory: (default: 32g)
+    * --bqsr_cpus: (default: 3)
+    * --bqsr_memory: (default: 4g)
+
+ Output:
+    * Preprocessed and indexed BAMs
+    * Tab-separated values file with the absolute paths to the preprocessed BAMs, preprocessed_bams.txt
+
+Optional output:
+    * Recalibration report
+    * Realignment intervals
+    * Metrics
+  """

From e6174230afdd5fe9229f2c360012efdbe803679d Mon Sep 17 00:00:00 2001
From: priesgof <priesgoferreiro@gmail.com>
Date: Thu, 6 May 2021 11:42:06 +0200
Subject: [PATCH 4/5] add options to collect hs metrics

---
 Makefile        | 1 +
 README.md       | 2 ++
 main.nf         | 9 ++++++++-
 nextflow.config | 2 ++
 4 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 0390522..0daa2b7 100644
--- a/Makefile
+++ b/Makefile
@@ -15,3 +15,4 @@ test:
 	nextflow main.nf -profile test,conda --output output/test5 --skip_metrics
 	nextflow main.nf -profile test,conda --output output/test6 --intervals false
 	nextflow main.nf -profile test,conda --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt
+	nextflow main.nf -profile test,conda --output output/test8 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10
diff --git a/README.md b/README.md
index cca1345..0729e22 100644
--- a/README.md
+++ b/README.md
@@ -72,6 +72,8 @@ Optional input:
     * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None)
     * --hs_metrics_target_coverage: name of output file for target HS metrics (default: None)
     * --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None)
+    * --collect_hs_metrics_min_base_quality: minimum base quality for a base to contribute coverage (default: 20).
+    * --collect_hs_metrics_min_mapping_quality: minimum mapping quality for a read to contribute coverage (default: 20).
     * --skip_bqsr: optionally skip BQSR (default: false)
     * --skip_realignment: optionally skip realignment (default: false)
     * --skip_deduplication: optionally skip deduplication (default: false)
diff --git a/main.nf b/main.nf
index e061606..fea5139 100755
--- a/main.nf
+++ b/main.nf
@@ -16,7 +16,10 @@ params.skip_deduplication = false
 params.skip_metrics = false
 params.output = false
 params.platform = "ILLUMINA"
+params.collect_hs_metrics_min_base_quality = false
+params.collect_hs_metrics_min_mapping_quality = false
 
+// computational resources
 params.prepare_bam_cpus = 3
 params.prepare_bam_memory = "8g"
 params.mark_duplicates_cpus = 16
@@ -170,6 +173,10 @@ if (! params.skip_metrics) {
             hs_metrics_per_base_coverage= params.hs_metrics_per_base_coverage ?
                 "--PER_BASE_COVERAGE ${params.hs_metrics_per_base_coverage}" :
                 ""
+            minimum_base_quality = params.collect_hs_metrics_min_base_quality ?
+                "--MINIMUM_BASE_QUALITY ${params.collect_hs_metrics_min_base_quality}" : ""
+            minimum_mapping_quality = params.collect_hs_metrics_min_mapping_quality ?
+                "--MINIMUM_MAPPING_QUALITY ${params.collect_hs_metrics_min_mapping_quality}" : ""
             """
             mkdir tmp
 
@@ -179,7 +186,7 @@ if (! params.skip_metrics) {
             --OUTPUT ${bam.baseName} \
             --TARGET_INTERVALS ${params.intervals} \
             --BAIT_INTERVALS ${params.intervals} \
-            ${hs_metrics_target_coverage} ${hs_metrics_per_base_coverage}
+            ${hs_metrics_target_coverage} ${hs_metrics_per_base_coverage} ${minimum_base_quality} ${minimum_mapping_quality}
             """
         }
     }
diff --git a/nextflow.config b/nextflow.config
index dec870d..a7a2122 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -95,6 +95,8 @@ Optional input:
     * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None)
     * --hs_metrics_target_coverage: name of output file for target HS metrics (default: None)
     * --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None)
+    * --collect_hs_metrics_min_base_quality: minimum base quality for a base to contribute coverage (default: 20).
+    * --collect_hs_metrics_min_mapping_quality: minimum mapping quality for a read to contribute coverage (default: 20).
     * --skip_bqsr: optionally skip BQSR (default: false)
     * --skip_realignment: optionally skip realignment (default: false)
     * --skip_deduplication: optionally skip deduplication (default: false)

From dc4672dd63d1cf95d9065f67f7ef667476fc3144 Mon Sep 17 00:00:00 2001
From: priesgof <priesgoferreiro@gmail.com>
Date: Thu, 6 May 2021 11:47:22 +0200
Subject: [PATCH 5/5] update documentation

---
 README.md       | 3 ---
 nextflow.config | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/README.md b/README.md
index 0729e22..3041759 100644
--- a/README.md
+++ b/README.md
@@ -66,9 +66,6 @@ Optional input:
     * --dbsnp: path to the dbSNP VCF (required to perform BQSR)
     * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels)
     * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels)
-    **NOTE**: if any of the reference parameters is not provided, default hg19 resources under
-    /projects/data/gatk_bundle/hg19/ will be used
-
     * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None)
     * --hs_metrics_target_coverage: name of output file for target HS metrics (default: None)
     * --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None)
diff --git a/nextflow.config b/nextflow.config
index a7a2122..77b3d7a 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -89,9 +89,6 @@ Optional input:
     * --dbsnp: path to the dbSNP VCF (required to perform BQSR)
     * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels)
     * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels)
-    **NOTE**: if any of the reference parameters is not provided, default hg19 resources under
-    /projects/data/gatk_bundle/hg19/ will be used
-
     * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None)
     * --hs_metrics_target_coverage: name of output file for target HS metrics (default: None)
     * --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None)