From 6ba6f28fdf8e6003fe30a5416693b64bb8e3b6de Mon Sep 17 00:00:00 2001 From: priesgof Date: Thu, 6 May 2021 10:42:17 +0200 Subject: [PATCH 1/5] forces the Java version in conda env to OpenJDK8 + fix issue with duplication metrics --- Makefile | 14 +++++++------- README.md | 18 +++++++++++------- environment.yml | 3 ++- main.nf | 2 +- nextflow.config | 2 +- 5 files changed, 22 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index eee69aa..0390522 100644 --- a/Makefile +++ b/Makefile @@ -8,10 +8,10 @@ clean: rm -rf .nextflow* test: - #nextflow main.nf -profile test,conda --output output/test1 - #nextflow main.nf -profile test,conda --skip_bqsr --output output/test2 - #nextflow main.nf -profile test,conda --skip_realignment --output output/test3 - #nextflow main.nf -profile test,conda --skip_deduplication --output output/test4 - #nextflow main.nf -profile test,conda --output output/test5 --skip_metrics - #nextflow main.nf -profile test,conda --output output/test6 --intervals false - nextflow main.nf -profile test,conda --output output/test6 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt + nextflow main.nf -profile test,conda --output output/test1 + nextflow main.nf -profile test,conda --skip_bqsr --output output/test2 + nextflow main.nf -profile test,conda --skip_realignment --output output/test3 + nextflow main.nf -profile test,conda --skip_deduplication --output output/test4 + nextflow main.nf -profile test,conda --output output/test5 --skip_metrics + nextflow main.nf -profile test,conda --output output/test6 --intervals false + nextflow main.nf -profile test,conda --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt diff --git a/README.md b/README.md index 08501fe..322187d 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # TRONflow BAM preprocessing pipeline 
+[![DOI](https://zenodo.org/badge/358400957.svg)](https://zenodo.org/badge/latestdoi/358400957) + Nextflow pipeline for the preprocessing of BAM files based on Picard and GATK. @@ -32,7 +34,9 @@ Steps: ## References -The bam preprocessing workflow use some required references (`--reference`, `--dbsnp`, `--known_indels1` and `--known_indels2`). +The bam preprocessing workflow requires the human reference genome (`--reference`). +Base Quality Score Recalibration (BQSR) requires dbSNP to avoid extracting error metrics from polymorphic sites (`--dbsnp`). +Realignment around indels requires a set of known indels (`--known_indels1` and `--known_indels2`). These resources can be fetched from the GATK bundle https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle. Optionally, in order to run Picard's CollectHsMetrics an intervals file will need to be provided (`--intervals`). @@ -41,7 +45,7 @@ This can be built from a BED file using Picard's BedToIntervalList (https://gatk ## How to run it ``` -$ nextflow run tron-bioinformatics/tronflow-bam-preprocessing -r v1.1.0 --help +$ nextflow run tron-bioinformatics/tronflow-bam-preprocessing -r v1.2.0 --help N E X T F L O W ~ version 19.07.0 Launching `main.nf` [intergalactic_shannon] - revision: e707c77d7b Usage: @@ -55,13 +59,13 @@ Input: name1 tumor tumor.1.bam name1 normal normal.1.bam name2 tumor tumor.2.bam + * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict) Optional input: - * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict) - * --dbsnp: path to the dbSNP VCF - * --known_indels1: path to a VCF of known indels - * --known_indels2: path to a second VCF of known indels - **NOTE**: if any of the above parameters is not provided, default hg19 resources under + * --dbsnp: path to the dbSNP VCF (required to perform BQSR) + * --known_indels1: path to a VCF of known indels (required to perform realignment around indels) + * --known_indels2: path to 
a second VCF of known indels (required to perform realignment around indels) + **NOTE**: if any of the reference parameters is not provided, default hg19 resources under /projects/data/gatk_bundle/hg19/ will be used * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None) diff --git a/environment.yml b/environment.yml index 1d7a1eb..edf71ba 100644 --- a/environment.yml +++ b/environment.yml @@ -1,10 +1,11 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: tronflow-bam-preprocessing-1.1.0 +name: tronflow-bam-preprocessing-1.2.0 channels: - conda-forge - bioconda - defaults dependencies: + - openjdk=8.0.282 - bioconda::gatk4=4.2.0.0 - bioconda::gatk=3.8 \ No newline at end of file diff --git a/main.nf b/main.nf index 1c663ff..9fd9953 100755 --- a/main.nf +++ b/main.nf @@ -168,7 +168,7 @@ if (!params.skip_deduplication) { file("${bam.baseName}.dedup_metrics") optional true into deduplication_metrics script: - dedup_metrics = params.skip_metrics ? "--metrics-file ${bam.baseName}.dedup_metrics" : "" + dedup_metrics = params.skip_metrics ? 
"": "--metrics-file ${bam.baseName}.dedup_metrics" """ mkdir tmp diff --git a/nextflow.config b/nextflow.config index 50d514a..d21c00a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -62,5 +62,5 @@ manifest { description = 'Picard and GATK BAM preprocessing pipeline' mainScript = 'main.nf' nextflowVersion = '>=19.10.0' - version = '1.1.0' + version = '1.2.0' } From 6df36bea9e912fca1f7056e75c713b7b8376d4d9 Mon Sep 17 00:00:00 2001 From: priesgof Date: Thu, 6 May 2021 10:44:07 +0200 Subject: [PATCH 2/5] rename test dataset --- ...ESTX_H7YRLADXX_S1_L001.bam => TESTX_S1_L001.bam} | Bin ...ESTX_H7YRLADXX_S1_L002.bam => TESTX_S1_L002.bam} | Bin test_data/test_input.txt | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) rename test_data/{TESTX_H7YRLADXX_S1_L001.bam => TESTX_S1_L001.bam} (100%) rename test_data/{TESTX_H7YRLADXX_S1_L002.bam => TESTX_S1_L002.bam} (100%) diff --git a/test_data/TESTX_H7YRLADXX_S1_L001.bam b/test_data/TESTX_S1_L001.bam similarity index 100% rename from test_data/TESTX_H7YRLADXX_S1_L001.bam rename to test_data/TESTX_S1_L001.bam diff --git a/test_data/TESTX_H7YRLADXX_S1_L002.bam b/test_data/TESTX_S1_L002.bam similarity index 100% rename from test_data/TESTX_H7YRLADXX_S1_L002.bam rename to test_data/TESTX_S1_L002.bam diff --git a/test_data/test_input.txt b/test_data/test_input.txt index 1826ec8..44dce6a 100644 --- a/test_data/test_input.txt +++ b/test_data/test_input.txt @@ -1,2 +1,2 @@ -TESTX_H7YRLADXX_S1_L001 tumor test_data/TESTX_H7YRLADXX_S1_L001.bam -TESTX_H7YRLADXX_S1_L002 normal test_data/TESTX_H7YRLADXX_S1_L002.bam +TESTX_S1_L001 tumor test_data/TESTX_S1_L001.bam +TESTX_S1_L002 normal test_data/TESTX_S1_L002.bam From 05d7e9326e43cb2fbc755b3dd8c308c43d4662ef Mon Sep 17 00:00:00 2001 From: priesgof Date: Thu, 6 May 2021 11:05:06 +0200 Subject: [PATCH 3/5] make realignment around indels not requiring indels resource + remove default references --- README.md | 19 ++++++------ main.nf | 80 
+++++++++++++------------------------------------ nextflow.config | 59 +++++++++++++++++++++++++++++++++++- 3 files changed, 89 insertions(+), 69 deletions(-) diff --git a/README.md b/README.md index 322187d..cca1345 100644 --- a/README.md +++ b/README.md @@ -48,9 +48,10 @@ This can be built from a BED file using Picard's BedToIntervalList (https://gatk $ nextflow run tron-bioinformatics/tronflow-bam-preprocessing -r v1.2.0 --help N E X T F L O W ~ version 19.07.0 Launching `main.nf` [intergalactic_shannon] - revision: e707c77d7b + Usage: main.nf --input_files input_files - + Input: * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file Sample type will be added to the BAM header @SN sample name @@ -60,14 +61,14 @@ Input: name1 normal normal.1.bam name2 tumor tumor.2.bam * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict) - + Optional input: * --dbsnp: path to the dbSNP VCF (required to perform BQSR) - * --known_indels1: path to a VCF of known indels (required to perform realignment around indels) - * --known_indels2: path to a second VCF of known indels (required to perform realignment around indels) - **NOTE**: if any of the reference parameters is not provided, default hg19 resources under + * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels) + * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels) + **NOTE**: if any of the reference parameters is not provided, default hg19 resources under /projects/data/gatk_bundle/hg19/ will be used - + * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None) * --hs_metrics_target_coverage: name of output file for target HS metrics (default: None) * --hs_metrics_per_base_coverage: name of output file for per base HS metrics 
(default: None) @@ -77,7 +78,7 @@ Optional input: * --skip_metrics: optionally skip metrics (default: false) * --output: the folder where to publish output (default: ./output) * --platform: the platform to be added to the BAM header. Valid values: [ILLUMINA, SOLID, LS454, HELICOS and PACBIO] (default: ILLUMINA) - + Computational resources: * --prepare_bam_cpus: (default: 3) * --prepare_bam_memory: (default: 8g) @@ -87,11 +88,11 @@ Computational resources: * --realignment_around_indels_memory: (default: 32g) * --bqsr_cpus: (default: 3) * --bqsr_memory: (default: 4g) - + Output: * Preprocessed and indexed BAMs * Tab-separated values file with the absolute paths to the preprocessed BAMs, preprocessed_bams.txt - + Optional output: * Recalibration report * Realignment intervals diff --git a/main.nf b/main.nf index 9fd9953..e061606 100755 --- a/main.nf +++ b/main.nf @@ -3,10 +3,10 @@ publish_dir = 'output' params.help= false params.input_files = false -params.reference = "/projects/data/gatk_bundle/hg19/ucsc.hg19.fasta" -params.dbsnp = "/projects/data/gatk_bundle/hg19/dbsnp_138.hg19.vcf" -params.known_indels1 = "/projects/data/gatk_bundle/hg19/1000G_phase1.indels.hg19.sites.vcf" -params.known_indels2 = "/projects/data/gatk_bundle/hg19/Mills_and_1000G_gold_standard.indels.hg19.sites.sorted.vcf" +params.reference = false +params.dbsnp = false +params.known_indels1 = false +params.known_indels2 = false params.intervals = false params.hs_metrics_target_coverage = false params.hs_metrics_per_base_coverage = false @@ -29,56 +29,7 @@ params.bqsr_memory = "4g" def helpMessage() { - log.info""" -Usage: - main.nf --input_files input_files - -Input: - * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file - Sample type will be added to the BAM header @SN sample name - The input file does not have header! 
- Example input file: - name1 tumor tumor.1.bam - name1 normal normal.1.bam - name2 tumor tumor.2.bam - -Optional input: - * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict) - * --dbsnp: path to the dbSNP VCF - * --known_indels1: path to a VCF of known indels - * --known_indels2: path to a second VCF of known indels - **NOTE**: if any of the above parameters is not provided, default hg19 resources under - /projects/data/gatk_bundle/hg19/ will be used - - * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None) - * --hs_metrics_target_coverage: name of output file for target HS metrics (default: None) - * --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None) - * --skip_bqsr: optionally skip BQSR (default: false) - * --skip_realignment: optionally skip realignment (default: false) - * --skip_deduplication: optionally skip deduplication (default: false) - * --skip_metrics: optionally skip metrics (default: false) - * --output: the folder where to publish output (default: ./output) - * --platform: the platform to be added to the BAM header. 
Valid values: [ILLUMINA, SOLID, LS454, HELICOS and PACBIO] (default: ILLUMINA) - -Computational resources: - * --prepare_bam_cpus: (default: 3) - * --prepare_bam_memory: (default: 8g) - * --mark_duplicates_cpus: (default: 16) - * --mark_duplicates_memory: (default: 64g) - * --realignment_around_indels_cpus: (default: 2) - * --realignment_around_indels_memory: (default: 32g) - * --bqsr_cpus: (default: 3) - * --bqsr_memory: (default: 4g) - - Output: - * Preprocessed and indexed BAMs - * Tab-separated values file with the absolute paths to the preprocessed BAMs, preprocessed_bams.txt - -Optional output: - * Recalibration report - * Realignment intervals - * Metrics - """ + log.info params.help_message } if (params.help) { @@ -86,6 +37,14 @@ if (params.help) { exit 0 } +if (!params.reference) { + exit -1, "--reference is required" +} + +if (!params.skip_bqsr && !params.dbsnp) { + exit -1, "--dbsnp is required to perform BQSR" +} + if (params.output) { publish_dir = params.output } @@ -272,6 +231,11 @@ if (!params.skip_realignment) { set val(name), val(bam_name), val(type), file("${bam.baseName}.realigned.bam"), file("${bam.baseName}.realigned.bai") into realigned_bams file("${bam.baseName}.RA.intervals") into realignment_intervals + script: /* '+' binds tighter than '?:' in Groovy, so each optional flag is wrapped in its own parentheses */ + known_indels = (params.known_indels1 ? " --known ${params.known_indels1}" : "") + + (params.known_indels2 ? " --known ${params.known_indels2}" : "") + known_alleles = (params.known_indels1 ? " --knownAlleles ${params.known_indels1}" : "") + + (params.known_indels2 ? 
" --knownAlleles ${params.known_indels2}" : "") """ mkdir tmp @@ -279,19 +243,17 @@ if (!params.skip_realignment) { --input_file ${bam} \ --out ${bam.baseName}.RA.intervals \ --reference_sequence ${params.reference} \ - --known ${params.known_indels1} \ - --known ${params.known_indels2} + ${known_indels} gatk3 -Xmx${params.realignment_around_indels_memory} -Djava.io.tmpdir=tmp -T IndelRealigner \ --input_file ${bam} \ --out ${bam.baseName}.realigned.bam \ --reference_sequence ${params.reference} \ --targetIntervals ${bam.baseName}.RA.intervals \ - --knownAlleles ${params.known_indels1} \ - --knownAlleles ${params.known_indels2} \ --consensusDeterminationModel USE_SW \ --LODThresholdForCleaning 0.4 \ - --maxReadsInMemory 600000 + --maxReadsInMemory 600000 \ + ${known_alleles} """ } } diff --git a/nextflow.config b/nextflow.config index d21c00a..dec870d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -55,6 +55,9 @@ dag { //file = "${params.output}/pipeline_dag.svg" } +VERSION = '1.2.0' +DOI = 'https://zenodo.org/badge/latestdoi/358400957' + manifest { name = 'TRON-Bioinformatics/tronflow-bam-preprocessing' author = 'Pablo Riesgo Ferreiro' @@ -62,5 +65,59 @@ manifest { description = 'Picard and GATK BAM preprocessing pipeline' mainScript = 'main.nf' nextflowVersion = '>=19.10.0' - version = '1.2.0' + version = VERSION + doi = DOI } + +params.help_message = """ +TronFlow bam preprocessing v${VERSION} ${DOI} + +Usage: + main.nf --input_files input_files + +Input: + * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file + Sample type will be added to the BAM header @SN sample name + The input file does not have header! 
+ Example input file: + name1 tumor tumor.1.bam + name1 normal normal.1.bam + name2 tumor tumor.2.bam + * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict) + +Optional input: + * --dbsnp: path to the dbSNP VCF (required to perform BQSR) + * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels) + * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels) + **NOTE**: if any of the reference parameters is not provided, default hg19 resources under + /projects/data/gatk_bundle/hg19/ will be used + + * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None) + * --hs_metrics_target_coverage: name of output file for target HS metrics (default: None) + * --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None) + * --skip_bqsr: optionally skip BQSR (default: false) + * --skip_realignment: optionally skip realignment (default: false) + * --skip_deduplication: optionally skip deduplication (default: false) + * --skip_metrics: optionally skip metrics (default: false) + * --output: the folder where to publish output (default: ./output) + * --platform: the platform to be added to the BAM header. 
Valid values: [ILLUMINA, SOLID, LS454, HELICOS and PACBIO] (default: ILLUMINA) + +Computational resources: + * --prepare_bam_cpus: (default: 3) + * --prepare_bam_memory: (default: 8g) + * --mark_duplicates_cpus: (default: 16) + * --mark_duplicates_memory: (default: 64g) + * --realignment_around_indels_cpus: (default: 2) + * --realignment_around_indels_memory: (default: 32g) + * --bqsr_cpus: (default: 3) + * --bqsr_memory: (default: 4g) + + Output: + * Preprocessed and indexed BAMs + * Tab-separated values file with the absolute paths to the preprocessed BAMs, preprocessed_bams.txt + +Optional output: + * Recalibration report + * Realignment intervals + * Metrics + """ From e6174230afdd5fe9229f2c360012efdbe803679d Mon Sep 17 00:00:00 2001 From: priesgof Date: Thu, 6 May 2021 11:42:06 +0200 Subject: [PATCH 4/5] add options to collect hs metrics --- Makefile | 1 + README.md | 2 ++ main.nf | 9 ++++++++- nextflow.config | 2 ++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0390522..0daa2b7 100644 --- a/Makefile +++ b/Makefile @@ -15,3 +15,4 @@ test: nextflow main.nf -profile test,conda --output output/test5 --skip_metrics nextflow main.nf -profile test,conda --output output/test6 --intervals false nextflow main.nf -profile test,conda --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt + nextflow main.nf -profile test,conda --output output/test8 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10 diff --git a/README.md b/README.md index cca1345..0729e22 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,8 @@ Optional input: * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None) * --hs_metrics_target_coverage: name of output file for target HS 
metrics (default: None) + * --collect_hs_metrics_min_base_quality: minimum base quality for a base to contribute coverage (default: 20). + * --collect_hs_metrics_min_mapping_quality: minimum mapping quality for a read to contribute coverage (default: 20). * --skip_bqsr: optionally skip BQSR (default: false) * --skip_realignment: optionally skip realignment (default: false) * --skip_deduplication: optionally skip deduplication (default: false) diff --git a/main.nf b/main.nf index e061606..fea5139 100755 --- a/main.nf +++ b/main.nf @@ -16,7 +16,10 @@ params.skip_deduplication = false params.skip_metrics = false params.output = false params.platform = "ILLUMINA" +params.collect_hs_metrics_min_base_quality = false +params.collect_hs_metrics_min_mapping_quality = false +// computational resources params.prepare_bam_cpus = 3 params.prepare_bam_memory = "8g" params.mark_duplicates_cpus = 16 @@ -170,6 +173,10 @@ if (! params.skip_metrics) { hs_metrics_per_base_coverage= params.hs_metrics_per_base_coverage ? "--PER_BASE_COVERAGE ${params.hs_metrics_per_base_coverage}" : "" + minimum_base_quality = params.collect_hs_metrics_min_base_quality ? + "--MINIMUM_BASE_QUALITY ${params.collect_hs_metrics_min_base_quality}" : "" + minimum_mapping_quality = params.collect_hs_metrics_min_mapping_quality ? + "--MINIMUM_MAPPING_QUALITY ${params.collect_hs_metrics_min_mapping_quality}" : "" """ mkdir tmp @@ -179,7 +186,7 @@ if (! 
params.skip_metrics) { --OUTPUT ${bam.baseName} \ --TARGET_INTERVALS ${params.intervals} \ --BAIT_INTERVALS ${params.intervals} \ - ${hs_metrics_target_coverage} ${hs_metrics_per_base_coverage} + ${hs_metrics_target_coverage} ${hs_metrics_per_base_coverage} ${minimum_base_quality} ${minimum_mapping_quality} """ } } diff --git a/nextflow.config b/nextflow.config index dec870d..a7a2122 100644 --- a/nextflow.config +++ b/nextflow.config @@ -95,6 +95,8 @@ Optional input: * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None) * --hs_metrics_target_coverage: name of output file for target HS metrics (default: None) * --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None) + * --collect_hs_metrics_min_base_quality: minimum base quality for a base to contribute coverage (default: 20). + * --collect_hs_metrics_min_mapping_quality: minimum mapping quality for a read to contribute coverage (default: 20). 
* --skip_bqsr: optionally skip BQSR (default: false) * --skip_realignment: optionally skip realignment (default: false) * --skip_deduplication: optionally skip deduplication (default: false) From dc4672dd63d1cf95d9065f67f7ef667476fc3144 Mon Sep 17 00:00:00 2001 From: priesgof Date: Thu, 6 May 2021 11:47:22 +0200 Subject: [PATCH 5/5] update documentation --- README.md | 3 --- nextflow.config | 3 --- 2 files changed, 6 deletions(-) diff --git a/README.md b/README.md index 0729e22..3041759 100644 --- a/README.md +++ b/README.md @@ -66,9 +66,6 @@ Optional input: * --dbsnp: path to the dbSNP VCF (required to perform BQSR) * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels) * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels) - **NOTE**: if any of the reference parameters is not provided, default hg19 resources under - /projects/data/gatk_bundle/hg19/ will be used - * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None) * --hs_metrics_target_coverage: name of output file for target HS metrics (default: None) * --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None) diff --git a/nextflow.config b/nextflow.config index a7a2122..77b3d7a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -89,9 +89,6 @@ Optional input: * --dbsnp: path to the dbSNP VCF (required to perform BQSR) * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels) * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels) - **NOTE**: if any of the reference parameters is not provided, default hg19 resources under - /projects/data/gatk_bundle/hg19/ will be used - * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None) * 
--hs_metrics_target_coverage: name of output file for target HS metrics (default: None) * --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None)