From 4ad680b81ebae1dddac072b4bdf586e7469483ef Mon Sep 17 00:00:00 2001 From: priesgof Date: Fri, 18 Jun 2021 11:49:34 +0200 Subject: [PATCH 1/3] simplify interface and correct output of HsMetrics --- Makefile | 6 ++++-- README.md | 2 -- main.nf | 21 ++++++--------------- nextflow.config | 4 +--- 4 files changed, 11 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index 2edf503..bf03596 100644 --- a/Makefile +++ b/Makefile @@ -13,8 +13,8 @@ test: nextflow main.nf -profile test,conda --skip_deduplication --output output/test4 nextflow main.nf -profile test,conda --output output/test5 --skip_deduplication --skip_bqsr --skip_metrics --known_indels1 false --known_indels2 false nextflow main.nf -profile test,conda --output output/test6 --intervals false --skip_deduplication --skip_bqsr --skip_realignment - nextflow main.nf -profile test,conda --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --skip_bqsr --skip_realignment - nextflow main.nf -profile test,conda --output output/test8 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10 --remove_duplicates false --skip_bqsr --skip_realignment + nextflow main.nf -profile test,conda --output output/test7 --skip_bqsr --skip_realignment + nextflow main.nf -profile test,conda --output output/test8 --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10 --remove_duplicates false --skip_bqsr --skip_realignment nextflow main.nf -profile test,conda --output output/test9 --skip_deduplication --skip_bqsr --skip_realignment --input_files false --input_bam test_data/TESTX_S1_L001.bam check: @@ -48,6 +48,8 @@ check: test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 7 output file!"; exit 1; } test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 8 output file!"; exit 1; } test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 8 output file!"; exit 1; } + test -s output/test8/sample1/metrics/TESTX_S1_L001.prepared.dedup.hs_metrics.txt || { echo "Missing test 8 output file!"; exit 1; } + test -s output/test8/sample1/metrics/TESTX_S1_L001.prepared.dedup_metrics.txt || { echo "Missing test 8 output file!"; exit 1; } test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 8 output file!"; exit 1; } test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 8 output file!"; exit 1; } test -s output/test9/TESTX_S1_L001/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 9 output file!"; exit 1; } diff --git a/README.md b/README.md index 65292cb..e8ffce5 100644 --- a/README.md +++ b/README.md @@ -70,8 +70,6 @@ Optional input: * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels) * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels) * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None) - * --hs_metrics_target_coverage: name of output file for target HS metrics (default: None) - * --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None) * --collect_hs_minimum_base_quality: minimum base quality for a base to contribute coverage (default: 20). 
* --collect_hs_minimum_mapping_quality: minimum mapping quality for a read to contribute coverage (default: 20). * --skip_bqsr: optionally skip BQSR (default: false) diff --git a/main.nf b/main.nf index 154ea01..8637c30 100755 --- a/main.nf +++ b/main.nf @@ -10,8 +10,6 @@ params.dbsnp = false params.known_indels1 = false params.known_indels2 = false params.intervals = false -params.hs_metrics_target_coverage = false -params.hs_metrics_per_base_coverage = false params.skip_bqsr = false params.skip_realignment = false params.skip_deduplication = false @@ -148,7 +146,7 @@ if (!params.skip_deduplication) { file("${bam.baseName}.dedup_metrics") optional true into deduplication_metrics script: - dedup_metrics = params.skip_metrics ? "": "--metrics-file ${bam.baseName}.dedup_metrics" + dedup_metrics = params.skip_metrics ? "": "--metrics-file ${bam.baseName}.dedup_metrics.txt" remove_duplicates = params.remove_duplicates ? "--remove-all-duplicates true" : "--remove-all-duplicates false" """ mkdir tmp @@ -202,18 +200,11 @@ if (! params.skip_metrics) { set name, bam_name, type, file(bam), file(bai) from deduplicated_bams_for_hs_metrics output: - file("*_metrics") optional true into txt_hs_metrics - file("*.pdf") optional true into pdf_hs_metrics - file(params.hs_metrics_target_coverage) optional true into target_hs_metrics - file(params.hs_metrics_per_base_coverage) optional true into per_base_hs_metrics + file("*_metrics") optional true + file("*.pdf") optional true + file("${bam.baseName}.hs_metrics.txt") script: - hs_metrics_target_coverage= params.hs_metrics_target_coverage ? - "--PER_TARGET_COVERAGE ${params.hs_metrics_target_coverage} --REFERENCE_SEQUENCE ${params.reference}" : - "" - hs_metrics_per_base_coverage= params.hs_metrics_per_base_coverage ? - "--PER_BASE_COVERAGE ${params.hs_metrics_per_base_coverage}" : - "" minimum_base_quality = params.collect_hs_metrics_min_base_quality ? "--MINIMUM_BASE_QUALITY ${params.collect_hs_metrics_min_base_quality}" : "" minimum_mapping_quality = params.collect_hs_metrics_min_mapping_quality ? @@ -224,10 +215,10 @@ if (! params.skip_metrics) { gatk CollectHsMetrics \ --java-options '-Xmx${params.metrics_memory} -Djava.io.tmpdir=tmp' \ --INPUT ${bam} \ - --OUTPUT ${bam.baseName} \ + --OUTPUT ${bam.baseName}.hs_metrics.txt \ --TARGET_INTERVALS ${params.intervals} \ --BAIT_INTERVALS ${params.intervals} \ - ${hs_metrics_target_coverage} ${hs_metrics_per_base_coverage} ${minimum_base_quality} ${minimum_mapping_quality} + ${minimum_base_quality} ${minimum_mapping_quality} """ } } diff --git a/nextflow.config b/nextflow.config index dbc239d..e04ef6b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -46,7 +46,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] cleanup = true -VERSION = '1.5.0' +VERSION = '1.6.0' DOI = 'https://zenodo.org/badge/latestdoi/358400957' manifest { @@ -83,8 +83,6 @@ Optional input: * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels) * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels) * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None) - * --hs_metrics_target_coverage: name of output file for target HS metrics (default: None) - * --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None) * --collect_hs_minimum_base_quality: minimum base quality for a base to contribute coverage (default: 20). 
* --collect_hs_minimum_mapping_quality: minimum mapping quality for a read to contribute coverage (default: 20). * --skip_bqsr: optionally skip BQSR (default: false) From 9f98f090a4afa258dfd189341f10b51df4b63ef1 Mon Sep 17 00:00:00 2001 From: priesgof Date: Wed, 23 Jun 2021 13:44:18 +0200 Subject: [PATCH 2/3] make sure that the name of the output deduplication metrics file is correct --- main.nf | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/main.nf b/main.nf index 8637c30..f2fe6a3 100755 --- a/main.nf +++ b/main.nf @@ -134,7 +134,7 @@ if (!params.skip_deduplication) { cpus "${params.mark_duplicates_cpus}" memory "${params.mark_duplicates_memory}" tag "${name}" - publishDir "${publish_dir}/${name}/metrics", mode: "copy", pattern: "*.dedup_metrics" + publishDir "${publish_dir}/${name}/metrics", mode: "copy", pattern: "*.dedup_metrics.txt" input: set name, bam_name, type, file(bam) from prepared_bams @@ -143,7 +143,7 @@ if (!params.skip_deduplication) { set val(name), val(bam_name), val(type), file("${bam.baseName}.dedup.bam"), file("${bam.baseName}.dedup.bam.bai") into deduplicated_bams, deduplicated_bams_for_metrics, deduplicated_bams_for_hs_metrics - file("${bam.baseName}.dedup_metrics") optional true into deduplication_metrics + file("${bam.baseName}.dedup_metrics.txt") optional true script: dedup_metrics = params.skip_metrics ? "": "--metrics-file ${bam.baseName}.dedup_metrics.txt" @@ -155,9 +155,7 @@ if (!params.skip_deduplication) { --java-options '-Xmx${params.mark_duplicates_memory} -Djava.io.tmpdir=tmp' \ --input ${bam} \ --output ${bam.baseName}.dedup.bam \ - --conf 'spark.executor.cores=${task.cpus}' \ - ${remove_duplicates} \ - ${dedup_metrics} + --conf 'spark.executor.cores=${task.cpus}' ${remove_duplicates} ${dedup_metrics} """ } } From a2fe200393807fd15d5dd8435ffbbf4f2bc8ff12 Mon Sep 17 00:00:00 2001 From: priesgof Date: Wed, 23 Jun 2021 13:47:53 +0200 Subject: [PATCH 3/3] extend license period --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index f12ecab..f760fbb 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2019 TRON +Copyright (c) 2019-2021 TRON Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal
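
For reference, a usage sketch of the simplified interface after this series: the --hs_metrics_target_coverage and --hs_metrics_per_base_coverage options are removed, and HS and deduplication metrics are instead published under <output>/<sample>/metrics/ with fixed, BAM-derived names. The invocation below mirrors the test8 target in the Makefile above and assumes the repository's test profile and bundled test data; paths and sample names come from that Makefile, not from anything new in these patches.

    nextflow main.nf -profile test,conda \
        --output output/test8 \
        --collect_hs_metrics_min_base_quality 10 \
        --collect_hs_metrics_min_mapping_quality 10 \
        --remove_duplicates false \
        --skip_bqsr --skip_realignment
    # Expected metrics outputs (per the Makefile check target):
    #   output/test8/sample1/metrics/TESTX_S1_L001.prepared.dedup.hs_metrics.txt
    #   output/test8/sample1/metrics/TESTX_S1_L001.prepared.dedup_metrics.txt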