Replace MarkDuplicatesSpark with SortSam followed by MarkDuplicates.

- MARK_DUPLICATES now coordinate-sorts the input with SortSam, marks duplicates
  with MarkDuplicates (which also writes the index), copies the .bai index to
  the .bam.bai name and removes the intermediate sorted BAM.
- SortSam is given the same --java-options as MarkDuplicates so that its heap
  is capped by params.mark_duplicates_memory and its temporary files are
  written to the tmp directory created at the start of the script.
- Default mark-duplicates resources are lowered from 16 CPUs / 64g to
  2 CPUs / 16g.
- gatk4 is bumped from 4.2.0.0 to 4.2.5.0 everywhere except the realignment
  process, which keeps gatk4=4.2.0.0 next to gatk=3.8.
- Pipeline VERSION goes from 1.7.3 to 1.8.0.

diff --git a/main.nf b/main.nf
index fab5149..8e517de 100755
--- a/main.nf
+++ b/main.nf
@@ -30,8 +30,8 @@ params.collect_hs_metrics_min_mapping_quality = false
 // computational resources
 params.prepare_bam_cpus = 3
 params.prepare_bam_memory = "8g"
-params.mark_duplicates_cpus = 16
-params.mark_duplicates_memory = "64g"
+params.mark_duplicates_cpus = 2
+params.mark_duplicates_memory = "16g"
 params.realignment_around_indels_cpus = 2
 params.realignment_around_indels_memory = "31g"
 params.bqsr_cpus = 3
diff --git a/modules/01_prepare_bam.nf b/modules/01_prepare_bam.nf
index 0ab3a93..f134357 100644
--- a/modules/01_prepare_bam.nf
+++ b/modules/01_prepare_bam.nf
@@ -17,7 +17,7 @@ process PREPARE_BAM {
     memory "${params.prepare_bam_memory}"
     tag "${name}"
 
-    conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null)
+    conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)
 
     input:
     tuple val(name), val(type), file(bam)
@@ -58,7 +58,7 @@ process INDEX_BAM {
     memory "${params.index_memory}"
     tag "${name}"
 
-    conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null)
+    conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)
 
     input:
     tuple val(name), val(type), file(bam)
diff --git a/modules/02_mark_duplicates.nf b/modules/02_mark_duplicates.nf
index 852f3ab..9ff86ea 100644
--- a/modules/02_mark_duplicates.nf
+++ b/modules/02_mark_duplicates.nf
@@ -1,5 +1,5 @@
-params.mark_duplicates_cpus = 16
-params.mark_duplicates_memory = "64g"
+params.mark_duplicates_cpus = 2
+params.mark_duplicates_memory = "16g"
 params.remove_duplicates = true
 params.skip_metrics = false
 params.output = 'output'
@@ -11,7 +11,7 @@ process MARK_DUPLICATES {
     tag "${name}"
     publishDir "${params.output}/${name}/metrics/mark_duplicates", mode: "copy", pattern: "*.dedup_metrics.txt"
 
-    conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null)
+    conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)
 
     input:
     tuple val(name), val(type), file(bam)
@@ -21,15 +21,26 @@ process MARK_DUPLICATES {
     file("${name}.dedup_metrics.txt") optional true
 
     script:
-    dedup_metrics = params.skip_metrics ? "": "--metrics-file ${name}.dedup_metrics.txt"
-    remove_duplicates = params.remove_duplicates ? "--remove-all-duplicates true" : "--remove-all-duplicates false"
+    dedup_metrics = params.skip_metrics ? "": "--METRICS_FILE ${name}.dedup_metrics.txt"
+    remove_duplicates = params.remove_duplicates ? "--REMOVE_DUPLICATES true" : "--REMOVE_DUPLICATES false"
     """
     mkdir tmp
 
-    gatk MarkDuplicatesSpark \
+    gatk SortSam \
+    --java-options '-Xmx${params.mark_duplicates_memory} -Djava.io.tmpdir=tmp' \
+    --INPUT ${bam} \
+    --OUTPUT ${name}.sorted.bam \
+    --SORT_ORDER coordinate
+
+    gatk MarkDuplicates \
     --java-options '-Xmx${params.mark_duplicates_memory} -Djava.io.tmpdir=tmp' \
-    --input ${bam} \
-    --output ${name}.dedup.bam \
-    --conf 'spark.executor.cores=${task.cpus}' ${remove_duplicates} ${dedup_metrics}
+    --INPUT ${name}.sorted.bam \
+    --OUTPUT ${name}.dedup.bam \
+    --ASSUME_SORT_ORDER coordinate \
+    --CREATE_INDEX true ${remove_duplicates} ${dedup_metrics}
+
+    cp ${name}.dedup.bai ${name}.dedup.bam.bai
+
+    rm -f ${name}.sorted.bam
     """
 }
diff --git a/modules/03_metrics.nf b/modules/03_metrics.nf
index af1915b..43909fa 100644
--- a/modules/03_metrics.nf
+++ b/modules/03_metrics.nf
@@ -13,7 +13,7 @@ process HS_METRICS {
     tag "${name}"
     publishDir "${params.output}/${name}/metrics/hs_metrics", mode: "copy"
 
-    conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null)
+    conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)
 
     input:
     tuple val(name), val(type), file(bam), file(bai)
@@ -53,7 +53,7 @@ process METRICS {
     publishDir "${params.output}/${name}/metrics/gatk_multiple_metrics", mode: "copy"
 
     // NOTE: the method CollectMultipleMetrics has a hidden dependency to R for making plots
-    conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0 r::r=3.6.0" : null)
+    conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0 r::r=3.6.0" : null)
 
     input:
     tuple val(name), val(type), file(bam), file(bai)
diff --git a/modules/04_realignment_around_indels.nf b/modules/04_realignment_around_indels.nf
index 712db9a..1f86288 100644
--- a/modules/04_realignment_around_indels.nf
+++ b/modules/04_realignment_around_indels.nf
@@ -13,7 +13,7 @@ process REALIGNMENT_AROUND_INDELS {
     publishDir "${params.output}/${name}/metrics/realignment", mode: "copy", pattern: "*.RA.intervals"
 
     // NOTE: this dependency is fixed to GATK 3 as the realignment around indels is not anymore maintained in GATK 4
-    // but still for some reason for GATK 3 to work the dependency to GATK 4 is needed
+    // but still for some reason for GATK 3 to work the dependency to GATK 4.2.0.0 is needed
     conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0 bioconda::gatk=3.8" : null)
 
     input:
diff --git a/modules/05_bqsr.nf b/modules/05_bqsr.nf
index 391277c..c893929 100644
--- a/modules/05_bqsr.nf
+++ b/modules/05_bqsr.nf
@@ -11,7 +11,7 @@ process BQSR {
     publishDir "${params.output}/${name}", mode: "copy"
     tag "${name}"
 
-    conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null)
+    conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)
 
     input:
     tuple val(name), val(type), file(bam), file(bai)
diff --git a/nextflow.config b/nextflow.config
index 16ebdd1..8cf89d7 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -46,7 +46,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail']
 
 cleanup = true
 
-VERSION = '1.7.3'
+VERSION = '1.8.0'
 DOI = 'https://zenodo.org/badge/latestdoi/358400957'
 
 manifest {