Skip to content

Commit

Permalink
Merge pull request #259 from nf-cmgg/merge-fast-processes
Browse files Browse the repository at this point in the history
Merge fast processes
  • Loading branch information
nvnieuwk authored Jan 27, 2025
2 parents 15836df + 6836a7a commit a87d104
Show file tree
Hide file tree
Showing 74 changed files with 728 additions and 1,746 deletions.
2 changes: 1 addition & 1 deletion .nf-core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,4 @@ template:
skip_features:
- fastqc
- is_nfcore
version: 1.9.3
version: 1.10.0dev
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## v1.10.0dev

### Changes

1. Merged the following processes to improve the efficiency of the pipeline:
   - VCF index creation on output VCFs is now done by the processes that create these VCFs instead of by separate index modules
   - The filter modules for `--filter` have been merged into a single process
   - BED filtering and intersecting with the Regions Of Interest have been merged into a single process

## v1.9.3 Nifty Nieuwkerke - [January 23 2025]

1. Fix db postprocess in vcf2db module
Expand Down
72 changes: 24 additions & 48 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,11 @@ process {
].join(" ")
}

withName: "^.*CRAM_PREPARE_SAMTOOLS_BEDTOOLS:FILTER_BEDS\$" {
withName: "^.*CRAM_PREPARE_SAMTOOLS_BEDTOOLS:PROCESS_BEDS\$" {
ext.prefix = { "${meta.id}.filter"}
ext.args = "-vE \"LOW_COVERAGE|NO_COVERAGE${params.keep_alt_contigs ? "" : "|alt|random|decoy|Un"}\""
ext.args2 = "-d 150"
}

withName: "^.*CRAM_PREPARE_SAMTOOLS_BEDTOOLS:BEDTOOLS_INTERSECT\$" {
ext.prefix = {"${meta.id}.intersect"}
ext.args = "-sorted"
ext.args3 = "-sorted"
}

/*
Expand Down Expand Up @@ -87,7 +83,7 @@ process {

withName: "^.*CRAM_CALL_GATK4:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" {
ext.prefix = { "${meta.id}.${meta.caller}.g" }
ext.args = '--allow-overlaps --output-type z'
ext.args = '--allow-overlaps --output-type z --write-index=tbi'
}

withName: "^.*CRAM_CALL_GATK4:BCFTOOLS_STATS\$" {
Expand All @@ -108,7 +104,7 @@ process {

withName: "^.*BAM_CALL_ELPREP:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" {
ext.prefix = { "${meta.id}.${meta.caller}.g" }
ext.args = '--allow-overlaps --output-type z'
ext.args = '--allow-overlaps --output-type z --write-index=tbi'
}

withName: "^.*BAM_CALL_ELPREP:BCFTOOLS_STATS\$" {
Expand Down Expand Up @@ -168,7 +164,7 @@ process {

withName: "^.*GVCF_JOINT_GENOTYPE_GATK4:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" {
ext.prefix = { "${meta.id}.concat" }
ext.args = "--allow-overlaps --output-type z"
ext.args = "--allow-overlaps --output-type z --write-index=tbi"
}

/*
Expand All @@ -194,45 +190,29 @@ process {
}

withName: "^.*BAM_CALL_VARDICTJAVA:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" {
ext.args = '--allow-overlaps --output-type z'
ext.args = '--allow-overlaps --output-type z --write-index=tbi'
ext.prefix = {"${meta.id}.concat"}
}

withName: "^.*BAM_CALL_VARDICTJAVA:TABIX_VCFANNO\$" {
ext.prefix = {"${meta.id}.vcfanno"}
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
FILTER
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

withName: "^.*VCF_FILTER_BCFTOOLS:FILTER_1\$" {
ext.prefix = { "${meta.id}.filtered1" }
ext.args = {
meta.caller == "vardict" ?
"-i 'QUAL >= 0${params.only_pass ? " && FILTER=\"PASS\"" : ""}' --output-type z":
meta.caller == "haplotypecaller" ?
"--output-type z --soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'":
meta.caller == "elprep" ?
"--output-type z --soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'":
""
}
}

withName: "^.*VCF_FILTER_BCFTOOLS:FILTER_2\$" {
ext.args = {
meta.caller == "vardict" ?
"--soft-filter 'LowFreqBias' --mode '+' -e 'FORMAT/AF[0:*] < 0.02 && FORMAT/VD[0] < 30 && INFO/SBF < 0.1 && INFO/NM >= 2.0' --output-type z" :
meta.caller == "haplotypecaller" ?
'--output-type z --soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 )\' -m \'+\'' :
meta.caller == "elprep" ?
'--output-type z --soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 )\' -m \'+\'' :
""

}
ext.prefix = {"${meta.id}.filtered"}
withName: "^.*VCF_FILTER_BCFTOOLS:BCFTOOLS_FILTER\$" {
    // Output files are named <sample id>.filtered.<extension>
    ext.prefix = { "${meta.id}.filtered" }

    // Arguments of the FIRST `bcftools filter` command, selected per variant caller.
    // The local BCFTOOLS_FILTER module pipes this command into the one built from
    // ext.args2, so no output type is set here: only the last command of the chain
    // writes the final file. haplotypecaller and elprep share the same SNP soft filter.
    ext.args = { [
        meta.caller == "vardict" ? "-i 'QUAL >= 0${params.only_pass ? " && FILTER=\"PASS\"" : ""}'" : "",
        meta.caller in ["haplotypecaller", "elprep"] ? "--soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'" : ""
    ].findAll { arg -> arg != "" }.join(" ") }

    // Arguments of the SECOND (and here last) `bcftools filter` command.
    // The output type and index creation are always appended, so the final output
    // is a bgzipped VCF with a tabix index regardless of the caller.
    ext.args2 = { [
        meta.caller == "vardict" ? "--soft-filter 'LowFreqBias' --mode '+' -e 'FORMAT/AF[0:*] < 0.02 && FORMAT/VD[0] < 30 && INFO/SBF < 0.1 && INFO/NM >= 2.0'" : "",
        meta.caller in ["haplotypecaller", "elprep"] ? '--soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 )\' -m \'+\'' : "",
        "--output-type z --write-index=tbi"
    ].findAll { arg -> arg != "" }.join(" ") }
}

/*
Expand All @@ -243,7 +223,7 @@ process {

withName: "^.*GERMLINE:BCFTOOLS_NORM\$" {
ext.prefix = {"${meta.id}.normalized"}
ext.args = "-m-"
ext.args = "-m- --output-type z --write-index=tbi"
}

/*
Expand All @@ -264,7 +244,7 @@ process {

withName: "^.*VCF_PED_RTGTOOLS:BCFTOOLS_ANNOTATE\$" {
ext.prefix = { "${meta.id}.${meta.caller}.ped.annotated" }
ext.args = "--output-type z"
ext.args = "--output-type z --write-index=tbi"
}

/*
Expand Down Expand Up @@ -301,20 +281,16 @@ process {
].join(' ').trim()}
}

withName: "^.*VCF_ANNOTATION:VCF_ANNOTATE_ENSEMBLVEP:BCFTOOLS_CONCAT\$" {
withName: "^.*VCF_ANNOTATE_ENSEMBLVEP:BCFTOOLS_CONCAT\$" {
ext.prefix = {"${meta.id}_concat"}
ext.args = "--allow-overlaps --output-type z"
}

withName: "^.*VCF_ANNOTATION:VCF_ANNOTATE_ENSEMBLVEP:BCFTOOLS_SORT\$" {
withName: "^.*VCF_ANNOTATE_ENSEMBLVEP:BCFTOOLS_SORT\$" {
ext.prefix = {"${meta.id}.sorted"}
ext.args = "--write-index=tbi --output-type z"
}

withName: "^.*VCF_ANNOTATION:BGZIP_ANNOTATED_VCFS\$" {
ext.prefix = {"${meta.id}.vcfanno"}
}


/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
VALIDATION
Expand Down
19 changes: 2 additions & 17 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,6 @@
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
},
"bedtools/intersect": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
},
"bedtools/merge": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
Expand Down Expand Up @@ -85,7 +80,7 @@
},
"ensemblvep/vep": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"git_sha": "ef36baef619ebe8a244fee313d44eba571ba73b4",
"installed_by": ["modules"],
"patch": "modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff"
},
Expand Down Expand Up @@ -191,16 +186,6 @@
"installed_by": ["modules"],
"patch": "modules/nf-core/somalier/relate/somalier-relate.diff"
},
"tabix/bgzip": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
},
"tabix/bgziptabix": {
"branch": "master",
"git_sha": "f448e846bdadd80fc8be31fbbc78d9f5b5131a45",
"installed_by": ["modules"]
},
"tabix/tabix": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
Expand All @@ -225,7 +210,7 @@
},
"vcfanno": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"git_sha": "b1137e22798227331c9a9a12bd92bd6e865865c5",
"installed_by": ["modules"]
}
}
Expand Down
87 changes: 87 additions & 0 deletions modules/local/bcftools/filter/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
 * Resolve the output file extension from the output type requested in the
 * arguments of the LAST `bcftools filter` command of the chain.
 *
 * Recognised spellings: `--output-type X`, `--output-type=X`, `-OX` and `-O X`.
 * When several types are present the priority is b > u > z > v; when none is
 * found the extension is "vcf".
 */
def bcftoolsFilterExtension(String filter_args) {
    def has_type = { String type ->
        def flags = ["--output-type ${type}", "--output-type=${type}", "-O${type}", "-O ${type}"]
        return flags.any { flag -> filter_args.contains(flag.toString()) }
    }
    return has_type("b") ? "bcf.gz" :
        has_type("u") ? "bcf" :
        has_type("z") ? "vcf.gz" :
        "vcf"
}

/*
 * Run one to three chained `bcftools filter` commands in a single task.
 *
 * ext.args   -> arguments of the first filter command (reads the input VCF)
 * ext.args2  -> optional: arguments of a second filter command, fed through a pipe
 * ext.args3  -> optional: arguments of a third filter command, fed through a pipe
 * ext.prefix -> output file prefix (defaults to meta.id)
 *
 * `--output <prefix>.<extension>` is appended after the last command of the chain,
 * so the output type (and any `--write-index`) must be given in the arguments of
 * that last command.
 */
process BCFTOOLS_FILTER {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0':
        'biocontainers/bcftools:1.20--h8b25389_0' }"

    input:
    // The index is staged next to the VCF; it is not referenced in the command line
    tuple val(meta), path(vcf), path(tbi)

    output:
    tuple val(meta), path("*.${extension}"), emit: vcf
    tuple val(meta), path("*.tbi")         , emit: tbi, optional: true
    tuple val(meta), path("*.csi")         , emit: csi, optional: true
    path "versions.yml"                    , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args   ?: ''
    def args2  = task.ext.args2  ?: ''
    def args3  = task.ext.args3  ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Arguments of the last command that will actually run: it writes the output file
    def last_args = args3 ?: args2 ?: args

    // Deliberately declared without `def` so the output block can use it in its glob
    extension = bcftoolsFilterExtension(last_args)

    if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!"

    // Optional extra stages; an empty string leaves only a harmless line continuation
    def filter_2 = args2 ? "| bcftools filter --threads ${task.cpus} ${args2}" : ""
    def filter_3 = args3 ? "| bcftools filter --threads ${task.cpus} ${args3}" : ""

    """
    bcftools filter \\
        --threads ${task.cpus} \\
        $args \\
        $vcf \\
        ${filter_2} \\
        ${filter_3} \\
        --output ${prefix}.${extension}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
    END_VERSIONS
    """

    stub:
    def args   = task.ext.args   ?: ''
    def args2  = task.ext.args2  ?: ''
    def args3  = task.ext.args3  ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Same resolution as in the script section so the stub produces the same file names
    def last_args = args3 ?: args2 ?: args

    extension = bcftoolsFilterExtension(last_args)

    // A bare --write-index / -W (no format given) is treated as csi
    def index = last_args.contains("--write-index=tbi") || last_args.contains("-W=tbi") ? "tbi" :
        last_args.contains("--write-index=csi") || last_args.contains("-W=csi") ? "csi" :
        last_args.contains("--write-index") || last_args.contains("-W") ? "csi" :
        ""
    // Compressed outputs get a valid (empty) gzip stream, plain outputs an empty file
    def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch"
    def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${index}" : ""

    if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!"

    """
    ${create_cmd} ${prefix}.${extension}
    ${create_index}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
    END_VERSIONS
    """
}
Original file line number Diff line number Diff line change
@@ -1,27 +1,26 @@
nextflow_process {

name "Test Process BEDTOOLS_INTERSECT"
name "Test Process BCFTOOLS_FILTER"
script "../main.nf"
process "BEDTOOLS_INTERSECT"
config "./nextflow.config"
process "BCFTOOLS_FILTER"

tag "modules"
tag "modules_nfcore"
tag "bedtools"
tag "bedtools/intersect"
tag "modules_local"
tag "bcftools"
tag "bcftools/filter"

test("sarscov2 - bed - bed") {
test("sarscov2 - 1 filter") {

config "./one_filter.config"

when {
process {
"""
input[0] = [
[ id:'test' ],
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true)
[id:"vcf_test"],
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
[]
]
input[1] = [[:], []]
"""
}
}
Expand All @@ -35,18 +34,18 @@ nextflow_process {

}

test("sarscov2 - bam - bam") {
test("sarscov2 - 2 filters") {

config "./two_filters.config"

when {
process {
"""
input[0] = [
[ id:'test' ],
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/baits.bed', checkIfExists: true)
[id:"vcf_test"],
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
[]
]
input[1] = [[:], []]
"""
}
}
Expand All @@ -60,20 +59,18 @@ nextflow_process {

}

test("sarscov2 - bed - stub") {
test("sarscov2 - 3 filters") {

options "-stub"
config "./three_filters.config"

when {
process {
"""
input[0] = [
[ id:'test' ],
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true)
[id:"vcf_test"],
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
[]
]
input[1] = [[:], []]
"""
}
}
Expand Down
Loading

0 comments on commit a87d104

Please sign in to comment.