From 6f9f9e470fb22b098560cab776498abe5c3eb8a0 Mon Sep 17 00:00:00 2001 From: Felix Lenner <52530259+fellen31@users.noreply.github.com> Date: Wed, 27 Mar 2024 10:34:12 +0100 Subject: [PATCH] Fix bcftools merge (#43) Fix bcftools merge --------- Co-authored-by: Anders Jemt --- CHANGELOG.md | 2 + conf/modules/repeat_calling.config | 6 ++- modules.json | 3 +- .../bcftools/merge/bcftools-merge.diff | 38 ------------------- modules/nf-core/bcftools/merge/main.nf | 9 ++--- subworkflows/local/repeat_analysis.nf | 31 +++------------ 6 files changed, 16 insertions(+), 73 deletions(-) delete mode 100644 modules/nf-core/bcftools/merge/bcftools-merge.diff diff --git a/CHANGELOG.md b/CHANGELOG.md index 9276921a..d786814c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ Initial release of genomic-medicine-sweden/skierfe, created with the [nf-core](h ### `Fixed` +- Fix BCFTools merge module inputs [#43](https://github.com/genomic-medicine-sweden/skierfe/pull/43) + ### `Dependencies` ### `Deprecated` diff --git a/conf/modules/repeat_calling.config b/conf/modules/repeat_calling.config index f2c48d7e..ff1872f4 100644 --- a/conf/modules/repeat_calling.config +++ b/conf/modules/repeat_calling.config @@ -66,9 +66,11 @@ process { } withName: BCFTOOLS_MERGE { - ext.args = "--output-type b" + + ext.args = '--output-type b' + publishDir = [ - path: { "${params.outdir}/repeat_analysis/trgt/processed/${meta.id}" }, + path: { "${params.outdir}/repeat_analysis/bcftools/merge/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] diff --git a/modules.json b/modules.json index a2e53d6f..86d60b2f 100644 --- a/modules.json +++ b/modules.json @@ -18,8 +18,7 @@ "bcftools/merge": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": ["modules"], - "patch": "modules/nf-core/bcftools/merge/bcftools-merge.diff" + "installed_by": ["modules"] }, "bcftools/norm": { "branch": "master", diff --git a/modules/nf-core/bcftools/merge/bcftools-merge.diff b/modules/nf-core/bcftools/merge/bcftools-merge.diff deleted file mode 100644 index ec791629..00000000 --- a/modules/nf-core/bcftools/merge/bcftools-merge.diff +++ /dev/null @@ -1,38 +0,0 @@ -Changes in module 'nf-core/bcftools/merge' ---- modules/nf-core/bcftools/merge/main.nf -+++ modules/nf-core/bcftools/merge/main.nf -@@ -12,6 +12,7 @@ - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - path(bed) -+ tuple val(meta4), path(file_list) - - output: - tuple val(meta), path("*.{bcf,vcf}{,.gz}"), emit: merged_variants -@@ -25,19 +26,21 @@ - def prefix = task.ext.prefix ?: "${meta.id}" - - def regions = bed ? "--regions-file $bed" : "" -+ def files = file_list ? "--file-list $file_list" : $vcfs - def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : - args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : - args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : - args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : -- "vcf" -- -+ "vcf.gz" -+ - """ - bcftools merge \\ - $args \\ - $regions \\ - --threads $task.cpus \\ - --output ${prefix}.${extension} \\ -- $vcfs -+ $args \\ -+ $files - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - -************************************************************ diff --git a/modules/nf-core/bcftools/merge/main.nf b/modules/nf-core/bcftools/merge/main.nf index 2c5eded1..b85d3fec 100644 --- a/modules/nf-core/bcftools/merge/main.nf +++ b/modules/nf-core/bcftools/merge/main.nf @@ -12,7 +12,6 @@ process BCFTOOLS_MERGE { tuple val(meta2), path(fasta) tuple val(meta3), path(fai) path(bed) - tuple val(meta4), path(file_list) output: tuple val(meta), path("*.{bcf,vcf}{,.gz}"), emit: merged_variants @@ -26,21 +25,19 @@ process BCFTOOLS_MERGE { def prefix = task.ext.prefix ?: "${meta.id}" def regions = bed ? "--regions-file $bed" : "" - def files = file_list ? "--file-list $file_list" : $vcfs def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : - "vcf.gz" - + "vcf" + """ bcftools merge \\ $args \\ $regions \\ --threads $task.cpus \\ --output ${prefix}.${extension} \\ - $args \\ - $files + $vcfs cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/repeat_analysis.nf b/subworkflows/local/repeat_analysis.nf index f835179e..8940340b 100644 --- a/subworkflows/local/repeat_analysis.nf +++ b/subworkflows/local/repeat_analysis.nf @@ -34,33 +34,14 @@ workflow REPEAT_ANALYSIS { BCFTOOLS_SORT_TRGT.out.vcf .join(BCFTOOLS_INDEX_TRGT.out.csi) - .set{ ch_bcftools_query_in } - - ch_bcftools_query_in - .map{[['id':'multisample'],it[1]]} - .groupTuple() - .set{ vcfs } - - ch_bcftools_query_in - .map{[['id':'multisample'],it[2]]} + .toList() + .filter { it.size() > 1 } + .flatMap() + .map { meta, bcf, csi -> [ [ id : 'multisample' ], bcf, csi ] } .groupTuple() - .set{ csis } - - vcfs - .cross(csis) - .map{[it[0][0], it[0][1], it[1][1]]} - .set{ch_bcftools_merge_in} - - // BCFTools merge fails if only one file is provided in arguments, - // therefore make a list of all files to be merged and provide it - - ch_bcftools_merge_in - .map{ meta, vcf, index -> vcf.name.toString().replaceAll("[\\[\\]]", "") } - .collectFile(name: 'bcftools_merge_samples.txt', newLine: true) - .map{ file_list -> [ [:], file_list ] } - .set{ ch_bcftools_file_list } + .set{ ch_bcftools_merge_in } - BCFTOOLS_MERGE(ch_bcftools_merge_in, ch_fasta, ch_fai, [], ch_bcftools_file_list ) + BCFTOOLS_MERGE ( ch_bcftools_merge_in, ch_fasta, ch_fai, [] ) ch_versions = ch_versions.mix(TRGT.out.versions) ch_versions = ch_versions.mix(SAMTOOLS_SORT_TRGT.out.versions)