From 7efb7d380e43643872c5738fd50ccffe6bd13235 Mon Sep 17 00:00:00 2001 From: Julian Thomas Mohr Date: Wed, 26 Jul 2023 13:42:40 +0200 Subject: [PATCH] Replace MERGE_REPLICATES by nf-core/module SAMTOOLS_MERGE and integrate into pipeline --- local_modules/singleton_input/main.nf | 17 ++++++ main.nf | 82 ++++++++++++++++++++++++--- 2 files changed, 92 insertions(+), 7 deletions(-) create mode 100644 local_modules/singleton_input/main.nf diff --git a/local_modules/singleton_input/main.nf b/local_modules/singleton_input/main.nf new file mode 100644 index 0000000..a86047d --- /dev/null +++ b/local_modules/singleton_input/main.nf @@ -0,0 +1,17 @@ +process SINGLETON_INPUT { + tag "$meta.id" + + input: + tuple val(meta), path(input_bam) + + output: + tuple val(meta), path("${meta.id}.bam") + + when: + task.ext.when == null || task.ext.when + + script: + """ + ln -s $input_bam ${meta.id}.bam + """ +} diff --git a/main.nf b/main.nf index 4ee30e6..a1c9376 100755 --- a/main.nf +++ b/main.nf @@ -3,10 +3,12 @@ nextflow.enable.dsl = 2 +include { SINGLETON_INPUT } from './local_modules/singleton_input/main' +include { SAMTOOLS_MERGE } from './nf-core-modules/modules/nf-core/samtools/merge/main' include { CNVKIT_BATCH } from './nf-core-modules/modules/nf-core/cnvkit/batch/main' include { SEQUENZAUTILS_GCWIGGLE } from './nf-core-modules/modules/nf-core/sequenzautils/gcwiggle/main' include { SEQUENZAUTILS_BAM2SEQZ } from './nf-core-modules/modules/nf-core/sequenzautils/bam2seqz/main' -include { MERGE_REPLICATES } from './local_modules/merge_replicates' +//include { MERGE_REPLICATES } from './local_modules/merge_replicates' include { SEQUENZAUTILS_SEQZBINNING } from './local_modules/sequenzautils/seqzbinning/main' include { R_SEQUENZA } from './local_modules/rsequenza/main' @@ -43,25 +45,91 @@ if (!params.input_files) { else { Channel .fromPath(params.input_files) - .splitCsv(header: ['name', 'tumor_bam', 'normal_bam'], sep: "\t") - .map{ row-> tuple([id: row.name], row.tumor_bam, row.normal_bam) } + .splitCsv(header: ['sample', 'tumor_bam', 'normal_bam'], sep: "\t") + .map{ row-> tuple([id: row.sample], row.tumor_bam, row.normal_bam) } .set { input_files } } workflow { - MERGE_REPLICATES(input_files) - merged_bams = MERGE_REPLICATES.out.merged_bams + + tumor_bams = input_files + .map { + meta, tumor_bam, normal_bam -> + def fmeta = [:] + fmeta.id = meta.id + ".tumor" + fmeta.type = "tumor" + [fmeta, tumor_bam.tokenize(',')] + } + .branch{ + single: it[1].size() == 1 + multiple: it[1].size() > 1 + } + + normal_bams = input_files + .map { + meta, tumor_bam, normal_bam -> + def fmeta = [:] + fmeta.id = meta.id + ".normal" + fmeta.type = "normal" + [fmeta, normal_bam.tokenize(',')] + } + .branch{ + single: it[1].size() == 1 + multiple: it[1].size() > 1 + } + + SINGLETON_INPUT(tumor_bams.single.mix(normal_bams.single)) + + SAMTOOLS_MERGE( + tumor_bams.multiple.mix(normal_bams.multiple), + [[], []], + [[], []] + ) + + prepared_tumor_bams = SINGLETON_INPUT.out + .filter { it[0].type == "tumor" } + .mix( + SAMTOOLS_MERGE.out.bam + .filter { it[0].type == "tumor" } + ).map { + meta, bam -> + def fmeta = [:] + fmeta.id = meta.id[0..-(meta.type.length() + 2)] + [fmeta, bam] + } + + prepared_normal_bams = SINGLETON_INPUT.out + .filter { it[0].type == "normal" } + .mix( + SAMTOOLS_MERGE.out.bam + .filter { it[0].type == "normal" } + ).map { + meta, bam -> + def fmeta = [:] + fmeta.id = meta.id[0..-(meta.type.length() + 2)] + [fmeta, bam] + } + + ch_meta_tumor_normal = prepared_tumor_bams + .join(prepared_normal_bams, by: [0]) if (params.toolslist.contains('cnvkit')) { // NOTE: it does not provide fasta.fai or CNVkit reference, but these are created every time - CNVKIT_BATCH(merged_bams, params.reference, [], params.intervals, [], false) + CNVKIT_BATCH( + ch_meta_tumor_normal, + params.reference, + [], + params.intervals, + [], + false + ) } if (params.toolslist.contains('sequenza')) { SEQUENZAUTILS_GCWIGGLE([[id:'reference'], params.reference]) wig = SEQUENZAUTILS_GCWIGGLE.out.wig.map { it[1] } - SEQUENZAUTILS_BAM2SEQZ(merged_bams, params.reference, wig) + SEQUENZAUTILS_BAM2SEQZ(ch_meta_tumor_normal, params.reference, wig) SEQUENZAUTILS_SEQZBINNING(SEQUENZAUTILS_BAM2SEQZ.out.seqz)