Commit
Move process config into pipeline code
Signed-off-by: Ben Sherman <bentshermann@gmail.com>
bentsherman committed Oct 4, 2023
1 parent 3bec233 commit 5047998
Showing 12 changed files with 572 additions and 865 deletions.
850 changes: 0 additions & 850 deletions conf/modules.config

Large diffs are not rendered by default.
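The whole of conf/modules.config is deleted by this commit; its per-process settings now sit next to the process invocations in the subworkflows below. As a rough illustration (reconstructed for this note, not copied from the deleted file), a typical withName block such as:

    process {
        withName: 'SAMPLESHEET_CHECK' {
            publishDir = [
                path: { "${params.outdir}/pipeline_info" },
                mode: params.publish_dir_mode,
                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
            ]
        }
    }

becomes a direct assignment on the process handle inside the workflow body, as seen in subworkflows/local/input_check.nf further down:

    SAMPLESHEET_CHECK.config.publishDir = [ /* same map as above */ ]
    SAMPLESHEET_CHECK ( samplesheet )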

37 changes: 37 additions & 0 deletions subworkflows/local/align_star.nf
@@ -32,7 +32,42 @@ workflow ALIGN_STAR {
ch_bam_transcript = Channel.empty()
ch_fastq = Channel.empty()
ch_tab = Channel.empty()
align_ext_args = [
'--quantMode TranscriptomeSAM',
'--twopassMode Basic',
'--outSAMtype BAM Unsorted',
'--readFilesCommand zcat',
'--runRNGseed 0',
'--outFilterMultimapNmax 20',
'--alignSJDBoverhangMin 1',
'--outSAMattributes NH HI AS NM MD',
'--quantTranscriptomeBan Singleend',
'--outSAMstrandField intronMotif',
params.save_unaligned ? '--outReadsUnmapped Fastx' : '',
params.extra_star_align_args ? params.extra_star_align_args.split("\\s(?=--)") : ''
].flatten().unique(false).join(' ').trim()
align_publish_dir = [
[
path: { "${params.outdir}/${params.aligner}/log" },
mode: params.publish_dir_mode,
pattern: '*.{out,tab}'
],
[
path: { "${params.outdir}/${params.aligner}" },
mode: params.publish_dir_mode,
pattern: '*.bam',
enabled: params.save_align_intermeds
],
[
path: { "${params.outdir}/${params.aligner}/unmapped" },
mode: params.publish_dir_mode,
pattern: '*.fastq.gz',
enabled: params.save_unaligned
]
]
if (is_aws_igenome) {
STAR_ALIGN_IGENOMES.config.ext.args = align_ext_args
STAR_ALIGN_IGENOMES.config.publishDir = align_publish_dir
STAR_ALIGN_IGENOMES ( reads, index, gtf, star_ignore_sjdbgtf, seq_platform, seq_center )
ch_orig_bam = STAR_ALIGN_IGENOMES.out.bam
ch_log_final = STAR_ALIGN_IGENOMES.out.log_final
@@ -44,6 +79,8 @@ workflow ALIGN_STAR {
ch_tab = STAR_ALIGN_IGENOMES.out.tab
ch_versions = ch_versions.mix(STAR_ALIGN_IGENOMES.out.versions.first())
} else {
STAR_ALIGN.config.ext.args = align_ext_args
STAR_ALIGN.config.publishDir = align_publish_dir
STAR_ALIGN ( reads, index, gtf, star_ignore_sjdbgtf, seq_platform, seq_center )
ch_orig_bam = STAR_ALIGN.out.bam
ch_log_final = STAR_ALIGN.out.log_final
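The align_ext_args assembly above is plain Groovy list manipulation. A minimal standalone sketch of its behaviour, with invented parameter values (not taken from the pipeline defaults):

    def defaults = ['--twopassMode Basic', '--runRNGseed 0']
    def extra    = '--runRNGseed 0 --outFilterMismatchNmax 5'   // hypothetical params.extra_star_align_args
    def args     = [ defaults, extra.split("\\s(?=--)") ]
        .flatten()        // lift the String[] produced by split() into the list
        .unique(false)    // drop repeated flags without mutating the source list
        .join(' ')
        .trim()
    assert args == '--twopassMode Basic --runRNGseed 0 --outFilterMismatchNmax 5'

Note that deduplication is by exact string, so a user-supplied flag only collapses with a default when the whole "flag value" string matches.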
5 changes: 5 additions & 0 deletions subworkflows/local/input_check.nf
@@ -9,6 +9,11 @@ workflow INPUT_CHECK {
samplesheet // file: /path/to/samplesheet.csv

main:
SAMPLESHEET_CHECK.config.publishDir = [
path: "${params.outdir}/pipeline_info",
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
SAMPLESHEET_CHECK ( samplesheet )
.csv
.splitCsv ( header:true, sep:',' )
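The saveAs closure attached to SAMPLESHEET_CHECK relies on standard publishDir behaviour: returning null from saveAs skips publishing that file, so versions.yml stays in the work directory while everything else lands in pipeline_info. A tiny sketch of the closure on its own (file names illustrative):

    def saveAs = { filename -> filename.equals('versions.yml') ? null : filename }
    assert saveAs('samplesheet.valid.csv') == 'samplesheet.valid.csv'   // published unchanged
    assert saveAs('versions.yml') == null                               // null => not published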
50 changes: 50 additions & 0 deletions subworkflows/local/prepare_genome.nf
@@ -55,10 +55,24 @@ workflow PREPARE_GENOME {

ch_versions = Channel.empty()

genome_publish_dir = [
path: "${params.outdir}/genome",
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.save_reference
]
genome_index_publish_dir = [
path: "${params.outdir}/genome/index",
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.save_reference
]

//
// Uncompress genome fasta file if required
//
if (fasta.endsWith('.gz')) {
GUNZIP_FASTA.config.publishDir = genome_publish_dir
ch_fasta = GUNZIP_FASTA ( [ [:], fasta ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions)
} else {
@@ -70,18 +84,23 @@
//
if (gtf) {
if (gtf.endsWith('.gz')) {
GUNZIP_GTF.config.publishDir = genome_publish_dir
ch_gtf = GUNZIP_GTF ( [ [:], gtf ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions)
} else {
ch_gtf = Channel.value(file(gtf))
}
} else if (gff) {
if (gff.endsWith('.gz')) {
GUNZIP_GFF.config.publishDir = genome_publish_dir
ch_gff = GUNZIP_GFF ( [ [:], gff ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions)
} else {
ch_gff = Channel.value(file(gff))
}

GFFREAD.config.ext.args = '--keep-exon-attrs -F -T'
GFFREAD.config.publishDir = genome_publish_dir
ch_gtf = GFFREAD ( ch_gff ).gtf
ch_versions = ch_versions.mix(GFFREAD.out.versions)
}
@@ -91,11 +110,14 @@
//
if (additional_fasta) {
if (additional_fasta.endsWith('.gz')) {
GUNZIP_ADDITIONAL_FASTA.config.publishDir = genome_publish_dir
ch_add_fasta = GUNZIP_ADDITIONAL_FASTA ( [ [:], additional_fasta ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_ADDITIONAL_FASTA.out.versions)
} else {
ch_add_fasta = Channel.value(file(additional_fasta))
}

CAT_ADDITIONAL_FASTA.config.publishDir = genome_publish_dir
CAT_ADDITIONAL_FASTA ( ch_fasta, ch_gtf, ch_add_fasta, biotype )
ch_fasta = CAT_ADDITIONAL_FASTA.out.fasta
ch_gtf = CAT_ADDITIONAL_FASTA.out.gtf
@@ -107,12 +129,14 @@
//
if (gene_bed) {
if (gene_bed.endsWith('.gz')) {
GUNZIP_GENE_BED.config.publishDir = genome_publish_dir
ch_gene_bed = GUNZIP_GENE_BED ( [ [:], gene_bed ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions)
} else {
ch_gene_bed = Channel.value(file(gene_bed))
}
} else {
GTF2BED.config.publishDir = genome_publish_dir
ch_gene_bed = GTF2BED ( ch_gtf ).bed
ch_versions = ch_versions.mix(GTF2BED.out.versions)
}
@@ -122,18 +146,23 @@
//
if (transcript_fasta) {
if (transcript_fasta.endsWith('.gz')) {
GUNZIP_TRANSCRIPT_FASTA.config.publishDir = genome_publish_dir
ch_transcript_fasta = GUNZIP_TRANSCRIPT_FASTA ( [ [:], transcript_fasta ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_TRANSCRIPT_FASTA.out.versions)
} else {
ch_transcript_fasta = Channel.value(file(transcript_fasta))
}
if (gencode) {
PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.config.publishDir = genome_publish_dir
PREPROCESS_TRANSCRIPTS_FASTA_GENCODE ( ch_transcript_fasta )
ch_transcript_fasta = PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.out.fasta
ch_versions = ch_versions.mix(PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.out.versions)
}
} else {
GTF_GENE_FILTER.config.publishDir = genome_publish_dir
ch_filter_gtf = GTF_GENE_FILTER ( ch_fasta, ch_gtf ).gtf

MAKE_TRANSCRIPTS_FASTA.config.publishDir = genome_publish_dir
ch_transcript_fasta = MAKE_TRANSCRIPTS_FASTA ( ch_fasta, ch_filter_gtf ).transcript_fasta
ch_versions = ch_versions.mix(GTF_GENE_FILTER.out.versions)
ch_versions = ch_versions.mix(MAKE_TRANSCRIPTS_FASTA.out.versions)
@@ -142,6 +171,7 @@
//
// Create chromosome sizes file
//
CUSTOM_GETCHROMSIZES.config.publishDir = genome_publish_dir
CUSTOM_GETCHROMSIZES ( ch_fasta.map { [ [:], it ] } )
ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] }
ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] }
@@ -154,6 +184,8 @@
if ('bbsplit' in prepare_tool_indices) {
if (bbsplit_index) {
if (bbsplit_index.endsWith('.tar.gz')) {
UNTAR_BBSPLIT_INDEX.config.ext.args2 = '--no-same-owner'
UNTAR_BBSPLIT_INDEX.config.publishDir = genome_index_publish_dir
ch_bbsplit_index = UNTAR_BBSPLIT_INDEX ( [ [:], bbsplit_index ] ).untar.map { it[1] }
ch_versions = ch_versions.mix(UNTAR_BBSPLIT_INDEX.out.versions)
} else {
@@ -169,6 +201,8 @@
.collect { [ it ] } // Collect entries as a list to pass as "tuple val(short_names), path(path_to_fasta)" to module
.set { ch_bbsplit_fasta_list }

BBMAP_BBSPLIT.config.ext.args = 'build=1'
BBMAP_BBSPLIT.config.publishDir = genome_index_publish_dir
ch_bbsplit_index = BBMAP_BBSPLIT ( [ [:], [] ], [], ch_fasta, ch_bbsplit_fasta_list, true ).index
ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions)
}
@@ -181,16 +215,20 @@
if ('star_salmon' in prepare_tool_indices) {
if (star_index) {
if (star_index.endsWith('.tar.gz')) {
UNTAR_STAR_INDEX.config.ext.args2 = '--no-same-owner'
UNTAR_STAR_INDEX.config.publishDir = genome_index_publish_dir
ch_star_index = UNTAR_STAR_INDEX ( [ [:], star_index ] ).untar.map { it[1] }
ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions)
} else {
ch_star_index = Channel.value(file(star_index))
}
} else {
if (is_aws_igenome) {
STAR_GENOMEGENERATE_IGENOMES.config.publishDir = genome_index_publish_dir
ch_star_index = STAR_GENOMEGENERATE_IGENOMES ( ch_fasta, ch_gtf ).index
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE_IGENOMES.out.versions)
} else {
STAR_GENOMEGENERATE.config.publishDir = genome_index_publish_dir
ch_star_index = STAR_GENOMEGENERATE ( ch_fasta, ch_gtf ).index
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
}
@@ -204,12 +242,16 @@
if ('star_rsem' in prepare_tool_indices) {
if (rsem_index) {
if (rsem_index.endsWith('.tar.gz')) {
UNTAR_RSEM_INDEX.config.ext.args2 = '--no-same-owner'
UNTAR_RSEM_INDEX.config.publishDir = genome_index_publish_dir
ch_rsem_index = UNTAR_RSEM_INDEX ( [ [:], rsem_index ] ).untar.map { it[1] }
ch_versions = ch_versions.mix(UNTAR_RSEM_INDEX.out.versions)
} else {
ch_rsem_index = Channel.value(file(rsem_index))
}
} else {
RSEM_PREPAREREFERENCE_GENOME.config.ext.args = '--star'
RSEM_PREPAREREFERENCE_GENOME.config.publishDir = genome_index_publish_dir
ch_rsem_index = RSEM_PREPAREREFERENCE_GENOME ( ch_fasta, ch_gtf ).index
ch_versions = ch_versions.mix(RSEM_PREPAREREFERENCE_GENOME.out.versions)
}
@@ -222,19 +264,23 @@
ch_hisat2_index = Channel.empty()
if ('hisat2' in prepare_tool_indices) {
if (!splicesites) {
HISAT2_EXTRACTSPLICESITES.config.publishDir = genome_index_publish_dir
ch_splicesites = HISAT2_EXTRACTSPLICESITES ( ch_gtf.map { [ [:], it ] } ).txt.map { it[1] }
ch_versions = ch_versions.mix(HISAT2_EXTRACTSPLICESITES.out.versions)
} else {
ch_splicesites = Channel.value(file(splicesites))
}
if (hisat2_index) {
if (hisat2_index.endsWith('.tar.gz')) {
UNTAR_HISAT2_INDEX.config.ext.args2 = '--no-same-owner'
UNTAR_HISAT2_INDEX.config.publishDir = genome_index_publish_dir
ch_hisat2_index = UNTAR_HISAT2_INDEX ( [ [:], hisat2_index ] ).untar.map { it[1] }
ch_versions = ch_versions.mix(UNTAR_HISAT2_INDEX.out.versions)
} else {
ch_hisat2_index = Channel.value(file(hisat2_index))
}
} else {
HISAT2_BUILD.config.publishDir = genome_index_publish_dir
ch_hisat2_index = HISAT2_BUILD ( ch_fasta.map { [ [:], it ] }, ch_gtf.map { [ [:], it ] }, ch_splicesites.map { [ [:], it ] } ).index.map { it[1] }
ch_versions = ch_versions.mix(HISAT2_BUILD.out.versions)
}
@@ -246,13 +292,17 @@
ch_salmon_index = Channel.empty()
if (salmon_index) {
if (salmon_index.endsWith('.tar.gz')) {
UNTAR_SALMON_INDEX.config.ext.args2 = '--no-same-owner'
UNTAR_SALMON_INDEX.config.publishDir = genome_index_publish_dir
ch_salmon_index = UNTAR_SALMON_INDEX ( [ [:], salmon_index ] ).untar.map { it[1] }
ch_versions = ch_versions.mix(UNTAR_SALMON_INDEX.out.versions)
} else {
ch_salmon_index = Channel.value(file(salmon_index))
}
} else {
if ('salmon' in prepare_tool_indices) {
SALMON_INDEX.config.ext.args = params.gencode ? '--gencode' : ''
SALMON_INDEX.config.publishDir = genome_index_publish_dir
ch_salmon_index = SALMON_INDEX ( ch_fasta, ch_transcript_fasta ).index
ch_versions = ch_versions.mix(SALMON_INDEX.out.versions)
}
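Every branch of this subworkflow follows the same two-step pattern: assign ext.args and publishDir on the process handle, then invoke it. A generic sketch of that pattern (process and parameter names hypothetical, assuming the Process.config accessor this commit relies on):

    SOME_INDEX_TOOL.config.ext.args   = params.some_flag ? '--some-flag' : ''   // hypothetical flag
    SOME_INDEX_TOOL.config.publishDir = genome_index_publish_dir                // map defined once at the top
    ch_some_index = SOME_INDEX_TOOL ( ch_fasta ).index

Because genome_publish_dir and genome_index_publish_dir both set enabled: params.save_reference, none of these processes publish anything unless --save_reference is given.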
30 changes: 30 additions & 0 deletions subworkflows/local/quantify_rsem.nf
@@ -19,6 +19,31 @@ workflow QUANTIFY_RSEM {
//
// Quantify reads with RSEM
//
RSEM_CALCULATEEXPRESSION.config.ext.args = [
'--star',
'--star-output-genome-bam',
'--star-gzipped-read-file',
'--estimate-rspd',
'--seed 1'
].join(' ').trim()
RSEM_CALCULATEEXPRESSION.config.publishDir = [
[
path: "${params.outdir}/${params.aligner}",
mode: params.publish_dir_mode,
pattern: "*.{stat,results}"
],
[
path: "${params.outdir}/${params.aligner}",
mode: params.publish_dir_mode,
pattern: "*.bam",
enabled: params.save_align_intermeds
],
[
path: "${params.outdir}/${params.aligner}/log",
mode: params.publish_dir_mode,
pattern: "*.log"
]
]
RSEM_CALCULATEEXPRESSION ( reads, index )
ch_versions = ch_versions.mix(RSEM_CALCULATEEXPRESSION.out.versions.first())

@@ -31,6 +56,11 @@
//
// Merge counts across samples
//
RSEM_MERGE_COUNTS.config.publishDir = [
path: "${params.outdir}/${params.aligner}",
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
RSEM_MERGE_COUNTS (
RSEM_CALCULATEEXPRESSION.out.counts_gene.collect{it[1]}, // [meta, counts]: Collect the second element (counts files) in the channel across all samples
RSEM_CALCULATEEXPRESSION.out.counts_transcript.collect{it[1]}
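For RSEM_MERGE_COUNTS, the collect { it[1] } calls gather one counts file per sample into a single list so the merge runs once over all samples. A minimal sketch with invented sample names:

    // The upstream channel emits one [meta, file] tuple per sample, e.g.
    //   [ [id:'WT_REP1'], WT_REP1.genes.results ]
    //   [ [id:'WT_REP2'], WT_REP2.genes.results ]
    // collect { it[1] } keeps only the second tuple element from every emission and
    // produces a single value: [ WT_REP1.genes.results, WT_REP2.genes.results ]
    ch_counts_gene = RSEM_CALCULATEEXPRESSION.out.counts_gene.collect { it[1] }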
21 changes: 21 additions & 0 deletions subworkflows/nf-core/bam_markduplicates_picard/main.nf

Some generated files are not rendered by default.
