From 90a3a78a87759d3a0db11a0ecd40da56ce7926ec Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Thu, 1 Feb 2024 15:15:07 +1100 Subject: [PATCH 01/17] Frame out Neo subworkflow --- conf/modules.config | 9 ++++ lib/Constants.groovy | 1 + modules/local/neo/Dockerfile | 22 ++++++++++ modules/local/neo/main.nf | 44 +++++++++++++++++++ modules/local/neo/meta.yml | 0 subworkflows/local/neo_prediction.nf | 66 ++++++++++++++++++++++++++++ workflows/wgts.nf | 19 ++++++++ 7 files changed, 161 insertions(+) create mode 100644 modules/local/neo/Dockerfile create mode 100644 modules/local/neo/main.nf create mode 100644 modules/local/neo/meta.yml create mode 100644 subworkflows/local/neo_prediction.nf diff --git a/conf/modules.config b/conf/modules.config index cc09c657..74da16c7 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -218,6 +218,15 @@ process { ] } + withName: 'NEO' { + ext.jarPath = '/opt/neo/neo.jar' + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : "${meta.key}/${filename}" }, + ] + } + withName: 'CUPPA' { publishDir = [ path: { "${params.outdir}" }, diff --git a/lib/Constants.groovy b/lib/Constants.groovy index cb827975..15d41689 100644 --- a/lib/Constants.groovy +++ b/lib/Constants.groovy @@ -47,6 +47,7 @@ class Constants { LILAC, LINX, MARKDUPS, + NEO, ORANGE, PAVE, PURPLE, diff --git a/modules/local/neo/Dockerfile b/modules/local/neo/Dockerfile new file mode 100644 index 00000000..5325eb6c --- /dev/null +++ b/modules/local/neo/Dockerfile @@ -0,0 +1,22 @@ +FROM mambaorg/micromamba:0.24.0 + +USER root + +RUN \ + apt-get update && \ + apt-get install -y procps wget && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN \ + mkdir -p /opt/neo/ && \ + wget -O /opt/neo/neo.jar 'https://github.com/hartwigmedical/hmftools/releases/download/neo-v1.1_beta/neo_v1.1.jar' + +USER mambauser + +RUN \ + micromamba install -y -n base -c bioconda -c conda-forge \ + 'openjdk >=8' && \ + micromamba clean --all --yes + +ENV PATH="/opt/conda/bin:/opt/conda/condabin:${PATH}" diff --git a/modules/local/neo/main.nf b/modules/local/neo/main.nf new file mode 100644 index 00000000..287ccfe6 --- /dev/null +++ b/modules/local/neo/main.nf @@ -0,0 +1,44 @@ +process NEO { + tag "${meta.id}" + label 'process_medium' + + container 'docker.io/scwatts/neo:1.1_beta--0' + + input: + val(meta) + + //tuple val(meta), path(tumor_bam), path(normal_bam), path(tumor_bai), path(normal_bai) + //path genome_fasta + //val genome_ver + //path ensembl_data_resources + + output: + tuple val(meta), path('neo/'), emit: neo_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + java \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + -jar ${task.ext.jarPath} \\ + ${args} \\ + XXX + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + neo: \$(java -jar ${task.ext.jarPath} -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir -p 
neo/ + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} + diff --git a/modules/local/neo/meta.yml b/modules/local/neo/meta.yml new file mode 100644 index 00000000..e69de29b diff --git a/subworkflows/local/neo_prediction.nf b/subworkflows/local/neo_prediction.nf new file mode 100644 index 00000000..c57037fb --- /dev/null +++ b/subworkflows/local/neo_prediction.nf @@ -0,0 +1,66 @@ +// +// XXX +// + +import Constants +import Utils + +include { ISOFOX } from '../../modules/local/isofox/main' +include { LILAC } from '../../modules/local/lilac/main' +include { NEO as NEO_PREDICTOR } from '../../modules/local/neo/main' +include { NEO as NEO_SCORER } from '../../modules/local/neo/main' + +workflow NEO_PREDICTION{ + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_isofox // channel: [mandatory] [ meta, isofox_dir ] + ch_purple // channel: [mandatory] [ meta, purple_dir ] + ch_sage_somatic_append // channel: [mandatory] [ meta, sage_append_vcf ] + ch_lilac // channel: [mandatory] [ meta, lilac_dir ] + ch_linx // channel: [mandatory] [ meta, linx_dir ] + + //// Reference data + //genome_fasta // channel: [mandatory] /path/to/genome_fasta + //genome_fai // channel: [mandatory] /path/to/genome_fai + //genome_dict // channel: [mandatory] /path/to/genome_dict + + // other reference data, or placeholders + + main: + // Channel for versions.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Neo prediction + // 1. select input sources after combine required channels (i.e ch_purple, ch_linx) + // 2. get runnable subjects/inputs + // 3. format input channel + // 4. run process NEO_PREDICTOR + // 5. restore meta, set skip entries + + // Feeding the Neo process raw inputs for demo purposes only + NEO_PREDICTOR(ch_inputs) + + // Isofox annotation + // 1. take outputs from NEO_PREDICTOR + // 2. format input channel + // 3. run process ISOFOX (also requires changes to process to enable alt. run mode) + // 4. 
restore meta + + // Assuming SAGE append will be appropriately handled upstream. Need to discuss some options: + // * -bqr_enabled true + // * -max_read_depth 100000 + + // Neo score + // 1. select input sources after combine required channels (i.e ch_purple, ch_lilac, ch_isofox, above process outputs) + // 2. get runnable subjects/inputs + // 3. format input channel + // 4. run NEO_SCORER + + // Feeding the Neo process raw inputs for demo purposes only + NEO_SCORER(ch_inputs) + + emit: + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/workflows/wgts.nf b/workflows/wgts.nf index 0541966d..adb2d85d 100644 --- a/workflows/wgts.nf +++ b/workflows/wgts.nf @@ -67,6 +67,7 @@ include { ISOFOX_QUANTIFICATION } from '../subworkflows/local/isofox_quantificat include { LILAC_CALLING } from '../subworkflows/local/lilac_calling' include { LINX_ANNOTATION } from '../subworkflows/local/linx_annotation' include { LINX_PLOTTING } from '../subworkflows/local/linx_plotting' +include { NEO_PREDICTION } from '../subworkflows/local/neo_prediction' include { ORANGE_REPORTING } from '../subworkflows/local/orange_reporting' include { PAVE_ANNOTATION } from '../subworkflows/local/pave_annotation' include { PREPARE_REFERENCE } from '../subworkflows/local/prepare_reference' @@ -708,6 +709,24 @@ workflow WGTS { } + // + // SUBWORKFLOW: XXX + // + if (run_config.stages.neo) { + + NEO_PREDICTION( + ch_inputs, + ch_isofox_out, + ch_purple_out, + ch_sage_somatic_append_out, + ch_lilac_out, + ch_linx_somatic_out, + ) + + ch_versions = ch_versions.mix(NEO_PREDICTION.out.versions) + + } + // // SUBWORKFLOW: Run CUPPA predict tissue of origin // From 4913bb83b9b5d50100eb9e134e30079302b52d14 Mon Sep 17 00:00:00 2001 From: Charles Shale Date: Mon, 5 Feb 2024 13:38:52 +1100 Subject: [PATCH 02/17] First cut of Neoepitope subworkflow --- conf/hmf_data.config | 6 +- conf/modules.config | 2 +- modules/local/neo/finder/main.nf | 50 ++++++ modules/local/neo/isofox/main.nf | 56 +++++++ 
modules/local/neo/main.nf | 44 ----- modules/local/neo/scorer/main.nf | 66 ++++++++ subworkflows/local/neo_prediction.nf | 233 +++++++++++++++++++++++---- workflows/wgts.nf | 6 + 8 files changed, 388 insertions(+), 75 deletions(-) create mode 100644 modules/local/neo/finder/main.nf create mode 100644 modules/local/neo/isofox/main.nf delete mode 100644 modules/local/neo/main.nf create mode 100644 modules/local/neo/scorer/main.nf diff --git a/conf/hmf_data.config b/conf/hmf_data.config index 6fdcba0b..3c17ba27 100644 --- a/conf/hmf_data.config +++ b/conf/hmf_data.config @@ -20,9 +20,13 @@ params { isofox_gc_ratios = 'rna_pipeline/read_100_exp_gc_ratios.csv' // LILAC lilac_resources = 'dna_pipeline/immune/' + // NEO + neo_resources = 'neo/' // ORANGE cohort_mapping = 'orange/cohort_mapping.tsv' - cohort_percentiles = 'orange/cohort_percentiles.tsv' + // RNA COHORT DATA + cohort_percentiles = 'orange/cohort_percentiles.tsv' // consider renaming and moving to rna_pipeline + cohort_tpm_medians = 'rna_pipeline/cohort_tpm_medians.tsv' alt_sj_distribution = 'rna_pipeline/isofox.hmf_3444.alt_sj_cohort.37.csv' gene_exp_distribution = 'rna_pipeline/isofox.hmf_3444.gene_distribution.37.csv' // SAGE diff --git a/conf/modules.config b/conf/modules.config index 74da16c7..dd090cdd 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -218,7 +218,7 @@ process { ] } - withName: 'NEO' { + withName: 'NEO_.*' { ext.jarPath = '/opt/neo/neo.jar' publishDir = [ path: { "${params.outdir}" }, diff --git a/modules/local/neo/finder/main.nf b/modules/local/neo/finder/main.nf new file mode 100644 index 00000000..60cbf2e3 --- /dev/null +++ b/modules/local/neo/finder/main.nf @@ -0,0 +1,50 @@ +process NEO_FINDER { + tag "${meta.id}" + label 'process_low' + + container 'docker.io/scwatts/neo:1.1_beta--0' + + input: + tuple val(meta), path(purple_dir), path(linx_dir) + path genome_fasta + val genome_ver + path ensembl_data_resources + + output: + tuple val(meta), path('neo/'), emit: 
neo_finder_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + java \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + -cp ${task.ext.jarPath} \\ + com.hartwig.hmftools.neo.epitope.NeoEpitopeFinder \\ + ${args} \\ + -sample ${meta.sample_id} \\ + -linx_dir ${linx_dir} \\ + -somatic_vcf ${purple_dir}/${meta.sample_id}.purple.somatic.vcf.gz \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + -output_dir ${output_dir} \\ + -log_debug \\ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + neo: \$(java -jar ${task.ext.jarPath} -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir -p neo/ + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} + diff --git a/modules/local/neo/isofox/main.nf b/modules/local/neo/isofox/main.nf new file mode 100644 index 00000000..4ca86ba5 --- /dev/null +++ b/modules/local/neo/isofox/main.nf @@ -0,0 +1,56 @@ +process ISOFOX_NEO { + tag "${meta.id}" + label 'process_medium' + + container 'docker.io/scwatts/isofox:1.7.1--0' + + input: + tuple val(meta), path(bam), path(bai) + val functions + val read_length + path genome_fasta + val genome_ver + path genome_fai + path ensembl_data_resources + + output: + tuple val(meta), path('isofox/'), emit: isofox_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + mkdir -p isofox/ + + java \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + -jar ${task.ext.jarPath} \\ + ${args} \\ + -sample ${meta.sample_id} \\ + -bam_file ${bam} \\ + -functions NEO_EPITOPES \\ + -neoepitope_file ${neo_finder_dir}/${meta.sample_id}.neo.neo_data.tsv \\ + -read_length ${read_length} \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + 
-threads ${task.cpus} \\ + -output_dir isofox/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + isofox: \$(java -jar ${task.ext.jarPath} -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir -p isofox/ + touch isofox/placeholder + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/neo/main.nf b/modules/local/neo/main.nf deleted file mode 100644 index 287ccfe6..00000000 --- a/modules/local/neo/main.nf +++ /dev/null @@ -1,44 +0,0 @@ -process NEO { - tag "${meta.id}" - label 'process_medium' - - container 'docker.io/scwatts/neo:1.1_beta--0' - - input: - val(meta) - - //tuple val(meta), path(tumor_bam), path(normal_bam), path(tumor_bai), path(normal_bai) - //path genome_fasta - //val genome_ver - //path ensembl_data_resources - - output: - tuple val(meta), path('neo/'), emit: neo_dir - path 'versions.yml' , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - - """ - java \\ - -Xmx${Math.round(task.memory.bytes * 0.95)} \\ - -jar ${task.ext.jarPath} \\ - ${args} \\ - XXX - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - neo: \$(java -jar ${task.ext.jarPath} -version | sed 's/^.* //') - END_VERSIONS - """ - - stub: - """ - mkdir -p neo/ - echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml - """ -} - diff --git a/modules/local/neo/scorer/main.nf b/modules/local/neo/scorer/main.nf new file mode 100644 index 00000000..7dd7ec09 --- /dev/null +++ b/modules/local/neo/scorer/main.nf @@ -0,0 +1,66 @@ +process NEO_SCORER { + tag "${meta.id}" + label 'process_medium' + + container 'docker.io/scwatts/neo:1.1_beta--0' + + input: + tuple val(meta), path(purple_dir), path(isofox_dir), path(lilac_dir), path(isofox_neo_dir), path(neo_finder_dir) + path genome_fasta + val genome_ver + path ensembl_data_resources + path neo_resources + path cohort_tpm_medians + + output: + tuple val(meta), path('neo/'), emit: neo_scorer_dir 
+ path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def rna_sample_arg = meta.containsKey('sample_rna_id') ? "-rna_sample ${meta.sample_rna_id}" : '' + def rna_somatic_vcf_arg = meta.containsKey('sample_rna_id') ? "-rna_somatic_vcf ${purple_dir}/${meta.sample_id}.sage_append.vcf.gz" : '' + + // NeoScorer expects the fusion-neoepitopes which Isofox has annotated with RNA to be in the Isofox directory, so put them + // and the standard Isofox files (just TPM is used) into a new shared directory + // ie isofox_neo_dir + neo_finder_dir -> new directory for isofox data -> passed into -isofox_dir + def isofox_dir_arg = meta.containsKey('sample_rna_id') ? "-isofox_dir /path/isofox_combined_dir" : '' + + """ + java \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + -cp ${task.ext.jarPath} \\ + com.hartwig.hmftools.neo.score.NeoScorer \\ + ${args} \\ + -sample ${meta.sample_id} \\ + ${rna_sample_arg} \\ + -purple_dir ${purple_dir} \\ + -lilac_dir ${lilac_dir} \\ + ${isofox_dir_arg} \\ + ${rna_somatic_vcf_arg} ]] + -neo_dir ${neo_finder_dir} \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + -score_file_dir ${neo_resources} \\ + -cancer_tpm_medians_file ${cohort_tpm_medians} \\ + -output_dir ${output_dir} \\ + -log_debug \\ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + neo: \$(java -jar ${task.ext.jarPath} -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir -p neo/ + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} + diff --git a/subworkflows/local/neo_prediction.nf b/subworkflows/local/neo_prediction.nf index c57037fb..3140fa53 100644 --- a/subworkflows/local/neo_prediction.nf +++ b/subworkflows/local/neo_prediction.nf @@ -7,8 +7,8 @@ import Utils include { ISOFOX } from '../../modules/local/isofox/main' include { LILAC } from 
'../../modules/local/lilac/main' -include { NEO as NEO_PREDICTOR } from '../../modules/local/neo/main' -include { NEO as NEO_SCORER } from '../../modules/local/neo/main' +include { NEO_FINDER } from '../../modules/local/neo/finder/main' +include { NEO_SCORER } from '../../modules/local/neo/scorer/main' workflow NEO_PREDICTION{ take: @@ -20,46 +20,221 @@ workflow NEO_PREDICTION{ ch_lilac // channel: [mandatory] [ meta, lilac_dir ] ch_linx // channel: [mandatory] [ meta, linx_dir ] - //// Reference data - //genome_fasta // channel: [mandatory] /path/to/genome_fasta - //genome_fai // channel: [mandatory] /path/to/genome_fai - //genome_dict // channel: [mandatory] /path/to/genome_dict - - // other reference data, or placeholders + // Reference data + genome_version // channel: [mandatory] genome version + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_fai // channel: [mandatory] /path/to/genome_fai + ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ + neo_resources // channel: [mandatory] /path/to/neo_resources/ + cohort_tpm_medians // channel: [mandatory] /path/to/cohort_tpm_medians/ main: // Channel for versions.yml files // channel: [ versions.yml ] ch_versions = Channel.empty() - // Neo prediction - // 1. select input sources after combine required channels (i.e ch_purple, ch_linx) - // 2. get runnable subjects/inputs - // 3. format input channel - // 4. run process NEO_PREDICTOR - // 5. 
restore meta, set skip entries + // Step 1: Identify neoepitopes from Purple somatic variants and Linx's (neoepitope) fusions + + // Select input sources + // channel: [ meta, isofox_dir, purple_dir, linx_annotation_dir ] + ch_inputs_finder_selected = WorkflowOncoanalyser.groupByMeta( + ch_purple, + ch_linx, + ) + .map { meta, purple_dir, linx_annotation_dir -> + + def inputs = [ + Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), + Utils.selectCurrentOrExisting(linx_annotation_dir, meta, Constants.INPUT.LINX_ANNO_DIR_TUMOR), + ] + + return [meta, *inputs] + } + + // Sort inputs + // channel: runnable: [ meta, purple_dir, linx_annotation_dir ] + // channel: skip: [ meta ] + ch_inputs_finder_sorted = ch_inputs_finder_selected + .branch { meta, purple_dir, linx_annotation_dir -> + + def has_normal_dna = Utils.hasNormalDnaBam(meta) + + def has_runnable_inputs = purple_dir && linx_annotation_dir && has_normal_dna + + runnable: has_runnable_inputs + skip: true + return meta + } + + // Create process input channel + // channel: sample_data: [ meta, purple_dir, linx_annotation_dir ] + ch_finder_inputs = ch_inputs_finder_sorted.runnable + .map{ meta, purple_dir, linx_annotation_dir -> + + def meta_neo_finder = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + ] + + return [meta_neo_finder, purple_dir, linx_annotation_dir] + } + // Feeding the Neo process raw inputs for demo purposes only - NEO_PREDICTOR(ch_inputs) + NEO_FINDER( + ch_finder_inputs, + genome_fasta, + genome_version, + ensembl_data_resources, + ) + + ch_versions = ch_versions.mix(NEO_FINDER.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, neo_finder_dir ] + ch_finder_outputs = WorkflowOncoanalyser.restoreMeta(NEO_FINDER.out.neo_finder_dir, ch_inputs) + + // Step 2: When RNA is present, annotate the fusion-derived neoepitope with RNA using Isofox + + /* + + // Select input sources + // channel: [ meta, 
neo_finder_dir, tumor_bam_rna, tumor_bai_rna ] + ch_inputs_isofox_sorted = WorkflowOncoanalyser.groupByMeta( + ch_finder_outputs, + // channel: [ meta, tumor_rna_bam (optional), tumor_rna_bai (optional) ] + ch_inputs + .map { meta -> + def has_rna = Utils.hasTumorRnaBam(meta) + + return [ + meta, + has_rna ? Utils.getTumorRnaBam(meta) : [], + has_rna ? Utils.getTumorRnaBai(meta) : [], + ] + }, + + ) + + // Sort inputs + ch_inputs_isofox_sorted = ch_finder_outputs + .branch { + + def has_rna = Utils.hasTumorRnaBam(meta) + - // Isofox annotation - // 1. take outputs from NEO_PREDICTOR - // 2. format input channel - // 3. run process ISOFOX (also requires changes to process to enable alt. run mode) - // 4. restore meta - // Assuming SAGE append will be appropriately handled upstream. Need to discuss some options: - // * -bqr_enabled true - // * -max_read_depth 100000 + runnable: + skip: + meta + + } + + // Create process input channel + // channel: [ meta_isofox, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ] + ch_isofox_inputs = ch_inputs_isofox_sorted.runnable + .map { meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna -> + + def meta_isofox = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + ] + + return [meta_isofox, Utils.getTumorRnaBam(meta), Utils.getTumorRnaBai(meta)] + } + + // Run process + ISOFOX_NEO( + ch_isofox_inputs, + isofox_read_length, + genome_fasta, + genome_version, + genome_fai, + ensembl_data_resources, + ) + + ch_versions = ch_versions.mix(ISOFOX.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, isofox_dir ] + ch_outputs = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ISOFOX.out.isofox_neo_dir, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + + */ + + // ch_finder_outputs + + // Step 3: Run Neo's binding prediction routine for neoepitope's pHLAs, taking in Lilac HLA alleles and previously + // derived neoepitopes with RNA annotation if it 
was available + + // Select input sources + // channel: [ meta, isofox_dir, purple_dir, lilac_dir, isofox_dir ] + // TO_DO - how to pass in the directories from step 1 and 2 (if run) above + ch_inputs_scorer_selected = WorkflowOncoanalyser.groupByMeta( + ch_purple, + ch_linx, + ch_isofox, + ) + .map { meta, purple_dir, lilac_dir -> + + def inputs = [ + Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), + Utils.selectCurrentOrExisting(lilac_dir, meta, Constants.INPUT.LILAC), + Utils.selectCurrentOrExisting(isofox_dir, meta, Constants.INPUT.ISOFOX), + ] + + return [meta, *inputs] + } + + // Sort inputs + // channel: runnable: [ meta, purple_dir, lilac_dir,isofox_dir ] + // channel: skip: [ meta ] + ch_inputs_scorer_sorted = ch_inputs_scorer_selected + .branch { meta, purple_dir, lilac_dir, isofox_dir -> + + def has_normal_dna = Utils.hasNormalDnaBam(meta) + + def has_runnable_inputs = purple_dir && lilac_dir && has_normal_dna + + runnable: has_runnable_inputs + skip: true + return meta + } + + // Create process input channel + // channel: sample_data: [ meta, purple_dir, linx_annotation_dir ] + ch_scorer_inputs = ch_inputs_scorer_sorted.runnable + .map{ meta, purple_dir, linx_annotation_dir -> + + def meta_neo_scorer = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + ] + + return [meta_neo_scorer, purple_dir, lilac_dir, isofox_dir] + } - // Neo score - // 1. select input sources after combine required channels (i.e ch_purple, ch_lilac, ch_isofox, above process outputs) - // 2. get runnable subjects/inputs - // 3. format input channel - // 4. 
run NEO_SCORER // Feeding the Neo process raw inputs for demo purposes only - NEO_SCORER(ch_inputs) + NEO_SCORER( + ch_scorer_inputs, + genome_fasta, + genome_version, + ensembl_data_resources, + neo_resources, + cohort_tpm_medians + ) + + ch_versions = ch_versions.mix(NEO_SCORER.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, neo_scorer_dir ] + ch_scorer_outputs = WorkflowOncoanalyser.restoreMeta(NEO_SCORER.out.neo_scorer_dir, ch_inputs) emit: versions = ch_versions // channel: [ versions.yml ] diff --git a/workflows/wgts.nf b/workflows/wgts.nf index adb2d85d..3c691bc5 100644 --- a/workflows/wgts.nf +++ b/workflows/wgts.nf @@ -721,6 +721,12 @@ workflow WGTS { ch_sage_somatic_append_out, ch_lilac_out, ch_linx_somatic_out, + ref_data.genome_version, + ref_data.genome_fasta, + ref_data.genome_fai, + hmf_data.ensembl_data_resources, + hmf_data.neo_resources, + hmf_data.cohort_tpm_medians, ) ch_versions = ch_versions.mix(NEO_PREDICTION.out.versions) From 396e01aa8cd9c50048241daa340040f5f24e80db Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Mon, 5 Feb 2024 14:30:23 +1100 Subject: [PATCH 03/17] Get stub runs working --- conf/hmf_data.config | 15 +- conf/modules.config | 23 ++- modules/local/neo/Dockerfile | 2 +- modules/local/neo/annotate_fusions/main.nf | 53 ++++++ modules/local/neo/finder/main.nf | 16 +- modules/local/neo/isofox/main.nf | 56 ------- modules/local/neo/meta.yml | 0 modules/local/neo/scorer/main.nf | 41 ++--- subworkflows/local/neo_prediction.nf | 180 ++++++++------------- workflows/wgts.nf | 5 +- 10 files changed, 189 insertions(+), 202 deletions(-) create mode 100644 modules/local/neo/annotate_fusions/main.nf delete mode 100644 modules/local/neo/isofox/main.nf delete mode 100644 modules/local/neo/meta.yml diff --git a/conf/hmf_data.config b/conf/hmf_data.config index 3c17ba27..82ff1af3 100644 --- a/conf/hmf_data.config +++ b/conf/hmf_data.config @@ -20,13 +20,13 @@ params { isofox_gc_ratios = 
'rna_pipeline/read_100_exp_gc_ratios.csv' // LILAC lilac_resources = 'dna_pipeline/immune/' - // NEO - neo_resources = 'neo/' + // Neo + neo_resources = 'neo/binding/' // ORANGE cohort_mapping = 'orange/cohort_mapping.tsv' - // RNA COHORT DATA - cohort_percentiles = 'orange/cohort_percentiles.tsv' // consider renaming and moving to rna_pipeline - cohort_tpm_medians = 'rna_pipeline/cohort_tpm_medians.tsv' + // Hartwig cohort RNA data + cohort_tpm_medians = 'neo/tpm_cohort/hmf_tpm_medians.csv' + cohort_percentiles = 'orange/cohort_percentiles.tsv' alt_sj_distribution = 'rna_pipeline/isofox.hmf_3444.alt_sj_cohort.37.csv' gene_exp_distribution = 'rna_pipeline/isofox.hmf_3444.gene_distribution.37.csv' // SAGE @@ -76,8 +76,13 @@ params { isofox_gc_ratios = 'rna_pipeline/read_100_exp_gc_ratios.csv' // LILAC lilac_resources = 'dna_pipeline/immune/' + // Neo + neo_resources = 'neo/' // ORANGE cohort_mapping = 'orange/cohort_mapping.tsv' + // RNA cohort data + cohort_percentiles = 'orange/cohort_percentiles.tsv' + cohort_tpm_medians = 'rna_pipeline/cohort_tpm_medians.tsv' cohort_percentiles = 'orange/cohort_percentiles.tsv' alt_sj_distribution = 'rna_pipeline/isofox.hmf_3444.alt_sj_cohort.38.csv' gene_exp_distribution = 'rna_pipeline/isofox.hmf_3444.gene_distribution.38.csv' diff --git a/conf/modules.config b/conf/modules.config index dd090cdd..0dfd3002 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -218,12 +218,31 @@ process { ] } - withName: 'NEO_.*' { + withName: 'NEO_(?:SCORER|FINDER)' { ext.jarPath = '/opt/neo/neo.jar' + } + + withName: 'NEO_SCORER' { publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : "${meta.key}/neo/scorer/" }, + ] + } + + withName: '.*:NEO_PREDICTION:ANNOTATE_FUSIONS' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/neo/annotated_fusions/${filename}" }, + ] + } + + withName: 'NEO_FINDER' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/neo/finder/" }, ] } diff --git a/modules/local/neo/Dockerfile b/modules/local/neo/Dockerfile index 5325eb6c..f734631d 100644 --- a/modules/local/neo/Dockerfile +++ b/modules/local/neo/Dockerfile @@ -10,7 +10,7 @@ RUN \ RUN \ mkdir -p /opt/neo/ && \ - wget -O /opt/neo/neo.jar 'https://github.com/hartwigmedical/hmftools/releases/download/neo-v1.1_beta/neo_v1.1.jar' + wget -O /opt/neo/neo.jar 'https://github.com/hartwigmedical/hmftools/releases/download/neo-v1.2_beta/neo_v1.2_beta.jar' USER mambauser diff --git a/modules/local/neo/annotate_fusions/main.nf b/modules/local/neo/annotate_fusions/main.nf new file mode 100644 index 00000000..5e9af36e --- /dev/null +++ b/modules/local/neo/annotate_fusions/main.nf @@ -0,0 +1,53 @@ +process ANNOTATE_FUSIONS { + tag "${meta.id}" + label 'process_medium' + + container 'quay.io/biocontainers/hmftools-isofox:1.7.1--hdfd78af_0' + + input: + tuple val(meta), path(neo_finder_dir), path(bam), path(bai) + val read_length + path genome_fasta + val genome_ver + path genome_fai + path ensembl_data_resources + + output: + tuple val(meta), path('*isf.neoepitope.tsv'), emit: annotated_fusions + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + mkdir -p isofox/ + + isofox \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + -sample ${meta.sample_id} \\ + -bam_file ${bam} \\ + -functions NEO_EPITOPES \\ + -neo_dir ${neo_finder_dir} \\ + -read_length 
${read_length} \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + -threads ${task.cpus} \\ + -output_dir ./ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + isofox: \$(isofox -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + touch ${meta.sample_id}.isf.neoepitope.tsv + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/neo/finder/main.nf b/modules/local/neo/finder/main.nf index 60cbf2e3..4ff99a7b 100644 --- a/modules/local/neo/finder/main.nf +++ b/modules/local/neo/finder/main.nf @@ -2,17 +2,18 @@ process NEO_FINDER { tag "${meta.id}" label 'process_low' - container 'docker.io/scwatts/neo:1.1_beta--0' + container 'docker.io/scwatts/neo:1.2_beta--1' input: tuple val(meta), path(purple_dir), path(linx_dir) path genome_fasta val genome_ver + path genome_fai path ensembl_data_resources output: - tuple val(meta), path('neo/'), emit: neo_finder_dir - path 'versions.yml' , emit: versions + tuple val(meta), path('neo_finder/'), emit: neo_finder_dir + path 'versions.yml' , emit: versions when: task.ext.when == null || task.ext.when @@ -21,10 +22,11 @@ process NEO_FINDER { def args = task.ext.args ?: '' """ + mkdir -p neo_finder/ + java \\ -Xmx${Math.round(task.memory.bytes * 0.95)} \\ - -cp ${task.ext.jarPath} \\ - com.hartwig.hmftools.neo.epitope.NeoEpitopeFinder \\ + -jar ${task.ext.jarPath} \\ ${args} \\ -sample ${meta.sample_id} \\ -linx_dir ${linx_dir} \\ @@ -32,8 +34,8 @@ process NEO_FINDER { -ref_genome ${genome_fasta} \\ -ref_genome_version ${genome_ver} \\ -ensembl_data_dir ${ensembl_data_resources} \\ - -output_dir ${output_dir} \\ -log_debug \\ + -output_dir neo_finder/ cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -43,7 +45,7 @@ process NEO_FINDER { stub: """ - mkdir -p neo/ + mkdir -p neo_finder/ echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git 
a/modules/local/neo/isofox/main.nf b/modules/local/neo/isofox/main.nf deleted file mode 100644 index 4ca86ba5..00000000 --- a/modules/local/neo/isofox/main.nf +++ /dev/null @@ -1,56 +0,0 @@ -process ISOFOX_NEO { - tag "${meta.id}" - label 'process_medium' - - container 'docker.io/scwatts/isofox:1.7.1--0' - - input: - tuple val(meta), path(bam), path(bai) - val functions - val read_length - path genome_fasta - val genome_ver - path genome_fai - path ensembl_data_resources - - output: - tuple val(meta), path('isofox/'), emit: isofox_dir - path 'versions.yml' , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - - """ - mkdir -p isofox/ - - java \\ - -Xmx${Math.round(task.memory.bytes * 0.95)} \\ - -jar ${task.ext.jarPath} \\ - ${args} \\ - -sample ${meta.sample_id} \\ - -bam_file ${bam} \\ - -functions NEO_EPITOPES \\ - -neoepitope_file ${neo_finder_dir}/${meta.sample_id}.neo.neo_data.tsv \\ - -read_length ${read_length} \\ - -ref_genome ${genome_fasta} \\ - -ref_genome_version ${genome_ver} \\ - -ensembl_data_dir ${ensembl_data_resources} \\ - -threads ${task.cpus} \\ - -output_dir isofox/ - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - isofox: \$(java -jar ${task.ext.jarPath} -version | sed 's/^.* //') - END_VERSIONS - """ - - stub: - """ - mkdir -p isofox/ - touch isofox/placeholder - echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml - """ -} diff --git a/modules/local/neo/meta.yml b/modules/local/neo/meta.yml deleted file mode 100644 index e69de29b..00000000 diff --git a/modules/local/neo/scorer/main.nf b/modules/local/neo/scorer/main.nf index 7dd7ec09..ed65a16e 100644 --- a/modules/local/neo/scorer/main.nf +++ b/modules/local/neo/scorer/main.nf @@ -2,19 +2,17 @@ process NEO_SCORER { tag "${meta.id}" label 'process_medium' - container 'docker.io/scwatts/neo:1.1_beta--0' + container 'docker.io/scwatts/neo:1.2_beta--1' input: - tuple val(meta), path(purple_dir), 
path(isofox_dir), path(lilac_dir), path(isofox_neo_dir), path(neo_finder_dir) - path genome_fasta - val genome_ver + tuple val(meta), path(isofox_dir), path(purple_dir), path(sage_vcf), path(lilac_dir), path(neo_finder_dir), path(annotate_fusions) path ensembl_data_resources - path neo_resources + path neo_resources, stageAs: 'neo_reference_data' path cohort_tpm_medians output: - tuple val(meta), path('neo/'), emit: neo_scorer_dir - path 'versions.yml' , emit: versions + tuple val(meta), path('neo_scorer/'), emit: neo_scorer_dir + path 'versions.yml' , emit: versions when: task.ext.when == null || task.ext.when @@ -23,14 +21,21 @@ process NEO_SCORER { def args = task.ext.args ?: '' def rna_sample_arg = meta.containsKey('sample_rna_id') ? "-rna_sample ${meta.sample_rna_id}" : '' - def rna_somatic_vcf_arg = meta.containsKey('sample_rna_id') ? "-rna_somatic_vcf ${purple_dir}/${meta.sample_id}.sage_append.vcf.gz" : '' - - // NeoScorer expects the fusion-neoepitopes which Isofox has annotated with RNA to be in the Isofox directory, so put them - // and the standard Isofox files (just TPM is used) into a new shared directory - // ie isofox_neo_dir + neo_finder_dir -> new directory for isofox data -> passed into -isofox_dir - def isofox_dir_arg = meta.containsKey('sample_rna_id') ? "-isofox_dir /path/isofox_combined_dir" : '' + def rna_somatic_vcf_arg = meta.containsKey('sample_rna_id') ? 
"-rna_somatic_vcf ${sage_vcf}" : '' """ + isofox_dir_arg='' + if [[ -n "${isofox_dir}" ]]; then + isofox_dir_local=isofox__prepared/; + + cp -rL ${isofox_dir} \${isofox_dir_local}/; + cp -r ${annotate_fusions} \${isofox_dir_local}/; + + isofox_dir_arg="-isofox_dir \${isofox_dir_local}"; + fi; + + mkdir -p neo_scorer/ + java \\ -Xmx${Math.round(task.memory.bytes * 0.95)} \\ -cp ${task.ext.jarPath} \\ @@ -38,18 +43,16 @@ process NEO_SCORER { ${args} \\ -sample ${meta.sample_id} \\ ${rna_sample_arg} \\ + \${isofox_dir_arg} \\ -purple_dir ${purple_dir} \\ + ${rna_somatic_vcf_arg} \\ -lilac_dir ${lilac_dir} \\ - ${isofox_dir_arg} \\ - ${rna_somatic_vcf_arg} ]] -neo_dir ${neo_finder_dir} \\ - -ref_genome ${genome_fasta} \\ - -ref_genome_version ${genome_ver} \\ -ensembl_data_dir ${ensembl_data_resources} \\ -score_file_dir ${neo_resources} \\ -cancer_tpm_medians_file ${cohort_tpm_medians} \\ - -output_dir ${output_dir} \\ -log_debug \\ + -output_dir neo_scorer/ cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -59,7 +62,7 @@ process NEO_SCORER { stub: """ - mkdir -p neo/ + mkdir -p neo_scorer/ echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/subworkflows/local/neo_prediction.nf b/subworkflows/local/neo_prediction.nf index 3140fa53..eeca49bc 100644 --- a/subworkflows/local/neo_prediction.nf +++ b/subworkflows/local/neo_prediction.nf @@ -1,12 +1,11 @@ // -// XXX +// Neo identifies and scores neoepitopes // import Constants import Utils -include { ISOFOX } from '../../modules/local/isofox/main' -include { LILAC } from '../../modules/local/lilac/main' +include { ANNOTATE_FUSIONS } from '../../modules/local/neo/annotate_fusions/main' include { NEO_FINDER } from '../../modules/local/neo/finder/main' include { NEO_SCORER } from '../../modules/local/neo/scorer/main' @@ -21,23 +20,27 @@ workflow NEO_PREDICTION{ ch_linx // channel: [mandatory] [ meta, linx_dir ] // Reference data - genome_version // channel: [mandatory] genome version 
genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version genome_fai // channel: [mandatory] /path/to/genome_fai ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ neo_resources // channel: [mandatory] /path/to/neo_resources/ cohort_tpm_medians // channel: [mandatory] /path/to/cohort_tpm_medians/ + // Params + isofox_read_length // string: [mandatory] Isofox read length + main: // Channel for versions.yml files // channel: [ versions.yml ] ch_versions = Channel.empty() - // Step 1: Identify neoepitopes from Purple somatic variants and Linx's (neoepitope) fusions - + // + // MODULE: Neo finder + // // Select input sources - // channel: [ meta, isofox_dir, purple_dir, linx_annotation_dir ] - ch_inputs_finder_selected = WorkflowOncoanalyser.groupByMeta( + // channel: [ meta, purple_dir, linx_annotation_dir ] + ch_finder_inputs_selected = WorkflowOncoanalyser.groupByMeta( ch_purple, ch_linx, ) @@ -54,7 +57,7 @@ workflow NEO_PREDICTION{ // Sort inputs // channel: runnable: [ meta, purple_dir, linx_annotation_dir ] // channel: skip: [ meta ] - ch_inputs_finder_sorted = ch_inputs_finder_selected + ch_finder_inputs_sorted = ch_finder_inputs_selected .branch { meta, purple_dir, linx_annotation_dir -> def has_normal_dna = Utils.hasNormalDnaBam(meta) @@ -67,25 +70,25 @@ workflow NEO_PREDICTION{ } // Create process input channel - // channel: sample_data: [ meta, purple_dir, linx_annotation_dir ] - ch_finder_inputs = ch_inputs_finder_sorted.runnable - .map{ meta, purple_dir, linx_annotation_dir -> + // channel: sample_data: [ meta_finder, purple_dir, linx_annotation_dir ] + ch_finder_inputs = ch_finder_inputs_sorted.runnable + .map { meta, purple_dir, linx_annotation_dir -> - def meta_neo_finder = [ + def meta_finder = [ key: meta.group_id, id: meta.group_id, sample_id: Utils.getTumorDnaSampleName(meta), ] - return [meta_neo_finder, purple_dir, linx_annotation_dir] + return [meta_finder, 
purple_dir, linx_annotation_dir] } - - // Feeding the Neo process raw inputs for demo purposes only + // Run process NEO_FINDER( ch_finder_inputs, genome_fasta, genome_version, + genome_fai, ensembl_data_resources, ) @@ -93,47 +96,27 @@ workflow NEO_PREDICTION{ // Set outputs, restoring original meta // channel: [ meta, neo_finder_dir ] - ch_finder_outputs = WorkflowOncoanalyser.restoreMeta(NEO_FINDER.out.neo_finder_dir, ch_inputs) + ch_finder_out = WorkflowOncoanalyser.restoreMeta(NEO_FINDER.out.neo_finder_dir, ch_inputs) - // Step 2: When RNA is present, annotate the fusion-derived neoepitope with RNA using Isofox - - /* - - // Select input sources - // channel: [ meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ] - ch_inputs_isofox_sorted = WorkflowOncoanalyser.groupByMeta( - ch_finder_outputs, - // channel: [ meta, tumor_rna_bam (optional), tumor_rna_bai (optional) ] - ch_inputs - .map { meta -> - def has_rna = Utils.hasTumorRnaBam(meta) - - return [ - meta, - has_rna ? Utils.getTumorRnaBam(meta) : [], - has_rna ? 
Utils.getTumorRnaBai(meta) : [], - ] - }, - - ) + // + // MODULE: Fusion annotation (Isofox) + // + // Annotate the fusion-derived neoepitope using Isofox where RNA data is available // Sort inputs - ch_inputs_isofox_sorted = ch_finder_outputs - .branch { - - def has_rna = Utils.hasTumorRnaBam(meta) - - - - runnable: - skip: - meta - + // channel: runnable: [ meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ] + // channel: skip: [ meta ] + ch_isofox_inputs_sorted = ch_finder_out + .branch { meta, neo_finder_dir -> + runnable: Utils.hasTumorRnaBam(meta) + return [meta, neo_finder_dir, Utils.getTumorRnaBam(meta), Utils.getTumorRnaBai(meta)] + skip: true + return meta } // Create process input channel // channel: [ meta_isofox, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ] - ch_isofox_inputs = ch_inputs_isofox_sorted.runnable + ch_isofox_inputs = ch_isofox_inputs_sorted.runnable .map { meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna -> def meta_isofox = [ @@ -142,11 +125,11 @@ workflow NEO_PREDICTION{ sample_id: Utils.getTumorDnaSampleName(meta), ] - return [meta_isofox, Utils.getTumorRnaBam(meta), Utils.getTumorRnaBai(meta)] + return [meta_isofox, neo_finder_dir, Utils.getTumorRnaBam(meta), Utils.getTumorRnaBai(meta)] } // Run process - ISOFOX_NEO( + ANNOTATE_FUSIONS( ch_isofox_inputs, isofox_read_length, genome_fasta, @@ -155,87 +138,64 @@ workflow NEO_PREDICTION{ ensembl_data_resources, ) - ch_versions = ch_versions.mix(ISOFOX.out.versions) + ch_versions = ch_versions.mix(ANNOTATE_FUSIONS.out.versions) // Set outputs, restoring original meta - // channel: [ meta, isofox_dir ] - ch_outputs = Channel.empty() + // channel: [ meta, annotated_fusions ] + ch_annotate_fusions_out = Channel.empty() .mix( - WorkflowOncoanalyser.restoreMeta(ISOFOX.out.isofox_neo_dir, ch_inputs), - ch_inputs_sorted.skip.map { meta -> [meta, []] }, - - */ - - // ch_finder_outputs + WorkflowOncoanalyser.restoreMeta(ANNOTATE_FUSIONS.out.annotated_fusions, ch_inputs), + 
ch_isofox_inputs_sorted.skip.map { meta -> [meta, []] }, + ) - // Step 3: Run Neo's binding prediction routine for neoepitope's pHLAs, taking in Lilac HLA alleles and previously - // derived neoepitopes with RNA annotation if it was available - // Select input sources - // channel: [ meta, isofox_dir, purple_dir, lilac_dir, isofox_dir ] - // TO_DO - how to pass in the directories from step 1 and 2 (if run) above - ch_inputs_scorer_selected = WorkflowOncoanalyser.groupByMeta( - ch_purple, - ch_linx, + // + // MODULE: Neo scorer + // + // Select input sources and prepare input channel + // channel: [ meta_scorer, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotate_fusions ] + ch_scorer_inputs = WorkflowOncoanalyser.groupByMeta( ch_isofox, + ch_purple, + ch_sage_somatic_append, + ch_lilac, + ch_finder_out, + ch_annotate_fusions_out, ) - .map { meta, purple_dir, lilac_dir -> + .map { meta, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotate_fusions -> - def inputs = [ - Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), - Utils.selectCurrentOrExisting(lilac_dir, meta, Constants.INPUT.LILAC), - Utils.selectCurrentOrExisting(isofox_dir, meta, Constants.INPUT.ISOFOX), - ] - - return [meta, *inputs] - } - - // Sort inputs - // channel: runnable: [ meta, purple_dir, lilac_dir,isofox_dir ] - // channel: skip: [ meta ] - ch_inputs_scorer_sorted = ch_inputs_scorer_selected - .branch { meta, purple_dir, lilac_dir, isofox_dir -> - - def has_normal_dna = Utils.hasNormalDnaBam(meta) - - def has_runnable_inputs = purple_dir && lilac_dir && has_normal_dna - - runnable: has_runnable_inputs - skip: true - return meta - } - - // Create process input channel - // channel: sample_data: [ meta, purple_dir, linx_annotation_dir ] - ch_scorer_inputs = ch_inputs_scorer_sorted.runnable - .map{ meta, purple_dir, linx_annotation_dir -> - - def meta_neo_scorer = [ + def meta_scorer = [ key: meta.group_id, id: 
meta.group_id, sample_id: Utils.getTumorDnaSampleName(meta), ] - return [meta_neo_scorer, purple_dir, lilac_dir, isofox_dir] - } + if (Utils.hasTumorRnaBam(meta)) { + meta_scorer.sample_rna_id = Utils.getTumorRnaSampleName(meta) + } + + def inputs = [ + Utils.selectCurrentOrExisting(isofox_dir, meta, Constants.INPUT.ISOFOX_DIR), + Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), + Utils.selectCurrentOrExisting(sage_somatic_append, meta, Constants.INPUT.SAGE_APPEND_VCF_TUMOR), + Utils.selectCurrentOrExisting(lilac_dir, meta, Constants.INPUT.LILAC_DIR), + neo_finder_dir, + annotate_fusions, + ] + return [meta_scorer, *inputs] + } - // Feeding the Neo process raw inputs for demo purposes only + // Run process NEO_SCORER( ch_scorer_inputs, - genome_fasta, - genome_version, ensembl_data_resources, neo_resources, - cohort_tpm_medians + cohort_tpm_medians, ) ch_versions = ch_versions.mix(NEO_SCORER.out.versions) - // Set outputs, restoring original meta - // channel: [ meta, neo_scorer_dir ] - ch_scorer_outputs = WorkflowOncoanalyser.restoreMeta(NEO_SCORER.out.neo_scorer_dir, ch_inputs) - emit: versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/wgts.nf b/workflows/wgts.nf index 3c691bc5..5b695939 100644 --- a/workflows/wgts.nf +++ b/workflows/wgts.nf @@ -710,7 +710,7 @@ workflow WGTS { } // - // SUBWORKFLOW: XXX + // SUBWORKFLOW: Run Neo to identify and score neoepitopes // if (run_config.stages.neo) { @@ -721,12 +721,13 @@ workflow WGTS { ch_sage_somatic_append_out, ch_lilac_out, ch_linx_somatic_out, - ref_data.genome_version, ref_data.genome_fasta, + ref_data.genome_version, ref_data.genome_fai, hmf_data.ensembl_data_resources, hmf_data.neo_resources, hmf_data.cohort_tpm_medians, + params.isofox_read_length, ) ch_versions = ch_versions.mix(NEO_PREDICTION.out.versions) From 7abbccb0a2bd12cb51064f6ff56b59c6eaa727a8 Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Wed, 14 Feb 2024 08:54:32 +1100 Subject: [PATCH 04/17] 
Set Neo to run only when specified --- lib/Processes.groovy | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/Processes.groovy b/lib/Processes.groovy index 98245f6d..f4770838 100644 --- a/lib/Processes.groovy +++ b/lib/Processes.groovy @@ -7,7 +7,17 @@ import Utils class Processes { public static getRunStages(include, exclude, manual_select, log) { - def processes = manual_select ? [] : Constants.Process.values().toList() + + // Get default processes + // NOTE(SW): currently set all except Neo to run by default; Process.NEO excluded to be more concise in code + def processes + if (manual_select) { + processes = [] + } else { + processes = Constants.Process.values().toList() + processes.remove(Constants.Process.NEO) + } + def include_list = this.getProcessList(include, log) def exclude_list = this.getProcessList(exclude, log) this.checkIncludeExcludeList(include_list, exclude_list, log) From 01563bc4c020531886bd431c6d41dc0105c9b206 Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Fri, 16 Feb 2024 10:30:35 +1100 Subject: [PATCH 05/17] Update Neo reference data paths --- conf/hmf_data.config | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/conf/hmf_data.config b/conf/hmf_data.config index 82ff1af3..f80f086a 100644 --- a/conf/hmf_data.config +++ b/conf/hmf_data.config @@ -25,7 +25,7 @@ params { // ORANGE cohort_mapping = 'orange/cohort_mapping.tsv' // Hartwig cohort RNA data - cohort_tpm_medians = 'neo/tpm_cohort/hmf_tpm_medians.csv' + cohort_tpm_medians = 'neo/tpm_cohort/hmf_tpm_medians.37.csv' cohort_percentiles = 'orange/cohort_percentiles.tsv' alt_sj_distribution = 'rna_pipeline/isofox.hmf_3444.alt_sj_cohort.37.csv' gene_exp_distribution = 'rna_pipeline/isofox.hmf_3444.gene_distribution.37.csv' @@ -77,12 +77,11 @@ params { // LILAC lilac_resources = 'dna_pipeline/immune/' // Neo - neo_resources = 'neo/' + neo_resources = 'neo/binding/' // ORANGE cohort_mapping = 'orange/cohort_mapping.tsv' - // RNA cohort 
data - cohort_percentiles = 'orange/cohort_percentiles.tsv' - cohort_tpm_medians = 'rna_pipeline/cohort_tpm_medians.tsv' + // Hartwig cohort RNA data + cohort_tpm_medians = 'neo/tpm_cohort/hmf_tpm_medians.38.csv' cohort_percentiles = 'orange/cohort_percentiles.tsv' alt_sj_distribution = 'rna_pipeline/isofox.hmf_3444.alt_sj_cohort.38.csv' gene_exp_distribution = 'rna_pipeline/isofox.hmf_3444.gene_distribution.38.csv' From 801ec4d6940ad4901184116c5d9ced848a0f8fc5 Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Wed, 14 Feb 2024 09:08:30 +1100 Subject: [PATCH 06/17] Provide Neo cancer type from samplesheet --- modules/local/neo/scorer/main.nf | 3 +++ subworkflows/local/neo_prediction.nf | 1 + 2 files changed, 4 insertions(+) diff --git a/modules/local/neo/scorer/main.nf b/modules/local/neo/scorer/main.nf index ed65a16e..ba98d833 100644 --- a/modules/local/neo/scorer/main.nf +++ b/modules/local/neo/scorer/main.nf @@ -23,6 +23,8 @@ process NEO_SCORER { def rna_sample_arg = meta.containsKey('sample_rna_id') ? "-rna_sample ${meta.sample_rna_id}" : '' def rna_somatic_vcf_arg = meta.containsKey('sample_rna_id') ? "-rna_somatic_vcf ${sage_vcf}" : '' + def cancer_type_arg = meta.containsKey('cancer_type') ? 
"-cancer_type ${meta.cancer_type}" : '' + """ isofox_dir_arg='' if [[ -n "${isofox_dir}" ]]; then @@ -42,6 +44,7 @@ process NEO_SCORER { com.hartwig.hmftools.neo.score.NeoScorer \\ ${args} \\ -sample ${meta.sample_id} \\ + ${cancer_type_arg} \\ ${rna_sample_arg} \\ \${isofox_dir_arg} \\ -purple_dir ${purple_dir} \\ diff --git a/subworkflows/local/neo_prediction.nf b/subworkflows/local/neo_prediction.nf index eeca49bc..bd3ff59a 100644 --- a/subworkflows/local/neo_prediction.nf +++ b/subworkflows/local/neo_prediction.nf @@ -168,6 +168,7 @@ workflow NEO_PREDICTION{ key: meta.group_id, id: meta.group_id, sample_id: Utils.getTumorDnaSampleName(meta), + cancer_type: meta[Constants.InfoField.CANCER_TYPE], ] if (Utils.hasTumorRnaBam(meta)) { From d31a929b88ca284b8454f29aed4a83e0db3c9f8a Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Sat, 16 Mar 2024 17:42:16 +1100 Subject: [PATCH 07/17] Move neo_prediction.nf subworkflow for consistency --- .../local/{neo_prediction.nf => neo_prediction/main.nf} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename subworkflows/local/{neo_prediction.nf => neo_prediction/main.nf} (96%) diff --git a/subworkflows/local/neo_prediction.nf b/subworkflows/local/neo_prediction/main.nf similarity index 96% rename from subworkflows/local/neo_prediction.nf rename to subworkflows/local/neo_prediction/main.nf index bd3ff59a..ae18dae2 100644 --- a/subworkflows/local/neo_prediction.nf +++ b/subworkflows/local/neo_prediction/main.nf @@ -5,9 +5,9 @@ import Constants import Utils -include { ANNOTATE_FUSIONS } from '../../modules/local/neo/annotate_fusions/main' -include { NEO_FINDER } from '../../modules/local/neo/finder/main' -include { NEO_SCORER } from '../../modules/local/neo/scorer/main' +include { ANNOTATE_FUSIONS } from '../../../modules/local/neo/annotate_fusions/main' +include { NEO_FINDER } from '../../../modules/local/neo/finder/main' +include { NEO_SCORER } from '../../../modules/local/neo/scorer/main' workflow NEO_PREDICTION{ 
take: From cd3c2deb5d0707136a8611af4256a01851be4668 Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Sat, 16 Mar 2024 18:17:56 +1100 Subject: [PATCH 08/17] Adjust Neo to fit with alignment subworkflow * add source selection for RNA BAM (samplesheet, alignment subworkflow) * use appropriate function call to determine presence of RNA * relocate isofox_read_length assignment (wgts and targeted workflows) --- subworkflows/local/neo_prediction/main.nf | 28 ++++++++++++++++------- workflows/targeted.nf | 4 +++- workflows/wgts.nf | 7 ++++-- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/subworkflows/local/neo_prediction/main.nf b/subworkflows/local/neo_prediction/main.nf index ae18dae2..ee6b628b 100644 --- a/subworkflows/local/neo_prediction/main.nf +++ b/subworkflows/local/neo_prediction/main.nf @@ -13,6 +13,7 @@ workflow NEO_PREDICTION{ take: // Sample data ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_rna_bam // channel: [mandatory] [ meta, bam, bai ] ch_isofox // channel: [mandatory] [ meta, isofox_dir ] ch_purple // channel: [mandatory] [ meta, purple_dir ] ch_sage_somatic_append // channel: [mandatory] [ meta, sage_append_vcf ] @@ -60,7 +61,7 @@ workflow NEO_PREDICTION{ ch_finder_inputs_sorted = ch_finder_inputs_selected .branch { meta, purple_dir, linx_annotation_dir -> - def has_normal_dna = Utils.hasNormalDnaBam(meta) + def has_normal_dna = Utils.hasNormalDna(meta) def has_runnable_inputs = purple_dir && linx_annotation_dir && has_normal_dna @@ -103,13 +104,24 @@ workflow NEO_PREDICTION{ // // Annotate the fusion-derived neoepitope using Isofox where RNA data is available - // Sort inputs + // Select input sources and sort // channel: runnable: [ meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ] // channel: skip: [ meta ] - ch_isofox_inputs_sorted = ch_finder_out - .branch { meta, neo_finder_dir -> - runnable: Utils.hasTumorRnaBam(meta) - return [meta, neo_finder_dir, Utils.getTumorRnaBam(meta), Utils.getTumorRnaBai(meta)] + 
ch_isofox_inputs_sorted = WorkflowOncoanalyser.groupByMeta( + ch_finder_out, + ch_tumor_rna_bam, + ) + .map { meta, neo_finder_dir, tumor_bam, tumor_bai -> + return [ + meta, + neo_finder_dir, + Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_RNA_TUMOR), + Utils.selectCurrentOrExisting(tumor_bai, meta, Constants.INPUT.BAI_RNA_TUMOR), + ] + } + .branch { meta, neo_finder_dir, tumor_bam, tumor_bai -> + runnable: Utils.hasTumorRna(meta) + return [meta, neo_finder_dir, tumor_bam, tumor_bai] skip: true return meta } @@ -125,7 +137,7 @@ workflow NEO_PREDICTION{ sample_id: Utils.getTumorDnaSampleName(meta), ] - return [meta_isofox, neo_finder_dir, Utils.getTumorRnaBam(meta), Utils.getTumorRnaBai(meta)] + return [meta_isofox, neo_finder_dir, tumor_bam_rna, tumor_bai_rna] } // Run process @@ -171,7 +183,7 @@ workflow NEO_PREDICTION{ cancer_type: meta[Constants.InfoField.CANCER_TYPE], ] - if (Utils.hasTumorRnaBam(meta)) { + if (Utils.hasTumorRna(meta)) { meta_scorer.sample_rna_id = Utils.getTumorRnaSampleName(meta) } diff --git a/workflows/targeted.nf b/workflows/targeted.nf index f2d4f0ba..5b0b92bd 100644 --- a/workflows/targeted.nf +++ b/workflows/targeted.nf @@ -45,6 +45,9 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true // Check mandatory parameters if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } +// Used in Isofox subworkflow only +isofox_read_length = params.isofox_read_length !== null ? params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_TARGETED + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS @@ -185,7 +188,6 @@ workflow TARGETED { isofox_counts = params.isofox_counts ? file(params.isofox_counts) : panel_data.isofox_counts isofox_gc_ratios = params.isofox_gc_ratios ? 
file(params.isofox_gc_ratios) : panel_data.isofox_gc_ratios - isofox_read_length = params.isofox_read_length !== null ? params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_TARGETED isofox_gene_ids = params.isofox_gene_ids ? file(params.isofox_gene_ids) : panel_data.isofox_gene_ids isofox_tpm_norm = params.isofox_tpm_norm ? file(params.isofox_tpm_norm) : panel_data.isofox_tpm_norm diff --git a/workflows/wgts.nf b/workflows/wgts.nf index 5b695939..d6f3bb6e 100644 --- a/workflows/wgts.nf +++ b/workflows/wgts.nf @@ -47,6 +47,9 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true // Check mandatory parameters if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } +// Used in Isofox and Neo subworkflows +isofox_read_length = params.isofox_read_length !== null ? params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_WTS + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS @@ -189,7 +192,6 @@ workflow WGTS { isofox_counts = params.isofox_counts ? file(params.isofox_counts) : hmf_data.isofox_counts isofox_gc_ratios = params.isofox_gc_ratios ? file(params.isofox_gc_ratios) : hmf_data.isofox_gc_ratios - isofox_read_length = params.isofox_read_length !== null ? 
params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_WTS ISOFOX_QUANTIFICATION( ch_inputs, @@ -716,6 +718,7 @@ workflow WGTS { NEO_PREDICTION( ch_inputs, + ch_align_rna_tumor_out, ch_isofox_out, ch_purple_out, ch_sage_somatic_append_out, @@ -727,7 +730,7 @@ workflow WGTS { hmf_data.ensembl_data_resources, hmf_data.neo_resources, hmf_data.cohort_tpm_medians, - params.isofox_read_length, + isofox_read_length, ) ch_versions = ch_versions.mix(NEO_PREDICTION.out.versions) From 2e4a57a8a8c71d04dd554d6b03edc1e03e28e672 Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Thu, 30 May 2024 09:10:23 +1000 Subject: [PATCH 09/17] Indent Neo subworkflow to match nf-core style --- modules/local/neo/Dockerfile | 18 +- subworkflows/local/neo_prediction/main.nf | 378 +++++++++++----------- 2 files changed, 198 insertions(+), 198 deletions(-) diff --git a/modules/local/neo/Dockerfile b/modules/local/neo/Dockerfile index f734631d..aa0a03e3 100644 --- a/modules/local/neo/Dockerfile +++ b/modules/local/neo/Dockerfile @@ -3,20 +3,20 @@ FROM mambaorg/micromamba:0.24.0 USER root RUN \ - apt-get update && \ - apt-get install -y procps wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* + apt-get update && \ + apt-get install -y procps wget && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* RUN \ - mkdir -p /opt/neo/ && \ - wget -O /opt/neo/neo.jar 'https://github.com/hartwigmedical/hmftools/releases/download/neo-v1.2_beta/neo_v1.2_beta.jar' + mkdir -p /opt/neo/ && \ + wget -O /opt/neo/neo.jar 'https://github.com/hartwigmedical/hmftools/releases/download/neo-v1.2_beta/neo_v1.2_beta.jar' USER mambauser RUN \ - micromamba install -y -n base -c bioconda -c conda-forge \ - 'openjdk >=8' && \ - micromamba clean --all --yes + micromamba install -y -n base -c bioconda -c conda-forge \ + 'openjdk >=8' && \ + micromamba clean --all --yes ENV PATH="/opt/conda/bin:/opt/conda/condabin:${PATH}" diff --git a/subworkflows/local/neo_prediction/main.nf 
b/subworkflows/local/neo_prediction/main.nf index ee6b628b..2a07a043 100644 --- a/subworkflows/local/neo_prediction/main.nf +++ b/subworkflows/local/neo_prediction/main.nf @@ -9,206 +9,206 @@ include { ANNOTATE_FUSIONS } from '../../../modules/local/neo/annotate_fusions/m include { NEO_FINDER } from '../../../modules/local/neo/finder/main' include { NEO_SCORER } from '../../../modules/local/neo/scorer/main' -workflow NEO_PREDICTION{ +workflow NEO_PREDICTION { take: - // Sample data - ch_inputs // channel: [mandatory] [ meta ] - ch_tumor_rna_bam // channel: [mandatory] [ meta, bam, bai ] - ch_isofox // channel: [mandatory] [ meta, isofox_dir ] - ch_purple // channel: [mandatory] [ meta, purple_dir ] - ch_sage_somatic_append // channel: [mandatory] [ meta, sage_append_vcf ] - ch_lilac // channel: [mandatory] [ meta, lilac_dir ] - ch_linx // channel: [mandatory] [ meta, linx_dir ] - - // Reference data - genome_fasta // channel: [mandatory] /path/to/genome_fasta - genome_version // channel: [mandatory] genome version - genome_fai // channel: [mandatory] /path/to/genome_fai - ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ - neo_resources // channel: [mandatory] /path/to/neo_resources/ - cohort_tpm_medians // channel: [mandatory] /path/to/cohort_tpm_medians/ - - // Params - isofox_read_length // string: [mandatory] Isofox read length + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_rna_bam // channel: [mandatory] [ meta, bam, bai ] + ch_isofox // channel: [mandatory] [ meta, isofox_dir ] + ch_purple // channel: [mandatory] [ meta, purple_dir ] + ch_sage_somatic_append // channel: [mandatory] [ meta, sage_append_vcf ] + ch_lilac // channel: [mandatory] [ meta, lilac_dir ] + ch_linx // channel: [mandatory] [ meta, linx_dir ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version + genome_fai // channel: [mandatory] 
/path/to/genome_fai + ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ + neo_resources // channel: [mandatory] /path/to/neo_resources/ + cohort_tpm_medians // channel: [mandatory] /path/to/cohort_tpm_medians/ + + // Params + isofox_read_length // string: [mandatory] Isofox read length main: - // Channel for versions.yml files - // channel: [ versions.yml ] - ch_versions = Channel.empty() - - // - // MODULE: Neo finder - // - // Select input sources - // channel: [ meta, purple_dir, linx_annotation_dir ] - ch_finder_inputs_selected = WorkflowOncoanalyser.groupByMeta( - ch_purple, - ch_linx, + // Channel for versions.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // + // MODULE: Neo finder + // + // Select input sources + // channel: [ meta, purple_dir, linx_annotation_dir ] + ch_finder_inputs_selected = WorkflowOncoanalyser.groupByMeta( + ch_purple, + ch_linx, + ) + .map { meta, purple_dir, linx_annotation_dir -> + + def inputs = [ + Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), + Utils.selectCurrentOrExisting(linx_annotation_dir, meta, Constants.INPUT.LINX_ANNO_DIR_TUMOR), + ] + + return [meta, *inputs] + } + + // Sort inputs + // channel: runnable: [ meta, purple_dir, linx_annotation_dir ] + // channel: skip: [ meta ] + ch_finder_inputs_sorted = ch_finder_inputs_selected + .branch { meta, purple_dir, linx_annotation_dir -> + + def has_normal_dna = Utils.hasNormalDna(meta) + + def has_runnable_inputs = purple_dir && linx_annotation_dir && has_normal_dna + + runnable: has_runnable_inputs + skip: true + return meta + } + + // Create process input channel + // channel: sample_data: [ meta_finder, purple_dir, linx_annotation_dir ] + ch_finder_inputs = ch_finder_inputs_sorted.runnable + .map { meta, purple_dir, linx_annotation_dir -> + + def meta_finder = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + ] + + return [meta_finder, 
purple_dir, linx_annotation_dir] + } + + // Run process + NEO_FINDER( + ch_finder_inputs, + genome_fasta, + genome_version, + genome_fai, + ensembl_data_resources, + ) + + ch_versions = ch_versions.mix(NEO_FINDER.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, neo_finder_dir ] + ch_finder_out = WorkflowOncoanalyser.restoreMeta(NEO_FINDER.out.neo_finder_dir, ch_inputs) + + // + // MODULE: Fusion annotation (Isofox) + // + // Annotate the fusion-derived neoepitope using Isofox where RNA data is available + + // Select input sources and sort + // channel: runnable: [ meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ] + // channel: skip: [ meta ] + ch_isofox_inputs_sorted = WorkflowOncoanalyser.groupByMeta( + ch_finder_out, + ch_tumor_rna_bam, + ) + .map { meta, neo_finder_dir, tumor_bam, tumor_bai -> + return [ + meta, + neo_finder_dir, + Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_RNA_TUMOR), + Utils.selectCurrentOrExisting(tumor_bai, meta, Constants.INPUT.BAI_RNA_TUMOR), + ] + } + .branch { meta, neo_finder_dir, tumor_bam, tumor_bai -> + runnable: Utils.hasTumorRna(meta) + return [meta, neo_finder_dir, tumor_bam, tumor_bai] + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_isofox, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ] + ch_isofox_inputs = ch_isofox_inputs_sorted.runnable + .map { meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna -> + + def meta_isofox = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + ] + + return [meta_isofox, neo_finder_dir, tumor_bam_rna, tumor_bai_rna] + } + + // Run process + ANNOTATE_FUSIONS( + ch_isofox_inputs, + isofox_read_length, + genome_fasta, + genome_version, + genome_fai, + ensembl_data_resources, + ) + + ch_versions = ch_versions.mix(ANNOTATE_FUSIONS.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, annotated_fusions ] + ch_annotate_fusions_out = 
Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ANNOTATE_FUSIONS.out.annotated_fusions, ch_inputs), + ch_isofox_inputs_sorted.skip.map { meta -> [meta, []] }, ) - .map { meta, purple_dir, linx_annotation_dir -> - def inputs = [ - Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), - Utils.selectCurrentOrExisting(linx_annotation_dir, meta, Constants.INPUT.LINX_ANNO_DIR_TUMOR), - ] - return [meta, *inputs] + // + // MODULE: Neo scorer + // + // Select input sources and prepare input channel + // channel: [ meta_scorer, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotate_fusions ] + ch_scorer_inputs = WorkflowOncoanalyser.groupByMeta( + ch_isofox, + ch_purple, + ch_sage_somatic_append, + ch_lilac, + ch_finder_out, + ch_annotate_fusions_out, + ) + .map { meta, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotate_fusions -> + + def meta_scorer = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + cancer_type: meta[Constants.InfoField.CANCER_TYPE], + ] + + if (Utils.hasTumorRna(meta)) { + meta_scorer.sample_rna_id = Utils.getTumorRnaSampleName(meta) } - // Sort inputs - // channel: runnable: [ meta, purple_dir, linx_annotation_dir ] - // channel: skip: [ meta ] - ch_finder_inputs_sorted = ch_finder_inputs_selected - .branch { meta, purple_dir, linx_annotation_dir -> + def inputs = [ + Utils.selectCurrentOrExisting(isofox_dir, meta, Constants.INPUT.ISOFOX_DIR), + Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), + Utils.selectCurrentOrExisting(sage_somatic_append, meta, Constants.INPUT.SAGE_APPEND_VCF_TUMOR), + Utils.selectCurrentOrExisting(lilac_dir, meta, Constants.INPUT.LILAC_DIR), + neo_finder_dir, + annotate_fusions, + ] - def has_normal_dna = Utils.hasNormalDna(meta) + return [meta_scorer, *inputs] + } - def has_runnable_inputs = purple_dir && linx_annotation_dir && has_normal_dna + // Run process + 
NEO_SCORER( + ch_scorer_inputs, + ensembl_data_resources, + neo_resources, + cohort_tpm_medians, + ) - runnable: has_runnable_inputs - skip: true - return meta - } - - // Create process input channel - // channel: sample_data: [ meta_finder, purple_dir, linx_annotation_dir ] - ch_finder_inputs = ch_finder_inputs_sorted.runnable - .map { meta, purple_dir, linx_annotation_dir -> - - def meta_finder = [ - key: meta.group_id, - id: meta.group_id, - sample_id: Utils.getTumorDnaSampleName(meta), - ] - - return [meta_finder, purple_dir, linx_annotation_dir] - } - - // Run process - NEO_FINDER( - ch_finder_inputs, - genome_fasta, - genome_version, - genome_fai, - ensembl_data_resources, - ) - - ch_versions = ch_versions.mix(NEO_FINDER.out.versions) - - // Set outputs, restoring original meta - // channel: [ meta, neo_finder_dir ] - ch_finder_out = WorkflowOncoanalyser.restoreMeta(NEO_FINDER.out.neo_finder_dir, ch_inputs) - - // - // MODULE: Fusion annotation (Isofox) - // - // Annotate the fusion-derived neoepitope using Isofox where RNA data is available - - // Select input sources and sort - // channel: runnable: [ meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ] - // channel: skip: [ meta ] - ch_isofox_inputs_sorted = WorkflowOncoanalyser.groupByMeta( - ch_finder_out, - ch_tumor_rna_bam, - ) - .map { meta, neo_finder_dir, tumor_bam, tumor_bai -> - return [ - meta, - neo_finder_dir, - Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_RNA_TUMOR), - Utils.selectCurrentOrExisting(tumor_bai, meta, Constants.INPUT.BAI_RNA_TUMOR), - ] - } - .branch { meta, neo_finder_dir, tumor_bam, tumor_bai -> - runnable: Utils.hasTumorRna(meta) - return [meta, neo_finder_dir, tumor_bam, tumor_bai] - skip: true - return meta - } - - // Create process input channel - // channel: [ meta_isofox, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ] - ch_isofox_inputs = ch_isofox_inputs_sorted.runnable - .map { meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna -> - - def 
meta_isofox = [ - key: meta.group_id, - id: meta.group_id, - sample_id: Utils.getTumorDnaSampleName(meta), - ] - - return [meta_isofox, neo_finder_dir, tumor_bam_rna, tumor_bai_rna] - } - - // Run process - ANNOTATE_FUSIONS( - ch_isofox_inputs, - isofox_read_length, - genome_fasta, - genome_version, - genome_fai, - ensembl_data_resources, - ) - - ch_versions = ch_versions.mix(ANNOTATE_FUSIONS.out.versions) - - // Set outputs, restoring original meta - // channel: [ meta, annotated_fusions ] - ch_annotate_fusions_out = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(ANNOTATE_FUSIONS.out.annotated_fusions, ch_inputs), - ch_isofox_inputs_sorted.skip.map { meta -> [meta, []] }, - ) - - - // - // MODULE: Neo scorer - // - // Select input sources and prepare input channel - // channel: [ meta_scorer, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotate_fusions ] - ch_scorer_inputs = WorkflowOncoanalyser.groupByMeta( - ch_isofox, - ch_purple, - ch_sage_somatic_append, - ch_lilac, - ch_finder_out, - ch_annotate_fusions_out, - ) - .map { meta, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotate_fusions -> - - def meta_scorer = [ - key: meta.group_id, - id: meta.group_id, - sample_id: Utils.getTumorDnaSampleName(meta), - cancer_type: meta[Constants.InfoField.CANCER_TYPE], - ] - - if (Utils.hasTumorRna(meta)) { - meta_scorer.sample_rna_id = Utils.getTumorRnaSampleName(meta) - } - - def inputs = [ - Utils.selectCurrentOrExisting(isofox_dir, meta, Constants.INPUT.ISOFOX_DIR), - Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), - Utils.selectCurrentOrExisting(sage_somatic_append, meta, Constants.INPUT.SAGE_APPEND_VCF_TUMOR), - Utils.selectCurrentOrExisting(lilac_dir, meta, Constants.INPUT.LILAC_DIR), - neo_finder_dir, - annotate_fusions, - ] - - return [meta_scorer, *inputs] - } - - // Run process - NEO_SCORER( - ch_scorer_inputs, - ensembl_data_resources, - neo_resources, - 
cohort_tpm_medians, - ) - - ch_versions = ch_versions.mix(NEO_SCORER.out.versions) + ch_versions = ch_versions.mix(NEO_SCORER.out.versions) emit: - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } From 9be849183e7cf8403a8cea73c9713941ca7c59c3 Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Thu, 30 May 2024 09:46:15 +1000 Subject: [PATCH 10/17] Use Bioconda for Neo annotate fusion process --- modules/local/neo/annotate_fusions/main.nf | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/local/neo/annotate_fusions/main.nf b/modules/local/neo/annotate_fusions/main.nf index 5e9af36e..44e59737 100644 --- a/modules/local/neo/annotate_fusions/main.nf +++ b/modules/local/neo/annotate_fusions/main.nf @@ -2,7 +2,10 @@ process ANNOTATE_FUSIONS { tag "${meta.id}" label 'process_medium' - container 'quay.io/biocontainers/hmftools-isofox:1.7.1--hdfd78af_0' + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/hmftools-isofox:1.7.1--hdfd78af_0' : + 'biocontainers/hmftools-isofox:1.7.1--hdfd78af_0' }" input: tuple val(meta), path(neo_finder_dir), path(bam), path(bai) From c2e15ea527eb5f06f9d360480cbe6d02e7661b66 Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Thu, 30 May 2024 09:46:35 +1000 Subject: [PATCH 11/17] Add environment file for annotate fusion process --- modules/local/neo/annotate_fusions/environment.yml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 modules/local/neo/annotate_fusions/environment.yml diff --git a/modules/local/neo/annotate_fusions/environment.yml b/modules/local/neo/annotate_fusions/environment.yml new file mode 100644 index 00000000..d4251c57 --- /dev/null +++ b/modules/local/neo/annotate_fusions/environment.yml @@ -0,0 +1,7 @@ +name: isofox +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-isofox=1.7.1 From 5085556f51b7ea51f9b745af72ab83631dcaea5c Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Thu, 30 May 2024 09:47:32 +1000 Subject: [PATCH 12/17] Rename some Neo subworkflow variables for clarity --- modules/local/neo/finder/main.nf | 4 ++-- modules/local/neo/scorer/main.nf | 5 ++--- subworkflows/local/neo_prediction/main.nf | 8 ++++---- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/modules/local/neo/finder/main.nf b/modules/local/neo/finder/main.nf index 4ff99a7b..7a5071ca 100644 --- a/modules/local/neo/finder/main.nf +++ b/modules/local/neo/finder/main.nf @@ -5,7 +5,7 @@ process NEO_FINDER { container 'docker.io/scwatts/neo:1.2_beta--1' input: - tuple val(meta), path(purple_dir), path(linx_dir) + tuple val(meta), path(purple_dir), path(linx_annotation_dir) path genome_fasta val genome_ver path genome_fai @@ -29,7 +29,7 @@ process NEO_FINDER { -jar ${task.ext.jarPath} \\ ${args} \\ -sample ${meta.sample_id} \\ - -linx_dir ${linx_dir} \\ + -linx_dir ${linx_annotation_dir} \\ -somatic_vcf 
${purple_dir}/${meta.sample_id}.purple.somatic.vcf.gz \\ -ref_genome ${genome_fasta} \\ -ref_genome_version ${genome_ver} \\ diff --git a/modules/local/neo/scorer/main.nf b/modules/local/neo/scorer/main.nf index ba98d833..056a8b1b 100644 --- a/modules/local/neo/scorer/main.nf +++ b/modules/local/neo/scorer/main.nf @@ -5,7 +5,7 @@ process NEO_SCORER { container 'docker.io/scwatts/neo:1.2_beta--1' input: - tuple val(meta), path(isofox_dir), path(purple_dir), path(sage_vcf), path(lilac_dir), path(neo_finder_dir), path(annotate_fusions) + tuple val(meta), path(isofox_dir), path(purple_dir), path(sage_vcf), path(lilac_dir), path(neo_finder_dir), path(annotated_fusions) path ensembl_data_resources path neo_resources, stageAs: 'neo_reference_data' path cohort_tpm_medians @@ -31,7 +31,7 @@ process NEO_SCORER { isofox_dir_local=isofox__prepared/; cp -rL ${isofox_dir} \${isofox_dir_local}/; - cp -r ${annotate_fusions} \${isofox_dir_local}/; + cp -r ${annotated_fusions} \${isofox_dir_local}/; isofox_dir_arg="-isofox_dir \${isofox_dir_local}"; fi; @@ -69,4 +69,3 @@ process NEO_SCORER { echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } - diff --git a/subworkflows/local/neo_prediction/main.nf b/subworkflows/local/neo_prediction/main.nf index 2a07a043..c069ed42 100644 --- a/subworkflows/local/neo_prediction/main.nf +++ b/subworkflows/local/neo_prediction/main.nf @@ -18,7 +18,7 @@ workflow NEO_PREDICTION { ch_purple // channel: [mandatory] [ meta, purple_dir ] ch_sage_somatic_append // channel: [mandatory] [ meta, sage_append_vcf ] ch_lilac // channel: [mandatory] [ meta, lilac_dir ] - ch_linx // channel: [mandatory] [ meta, linx_dir ] + ch_linx // channel: [mandatory] [ meta, linx_annotation_dir ] // Reference data genome_fasta // channel: [mandatory] /path/to/genome_fasta @@ -165,7 +165,7 @@ workflow NEO_PREDICTION { // MODULE: Neo scorer // // Select input sources and prepare input channel - // channel: [ meta_scorer, isofox_dir, purple_dir, 
sage_somatic_append, lilac_dir, neo_finder_dir, annotate_fusions ] + // channel: [ meta_scorer, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotated_fusions ] ch_scorer_inputs = WorkflowOncoanalyser.groupByMeta( ch_isofox, ch_purple, @@ -174,7 +174,7 @@ workflow NEO_PREDICTION { ch_finder_out, ch_annotate_fusions_out, ) - .map { meta, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotate_fusions -> + .map { meta, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotated_fusions -> def meta_scorer = [ key: meta.group_id, @@ -193,7 +193,7 @@ workflow NEO_PREDICTION { Utils.selectCurrentOrExisting(sage_somatic_append, meta, Constants.INPUT.SAGE_APPEND_VCF_TUMOR), Utils.selectCurrentOrExisting(lilac_dir, meta, Constants.INPUT.LILAC_DIR), neo_finder_dir, - annotate_fusions, + annotated_fusions, ] return [meta_scorer, *inputs] From 8a488dba8e1b2a1f281bd3ac0e68359ca7221596 Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Thu, 30 May 2024 09:47:54 +1000 Subject: [PATCH 13/17] Add meta.yml files for processes related to Neo --- modules/local/neo/annotate_fusions/meta.yml | 63 +++++++++++++++++++++ modules/local/neo/finder/meta.yml | 52 +++++++++++++++++ modules/local/neo/scorer/meta.yml | 62 ++++++++++++++++++++ 3 files changed, 177 insertions(+) create mode 100644 modules/local/neo/annotate_fusions/meta.yml create mode 100644 modules/local/neo/finder/meta.yml create mode 100644 modules/local/neo/scorer/meta.yml diff --git a/modules/local/neo/annotate_fusions/meta.yml b/modules/local/neo/annotate_fusions/meta.yml new file mode 100644 index 00000000..83a3fb0b --- /dev/null +++ b/modules/local/neo/annotate_fusions/meta.yml @@ -0,0 +1,63 @@ +name: annotate_fusions +description: Annotate neoepitopes with RNA fusion data +keywords: + - neoepitopes + - rna + - rnaseq +tools: + - isofox: + description: Characterises and counts gene, transcript features + homepage: 
https://github.com/hartwigmedical/hmftools/tree/master/isofox + documentation: https://github.com/hartwigmedical/hmftools/tree/master/isofox + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - neo_finder_dir: + type: directory + description: Neo Finder directory + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - bai: + type: file + description: BAI file + pattern: "*.{bai}" + - read_length: + type: integer + description: Read length + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_ver: + type: string + description: Reference genome version + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - ensembl_data_resources: + type: directory + description: HMF ensembl data resources directory +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - annotated_fusions: + type: file + description: Annotated neoepitopes file + pattern: "*.{tsv}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" + - "@charlesshale" diff --git a/modules/local/neo/finder/meta.yml b/modules/local/neo/finder/meta.yml new file mode 100644 index 00000000..01dc2fcb --- /dev/null +++ b/modules/local/neo/finder/meta.yml @@ -0,0 +1,52 @@ +name: neo_finder +description: Identify candidate neoepitopes +keywords: + - neoepitopes +tools: + - neo: + description: Predict and score neoepitopes + homepage: https://github.com/hartwigmedical/hmftools/tree/master/neo + documentation: https://github.com/hartwigmedical/hmftools/tree/master/neo + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id'] + - purple_dir: + type: directory + description: PURPLE output directory + - linx_annotation_dir: + type: directory + description: LINX somatic annotation output directory + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_ver: + type: string + description: Reference genome version + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - ensembl_data_resources: + type: directory + description: HMF ensembl data resources directory +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - neo_finder_dir: + type: directory + description: Neo Finder output directory + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" + - "@charlesshale" diff --git a/modules/local/neo/scorer/meta.yml b/modules/local/neo/scorer/meta.yml new file mode 100644 index 00000000..c870da4d --- /dev/null +++ b/modules/local/neo/scorer/meta.yml @@ -0,0 +1,62 @@ +name: neo_scorer +description: Score and rank candidate neoepitopes +keywords: + - neoepitopes +tools: + - neo: + description: Predict and score neoepitopes + homepage: https://github.com/hartwigmedical/hmftools/tree/master/neo + documentation: https://github.com/hartwigmedical/hmftools/tree/master/neo + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id'] + - isofox_dir: + type: directory + description: Isofox output directory (optional) + - purple_dir: + type: directory + description: PURPLE output directory + - sage_vcf: + type: file + description: SAGE VCF file + pattern: "*.{vcf.gz}" + - lilac_dir: + type: directory + description: LILAC output directory + - neo_finder_dir: + type: directory + description: Neo Finder output directory + - annotated_fusions: + type: file + description: Annotated neoepitopes file + pattern: "*.{tsv}" + - ensembl_data_resources: + type: directory + description: HMF ensembl data resources directory + - neo_resources: + type: directory + description: HMF Neo resources directory + - cohort_tpm_medians: + type: file + description: HMF cohort TPM medians file + pattern: "*.{csv}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - neo_score_dir: + type: directory + description: Neo Scorer output directory + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" + - "@charlesshale" From 93ab00704df6dbe36923929390a02bee3f1edc7c Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Fri, 21 Jun 2024 17:06:00 +1000 Subject: [PATCH 14/17] Enable SAGE append when Neo is set to run --- workflows/wgts.nf | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/workflows/wgts.nf b/workflows/wgts.nf index d6f3bb6e..787d1d44 100644 --- a/workflows/wgts.nf +++ b/workflows/wgts.nf @@ -468,9 +468,7 @@ workflow WGTS { // channel: [ meta, sage_append_vcf ] ch_sage_somatic_append_out = Channel.empty() ch_sage_germline_append_out = Channel.empty() - if (run_config.stages.orange) { - - // NOTE(SW): currently used only for ORANGE but will also be used for Neo once implemented + if (run_config.stages.orange || run_config.stages.neo) { SAGE_APPEND( ch_inputs, From 175a7d770aa621cf1e9d9fa8448ec7b8cb2fbd6f Mon Sep 17 00:00:00 2001 From: Charles Shale 
Date: Mon, 24 Jun 2024 13:22:34 +1000 Subject: [PATCH 15/17] Linx somatic writes neoepitopes --- modules/local/linx/somatic/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/local/linx/somatic/main.nf b/modules/local/linx/somatic/main.nf index 5218a8f0..04b78f89 100644 --- a/modules/local/linx/somatic/main.nf +++ b/modules/local/linx/somatic/main.nf @@ -38,6 +38,7 @@ process LINX_SOMATIC { -known_fusion_file ${known_fusion_data} \\ -driver_gene_panel ${driver_gene_panel} \\ -write_vis_data \\ + -write_neo_epitopes \\ -output_dir linx_somatic/ cat <<-END_VERSIONS > versions.yml From ef7db8c388b11f348c929b5a6d337da67a76c8f1 Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Tue, 25 Jun 2024 12:16:14 +1000 Subject: [PATCH 16/17] Add reference to Neo in documentation --- README.md | 1 + docs/output.md | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/README.md b/README.md index 1e6760a7..10e016c2 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,7 @@ The following processes and tools can be run with `oncoanalyser`: - HRD status prediction (`CHORD`) - Mutational signature fitting (`Sigs`) - Tissue of origin prediction (`CUPPA`) +- Neoepitope prediction (`Neo`) - Report generation (`ORANGE`, `linxreport`) ## Usage diff --git a/docs/output.md b/docs/output.md index 59f9f0c6..6fb6596b 100644 --- a/docs/output.md +++ b/docs/output.md @@ -71,6 +71,8 @@ output/ - [Sigs](#sigs) - Mutational signature fitting - [Tissue of origin prediction](#tissue-of-origin-prediction) - [CUPPA](#cuppa) - Tissue of origin prediction +- [Neoepitope prediction](#neoepitope-prediction) + - [Neo](#neo) - Neoepitope prediction - [Report generation](#report-generation) - [ORANGE](#orange) - Key results summary - [linxreport](#linxreport) - Interactive LINX report @@ -473,6 +475,23 @@ signatures to tumor sample data. 
[CUPPA](https://github.com/hartwigmedical/hmftools/tree/master/cuppa) predicts tissue of origin for a given tumor sample using DNA and/or RNA features generated by upstream hmftools components. +### Neoepitope prediction + +#### Neo + +
+Output files + +- `/neo/` + - `.neo.neo_data.tsv`: Neoepitope candidates. + - `.neo.neoepitope.tsv`: LINX fusion neoepitopes. + - `.neo.peptide_scores.tsv`: Peptide binding likelihood and scoring. + +
+ +[Neo](https://github.com/hartwigmedical/hmftools/tree/master/neo) builds comprehensive neoepitope predictions from DNA +data with additional annotations made using RNA data. + ### Report generation #### ORANGE From 8b15c62f6847e6bc649fd96fd3ee737fcd3bd45f Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Tue, 10 Sep 2024 09:47:40 +1000 Subject: [PATCH 17/17] Do not run SAGE append germline for Neo --- subworkflows/local/sage_append/main.nf | 5 ++++- workflows/targeted.nf | 1 + workflows/wgts.nf | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/sage_append/main.nf b/subworkflows/local/sage_append/main.nf index a7ded0be..39c48362 100644 --- a/subworkflows/local/sage_append/main.nf +++ b/subworkflows/local/sage_append/main.nf @@ -21,6 +21,9 @@ workflow SAGE_APPEND { genome_fai // channel: [mandatory] /path/to/genome_fai genome_dict // channel: [mandatory] /path/to/genome_dict + // Params + run_germline // boolean: [mandatory] Run germline flag + main: // Channel for version.yml files // channel: [ versions.yml ] @@ -63,7 +66,7 @@ workflow SAGE_APPEND { def has_smlv_germline = file(purple_dir).resolve("${tumor_dna_id}.purple.germline.vcf.gz") def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_APPEND_VCF_NORMAL) - runnable: has_normal_dna && has_tumor_rna && has_smlv_germline && !has_existing + runnable: has_normal_dna && has_tumor_rna && has_smlv_germline && !has_existing && run_germline skip: true return meta } diff --git a/workflows/targeted.nf b/workflows/targeted.nf index 5b0b92bd..9189ef93 100644 --- a/workflows/targeted.nf +++ b/workflows/targeted.nf @@ -478,6 +478,7 @@ workflow TARGETED { ref_data.genome_version, ref_data.genome_fai, ref_data.genome_dict, + true, // run_germline ) ch_versions = ch_versions.mix(SAGE_APPEND.out.versions) diff --git a/workflows/wgts.nf b/workflows/wgts.nf index 787d1d44..98c808b7 100644 --- a/workflows/wgts.nf +++ b/workflows/wgts.nf @@ -478,6 +478,7 @@ workflow WGTS { 
ref_data.genome_version, ref_data.genome_fai, ref_data.genome_dict, + run_config.stages.orange, // run_germline [run for ORANGE but not Neo] ) ch_versions = ch_versions.mix(SAGE_APPEND.out.versions)