From 90a3a78a87759d3a0db11a0ecd40da56ce7926ec Mon Sep 17 00:00:00 2001
From: Stephen Watts <hello@stephen.ac>
Date: Thu, 1 Feb 2024 15:15:07 +1100
Subject: [PATCH 01/17] Frame out Neo subworkflow

---
 conf/modules.config                  |  9 ++++
 lib/Constants.groovy                 |  1 +
 modules/local/neo/Dockerfile         | 22 ++++++++++
 modules/local/neo/main.nf            | 44 +++++++++++++++++++
 modules/local/neo/meta.yml           |  0
 subworkflows/local/neo_prediction.nf | 66 ++++++++++++++++++++++++++++
 workflows/wgts.nf                    | 19 ++++++++
 7 files changed, 161 insertions(+)
 create mode 100644 modules/local/neo/Dockerfile
 create mode 100644 modules/local/neo/main.nf
 create mode 100644 modules/local/neo/meta.yml
 create mode 100644 subworkflows/local/neo_prediction.nf

diff --git a/conf/modules.config b/conf/modules.config
index cc09c657..74da16c7 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -218,6 +218,15 @@ process {
         ]
     }
 
+    withName: 'NEO' {
+        ext.jarPath = '/opt/neo/neo.jar'
+        publishDir = [
+            path: { "${params.outdir}" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" },
+        ]
+    }
+
     withName: 'CUPPA' {
         publishDir = [
             path: { "${params.outdir}" },
diff --git a/lib/Constants.groovy b/lib/Constants.groovy
index cb827975..15d41689 100644
--- a/lib/Constants.groovy
+++ b/lib/Constants.groovy
@@ -47,6 +47,7 @@ class Constants {
         LILAC,
         LINX,
         MARKDUPS,
+        NEO,
         ORANGE,
         PAVE,
         PURPLE,
diff --git a/modules/local/neo/Dockerfile b/modules/local/neo/Dockerfile
new file mode 100644
index 00000000..5325eb6c
--- /dev/null
+++ b/modules/local/neo/Dockerfile
@@ -0,0 +1,22 @@
+FROM mambaorg/micromamba:0.24.0
+
+USER root
+
+RUN \
+  apt-get update && \
+  apt-get install -y procps wget && \
+  apt-get clean && \
+  rm -rf /var/lib/apt/lists/*
+
+RUN \
+  mkdir -p /opt/neo/ && \
+  wget -O /opt/neo/neo.jar 'https://github.com/hartwigmedical/hmftools/releases/download/neo-v1.1_beta/neo_v1.1.jar'
+
+USER mambauser
+
+RUN \
+  micromamba install -y -n base -c bioconda -c conda-forge \
+    'openjdk >=8' && \
+  micromamba clean --all --yes
+
+ENV PATH="/opt/conda/bin:/opt/conda/condabin:${PATH}"
diff --git a/modules/local/neo/main.nf b/modules/local/neo/main.nf
new file mode 100644
index 00000000..287ccfe6
--- /dev/null
+++ b/modules/local/neo/main.nf
@@ -0,0 +1,44 @@
+process NEO {
+    tag "${meta.id}"
+    label 'process_medium'
+
+    container 'docker.io/scwatts/neo:1.1_beta--0'
+
+    input:
+    val(meta)
+
+    //tuple val(meta), path(tumor_bam), path(normal_bam), path(tumor_bai), path(normal_bai)
+    //path genome_fasta
+    //val genome_ver
+    //path ensembl_data_resources
+
+    output:
+    tuple val(meta), path('neo/'), emit: neo_dir
+    path 'versions.yml'          , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+
+    """
+    java \\
+        -Xmx${Math.round(task.memory.bytes * 0.95)} \\
+        -jar ${task.ext.jarPath} \\
+            ${args} \\
+            XXX
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        neo: \$(java -jar ${task.ext.jarPath} -version | sed 's/^.* //')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    mkdir -p neo/
+    echo -e '${task.process}:\\n  stub: noversions\\n' > versions.yml
+    """
+}
+
diff --git a/modules/local/neo/meta.yml b/modules/local/neo/meta.yml
new file mode 100644
index 00000000..e69de29b
diff --git a/subworkflows/local/neo_prediction.nf b/subworkflows/local/neo_prediction.nf
new file mode 100644
index 00000000..c57037fb
--- /dev/null
+++ b/subworkflows/local/neo_prediction.nf
@@ -0,0 +1,66 @@
+//
+// XXX
+//
+
+import Constants
+import Utils
+
+include { ISOFOX               } from '../../modules/local/isofox/main'
+include { LILAC                } from '../../modules/local/lilac/main'
+include { NEO as NEO_PREDICTOR } from '../../modules/local/neo/main'
+include { NEO as NEO_SCORER    } from '../../modules/local/neo/main'
+
+workflow NEO_PREDICTION{
+    take:
+        // Sample data
+        ch_inputs              // channel: [mandatory] [ meta ]
+        ch_isofox              // channel: [mandatory] [ meta, isofox_dir ]
+        ch_purple              // channel: [mandatory] [ meta, purple_dir ]
+        ch_sage_somatic_append // channel: [mandatory] [ meta, sage_append_vcf ]
+        ch_lilac               // channel: [mandatory] [ meta, lilac_dir ]
+        ch_linx                // channel: [mandatory] [ meta, linx_dir ]
+
+        //// Reference data
+        //genome_fasta           // channel: [mandatory] /path/to/genome_fasta
+        //genome_fai             // channel: [mandatory] /path/to/genome_fai
+        //genome_dict            // channel: [mandatory] /path/to/genome_dict
+
+        // other reference data, or placeholders
+
+    main:
+        // Channel for versions.yml files
+        // channel: [ versions.yml ]
+        ch_versions = Channel.empty()
+
+        // Neo prediction
+        // 1. select input sources after combine required channels (i.e ch_purple, ch_linx)
+        // 2. get runnable subjects/inputs
+        // 3. format input channel
+        // 4. run process NEO_PREDICTOR
+        // 5. restore meta, set skip entries
+
+        // Feeding the Neo process raw inputs for demo purposes only
+        NEO_PREDICTOR(ch_inputs)
+
+        // Isofox annotation
+        // 1. take outputs from NEO_PREDICTOR
+        // 2. format input channel
+        // 3. run process ISOFOX (also requires changes to process to enable alt. run mode)
+        // 4. restore meta
+
+        // Assuming SAGE append will be appropriately handled upstream. Need to discuss some options:
+        //   * -bqr_enabled true
+        //   * -max_read_depth 100000
+
+        // Neo score
+        // 1. select input sources after combine required channels (i.e ch_purple, ch_lilac, ch_isofox, above process outputs)
+        // 2. get runnable subjects/inputs
+        // 3. format input channel
+        // 4. run NEO_SCORER
+
+        // Feeding the Neo process raw inputs for demo purposes only
+        NEO_SCORER(ch_inputs)
+
+    emit:
+        versions = ch_versions // channel: [ versions.yml ]
+}
diff --git a/workflows/wgts.nf b/workflows/wgts.nf
index 0541966d..adb2d85d 100644
--- a/workflows/wgts.nf
+++ b/workflows/wgts.nf
@@ -67,6 +67,7 @@ include { ISOFOX_QUANTIFICATION } from '../subworkflows/local/isofox_quantificat
 include { LILAC_CALLING         } from '../subworkflows/local/lilac_calling'
 include { LINX_ANNOTATION       } from '../subworkflows/local/linx_annotation'
 include { LINX_PLOTTING         } from '../subworkflows/local/linx_plotting'
+include { NEO_PREDICTION        } from '../subworkflows/local/neo_prediction'
 include { ORANGE_REPORTING      } from '../subworkflows/local/orange_reporting'
 include { PAVE_ANNOTATION       } from '../subworkflows/local/pave_annotation'
 include { PREPARE_REFERENCE     } from '../subworkflows/local/prepare_reference'
@@ -708,6 +709,24 @@ workflow WGTS {
 
     }
 
+    //
+    // SUBWORKFLOW: XXX
+    //
+    if (run_config.stages.neo) {
+
+        NEO_PREDICTION(
+            ch_inputs,
+            ch_isofox_out,
+            ch_purple_out,
+            ch_sage_somatic_append_out,
+            ch_lilac_out,
+            ch_linx_somatic_out,
+        )
+
+        ch_versions = ch_versions.mix(NEO_PREDICTION.out.versions)
+
+    }
+
     //
     // SUBWORKFLOW: Run CUPPA predict tissue of origin
     //

From 4913bb83b9b5d50100eb9e134e30079302b52d14 Mon Sep 17 00:00:00 2001
From: Charles Shale <shalecharles@gmail.com>
Date: Mon, 5 Feb 2024 13:38:52 +1100
Subject: [PATCH 02/17] First cut of Neoepitope subworkflow

---
 conf/hmf_data.config                 |   6 +-
 conf/modules.config                  |   2 +-
 modules/local/neo/finder/main.nf     |  50 ++++++
 modules/local/neo/isofox/main.nf     |  56 +++++++
 modules/local/neo/main.nf            |  44 -----
 modules/local/neo/scorer/main.nf     |  66 ++++++++
 subworkflows/local/neo_prediction.nf | 233 +++++++++++++++++++++++----
 workflows/wgts.nf                    |   6 +
 8 files changed, 388 insertions(+), 75 deletions(-)
 create mode 100644 modules/local/neo/finder/main.nf
 create mode 100644 modules/local/neo/isofox/main.nf
 delete mode 100644 modules/local/neo/main.nf
 create mode 100644 modules/local/neo/scorer/main.nf

diff --git a/conf/hmf_data.config b/conf/hmf_data.config
index 6fdcba0b..3c17ba27 100644
--- a/conf/hmf_data.config
+++ b/conf/hmf_data.config
@@ -20,9 +20,13 @@ params {
             isofox_gc_ratios              = 'rna_pipeline/read_100_exp_gc_ratios.csv'
             // LILAC
             lilac_resources               = 'dna_pipeline/immune/'
+            // NEO
+            neo_resources                 = 'neo/'
             // ORANGE
             cohort_mapping                = 'orange/cohort_mapping.tsv'
-            cohort_percentiles            = 'orange/cohort_percentiles.tsv'
+            // RNA COHORT DATA
+            cohort_percentiles            = 'orange/cohort_percentiles.tsv' // consider renaming and moving to rna_pipeline
+            cohort_tpm_medians            = 'rna_pipeline/cohort_tpm_medians.tsv'
             alt_sj_distribution           = 'rna_pipeline/isofox.hmf_3444.alt_sj_cohort.37.csv'
             gene_exp_distribution         = 'rna_pipeline/isofox.hmf_3444.gene_distribution.37.csv'
             // SAGE
diff --git a/conf/modules.config b/conf/modules.config
index 74da16c7..dd090cdd 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -218,7 +218,7 @@ process {
         ]
     }
 
-    withName: 'NEO' {
+    withName: 'NEO_.*' {
         ext.jarPath = '/opt/neo/neo.jar'
         publishDir = [
             path: { "${params.outdir}" },
diff --git a/modules/local/neo/finder/main.nf b/modules/local/neo/finder/main.nf
new file mode 100644
index 00000000..60cbf2e3
--- /dev/null
+++ b/modules/local/neo/finder/main.nf
@@ -0,0 +1,65 @@
+// Runs Neo's NeoEpitopeFinder for one sample and collects its output directory.
+//
+// Inputs:
+//   meta                   - map; meta.id tags the task and meta.sample_id is passed to -sample
+//   purple_dir             - directory holding <sample_id>.purple.somatic.vcf.gz (passed to -somatic_vcf)
+//   linx_dir               - directory passed to -linx_dir
+//   genome_fasta           - reference FASTA passed to -ref_genome
+//   genome_ver             - value passed to -ref_genome_version
+//   ensembl_data_resources - directory passed to -ensembl_data_dir
+// Outputs:
+//   neo_finder_dir         - [ meta, neo/ ]
+//   versions               - versions.yml
+process NEO_FINDER {
+    tag "${meta.id}"
+    label 'process_low'
+
+    container 'docker.io/scwatts/neo:1.1_beta--0'
+
+    input:
+    tuple val(meta), path(purple_dir), path(linx_dir)
+    path genome_fasta
+    val genome_ver
+    path ensembl_data_resources
+
+    output:
+    tuple val(meta), path('neo/'), emit: neo_finder_dir
+    path 'versions.yml'          , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+
+    // The process output is the neo/ directory: create it and point -output_dir at it
+    """
+    mkdir -p neo/
+
+    java \\
+        -Xmx${Math.round(task.memory.bytes * 0.95)} \\
+        -cp ${task.ext.jarPath} \\
+        com.hartwig.hmftools.neo.epitope.NeoEpitopeFinder \\
+            ${args} \\
+            -sample ${meta.sample_id} \\
+            -linx_dir ${linx_dir} \\
+            -somatic_vcf ${purple_dir}/${meta.sample_id}.purple.somatic.vcf.gz \\
+            -ref_genome ${genome_fasta} \\
+            -ref_genome_version ${genome_ver} \\
+            -ensembl_data_dir ${ensembl_data_resources} \\
+            -output_dir neo/ \\
+            -log_debug
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        neo: \$(java -jar ${task.ext.jarPath} -version | sed 's/^.* //')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    mkdir -p neo/
+    echo -e '${task.process}:\\n  stub: noversions\\n' > versions.yml
+    """
+}
+
diff --git a/modules/local/neo/isofox/main.nf b/modules/local/neo/isofox/main.nf
new file mode 100644
index 00000000..4ca86ba5
--- /dev/null
+++ b/modules/local/neo/isofox/main.nf
@@ -0,0 +1,56 @@
+process ISOFOX_NEO {
+    tag "${meta.id}"
+    label 'process_medium'
+
+    container 'docker.io/scwatts/isofox:1.7.1--0'
+
+    input:
+    tuple val(meta), path(bam), path(bai)
+    val functions
+    val read_length
+    path genome_fasta
+    val genome_ver
+    path genome_fai
+    path ensembl_data_resources
+
+    output:
+    tuple val(meta), path('isofox/'), emit: isofox_dir
+    path 'versions.yml'             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+
+    """
+    mkdir -p isofox/
+
+    java \\
+        -Xmx${Math.round(task.memory.bytes * 0.95)} \\
+        -jar ${task.ext.jarPath} \\
+            ${args} \\
+            -sample ${meta.sample_id} \\
+            -bam_file ${bam} \\
+            -functions NEO_EPITOPES \\
+            -neoepitope_file ${neo_finder_dir}/${meta.sample_id}.neo.neo_data.tsv \\
+            -read_length ${read_length} \\
+            -ref_genome ${genome_fasta} \\
+            -ref_genome_version ${genome_ver} \\
+            -ensembl_data_dir ${ensembl_data_resources} \\
+            -threads ${task.cpus} \\
+            -output_dir isofox/
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        isofox: \$(java -jar ${task.ext.jarPath} -version | sed 's/^.* //')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    mkdir -p isofox/
+    touch isofox/placeholder
+    echo -e '${task.process}:\\n  stub: noversions\\n' > versions.yml
+    """
+}
diff --git a/modules/local/neo/main.nf b/modules/local/neo/main.nf
deleted file mode 100644
index 287ccfe6..00000000
--- a/modules/local/neo/main.nf
+++ /dev/null
@@ -1,44 +0,0 @@
-process NEO {
-    tag "${meta.id}"
-    label 'process_medium'
-
-    container 'docker.io/scwatts/neo:1.1_beta--0'
-
-    input:
-    val(meta)
-
-    //tuple val(meta), path(tumor_bam), path(normal_bam), path(tumor_bai), path(normal_bai)
-    //path genome_fasta
-    //val genome_ver
-    //path ensembl_data_resources
-
-    output:
-    tuple val(meta), path('neo/'), emit: neo_dir
-    path 'versions.yml'          , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-
-    """
-    java \\
-        -Xmx${Math.round(task.memory.bytes * 0.95)} \\
-        -jar ${task.ext.jarPath} \\
-            ${args} \\
-            XXX
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        neo: \$(java -jar ${task.ext.jarPath} -version | sed 's/^.* //')
-    END_VERSIONS
-    """
-
-    stub:
-    """
-    mkdir -p neo/
-    echo -e '${task.process}:\\n  stub: noversions\\n' > versions.yml
-    """
-}
-
diff --git a/modules/local/neo/scorer/main.nf b/modules/local/neo/scorer/main.nf
new file mode 100644
index 00000000..7dd7ec09
--- /dev/null
+++ b/modules/local/neo/scorer/main.nf
@@ -0,0 +1,86 @@
+// Runs Neo's NeoScorer for one sample using the neoepitopes found by NeoEpitopeFinder.
+//
+// Inputs:
+//   meta               - map; meta.id tags the task, meta.sample_id is passed to -sample and, when the key
+//                        sample_rna_id is present, the RNA-specific arguments are added
+//   purple_dir         - directory passed to -purple_dir
+//   isofox_dir         - staged Isofox directory (not yet referenced by the command, see note in script)
+//   lilac_dir          - directory passed to -lilac_dir
+//   isofox_neo_dir     - staged Isofox neoepitope directory (not yet referenced by the command)
+//   neo_finder_dir     - directory passed to -neo_dir
+//   genome_fasta, genome_ver, ensembl_data_resources - reference data arguments
+//   neo_resources      - directory passed to -score_file_dir
+//   cohort_tpm_medians - file passed to -cancer_tpm_medians_file
+// Outputs:
+//   neo_scorer_dir     - [ meta, neo/ ]
+//   versions           - versions.yml
+process NEO_SCORER {
+    tag "${meta.id}"
+    label 'process_medium'
+
+    container 'docker.io/scwatts/neo:1.1_beta--0'
+
+    input:
+    tuple val(meta), path(purple_dir), path(isofox_dir), path(lilac_dir), path(isofox_neo_dir), path(neo_finder_dir)
+    path genome_fasta
+    val genome_ver
+    path ensembl_data_resources
+    path neo_resources
+    path cohort_tpm_medians
+
+    output:
+    tuple val(meta), path('neo/'), emit: neo_scorer_dir
+    path 'versions.yml'          , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+
+    def rna_sample_arg = meta.containsKey('sample_rna_id') ? "-rna_sample ${meta.sample_rna_id}" : ''
+    def rna_somatic_vcf_arg = meta.containsKey('sample_rna_id') ? "-rna_somatic_vcf ${purple_dir}/${meta.sample_id}.sage_append.vcf.gz" : ''
+
+    // NeoScorer expects the fusion-neoepitopes which Isofox has annotated with RNA to be in the Isofox directory, so put them
+    // and the standard Isofox files (just TPM is used) into a new shared directory
+    // ie isofox_neo_dir + neo_finder_dir -> new directory for isofox data -> passed into -isofox_dir
+    // NOTE(review): the shared directory is not built yet; '/path/isofox_combined_dir' below is still a placeholder
+    def isofox_dir_arg = meta.containsKey('sample_rna_id') ? "-isofox_dir /path/isofox_combined_dir" : ''
+
+    // The process output is the neo/ directory: create it and point -output_dir at it
+    """
+    mkdir -p neo/
+
+    java \\
+        -Xmx${Math.round(task.memory.bytes * 0.95)} \\
+        -cp ${task.ext.jarPath} \\
+        com.hartwig.hmftools.neo.score.NeoScorer \\
+            ${args} \\
+            -sample ${meta.sample_id} \\
+            ${rna_sample_arg} \\
+            -purple_dir ${purple_dir} \\
+            -lilac_dir ${lilac_dir} \\
+            ${isofox_dir_arg} \\
+            ${rna_somatic_vcf_arg} \\
+            -neo_dir ${neo_finder_dir} \\
+            -ref_genome ${genome_fasta} \\
+            -ref_genome_version ${genome_ver} \\
+            -ensembl_data_dir ${ensembl_data_resources} \\
+            -score_file_dir ${neo_resources} \\
+            -cancer_tpm_medians_file ${cohort_tpm_medians} \\
+            -output_dir neo/ \\
+            -log_debug
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        neo: \$(java -jar ${task.ext.jarPath} -version | sed 's/^.* //')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    mkdir -p neo/
+    echo -e '${task.process}:\\n  stub: noversions\\n' > versions.yml
+    """
+}
+
diff --git a/subworkflows/local/neo_prediction.nf b/subworkflows/local/neo_prediction.nf
index c57037fb..3140fa53 100644
--- a/subworkflows/local/neo_prediction.nf
+++ b/subworkflows/local/neo_prediction.nf
@@ -7,8 +7,8 @@ import Utils
 
 include { ISOFOX               } from '../../modules/local/isofox/main'
 include { LILAC                } from '../../modules/local/lilac/main'
-include { NEO as NEO_PREDICTOR } from '../../modules/local/neo/main'
-include { NEO as NEO_SCORER    } from '../../modules/local/neo/main'
+include { NEO_FINDER           } from '../../modules/local/neo/finder/main'
+include { NEO_SCORER           } from '../../modules/local/neo/scorer/main'
 
 workflow NEO_PREDICTION{
     take:
@@ -20,46 +20,221 @@ workflow NEO_PREDICTION{
         ch_lilac               // channel: [mandatory] [ meta, lilac_dir ]
         ch_linx                // channel: [mandatory] [ meta, linx_dir ]
 
-        //// Reference data
-        //genome_fasta           // channel: [mandatory] /path/to/genome_fasta
-        //genome_fai             // channel: [mandatory] /path/to/genome_fai
-        //genome_dict            // channel: [mandatory] /path/to/genome_dict
-
-        // other reference data, or placeholders
+        // Reference data
+        genome_version         // channel: [mandatory] genome version
+        genome_fasta           // channel: [mandatory] /path/to/genome_fasta
+        genome_fai             // channel: [mandatory] /path/to/genome_fai
+        ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/
+        neo_resources          // channel: [mandatory] /path/to/neo_resources/
+        cohort_tpm_medians     // channel: [mandatory] /path/to/cohort_tpm_medians/
 
     main:
         // Channel for versions.yml files
         // channel: [ versions.yml ]
         ch_versions = Channel.empty()
 
-        // Neo prediction
-        // 1. select input sources after combine required channels (i.e ch_purple, ch_linx)
-        // 2. get runnable subjects/inputs
-        // 3. format input channel
-        // 4. run process NEO_PREDICTOR
-        // 5. restore meta, set skip entries
+        // Step 1: Identify neoepitopes from Purple somatic variants and Linx's (neoepitope) fusions
+
+        // Select input sources
+        // channel: [ meta, purple_dir, linx_annotation_dir ]
+        ch_inputs_finder_selected = WorkflowOncoanalyser.groupByMeta(
+            ch_purple,
+            ch_linx,
+        )
+            .map { meta, purple_dir, linx_annotation_dir ->
+
+                def inputs = [
+                    Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR),
+                    Utils.selectCurrentOrExisting(linx_annotation_dir, meta, Constants.INPUT.LINX_ANNO_DIR_TUMOR),
+                ]
+
+                return [meta, *inputs]
+            }
+
+        // Sort inputs
+        // channel: runnable: [ meta, purple_dir, linx_annotation_dir ]
+        // channel: skip: [ meta ]
+        ch_inputs_finder_sorted = ch_inputs_finder_selected
+            .branch { meta, purple_dir, linx_annotation_dir ->
+
+                def has_normal_dna = Utils.hasNormalDnaBam(meta)
+
+                def has_runnable_inputs = purple_dir && linx_annotation_dir && has_normal_dna
+
+                runnable: has_runnable_inputs
+                skip: true
+                    return meta
+            }
+
+        // Create process input channel
+        // channel: sample_data: [ meta, purple_dir, linx_annotation_dir ]
+        ch_finder_inputs = ch_inputs_finder_sorted.runnable
+            .map{ meta, purple_dir, linx_annotation_dir ->
+
+                def meta_neo_finder = [
+                    key: meta.group_id,
+                    id: meta.group_id,
+                    sample_id: Utils.getTumorDnaSampleName(meta),
+                ]
+
+                return [meta_neo_finder, purple_dir, linx_annotation_dir]
+            }
+
 
         // Feeding the Neo process raw inputs for demo purposes only
-        NEO_PREDICTOR(ch_inputs)
+        NEO_FINDER(
+            ch_finder_inputs,
+            genome_fasta,
+            genome_version,
+            ensembl_data_resources,
+        )
+
+        ch_versions = ch_versions.mix(NEO_FINDER.out.versions)
+
+        // Set outputs, restoring original meta
+        // channel: [ meta, neo_finder_dir ]
+        ch_finder_outputs = WorkflowOncoanalyser.restoreMeta(NEO_FINDER.out.neo_finder_dir, ch_inputs)
+
+        // Step 2: When RNA is present, annotate the fusion-derived neoepitope with RNA using Isofox
+
+        /*
+
+        // Select input sources
+        // channel: [ meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ]
+        ch_inputs_isofox_sorted = WorkflowOncoanalyser.groupByMeta(
+            ch_finder_outputs,
+            // channel: [ meta, tumor_rna_bam (optional), tumor_rna_bai (optional) ]
+            ch_inputs
+                .map { meta ->
+                    def has_rna = Utils.hasTumorRnaBam(meta)
+
+                    return [
+                        meta,
+                        has_rna ? Utils.getTumorRnaBam(meta) : [],
+                        has_rna ? Utils.getTumorRnaBai(meta) : [],
+                    ]
+                },
+
+            )
+
+        // Sort inputs
+        ch_inputs_isofox_sorted = ch_finder_outputs
+            .branch {
+
+                def has_rna = Utils.hasTumorRnaBam(meta)
+
 
-        // Isofox annotation
-        // 1. take outputs from NEO_PREDICTOR
-        // 2. format input channel
-        // 3. run process ISOFOX (also requires changes to process to enable alt. run mode)
-        // 4. restore meta
 
-        // Assuming SAGE append will be appropriately handled upstream. Need to discuss some options:
-        //   * -bqr_enabled true
-        //   * -max_read_depth 100000
+                runnable:
+                skip:
+                    meta
+
+            }
+
+        // Create process input channel
+        // channel: [ meta_isofox, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ]
+        ch_isofox_inputs = ch_inputs_isofox_sorted.runnable
+            .map { meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ->
+
+                def meta_isofox = [
+                    key: meta.group_id,
+                    id: meta.group_id,
+                    sample_id: Utils.getTumorDnaSampleName(meta),
+                ]
+
+                return [meta_isofox, Utils.getTumorRnaBam(meta), Utils.getTumorRnaBai(meta)]
+            }
+
+        // Run process
+        ISOFOX_NEO(
+            ch_isofox_inputs,
+            isofox_read_length,
+            genome_fasta,
+            genome_version,
+            genome_fai,
+            ensembl_data_resources,
+        )
+
+        ch_versions = ch_versions.mix(ISOFOX.out.versions)
+
+        // Set outputs, restoring original meta
+        // channel: [ meta, isofox_dir ]
+        ch_outputs = Channel.empty()
+            .mix(
+                WorkflowOncoanalyser.restoreMeta(ISOFOX.out.isofox_neo_dir, ch_inputs),
+                ch_inputs_sorted.skip.map { meta -> [meta, []] },
+
+        */
+
+        // ch_finder_outputs
+
+        // Step 3: Run Neo's binding prediction routine for neoepitope's pHLAs, taking in Lilac HLA alleles and previously
+        // derived neoepitopes with RNA annotation if it was available
+
+        // Select input sources
+        // channel: [ meta, purple_dir, lilac_dir, isofox_dir ]
+        // TO_DO - how to pass in the directories from step 1 and 2 (if run) above
+        ch_inputs_scorer_selected = WorkflowOncoanalyser.groupByMeta(
+            ch_purple,
+            ch_lilac,
+            ch_isofox,
+        )
+            .map { meta, purple_dir, lilac_dir, isofox_dir ->
+
+                def inputs = [
+                    Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR),
+                    Utils.selectCurrentOrExisting(lilac_dir, meta, Constants.INPUT.LILAC),
+                    Utils.selectCurrentOrExisting(isofox_dir, meta, Constants.INPUT.ISOFOX),
+                ]
+
+                return [meta, *inputs]
+            }
+
+        // Sort inputs
+        // channel: runnable: [ meta, purple_dir, lilac_dir,isofox_dir ]
+        // channel: skip: [ meta ]
+        ch_inputs_scorer_sorted = ch_inputs_scorer_selected
+            .branch { meta, purple_dir, lilac_dir, isofox_dir ->
+
+                def has_normal_dna = Utils.hasNormalDnaBam(meta)
+
+                def has_runnable_inputs = purple_dir && lilac_dir && has_normal_dna
+
+                runnable: has_runnable_inputs
+                skip: true
+                    return meta
+            }
+
+        // Create process input channel
+        // channel: [ meta_neo_scorer, purple_dir, isofox_dir, lilac_dir ]
+        ch_scorer_inputs = ch_inputs_scorer_sorted.runnable
+            .map{ meta, purple_dir, lilac_dir, isofox_dir ->
+                // NOTE(review): NEO_SCORER's input tuple also declares isofox_neo_dir and neo_finder_dir; not wired in yet
+                def meta_neo_scorer = [
+                    key: meta.group_id,
+                    id: meta.group_id,
+                    sample_id: Utils.getTumorDnaSampleName(meta),
+                ]
+
+                return [meta_neo_scorer, purple_dir, isofox_dir, lilac_dir]
+            }
 
-        // Neo score
-        // 1. select input sources after combine required channels (i.e ch_purple, ch_lilac, ch_isofox, above process outputs)
-        // 2. get runnable subjects/inputs
-        // 3. format input channel
-        // 4. run NEO_SCORER
 
         // Feeding the Neo process raw inputs for demo purposes only
-        NEO_SCORER(ch_inputs)
+        NEO_SCORER(
+            ch_scorer_inputs,
+            genome_fasta,
+            genome_version,
+            ensembl_data_resources,
+            neo_resources,
+            cohort_tpm_medians
+        )
+
+        ch_versions = ch_versions.mix(NEO_SCORER.out.versions)
+
+        // Set outputs, restoring original meta
+        // channel: [ meta, neo_scorer_dir ]
+        ch_scorer_outputs = WorkflowOncoanalyser.restoreMeta(NEO_SCORER.out.neo_scorer_dir, ch_inputs)
 
     emit:
         versions = ch_versions // channel: [ versions.yml ]
diff --git a/workflows/wgts.nf b/workflows/wgts.nf
index adb2d85d..3c691bc5 100644
--- a/workflows/wgts.nf
+++ b/workflows/wgts.nf
@@ -721,6 +721,12 @@ workflow WGTS {
             ch_sage_somatic_append_out,
             ch_lilac_out,
             ch_linx_somatic_out,
+            ref_data.genome_version,
+            ref_data.genome_fasta,
+            ref_data.genome_fai,
+            hmf_data.ensembl_data_resources,
+            hmf_data.neo_resources,
+            hmf_data.cohort_tpm_medians,
         )
 
         ch_versions = ch_versions.mix(NEO_PREDICTION.out.versions)

From 396e01aa8cd9c50048241daa340040f5f24e80db Mon Sep 17 00:00:00 2001
From: Stephen Watts <hello@stephen.ac>
Date: Mon, 5 Feb 2024 14:30:23 +1100
Subject: [PATCH 03/17] Get stub runs working

---
 conf/hmf_data.config                       |  15 +-
 conf/modules.config                        |  23 ++-
 modules/local/neo/Dockerfile               |   2 +-
 modules/local/neo/annotate_fusions/main.nf |  53 ++++++
 modules/local/neo/finder/main.nf           |  16 +-
 modules/local/neo/isofox/main.nf           |  56 -------
 modules/local/neo/meta.yml                 |   0
 modules/local/neo/scorer/main.nf           |  41 ++---
 subworkflows/local/neo_prediction.nf       | 180 ++++++++-------------
 workflows/wgts.nf                          |   5 +-
 10 files changed, 189 insertions(+), 202 deletions(-)
 create mode 100644 modules/local/neo/annotate_fusions/main.nf
 delete mode 100644 modules/local/neo/isofox/main.nf
 delete mode 100644 modules/local/neo/meta.yml

diff --git a/conf/hmf_data.config b/conf/hmf_data.config
index 3c17ba27..82ff1af3 100644
--- a/conf/hmf_data.config
+++ b/conf/hmf_data.config
@@ -20,13 +20,13 @@ params {
             isofox_gc_ratios              = 'rna_pipeline/read_100_exp_gc_ratios.csv'
             // LILAC
             lilac_resources               = 'dna_pipeline/immune/'
-            // NEO
-            neo_resources                 = 'neo/'
+            // Neo
+            neo_resources                 = 'neo/binding/'
             // ORANGE
             cohort_mapping                = 'orange/cohort_mapping.tsv'
-            // RNA COHORT DATA
-            cohort_percentiles            = 'orange/cohort_percentiles.tsv' // consider renaming and moving to rna_pipeline
-            cohort_tpm_medians            = 'rna_pipeline/cohort_tpm_medians.tsv'
+            // Hartwig cohort RNA data
+            cohort_tpm_medians            = 'neo/tpm_cohort/hmf_tpm_medians.csv'
+            cohort_percentiles            = 'orange/cohort_percentiles.tsv'
             alt_sj_distribution           = 'rna_pipeline/isofox.hmf_3444.alt_sj_cohort.37.csv'
             gene_exp_distribution         = 'rna_pipeline/isofox.hmf_3444.gene_distribution.37.csv'
             // SAGE
@@ -76,8 +76,12 @@ params {
             isofox_gc_ratios              = 'rna_pipeline/read_100_exp_gc_ratios.csv'
             // LILAC
             lilac_resources               = 'dna_pipeline/immune/'
+            // Neo
+            neo_resources                 = 'neo/binding/'
             // ORANGE
             cohort_mapping                = 'orange/cohort_mapping.tsv'
+            // Hartwig cohort RNA data
+            cohort_tpm_medians            = 'neo/tpm_cohort/hmf_tpm_medians.csv'
             cohort_percentiles            = 'orange/cohort_percentiles.tsv'
             alt_sj_distribution           = 'rna_pipeline/isofox.hmf_3444.alt_sj_cohort.38.csv'
             gene_exp_distribution         = 'rna_pipeline/isofox.hmf_3444.gene_distribution.38.csv'
diff --git a/conf/modules.config b/conf/modules.config
index dd090cdd..0dfd3002 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -218,12 +218,31 @@ process {
         ]
     }
 
-    withName: 'NEO_.*' {
+    withName: 'NEO_(?:SCORER|FINDER)' {
         ext.jarPath = '/opt/neo/neo.jar'
+    }
+
+    withName: 'NEO_SCORER' {
         publishDir = [
             path: { "${params.outdir}" },
             mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" },
+            saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/neo/scorer/" },
+        ]
+    }
+
+    withName: '.*:NEO_PREDICTION:ANNOTATE_FUSIONS' {
+        publishDir = [
+            path: { "${params.outdir}" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/neo/annotated_fusions/${filename}" },
+        ]
+    }
+
+    withName: 'NEO_FINDER' {
+        publishDir = [
+            path: { "${params.outdir}" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/neo/finder/" },
         ]
     }
 
diff --git a/modules/local/neo/Dockerfile b/modules/local/neo/Dockerfile
index 5325eb6c..f734631d 100644
--- a/modules/local/neo/Dockerfile
+++ b/modules/local/neo/Dockerfile
@@ -10,7 +10,7 @@ RUN \
 
 RUN \
   mkdir -p /opt/neo/ && \
-  wget -O /opt/neo/neo.jar 'https://github.com/hartwigmedical/hmftools/releases/download/neo-v1.1_beta/neo_v1.1.jar'
+  wget -O /opt/neo/neo.jar 'https://github.com/hartwigmedical/hmftools/releases/download/neo-v1.2_beta/neo_v1.2_beta.jar'
 
 USER mambauser
 
diff --git a/modules/local/neo/annotate_fusions/main.nf b/modules/local/neo/annotate_fusions/main.nf
new file mode 100644
index 00000000..5e9af36e
--- /dev/null
+++ b/modules/local/neo/annotate_fusions/main.nf
@@ -0,0 +1,53 @@
+process ANNOTATE_FUSIONS {
+    tag "${meta.id}"
+    label 'process_medium'
+
+    container 'quay.io/biocontainers/hmftools-isofox:1.7.1--hdfd78af_0'
+
+    input:
+    tuple val(meta), path(neo_finder_dir), path(bam), path(bai)
+    val read_length
+    path genome_fasta
+    val genome_ver
+    path genome_fai
+    path ensembl_data_resources
+
+    output:
+    tuple val(meta), path('*isf.neoepitope.tsv'), emit: annotated_fusions
+    path 'versions.yml'                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+
+    """
+    mkdir -p isofox/
+
+    isofox \\
+        -Xmx${Math.round(task.memory.bytes * 0.95)} \\
+        ${args} \\
+        -sample ${meta.sample_id} \\
+        -bam_file ${bam} \\
+        -functions NEO_EPITOPES \\
+        -neo_dir ${neo_finder_dir} \\
+        -read_length ${read_length} \\
+        -ref_genome ${genome_fasta} \\
+        -ref_genome_version ${genome_ver} \\
+        -ensembl_data_dir ${ensembl_data_resources} \\
+        -threads ${task.cpus} \\
+        -output_dir ./
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        isofox: \$(isofox -version | sed 's/^.* //')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    touch ${meta.sample_id}.isf.neoepitope.tsv
+    echo -e '${task.process}:\\n  stub: noversions\\n' > versions.yml
+    """
+}
diff --git a/modules/local/neo/finder/main.nf b/modules/local/neo/finder/main.nf
index 60cbf2e3..4ff99a7b 100644
--- a/modules/local/neo/finder/main.nf
+++ b/modules/local/neo/finder/main.nf
@@ -2,17 +2,18 @@ process NEO_FINDER {
     tag "${meta.id}"
     label 'process_low'
 
-    container 'docker.io/scwatts/neo:1.1_beta--0'
+    container 'docker.io/scwatts/neo:1.2_beta--1'
 
     input:
     tuple val(meta), path(purple_dir), path(linx_dir)
     path genome_fasta
     val genome_ver
+    path genome_fai
     path ensembl_data_resources
 
     output:
-    tuple val(meta), path('neo/'), emit: neo_finder_dir
-    path 'versions.yml'          , emit: versions
+    tuple val(meta), path('neo_finder/'), emit: neo_finder_dir
+    path 'versions.yml'                 , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -21,10 +22,11 @@ process NEO_FINDER {
     def args = task.ext.args ?: ''
 
     """
+    mkdir -p neo_finder/
+
     java \\
         -Xmx${Math.round(task.memory.bytes * 0.95)} \\
-        -cp ${task.ext.jarPath} \\
-        com.hartwig.hmftools.neo.epitope.NeoEpitopeFinder \\
+        -jar ${task.ext.jarPath} \\
             ${args} \\
             -sample ${meta.sample_id} \\
             -linx_dir ${linx_dir} \\
@@ -32,8 +34,8 @@ process NEO_FINDER {
             -ref_genome ${genome_fasta} \\
             -ref_genome_version ${genome_ver} \\
             -ensembl_data_dir ${ensembl_data_resources} \\
-            -output_dir ${output_dir} \\
             -log_debug \\
+            -output_dir neo_finder/
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
@@ -43,7 +45,7 @@ process NEO_FINDER {
 
     stub:
     """
-    mkdir -p neo/
+    mkdir -p neo_finder/
     echo -e '${task.process}:\\n  stub: noversions\\n' > versions.yml
     """
 }
diff --git a/modules/local/neo/isofox/main.nf b/modules/local/neo/isofox/main.nf
deleted file mode 100644
index 4ca86ba5..00000000
--- a/modules/local/neo/isofox/main.nf
+++ /dev/null
@@ -1,56 +0,0 @@
-process ISOFOX_NEO {
-    tag "${meta.id}"
-    label 'process_medium'
-
-    container 'docker.io/scwatts/isofox:1.7.1--0'
-
-    input:
-    tuple val(meta), path(bam), path(bai)
-    val functions
-    val read_length
-    path genome_fasta
-    val genome_ver
-    path genome_fai
-    path ensembl_data_resources
-
-    output:
-    tuple val(meta), path('isofox/'), emit: isofox_dir
-    path 'versions.yml'             , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-
-    """
-    mkdir -p isofox/
-
-    java \\
-        -Xmx${Math.round(task.memory.bytes * 0.95)} \\
-        -jar ${task.ext.jarPath} \\
-            ${args} \\
-            -sample ${meta.sample_id} \\
-            -bam_file ${bam} \\
-            -functions NEO_EPITOPES \\
-            -neoepitope_file ${neo_finder_dir}/${meta.sample_id}.neo.neo_data.tsv \\
-            -read_length ${read_length} \\
-            -ref_genome ${genome_fasta} \\
-            -ref_genome_version ${genome_ver} \\
-            -ensembl_data_dir ${ensembl_data_resources} \\
-            -threads ${task.cpus} \\
-            -output_dir isofox/
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        isofox: \$(java -jar ${task.ext.jarPath} -version | sed 's/^.* //')
-    END_VERSIONS
-    """
-
-    stub:
-    """
-    mkdir -p isofox/
-    touch isofox/placeholder
-    echo -e '${task.process}:\\n  stub: noversions\\n' > versions.yml
-    """
-}
diff --git a/modules/local/neo/meta.yml b/modules/local/neo/meta.yml
deleted file mode 100644
index e69de29b..00000000
diff --git a/modules/local/neo/scorer/main.nf b/modules/local/neo/scorer/main.nf
index 7dd7ec09..ed65a16e 100644
--- a/modules/local/neo/scorer/main.nf
+++ b/modules/local/neo/scorer/main.nf
@@ -2,19 +2,17 @@ process NEO_SCORER {
     tag "${meta.id}"
     label 'process_medium'
 
-    container 'docker.io/scwatts/neo:1.1_beta--0'
+    container 'docker.io/scwatts/neo:1.2_beta--1'
 
     input:
-    tuple val(meta), path(purple_dir), path(isofox_dir), path(lilac_dir), path(isofox_neo_dir), path(neo_finder_dir)
-    path genome_fasta
-    val genome_ver
+    tuple val(meta), path(isofox_dir), path(purple_dir), path(sage_vcf), path(lilac_dir), path(neo_finder_dir), path(annotate_fusions)
     path ensembl_data_resources
-    path neo_resources
+    path neo_resources, stageAs: 'neo_reference_data'
     path cohort_tpm_medians
 
     output:
-    tuple val(meta), path('neo/'), emit: neo_scorer_dir
-    path 'versions.yml'          , emit: versions
+    tuple val(meta), path('neo_scorer/'), emit: neo_scorer_dir
+    path 'versions.yml'                 , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -23,14 +21,21 @@ process NEO_SCORER {
     def args = task.ext.args ?: ''
 
     def rna_sample_arg = meta.containsKey('sample_rna_id') ? "-rna_sample ${meta.sample_rna_id}" : ''
-    def rna_somatic_vcf_arg = meta.containsKey('sample_rna_id') ? "-rna_somatic_vcf ${purple_dir}/${meta.sample_id}.sage_append.vcf.gz" : ''
-
-    // NeoScorer expects the fusion-neoepitopes which Isofox has annotated with RNA to be in the Isofox directory, so put them
-    // and the standard Isofox files (just TPM is used) into a new shared directory
-    // ie isofox_neo_dir + neo_finder_dir -> new directory for isofox data -> passed into -isofox_dir
-    def isofox_dir_arg = meta.containsKey('sample_rna_id') ? "-isofox_dir /path/isofox_combined_dir" : ''
+    def rna_somatic_vcf_arg = meta.containsKey('sample_rna_id') ? "-rna_somatic_vcf ${sage_vcf}" : ''
 
     """
+    isofox_dir_arg=''
+    if [[ -n "${isofox_dir}" ]]; then
+        isofox_dir_local=isofox__prepared/;
+
+        cp -rL ${isofox_dir} \${isofox_dir_local}/;
+        cp -r ${annotate_fusions} \${isofox_dir_local}/;
+
+        isofox_dir_arg="-isofox_dir \${isofox_dir_local}";
+    fi;
+
+    mkdir -p neo_scorer/
+
     java \\
         -Xmx${Math.round(task.memory.bytes * 0.95)} \\
         -cp ${task.ext.jarPath} \\
@@ -38,18 +43,16 @@ process NEO_SCORER {
             ${args} \\
             -sample ${meta.sample_id} \\
             ${rna_sample_arg} \\
+            \${isofox_dir_arg} \\
             -purple_dir ${purple_dir} \\
+            ${rna_somatic_vcf_arg} \\
             -lilac_dir ${lilac_dir} \\
-            ${isofox_dir_arg} \\
-            ${rna_somatic_vcf_arg} ]]
             -neo_dir ${neo_finder_dir} \\
-            -ref_genome ${genome_fasta} \\
-            -ref_genome_version ${genome_ver} \\
             -ensembl_data_dir ${ensembl_data_resources} \\
             -score_file_dir ${neo_resources} \\
             -cancer_tpm_medians_file ${cohort_tpm_medians} \\
-            -output_dir ${output_dir} \\
             -log_debug \\
+            -output_dir neo_scorer/
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
@@ -59,7 +62,7 @@ process NEO_SCORER {
 
     stub:
     """
-    mkdir -p neo/
+    mkdir -p neo_scorer/
     echo -e '${task.process}:\\n  stub: noversions\\n' > versions.yml
     """
 }
diff --git a/subworkflows/local/neo_prediction.nf b/subworkflows/local/neo_prediction.nf
index 3140fa53..eeca49bc 100644
--- a/subworkflows/local/neo_prediction.nf
+++ b/subworkflows/local/neo_prediction.nf
@@ -1,12 +1,11 @@
 //
-// XXX
+// Neo identifies and scores neoepitopes
 //
 
 import Constants
 import Utils
 
-include { ISOFOX               } from '../../modules/local/isofox/main'
-include { LILAC                } from '../../modules/local/lilac/main'
+include { ANNOTATE_FUSIONS     } from '../../modules/local/neo/annotate_fusions/main'
 include { NEO_FINDER           } from '../../modules/local/neo/finder/main'
 include { NEO_SCORER           } from '../../modules/local/neo/scorer/main'
 
@@ -21,23 +20,27 @@ workflow NEO_PREDICTION{
         ch_linx                // channel: [mandatory] [ meta, linx_dir ]
 
         // Reference data
-        genome_version         // channel: [mandatory] genome version
         genome_fasta           // channel: [mandatory] /path/to/genome_fasta
+        genome_version         // channel: [mandatory] genome version
         genome_fai             // channel: [mandatory] /path/to/genome_fai
         ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/
         neo_resources          // channel: [mandatory] /path/to/neo_resources/
         cohort_tpm_medians     // channel: [mandatory] /path/to/cohort_tpm_medians/
 
+        // Params
+        isofox_read_length     //  string: [mandatory] Isofox read length
+
     main:
         // Channel for versions.yml files
         // channel: [ versions.yml ]
         ch_versions = Channel.empty()
 
-        // Step 1: Identify neoepitopes from Purple somatic variants and Linx's (neoepitope) fusions
-
+        //
+        // MODULE: Neo finder
+        //
         // Select input sources
-        // channel: [ meta, isofox_dir, purple_dir, linx_annotation_dir ]
-        ch_inputs_finder_selected = WorkflowOncoanalyser.groupByMeta(
+        // channel: [ meta, purple_dir, linx_annotation_dir ]
+        ch_finder_inputs_selected = WorkflowOncoanalyser.groupByMeta(
             ch_purple,
             ch_linx,
         )
@@ -54,7 +57,7 @@ workflow NEO_PREDICTION{
         // Sort inputs
         // channel: runnable: [ meta, purple_dir, linx_annotation_dir ]
         // channel: skip: [ meta ]
-        ch_inputs_finder_sorted = ch_inputs_finder_selected
+        ch_finder_inputs_sorted = ch_finder_inputs_selected
             .branch { meta, purple_dir, linx_annotation_dir ->
 
                 def has_normal_dna = Utils.hasNormalDnaBam(meta)
@@ -67,25 +70,25 @@ workflow NEO_PREDICTION{
             }
 
         // Create process input channel
-        // channel: sample_data: [ meta, purple_dir, linx_annotation_dir ]
-        ch_finder_inputs = ch_inputs_finder_sorted.runnable
-            .map{ meta, purple_dir, linx_annotation_dir ->
+        // channel: sample_data: [ meta_finder, purple_dir, linx_annotation_dir ]
+        ch_finder_inputs = ch_finder_inputs_sorted.runnable
+            .map { meta, purple_dir, linx_annotation_dir ->
 
-                def meta_neo_finder = [
+                def meta_finder = [
                     key: meta.group_id,
                     id: meta.group_id,
                     sample_id: Utils.getTumorDnaSampleName(meta),
                 ]
 
-                return [meta_neo_finder, purple_dir, linx_annotation_dir]
+                return [meta_finder, purple_dir, linx_annotation_dir]
             }
 
-
-        // Feeding the Neo process raw inputs for demo purposes only
+        // Run process
         NEO_FINDER(
             ch_finder_inputs,
             genome_fasta,
             genome_version,
+            genome_fai,
             ensembl_data_resources,
         )
 
@@ -93,47 +96,27 @@ workflow NEO_PREDICTION{
 
         // Set outputs, restoring original meta
         // channel: [ meta, neo_finder_dir ]
-        ch_finder_outputs = WorkflowOncoanalyser.restoreMeta(NEO_FINDER.out.neo_finder_dir, ch_inputs)
+        ch_finder_out = WorkflowOncoanalyser.restoreMeta(NEO_FINDER.out.neo_finder_dir, ch_inputs)
 
-        // Step 2: When RNA is present, annotate the fusion-derived neoepitope with RNA using Isofox
-
-        /*
-
-        // Select input sources
-        // channel: [ meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ]
-        ch_inputs_isofox_sorted = WorkflowOncoanalyser.groupByMeta(
-            ch_finder_outputs,
-            // channel: [ meta, tumor_rna_bam (optional), tumor_rna_bai (optional) ]
-            ch_inputs
-                .map { meta ->
-                    def has_rna = Utils.hasTumorRnaBam(meta)
-
-                    return [
-                        meta,
-                        has_rna ? Utils.getTumorRnaBam(meta) : [],
-                        has_rna ? Utils.getTumorRnaBai(meta) : [],
-                    ]
-                },
-
-            )
+        //
+        // MODULE: Fusion annotation (Isofox)
+        //
+        // Annotate the fusion-derived neoepitope using Isofox where RNA data is available
 
         // Sort inputs
-        ch_inputs_isofox_sorted = ch_finder_outputs
-            .branch {
-
-                def has_rna = Utils.hasTumorRnaBam(meta)
-
-
-
-                runnable:
-                skip:
-                    meta
-
+        // channel: runnable: [ meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ]
+        // channel: skip: [ meta ]
+        ch_isofox_inputs_sorted = ch_finder_out
+            .branch { meta, neo_finder_dir ->
+                runnable: Utils.hasTumorRnaBam(meta)
+                    return [meta, neo_finder_dir, Utils.getTumorRnaBam(meta), Utils.getTumorRnaBai(meta)]
+                skip: true
+                    return meta
             }
 
         // Create process input channel
         // channel: [ meta_isofox, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ]
-        ch_isofox_inputs = ch_inputs_isofox_sorted.runnable
+        ch_isofox_inputs = ch_isofox_inputs_sorted.runnable
             .map { meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ->
 
                 def meta_isofox = [
@@ -142,11 +125,11 @@ workflow NEO_PREDICTION{
                     sample_id: Utils.getTumorDnaSampleName(meta),
                 ]
 
-                return [meta_isofox, Utils.getTumorRnaBam(meta), Utils.getTumorRnaBai(meta)]
+                return [meta_isofox, neo_finder_dir, Utils.getTumorRnaBam(meta), Utils.getTumorRnaBai(meta)]
             }
 
         // Run process
-        ISOFOX_NEO(
+        ANNOTATE_FUSIONS(
             ch_isofox_inputs,
             isofox_read_length,
             genome_fasta,
@@ -155,87 +138,64 @@ workflow NEO_PREDICTION{
             ensembl_data_resources,
         )
 
-        ch_versions = ch_versions.mix(ISOFOX.out.versions)
+        ch_versions = ch_versions.mix(ANNOTATE_FUSIONS.out.versions)
 
         // Set outputs, restoring original meta
-        // channel: [ meta, isofox_dir ]
-        ch_outputs = Channel.empty()
+        // channel: [ meta, annotated_fusions ]
+        ch_annotate_fusions_out = Channel.empty()
             .mix(
-                WorkflowOncoanalyser.restoreMeta(ISOFOX.out.isofox_neo_dir, ch_inputs),
-                ch_inputs_sorted.skip.map { meta -> [meta, []] },
-
-        */
-
-        // ch_finder_outputs
+                WorkflowOncoanalyser.restoreMeta(ANNOTATE_FUSIONS.out.annotated_fusions, ch_inputs),
+                ch_isofox_inputs_sorted.skip.map { meta -> [meta, []] },
+            )
 
-        // Step 3: Run Neo's binding prediction routine for neoepitope's pHLAs, taking in Lilac HLA alleles and previously
-        // derived neoepitopes with RNA annotation if it was available
 
-        // Select input sources
-        // channel: [ meta, isofox_dir, purple_dir, lilac_dir, isofox_dir ]
-        // TO_DO - how to pass in the directories from step 1 and 2 (if run) above
-        ch_inputs_scorer_selected = WorkflowOncoanalyser.groupByMeta(
-            ch_purple,
-            ch_linx,
+        //
+        // MODULE: Neo scorer
+        //
+        // Select input sources and prepare input channel
+        // channel: [ meta_scorer, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotate_fusions ]
+        ch_scorer_inputs = WorkflowOncoanalyser.groupByMeta(
             ch_isofox,
+            ch_purple,
+            ch_sage_somatic_append,
+            ch_lilac,
+            ch_finder_out,
+            ch_annotate_fusions_out,
         )
-            .map { meta, purple_dir, lilac_dir ->
+            .map { meta, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotate_fusions ->
 
-                def inputs = [
-                    Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR),
-                    Utils.selectCurrentOrExisting(lilac_dir, meta, Constants.INPUT.LILAC),
-                    Utils.selectCurrentOrExisting(isofox_dir, meta, Constants.INPUT.ISOFOX),
-                ]
-
-                return [meta, *inputs]
-            }
-
-        // Sort inputs
-        // channel: runnable: [ meta, purple_dir, lilac_dir,isofox_dir ]
-        // channel: skip: [ meta ]
-        ch_inputs_scorer_sorted = ch_inputs_scorer_selected
-            .branch { meta, purple_dir, lilac_dir, isofox_dir ->
-
-                def has_normal_dna = Utils.hasNormalDnaBam(meta)
-
-                def has_runnable_inputs = purple_dir && lilac_dir && has_normal_dna
-
-                runnable: has_runnable_inputs
-                skip: true
-                    return meta
-            }
-
-        // Create process input channel
-        // channel: sample_data: [ meta, purple_dir, linx_annotation_dir ]
-        ch_scorer_inputs = ch_inputs_scorer_sorted.runnable
-            .map{ meta, purple_dir, linx_annotation_dir ->
-
-                def meta_neo_scorer = [
+                def meta_scorer = [
                     key: meta.group_id,
                     id: meta.group_id,
                     sample_id: Utils.getTumorDnaSampleName(meta),
                 ]
 
-                return [meta_neo_scorer, purple_dir, lilac_dir, isofox_dir]
-            }
+                if (Utils.hasTumorRnaBam(meta)) {
+                    meta_scorer.sample_rna_id = Utils.getTumorRnaSampleName(meta)
+                }
+
+                def inputs = [
+                    Utils.selectCurrentOrExisting(isofox_dir, meta, Constants.INPUT.ISOFOX_DIR),
+                    Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR),
+                    Utils.selectCurrentOrExisting(sage_somatic_append, meta, Constants.INPUT.SAGE_APPEND_VCF_TUMOR),
+                    Utils.selectCurrentOrExisting(lilac_dir, meta, Constants.INPUT.LILAC_DIR),
+                    neo_finder_dir,
+                    annotate_fusions,
+                ]
 
+                return [meta_scorer, *inputs]
+            }
 
-        // Feeding the Neo process raw inputs for demo purposes only
+        // Run process
         NEO_SCORER(
             ch_scorer_inputs,
-            genome_fasta,
-            genome_version,
             ensembl_data_resources,
             neo_resources,
-            cohort_tpm_medians
+            cohort_tpm_medians,
         )
 
         ch_versions = ch_versions.mix(NEO_SCORER.out.versions)
 
-        // Set outputs, restoring original meta
-        // channel: [ meta, neo_scorer_dir ]
-        ch_scorer_outputs = WorkflowOncoanalyser.restoreMeta(NEO_SCORER.out.neo_scorer_dir, ch_inputs)
-
     emit:
         versions = ch_versions // channel: [ versions.yml ]
 }
diff --git a/workflows/wgts.nf b/workflows/wgts.nf
index 3c691bc5..5b695939 100644
--- a/workflows/wgts.nf
+++ b/workflows/wgts.nf
@@ -710,7 +710,7 @@ workflow WGTS {
     }
 
     //
-    // SUBWORKFLOW: XXX
+    // SUBWORKFLOW: Run Neo to identify and score neoepitopes
     //
     if (run_config.stages.neo) {
 
@@ -721,12 +721,13 @@ workflow WGTS {
             ch_sage_somatic_append_out,
             ch_lilac_out,
             ch_linx_somatic_out,
-            ref_data.genome_version,
             ref_data.genome_fasta,
+            ref_data.genome_version,
             ref_data.genome_fai,
             hmf_data.ensembl_data_resources,
             hmf_data.neo_resources,
             hmf_data.cohort_tpm_medians,
+            params.isofox_read_length,
         )
 
         ch_versions = ch_versions.mix(NEO_PREDICTION.out.versions)

From 7abbccb0a2bd12cb51064f6ff56b59c6eaa727a8 Mon Sep 17 00:00:00 2001
From: Stephen Watts <hello@stephen.ac>
Date: Wed, 14 Feb 2024 08:54:32 +1100
Subject: [PATCH 04/17] Set Neo to run only when specified

---
 lib/Processes.groovy | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/lib/Processes.groovy b/lib/Processes.groovy
index 98245f6d..f4770838 100644
--- a/lib/Processes.groovy
+++ b/lib/Processes.groovy
@@ -7,7 +7,17 @@ import Utils
 class Processes {
 
     public static getRunStages(include, exclude, manual_select, log) {
-        def processes = manual_select ? [] : Constants.Process.values().toList()
+
+        // Get default processes
+        // NOTE(SW): currently set all except Neo to run by default; Process.NEO excluded to be more concise in code
+        def processes
+        if (manual_select) {
+            processes = []
+        } else {
+            processes = Constants.Process.values().toList()
+            processes.remove(Constants.Process.NEO)
+        }
+
         def include_list = this.getProcessList(include, log)
         def exclude_list = this.getProcessList(exclude, log)
         this.checkIncludeExcludeList(include_list, exclude_list, log)

From 01563bc4c020531886bd431c6d41dc0105c9b206 Mon Sep 17 00:00:00 2001
From: Stephen Watts <hello@stephen.ac>
Date: Fri, 16 Feb 2024 10:30:35 +1100
Subject: [PATCH 05/17] Update Neo reference data paths

---
 conf/hmf_data.config | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/conf/hmf_data.config b/conf/hmf_data.config
index 82ff1af3..f80f086a 100644
--- a/conf/hmf_data.config
+++ b/conf/hmf_data.config
@@ -25,7 +25,7 @@ params {
             // ORANGE
             cohort_mapping                = 'orange/cohort_mapping.tsv'
             // Hartwig cohort RNA data
-            cohort_tpm_medians            = 'neo/tpm_cohort/hmf_tpm_medians.csv'
+            cohort_tpm_medians            = 'neo/tpm_cohort/hmf_tpm_medians.37.csv'
             cohort_percentiles            = 'orange/cohort_percentiles.tsv'
             alt_sj_distribution           = 'rna_pipeline/isofox.hmf_3444.alt_sj_cohort.37.csv'
             gene_exp_distribution         = 'rna_pipeline/isofox.hmf_3444.gene_distribution.37.csv'
@@ -77,12 +77,11 @@ params {
             // LILAC
             lilac_resources               = 'dna_pipeline/immune/'
             // Neo
-            neo_resources                 = 'neo/'
+            neo_resources                 = 'neo/binding/'
             // ORANGE
             cohort_mapping                = 'orange/cohort_mapping.tsv'
-            // RNA cohort data
-            cohort_percentiles            = 'orange/cohort_percentiles.tsv'
-            cohort_tpm_medians            = 'rna_pipeline/cohort_tpm_medians.tsv'
+            // Hartwig cohort RNA data
+            cohort_tpm_medians            = 'neo/tpm_cohort/hmf_tpm_medians.38.csv'
             cohort_percentiles            = 'orange/cohort_percentiles.tsv'
             alt_sj_distribution           = 'rna_pipeline/isofox.hmf_3444.alt_sj_cohort.38.csv'
             gene_exp_distribution         = 'rna_pipeline/isofox.hmf_3444.gene_distribution.38.csv'

From 801ec4d6940ad4901184116c5d9ced848a0f8fc5 Mon Sep 17 00:00:00 2001
From: Stephen Watts <hello@stephen.ac>
Date: Wed, 14 Feb 2024 09:08:30 +1100
Subject: [PATCH 06/17] Provide Neo cancer type from samplesheet

---
 modules/local/neo/scorer/main.nf     | 3 +++
 subworkflows/local/neo_prediction.nf | 1 +
 2 files changed, 4 insertions(+)

diff --git a/modules/local/neo/scorer/main.nf b/modules/local/neo/scorer/main.nf
index ed65a16e..ba98d833 100644
--- a/modules/local/neo/scorer/main.nf
+++ b/modules/local/neo/scorer/main.nf
@@ -23,6 +23,8 @@ process NEO_SCORER {
     def rna_sample_arg = meta.containsKey('sample_rna_id') ? "-rna_sample ${meta.sample_rna_id}" : ''
     def rna_somatic_vcf_arg = meta.containsKey('sample_rna_id') ? "-rna_somatic_vcf ${sage_vcf}" : ''
 
+    def cancer_type_arg = meta.containsKey('cancer_type') ? "-cancer_type ${meta.cancer_type}" : ''
+
     """
     isofox_dir_arg=''
     if [[ -n "${isofox_dir}" ]]; then
@@ -42,6 +44,7 @@ process NEO_SCORER {
         com.hartwig.hmftools.neo.score.NeoScorer \\
             ${args} \\
             -sample ${meta.sample_id} \\
+            ${cancer_type_arg} \\
             ${rna_sample_arg} \\
             \${isofox_dir_arg} \\
             -purple_dir ${purple_dir} \\
diff --git a/subworkflows/local/neo_prediction.nf b/subworkflows/local/neo_prediction.nf
index eeca49bc..bd3ff59a 100644
--- a/subworkflows/local/neo_prediction.nf
+++ b/subworkflows/local/neo_prediction.nf
@@ -168,6 +168,7 @@ workflow NEO_PREDICTION{
                     key: meta.group_id,
                     id: meta.group_id,
                     sample_id: Utils.getTumorDnaSampleName(meta),
+                    cancer_type: meta[Constants.InfoField.CANCER_TYPE],
                 ]
 
                 if (Utils.hasTumorRnaBam(meta)) {

From d31a929b88ca284b8454f29aed4a83e0db3c9f8a Mon Sep 17 00:00:00 2001
From: Stephen Watts <hello@stephen.ac>
Date: Sat, 16 Mar 2024 17:42:16 +1100
Subject: [PATCH 07/17] Move neo_prediction.nf subworkflow for consistency

---
 .../local/{neo_prediction.nf => neo_prediction/main.nf}     | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
 rename subworkflows/local/{neo_prediction.nf => neo_prediction/main.nf} (96%)

diff --git a/subworkflows/local/neo_prediction.nf b/subworkflows/local/neo_prediction/main.nf
similarity index 96%
rename from subworkflows/local/neo_prediction.nf
rename to subworkflows/local/neo_prediction/main.nf
index bd3ff59a..ae18dae2 100644
--- a/subworkflows/local/neo_prediction.nf
+++ b/subworkflows/local/neo_prediction/main.nf
@@ -5,9 +5,9 @@
 import Constants
 import Utils
 
-include { ANNOTATE_FUSIONS     } from '../../modules/local/neo/annotate_fusions/main'
-include { NEO_FINDER           } from '../../modules/local/neo/finder/main'
-include { NEO_SCORER           } from '../../modules/local/neo/scorer/main'
+include { ANNOTATE_FUSIONS } from '../../../modules/local/neo/annotate_fusions/main'
+include { NEO_FINDER       } from '../../../modules/local/neo/finder/main'
+include { NEO_SCORER       } from '../../../modules/local/neo/scorer/main'
 
 workflow NEO_PREDICTION{
     take:

From cd3c2deb5d0707136a8611af4256a01851be4668 Mon Sep 17 00:00:00 2001
From: Stephen Watts <hello@stephen.ac>
Date: Sat, 16 Mar 2024 18:17:56 +1100
Subject: [PATCH 08/17] Adjust Neo to fit with alignment subworkflow

* add source selection for RNA BAM (samplesheet, alignment subworkflow)
* use appropriate function call to determine presence of RNA
* relocate isofox_read_length assignment (wgts and targeted workflows)
---
 subworkflows/local/neo_prediction/main.nf | 28 ++++++++++++++++-------
 workflows/targeted.nf                     |  4 +++-
 workflows/wgts.nf                         |  7 ++++--
 3 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/subworkflows/local/neo_prediction/main.nf b/subworkflows/local/neo_prediction/main.nf
index ae18dae2..ee6b628b 100644
--- a/subworkflows/local/neo_prediction/main.nf
+++ b/subworkflows/local/neo_prediction/main.nf
@@ -13,6 +13,7 @@ workflow NEO_PREDICTION{
     take:
         // Sample data
         ch_inputs              // channel: [mandatory] [ meta ]
+        ch_tumor_rna_bam       // channel: [mandatory] [ meta, bam, bai ]
         ch_isofox              // channel: [mandatory] [ meta, isofox_dir ]
         ch_purple              // channel: [mandatory] [ meta, purple_dir ]
         ch_sage_somatic_append // channel: [mandatory] [ meta, sage_append_vcf ]
@@ -60,7 +61,7 @@ workflow NEO_PREDICTION{
         ch_finder_inputs_sorted = ch_finder_inputs_selected
             .branch { meta, purple_dir, linx_annotation_dir ->
 
-                def has_normal_dna = Utils.hasNormalDnaBam(meta)
+                def has_normal_dna = Utils.hasNormalDna(meta)
 
                 def has_runnable_inputs = purple_dir && linx_annotation_dir && has_normal_dna
 
@@ -103,13 +104,24 @@ workflow NEO_PREDICTION{
         //
         // Annotate the fusion-derived neoepitope using Isofox where RNA data is available
 
-        // Sort inputs
+        // Select input sources and sort
         // channel: runnable: [ meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ]
         // channel: skip: [ meta ]
-        ch_isofox_inputs_sorted = ch_finder_out
-            .branch { meta, neo_finder_dir ->
-                runnable: Utils.hasTumorRnaBam(meta)
-                    return [meta, neo_finder_dir, Utils.getTumorRnaBam(meta), Utils.getTumorRnaBai(meta)]
+        ch_isofox_inputs_sorted = WorkflowOncoanalyser.groupByMeta(
+            ch_finder_out,
+            ch_tumor_rna_bam,
+        )
+            .map { meta, neo_finder_dir, tumor_bam, tumor_bai ->
+                return [
+                    meta,
+                    neo_finder_dir,
+                    Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_RNA_TUMOR),
+                    Utils.selectCurrentOrExisting(tumor_bai, meta, Constants.INPUT.BAI_RNA_TUMOR),
+                ]
+            }
+            .branch { meta, neo_finder_dir, tumor_bam, tumor_bai ->
+                runnable: Utils.hasTumorRna(meta)
+                    return [meta, neo_finder_dir, tumor_bam, tumor_bai]
                 skip: true
                     return meta
             }
@@ -125,7 +137,7 @@ workflow NEO_PREDICTION{
                     sample_id: Utils.getTumorDnaSampleName(meta),
                 ]
 
-                return [meta_isofox, neo_finder_dir, Utils.getTumorRnaBam(meta), Utils.getTumorRnaBai(meta)]
+                return [meta_isofox, neo_finder_dir, tumor_bam_rna, tumor_bai_rna]
             }
 
         // Run process
@@ -171,7 +183,7 @@ workflow NEO_PREDICTION{
                     cancer_type: meta[Constants.InfoField.CANCER_TYPE],
                 ]
 
-                if (Utils.hasTumorRnaBam(meta)) {
+                if (Utils.hasTumorRna(meta)) {
                     meta_scorer.sample_rna_id = Utils.getTumorRnaSampleName(meta)
                 }
 
diff --git a/workflows/targeted.nf b/workflows/targeted.nf
index f2d4f0ba..5b0b92bd 100644
--- a/workflows/targeted.nf
+++ b/workflows/targeted.nf
@@ -45,6 +45,9 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
 // Check mandatory parameters
 if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
 
+// Used in Isofox subworkflow only
+isofox_read_length = params.isofox_read_length !== null ? params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_TARGETED
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS
@@ -185,7 +188,6 @@ workflow TARGETED {
 
         isofox_counts = params.isofox_counts ? file(params.isofox_counts) : panel_data.isofox_counts
         isofox_gc_ratios = params.isofox_gc_ratios ? file(params.isofox_gc_ratios) : panel_data.isofox_gc_ratios
-        isofox_read_length = params.isofox_read_length !== null ? params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_TARGETED
 
         isofox_gene_ids = params.isofox_gene_ids ? file(params.isofox_gene_ids) : panel_data.isofox_gene_ids
         isofox_tpm_norm = params.isofox_tpm_norm ? file(params.isofox_tpm_norm) : panel_data.isofox_tpm_norm
diff --git a/workflows/wgts.nf b/workflows/wgts.nf
index 5b695939..d6f3bb6e 100644
--- a/workflows/wgts.nf
+++ b/workflows/wgts.nf
@@ -47,6 +47,9 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
 // Check mandatory parameters
 if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
 
+// Used in Isofox and Neo subworkflows
+isofox_read_length = params.isofox_read_length !== null ? params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_WTS
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS
@@ -189,7 +192,6 @@ workflow WGTS {
 
         isofox_counts = params.isofox_counts ? file(params.isofox_counts) : hmf_data.isofox_counts
         isofox_gc_ratios = params.isofox_gc_ratios ? file(params.isofox_gc_ratios) : hmf_data.isofox_gc_ratios
-        isofox_read_length = params.isofox_read_length !== null ? params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_WTS
 
         ISOFOX_QUANTIFICATION(
             ch_inputs,
@@ -716,6 +718,7 @@ workflow WGTS {
 
         NEO_PREDICTION(
             ch_inputs,
+            ch_align_rna_tumor_out,
             ch_isofox_out,
             ch_purple_out,
             ch_sage_somatic_append_out,
@@ -727,7 +730,7 @@ workflow WGTS {
             hmf_data.ensembl_data_resources,
             hmf_data.neo_resources,
             hmf_data.cohort_tpm_medians,
-            params.isofox_read_length,
+            isofox_read_length,
         )
 
         ch_versions = ch_versions.mix(NEO_PREDICTION.out.versions)

From 2e4a57a8a8c71d04dd554d6b03edc1e03e28e672 Mon Sep 17 00:00:00 2001
From: Stephen Watts <hello@stephen.ac>
Date: Thu, 30 May 2024 09:10:23 +1000
Subject: [PATCH 09/17] Indent Neo subworkflow to match nf-core style

---
 modules/local/neo/Dockerfile              |  18 +-
 subworkflows/local/neo_prediction/main.nf | 378 +++++++++++-----------
 2 files changed, 198 insertions(+), 198 deletions(-)

diff --git a/modules/local/neo/Dockerfile b/modules/local/neo/Dockerfile
index f734631d..aa0a03e3 100644
--- a/modules/local/neo/Dockerfile
+++ b/modules/local/neo/Dockerfile
@@ -3,20 +3,20 @@ FROM mambaorg/micromamba:0.24.0
 USER root
 
 RUN \
-  apt-get update && \
-  apt-get install -y procps wget && \
-  apt-get clean && \
-  rm -rf /var/lib/apt/lists/*
+    apt-get update && \
+    apt-get install -y procps wget && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 
 RUN \
-  mkdir -p /opt/neo/ && \
-  wget -O /opt/neo/neo.jar 'https://github.com/hartwigmedical/hmftools/releases/download/neo-v1.2_beta/neo_v1.2_beta.jar'
+    mkdir -p /opt/neo/ && \
+    wget -O /opt/neo/neo.jar 'https://github.com/hartwigmedical/hmftools/releases/download/neo-v1.2_beta/neo_v1.2_beta.jar'
 
 USER mambauser
 
 RUN \
-  micromamba install -y -n base -c bioconda -c conda-forge \
-    'openjdk >=8' && \
-  micromamba clean --all --yes
+    micromamba install -y -n base -c bioconda -c conda-forge \
+        'openjdk >=8' && \
+    micromamba clean --all --yes
 
 ENV PATH="/opt/conda/bin:/opt/conda/condabin:${PATH}"
diff --git a/subworkflows/local/neo_prediction/main.nf b/subworkflows/local/neo_prediction/main.nf
index ee6b628b..2a07a043 100644
--- a/subworkflows/local/neo_prediction/main.nf
+++ b/subworkflows/local/neo_prediction/main.nf
@@ -9,206 +9,206 @@ include { ANNOTATE_FUSIONS } from '../../../modules/local/neo/annotate_fusions/m
 include { NEO_FINDER       } from '../../../modules/local/neo/finder/main'
 include { NEO_SCORER       } from '../../../modules/local/neo/scorer/main'
 
-workflow NEO_PREDICTION{
+workflow NEO_PREDICTION {
     take:
-        // Sample data
-        ch_inputs              // channel: [mandatory] [ meta ]
-        ch_tumor_rna_bam       // channel: [mandatory] [ meta, bam, bai ]
-        ch_isofox              // channel: [mandatory] [ meta, isofox_dir ]
-        ch_purple              // channel: [mandatory] [ meta, purple_dir ]
-        ch_sage_somatic_append // channel: [mandatory] [ meta, sage_append_vcf ]
-        ch_lilac               // channel: [mandatory] [ meta, lilac_dir ]
-        ch_linx                // channel: [mandatory] [ meta, linx_dir ]
-
-        // Reference data
-        genome_fasta           // channel: [mandatory] /path/to/genome_fasta
-        genome_version         // channel: [mandatory] genome version
-        genome_fai             // channel: [mandatory] /path/to/genome_fai
-        ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/
-        neo_resources          // channel: [mandatory] /path/to/neo_resources/
-        cohort_tpm_medians     // channel: [mandatory] /path/to/cohort_tpm_medians/
-
-        // Params
-        isofox_read_length     //  string: [mandatory] Isofox read length
+    // Sample data
+    ch_inputs              // channel: [mandatory] [ meta ]
+    ch_tumor_rna_bam       // channel: [mandatory] [ meta, bam, bai ]
+    ch_isofox              // channel: [mandatory] [ meta, isofox_dir ]
+    ch_purple              // channel: [mandatory] [ meta, purple_dir ]
+    ch_sage_somatic_append // channel: [mandatory] [ meta, sage_append_vcf ]
+    ch_lilac               // channel: [mandatory] [ meta, lilac_dir ]
+    ch_linx                // channel: [mandatory] [ meta, linx_dir ]
+
+    // Reference data
+    genome_fasta           // channel: [mandatory] /path/to/genome_fasta
+    genome_version         // channel: [mandatory] genome version
+    genome_fai             // channel: [mandatory] /path/to/genome_fai
+    ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/
+    neo_resources          // channel: [mandatory] /path/to/neo_resources/
+    cohort_tpm_medians     // channel: [mandatory] /path/to/cohort_tpm_medians/
+
+    // Params
+    isofox_read_length     //  string: [mandatory] Isofox read length
 
     main:
-        // Channel for versions.yml files
-        // channel: [ versions.yml ]
-        ch_versions = Channel.empty()
-
-        //
-        // MODULE: Neo finder
-        //
-        // Select input sources
-        // channel: [ meta, purple_dir, linx_annotation_dir ]
-        ch_finder_inputs_selected = WorkflowOncoanalyser.groupByMeta(
-            ch_purple,
-            ch_linx,
+    // Channel for versions.yml files
+    // channel: [ versions.yml ]
+    ch_versions = Channel.empty()
+
+    //
+    // MODULE: Neo finder
+    //
+    // Select input sources
+    // channel: [ meta, purple_dir, linx_annotation_dir ]
+    ch_finder_inputs_selected = WorkflowOncoanalyser.groupByMeta(
+        ch_purple,
+        ch_linx,
+    )
+        .map { meta, purple_dir, linx_annotation_dir ->
+
+            def inputs = [
+                Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR),
+                Utils.selectCurrentOrExisting(linx_annotation_dir, meta, Constants.INPUT.LINX_ANNO_DIR_TUMOR),
+            ]
+
+            return [meta, *inputs]
+        }
+
+    // Sort inputs
+    // channel: runnable: [ meta, purple_dir, linx_annotation_dir ]
+    // channel: skip: [ meta ]
+    ch_finder_inputs_sorted = ch_finder_inputs_selected
+        .branch { meta, purple_dir, linx_annotation_dir ->
+
+            def has_normal_dna = Utils.hasNormalDna(meta)
+
+            def has_runnable_inputs = purple_dir && linx_annotation_dir && has_normal_dna
+
+            runnable: has_runnable_inputs
+            skip: true
+                return meta
+        }
+
+    // Create process input channel
+    // channel: sample_data: [ meta_finder, purple_dir, linx_annotation_dir ]
+    ch_finder_inputs = ch_finder_inputs_sorted.runnable
+        .map { meta, purple_dir, linx_annotation_dir ->
+
+            def meta_finder = [
+                key: meta.group_id,
+                id: meta.group_id,
+                sample_id: Utils.getTumorDnaSampleName(meta),
+            ]
+
+            return [meta_finder, purple_dir, linx_annotation_dir]
+        }
+
+    // Run process
+    NEO_FINDER(
+        ch_finder_inputs,
+        genome_fasta,
+        genome_version,
+        genome_fai,
+        ensembl_data_resources,
+    )
+
+    ch_versions = ch_versions.mix(NEO_FINDER.out.versions)
+
+    // Set outputs, restoring original meta
+    // channel: [ meta, neo_finder_dir ]
+    ch_finder_out = WorkflowOncoanalyser.restoreMeta(NEO_FINDER.out.neo_finder_dir, ch_inputs)
+
+    //
+    // MODULE: Fusion annotation (Isofox)
+    //
+    // Annotate the fusion-derived neoepitope using Isofox where RNA data is available
+
+    // Select input sources and sort
+    // channel: runnable: [ meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ]
+    // channel: skip: [ meta ]
+    ch_isofox_inputs_sorted = WorkflowOncoanalyser.groupByMeta(
+        ch_finder_out,
+        ch_tumor_rna_bam,
+    )
+        .map { meta, neo_finder_dir, tumor_bam, tumor_bai ->
+            return [
+                meta,
+                neo_finder_dir,
+                Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_RNA_TUMOR),
+                Utils.selectCurrentOrExisting(tumor_bai, meta, Constants.INPUT.BAI_RNA_TUMOR),
+            ]
+        }
+        .branch { meta, neo_finder_dir, tumor_bam, tumor_bai ->
+            runnable: Utils.hasTumorRna(meta)
+                return [meta, neo_finder_dir, tumor_bam, tumor_bai]
+            skip: true
+                return meta
+        }
+
+    // Create process input channel
+    // channel: [ meta_isofox, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ]
+    ch_isofox_inputs = ch_isofox_inputs_sorted.runnable
+        .map { meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ->
+
+            def meta_isofox = [
+                key: meta.group_id,
+                id: meta.group_id,
+                sample_id: Utils.getTumorDnaSampleName(meta),
+            ]
+
+            return [meta_isofox, neo_finder_dir, tumor_bam_rna, tumor_bai_rna]
+        }
+
+    // Run process
+    ANNOTATE_FUSIONS(
+        ch_isofox_inputs,
+        isofox_read_length,
+        genome_fasta,
+        genome_version,
+        genome_fai,
+        ensembl_data_resources,
+    )
+
+    ch_versions = ch_versions.mix(ANNOTATE_FUSIONS.out.versions)
+
+    // Set outputs, restoring original meta
+    // channel: [ meta, annotated_fusions ]
+    ch_annotate_fusions_out = Channel.empty()
+        .mix(
+            WorkflowOncoanalyser.restoreMeta(ANNOTATE_FUSIONS.out.annotated_fusions, ch_inputs),
+            ch_isofox_inputs_sorted.skip.map { meta -> [meta, []] },
         )
-            .map { meta, purple_dir, linx_annotation_dir ->
 
-                def inputs = [
-                    Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR),
-                    Utils.selectCurrentOrExisting(linx_annotation_dir, meta, Constants.INPUT.LINX_ANNO_DIR_TUMOR),
-                ]
 
-                return [meta, *inputs]
+    //
+    // MODULE: Neo scorer
+    //
+    // Select input sources and prepare input channel
+    // channel: [ meta_scorer, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotate_fusions ]
+    ch_scorer_inputs = WorkflowOncoanalyser.groupByMeta(
+        ch_isofox,
+        ch_purple,
+        ch_sage_somatic_append,
+        ch_lilac,
+        ch_finder_out,
+        ch_annotate_fusions_out,
+    )
+        .map { meta, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotate_fusions ->
+
+            def meta_scorer = [
+                key: meta.group_id,
+                id: meta.group_id,
+                sample_id: Utils.getTumorDnaSampleName(meta),
+                cancer_type: meta[Constants.InfoField.CANCER_TYPE],
+            ]
+
+            if (Utils.hasTumorRna(meta)) {
+                meta_scorer.sample_rna_id = Utils.getTumorRnaSampleName(meta)
             }
 
-        // Sort inputs
-        // channel: runnable: [ meta, purple_dir, linx_annotation_dir ]
-        // channel: skip: [ meta ]
-        ch_finder_inputs_sorted = ch_finder_inputs_selected
-            .branch { meta, purple_dir, linx_annotation_dir ->
+            def inputs = [
+                Utils.selectCurrentOrExisting(isofox_dir, meta, Constants.INPUT.ISOFOX_DIR),
+                Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR),
+                Utils.selectCurrentOrExisting(sage_somatic_append, meta, Constants.INPUT.SAGE_APPEND_VCF_TUMOR),
+                Utils.selectCurrentOrExisting(lilac_dir, meta, Constants.INPUT.LILAC_DIR),
+                neo_finder_dir,
+                annotate_fusions,
+            ]
 
-                def has_normal_dna = Utils.hasNormalDna(meta)
+            return [meta_scorer, *inputs]
+        }
 
-                def has_runnable_inputs = purple_dir && linx_annotation_dir && has_normal_dna
+    // Run process
+    NEO_SCORER(
+        ch_scorer_inputs,
+        ensembl_data_resources,
+        neo_resources,
+        cohort_tpm_medians,
+    )
 
-                runnable: has_runnable_inputs
-                skip: true
-                    return meta
-            }
-
-        // Create process input channel
-        // channel: sample_data: [ meta_finder, purple_dir, linx_annotation_dir ]
-        ch_finder_inputs = ch_finder_inputs_sorted.runnable
-            .map { meta, purple_dir, linx_annotation_dir ->
-
-                def meta_finder = [
-                    key: meta.group_id,
-                    id: meta.group_id,
-                    sample_id: Utils.getTumorDnaSampleName(meta),
-                ]
-
-                return [meta_finder, purple_dir, linx_annotation_dir]
-            }
-
-        // Run process
-        NEO_FINDER(
-            ch_finder_inputs,
-            genome_fasta,
-            genome_version,
-            genome_fai,
-            ensembl_data_resources,
-        )
-
-        ch_versions = ch_versions.mix(NEO_FINDER.out.versions)
-
-        // Set outputs, restoring original meta
-        // channel: [ meta, neo_finder_dir ]
-        ch_finder_out = WorkflowOncoanalyser.restoreMeta(NEO_FINDER.out.neo_finder_dir, ch_inputs)
-
-        //
-        // MODULE: Fusion annotation (Isofox)
-        //
-        // Annotate the fusion-derived neoepitope using Isofox where RNA data is available
-
-        // Select input sources and sort
-        // channel: runnable: [ meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ]
-        // channel: skip: [ meta ]
-        ch_isofox_inputs_sorted = WorkflowOncoanalyser.groupByMeta(
-            ch_finder_out,
-            ch_tumor_rna_bam,
-        )
-            .map { meta, neo_finder_dir, tumor_bam, tumor_bai ->
-                return [
-                    meta,
-                    neo_finder_dir,
-                    Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_RNA_TUMOR),
-                    Utils.selectCurrentOrExisting(tumor_bai, meta, Constants.INPUT.BAI_RNA_TUMOR),
-                ]
-            }
-            .branch { meta, neo_finder_dir, tumor_bam, tumor_bai ->
-                runnable: Utils.hasTumorRna(meta)
-                    return [meta, neo_finder_dir, tumor_bam, tumor_bai]
-                skip: true
-                    return meta
-            }
-
-        // Create process input channel
-        // channel: [ meta_isofox, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ]
-        ch_isofox_inputs = ch_isofox_inputs_sorted.runnable
-            .map { meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ->
-
-                def meta_isofox = [
-                    key: meta.group_id,
-                    id: meta.group_id,
-                    sample_id: Utils.getTumorDnaSampleName(meta),
-                ]
-
-                return [meta_isofox, neo_finder_dir, tumor_bam_rna, tumor_bai_rna]
-            }
-
-        // Run process
-        ANNOTATE_FUSIONS(
-            ch_isofox_inputs,
-            isofox_read_length,
-            genome_fasta,
-            genome_version,
-            genome_fai,
-            ensembl_data_resources,
-        )
-
-        ch_versions = ch_versions.mix(ANNOTATE_FUSIONS.out.versions)
-
-        // Set outputs, restoring original meta
-        // channel: [ meta, annotated_fusions ]
-        ch_annotate_fusions_out = Channel.empty()
-            .mix(
-                WorkflowOncoanalyser.restoreMeta(ANNOTATE_FUSIONS.out.annotated_fusions, ch_inputs),
-                ch_isofox_inputs_sorted.skip.map { meta -> [meta, []] },
-            )
-
-
-        //
-        // MODULE: Neo scorer
-        //
-        // Select input sources and prepare input channel
-        // channel: [ meta_scorer, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotate_fusions ]
-        ch_scorer_inputs = WorkflowOncoanalyser.groupByMeta(
-            ch_isofox,
-            ch_purple,
-            ch_sage_somatic_append,
-            ch_lilac,
-            ch_finder_out,
-            ch_annotate_fusions_out,
-        )
-            .map { meta, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotate_fusions ->
-
-                def meta_scorer = [
-                    key: meta.group_id,
-                    id: meta.group_id,
-                    sample_id: Utils.getTumorDnaSampleName(meta),
-                    cancer_type: meta[Constants.InfoField.CANCER_TYPE],
-                ]
-
-                if (Utils.hasTumorRna(meta)) {
-                    meta_scorer.sample_rna_id = Utils.getTumorRnaSampleName(meta)
-                }
-
-                def inputs = [
-                    Utils.selectCurrentOrExisting(isofox_dir, meta, Constants.INPUT.ISOFOX_DIR),
-                    Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR),
-                    Utils.selectCurrentOrExisting(sage_somatic_append, meta, Constants.INPUT.SAGE_APPEND_VCF_TUMOR),
-                    Utils.selectCurrentOrExisting(lilac_dir, meta, Constants.INPUT.LILAC_DIR),
-                    neo_finder_dir,
-                    annotate_fusions,
-                ]
-
-                return [meta_scorer, *inputs]
-            }
-
-        // Run process
-        NEO_SCORER(
-            ch_scorer_inputs,
-            ensembl_data_resources,
-            neo_resources,
-            cohort_tpm_medians,
-        )
-
-        ch_versions = ch_versions.mix(NEO_SCORER.out.versions)
+    ch_versions = ch_versions.mix(NEO_SCORER.out.versions)
 
     emit:
-        versions = ch_versions // channel: [ versions.yml ]
+    versions = ch_versions // channel: [ versions.yml ]
 }

From 9be849183e7cf8403a8cea73c9713941ca7c59c3 Mon Sep 17 00:00:00 2001
From: Stephen Watts <hello@stephen.ac>
Date: Thu, 30 May 2024 09:46:15 +1000
Subject: [PATCH 10/17] Use Bioconda for Neo annotate fusion process

---
 modules/local/neo/annotate_fusions/main.nf | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/modules/local/neo/annotate_fusions/main.nf b/modules/local/neo/annotate_fusions/main.nf
index 5e9af36e..44e59737 100644
--- a/modules/local/neo/annotate_fusions/main.nf
+++ b/modules/local/neo/annotate_fusions/main.nf
@@ -2,7 +2,10 @@ process ANNOTATE_FUSIONS {
     tag "${meta.id}"
     label 'process_medium'
 
-    container 'quay.io/biocontainers/hmftools-isofox:1.7.1--hdfd78af_0'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/hmftools-isofox:1.7.1--hdfd78af_0' :
+        'biocontainers/hmftools-isofox:1.7.1--hdfd78af_0' }"
 
     input:
     tuple val(meta), path(neo_finder_dir), path(bam), path(bai)

From c2e15ea527eb5f06f9d360480cbe6d02e7661b66 Mon Sep 17 00:00:00 2001
From: Stephen Watts <hello@stephen.ac>
Date: Thu, 30 May 2024 09:46:35 +1000
Subject: [PATCH 11/17] Add environment file for annotate fusion process

---
 modules/local/neo/annotate_fusions/environment.yml | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 modules/local/neo/annotate_fusions/environment.yml

diff --git a/modules/local/neo/annotate_fusions/environment.yml b/modules/local/neo/annotate_fusions/environment.yml
new file mode 100644
index 00000000..d4251c57
--- /dev/null
+++ b/modules/local/neo/annotate_fusions/environment.yml
@@ -0,0 +1,7 @@
+name: isofox
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::hmftools-isofox=1.7.1

From 5085556f51b7ea51f9b745af72ab83631dcaea5c Mon Sep 17 00:00:00 2001
From: Stephen Watts <hello@stephen.ac>
Date: Thu, 30 May 2024 09:47:32 +1000
Subject: [PATCH 12/17] Rename some Neo subworkflow variables for clarity

---
 modules/local/neo/finder/main.nf          | 4 ++--
 modules/local/neo/scorer/main.nf          | 5 ++---
 subworkflows/local/neo_prediction/main.nf | 8 ++++----
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/modules/local/neo/finder/main.nf b/modules/local/neo/finder/main.nf
index 4ff99a7b..7a5071ca 100644
--- a/modules/local/neo/finder/main.nf
+++ b/modules/local/neo/finder/main.nf
@@ -5,7 +5,7 @@ process NEO_FINDER {
     container 'docker.io/scwatts/neo:1.2_beta--1'
 
     input:
-    tuple val(meta), path(purple_dir), path(linx_dir)
+    tuple val(meta), path(purple_dir), path(linx_annotation_dir)
     path genome_fasta
     val genome_ver
     path genome_fai
@@ -29,7 +29,7 @@ process NEO_FINDER {
         -jar ${task.ext.jarPath} \\
             ${args} \\
             -sample ${meta.sample_id} \\
-            -linx_dir ${linx_dir} \\
+            -linx_dir ${linx_annotation_dir} \\
             -somatic_vcf ${purple_dir}/${meta.sample_id}.purple.somatic.vcf.gz \\
             -ref_genome ${genome_fasta} \\
             -ref_genome_version ${genome_ver} \\
diff --git a/modules/local/neo/scorer/main.nf b/modules/local/neo/scorer/main.nf
index ba98d833..056a8b1b 100644
--- a/modules/local/neo/scorer/main.nf
+++ b/modules/local/neo/scorer/main.nf
@@ -5,7 +5,7 @@ process NEO_SCORER {
     container 'docker.io/scwatts/neo:1.2_beta--1'
 
     input:
-    tuple val(meta), path(isofox_dir), path(purple_dir), path(sage_vcf), path(lilac_dir), path(neo_finder_dir), path(annotate_fusions)
+    tuple val(meta), path(isofox_dir), path(purple_dir), path(sage_vcf), path(lilac_dir), path(neo_finder_dir), path(annotated_fusions)
     path ensembl_data_resources
     path neo_resources, stageAs: 'neo_reference_data'
     path cohort_tpm_medians
@@ -31,7 +31,7 @@ process NEO_SCORER {
         isofox_dir_local=isofox__prepared/;
 
         cp -rL ${isofox_dir} \${isofox_dir_local}/;
-        cp -r ${annotate_fusions} \${isofox_dir_local}/;
+        cp -r ${annotated_fusions} \${isofox_dir_local}/;
 
         isofox_dir_arg="-isofox_dir \${isofox_dir_local}";
     fi;
@@ -69,4 +69,3 @@ process NEO_SCORER {
     echo -e '${task.process}:\\n  stub: noversions\\n' > versions.yml
     """
 }
-
diff --git a/subworkflows/local/neo_prediction/main.nf b/subworkflows/local/neo_prediction/main.nf
index 2a07a043..c069ed42 100644
--- a/subworkflows/local/neo_prediction/main.nf
+++ b/subworkflows/local/neo_prediction/main.nf
@@ -18,7 +18,7 @@ workflow NEO_PREDICTION {
     ch_purple              // channel: [mandatory] [ meta, purple_dir ]
     ch_sage_somatic_append // channel: [mandatory] [ meta, sage_append_vcf ]
     ch_lilac               // channel: [mandatory] [ meta, lilac_dir ]
-    ch_linx                // channel: [mandatory] [ meta, linx_dir ]
+    ch_linx                // channel: [mandatory] [ meta, linx_annotation_dir ]
 
     // Reference data
     genome_fasta           // channel: [mandatory] /path/to/genome_fasta
@@ -165,7 +165,7 @@ workflow NEO_PREDICTION {
     // MODULE: Neo scorer
     //
     // Select input sources and prepare input channel
-    // channel: [ meta_scorer, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotate_fusions ]
+    // channel: [ meta_scorer, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotated_fusions ]
     ch_scorer_inputs = WorkflowOncoanalyser.groupByMeta(
         ch_isofox,
         ch_purple,
@@ -174,7 +174,7 @@ workflow NEO_PREDICTION {
         ch_finder_out,
         ch_annotate_fusions_out,
     )
-        .map { meta, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotate_fusions ->
+        .map { meta, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotated_fusions ->
 
             def meta_scorer = [
                 key: meta.group_id,
@@ -193,7 +193,7 @@ workflow NEO_PREDICTION {
                 Utils.selectCurrentOrExisting(sage_somatic_append, meta, Constants.INPUT.SAGE_APPEND_VCF_TUMOR),
                 Utils.selectCurrentOrExisting(lilac_dir, meta, Constants.INPUT.LILAC_DIR),
                 neo_finder_dir,
-                annotate_fusions,
+                annotated_fusions,
             ]
 
             return [meta_scorer, *inputs]

From 8a488dba8e1b2a1f281bd3ac0e68359ca7221596 Mon Sep 17 00:00:00 2001
From: Stephen Watts <hello@stephen.ac>
Date: Thu, 30 May 2024 09:47:54 +1000
Subject: [PATCH 13/17] Add meta.yml files for processes related to Neo

---
 modules/local/neo/annotate_fusions/meta.yml | 63 +++++++++++++++++++++
 modules/local/neo/finder/meta.yml           | 52 +++++++++++++++++
 modules/local/neo/scorer/meta.yml           | 62 ++++++++++++++++++++
 3 files changed, 177 insertions(+)
 create mode 100644 modules/local/neo/annotate_fusions/meta.yml
 create mode 100644 modules/local/neo/finder/meta.yml
 create mode 100644 modules/local/neo/scorer/meta.yml

diff --git a/modules/local/neo/annotate_fusions/meta.yml b/modules/local/neo/annotate_fusions/meta.yml
new file mode 100644
index 00000000..83a3fb0b
--- /dev/null
+++ b/modules/local/neo/annotate_fusions/meta.yml
@@ -0,0 +1,63 @@
+name: annotate_fusions
+description: Annotate neoepitopes with RNA fusion data
+keywords:
+  - neoepitopes
+  - rna
+  - rnaseq
+tools:
+  - isofox:
+      description: Characterises and counts gene, transcript features
+      homepage: https://github.com/hartwigmedical/hmftools/tree/master/isofox
+      documentation: https://github.com/hartwigmedical/hmftools/tree/master/isofox
+      licence: ["GPL v3"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [id: 'sample_id']
+  - neo_finder_dir:
+      type: directory
+      description: Neo Finder directory
+  - bam:
+      type: file
+      description: BAM file
+      pattern: "*.{bam}"
+  - bai:
+      type: file
+      description: BAI file
+      pattern: "*.{bai}"
+  - read_length:
+      type: integer
+      description: Read length
+  - genome_fasta:
+      type: file
+      description: Reference genome assembly FASTA file
+      pattern: "*.{fa,fasta}"
+  - genome_ver:
+      type: string
+      description: Reference genome version
+  - genome_fai:
+      type: file
+      description: Reference genome assembly fai file
+      pattern: "*.{fai}"
+  - ensembl_data_resources:
+      type: directory
+      description: HMF ensembl data resources directory
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [id: 'sample_id']
+  - annotated_fusions:
+      type: file
+      description: Annotated neoepitopes file
+      pattern: "*.{tsv}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@scwatts"
+  - "@charlesshale"
diff --git a/modules/local/neo/finder/meta.yml b/modules/local/neo/finder/meta.yml
new file mode 100644
index 00000000..01dc2fcb
--- /dev/null
+++ b/modules/local/neo/finder/meta.yml
@@ -0,0 +1,52 @@
+name: neo_finder
+description: Identify candidate neoepitopes
+keywords:
+  - neoepitopes
+tools:
+  - neo:
+      description: Predict and score neoepitopes
+      homepage: https://github.com/hartwigmedical/hmftools/tree/master/neo
+      documentation: https://github.com/hartwigmedical/hmftools/tree/master/neo
+      licence: ["GPL v3"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [id: 'sample_id']
+  - purple_dir:
+      type: directory
+      description: PURPLE output directory
+  - linx_annotation_dir:
+      type: directory
+      description: LINX somatic annotation output directory
+  - genome_fasta:
+      type: file
+      description: Reference genome assembly FASTA file
+      pattern: "*.{fa,fasta}"
+  - genome_ver:
+      type: string
+      description: Reference genome version
+  - genome_fai:
+      type: file
+      description: Reference genome assembly fai file
+      pattern: "*.{fai}"
+  - ensembl_data_resources:
+      type: directory
+      description: HMF ensembl data resources directory
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [id: 'sample_id']
+  - neo_finder_dir:
+      type: directory
+      description: Neo Finder output directory
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@scwatts"
+  - "@charlesshale"
diff --git a/modules/local/neo/scorer/meta.yml b/modules/local/neo/scorer/meta.yml
new file mode 100644
index 00000000..c870da4d
--- /dev/null
+++ b/modules/local/neo/scorer/meta.yml
@@ -0,0 +1,62 @@
+name: neo_scorer
+description: Score and rank candidate neoepitopes
+keywords:
+  - neoepitopes
+tools:
+  - neo:
+      description: Predict and score neoepitopes
+      homepage: https://github.com/hartwigmedical/hmftools/tree/master/neo
+      documentation: https://github.com/hartwigmedical/hmftools/tree/master/neo
+      licence: ["GPL v3"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [id: 'sample_id']
+  - isofox_dir:
+      type: directory
+      description: Isofox output directory (optional)
+  - purple_dir:
+      type: directory
+      description: PURPLE output directory
+  - sage_vcf:
+      type: file
+      description: SAGE VCF file
+      pattern: "*.{vcf.gz}"
+  - lilac_dir:
+      type: directory
+      description: LILAC output directory
+  - neo_finder_dir:
+      type: directory
+      description: Neo Finder output directory
+  - annotated_fusions:
+      type: file
+      description: Annotated neoepitopes file
+      pattern: "*.{tsv}"
+  - ensembl_data_resources:
+      type: directory
+      description: HMF ensembl data resources directory
+  - neo_resources:
+      type: directory
+      description: HMF Neo resources directory
+  - cohort_tpm_medians:
+      type: file
+      description: HMF cohort TPM medians file
+      pattern: "*.{csv}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [id: 'sample_id']
+  - neo_score_dir:
+      type: directory
+      description: Neo Scorer output directory
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@scwatts"
+  - "@charlesshale"

From 93ab00704df6dbe36923929390a02bee3f1edc7c Mon Sep 17 00:00:00 2001
From: Stephen Watts <hello@stephen.ac>
Date: Fri, 21 Jun 2024 17:06:00 +1000
Subject: [PATCH 14/17] Enable SAGE append when Neo is set to run

---
 workflows/wgts.nf | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/workflows/wgts.nf b/workflows/wgts.nf
index d6f3bb6e..787d1d44 100644
--- a/workflows/wgts.nf
+++ b/workflows/wgts.nf
@@ -468,9 +468,7 @@ workflow WGTS {
     // channel: [ meta, sage_append_vcf ]
     ch_sage_somatic_append_out = Channel.empty()
     ch_sage_germline_append_out = Channel.empty()
-    if (run_config.stages.orange) {
-
-        // NOTE(SW): currently used only for ORANGE but will also be used for Neo once implemented
+    if (run_config.stages.orange || run_config.stages.neo) {
 
         SAGE_APPEND(
             ch_inputs,

From 175a7d770aa621cf1e9d9fa8448ec7b8cb2fbd6f Mon Sep 17 00:00:00 2001
From: Charles Shale <shalecharles@gmail.com>
Date: Mon, 24 Jun 2024 13:22:34 +1000
Subject: [PATCH 15/17] Linx somatic writes neoepitopes

---
 modules/local/linx/somatic/main.nf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/local/linx/somatic/main.nf b/modules/local/linx/somatic/main.nf
index 5218a8f0..04b78f89 100644
--- a/modules/local/linx/somatic/main.nf
+++ b/modules/local/linx/somatic/main.nf
@@ -38,6 +38,7 @@ process LINX_SOMATIC {
         -known_fusion_file ${known_fusion_data} \\
         -driver_gene_panel ${driver_gene_panel} \\
         -write_vis_data \\
+        -write_neo_epitopes \\
         -output_dir linx_somatic/
 
     cat <<-END_VERSIONS > versions.yml

From ef7db8c388b11f348c929b5a6d337da67a76c8f1 Mon Sep 17 00:00:00 2001
From: Stephen Watts <hello@stephen.ac>
Date: Tue, 25 Jun 2024 12:16:14 +1000
Subject: [PATCH 16/17] Add reference to Neo in documentation

---
 README.md      |  1 +
 docs/output.md | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/README.md b/README.md
index 1e6760a7..10e016c2 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,7 @@ The following processes and tools can be run with `oncoanalyser`:
 - HRD status prediction (`CHORD`)
 - Mutational signature fitting (`Sigs`)
 - Tissue of origin prediction (`CUPPA`)
+- Neoepitope prediction (`Neo`)
 - Report generation (`ORANGE`, `linxreport`)
 
 ## Usage
diff --git a/docs/output.md b/docs/output.md
index 59f9f0c6..6fb6596b 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -71,6 +71,8 @@ output/
   - [Sigs](#sigs) - Mutational signature fitting
 - [Tissue of origin prediction](#tissue-of-origin-prediction)
   - [CUPPA](#cuppa) - Tissue of origin prediction
+- [Neoepitope prediction](#neoepitope-prediction)
+  - [Neo](#neo) - Neoepitope prediction
 - [Report generation](#report-generation)
   - [ORANGE](#orange) - Key results summary
   - [linxreport](#linxreport) - Interactive LINX report
@@ -473,6 +475,23 @@ signatures to tumor sample data.
 [CUPPA](https://github.com/hartwigmedical/hmftools/tree/master/cuppa) predicts tissue of origin for a given tumor sample
 using DNA and/or RNA features generated by upstream hmftools components.
 
+### Neoepitope prediction
+
+#### Neo
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `<group_id>/neo/`
+  - `<tumor_dna_id>.neo.neo_data.tsv`: Neoepitope candidates.
+  - `<tumor_dna_id>.neo.neoepitope.tsv`: LINX fusion neoepitopes.
+  - `<tumor_dna_id>.neo.peptide_scores.tsv`: Peptide binding likelihood and scoring.
+
+</details>
+
+[Neo](https://github.com/hartwigmedical/hmftools/tree/master/neo) builds comprehensive neoepitope predictions from DNA
+data, with additional annotations made using RNA data when available.
+
 ### Report generation
 
 #### ORANGE

From 8b15c62f6847e6bc649fd96fd3ee737fcd3bd45f Mon Sep 17 00:00:00 2001
From: Stephen Watts <hello@stephen.ac>
Date: Tue, 10 Sep 2024 09:47:40 +1000
Subject: [PATCH 17/17] Do not run SAGE append germline for Neo

---
 subworkflows/local/sage_append/main.nf | 5 ++++-
 workflows/targeted.nf                  | 1 +
 workflows/wgts.nf                      | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/subworkflows/local/sage_append/main.nf b/subworkflows/local/sage_append/main.nf
index a7ded0be..39c48362 100644
--- a/subworkflows/local/sage_append/main.nf
+++ b/subworkflows/local/sage_append/main.nf
@@ -21,6 +21,9 @@ workflow SAGE_APPEND {
     genome_fai       // channel: [mandatory] /path/to/genome_fai
     genome_dict      // channel: [mandatory] /path/to/genome_dict
 
+    // Params
+    run_germline     // boolean: [mandatory] Whether to run the germline SAGE append step
+
     main:
     // Channel for version.yml files
     // channel: [ versions.yml ]
@@ -63,7 +66,7 @@ workflow SAGE_APPEND {
             def has_smlv_germline = file(purple_dir).resolve("${tumor_dna_id}.purple.germline.vcf.gz")
             def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_APPEND_VCF_NORMAL)
 
-            runnable: has_normal_dna && has_tumor_rna && has_smlv_germline && !has_existing
+            runnable: has_normal_dna && has_tumor_rna && has_smlv_germline && !has_existing && run_germline
             skip: true
                 return meta
         }
diff --git a/workflows/targeted.nf b/workflows/targeted.nf
index 5b0b92bd..9189ef93 100644
--- a/workflows/targeted.nf
+++ b/workflows/targeted.nf
@@ -478,6 +478,7 @@ workflow TARGETED {
             ref_data.genome_version,
             ref_data.genome_fai,
             ref_data.genome_dict,
+            true,  // run_germline
         )
 
         ch_versions = ch_versions.mix(SAGE_APPEND.out.versions)
diff --git a/workflows/wgts.nf b/workflows/wgts.nf
index 787d1d44..98c808b7 100644
--- a/workflows/wgts.nf
+++ b/workflows/wgts.nf
@@ -478,6 +478,7 @@ workflow WGTS {
             ref_data.genome_version,
             ref_data.genome_fai,
             ref_data.genome_dict,
+            run_config.stages.orange,  // run_germline [run for ORANGE but not Neo]
         )
 
         ch_versions = ch_versions.mix(SAGE_APPEND.out.versions)