Skip to content

Commit

Permalink
condition for running process (#153)
Browse files Browse the repository at this point in the history
* add condition for run process
* add resquiggle processors param
* add other docker env
  • Loading branch information
liuyang2006 authored Jul 22, 2022
1 parent ef3f3c4 commit 99abdcd
Show file tree
Hide file tree
Showing 14 changed files with 347 additions and 9 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,5 @@ testdemo.py
/src/nanome/xgboost/sanity/
/hpc_test/
/src/nanome/nanocompare/utils/na12878.filelist.txt
/guppy_basecaller-core-dump-db/
/guppy_basecaller-core-dump-db/
/locations/
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ NANOME pipeline support running with various ways in different platforms:


## Simple usage
Please refer to [Usage](https://github.com/LabShengLi/nanome/blob/master/docs/Usage.md) and [Specific Usage](https://github.com/LabShengLi/nanome/blob/master/docs/SpecificUsage.md) and [NANOME options](https://github.com/LabShengLi/nanome/blob/tutorial1/docs/nanome_params.md) for how to use NANOME pipeline. For running on CloudOS platform (e.g., google cloud), please check [Usage on CloudOS](https://github.com/LabShengLi/nanome/blob/master/docs/Usage.md#4-running-pipeline-on-cloud-computing-platform). We provide a **tutorial video** for running NANOME pipeline:
Please refer to [Usage](https://github.com/LabShengLi/nanome/blob/master/docs/Usage.md) and [Specific Usage](https://github.com/LabShengLi/nanome/blob/master/docs/SpecificUsage.md) and [NANOME options](https://github.com/LabShengLi/nanome/blob/tutorial1/docs/nanome_params.md) for how to use NANOME pipeline. For running on CloudOS platform (e.g., google cloud), please check [Usage on CloudOS](https://github.com/LabShengLi/nanome/blob/master/docs/Usage.md#5-running-pipeline-on-cloud-computing-platform). We provide a **tutorial video** for running NANOME pipeline:

[![IMAGE ALT TEXT HERE](https://img.youtube.com/vi/TfotM55KTVE/0.jpg)](https://www.youtube.com/watch?v=TfotM55KTVE)

Expand Down
6 changes: 5 additions & 1 deletion conf/executors/lifebit.config
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ process {
params.errorStrategy : task.exitStatus in [1, 2, 10, 14] ? 'retry' : params.errorStrategy }
}

withName: 'ENVCHECK|BASECALL|Guppy|MEGALODON' { // allocate gpu
withName: 'ENVCHECK|BASECALL|Guppy|MEGALODON|DEEPSIGNAL2' { // allocate gpu
accelerator = [request: params.gpuNumber, type: params.gpuType]
beforeScript = "export CUDA_VISIBLE_DEVICES=0" // pass CUDA var to process, since GCP do not export it
containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
Expand All @@ -105,6 +105,10 @@ process {
// container = 'liuyangzzu/nanome:v1.4'
container = params.tombo_docker_name
}

withName: 'DEEPSIGNAL2' {
container = params.deepsignal2_docker_name
}
}

env {
Expand Down
14 changes: 13 additions & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,8 @@ include { MEGALODON; MGLDNCOMB } from './modules/MEGALODON'

include { DEEPSIGNAL; DPSIGCOMB } from './modules/DEEPSIGNAL'

include { DEEPSIGNAL2; DEEPSIGNAL2COMB } from './modules/DEEPSIGNAL2'

include { REPORT } from './modules/REPORT'

include { Guppy; GuppyComb; Tombo; TomboComb; DeepMod; DpmodComb; METEORE } from './modules/OLDTOOLS'
Expand Down Expand Up @@ -332,7 +334,8 @@ workflow {
}

// Run resquiggle if Tombo, DeepSignal, or DeepSignal2 is used
if (((params.runDeepSignal || params.runTombo) && params.runMethcall) || params.runResquiggle) {
if (((params.runDeepSignal || params.runTombo || params.runDeepSignal2) && params.runMethcall) || params.runResquiggle) {
// BASECALL.out.basecall.subscribe({ println("BASECALL.out.basecall: $it") })
RESQUIGGLE(BASECALL.out.basecall, ENVCHECK.out.reference_genome)
}

Expand Down Expand Up @@ -369,6 +372,15 @@ workflow {
r3 = Channel.empty()
}

if (params.runDeepSignal2 && params.runMethcall) {
DEEPSIGNAL2(RESQUIGGLE.out.resquiggle.collect(),
ENVCHECK.out.reference_genome,
ch_src, ch_utils)
DEEPSIGNAL2COMB(DEEPSIGNAL2.out.deepsignal2_combine_out,
ch_src, ch_utils
)
}

if (params.runGuppy && params.runMethcall) {
Guppy(UNTAR.out.untar, ENVCHECK.out.reference_genome, ch_utils)

Expand Down
3 changes: 3 additions & 0 deletions modules/ALIGNMENT.nf
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ process ALIGNMENT {
path "${basecallDir.baseName}.alignment", optional:true, emit: alignment
tuple val(basecallDir.baseName), path ("${basecallDir.baseName}.alignment"), optional:true, emit: alignment_tuple

when:
params.runAlignment

shell:
cores = task.cpus * params.mediumProcTimes
'''
Expand Down
2 changes: 1 addition & 1 deletion modules/DEEPSIGNAL.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ process DEEPSIGNAL {
path "${params.dsname}_deepsignal_batch_${indir.baseName}.*.gz", emit: deepsignal_tsv

when:
params.runMethcall && params.runDeepSignal
params.runMethcall && params.runDeepSignal && ! params.stopDeepSignal

script:
cores = task.cpus * params.highProcTimes
Expand Down
145 changes: 145 additions & 0 deletions modules/DEEPSIGNAL2.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/*
=========================================================================================
NANOME(Nanopore methylation) pipeline for Oxford Nanopore sequencing
=========================================================================================
NANOME Analysis Pipeline.
#### Homepage / Documentation
https://github.com/LabShengLi/nanome
@Author : Yang Liu
@FileName : DEEPSIGNAL2.nf
@Software : NANOME project
@Organization : JAX Sheng Li Lab
----------------------------------------------------------------------------------------
*/
// DEEPSIGNAL2: run DeepSignal2 feature extraction and methylation calling on
// every staged '*.resquiggle' directory, then concatenate the gzipped
// per-batch features and per-read predictions into two dataset-level files.
//
// Inputs:
//   resquiggle_collect - the collected '*.resquiggle' directories
//   reference_genome   - staged reference genome (not referenced by the script)
//   ch_src, ch_utils   - staged project source / utility dirs (not referenced by the script)
// Outputs:
//   deepsignal2_combine_out - <dsname>_deepsignal2_per_read_combine.*.gz
//   deepsignal2_feature_out - <dsname>_deepsignal2_feature_combine.*.gz (optional)
process DEEPSIGNAL2 {
    tag "${params.dsname}"

    // Raw per-read predictions are only published when params.outputRaw is set
    publishDir "${params.outdir}/${params.dsname}-methylation-callings/Raw_Results-${params.dsname}",
        mode: "copy",
        pattern: "${params.dsname}_deepsignal2_per_read_combine.*.gz",
        enabled: params.outputRaw

    // Combined extracted features are always published
    publishDir "${params.outdir}/${params.dsname}-methylation-callings/Features-${params.dsname}",
        mode: "copy",
        pattern: "${params.dsname}_deepsignal2_feature_combine.*.gz"

    input:
    path resquiggle_collect
    // path feature_collect
    path reference_genome
    path ch_src
    path ch_utils

    output:
    path "${params.dsname}_deepsignal2_per_read_combine.*.gz", emit: deepsignal2_combine_out
    path "${params.dsname}_deepsignal2_feature_combine.*.gz", emit: deepsignal2_feature_out, optional: true

    when:
    params.runMethcall && params.runDeepSignal2

    // Single 'shell:' section (same layout as the other modules): the Groovy
    // assignment below is evaluated first and made available to the template.
    shell:
    cores = task.cpus * params.highProcTimes
    '''
    ## Activate the deepsignal2 conda env; tracing and nounset are switched off while sourcing conda
    set +xu
    . /opt/conda/etc/profile.d/conda.sh
    conda activate /opt/conda/envs/deepsignal2
    set -x
    export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
    export HDF5_PLUGIN_PATH="$CONDA_PREFIX/hdf5/lib/plugin"
    which deepsignal2

    ## Download and unpack the DeepSignal2 model into the task directory
    wget !{params.DEEPSIGNAL2_MODEL_FILE}
    tar -xzf !{params.DEEPSIGNAL2_MODEL_NAME}.tar.gz

    ## Use GPU options only when CUDA_VISIBLE_DEVICES is set and non-empty
    echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"
    if [[ "${CUDA_VISIBLE_DEVICES:-}" == "" ]] ; then
        echo "Detect no GPU, using CPU commandType"
        commandType='cpu'
        gpuOptions=" "
    else
        echo "Detect GPU, using GPU commandType"
        commandType='gpu'
        gpuOptions="--nproc_gpu 1"
    fi

    ## Start from empty combined files; gzip members are appended per batch
    > !{params.dsname}_deepsignal2_feature_combine.tsv.gz
    > !{params.dsname}_deepsignal2_per_read_combine.tsv.gz

    ## NUL-delimited iteration so directory names are read verbatim
    find . -maxdepth 1 -name '*.resquiggle' -print0 |
        while IFS= read -r -d '' infn; do
            ## Per-batch output name: replace the .resquiggle suffix
            callfn="${infn/.resquiggle/_deepsignal2_batch_per_read.tsv}"

            deepsignal2 extract \
                -i "${infn}/workspace/" \
                -o "${infn}_feature.tsv" \
                --corrected_group RawGenomeCorrected_000 \
                --nproc !{cores} --motifs CG \
                &>> !{params.dsname}.DeepSignal2.run.log

            ## gpuOptions is intentionally unquoted so it splits into separate arguments
            deepsignal2 call_mods \
                --model_path !{params.DEEPSIGNAL2_MODEL_NAME} \
                --input_path "${infn}_feature.tsv" \
                --result_file "${callfn}" \
                --nproc !{cores} ${gpuOptions} \
                &>> !{params.dsname}.DeepSignal2.run.log

            gzip -f < "${infn}_feature.tsv" >> \
                !{params.dsname}_deepsignal2_feature_combine.tsv.gz
            gzip -f < "${callfn}" >> \
                !{params.dsname}_deepsignal2_per_read_combine.tsv.gz
        done
    echo "### DeepSignal2 methylation DONE"

    if [[ !{params.deduplicate} == true ]] ; then
        echo "### Deduplicate for read-level outputs"
        ## sort order: Chr, Start, (End), ID, Strand
        zcat !{params.dsname}_deepsignal2_per_read_combine.tsv.gz |\
            sort -V -u -k1,1 -k2,2n -k5,5 -k3,3 |\
            gzip -f > !{params.dsname}_deepsignal2_per_read_combine.sort.tsv.gz
        rm !{params.dsname}_deepsignal2_per_read_combine.tsv.gz &&\
            mv !{params.dsname}_deepsignal2_per_read_combine.sort.tsv.gz \
                !{params.dsname}_deepsignal2_per_read_combine.tsv.gz
    fi
    echo "### DeepSignal2 all combine DONE"
    '''
}

// DEEPSIGNAL2COMB: pass the combined DeepSignal2 per-read output to
// utils/unify_format_for_calls.sh, which is expected to produce the
// Read_Level-<dsname>/ and Site_Level-<dsname>/ files declared as outputs.
//
// Inputs:
//   deepsignal2_combine_out - combined per-read predictions from DEEPSIGNAL2
//   ch_src, ch_utils        - staged project source / utility dirs
// Outputs:
//   read_unify - Read_Level-<dsname>/<dsname>_*-perRead-score*.gz
//   site_unify - Site_Level-<dsname>/*-perSite-cov*.gz
process DEEPSIGNAL2COMB {
    tag "${params.dsname}"

    publishDir "${params.outdir}/${params.dsname}-methylation-callings",
        mode: "copy",
        pattern: "Read_Level-${params.dsname}/${params.dsname}_*-perRead-score*.gz"

    // Only the cov1 site-level files are published, although every
    // '*-perSite-cov*.gz' file is emitted on the site_unify channel
    publishDir "${params.outdir}/${params.dsname}-methylation-callings",
        mode: "copy",
        pattern: "Site_Level-${params.dsname}/*-perSite-cov1*.gz"

    input:
    path deepsignal2_combine_out
    path ch_src
    path ch_utils

    output:
    path "Read_Level-${params.dsname}/${params.dsname}_*-perRead-score*.gz", emit: read_unify
    path "Site_Level-${params.dsname}/*-perSite-cov*.gz", emit: site_unify

    when:
    params.runCombine

    // Single 'shell:' section (same layout as the other modules): the Groovy
    // assignment below is evaluated first and made available to the template.
    shell:
    cores = task.cpus * params.highProcTimes
    '''
    ## Unify format output
    ## Each continued line ends with " \\" so arguments stay separated
    ## regardless of the indentation of the following line
    bash utils/unify_format_for_calls.sh \
        !{params.dsname} DeepSignal2 DeepSignal \
        !{deepsignal2_combine_out} \
        . !{cores} 12 !{params.sort ? true : false} \
        "!{params.chrSet1.replaceAll(',', ' ')}"
    '''
}
3 changes: 3 additions & 0 deletions modules/QCEXPORT.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ process QCEXPORT {
path "${params.dsname}_QCReport", emit: qc_report
path "${params.dsname}_bam_data", optional: true, emit: bam_data

when:
params.runQC

shell:
cores = task.cpus * params.highProcTimes
samtools_cores = task.cpus * params.mediumProcTimes
Expand Down
3 changes: 3 additions & 0 deletions modules/REPORT.nf
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ process REPORT {
path "Site_Level-${params.dsname}/*-perSite-cov*.gz", emit: site_unify, optional: true
path "${params.dsname}_nanome_${params.NANOME_MODEL}_per_read_combine.*.gz", emit: nanome_combine_out, optional: true

when:
params.runNANOME

"""
if [[ ${params.runNANOME} == true ]] ; then
## NANOME XGBoost method
Expand Down
25 changes: 24 additions & 1 deletion modules/RESQUIGGLE.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,26 @@
process RESQUIGGLE {
tag "${basecallDir.baseName}"

publishDir "${params.outdir}/${params.dsname}-methylation-callings/Features-${params.dsname}",
mode: "copy",
pattern: "${basecallDir.baseName}.deepsignal1_batch_features.tsv.gz",
enabled: params.feature_extract

publishDir "${params.outdir}/${params.dsname}-methylation-callings/Features-${params.dsname}",
mode: "symlink",
pattern: "${basecallDir.baseName}.resquiggle",
enabled: params.publishResquiggle

input:
path basecallDir
each path(reference_genome)

output:
path "${basecallDir.baseName}.resquiggle", emit: resquiggle
path "${basecallDir.baseName}.deepsignal1_batch_features.tsv.gz", emit: feature_extract, optional: true

when:
(params.runMethcall && (params.runDeepSignal || params.runTombo)) || params.runResquiggle
(params.runMethcall && ((params.runDeepSignal && ! params.stopDeepSignal) || params.runTombo || params.runDeepSignal2)) || params.runResquiggle

shell:
cores = task.cpus * params.highProcTimes
Expand Down Expand Up @@ -63,6 +74,7 @@ process RESQUIGGLE {
### Out of memory solution for large data: --tomboResquiggleOptions '--signal-length-range 0 500000 --sequence-length-range 0 50000'
tombo resquiggle\
--processes !{resquiggle_cores} \
--threads-per-process !{params.tomboThreadsPerProcess} \
--corrected-group !{params.ResquiggleCorrectedGroup} \
--basecall-group !{params.BasecallGroupName} \
--basecall-subgroup !{params.BasecallSubGroupName}\
Expand All @@ -72,5 +84,16 @@ process RESQUIGGLE {
!{params.referenceGenome} &>> !{params.dsname}.!{basecallDir.baseName}.Resquiggle.run.log
echo "### tombo resquiggle DONE"
## Start to extract features
if [[ !{params.feature_extract} == true ]] ; then
    ## Extract DeepSignal features from the resquiggled reads.
    ## The write path must be the same file that gzip compresses below, so the
    ## result is <batch>.deepsignal1_batch_features.tsv.gz
    deepsignal extract \
        --fast5_dir !{basecallDir.baseName}.resquiggle/workspace/ \
        --reference_path !{params.referenceGenome} \
        --write_path !{basecallDir.baseName}.deepsignal1_batch_features.tsv \
        --corrected_group !{params.ResquiggleCorrectedGroup} \
        --nproc !{cores}
    gzip -f !{basecallDir.baseName}.deepsignal1_batch_features.tsv
fi
'''
}
25 changes: 23 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ params {

tombo_docker_name = "liuyangzzu/nanome:v1.4"
clair3_docker_name = "hkubal/clair3:latest"
deepsignal2_docker_name = "liuyangzzu/deepsignal2:v1.0"


// process and executor configurations
executor = null
Expand Down Expand Up @@ -69,8 +71,15 @@ params {
runGuppy = false
runGuppyGcf52ref= false // Guppy readlevel extract software, not certified by us
runNANOME = true // NANOME concensus

runDeepSignal2 = false
runNewTool = false // run new added tool in interface

runQC = true
runAlignment = true

stopDeepSignal = false // used for switch between two HPC: gpu and cpu

newModuleConfigs = null

runTombo = false
Expand Down Expand Up @@ -120,6 +129,9 @@ params {
ResquiggleCorrectedGroup = "RawGenomeCorrected_000"
tomboResquiggleOptions = null // '--signal-length-range 0 500000 --sequence-length-range 0 50000', ref: tombo resquiggle --print-advanced-arguments
tomboMultiprocessRegionSize = 1000 // tombo methylation calling options
tomboThreadsPerProcess = 1 // --threads-per-process
feature_extract = false // if extract tombo resquiggle features
publishResquiggle = false // if publish resquiggle symlink

// DeepSignal model names
deepsignalDir = null // default is get model online, or specify the name of model dir
Expand Down Expand Up @@ -160,6 +172,10 @@ params {
METEORE_GITHUB_ONLINE = "https://github.com/comprna/METEORE/archive/refs/tags/v1.0.0.tar.gz"
METEOREDirName = "METEORE-1.0.0"

// DeepSignal2 model
DEEPSIGNAL2_MODEL_FILE = "https://storage.googleapis.com/jax-nanopore-01-project-data/nanome-input/model.dp2.CG.R9.4_1D.human_hx1.bn17_sn16.both_bilstm.b17_s16_epoch4.ckpt.tar.gz"
DEEPSIGNAL2_MODEL_NAME = "model.dp2.CG.R9.4_1D.human_hx1.bn17_sn16.both_bilstm.b17_s16_epoch4.ckpt"

// consensus model
NANOME_MODEL = 'NANOME3T' // or 'NA12878', 'NA12878_XGBoost_NA_top3'
NANOME_CONSENSUS_TOOLS = 'Nanopolish Megalodon DeepSignal' // or 'Megalodon DeepSignal' for NANOME2T
Expand Down Expand Up @@ -206,6 +222,9 @@ profiles {
withName: 'CLAIR3' {
container = params.clair3_docker_name
}
withName: 'DEEPSIGNAL2' {
container = params.deepsignal2_docker_name
}
}

docker{
Expand Down Expand Up @@ -235,6 +254,9 @@ profiles {
withName: 'CLAIR3' {
container = "docker://${params.clair3_docker_name}"
}
withName: 'DEEPSIGNAL2' {
container = "docker://${params.deepsignal2_docker_name}"
}
}

singularity {
Expand All @@ -248,7 +270,6 @@ profiles {
// for container, fast5 gz format need export this env
HDF5_PLUGIN_PATH = "/opt/conda/envs/nanome/hdf5/lib/plugin"
}

}

hpc { // general hpc configuration
Expand Down Expand Up @@ -432,7 +453,7 @@ profiles {
params.errorStrategy : task.exitStatus in [1, 2, 10, 14] ? 'retry' : params.errorStrategy }
}

withName: 'ENVCHECK|BASECALL|Guppy|MEGALODON' { // allocate gpu
withName: 'ENVCHECK|BASECALL|Guppy|MEGALODON|DEEPSIGNAL2' { // allocate gpu
accelerator = [request: params.gpuNumber, type: params.gpuType]
beforeScript = "export CUDA_VISIBLE_DEVICES=0" // pass CUDA var to process, since GCP do not export it
containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
Expand Down
Loading

0 comments on commit 99abdcd

Please sign in to comment.