From 1cdc472e1e31412db42596902377a0363b187d74 Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Thu, 2 Feb 2023 15:50:58 -0500 Subject: [PATCH] update guppy to latest (#156) * support latest guppy version 6.4.6 ref: https://pypi.org/project/ont-pyguppy-client-lib/ --- Dockerfile | 2 +- README.md | 4 ++-- environment.yml | 2 +- main.nf | 10 ++++++---- modules/BASECALL.nf | 2 +- modules/CONSENSUS.nf | 8 ++++---- modules/NANOPOLISH.nf | 4 ++-- modules/RESQUIGGLE.nf | 9 +++++---- modules/UNTAR.nf | 1 + nextflow.config | 12 ++++++++---- setup.py | 2 +- src/nanome/common/global_settings.py | 2 +- utils/validate_nanome_container.sh | 5 +++-- 13 files changed, 36 insertions(+), 27 deletions(-) diff --git a/Dockerfile b/Dockerfile index 82dec37e..2b4551e7 100755 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ LABEL description="Nanome project in Li Lab at The Jackson Laboratory" \ # Guppy version 6.4.x is not support, due to no fast5_out option # ont-remora 2.x is not support, due to pod5 needs python 3.7+ -ARG GUPPY_VERSION=6.3.9 +ARG GUPPY_VERSION=6.4.6 ARG REMORA_VERSION=1.1.1 ARG MEGALODON_VERSION=2.5.0 ARG BUILD_PACKAGES="wget apt-transport-https procps git curl libnvidia-compute-460-server" diff --git a/README.md b/README.md index 0f15e05e..d9a5e617 100755 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ ## Highlights of NANOME pipeline ### Several first highlights for NANOME -![Figure_pipe_comp](https://github.com/LabShengLi/nanome/blob/xgboost1/docs/resources/pipeline_comparison.jpg) +![Figure_pipe_comp](https://github.com/LabShengLi/nanome/blob/master/docs/resources/pipeline_comparison.jpg) * Enables users to process **terabasescale** Oxford Nanopore sequencing datasets. * Provide a **one command line**/**web-based UI** for end-to-end analyzing Nanopore sequencing methylation-callings. @@ -143,7 +143,7 @@ Please check [NANOME report](https://github.com/LabShengLi/nanome/blob/master/do ### Haplotype-aware consensus methylations Please check [phasing usage](https://github.com/LabShengLi/nanome/blob/tutorial1/docs/Phasing.md). -![PhasingDemo](https://github.com/LabShengLi/nanome/blob/tutorial1/docs/resources/nanome3t_5mc_phasing2.png) +![PhasingDemo](https://github.com/LabShengLi/nanome/blob/master/docs/resources/nanome3t_5mc_phasing2.png) ### Lifebit CloudOS report We now support running NANOME on cloud computing platform. [Lifebit](https://lifebit.ai/lifebit-cloudos/) is a web-based cloud computing platform, and below is the running reports: diff --git a/environment.yml b/environment.yml index 81cd91b3..f35cd45a 100755 --- a/environment.yml +++ b/environment.yml @@ -43,7 +43,7 @@ dependencies: - nanopolish>=0.14.0 - pip: - xgboost<=1.5.2 # nanome model load need <=1.5.x - - ont-pyguppy-client-lib==6.3.9 + - ont-pyguppy-client-lib>=6.4.6 - deepsignal>=0.2.0 - fast5mod==1.0.5 - nanome-jax>=2.0.10 diff --git a/main.nf b/main.nf index 25067870..031fb78e 100755 --- a/main.nf +++ b/main.nf @@ -123,6 +123,7 @@ if (params.runResquiggle) summary['runResquiggle'] = 'Yes' if (params.runMethcall) { if (params.runNanopolish) summary['runNanopolish'] = 'Yes' if (params.runMegalodon) summary['runMegalodon'] = 'Yes' + if (params.runDeepSignal2) summary['runDeepSignal2'] = 'Yes' if (params.runDeepSignal) summary['runDeepSignal'] = 'Yes' if (params.runGuppy) summary['runGuppy'] = 'Yes' if (params.runTombo) summary['runTombo'] = 'Yes' @@ -326,14 +327,15 @@ workflow { // Resquiggle running if use Tombo or DeepSignal if (((params.runDeepSignal || params.runTombo || params.runDeepSignal2) && params.runMethcall) || params.runResquiggle) { - resquiggle = RESQUIGGLE(BASECALL.out.basecall, ENVCHECK.out.reference_genome) + resquiggle = RESQUIGGLE(UNTAR.out.untar_tuple.join(BASECALL.out.basecall_tuple), ENVCHECK.out.reference_genome) f1 = params.feature_extract ? resquiggle.feature_extract : Channel.empty() } else { f1 = Channel.empty() } if (params.runNanopolish && params.runMethcall) { - NANOPOLISH(BASECALL.out.basecall_tuple.join(ALIGNMENT.out.alignment_tuple), ENVCHECK.out.reference_genome) + NANOPOLISH(UNTAR.out.untar_tuple.join(BASECALL.out.basecall_tuple).join(ALIGNMENT.out.alignment_tuple), + ENVCHECK.out.reference_genome) comb_nanopolish = NPLSHCOMB(NANOPOLISH.out.nanopolish_tsv.collect(), ch_src, ch_utils) s1 = comb_nanopolish.site_unify r1 = comb_nanopolish.read_unify @@ -454,7 +456,7 @@ workflow { } null2.concat( - r1, r2, r3, f1, f2 + r1, r2, r3, r3_1, f1, f2 ).toList().set { top3_tools_read_unify } if (params.runNANOME) { @@ -490,7 +492,7 @@ workflow { // Site level combine a list null1.concat( - s1, s2, s3, s4, s5, s6, s7, s_new, s8 + s1, s2, s3, s3_1, s4, s5, s6, s7, s_new, s8 ).toList().set { tools_site_unify } REPORT(tools_site_unify, top3_tools_read_unify, diff --git a/modules/BASECALL.nf b/modules/BASECALL.nf index b2f1952e..651bc95b 100644 --- a/modules/BASECALL.nf +++ b/modules/BASECALL.nf @@ -50,7 +50,7 @@ process BASECALL { --save_path !{fast5Untar.baseName}.basecall \ --config !{params.GUPPY_BASECALL_MODEL} \ --num_callers !{task.cpus} \ - --fast5_out --compress_fastq\ + --compress_fastq\ --verbose_logs ${gpuOptions} &>> !{params.dsname}.!{fast5Untar.baseName}.Basecall.run.log else ## Just use user's basecalled input diff --git a/modules/CONSENSUS.nf b/modules/CONSENSUS.nf index 7c38c7fa..4e9b8dbb 100644 --- a/modules/CONSENSUS.nf +++ b/modules/CONSENSUS.nf @@ -48,9 +48,9 @@ process CONSENSUS { then echo "### found deepsignal1_batch_features" cat *.deepsignal1_batch_features.tsv.gz > \ - ${params.dsname}_deepsignal1_feature_combine.tsv.gz + ${params.dsname}_deepsignal_feature_combine.tsv.gz else - echo "### not found deepsignal1_batch_features" + echo "### not found deepsignal_batch_features" fi MegalodonReadReport=\$(find . -maxdepth 1 -name '*Megalodon-perRead-score.tsv.gz') @@ -69,7 +69,7 @@ process CONSENSUS { NanopolishOptions="--nanopolish \$NanopolishReadReport" fi - DeepSignalReadReport=\$(find . -maxdepth 1 -name '*DeepSignal-perRead-score.tsv.gz') + DeepSignalReadReport=\$(find . -maxdepth 1 -name '*DeepSignal*-perRead-score.tsv.gz' | head -n 1) if [[ -z \$DeepSignalReadReport ]] ; then echo "### Not found DeepSignal read-level outputs" DeepSignalOptions=" " @@ -77,7 +77,7 @@ process CONSENSUS { DeepSignalOptions="--deepsignal \$DeepSignalReadReport" fi - FeatureFile=\$(find . -maxdepth 1 -name '*_deepsignal1_feature_combine.tsv.gz') + FeatureFile=\$(find . -maxdepth 1 -name '*_deepsignal*_feature_combine.tsv.gz' | head -n 1) if [[ -z \$FeatureFile ]] ; then echo "### Not found Feature file" FeatureOptions=" " diff --git a/modules/NANOPOLISH.nf b/modules/NANOPOLISH.nf index eb31450f..ffe7070d 100644 --- a/modules/NANOPOLISH.nf +++ b/modules/NANOPOLISH.nf @@ -21,7 +21,7 @@ process NANOPOLISH { input: // path basecallDir - tuple val(id), path (basecallDir), path(alignmentDir) + tuple val(id), path (untarDir), path (basecallDir), path(alignmentDir) each path(reference_genome) output: @@ -45,7 +45,7 @@ process NANOPOLISH { ## Index, ref: https://github.com/jts/nanopolish#data-preprocessing ## Index the raw read with fastq, we do not index in basecalled dir, in case of cache can be work ln -s \${fastqFile} \${fastqFile##*/} - nanopolish index -d ${basecallDir}/workspace \ + nanopolish index -d ${untarDir}/ \ -s ${basecallDir}/${basecallDir.baseName}-sequencing_summary.txt \ \${fastqFile##*/} diff --git a/modules/RESQUIGGLE.nf b/modules/RESQUIGGLE.nf index 6ccdc226..ede71af6 100644 --- a/modules/RESQUIGGLE.nf +++ b/modules/RESQUIGGLE.nf @@ -26,7 +26,7 @@ process RESQUIGGLE { enabled: params.publishResquiggle input: - path basecallDir + tuple val(id), path (untarDir), path (basecallDir) each path(reference_genome) output: @@ -49,11 +49,11 @@ process RESQUIGGLE { cp -f !{basecallDir}/batch_basecall_combine_fq_*.fq.gz \ !{basecallDir.baseName}.resquiggle/ - ## cp -rf !{basecallDir}/workspace !{basecallDir.baseName}.resquiggle/ - find !{basecallDir}/workspace -name '*.fast5' -type f| \ + ## cp -rf !{untarDir}/*.fast5 !{basecallDir.baseName}.resquiggle/ + find !{untarDir}/ -name '*.fast5' -type f| \ parallel -j!{task.cpus * params.highProcTimes} \ 'cp {} !{basecallDir.baseName}.resquiggle/workspace/' - echo "### Duplicate from basecall DONE" + echo "### Duplicate from untar DONE" ### Prerocessing, using combined fq.gz ### ref: https://github.com/bioinfomaticsCSU/deepsignal#quick-start @@ -61,6 +61,7 @@ process RESQUIGGLE { tombo preprocess annotate_raw_with_fastqs\ --fast5-basedir !{basecallDir.baseName}.resquiggle/workspace\ --fastq-filenames !{basecallDir.baseName}.resquiggle/batch_basecall_combine_fq_*.fq\ + --sequencing-summary-filenames !{basecallDir}/!{untarDir.baseName}-sequencing_summary.txt \ --basecall-group !{params.BasecallGroupName}\ --basecall-subgroup !{params.BasecallSubGroupName}\ --overwrite --processes !{samtools_cores} \ diff --git a/modules/UNTAR.nf b/modules/UNTAR.nf index dc650628..41686234 100644 --- a/modules/UNTAR.nf +++ b/modules/UNTAR.nf @@ -21,6 +21,7 @@ process UNTAR { output: // untar dir or basecalled dir structure path "${fast5Input.baseName}.untar", emit: untar, optional: true + tuple val(fast5Input.baseName), path ("${fast5Input.baseName}.untar"), optional:true, emit: untar_tuple shell: cores = task.cpus * params.highProcTimes diff --git a/nextflow.config b/nextflow.config index e15d33bb..78fd84de 100755 --- a/nextflow.config +++ b/nextflow.config @@ -18,8 +18,8 @@ params { conda_name = "nanome" // sample: /projects/li-lab/yang/anaconda3/envs/nanome conda_cache = 'local_conda_cache' - docker_name = "liuyangzzu/nanome:v2.0.5" - singularity_name = "docker://liuyangzzu/nanome:v2.0.5" + docker_name = "liuyangzzu/nanome:v2.0.6" + singularity_name = "docker://liuyangzzu/nanome:v2.0.6" singularity_cache = 'local_singularity_cache' containerOptions = null // or "--gpus all" for docker @@ -67,12 +67,12 @@ params { // Default tool running configuration, top 4 as default runNanopolish = true runMegalodon = true - runDeepSignal = true + runDeepSignal = false runGuppy = false runGuppyGcf52ref= false // Guppy readlevel extract software, not certified by us runNANOME = true // NANOME concensus - runDeepSignal2 = false + runDeepSignal2 = true runNewTool = false // run new added tool in interface runQC = true @@ -539,18 +539,22 @@ cleanup = params.cleanup dag { file = "${params.tracedir}/NANOME_dag_${params.dsname}.svg" + overwrite = true } report { file = "${params.tracedir}/NANOME_report_${params.dsname}.html" + overwrite = true } timeline { file = "${params.tracedir}/NANOME_timeline_${params.dsname}.html" + overwrite = true } trace { file = "${params.tracedir}/NANOME_trace_${params.dsname}.txt" + overwrite = true } manifest { diff --git a/setup.py b/setup.py index 250da88a..2b2f726c 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ setuptools.setup( name="nanome-jax", - version="2.0.10", + version="2.0.11", author="Yang Liu", author_email="yang.liu@jax.org", description="NANOME (Nanopore methylation) pipeline developed by Li Lab at The Jackson Laboratory", diff --git a/src/nanome/common/global_settings.py b/src/nanome/common/global_settings.py index dc6844a2..900e67b9 100755 --- a/src/nanome/common/global_settings.py +++ b/src/nanome/common/global_settings.py @@ -16,7 +16,7 @@ from nanome.common.global_config import set_log_debug_level, current_time_str -NANOME_VERSION = "2.0.10" +NANOME_VERSION = "2.0.11" # define the small error of 0 and 1, for fully-meth and unmeth eval EPSLONG = 1e-5 diff --git a/utils/validate_nanome_container.sh b/utils/validate_nanome_container.sh index 95432fc0..a4f34034 100644 --- a/utils/validate_nanome_container.sh +++ b/utils/validate_nanome_container.sh @@ -84,13 +84,14 @@ else > ${versionFilename} printf '%s\t%s\n' Tool Version >> ${versionFilename} printf '%s\t%s\n' NANOME 1.0 >> ${versionFilename} - printf '%s\t%s\n' Nanopolish ${nanopolish_version} >> ${versionFilename} printf '%s\t%s\n' Megalodon ${megalodon_version} >> ${versionFilename} + printf '%s\t%s\n' Nanopolish ${nanopolish_version} >> ${versionFilename} + printf '%s\t%s\n' DeepSignal2 0.1.3 >> ${versionFilename} printf '%s\t%s\n' DeepSignal ${deepsignal_version} >> ${versionFilename} printf '%s\t%s\n' Guppy ${guppy_version} >> ${versionFilename} printf '%s\t%s\n' Tombo ${tombo_version} >> ${versionFilename} printf '%s\t%s\n' METEORE 1.0.0 >> ${versionFilename} - printf '%s\t%s\n' DeepMod ${deepmod_version} >> ${versionFilename} + printf '%s\t%s\n' DeepMod 0.1.3 >> ${versionFilename} echo "### check tools version file:${versionFilename}" cat ${versionFilename}