diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..3f46597e --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +resources/exit_rif/EXIT-RIF.g.vcf.gz filter=lfs diff=lfs merge=lfs -text +resources/exit_rif/EXIT-RIF.g.vcf.gz.tbi filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore index c055b9bd..0fcf165d 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,7 @@ results* *.nextflow* conda_envs/xbs-nf-env* +containers/**/*yml + +samplesheet.csv +xbs-nf.sh diff --git a/README.md b/README.md index 65426ea9..5b746b74 100644 --- a/README.md +++ b/README.md @@ -1,64 +1,31 @@ # XBS-nf -# Benefits of the Nextflow wrapper +XBS-nf (compleX Bacterial Samples) is a pipeline for comprehensive genomic analyses of Mycobacterium tuberculosis with a focus on clinical decision making as well as research. + +# Salient features of the implementation - Fine-grained control over resource allocation (CPU/Memory/Storage) -- Reliance of bioconda and biocontainers for installing packages for reproducibility -- Ease of use on a range of infrastructure (cloud/on-prem clusters/local machine) +- Reliance on bioconda for installing packages for reproducibility +- Ease of use on a range of infrastructure (cloud/on-prem HPC clusters/servers (or local machines)) - Resumability for failed processes - Centralized locations for specifying analysis parameters and hardware requirements - - XBS-nf parameters (`conf/global_parameters.config`) + - XBS-nf parameters (`default_params.config`) - Hardware requirements (`conf/standard.config`) - - Software requirements (`conf/docker.config` or `conf/conda.config`) - -# Quickstart for a server/laptop - -**NOTE**: The instructions for a cluster system like SLURM/PBS are slightly different! - -The simplest use case is to analyze a few genomes on a single machine environment. 
Almost all aspects are customizable but for the sake of brevity, a bare bones guide for any beginner user is as shown below - -- [ ] Clone the project - -```shell -git clone https://github.com/abhi18av/xbs-nf -cd xbs-nf -``` - -- [ ] Move your genomes (`fastq.gz files`) to a specific folder. For example `xbs-nf/data/full_data` folder - -- [ ] Prepare a samplesheet using `xbs-nf/resources/reference_set/xbs-nf.test.csv` as a reference for the format. + - Execution (software) requirements (`conf/docker.config` or `conf/conda.config`) +- A GVCF reference dataset for ~600 samples -You can optionally put your sample samplsheet in `xbs-nf/resources/reference_set/` folder. +# Usage and Tutorial -- [ ] Update the `xbs-nf/conf/server.config` file to point to the reference sheet +TODO: For the usage and tutorials please refer to the XBS-nf website -- [ ] To run the pipeline, make sure you have `conda` installed. Moreover, if you don't already have `nextflow` installed, you can use the following commands to install it - -```shell -conda create -n xbs-nf-env -c bioconda -c conda-forge nextflow mamba openjdk=11 -``` - - -You can confirm the setup by activating that environment and using the `nextflow info` command - -``` -conda activate -n xbs-nf-env - -nextflow info -``` - -- [ ] Then simply issue the following command on the command line - -``` -nextflow run main.nf -profile conda,server -``` +# Citation +TODO: Update this section and add a citation.cff file # Contributions Contributions are warmly accepted! - # License TODO diff --git a/conda_envs/setup_conda_envs.sh b/conda_envs/setup_conda_envs.sh index f7be930c..384d3748 100644 --- a/conda_envs/setup_conda_envs.sh +++ b/conda_envs/setup_conda_envs.sh @@ -2,10 +2,10 @@ set -xue -# NOTE: These environments must be in /path/to/xbs-nf/conda_envs folder +# NOTE: Please replace `conda` with `mamba` if it is installed for faster installs. 
-# NOTE: If there are problems in `mamba`, replace it with `conda` +# NOTE: The conda environments are expected by the `conda_local` profile to be created within `xbs-nf/conda_envs` directory -mamba create -p ./xbs-nf-env-1 bioconda::gatk4=4.2.4.1 conda-forge::R=4.1 conda-forge::r-ggplot2=3.3.5 bioconda::datamash=1.1.0 bioconda::delly=0.8.7 bioconda::lofreq=2.1.5 bioconda::delly=0.8.7 bioconda::lofreq=2.1.5 bioconda::tb-profiler=4.1.1 bioconda::multiqc=1.11 bioconda::fastqc=0.11.8 +conda env create -p xbs-nf-env-1 --file xbs-nf-env-1.yml -mamba create -p ./xbs-nf-env-2 jemunro::quanttb=1.01 bioconda::bwa=0.7.17 bioconda::samtools=1.9 bioconda::iqtree=2.1.2 bioconda::snp-dists=0.8.2 bioconda::snp-sites=2.4.0 bioconda::bcftools=1.9 bioconda::snpeff=4.3.1t bioconda::clusterpicker=1.2.3 +conda env create -p xbs-nf-env-2 --file xbs-nf-env-2.yml diff --git a/conda_envs/xbs-nf-env-1.yml b/conda_envs/xbs-nf-env-1.yml new file mode 100644 index 00000000..8d269756 --- /dev/null +++ b/conda_envs/xbs-nf-env-1.yml @@ -0,0 +1,16 @@ +name: xbs-nf-env-1 +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gatk4=4.2.4.1 + - conda-forge::R=4.1 + - conda-forge::r-ggplot2=3.3.5 + - bioconda::datamash=1.1.0 + - bioconda::delly=0.8.7 + - bioconda::lofreq=2.1.5 + - bioconda::delly=0.8.7 + - bioconda::lofreq=2.1.5 + - bioconda::tb-profiler=4.1.1 + - bioconda::multiqc=1.11 + - bioconda::fastqc=0.11.8 diff --git a/conda_envs/xbs-nf-env-2.yml b/conda_envs/xbs-nf-env-2.yml new file mode 100644 index 00000000..54de7e7d --- /dev/null +++ b/conda_envs/xbs-nf-env-2.yml @@ -0,0 +1,15 @@ +name: xbs-nf-env-2 +channels: + - conda-forge + - bioconda +dependencies: + - jemunro::quanttb=1.01 + - bioconda::bwa=0.7.17 + - bioconda::samtools=1.9 + - bioconda::iqtree=2.1.2 + - bioconda::snp-dists=0.8.2 + - bioconda::snp-sites=2.4.0 + - bioconda::bcftools=1.9 + - bioconda::snpeff=4.3.1t + - bioconda::clusterpicker=1.2.3 + diff --git a/conf/docker.config b/conf/docker.config index 
7b271291..6b326e6e 100644 --- a/conf/docker.config +++ b/conf/docker.config @@ -6,12 +6,12 @@ process { withName: 'GATK.*|LOFREQ.*|DELLY.*|TBPROFILER.*|MULTIQC.*|FASTQC.*|UTILS.*' { - container = "rg.nl-ams.scw.cloud/xbs-nf-containers/xbs-nf-container-1:0.3.0" + container = "rg.nl-ams.scw.cloud/xbs-nf-containers/xbs-nf-container-1:0.5.0" } withName: 'QUANTTB.*|BWA.*|IQTREE.*|SNPDISTS.*|SNPSITES.*|BCFTOOLS.*|BGZIP.*|SAMTOOLS.*|SNPEFF.*|CLUSTERPICKER.*' { - container = "rg.nl-ams.scw.cloud/xbs-nf-containers/xbs-nf-container-2:0.3.0" + container = "rg.nl-ams.scw.cloud/xbs-nf-containers/xbs-nf-container-2:0.5.0" } /* diff --git a/conf/optimized_processes.config b/conf/optimized_processes.config index 54cdda1f..f2968720 100644 --- a/conf/optimized_processes.config +++ b/conf/optimized_processes.config @@ -14,11 +14,6 @@ process { memory = 2.GB } - withName: 'TBPROFILER_LOAD_LIBRARY' { - cpus = 2 - memory = 2.GB - } - withName: 'UTILS_QUANTTB_.*' { cpus = 1 memory = 2.GB @@ -98,4 +93,8 @@ process { cpus = 1 memory = 2.GB } + + withName: 'TBPROFILER_VCF_PROFILE__LOFREQ.*' { + maxForks = 1 + } } diff --git a/conf/test.config b/conf/test.config new file mode 100644 index 00000000..1800d459 --- /dev/null +++ b/conf/test.config @@ -0,0 +1,57 @@ +manifest { + + name = "SLURM testing with 5 samples with optimization without EXIT-RIF GVCF" +} + +params { + outdir = "${projectDir}/results" + optimize_variant_recalibration = false + compute_minor_variants = true + dataset_is_not_contaminated = true + + use_ref_exit_rif_gvcf = false + + //The path to resistance database to use for resistance calling + resistance_db = "NONE" + + + save_mode = 'symlink' + + + //NOTE: This is customized version for dev time testing (remove gaussian param) + GATK_VARIANT_RECALIBRATOR__SNP { + results_dir = "${params.outdir}/gatk/variant_recalibrator__snp" + + arguments = " --use-allele-specific-annotations \ + -AS \ + --target-titv 1.7 \ + --truth-sensitivity-tranche 100.0 \ + 
--truth-sensitivity-tranche 99.9 \ + --truth-sensitivity-tranche 99.8 \ + --truth-sensitivity-tranche 99.7 \ + --truth-sensitivity-tranche 99.6 \ + --truth-sensitivity-tranche 99.5 \ + --truth-sensitivity-tranche 99.4 \ + --truth-sensitivity-tranche 99.3 \ + --truth-sensitivity-tranche 99.2 \ + --truth-sensitivity-tranche 99.1 \ + --truth-sensitivity-tranche 99.0 \ + --max-gaussians 1 \ + -mq-cap 60" + } + + +} + +executor { +// queueSize = 1 + pollInterval = '5sec' +} + +process { + + executor = "slurm" + errorStrategy = { task.attempt < 3 ? 'retry' : 'ignore' } + + time = '1h' +} diff --git a/containers/build.sh b/containers/build.sh index d538eb7e..e97ee31e 100755 --- a/containers/build.sh +++ b/containers/build.sh @@ -5,10 +5,13 @@ set -uex DOCKER_NAMESPACE="rg.nl-ams.scw.cloud/xbs-nf-containers" +cp ../conda_envs/xbs-nf-env-1.yml ./xbs-nf-container-1 +cp ../conda_envs/xbs-nf-env-2.yml ./xbs-nf-container-2 + for container_dir in $(find * -type d); do echo "Building $container_dir ..." cd $container_dir - CONTAINER_TAG=0.3.0 + CONTAINER_TAG=0.5.0 CONTAINER_NAME=$DOCKER_NAMESPACE/$container_dir:$CONTAINER_TAG echo "Container Name : $CONTAINER_NAME " docker build -t $CONTAINER_NAME . 
diff --git a/containers/xbs-nf-container-1/Dockerfile b/containers/xbs-nf-container-1/Dockerfile index 3396890c..167891e5 100644 --- a/containers/xbs-nf-container-1/Dockerfile +++ b/containers/xbs-nf-container-1/Dockerfile @@ -1,22 +1,9 @@ FROM mambaorg/micromamba MAINTAINER abhi18av@outlook.com +COPY --chown=$MAMBA_USER:$MAMBA_USER xbs-nf-env-1.yml /tmp/xbs-nf-env-1.yml -RUN \ - micromamba install -y -n base -c conda-forge -c bioconda \ - bioconda::gatk4=4.2.4.1 \ - conda-forge::R=4.1 \ - conda-forge::r-ggplot2=3.3.5 \ - bioconda::datamash=1.1.0 \ - bioconda::delly=0.8.7 \ - bioconda::lofreq=2.1.5 \ - bioconda::delly=0.8.7 \ - bioconda::lofreq=2.1.5 \ - bioconda::tb-profiler=4.1.1 \ - bioconda::multiqc=1.11 \ - bioconda::fastqc=0.11.8 \ - && micromamba clean -a -y - +RUN micromamba install -y -f /tmp/xbs-nf-env-1.yml -n base -RUN micromamba install -y -n base conda-forge::procps-ng \ +RUN micromamba install -y -n base conda-forge::procps-ng \ && micromamba clean -a -y diff --git a/containers/xbs-nf-container-2/Dockerfile b/containers/xbs-nf-container-2/Dockerfile index b8c8696b..e1b0c1c6 100644 --- a/containers/xbs-nf-container-2/Dockerfile +++ b/containers/xbs-nf-container-2/Dockerfile @@ -1,19 +1,9 @@ FROM mambaorg/micromamba MAINTAINER abhi18av@outlook.com +COPY --chown=$MAMBA_USER:$MAMBA_USER xbs-nf-env-2.yml /tmp/xbs-nf-env-2.yml -RUN \ - micromamba install -y -n base -c conda-forge -c bioconda \ - jemunro::quanttb=1.01 \ - bioconda::bwa=0.7.17 \ - bioconda::samtools=1.9 \ - bioconda::iqtree=2.1.2 \ - bioconda::snp-dists=0.8.2 \ - bioconda::snp-sites=2.4.0 \ - bioconda::bcftools=1.9 \ - bioconda::snpeff=4.3.1t \ - bioconda::clusterpicker=1.2.3 \ - && micromamba clean -a -y +RUN micromamba install -y -f /tmp/xbs-nf-env-2.yml -n base -RUN micromamba install -y -n base conda-forge::procps-ng=3.3.16 conda-forge::bc=v1.07.1 \ +RUN micromamba install -y -n base conda-forge::procps-ng \ && micromamba clean -a -y diff --git a/default_params.config 
b/default_params.config index c89a049d..85de302b 100644 --- a/default_params.config +++ b/default_params.config @@ -6,6 +6,9 @@ input_samplesheet = "samplesheet.csv" // The directory to which all output files should be written outdir = "results" +// Set this to true to run only the quality-check workflow; when false, all the workflows are run +only_qc_check_wf = false // OR true + // The name of the output VCF file // NOTE: This parameter is used to derive the JOINT_NAME in XBS_main.py vcf_name = "joint" @@ -26,8 +29,7 @@ rel_abundance_cutoff = 0.80 ntm_fraction_cutoff = 0.20 //The path to resistance database to use for resistance calling -//NOTE: Keeping it false since the trouble with keys (drug names) in tb-profiler collate stage. See https://github.com/abhi18av/xbs-nf/pull/50 -resistance_db = "${projectDir}/resources/resistance_db_who" // OR false +resistance_db = "${projectDir}/resources/resistance_db_who" // OR "NONE" //NOTE: If the dataset is contaminated, Base Recalibration steps are skipped (XBS_call#L48) @@ -38,6 +40,10 @@ compute_minor_variants = false optimize_variant_recalibration = false +// NOTE: Can use the local copies if git-lfs has been installed properly. It is problematic to download this via HTTPS (in private repo). +use_ref_exit_rif_gvcf = true + + //FIXME: (For SNPEFF) Add warning in readme NOT to include a reference file with multiple dots in name. 
ref_fasta_basename = "NC-000962-3-H37Rv" ref_fasta_dir = "${projectDir}/resources/genome" @@ -51,15 +57,14 @@ ref_fasta_pac = "${params.ref_fasta}.pac" ref_fasta_sa = "${params.ref_fasta}.sa" -//TODO: Confirm if this is shared -drgenes_list = "${projectDir}/data/test_data/DRgenes.list" +//NOTE: Enable this when the file is actually used +// drgenes_list = "${projectDir}/data/test_data/DRgenes.list" rrna_list = "${projectDir}/resources/regions/rRNA.list" dbsnp_vcf = "${projectDir}/resources/known/Benavente2015.UVPapproved.rRNAexcluded.vcf.gz" dbsnp_vcf_tbi = "${params.dbsnp_vcf}.tbi" - excluded_loci_list = "${projectDir}/resources/regions/UVP_List_of_Excluded_loci.list" benavente2015_vcf = "${projectDir}/resources/known/Benavente2015.UVPapproved.rRNAexcluded.vcf.gz" @@ -122,27 +127,28 @@ should_publish = true //----------------------- FASTQC { - results_dir = "${params.outdir}/fastqc/" + results_dir = "${params.outdir}/libraries/fastq_quality_control/fastqc/single_qc/" } MULTIQC { - results_dir = "${params.outdir}/multiqc/" + results_dir = "${params.outdir}/libraries/fastq_quality_control/fastqc/multi_qc/" } BWA_MEM { - results_dir = "${params.outdir}/bwa/mem/" + results_dir = "${params.outdir}/libraries/mapped_sequences/mapped_libraries/" } QUANTTB_QUANT { - results_dir = "${params.outdir}/quanttb/quant/" + results_dir = "${params.outdir}/libraries/multiple_infection/quanttb/" } UTILS_QUANTTB_SAMPLE_QC { - results_dir = "${params.outdir}/quanttb/sample_qc/" + results_dir = "${params.outdir}/libraries/multiple_infection/quanttb/" + should_publish = false } UTILS_QUANTTB_COHORT_STATS { - results_dir = "${params.outdir}/quanttb/cohort_stats/" + results_dir = "${params.outdir}/QC_statistics/multiple_infection/quanttb/" } @@ -152,38 +158,38 @@ UTILS_QUANTTB_COHORT_STATS { //----------------------- SAMTOOLS_MERGE { - results_dir = "${params.outdir}/samtools/merge/" - + results_dir = "${params.outdir}/samples/mapped_sequences/merged_libraries/" + should_publish = false } 
GATK_MARK_DUPLICATES { - results_dir = "${params.outdir}/gatk/mark_duplicates/" + results_dir = "${params.outdir}/samples/mapped_sequences/marked_duplicates/" } GATK_BASE_RECALIBRATOR { - results_dir = "${params.outdir}/gatk/base_recalibrator/" - + results_dir = "${params.outdir}/samples/mapped_sequences/base_recalibrated/" + should_publish = false } GATK_APPLY_BQSR { - results_dir = "${params.outdir}/gatk/apply_bqsr/" + results_dir = "${params.outdir}/samples/mapped_sequences/base_recalibrated/" } SAMTOOLS_INDEX { - results_dir = "${params.outdir}/samtools/index/" + results_dir = "${params.outdir}/samples/mapped_sequences/marked_duplicates/" } GATK_HAPLOTYPE_CALLER { - results_dir = "${params.outdir}/gatk/haplotype_caller/" + results_dir = "${params.outdir}/samples/variant_files/major_variants/haplotype_caller/" arguments = " -ploidy 1 --read-filter MappingQualityNotZeroReadFilter -G StandardAnnotation -G AS_StandardAnnotation " } GATK_HAPLOTYPE_CALLER__MINOR_VARIANTS { - results_dir = "${params.outdir}/gatk/haplotype_caller__minor_variants/" + results_dir = "${params.outdir}/samples/variant_files/minor_variants/haplotype_caller/" arguments = " -ploidy 1 \ --minimum-mapping-quality 60 \ @@ -194,81 +200,85 @@ GATK_HAPLOTYPE_CALLER__MINOR_VARIANTS { } LOFREQ_CALL__NTM { - results_dir = "${params.outdir}/lofreq/call__ntm/" + results_dir = "${params.outdir}/QC_statistics/non-tuberculous_mycobacteria/" region = "1472307-1472307" arguments = " -m 60 -Q 20 -a 1 " + + should_publish = false } LOFREQ_INDELQUAL { - results_dir = "${params.outdir}/lofreq/indelqual/" + results_dir = "${params.outdir}/samples/variant_files/minor_variants/" arguments = "-m 60" + + should_publish = false } SAMTOOLS_INDEX__LOFREQ { - results_dir = "${params.outdir}/samtools/index__lofreq/" - + results_dir = "${params.outdir}/samples/variant_files/minor_variants/" } LOFREQ_CALL { - results_dir = "${params.outdir}/lofreq/call/" + results_dir = 
"${params.outdir}/samples/variant_files/minor_variants/" //NOTE: Curretly using default p-value for filtering. XBS_call#L118 arguments = "-m 60 --call-indels" } LOFREQ_FILTER { - results_dir = "${params.outdir}/lofreq/filter/" + results_dir = "${params.outdir}/samples/variant_files/minor_variants/" - arguments = "-a 60" + //Note: this is to filter the LoFreq output to an Allele Frequency of choice. + arguments = "-a 0.20" } DELLY_CALL { - results_dir = "${params.outdir}/delly/call/" + results_dir = "${params.outdir}/samples/variant_files/structural_variants/" arguments = "-u 30" } BCFTOOLS_VIEW { - results_dir = "${params.outdir}/bcftools/view/" + results_dir = "${params.outdir}/samples/variant_files/structural_variants/" } GATK_INDEX_FEATURE_FILE { - results_dir = "${params.outdir}/gatk/index_feature_file/" + results_dir = "${params.outdir}/samples/variant_files/structural_variants/" } GATK_SELECT_VARIANTS__INTERVALS { - results_dir = "${params.outdir}/gatk/select_variants__intervals/" + results_dir = "${params.outdir}/samples/variant_files/structural_variants/" } SAMTOOLS_STATS { - results_dir = "${params.outdir}/samtools/stats/" + results_dir = "${params.outdir}/QC_statistics/mapping/" arguments = "-F DUP,SUPPLEMENTARY,SECONDARY,UNMAP,QCFAIL" } GATK_COLLECT_WGS_METRICS { - results_dir = "${params.outdir}/gatk/collect_wgs_metrics/" + results_dir = "${params.outdir}/QC_statistics/coverage/" arguments = " --READ_LENGTH 0 --COVERAGE_CAP 10000 --COUNT_UNPAIRED" } GATK_FLAG_STAT { - results_dir = "${params.outdir}/gatk/flag_stat/" + results_dir = "${params.outdir}/QC_statistics/mapping/" } UTILS_SAMPLE_STATS { - results_dir = "${params.outdir}/stats/samples/" + results_dir = "${params.outdir}/QC_statistics/samples_thresholds/" } UTILS_COHORT_STATS { - results_dir = "${params.outdir}/stats/cohort/" + results_dir = "${params.outdir}/QC_statistics/cohort/" } @@ -280,54 +290,61 @@ UTILS_COHORT_STATS { GATK_COMBINE_GVCFS { - results_dir = 
"${params.outdir}/gatk/combine_gvcfs/" - + results_dir = "${params.outdir}/cohort/raw_variant_files/" + arguments = " -G StandardAnnotation -G AS_StandardAnnotation " - + + should_publish = false } GATK_GENOTYPE_GVCFS { - results_dir = "${params.outdir}/gatk/genotype_gvcfs" + results_dir = "${params.outdir}/cohort/raw_variant_files/" arguments = " -G StandardAnnotation -G AS_StandardAnnotation --sample-ploidy 1 " + + should_publish = false } SNPEFF { - results_dir = "${params.outdir}/snpeff" + results_dir = "${params.outdir}/cohort/raw_variant_files/" arguments = " -nostats -ud 40 Mycobacterium_tuberculosis_h37rv " } BGZIP { - results_dir = "${params.outdir}/bgzip" + results_dir = "${params.outdir}/cohort/raw_variant_files/" } GATK_INDEX_FEATURE_FILE__COHORT { - results_dir = "${params.outdir}/gatk/index_feature_file__cohort" + results_dir = "${params.outdir}/cohort/raw_variant_files/" } GATK_SELECT_VARIANTS__SNP { - results_dir = "${params.outdir}/gatk/select_variants__snp" + results_dir = "${params.outdir}/cohort/snp_variant_files/" arguments = " --remove-unused-alternates --exclude-non-variants " + + should_publish = false } GATK_SELECT_VARIANTS__INDEL { - results_dir = "${params.outdir}/gatk/select_variants__indel" + results_dir = "${params.outdir}/cohort/indel_variant_files/" arguments = " --remove-unused-alternates --exclude-non-variants --select-type-to-include MNP --select-type-to-include MIXED" + + should_publish = false } GATK_VARIANT_RECALIBRATOR__SNP { - results_dir = "${params.outdir}/gatk/variant_recalibrator__snp" + results_dir = "${params.outdir}/cohort/snp_variant_files/vqsr/" arguments = " --use-allele-specific-annotations \ -AS \ @@ -348,35 +365,36 @@ GATK_VARIANT_RECALIBRATOR__SNP { } UTILS_ELIMINATE_ANNOTATION { - results_dir = "${params.outdir}/eliminate_annotation/" + //This can later be unpublished, as well as other ANN outputs) + results_dir = "${params.outdir}/cohort/snp_variant_files/vqsr/" } GATK_VARIANT_RECALIBRATOR__ANN2 { - 
results_dir = "${params.outdir}/gatk/variant_recalibrator__ann2" + results_dir = "${params.outdir}/cohort/snp_variant_files/vqsr__ann2/" } GATK_VARIANT_RECALIBRATOR__ANN3 { - results_dir = "${params.outdir}/gatk/variant_recalibrator__ann3" + results_dir = "${params.outdir}/cohort/snp_variant_files/vqsr__ann3/" } GATK_VARIANT_RECALIBRATOR__ANN4 { - results_dir = "${params.outdir}/gatk/variant_recalibrator__ann4" + results_dir = "${params.outdir}/cohort/snp_variant_files/vqsr__ann4/" } GATK_VARIANT_RECALIBRATOR__ANN5 { - results_dir = "${params.outdir}/gatk/variant_recalibrator__ann5" + results_dir = "${params.outdir}/cohort/snp_variant_files/vqsr__ann5/" } GATK_VARIANT_RECALIBRATOR__ANN6 { - results_dir = "${params.outdir}/gatk/variant_recalibrator__ann6" + results_dir = "${params.outdir}/cohort/snp_variant_files/vqsr__ann6/" } GATK_VARIANT_RECALIBRATOR__ANN7 { - results_dir = "${params.outdir}/gatk/variant_recalibrator__ann7" + results_dir = "${params.outdir}/cohort/snp_variant_files/vqsr__ann7/" } GATK_VARIANT_RECALIBRATOR__INDEL { - results_dir = "${params.outdir}/gatk/variant_recalibrator__indel" + results_dir = "${params.outdir}/cohort/indel_variant_files/vqsr/" arguments = " -AS \ @@ -398,97 +416,102 @@ GATK_VARIANT_RECALIBRATOR__INDEL { GATK_APPLY_VQSR__SNP { - results_dir = "${params.outdir}/gatk/apply_vqsr__snp" + results_dir = "${params.outdir}/cohort/snp_variant_files/" arguments = " --ts-filter-level 99.90 -AS --exclude-filtered " } GATK_APPLY_VQSR__INDEL { - results_dir = "${params.outdir}/gatk/apply_vqsr_indel" + results_dir = "${params.outdir}/cohort/indel_variant_files/" arguments = "" } GATK_SELECT_VARIANTS__EXCLUSION__SNP { - results_dir = "${params.outdir}/gatk/select_variants__exclusion__snp" + results_dir = "${params.outdir}/cohort/snp_variant_files/" arguments = " " } GATK_SELECT_VARIANTS__EXCLUSION__INDEL { - results_dir = "${params.outdir}/gatk/select_variants__exclusion__indel" + results_dir = "${params.outdir}/cohort/indel_variant_files/" 
arguments = " --select-type-to-include MNP --select-type-to-include MIXED " } GATK_MERGE_VCFS { - results_dir = "${params.outdir}/gatk/merge_vcfs" + results_dir = "${params.outdir}/cohort/combined_variant_files/" } TBPROFILER_VCF_PROFILE__COHORT { - results_dir = "${params.outdir}/tbprofiler/vcf_profile__cohort" + results_dir = "${params.outdir}/analyses/drug_resistance/major_variants/" } TBPROFILER_COLLATE__COHORT { - results_dir = "${params.outdir}/tbprofiler/collate__cohort" + results_dir = "${params.outdir}/analyses/drug_resistance/major_variants/" - prefix = "XBS.resistance" + prefix = "major_variants" } TBPROFILER_VCF_PROFILE__LOFREQ { - results_dir = "${params.outdir}/tbprofiler/vcf_profile__lofreq" + results_dir = "${params.outdir}/analyses/drug_resistance/minor_variants/" } TBPROFILER_COLLATE__LOFREQ { - results_dir = "${params.outdir}/tbprofiler/collate__lofreq" + results_dir = "${params.outdir}/analyses/drug_resistance/minor_variants/" - prefix = "XBS.resistance.LoFreq" + prefix = "minor_variants" } GATK_SELECT_VARIANTS__PHYLOGENY { - results_dir = "${params.outdir}/gatk/select_variants__phylogeny" + results_dir = "${params.outdir}/analyses/phylogeny/" arguments = " --remove-unused-alternates --exclude-non-variants " + should_publish = false } GATK_VARIANTS_TO_TABLE { - results_dir = "${params.outdir}/gatk/variants_to_table" + results_dir = "${params.outdir}/cohort/multiple_alignment_files/" arguments = " -GF GT " + + should_publish = false } SNPSITES { - results_dir = "${params.outdir}/snpsites/" + results_dir = "${params.outdir}/cohort/multiple_alignment_files/" } SNPDISTS { - results_dir = "${params.outdir}/snpdists/" + results_dir = "${params.outdir}/analyses/snp_distances/" } IQTREE { - results_dir = "${params.outdir}/iqtree/" - + results_dir = "${params.outdir}/analyses/phylogeny/" + + //Add "-bb 10000" for ultrafast bootstraps, or "-b 1000" for boring bootstraps + arguments = " -allnni -redo " } CLUSTERPICKER { - results_dir = 
"${params.outdir}/clusterpicker/" + results_dir = "${params.outdir}/analyses/cluster_analysis/" bootstrap_1 = 0 bootstrap_2 = 0 diff --git a/main.nf b/main.nf index a853e4ee..e41ef7da 100644 --- a/main.nf +++ b/main.nf @@ -55,85 +55,88 @@ reads_ch = Channel.fromPath(params.input_samplesheet) } - - //================================================================================ -// TEST workflow +// Main workflow //================================================================================ -workflow TEST { +workflow { - reads_ch.view() + if (params.only_qc_check_wf) { - // QUALITY_CHECK_WF(reads_ch) + QUALITY_CHECK_WF(reads_ch) - // MAP_WF(QUALITY_CHECK_WF.out) + } else { - // CALL_WF(MAP_WF.out.sorted_reads) + QUALITY_CHECK_WF(reads_ch) - // collated_gvcfs_ch = CALL_WF.out.gvcf_ch.flatten().collate(3) + MAP_WF(QUALITY_CHECK_WF.out) - // // collated_gvcfs_ch.view() + CALL_WF(MAP_WF.out.sorted_reads) - // sample_stats_ch = CALL_WF.out.cohort_stats_tsv - // .splitCsv(header: false, skip: 1, sep: '\t' ) - // .map { row -> [ - // row.first(), // SAMPLE - // row.last().toInteger() // ALL_THRESHOLDS_MET - // ] - // } - // .filter { it[1] == 1} // Filter out samples which meet all the thresholds - // .map { [ it[0] ] } - // // .view() + collated_gvcfs_ch = CALL_WF.out.gvcf_ch.flatten().collate(3) + sample_stats_ch = CALL_WF.out.cohort_stats_tsv + .splitCsv(header: false, skip: 1, sep: '\t' ) + .map { row -> [ + row.first(), // SAMPLE + row.last().toInteger() // ALL_THRESHOLDS_MET + ] + } + .filter { it[1] == 1} // Filter out samples which meet all the thresholds + .map { [ it[0] ] } - // selected_gvcfs_ch = collated_gvcfs_ch.join(sample_stats_ch) - // .flatten() - // .filter { it.class == sun.nio.fs.UnixPath } - // // .view() + selected_gvcfs_ch = collated_gvcfs_ch.join(sample_stats_ch) + .flatten() + .filter { it.class == sun.nio.fs.UnixPath } - // MERGE_WF(selected_gvcfs_ch.collect(), CALL_WF.out.lofreq_vcf_ch) + MERGE_WF(selected_gvcfs_ch.collect(), 
CALL_WF.out.lofreq_vcf_ch) -} + } +} //================================================================================ -// Main workflow +// TEST workflow //================================================================================ -workflow { +workflow TEST { - QUALITY_CHECK_WF(reads_ch) + if (params.only_qc_check_wf) { - MAP_WF(QUALITY_CHECK_WF.out) + QUALITY_CHECK_WF(reads_ch) - CALL_WF(MAP_WF.out.sorted_reads) + } else { - collated_gvcfs_ch = CALL_WF.out.gvcf_ch.flatten().collate(3) + QUALITY_CHECK_WF(reads_ch) - // collated_gvcfs_ch.view() + MAP_WF(QUALITY_CHECK_WF.out) - sample_stats_ch = CALL_WF.out.cohort_stats_tsv - .splitCsv(header: false, skip: 1, sep: '\t' ) - .map { row -> [ - row.first(), // SAMPLE - row.last().toInteger() // ALL_THRESHOLDS_MET - ] - } - .filter { it[1] == 1} // Filter out samples which meet all the thresholds - .map { [ it[0] ] } - // .view() - //FIXME remove the view + CALL_WF(MAP_WF.out.sorted_reads) - selected_gvcfs_ch = collated_gvcfs_ch.join(sample_stats_ch) - .flatten() - .filter { it.class == sun.nio.fs.UnixPath } - // .view() - //FIXME remove the view + collated_gvcfs_ch = CALL_WF.out.gvcf_ch.flatten().collate(3) + // collated_gvcfs_ch.view() + + sample_stats_ch = CALL_WF.out.cohort_stats_tsv + .splitCsv(header: false, skip: 1, sep: '\t' ) + .map { row -> [ + row.first(), // SAMPLE + row.last().toInteger() // ALL_THRESHOLDS_MET + ] + } + .filter { it[1] == 1} // Filter out samples which meet all the thresholds + .map { [ it[0] ] } + .view( it -> "sample_stats_ch => $it") + + selected_gvcfs_ch = collated_gvcfs_ch.join(sample_stats_ch) + .flatten() + .filter { it.class == sun.nio.fs.UnixPath } + .view( it -> "selected_gvcfs_ch => $it") - MERGE_WF(selected_gvcfs_ch.collect(), CALL_WF.out.lofreq_vcf_ch) + MERGE_WF(selected_gvcfs_ch.collect(), CALL_WF.out.lofreq_vcf_ch) + + } } diff --git a/modules/gatk/combine_gvcfs.nf b/modules/gatk/combine_gvcfs.nf index 4cf70c77..0e31eef4 100644 --- a/modules/gatk/combine_gvcfs.nf +++ 
b/modules/gatk/combine_gvcfs.nf @@ -8,6 +8,7 @@ process GATK_COMBINE_GVCFS { val(gvcfs_string) path(gvcfs) path(ref_fasta) + path(ref_exit_rif_gvcf) path("*") output: @@ -16,18 +17,24 @@ process GATK_COMBINE_GVCFS { script: + def optionalRefExitRifGvcf = ( ref_exit_rif_gvcf.simpleName != "NONE") ? " --variant ${ref_exit_rif_gvcf} " : "" + """ ${params.gatk_path} CombineGVCFs --java-options "-Xmx${task.memory.giga}G" \\ -R ${ref_fasta} \\ ${params.arguments} \\ --variant ${gvcfs_string} \\ + ${optionalRefExitRifGvcf} \\ -O ${joint_name}.combined.vcf.gz + + cp ${joint_name}.combined.vcf.gz .command.out """ stub: """ touch ${joint_name}.combined.vcf.gz + touch ${joint_name}.combined.vcf.gz.tbi """ } diff --git a/modules/gatk/haplotype_caller.nf b/modules/gatk/haplotype_caller.nf index 6a063c26..8ceb0a8c 100644 --- a/modules/gatk/haplotype_caller.nf +++ b/modules/gatk/haplotype_caller.nf @@ -20,6 +20,7 @@ process GATK_HAPLOTYPE_CALLER { -I ${bam} \\ -ERC GVCF \\ ${params.arguments} \\ + -bamout ${sampleName}.haplotype_caller.bam \\ -O ${sampleName}.g.vcf.gz """ diff --git a/modules/gatk/haplotype_caller__minor_variants.nf b/modules/gatk/haplotype_caller__minor_variants.nf index ed33f4a8..eb1fd357 100644 --- a/modules/gatk/haplotype_caller__minor_variants.nf +++ b/modules/gatk/haplotype_caller__minor_variants.nf @@ -19,6 +19,7 @@ process GATK_HAPLOTYPE_CALLER__MINOR_VARIANTS { -R ${reference} \\ -I ${bam} \\ ${params.arguments} \\ + -bamout ${sampleName}.haplotype_caller.bam \\ -O ${sampleName}.AllSites.g.vcf.gz """ diff --git a/modules/gatk/variants_to_table.nf b/modules/gatk/variants_to_table.nf index e83ca6fc..e6fa81c9 100644 --- a/modules/gatk/variants_to_table.nf +++ b/modules/gatk/variants_to_table.nf @@ -20,8 +20,8 @@ process GATK_VARIANTS_TO_TABLE { !{params.arguments} \\ -O /dev/stdout \\ | sed -e 's/^\\t//g' \\ - | sed -e 's/*/-/g' \\ - | sed -e 's/\\./-/g' \\ + | sed -e '2~1 s/*/-/g' \\ + | sed -e '2~1 s/\\./-/g' \\ | sed 
'2,${/^.*\\(-.*\\)\\{'"!{params.median_coverage_cutoff}"',\\}.*$/d}' \\ | !{params.datamash_path} transpose \\ | sed -e 's/^/>/g' \\ diff --git a/modules/iqtree/iqtree.nf b/modules/iqtree/iqtree.nf index 41b57729..f02c37e5 100644 --- a/modules/iqtree/iqtree.nf +++ b/modules/iqtree/iqtree.nf @@ -24,7 +24,7 @@ process IQTREE { """ ${params.iqtree_path} \\ -s ${fasta} \\ - -T ${task.cpus} \\ + -T AUTO \\ ${params.arguments} \\ --prefix ${joint_name}.${prefix} """ diff --git a/modules/tbprofiler/collate.nf b/modules/tbprofiler/collate.nf index 50f74f82..331ef0bc 100644 --- a/modules/tbprofiler/collate.nf +++ b/modules/tbprofiler/collate.nf @@ -8,12 +8,12 @@ process TBPROFILER_COLLATE { path(resistanceDb) output: - path("*.XBS.resistance*") + path("*${params.prefix}*") script: def optionalDb = resistanceDb ? "--db ${resistanceDb}" : "" - def optionallyLoadLibraryForContainers = resistanceDb ? "cd ${resistanceDb}; ${params.tbprofiler_path} load_library ${resistanceDb.name}; cd ../" : "" + def optionallyLoadLibraryForContainers = (optionalDb != "") ? 
"cd ${resistanceDb}; ${params.tbprofiler_path} load_library ${resistanceDb.name}; cd ../" : "" """ ${optionallyLoadLibraryForContainers} @@ -25,7 +25,7 @@ process TBPROFILER_COLLATE { stub: """ - touch ${joint_name}.XBS.resistance.txt + touch ${joint_name}.${params.prefix}.txt """ } diff --git a/modules/tbprofiler/load_library.nf b/modules/tbprofiler/load_library.nf deleted file mode 100644 index 4b56ea6d..00000000 --- a/modules/tbprofiler/load_library.nf +++ /dev/null @@ -1,41 +0,0 @@ -process TBPROFILER_LOAD_LIBRARY { - - input: - path(resistanceDb) - - output: - path(resistanceDb), includeInputs: true - - script: - - if (!workflow.container && resistanceDb) { - - """ - echo "A container ${workflow.container} is NOT used: TRUE" - echo "resistanceDB status: ${resistanceDb}" - - echo "Load the library" - - cd ${resistanceDb} - - ${params.tbprofiler_path} load_library ${resistanceDb.name} - """ - - } else { - - """ - echo "A container ${workflow.container} is used: TRUE" - echo "resistanceDB status: ${resistanceDb.name}" - - - echo "Do NOT load the library" - """ - } - - stub: - """ - echo "This is a container based run => ${workflow.container}" - echo "${params.tbprofiler_path} load_library ${resistanceDb.name}" - """ - -} diff --git a/modules/tbprofiler/vcf_profile__cohort.nf b/modules/tbprofiler/vcf_profile__cohort.nf index 52ee7fbe..b166d7ee 100644 --- a/modules/tbprofiler/vcf_profile__cohort.nf +++ b/modules/tbprofiler/vcf_profile__cohort.nf @@ -13,7 +13,7 @@ process TBPROFILER_VCF_PROFILE__COHORT { script: def optionalDb = resistanceDb ? "--db ${resistanceDb.name}" : "" - def optionallyLoadLibraryForContainers = resistanceDb ? "cd ${resistanceDb}; ${params.tbprofiler_path} load_library ${resistanceDb.name}; cd ../" : "" + def optionallyLoadLibraryForContainers = (optionalDb != "") ? 
"cd ${resistanceDb}; ${params.tbprofiler_path} load_library ${resistanceDb.name}; cd ../" : "" """ ${optionallyLoadLibraryForContainers} diff --git a/modules/tbprofiler/vcf_profile__lofreq.nf b/modules/tbprofiler/vcf_profile__lofreq.nf index f27d94ea..ed76c37e 100644 --- a/modules/tbprofiler/vcf_profile__lofreq.nf +++ b/modules/tbprofiler/vcf_profile__lofreq.nf @@ -14,7 +14,7 @@ process TBPROFILER_VCF_PROFILE__LOFREQ { script: def optionalDb = resistanceDb ? "--db ${resistanceDb.name}" : "" - def optionallyLoadLibraryForContainers = resistanceDb ? "cd ${resistanceDb}; ${params.tbprofiler_path} load_library ${resistanceDb.name}; cd ../" : "" + def optionallyLoadLibraryForContainers = (optionalDb != "") ? "cd ${resistanceDb}; ${params.tbprofiler_path} load_library ${resistanceDb.name}; cd ../" : "" """ diff --git a/nextflow.config b/nextflow.config index 5888c238..7de437d1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,11 +1,10 @@ manifest { name = 'XBS-nf' - description = '' + description = 'XBS-nf (compleX Bacterial Samples) is a pipeline for comprehensive genomic analyses of Mycobacterium tuberculosis with a focus on clinical decision making as well as research.' 
version = '0.9.8' - author = '' + author = 'TORCH-Consortium' defaultBranch = 'master' - homePage = '' - nextflowVersion = '>=21.10.0' + homePage = 'https://github.com/TORCH-Consortium/xbs-nf' } params { includeConfig 'default_params.config' } @@ -21,5 +20,8 @@ profiles { // Executor specific settings pbs { includeConfig 'conf/pbs.config' } server { includeConfig 'conf/server.config' } + + //NOTE: Test profile - DO NOT USE + test { includeConfig 'conf/test.config' } } diff --git a/resources/exit_rif/EXIT-RIF.g.vcf.gz b/resources/exit_rif/EXIT-RIF.g.vcf.gz new file mode 100644 index 00000000..4b73f3d1 --- /dev/null +++ b/resources/exit_rif/EXIT-RIF.g.vcf.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e46c95819c738dbffb815072c7f828e701bb6c6ad9dc8fca9763e25435ab5eb +size 259540425 diff --git a/resources/exit_rif/EXIT-RIF.g.vcf.gz.tbi b/resources/exit_rif/EXIT-RIF.g.vcf.gz.tbi new file mode 100644 index 00000000..709f414d --- /dev/null +++ b/resources/exit_rif/EXIT-RIF.g.vcf.gz.tbi @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:973808296c54ebe6daf6e26898e916f16d2a94f177e2354ebf65bec66b2fda3c +size 5096 diff --git a/workflows/map_wf.nf b/workflows/map_wf.nf index 925beee8..a7581ee5 100644 --- a/workflows/map_wf.nf +++ b/workflows/map_wf.nf @@ -1,5 +1,3 @@ -include { FASTQC } from '../modules/fastqc/fastqc.nf' addParams (params.FASTQC) -include { MULTIQC } from '../modules/multiqc/multiqc.nf' addParams (params.MULTIQC) include { BWA_MEM } from '../modules/bwa/mem.nf' addParams (params.BWA_MEM) workflow MAP_WF { @@ -8,10 +6,6 @@ workflow MAP_WF { main: - FASTQC(reads_ch) - - MULTIQC(FASTQC.out.collect()) - BWA_MEM(reads_ch, params.ref_fasta, [params.ref_fasta_dict, diff --git a/workflows/merge_wf.nf b/workflows/merge_wf.nf index 9949d31d..ec73cc96 100644 --- a/workflows/merge_wf.nf +++ b/workflows/merge_wf.nf @@ -65,4 +65,5 @@ workflow MERGE_WF { 
CLUSTER_ANALYSIS__EXCOMPLEX(PHYLOGENY_ANALYSIS__EXCOMPLEX.out.snpsites_tree_tuple, excomplex_prefix_ch) + } diff --git a/workflows/quality_check_wf.nf b/workflows/quality_check_wf.nf index bb039fc6..c3e15997 100644 --- a/workflows/quality_check_wf.nf +++ b/workflows/quality_check_wf.nf @@ -1,3 +1,5 @@ +include { FASTQC } from '../modules/fastqc/fastqc.nf' addParams (params.FASTQC) +include { MULTIQC } from '../modules/multiqc/multiqc.nf' addParams (params.MULTIQC) include { QUANTTB_QUANT } from '../modules/quanttb/quant.nf' addParams( params.QUANTTB_QUANT ) include { UTILS_QUANTTB_SAMPLE_QC } from '../modules/utils/quanttb_sample_qc.nf' addParams( params.UTILS_QUANTTB_SAMPLE_QC ) include { UTILS_QUANTTB_COHORT_STATS } from '../modules/utils/quanttb_cohort_stats.nf' addParams( params.UTILS_QUANTTB_COHORT_STATS ) @@ -10,6 +12,10 @@ workflow QUALITY_CHECK_WF { main: + FASTQC(reads_ch) + + MULTIQC(FASTQC.out.collect()) + QUANTTB_QUANT(reads_ch) UTILS_QUANTTB_SAMPLE_QC(QUANTTB_QUANT.out.quanttb_report_tuple, @@ -26,7 +32,7 @@ workflow QUALITY_CHECK_WF { .splitCsv(header: false, skip: 1) .map { row -> { - relabundance_threshold_met = row[3] + relabundance_threshold_met = row[4] derived_sample_name = row[-1] if(relabundance_threshold_met == "1") { diff --git a/workflows/subworkflows/optimize_variant_recalibration.nf b/workflows/subworkflows/optimize_variant_recalibration.nf index f66492ac..177b0857 100644 --- a/workflows/subworkflows/optimize_variant_recalibration.nf +++ b/workflows/subworkflows/optimize_variant_recalibration.nf @@ -131,6 +131,9 @@ workflow OPTIMIZE_VARIANT_RECALIBRATION { [params.ref_fasta_fai, params.ref_fasta_dict] ) + //TODO: Implement the comparison via tranches file from all of these annotation optimizations + //NOTE: We can run the other annotation processes after the 7ANN process, in parallel. Deferred to a future time in the interest of engineering effort. 
+ emit: optimized_vqsr_ch = select_variants_vcftuple_ch diff --git a/workflows/subworkflows/prepare_cohort_vcf.nf b/workflows/subworkflows/prepare_cohort_vcf.nf index c85ef0f1..f0345331 100644 --- a/workflows/subworkflows/prepare_cohort_vcf.nf +++ b/workflows/subworkflows/prepare_cohort_vcf.nf @@ -12,19 +12,24 @@ workflow PREPARE_COHORT_VCF { main: - //FIXME Remove this - // cohort_gvcfs_ch.view() - gvcfs_string_ch = cohort_gvcfs_ch .flatten() .filter { it.getExtension() == "gz" } + .map { it -> it.name } .reduce { a, b -> "$a --variant $b " } - //FIXME Remove this - // gvcfs_string_ch.view() + + def refExitRifGvcf = params.use_ref_exit_rif_gvcf ? "${projectDir}/resources/exit_rif/EXIT-RIF.g.vcf.gz" : "${projectDir}/resources/NONE.g.vcf.gz" + + def refExitRifGvcfTbi = "${refExitRifGvcf}.tbi" // merge_combine - GATK_COMBINE_GVCFS(params.vcf_name, gvcfs_string_ch, cohort_gvcfs_ch, params.ref_fasta, [params.ref_fasta_fai, params.ref_fasta_dict]) + GATK_COMBINE_GVCFS(params.vcf_name, + gvcfs_string_ch, + cohort_gvcfs_ch, + params.ref_fasta, + refExitRifGvcf, + [params.ref_fasta_fai, params.ref_fasta_dict, refExitRifGvcfTbi]) // merge_genotype @@ -35,7 +40,6 @@ workflow PREPARE_COHORT_VCF { BGZIP(SNPEFF.out) GATK_INDEX_FEATURE_FILE__COHORT(BGZIP.out, '') - emit: cohort_vcf_and_index_ch = GATK_INDEX_FEATURE_FILE__COHORT.out } diff --git a/workflows/subworkflows/resistance_analysis.nf b/workflows/subworkflows/resistance_analysis.nf index 6f329058..2cdaa3d3 100644 --- a/workflows/subworkflows/resistance_analysis.nf +++ b/workflows/subworkflows/resistance_analysis.nf @@ -1,4 +1,3 @@ -include { TBPROFILER_LOAD_LIBRARY } from "../../modules/tbprofiler/load_library.nf" include { TBPROFILER_VCF_PROFILE__COHORT } from "../../modules/tbprofiler/vcf_profile__cohort.nf" addParams (params.TBPROFILER_VCF_PROFILE__COHORT) include { TBPROFILER_COLLATE as TBPROFILER_COLLATE__COHORT } from "../../modules/tbprofiler/collate.nf" addParams (params.TBPROFILER_COLLATE__COHORT) include { 
TBPROFILER_COLLATE as TBPROFILER_COLLATE__LOFREQ } from "../../modules/tbprofiler/collate.nf" addParams (params.TBPROFILER_COLLATE__LOFREQ) @@ -13,18 +12,16 @@ workflow RESISTANCE_ANALYSIS { main: - def database = params.resistance_db ? params.resistance_db : [] - - TBPROFILER_LOAD_LIBRARY(database) + def resistanceDb = (params.resistance_db && params.resistance_db != "NONE") ? params.resistance_db : [] // merge_call_resistance - TBPROFILER_VCF_PROFILE__COHORT(merged_vcf_ch, TBPROFILER_LOAD_LIBRARY.out) - TBPROFILER_COLLATE__COHORT(params.vcf_name, TBPROFILER_VCF_PROFILE__COHORT.out, TBPROFILER_LOAD_LIBRARY.out) + TBPROFILER_VCF_PROFILE__COHORT(merged_vcf_ch, resistanceDb) + TBPROFILER_COLLATE__COHORT(params.vcf_name, TBPROFILER_VCF_PROFILE__COHORT.out, resistanceDb) // merge_call_resistance_lofreq BGZIP(lofreq_vcf_ch) - TBPROFILER_VCF_PROFILE__LOFREQ(BGZIP.out, TBPROFILER_LOAD_LIBRARY.out) + TBPROFILER_VCF_PROFILE__LOFREQ(BGZIP.out, resistanceDb) TBPROFILER_COLLATE__LOFREQ(params.vcf_name, TBPROFILER_VCF_PROFILE__LOFREQ.out.resistance_json.collect(), - TBPROFILER_LOAD_LIBRARY.out) + resistanceDb) }