From 31bf093db0280ef54ce17d39f0169253531aa054 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 7 Jun 2024 21:45:49 +0200 Subject: [PATCH 01/63] Add stats to multiqc --- assets/multiqc_config.yml | 10 + conf/steps/panel_prep.config | 9 + modules.json | 5 + .../nf-core/bcftools/stats/environment.yml | 8 + modules/nf-core/bcftools/stats/main.nf | 60 ++++++ modules/nf-core/bcftools/stats/meta.yml | 77 ++++++++ .../nf-core/bcftools/stats/tests/main.nf.test | 182 ++++++++++++++++++ .../bcftools/stats/tests/main.nf.test.snap | 180 +++++++++++++++++ modules/nf-core/bcftools/stats/tests/tags.yml | 2 + workflows/phaseimpute/main.nf | 12 ++ 10 files changed, 545 insertions(+) create mode 100644 modules/nf-core/bcftools/stats/environment.yml create mode 100644 modules/nf-core/bcftools/stats/main.nf create mode 100644 modules/nf-core/bcftools/stats/meta.yml create mode 100644 modules/nf-core/bcftools/stats/tests/main.nf.test create mode 100644 modules/nf-core/bcftools/stats/tests/main.nf.test.snap create mode 100644 modules/nf-core/bcftools/stats/tests/tags.yml diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 4770e881..b7bcd51a 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -10,6 +10,16 @@ report_section_order: "nf-core-phaseimpute-summary": order: -1002 +top_modules: + - "samtools": + name: "Samtools coverage before and after downsampling" + path_filters: + - "*_.stats" + - "bcftools": + name: "BCFtools stats of phased reference panel" + path_filters: + - "*_bcftools_stats.txt" + export_plots: true disable_version_detection: true diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index 1f7c455f..1a807340 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -218,4 +218,13 @@ process { ] } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_STATS:.*' { + prefix = { "${meta.id}_${meta.chr}_panel" } + publishDir = [ + path: { "${params.outdir}/prep_panel/stats/" }, + mode: 
params.publish_dir_mode, + enabled: true + ] + } + } diff --git a/modules.json b/modules.json index 3dbcec63..914edbba 100644 --- a/modules.json +++ b/modules.json @@ -48,6 +48,11 @@ "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", "installed_by": ["modules"] }, + "bcftools/stats": { + "branch": "master", + "git_sha": "a5ba4d59c2b248c0379b0f8aeb4e7e754566cd1f", + "installed_by": ["modules"] + }, "bcftools/view": { "branch": "master", "git_sha": "1013101da4252623fd7acf19cc581bae91d4f839", diff --git a/modules/nf-core/bcftools/stats/environment.yml b/modules/nf-core/bcftools/stats/environment.yml new file mode 100644 index 00000000..128fe204 --- /dev/null +++ b/modules/nf-core/bcftools/stats/environment.yml @@ -0,0 +1,8 @@ +name: bcftools_stats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/bcftools/stats/main.nf b/modules/nf-core/bcftools/stats/main.nf new file mode 100644 index 00000000..20e5da77 --- /dev/null +++ b/modules/nf-core/bcftools/stats/main.nf @@ -0,0 +1,60 @@ +process BCFTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(regions) + tuple val(meta3), path(targets) + tuple val(meta4), path(samples) + tuple val(meta5), path(exons) + tuple val(meta6), path(fasta) + + output: + tuple val(meta), path("*stats.txt"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? 
"--targets-file ${targets}" : "" + def samples_file = samples ? "--samples-file ${samples}" : "" + def reference_fasta = fasta ? "--fasta-ref ${fasta}" : "" + def exons_file = exons ? "--exons ${exons}" : "" + """ + bcftools stats \\ + $args \\ + $regions_file \\ + $targets_file \\ + $samples_file \\ + $reference_fasta \\ + $exons_file \\ + $vcf > ${prefix}.bcftools_stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.bcftools_stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/stats/meta.yml b/modules/nf-core/bcftools/stats/meta.yml new file mode 100644 index 00000000..7ea2103e --- /dev/null +++ b/modules/nf-core/bcftools/stats/meta.yml @@ -0,0 +1,77 @@ +name: bcftools_stats +description: Generates stats from VCF files +keywords: + - variant calling + - stats + - VCF +tools: + - stats: + description: | + Parses VCF or BCF and produces text file stats which is suitable for + machine processing and can be plotted using plot-vcfstats. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF input file + pattern: "*.{vcf}" + - tbi: + type: file + description: | + The tab index for the VCF file to be inspected. Optional: only required when parameter regions is chosen. + pattern: "*.tbi" + - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. 
(VCF, BED or tab-delimited) + - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon tbi index files) + - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' + - exons: + type: file + description: | + Tab-delimited file with exons for indel frameshifts (chr,beg,end; 1-based, inclusive, optionally bgzip compressed). + e.g. 'exons.tsv.gz' + - fasta: + type: file + description: | + Faidx indexed reference sequence file to determine INDEL context. + e.g. 'reference.fa' +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - stats: + type: file + description: Text output file containing stats + pattern: "*_{stats.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@SusiJo" + - "@TCLamnidis" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@SusiJo" + - "@TCLamnidis" diff --git a/modules/nf-core/bcftools/stats/tests/main.nf.test b/modules/nf-core/bcftools/stats/tests/main.nf.test new file mode 100644 index 00000000..be618b0b --- /dev/null +++ b/modules/nf-core/bcftools/stats/tests/main.nf.test @@ -0,0 +1,182 @@ +nextflow_process { + + name "Test Process BCFTOOLS_STATS" + script "../main.nf" + process "BCFTOOLS_STATS" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/stats" + + test("sarscov2 - vcf_gz") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + []] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out.versions).match("versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - regions") { + + when { + process { + """ + input[0] = [ [ id:'regions_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)] + input[1] = [ [id:'regions_test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("regions_versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - targets") { + + when { + process { + """ + input[0] = [ [ id:'targets_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] + input[1] = [ [], [] ] + input[2] = [ [id:'targets_test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) + ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("targets_versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - exons") { + + when { + process { + """ + input[0] = [ [ id:'exon_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] + input[1] = [ [], [] ] + input[2] = [ [], 
[] ] + input[3] = [ [], [] ] + input[4] = [ [id: "exon_test"], + file(params.modules_testdata_base_path + 'delete_me/bcftools/stats/exons.tsv.gz', checkIfExists: true) ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("exon_versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - reference") { + + when { + process { + """ + input[0] = [ [ id:'ref_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [id: 'ref_test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("ref_versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + + test("sarscov2 - vcf_gz - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + []] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/stats/tests/main.nf.test.snap b/modules/nf-core/bcftools/stats/tests/main.nf.test.snap new file mode 100644 index 00000000..cd8cff6d --- /dev/null +++ b/modules/nf-core/bcftools/stats/tests/main.nf.test.snap @@ -0,0 +1,180 @@ +{ + "sarscov2 - vcf_gz - reference": { + "content": [ + [ + "# 
This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --fasta-ref genome.fasta test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:14:35.506777837" + }, + "sarscov2 - vcf_gz - exons": { + "content": [ + [ + "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --exons exons.tsv.gz test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:14:30.57486244" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:16:27.637515559" + }, + "sarscov2 - vcf_gz - targets": { + "content": [ + [ + "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --targets-file test2.targets.tsv.gz test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:14:25.732997442" + }, + "regions_versions": { + "content": [ + [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:16:32.559884458" + }, + "targets_versions": { + "content": [ + [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": 
"2024-05-31T18:16:37.512009805" + }, + "sarscov2 - vcf_gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ], + "stats": [ + [ + { + "id": "test" + }, + "test.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:57:09.614976125" + }, + "exon_versions": { + "content": [ + [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:16:42.347397266" + }, + "ref_versions": { + "content": [ + [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:16:47.26823622" + }, + "sarscov2 - vcf_gz": { + "content": [ + [ + "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:16:27.670416598" + }, + "sarscov2 - vcf_gz - regions": { + "content": [ + [ + "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --regions-file test3.vcf.gz test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:14:20.759094062" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/bcftools/stats/tests/tags.yml b/modules/nf-core/bcftools/stats/tests/tags.yml new file mode 100644 index 00000000..53c12d92 --- /dev/null +++ b/modules/nf-core/bcftools/stats/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/stats: + - "modules/nf-core/bcftools/stats/**" diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 12933520..9c9b410f 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -34,6 +34,7 @@ include { VCF_CONCATENATE_BCFTOOLS as CONCAT_PANEL } from '../../subworkflows/ include { CHANNEL_POSFILE_CREATE_CSV } from '../../subworkflows/local/channel_posfile_create_csv' include { CHANNEL_CHUNKS_CREATE_CSV } from '../../subworkflows/local/channel_chunks_create_csv' include { CHANNEL_PANEL_CREATE_CSV } from '../../subworkflows/local/channel_panel_create_csv' +include { BCFTOOLS_STATS } from '../../modules/nf-core/bcftools/stats/main' // Imputation subworkflows include { CHANNEL_IMPUTE_CREATE_CSV } from '../../subworkflows/local/channel_impute_create_csv' @@ -171,6 +172,17 @@ workflow PHASEIMPUTE { ch_panel_phased = VCF_NORMALIZE_BCFTOOLS.out.vcf_tbi } + // Compute stats on panel + BCFTOOLS_STATS( + ch_panel_phased, + [[],[]], + [[],[]], + [[],[]], + [[],[]], + ch_fasta.map{ [it[0], it[1]] }) + ch_versions = ch_versions.mix(BCFTOOLS_STATS.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_STATS.out.stats.map{ [it[1]] }) + // Create chunks from reference VCF VCF_CHUNK_GLIMPSE(ch_panel_phased, ch_map) ch_versions = ch_versions.mix(VCF_CHUNK_GLIMPSE.out.versions) From 5ba53aef69affd15487cd53eb9f7803f2b79b401 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 7 Jun 2024 17:27:27 +0200 Subject: [PATCH 02/63] Add parameter sim_by_chr and dynamically publish downsampled file Use samtools depth instead of coverage --- conf/steps/simulation.config | 35 +++++++--- modules.json | 8 ++- .../nf-core/samtools/coverage/environment.yml | 4 +- modules/nf-core/samtools/coverage/main.nf | 12 
++-- .../samtools/coverage/samtools-coverage.diff | 31 ++------- .../samtools/coverage/tests/main.nf.test.snap | 18 ++--- .../nf-core/samtools/depth/environment.yml | 8 +++ modules/nf-core/samtools/depth/main.nf | 39 +++++++++++ modules/nf-core/samtools/depth/meta.yml | 54 +++++++++++++++ .../samtools/depth/samtools-depth.diff | 14 ++++ nextflow.config | 1 + nextflow_schema.json | 6 ++ subworkflows/local/bam_downsample/main.nf | 69 ++++++++++--------- workflows/phaseimpute/main.nf | 28 ++++++-- 14 files changed, 234 insertions(+), 93 deletions(-) create mode 100644 modules/nf-core/samtools/depth/environment.yml create mode 100644 modules/nf-core/samtools/depth/main.nf create mode 100644 modules/nf-core/samtools/depth/meta.yml create mode 100644 modules/nf-core/samtools/depth/samtools-depth.diff diff --git a/conf/steps/simulation.config b/conf/steps/simulation.config index ef7ba0b9..2141b0b5 100644 --- a/conf/steps/simulation.config +++ b/conf/steps/simulation.config @@ -24,20 +24,27 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_COVERAGE' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_DEPTH' { + publishDir = [enabled: false] + ext.prefix = { "${meta.id}_C${meta.chr ?: "all"}.stats" } + } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_VIEW' { + ext.prefix = { params.sim_by_chr ? + "${meta.id}_D${meta.depth}_C${meta.chr ?: "all"}" : + "${meta.id}" + } publishDir = [ - path: { "${params.outdir}/simulation/stats/" }, + path: { "${params.outdir}/simulation/" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + saveAs: { params.sim_by_chr ? 
null : it } ] - ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.stats" } - } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_VIEW' { - ext.prefix = { "${meta.id}_D${meta.depth}_R${meta.region.replace(':','_')}" } - publishDir = [ enabled: false ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_INDEX_1' { - publishDir = [ enabled: false ] + publishDir = [ + path: { "${params.outdir}/simulation/" }, + mode: params.publish_dir_mode, + saveAs: { params.sim_by_chr ? null : it } + ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_MERGE' { ext.prefix = { "${meta.id}" } @@ -45,4 +52,14 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_INDEX_2' { ext.args = "" } + + // Coverage process + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_TRT' { + ext.prefix = { "${meta.id}_truth" } + publishDir = [ enabled: false ] + } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_SIM' { + ext.prefix = { "${meta.id}" } + publishDir = [ enabled: false ] + } } diff --git a/modules.json b/modules.json index 914edbba..29e7469b 100644 --- a/modules.json +++ b/modules.json @@ -133,10 +133,16 @@ }, "samtools/coverage": { "branch": "master", - "git_sha": "38afbe42f7db7f19c7a89607c0a71c68f3be3131", + "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", "installed_by": ["modules"], "patch": "modules/nf-core/samtools/coverage/samtools-coverage.diff" }, + "samtools/depth": { + "branch": "master", + "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", + "installed_by": ["modules"], + "patch": "modules/nf-core/samtools/depth/samtools-depth.diff" + }, "samtools/faidx": { "branch": "master", "git_sha": "f153f1f10e1083c49935565844cccb7453021682", diff --git a/modules/nf-core/samtools/coverage/environment.yml b/modules/nf-core/samtools/coverage/environment.yml index b5e6b997..74461be7 100644 --- a/modules/nf-core/samtools/coverage/environment.yml +++ 
b/modules/nf-core/samtools/coverage/environment.yml @@ -4,5 +4,5 @@ channels: - bioconda - defaults dependencies: - - bioconda::samtools=1.19.2 - - bioconda::htslib=1.19.1 + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/coverage/main.nf b/modules/nf-core/samtools/coverage/main.nf index 52f3225c..8fc884ae 100644 --- a/modules/nf-core/samtools/coverage/main.nf +++ b/modules/nf-core/samtools/coverage/main.nf @@ -4,11 +4,11 @@ process SAMTOOLS_COVERAGE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : - 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: - tuple val(meta), path(input), path(input_index), val(region) + tuple val(meta), path(input), path(input_index) tuple val(meta2), path(fasta), path(fai) output: @@ -19,15 +19,13 @@ process SAMTOOLS_COVERAGE { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def region_cmd = region ? 
"--region ${region}" : '' + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ samtools \\ coverage \\ $args \\ -o ${prefix}.txt \\ - $region_cmd \\ --reference ${fasta} \\ $input diff --git a/modules/nf-core/samtools/coverage/samtools-coverage.diff b/modules/nf-core/samtools/coverage/samtools-coverage.diff index a37b6818..f47a4817 100644 --- a/modules/nf-core/samtools/coverage/samtools-coverage.diff +++ b/modules/nf-core/samtools/coverage/samtools-coverage.diff @@ -1,32 +1,15 @@ Changes in module 'nf-core/samtools/coverage' --- modules/nf-core/samtools/coverage/main.nf +++ modules/nf-core/samtools/coverage/main.nf -@@ -8,7 +8,7 @@ - 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" +@@ -9,8 +9,7 @@ input: -- tuple val(meta), path(input), path(input_index) -+ tuple val(meta), path(input), path(input_index), val(region) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - -@@ -20,13 +20,15 @@ - task.ext.when == null || task.ext.when - - script: -- def args = task.ext.args ?: '' -- def prefix = task.ext.prefix ?: "${meta.id}" -+ def args = task.ext.args ?: '' -+ def prefix = task.ext.prefix ?: "${meta.id}" -+ def region_cmd = region ? 
"--region ${region}" : '' - """ - samtools \\ - coverage \\ - $args \\ - -o ${prefix}.txt \\ -+ $region_cmd \\ - --reference ${fasta} \\ - $input + tuple val(meta), path(input), path(input_index) +- tuple val(meta2), path(fasta) +- tuple val(meta3), path(fai) ++ tuple val(meta2), path(fasta), path(fai) + output: + tuple val(meta), path("*.txt"), emit: coverage ************************************************************ diff --git a/modules/nf-core/samtools/coverage/tests/main.nf.test.snap b/modules/nf-core/samtools/coverage/tests/main.nf.test.snap index cc3ce01c..63e26051 100644 --- a/modules/nf-core/samtools/coverage/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/coverage/tests/main.nf.test.snap @@ -12,7 +12,7 @@ ] ], "1": [ - "versions.yml:md5,56e1239217405837de88af882d9d68f6" + "versions.yml:md5,16dec2ad2e9aa7cc174e86b03ec99984" ], "coverage": [ [ @@ -24,7 +24,7 @@ ] ], "versions": [ - "versions.yml:md5,56e1239217405837de88af882d9d68f6" + "versions.yml:md5,16dec2ad2e9aa7cc174e86b03ec99984" ] } ], @@ -32,7 +32,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-29T11:08:03.724132" + "timestamp": "2024-05-28T15:41:47.4359515" }, "test_samtools_coverage_bam": { "content": [ @@ -47,7 +47,7 @@ ] ], "1": [ - "versions.yml:md5,56e1239217405837de88af882d9d68f6" + "versions.yml:md5,16dec2ad2e9aa7cc174e86b03ec99984" ], "coverage": [ [ @@ -59,7 +59,7 @@ ] ], "versions": [ - "versions.yml:md5,56e1239217405837de88af882d9d68f6" + "versions.yml:md5,16dec2ad2e9aa7cc174e86b03ec99984" ] } ], @@ -67,7 +67,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-29T11:36:30.272862" + "timestamp": "2024-05-28T15:41:36.39684853" }, "test_samtools_coverage_cram": { "content": [ @@ -82,7 +82,7 @@ ] ], "1": [ - "versions.yml:md5,56e1239217405837de88af882d9d68f6" + "versions.yml:md5,16dec2ad2e9aa7cc174e86b03ec99984" ], "coverage": [ [ @@ -94,7 +94,7 @@ ] ], "versions": [ - "versions.yml:md5,56e1239217405837de88af882d9d68f6" + 
"versions.yml:md5,16dec2ad2e9aa7cc174e86b03ec99984" ] } ], @@ -102,6 +102,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-29T11:39:08.488488" + "timestamp": "2024-05-28T15:41:42.036690983" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/depth/environment.yml b/modules/nf-core/samtools/depth/environment.yml new file mode 100644 index 00000000..70975f2a --- /dev/null +++ b/modules/nf-core/samtools/depth/environment.yml @@ -0,0 +1,8 @@ +name: samtools_depth +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/depth/main.nf b/modules/nf-core/samtools/depth/main.nf new file mode 100644 index 00000000..e8810398 --- /dev/null +++ b/modules/nf-core/samtools/depth/main.nf @@ -0,0 +1,39 @@ +process SAMTOOLS_DEPTH { + tag "$meta1.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" + + input: + tuple val(meta1), path(bam), path(bai) + tuple val(meta2), path(intervals) + + output: + tuple val(meta1), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta1.id}" + def positions = intervals ? 
"-b ${intervals}" : "" + """ + samtools \\ + depth \\ + --threads ${task.cpus-1} \\ + $args \\ + $positions \\ + -o ${prefix}.tsv \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/depth/meta.yml b/modules/nf-core/samtools/depth/meta.yml new file mode 100644 index 00000000..bc51f264 --- /dev/null +++ b/modules/nf-core/samtools/depth/meta.yml @@ -0,0 +1,54 @@ +name: samtools_depth +description: Computes the depth at each position or region. +keywords: + - depth + - samtools + - statistics + - coverage +tools: + - samtools: + description: Tools for dealing with SAM, BAM and CRAM files; samtools depth – computes the read depth at each position or region + homepage: http://www.htslib.org + documentation: http://www.htslib.org/doc/samtools-depth.html + tool_dev_url: https://github.com/samtools/samtools + doi: "10.1093/bioinformatics/btp352" + licence: ["MIT"] +input: + - meta1: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - intervals: + type: file + description: list of positions or regions in specified bed file + pattern: "*.{bed}" +output: + - meta1: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - tsv: + type: file + description: The output of samtools depth has three columns - the name of the contig or chromosome, the position and the number of reads aligned at that position + pattern: "*.{tsv}" +authors: + - "@louperelo" + - "@nevinwu" +maintainers: + - "@louperelo" + - "@nevinwu" diff --git a/modules/nf-core/samtools/depth/samtools-depth.diff b/modules/nf-core/samtools/depth/samtools-depth.diff new file mode 100644 index 00000000..523a63e0 --- /dev/null +++ b/modules/nf-core/samtools/depth/samtools-depth.diff @@ -0,0 +1,14 @@ +Changes in module 'nf-core/samtools/depth' +--- modules/nf-core/samtools/depth/main.nf ++++ modules/nf-core/samtools/depth/main.nf +@@ -8,7 +8,7 @@ + 'biocontainers/samtools:1.20--h50ea8bc_0' }" + + input: +- tuple val(meta1), path(bam) ++ tuple val(meta1), path(bam), path(bai) + tuple val(meta2), path(intervals) + + output: + +************************************************************ diff --git a/nextflow.config b/nextflow.config index b8ae5b48..919f061f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,6 +42,7 @@ params { // Simulate depth = 1 genotype = null + sim_by_chr = true // Validation input_truth = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 66127d46..d1ac56d3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -84,6 +84,12 @@ "default": 1, "fa_icon": "fas fa-list-ol" }, + "sim_by_chr": { + "type": "boolean", + "description": "Simulate data by chromosome then merge it or simulate all data at once", + "default": true, + "pattern": "true|false" + }, "genotype": { "type": "string", "description": "Genotype position to use to simulate the data", diff --git a/subworkflows/local/bam_downsample/main.nf b/subworkflows/local/bam_downsample/main.nf index 0270731f..304af995 100644 --- a/subworkflows/local/bam_downsample/main.nf +++ 
b/subworkflows/local/bam_downsample/main.nf @@ -1,4 +1,4 @@ -include { SAMTOOLS_COVERAGE } from '../../../modules/nf-core/samtools/coverage' +include { SAMTOOLS_DEPTH } from '../../../modules/nf-core/samtools/depth' include { SAMTOOLS_VIEW } from '../../../modules/nf-core/samtools/view' include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_1 } from '../../../modules/nf-core/samtools/index' include { SAMTOOLS_MERGE } from '../../../modules/nf-core/samtools/merge' @@ -14,22 +14,18 @@ workflow BAM_DOWNSAMPLE { main: ch_versions = Channel.empty() - // Add region to channel - ch_coverage = ch_bam - .map{ metaICR, bam, index -> - [ metaICR, bam, index, metaICR.region ] - } - - // Get coverage of the region - SAMTOOLS_COVERAGE ( ch_coverage, ch_fasta ) // [ meta, bam, bai, region], [ meta, fasta, fai ] - ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE.out.versions.first()) - - // Compute mean depth of the region - ch_mean_depth = SAMTOOLS_COVERAGE.out.coverage - .splitCsv(header: true, sep:'\t') + // Compute mean depth + SAMTOOLS_DEPTH(ch_bam, [[], []]) + ch_mean_depth = SAMTOOLS_DEPTH.out.tsv + .splitCsv(header: false, sep:'\t') .map{ metaICR, row -> - [ metaICR,"${row.meandepth}" as Float ] + [ metaICR, row[2] as Float ] } + .groupTuple() + .map{ metaICR, depth -> + [ metaICR, depth.sum()/depth.size() ] + } + ch_versions = ch_versions.mix(SAMTOOLS_DEPTH.out.versions.first()) // Compute downsampling factor ch_depth_factor = ch_mean_depth @@ -39,10 +35,10 @@ workflow BAM_DOWNSAMPLE { } // Add all necessary channel for downsampling - ch_input_downsample = ch_coverage + ch_input_downsample = ch_bam .combine(ch_depth_factor, by : 0) - .map{ metaICR, bam, index, region, metaICRD, depth -> - [ metaICRD, bam, index, region, depth ] + .map{ metaICR, bam, index, metaICRD, depth -> + [ metaICRD, bam, index, [], depth ] } // Downsample @@ -61,25 +57,30 @@ workflow BAM_DOWNSAMPLE { ch_bam_emul = SAMTOOLS_VIEW.out.bam .combine(SAMTOOLS_INDEX_1.out.bai, by:0) - SAMTOOLS_MERGE( - ch_bam_emul - 
.map{ - metaICRD, bam, index -> [metaICRD.subMap("id", "depth"), bam, index] - } - .groupTuple() - .map{ metaID, bam, index -> - [ metaID + ["chr": "all"], bam, index ] - }, - ch_fasta - ) - SAMTOOLS_INDEX_2(SAMTOOLS_MERGE.out.bam) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX_2.out.versions.first()) + if (params.sim_by_chr == true) { + SAMTOOLS_MERGE( + ch_bam_emul + .map{ + metaICRD, bam, index -> [metaICRD.subMap("id", "depth"), bam, index] + } + .groupTuple() + .map{ metaID, bam, index -> + [ metaID + ["chr": "all"], bam, index ] + }, + ch_fasta + ) + ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions.first()) + + SAMTOOLS_INDEX_2(SAMTOOLS_MERGE.out.bam) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_2.out.versions.first()) - ch_bam_emul_all = SAMTOOLS_MERGE.out.bam - .combine(SAMTOOLS_INDEX_2.out.bai, by:0) + ch_bam_emul_all = SAMTOOLS_MERGE.out.bam + .combine(SAMTOOLS_INDEX_2.out.bai, by:0) + } else { + ch_bam_emul_all = ch_bam_emul + } emit: bam_emul = ch_bam_emul_all // channel: [ [id, chr, region, depth], bam, bai ] - coverage = SAMTOOLS_COVERAGE.out.coverage // channel: [ [id, chr, region, depth], txt ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 9c9b410f..aae6b86a 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -23,6 +23,8 @@ include { checkHapLegend } from '../../subworkflows/local/utils_nfc include { BAM_REGION } from '../../subworkflows/local/bam_region' include { BAM_DOWNSAMPLE } from '../../subworkflows/local/bam_downsample' include { CHANNEL_SIMULATE_CREATE_CSV } from '../../subworkflows/local/channel_simulate_create_csv' +include { SAMTOOLS_COVERAGE as SAMTOOLS_COVERAGE_SIM } from '../../modules/nf-core/samtools/coverage' +include { SAMTOOLS_COVERAGE as SAMTOOLS_COVERAGE_TRT } from '../../modules/nf-core/samtools/coverage' // Panelprep subworkflows include { VCF_CHR_CHECK } from 
'../../subworkflows/local/vcf_chr_check' @@ -105,24 +107,36 @@ workflow PHASEIMPUTE { error "All input files must be in BAM format to perform simulation" } } - // Split the bam into the region specified - BAM_REGION(ch_input_sim, ch_region, ch_fasta) - ch_versions = ch_versions.mix(BAM_REGION.out.versions) + // Compute coverage of input files + SAMTOOLS_COVERAGE_TRT(ch_input_sim, ch_fasta) + ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_TRT.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(SAMTOOLS_COVERAGE_TRT.out.coverage.map{it[1]}) - // Initialize channel to impute - ch_bam_to_impute = Channel.empty() + if (params.sim_by_chr == true) { + // Split the bam into the region specified + BAM_REGION(ch_input_sim, ch_region, ch_fasta) + ch_versions = ch_versions.mix(BAM_REGION.out.versions) + ch_input_dwn = BAM_REGION.out.bam_region + } else { + ch_input_dwn = ch_input_sim + .map{ meta, bam, index -> [ meta + [chr: "all"], bam, index ] } + } if (params.depth) { // Downsample input to desired depth BAM_DOWNSAMPLE( - BAM_REGION.out.bam_region, + ch_input_dwn, ch_depth, ch_fasta ) ch_versions = ch_versions.mix(BAM_DOWNSAMPLE.out.versions) - ch_multiqc_files = ch_multiqc_files.mix(BAM_DOWNSAMPLE.out.coverage.map{ [it[1]] }) ch_input_impute = BAM_DOWNSAMPLE.out.bam_emul ch_input_validate_truth = ch_input_sim + + // Compute coverage of input files + SAMTOOLS_COVERAGE_SIM(BAM_DOWNSAMPLE.out.bam_emul, ch_fasta) + ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_SIM.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(SAMTOOLS_COVERAGE_SIM.out.coverage.map{it[1]}) } if (params.genotype) { From 01f3814fcced2b6b7f97dbc17b00db64b059463e Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 7 Jun 2024 17:42:26 +0200 Subject: [PATCH 03/63] Add description and change parameter name --- CHANGELOG.md | 1 + conf/steps/simulation.config | 6 +++--- nextflow.config | 2 +- nextflow_schema.json | 4 ++-- subworkflows/local/bam_downsample/main.nf | 2 +- workflows/phaseimpute/main.nf 
| 11 ++++++----- workflows/phaseimpute/tests/test_all.nf.test.snap | 15 ++------------- 7 files changed, 16 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69d8dea8..7b0ae38e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co - [#56](https://github.com/nf-core/phaseimpute/pull/56) - Move to nf-test to check the output files names generated. Fix validation and concatenation by chromosomes missing. Add dedicated GLIMPSE1 subworkflow. Fix posfile generation to be done once for glimpse and stitch. - [#68](https://github.com/nf-core/phaseimpute/pull/68) - QUILT can handle external params chunks and hap-legend files. - [#78](https://github.com/nf-core/phaseimpute/pull/78) - Separate validate step from panel preparation. +- [#84](https://github.com/nf-core/phaseimpute/pull/84) - Change depth computation to use SAMTOOLS_DEPTH and make separation by chromosome in simulation optional with `--sim_by_reg` parameter. ### `Fixed` diff --git a/conf/steps/simulation.config b/conf/steps/simulation.config index 2141b0b5..4d6adb7b 100644 --- a/conf/steps/simulation.config +++ b/conf/steps/simulation.config @@ -29,21 +29,21 @@ process { ext.prefix = { "${meta.id}_C${meta.chr ?: "all"}.stats" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_VIEW' { - ext.prefix = { params.sim_by_chr ? + ext.prefix = { params.sim_by_reg ? "${meta.id}_D${meta.depth}_C${meta.chr ?: "all"}" : "${meta.id}" } publishDir = [ path: { "${params.outdir}/simulation/" }, mode: params.publish_dir_mode, - saveAs: { params.sim_by_chr ? null : it } + saveAs: { params.sim_by_reg ? null : it } ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_INDEX_1' { publishDir = [ path: { "${params.outdir}/simulation/" }, mode: params.publish_dir_mode, - saveAs: { params.sim_by_chr ? null : it } + saveAs: { params.sim_by_reg ? 
null : it } ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_MERGE' { diff --git a/nextflow.config b/nextflow.config index 919f061f..4602c687 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,7 +42,7 @@ params { // Simulate depth = 1 genotype = null - sim_by_chr = true + sim_by_reg = true // Validation input_truth = null diff --git a/nextflow_schema.json b/nextflow_schema.json index d1ac56d3..19b6f113 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -84,9 +84,9 @@ "default": 1, "fa_icon": "fas fa-list-ol" }, - "sim_by_chr": { + "sim_by_reg": { "type": "boolean", - "description": "Simulate data by chromosome then merge it or simulate all data at once", + "description": "Simulate data only for the given region then merge it or simulate all data at once", "default": true, "pattern": "true|false" }, diff --git a/subworkflows/local/bam_downsample/main.nf b/subworkflows/local/bam_downsample/main.nf index 304af995..09b04b1f 100644 --- a/subworkflows/local/bam_downsample/main.nf +++ b/subworkflows/local/bam_downsample/main.nf @@ -57,7 +57,7 @@ workflow BAM_DOWNSAMPLE { ch_bam_emul = SAMTOOLS_VIEW.out.bam .combine(SAMTOOLS_INDEX_1.out.bai, by:0) - if (params.sim_by_chr == true) { + if (params.sim_by_reg == true) { SAMTOOLS_MERGE( ch_bam_emul .map{ diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index aae6b86a..43ded089 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -100,6 +100,8 @@ workflow PHASEIMPUTE { if (params.steps.split(',').contains("simulate") || params.steps.split(',').contains("all")) { // Output channel of simulate process ch_sim_output = Channel.empty() + // Set truth channel + ch_input_validate_truth = ch_input_sim // Test if the input are all bam files getAllFilesExtension(ch_input_sim) @@ -109,13 +111,13 @@ workflow PHASEIMPUTE { // Compute coverage of input files SAMTOOLS_COVERAGE_TRT(ch_input_sim, ch_fasta) - ch_versions = 
ch_versions.mix(SAMTOOLS_COVERAGE_TRT.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_TRT.out.versions) ch_multiqc_files = ch_multiqc_files.mix(SAMTOOLS_COVERAGE_TRT.out.coverage.map{it[1]}) - if (params.sim_by_chr == true) { + if (params.sim_by_reg == true) { // Split the bam into the region specified BAM_REGION(ch_input_sim, ch_region, ch_fasta) - ch_versions = ch_versions.mix(BAM_REGION.out.versions) + ch_versions = ch_versions.mix(BAM_REGION.out.versions) ch_input_dwn = BAM_REGION.out.bam_region } else { ch_input_dwn = ch_input_sim @@ -131,11 +133,10 @@ workflow PHASEIMPUTE { ) ch_versions = ch_versions.mix(BAM_DOWNSAMPLE.out.versions) ch_input_impute = BAM_DOWNSAMPLE.out.bam_emul - ch_input_validate_truth = ch_input_sim // Compute coverage of input files SAMTOOLS_COVERAGE_SIM(BAM_DOWNSAMPLE.out.bam_emul, ch_fasta) - ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_SIM.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_SIM.out.versions) ch_multiqc_files = ch_multiqc_files.mix(SAMTOOLS_COVERAGE_SIM.out.coverage.map{it[1]}) } diff --git a/workflows/phaseimpute/tests/test_all.nf.test.snap b/workflows/phaseimpute/tests/test_all.nf.test.snap index ebee0b12..14dd08ec 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test.snap +++ b/workflows/phaseimpute/tests/test_all.nf.test.snap @@ -28,13 +28,7 @@ "simulation/NA19401.bam.bai", "simulation/NA20359.bam", "simulation/NA20359.bam.bai", - "simulation/csv/simulate.csv", - "simulation/stats/NA12878_Rchr21_16570000-16610000.stats.txt", - "simulation/stats/NA12878_Rchr22_16570000-16610000.stats.txt", - "simulation/stats/NA19401_Rchr21_16570000-16610000.stats.txt", - "simulation/stats/NA19401_Rchr22_16570000-16610000.stats.txt", - "simulation/stats/NA20359_Rchr21_16570000-16610000.stats.txt", - "simulation/stats/NA20359_Rchr22_16570000-16610000.stats.txt" + "simulation/csv/simulate.csv" ], [ "imputation/csv/impute.csv", @@ -171,12 +165,7 @@ "simulation/NA19401.bam.bai", 
"simulation/NA20359.bam", "simulation/NA20359.bam.bai", - "simulation/stats/NA12878_Rchr21_16570000-16610000.stats.txt", - "simulation/stats/NA12878_Rchr22_16570000-16610000.stats.txt", - "simulation/stats/NA19401_Rchr21_16570000-16610000.stats.txt", - "simulation/stats/NA19401_Rchr22_16570000-16610000.stats.txt", - "simulation/stats/NA20359_Rchr21_16570000-16610000.stats.txt", - "simulation/stats/NA20359_Rchr22_16570000-16610000.stats.txt" + "simulation/csv/simulate.csv" ] ], "meta": { From ed7fc919f287fab637a3da8ae67ae4e5c821f6df Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Sat, 8 Jun 2024 15:11:50 +0200 Subject: [PATCH 04/63] Change to used input_region --- CHANGELOG.md | 2 +- conf/steps/simulation.config | 6 +++--- main.nf | 4 ++++ nextflow.config | 1 - nextflow_schema.json | 6 ------ subworkflows/local/bam_downsample/main.nf | 2 +- workflows/phaseimpute/main.nf | 4 +--- 7 files changed, 10 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b0ae38e..6c101351 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,7 +30,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co - [#56](https://github.com/nf-core/phaseimpute/pull/56) - Move to nf-test to check the output files names generated. Fix validation and concatenation by chromosomes missing. Add dedicated GLIMPSE1 subworkflow. Fix posfile generation to be done once for glimpse and stitch. - [#68](https://github.com/nf-core/phaseimpute/pull/68) - QUILT can handle external params chunks and hap-legend files. - [#78](https://github.com/nf-core/phaseimpute/pull/78) - Separate validate step from panel preparation. -- [#84](https://github.com/nf-core/phaseimpute/pull/84) - Change depth computation to use SAMTOOLS_DEPTH and make separation by chromosome in simulation optional with `--sim_by_reg` parameter. 
+- [#84](https://github.com/nf-core/phaseimpute/pull/84) - Change depth computation to use SAMTOOLS_DEPTH and make separation by chromosome only if regions are specified. ### `Fixed` diff --git a/conf/steps/simulation.config b/conf/steps/simulation.config index 4d6adb7b..f3355993 100644 --- a/conf/steps/simulation.config +++ b/conf/steps/simulation.config @@ -29,21 +29,21 @@ process { ext.prefix = { "${meta.id}_C${meta.chr ?: "all"}.stats" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_VIEW' { - ext.prefix = { params.sim_by_reg ? + ext.prefix = { params.input_region ? "${meta.id}_D${meta.depth}_C${meta.chr ?: "all"}" : "${meta.id}" } publishDir = [ path: { "${params.outdir}/simulation/" }, mode: params.publish_dir_mode, - saveAs: { params.sim_by_reg ? null : it } + saveAs: { params.input_region ? null : it } ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_INDEX_1' { publishDir = [ path: { "${params.outdir}/simulation/" }, mode: params.publish_dir_mode, - saveAs: { params.sim_by_reg ? null : it } + saveAs: { params.input_region ? 
null : it } ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_MERGE' { diff --git a/main.nf b/main.nf index 8ccc449a..191440f2 100644 --- a/main.nf +++ b/main.nf @@ -63,6 +63,10 @@ workflow NFCORE_PHASEIMPUTE { ch_input_validate = ch_input } + if (params.steps.split(',').contains("all")) { + ch_input_truth = ch_input + } + // // WORKFLOW: Run pipeline // diff --git a/nextflow.config b/nextflow.config index 4602c687..b8ae5b48 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,7 +42,6 @@ params { // Simulate depth = 1 genotype = null - sim_by_reg = true // Validation input_truth = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 19b6f113..66127d46 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -84,12 +84,6 @@ "default": 1, "fa_icon": "fas fa-list-ol" }, - "sim_by_reg": { - "type": "boolean", - "description": "Simulate data only for the given region then merge it or simulate all data at once", - "default": true, - "pattern": "true|false" - }, "genotype": { "type": "string", "description": "Genotype position to use to simulate the data", diff --git a/subworkflows/local/bam_downsample/main.nf b/subworkflows/local/bam_downsample/main.nf index 09b04b1f..e8464cd8 100644 --- a/subworkflows/local/bam_downsample/main.nf +++ b/subworkflows/local/bam_downsample/main.nf @@ -57,7 +57,7 @@ workflow BAM_DOWNSAMPLE { ch_bam_emul = SAMTOOLS_VIEW.out.bam .combine(SAMTOOLS_INDEX_1.out.bai, by:0) - if (params.sim_by_reg == true) { + if (params.input_region) { SAMTOOLS_MERGE( ch_bam_emul .map{ diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 43ded089..046d6800 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -100,8 +100,6 @@ workflow PHASEIMPUTE { if (params.steps.split(',').contains("simulate") || params.steps.split(',').contains("all")) { // Output channel of simulate process ch_sim_output = Channel.empty() - // Set truth channel - ch_input_validate_truth = 
ch_input_sim // Test if the input are all bam files getAllFilesExtension(ch_input_sim) @@ -114,7 +112,7 @@ workflow PHASEIMPUTE { ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_TRT.out.versions) ch_multiqc_files = ch_multiqc_files.mix(SAMTOOLS_COVERAGE_TRT.out.coverage.map{it[1]}) - if (params.sim_by_reg == true) { + if (params.input_region) { // Split the bam into the region specified BAM_REGION(ch_input_sim, ch_region, ch_fasta) ch_versions = ch_versions.mix(BAM_REGION.out.versions) From f30cfaaf87dd414b4c50a091d3916d84578d6746 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Sat, 8 Jun 2024 18:48:28 +0200 Subject: [PATCH 05/63] Simplify channels creations --- subworkflows/local/bam_downsample/main.nf | 13 +++++------ workflows/phaseimpute/main.nf | 27 +++++++---------------- 2 files changed, 13 insertions(+), 27 deletions(-) diff --git a/subworkflows/local/bam_downsample/main.nf b/subworkflows/local/bam_downsample/main.nf index e8464cd8..5edd5f87 100644 --- a/subworkflows/local/bam_downsample/main.nf +++ b/subworkflows/local/bam_downsample/main.nf @@ -63,10 +63,7 @@ workflow BAM_DOWNSAMPLE { .map{ metaICRD, bam, index -> [metaICRD.subMap("id", "depth"), bam, index] } - .groupTuple() - .map{ metaID, bam, index -> - [ metaID + ["chr": "all"], bam, index ] - }, + .groupTuple(), ch_fasta ) ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions.first()) @@ -74,13 +71,13 @@ workflow BAM_DOWNSAMPLE { SAMTOOLS_INDEX_2(SAMTOOLS_MERGE.out.bam) ch_versions = ch_versions.mix(SAMTOOLS_INDEX_2.out.versions.first()) - ch_bam_emul_all = SAMTOOLS_MERGE.out.bam + ch_bam_emul = SAMTOOLS_MERGE.out.bam .combine(SAMTOOLS_INDEX_2.out.bai, by:0) - } else { - ch_bam_emul_all = ch_bam_emul } + ch_bam_emul = ch_bam_emul + .map{ meta, bam, index -> [meta + [chr: "all"], bam, index]} emit: - bam_emul = ch_bam_emul_all // channel: [ [id, chr, region, depth], bam, bai ] + bam_emul = ch_bam_emul // channel: [ [id, chr, region, depth], bam, bai ] versions = ch_versions // channel: [ 
versions.yml ] } diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 046d6800..b0c0ae36 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -98,9 +98,6 @@ workflow PHASEIMPUTE { // Simulate data if asked // if (params.steps.split(',').contains("simulate") || params.steps.split(',').contains("all")) { - // Output channel of simulate process - ch_sim_output = Channel.empty() - // Test if the input are all bam files getAllFilesExtension(ch_input_sim) .map{ if (it != "bam") { @@ -113,24 +110,17 @@ workflow PHASEIMPUTE { ch_multiqc_files = ch_multiqc_files.mix(SAMTOOLS_COVERAGE_TRT.out.coverage.map{it[1]}) if (params.input_region) { - // Split the bam into the region specified + // Split the bam into the regions specified BAM_REGION(ch_input_sim, ch_region, ch_fasta) ch_versions = ch_versions.mix(BAM_REGION.out.versions) - ch_input_dwn = BAM_REGION.out.bam_region - } else { - ch_input_dwn = ch_input_sim - .map{ meta, bam, index -> [ meta + [chr: "all"], bam, index ] } + ch_input_sim = BAM_REGION.out.bam_region } if (params.depth) { // Downsample input to desired depth - BAM_DOWNSAMPLE( - ch_input_dwn, - ch_depth, - ch_fasta - ) - ch_versions = ch_versions.mix(BAM_DOWNSAMPLE.out.versions) - ch_input_impute = BAM_DOWNSAMPLE.out.bam_emul + BAM_DOWNSAMPLE(ch_input_sim, ch_depth, ch_fasta) + ch_versions = ch_versions.mix(BAM_DOWNSAMPLE.out.versions) + ch_input_impute = BAM_DOWNSAMPLE.out.bam_emul // Compute coverage of input files SAMTOOLS_COVERAGE_SIM(BAM_DOWNSAMPLE.out.bam_emul, ch_fasta) @@ -165,12 +155,13 @@ workflow PHASEIMPUTE { VCF_SITES_EXTRACT_BCFTOOLS(VCF_NORMALIZE_BCFTOOLS.out.vcf_tbi) ch_versions = ch_versions.mix(VCF_SITES_EXTRACT_BCFTOOLS.out.versions) - // Generate posfile channels from extracted sites + // Generate all necessary channels ch_posfile_glimpse = VCF_SITES_EXTRACT_BCFTOOLS.out.glimpse_posfile ch_posfile_stitch = VCF_SITES_EXTRACT_BCFTOOLS.out.panel_tsv_stitch ch_panel_sites = 
VCF_SITES_EXTRACT_BCFTOOLS.out.panel_sites + ch_panel_phased = VCF_NORMALIZE_BCFTOOLS.out.vcf_tbi - // Phase panel with tool of choice (e.g. SHAPEIT5) + // Phase panel with Shapeit5 if (params.phased == false) { VCF_PHASE_SHAPEIT5( VCF_NORMALIZE_BCFTOOLS.out.vcf_tbi.combine(Channel.of([[]])), @@ -181,8 +172,6 @@ workflow PHASEIMPUTE { ) ch_panel_phased = VCF_PHASE_SHAPEIT5.out.vcf_tbi ch_versions = ch_versions.mix(VCF_PHASE_SHAPEIT5.out.versions) - } else { - ch_panel_phased = VCF_NORMALIZE_BCFTOOLS.out.vcf_tbi } // Compute stats on panel From 1c8bf4c1068d7256eccabeafb467ef89affdcbc3 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 7 Jun 2024 22:40:12 +0000 Subject: [PATCH 06/63] correct typo which raised error --- subworkflows/local/chunk_prepare_channel/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/chunk_prepare_channel/main.nf b/subworkflows/local/chunk_prepare_channel/main.nf index 26e388b5..bf8c30d5 100644 --- a/subworkflows/local/chunk_prepare_channel/main.nf +++ b/subworkflows/local/chunk_prepare_channel/main.nf @@ -14,7 +14,7 @@ workflow CHUNK_PREPARE_CHANNEL { .map { meta, it -> [meta, it["RegionIn"], it["RegionOut"]]} } - if(tool = "quilt") { + if(tool == "quilt") { ch_chunks = ch_chunks.map { chr, txt -> [chr, file(txt)]} .splitText() .map { metamap, line -> From e5a4eb0770cd28f66a6a21004664385bb43d067f Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 7 Jun 2024 22:40:35 +0000 Subject: [PATCH 07/63] allow external chunks in glimpse1 --- workflows/phaseimpute/main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index b0c0ae36..fc90206e 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -204,7 +204,8 @@ workflow PHASEIMPUTE { // Use chunks from parameters if provided or use previous chunks from panelprep 
if (params.chunks) { - ch_chunks_glimpse1 = CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse").out.chunks + CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse") + ch_chunks_glimpse1 = CHUNK_PREPARE_CHANNEL.out.chunks } else if (params.panel && params.steps.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { ch_chunks_glimpse1 = VCF_CHUNK_GLIMPSE.out.chunks_glimpse1 } From a58c1946ea82c0d46fed6730331971d08814153d Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 7 Jun 2024 22:40:59 +0000 Subject: [PATCH 08/63] emit final prepared vcf --- conf/steps/panel_prep.config | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index 1a807340..fd051988 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -51,34 +51,52 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_DEL_MLT_ALL' { ext.args = '-v snps -m 2 -M 2 -Oz' ext.prefix = { "${meta.id}_${meta.chr}_biallelic_snps" } - publishDir = [ enabled: false ] - } + publishDir = [ + path: { "${params.outdir}/prep_panel/normalized" }, + enabled: true + ] + } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX_2' { ext.args = "--tbi" - publishDir = [enabled: false] - } + publishDir = [ + path: { "${params.outdir}/prep_panel/normalized" }, + enabled: true + ] + } // (Optional) Subworkflow: Remove samples from panel withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_DEL_SPL' { ext.args = { "-Oz -s^${params.remove_samples}" } ext.prefix = { "${meta.id}_${meta.chr}_biallelic_removed_samples" } - publishDir = [ enabled: false ] + publishDir = [ + path: { "${params.outdir}/prep_panel/normalized" }, + enabled: true + ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX_3' { ext.args = "--tbi" - publishDir = [enabled: false] + publishDir = [ + 
path: { "${params.outdir}/prep_panel/normalized" }, + enabled: true + ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:VCFLIB_VCFFIXUP' { ext.prefix = { "${meta.id}_${meta.chr}" } - publishDir = [enabled: false] + publishDir = [ + path: { "${params.outdir}/prep_panel/compute_freq" }, + enabled: true + ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX_4' { ext.args = "--tbi" - publishDir = [ enabled: false ] + publishDir = [ + path: { "${params.outdir}/prep_panel/compute_freq" }, + enabled: true + ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_CONVERT' { From 3e70142e83c9e57e1fc0fd3c124f41d2c7e483bb Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 7 Jun 2024 22:41:33 +0000 Subject: [PATCH 09/63] run test from external params --- conf/test.config | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/conf/test.config b/conf/test.config index 942d5b00..aba01239 100644 --- a/conf/test.config +++ b/conf/test.config @@ -29,12 +29,14 @@ params { phased = true // Pipeline steps - steps = "panelprep,impute" + steps = "impute" + + // External params + chunks = "${projectDir}/tests/csv/chunks.csv" + posfile = "${projectDir}/tests/csv/posfile.csv" // Impute tools tools = "glimpse1" } -withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE_CHUNK' { - ext.args = ["--window-size 10000", "--window-count 400", "--buffer-size 5000", "--buffer-count 30"].join(' ') -} + From 129de0aa5b67302fb61b1680e399a6892d563873 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 7 Jun 2024 22:44:39 +0000 Subject: [PATCH 10/63] accept external chunks in glimpse2 --- conf/test_glimpse2.config | 17 +++++------------ workflows/phaseimpute/main.nf | 3 ++- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/conf/test_glimpse2.config b/conf/test_glimpse2.config index f010f1c1..39f93681 
100644 --- a/conf/test_glimpse2.config +++ b/conf/test_glimpse2.config @@ -28,19 +28,12 @@ params { phased = true // Pipeline steps - steps = "panelprep,impute" + steps = "impute" + + // External params + chunks = "${projectDir}/tests/csv/chunks.csv" + posfile = "${projectDir}/tests/csv/posfile.csv" // Impute tools tools = "glimpse2" } - -process { - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE_CHUNK' { - ext.args = ["--window-size 10000", "--window-count 400", "--buffer-size 5000", "--buffer-count 30"].join(' ') - } - - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE2_CHUNK' { - ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') - } -} - diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index fc90206e..a7493b24 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -244,7 +244,8 @@ workflow PHASEIMPUTE { if (params.panel && params.steps.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { ch_chunks_glimpse2 = VCF_CHUNK_GLIMPSE.out.chunks_glimpse2 } else if (params.chunks) { - ch_chunks_glimpse2 = CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse").out.chunks + CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse") + ch_chunks_glimpse2 = CHUNK_PREPARE_CHANNEL.out.chunks } // Use panel from parameters if provided From fbbc87e2453fe3c5f748b987d27b0c26c3229bf4 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 7 Jun 2024 22:46:45 +0000 Subject: [PATCH 11/63] remove posfile not used in glimpse2 --- conf/test_glimpse2.config | 1 - 1 file changed, 1 deletion(-) diff --git a/conf/test_glimpse2.config b/conf/test_glimpse2.config index 39f93681..5c3ac281 100644 --- a/conf/test_glimpse2.config +++ b/conf/test_glimpse2.config @@ -32,7 +32,6 @@ params { // External params chunks = "${projectDir}/tests/csv/chunks.csv" - posfile = 
"${projectDir}/tests/csv/posfile.csv" // Impute tools tools = "glimpse2" From ec44ae8acb4cc89725ce1c3c53a7150d869281b5 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 7 Jun 2024 22:59:26 +0000 Subject: [PATCH 12/63] make meta.id uniform across all schemas --- assets/schema_chunks.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/schema_chunks.json b/assets/schema_chunks.json index a295d003..7c4d82ee 100644 --- a/assets/schema_chunks.json +++ b/assets/schema_chunks.json @@ -11,7 +11,7 @@ "type": "string", "pattern": "^\\S+$", "errorMessage": "Panel name must be provided as a string and cannot contain spaces", - "meta": ["panel"] + "meta": ["id"] }, "chr": { "type": "string", From a676f34b44e149dc07d11b15c49bac552e1220d3 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sat, 8 Jun 2024 11:50:30 +0000 Subject: [PATCH 13/63] use input panel from panelprep --- conf/test.config | 2 +- tests/csv/panel_2.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/test.config b/conf/test.config index aba01239..5231aa59 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,7 +25,7 @@ params { // Genome references fasta = params.pipelines_testdata_base_path + "reference_genome/21_22/hs38DH.chr21_22.fa" - panel = "${projectDir}/tests/csv/panel.csv" + panel = "${projectDir}/tests/csv/panel_2.csv" phased = true // Pipeline steps diff --git a/tests/csv/panel_2.csv b/tests/csv/panel_2.csv index 5f1fb144..f06d9a19 100644 --- a/tests/csv/panel_2.csv +++ b/tests/csv/panel_2.csv @@ -1,2 +1,2 @@ panel,chr,vcf,index,hap,legend 
-1000GP.s.norel,chr22,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.bcf,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.bcf.csi,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22.hap.gz,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22.legend.gz +1000GP.s.norel,chr22,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22_fixed.vcf.gz,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22_fixed.vcf.gz.tbi,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22.hap.gz,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22.legend.gz From 247951c31326b924a3896048ddc24aa6029b634f Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sat, 8 Jun 2024 11:51:15 +0000 Subject: [PATCH 14/63] change chunks to those from specific region --- tests/csv/chunks.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/csv/chunks.csv b/tests/csv/chunks.csv index 473031ee..e77b3b76 100644 --- a/tests/csv/chunks.csv +++ b/tests/csv/chunks.csv @@ -1,2 +1,2 @@ panel,chr,file -1000GP.s.norel,chr22,"https://github.com/nf-core/test-datasets/raw/phaseimpute/data/panel/22/chr22_chunks_glimpse1.txt" +1000GP.s.norel,chr22,"https://github.com/nf-core/test-datasets/raw/phaseimpute/data/panel/22/1000GP.s.norel_chr22_chunks_glimpse1.txt" From b4dd4cfc69128b745eedb8e35e5647bd542afc35 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sat, 8 Jun 2024 12:20:03 +0000 Subject: [PATCH 15/63] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 
6c101351..0895a46b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co - [#68](https://github.com/nf-core/phaseimpute/pull/68) - QUILT can handle external params chunks and hap-legend files. - [#78](https://github.com/nf-core/phaseimpute/pull/78) - Separate validate step from panel preparation. - [#84](https://github.com/nf-core/phaseimpute/pull/84) - Change depth computation to use SAMTOOLS_DEPTH and make separation by chromosome only if regions are specified. +- [#85](https://github.com/nf-core/phaseimpute/pull/85) - Use external params in individual tests for tools. ### `Fixed` From 5705fb34082d025063e4606c4f7b169431b3f247 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sat, 8 Jun 2024 12:27:13 +0000 Subject: [PATCH 16/63] update snapshot --- workflows/phaseimpute/tests/test_all.nf.test.snap | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/workflows/phaseimpute/tests/test_all.nf.test.snap b/workflows/phaseimpute/tests/test_all.nf.test.snap index 14dd08ec..82562f27 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test.snap +++ b/workflows/phaseimpute/tests/test_all.nf.test.snap @@ -78,6 +78,11 @@ "prep_panel/haplegend/1000GP.s.norel_chr22.legend.gz", "prep_panel/haplegend/1000GP.s.norel_chr22.samples", "prep_panel/haplegend/versions.yml", + "prep_panel/normalized/1000GP.s.norel_chr21_biallelic_snps.vcf.gz", + "prep_panel/normalized/1000GP.s.norel_chr21_biallelic_snps.vcf.gz.tbi", + "prep_panel/normalized/1000GP.s.norel_chr22_biallelic_snps.vcf.gz", + "prep_panel/normalized/1000GP.s.norel_chr22_biallelic_snps.vcf.gz.tbi", + "prep_panel/normalized/versions.yml", "prep_panel/sites/tsv/1000GP.s.norel_chr21_glimpse1_sites_tsv.txt.gz", "prep_panel/sites/tsv/1000GP.s.norel_chr22_glimpse1_sites_tsv.txt.gz", "prep_panel/sites/tsv/versions.yml", @@ -114,7 +119,7 @@ "nf-test": "0.8.4", "nextflow": "24.04.2" }, - 
"timestamp": "2024-06-05T00:38:54.77344212" + "timestamp": "2024-06-08T12:22:26.975587517" }, "Check test_validate": { "content": [ From 46ca0a119be27159075a0d94b384f0c6947b3a02 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Sun, 9 Jun 2024 19:17:01 +0200 Subject: [PATCH 17/63] Update multiqc report --- conf/modules.config | 44 ++- conf/steps/imputation_glimpse1.config | 2 +- conf/steps/panel_prep.config | 9 - conf/steps/simulation.config | 10 - conf/steps/validation.config | 4 +- modules/nf-core/bcftools/pluginsplit/main.nf | 7 +- workflows/phaseimpute/main.nf | 296 ++++++++++--------- 7 files changed, 212 insertions(+), 160 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index f212b77c..9bd02bf3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,7 +18,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: 'MULTIQC' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:MULTIQC' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, @@ -26,4 +26,46 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } + + // Stats + // Simulation + // Coverage process + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_TRT' { + ext.prefix = { "${meta.id}_truth" } + publishDir = [ enabled: false ] + } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_SIM' { + ext.prefix = { "${meta.id}_sim" } + publishDir = [ enabled: false ] + } + + // VCF + // PANEL + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_STATS_PANEL' { + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/prep_panel/stats/" }, + mode: params.publish_dir_mode, + enabled: true + ] + } + + // TRUTH + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_STATS_TRUTH' { + ext.prefix = { "${meta.id}_truth" } + publishDir = [ + path: { "${params.outdir}/validation/stats" }, + mode: params.publish_dir_mode, + enabled: true, + ] + } + // IMPUTE + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_STATS_TOOLS' { + ext.prefix = { "${meta.id}_${meta.tools}" } + publishDir = [ + path: { "${params.outdir}/imputation/stats" }, + mode: params.publish_dir_mode, + enabled: true + ] + } } diff --git a/conf/steps/imputation_glimpse1.config b/conf/steps/imputation_glimpse1.config index 29ece5cc..0e3b8a24 100644 --- a/conf/steps/imputation_glimpse1.config +++ b/conf/steps/imputation_glimpse1.config @@ -33,7 +33,7 @@ process { "-Aim", "-C alleles" ].join(' ') - ext.prefix = { "${meta.id}.call" } + ext.prefix = { "${meta.id}" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:BAM_GL_BCFTOOLS:BCFTOOLS_ANNOTATE' { diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index fd051988..b949c03e 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -236,13 +236,4 @@ process { ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_STATS:.*' { - prefix = { "${meta.id}_${meta.chr}_panel" } - publishDir = [ - path: { "${params.outdir}/prep_panel/stats/" }, - mode: params.publish_dir_mode, - enabled: true - ] - } - } diff 
--git a/conf/steps/simulation.config b/conf/steps/simulation.config index f3355993..8e5c7a2d 100644 --- a/conf/steps/simulation.config +++ b/conf/steps/simulation.config @@ -52,14 +52,4 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_INDEX_2' { ext.args = "" } - - // Coverage process - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_TRT' { - ext.prefix = { "${meta.id}_truth" } - publishDir = [ enabled: false ] - } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_SIM' { - ext.prefix = { "${meta.id}" } - publishDir = [ enabled: false ] - } } diff --git a/conf/steps/validation.config b/conf/steps/validation.config index dcd1f906..198ebe3c 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -28,7 +28,7 @@ process { "-Aim", "-C alleles" ].join(' ') - ext.prefix = { "${meta.id}_truth.call" } + ext.prefix = { "${meta.id}_truth" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:BCFTOOLS_ANNOTATE' { @@ -42,7 +42,7 @@ process { // Concatenate the truth set withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:.*' { - ext.prefix = { "${meta.id}_truth_concat" } + ext.prefix = { "${meta.id}_truth" } publishDir = [ path: { "${params.outdir}/validation/concat" }, mode: params.publish_dir_mode, diff --git a/modules/nf-core/bcftools/pluginsplit/main.nf b/modules/nf-core/bcftools/pluginsplit/main.nf index 4ff09768..8b910ab7 100644 --- a/modules/nf-core/bcftools/pluginsplit/main.nf +++ b/modules/nf-core/bcftools/pluginsplit/main.nf @@ -29,6 +29,11 @@ process BCFTOOLS_PLUGINSPLIT { def groups_arg = groups ? "--groups-file ${groups}" : "" def regions_arg = regions ? "--regions-file ${regions}" : "" def targets_arg = targets ? "--targets-file ${targets}" : "" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? 
"vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" """ bcftools plugin split \\ @@ -40,7 +45,7 @@ process BCFTOOLS_PLUGINSPLIT { ${targets_arg} \\ --output ${prefix} - mv ${prefix}/* . + for i in ${prefix}/*; do cp "\$i" "./\$(basename "\$i" .${extension})_stitch.${extension}"; done cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index a7493b24..e6e45286 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -36,7 +36,7 @@ include { VCF_CONCATENATE_BCFTOOLS as CONCAT_PANEL } from '../../subworkflows/ include { CHANNEL_POSFILE_CREATE_CSV } from '../../subworkflows/local/channel_posfile_create_csv' include { CHANNEL_CHUNKS_CREATE_CSV } from '../../subworkflows/local/channel_chunks_create_csv' include { CHANNEL_PANEL_CREATE_CSV } from '../../subworkflows/local/channel_panel_create_csv' -include { BCFTOOLS_STATS } from '../../modules/nf-core/bcftools/stats/main' +include { BCFTOOLS_STATS as BCFTOOLS_STATS_PANEL } from '../../modules/nf-core/bcftools/stats' // Imputation subworkflows include { CHANNEL_IMPUTE_CREATE_CSV } from '../../subworkflows/local/channel_impute_create_csv' @@ -61,8 +61,12 @@ include { BAM_IMPUTE_STITCH } from '../../subworkflows/ include { VCF_SAMPLES_BCFTOOLS } from '../../subworkflows/local/vcf_samples_bcftools' include { VCF_CONCATENATE_BCFTOOLS as CONCAT_STITCH } from '../../subworkflows/local/vcf_concatenate_bcftools' +// Imputation stats +include { BCFTOOLS_STATS as BCFTOOLS_STATS_TOOLS } from '../../modules/nf-core/bcftools/stats' + // Concordance subworkflows include { BAM_GL_BCFTOOLS as GL_TRUTH } from '../../subworkflows/local/bam_gl_bcftools' +include { BCFTOOLS_STATS as BCFTOOLS_STATS_TRUTH } from '../../modules/nf-core/bcftools/stats' include { VCF_CONCATENATE_BCFTOOLS as CONCAT_TRUTH } from '../../subworkflows/local/vcf_concatenate_bcftools' include { VCF_CONCORDANCE_GLIMPSE2 } from 
'../../subworkflows/local/vcf_concordance_glimpse2' @@ -174,17 +178,6 @@ workflow PHASEIMPUTE { ch_versions = ch_versions.mix(VCF_PHASE_SHAPEIT5.out.versions) } - // Compute stats on panel - BCFTOOLS_STATS( - ch_panel_phased, - [[],[]], - [[],[]], - [[],[]], - [[],[]], - ch_fasta.map{ [it[0], it[1]] }) - ch_versions = ch_versions.mix(BCFTOOLS_STATS.out.versions) - ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_STATS.out.stats.map{ [it[1]] }) - // Create chunks from reference VCF VCF_CHUNK_GLIMPSE(ch_panel_phased, ch_map) ch_versions = ch_versions.mix(VCF_CHUNK_GLIMPSE.out.versions) @@ -199,142 +192,152 @@ workflow PHASEIMPUTE { } if (params.steps.split(',').contains("impute") || params.steps.split(',').contains("all")) { - if (params.tools.split(',').contains("glimpse1")) { - log.info("Impute with GLIMPSE1") - - // Use chunks from parameters if provided or use previous chunks from panelprep - if (params.chunks) { - CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse") - ch_chunks_glimpse1 = CHUNK_PREPARE_CHANNEL.out.chunks - } else if (params.panel && params.steps.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { - ch_chunks_glimpse1 = VCF_CHUNK_GLIMPSE.out.chunks_glimpse1 - } - - if (params.posfile) { - ch_posfile_glimpse = ch_posfile.map {meta, vcf, csi, txt -> [ meta, vcf, txt ]} - } - // Use panel from parameters if provided - if (params.panel && !params.steps.split(',').find { it in ["all", "panelprep"] }) { - ch_panel_phased = ch_panel - } - - // Run imputation - VCF_IMPUTE_GLIMPSE1( - ch_input_impute, - ch_posfile_glimpse, - ch_panel_phased, - ch_chunks_glimpse1, - ch_fasta - ) - ch_versions = ch_versions.mix(VCF_IMPUTE_GLIMPSE1.out.versions) - ch_multiqc_files = ch_multiqc_files.mix(VCF_IMPUTE_GLIMPSE1.out.multiqc_files) - - // Concatenate by chromosomes - CONCAT_GLIMPSE1(VCF_IMPUTE_GLIMPSE1.out.vcf_tbi) - ch_versions = ch_versions.mix(CONCAT_GLIMPSE1.out.versions) - - // Add results to input validate - ch_input_validate = 
ch_input_validate.mix(CONCAT_GLIMPSE1.out.vcf_tbi) + if (params.tools.split(',').contains("glimpse1")) { + log.info("Impute with GLIMPSE1") + + // Use chunks from parameters if provided or use previous chunks from panelprep + if (params.chunks) { + CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse") + ch_chunks_glimpse1 = CHUNK_PREPARE_CHANNEL.out.chunks + } else if (params.panel && params.steps.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { + ch_chunks_glimpse1 = VCF_CHUNK_GLIMPSE.out.chunks_glimpse1 + } + if (params.posfile) { + ch_posfile_glimpse = ch_posfile.map {meta, vcf, csi, txt -> [ meta, vcf, txt ]} } - if (params.tools.split(',').contains("glimpse2")) { - log.info("Impute with GLIMPSE2") - - // Use chunks from parameters if provided or use previous chunks from panelprep - if (params.panel && params.steps.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { - ch_chunks_glimpse2 = VCF_CHUNK_GLIMPSE.out.chunks_glimpse2 - } else if (params.chunks) { - CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse") - ch_chunks_glimpse2 = CHUNK_PREPARE_CHANNEL.out.chunks - } - - // Use panel from parameters if provided - if (params.panel && !params.steps.split(',').find { it in ["all", "panelprep"] }) { - ch_panel_phased = ch_panel - } - - // Run imputation - VCF_IMPUTE_GLIMPSE2( - ch_input_impute, - ch_panel_phased, - ch_chunks_glimpse2, - ch_fasta - ) - ch_versions = ch_versions.mix(VCF_IMPUTE_GLIMPSE2.out.versions) - // Concatenate by chromosomes - CONCAT_GLIMPSE2(VCF_IMPUTE_GLIMPSE2.out.vcf_tbi) - ch_versions = ch_versions.mix(CONCAT_GLIMPSE2.out.versions) - - // Add results to input validate - ch_input_validate = ch_input_validate.mix(CONCAT_GLIMPSE2.out.vcf_tbi) + // Use panel from parameters if provided + if (params.panel && !params.steps.split(',').find { it in ["all", "panelprep"] }) { + ch_panel_phased = ch_panel } - if (params.tools.split(',').contains("stitch")) { - log.info("Impute with STITCH") - // Use provided posfile - if 
(params.posfile) { - ch_posfile_stitch = POSFILE_PREPARE_GAWK(ch_posfile) - } + // Run imputation + VCF_IMPUTE_GLIMPSE1( + ch_input_impute, + ch_posfile_glimpse, + ch_panel_phased, + ch_chunks_glimpse1, + ch_fasta + ) + ch_versions = ch_versions.mix(VCF_IMPUTE_GLIMPSE1.out.versions) - // Prepare inputs - PREPARE_INPUT_STITCH(ch_input_impute, ch_posfile_stitch, ch_region) - ch_versions = ch_versions.mix(PREPARE_INPUT_STITCH.out.versions) + // Concatenate by chromosomes + CONCAT_GLIMPSE1(VCF_IMPUTE_GLIMPSE1.out.vcf_tbi) + ch_versions = ch_versions.mix(CONCAT_GLIMPSE1.out.versions) - // Impute with STITCH - BAM_IMPUTE_STITCH ( - PREPARE_INPUT_STITCH.out.stitch_parameters, - PREPARE_INPUT_STITCH.out.stitch_samples, - ch_fasta - ) - ch_versions = ch_versions.mix(BAM_IMPUTE_STITCH.out.versions) + // Add results to input validate + ch_input_validate = ch_input_validate.mix(CONCAT_GLIMPSE1.out.vcf_tbi) - // Concatenate by chromosomes - CONCAT_STITCH(BAM_IMPUTE_STITCH.out.vcf_tbi) - ch_versions = ch_versions.mix(CONCAT_STITCH.out.versions) + } + if (params.tools.split(',').contains("glimpse2")) { + log.info("Impute with GLIMPSE2") + + // Use chunks from parameters if provided or use previous chunks from panelprep + if (params.panel && params.steps.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { + ch_chunks_glimpse2 = VCF_CHUNK_GLIMPSE.out.chunks_glimpse2 + } else if (params.chunks) { + CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse") + ch_chunks_glimpse2 = CHUNK_PREPARE_CHANNEL.out.chunks + } - // Separate by samples - VCF_SAMPLES_BCFTOOLS(CONCAT_STITCH.out.vcf_tbi) - ch_versions = ch_versions.mix(VCF_SAMPLES_BCFTOOLS.out.versions) + // Use panel from parameters if provided + if (params.panel && !params.steps.split(',').find { it in ["all", "panelprep"] }) { + ch_panel_phased = ch_panel + } - // Add results to input validate - ch_input_validate = ch_input_validate.mix(VCF_SAMPLES_BCFTOOLS.out.vcf_tbi) + // Run imputation + VCF_IMPUTE_GLIMPSE2( + 
ch_input_impute, + ch_panel_phased, + ch_chunks_glimpse2, + ch_fasta + ) + ch_versions = ch_versions.mix(VCF_IMPUTE_GLIMPSE2.out.versions) + // Concatenate by chromosomes + CONCAT_GLIMPSE2(VCF_IMPUTE_GLIMPSE2.out.vcf_tbi) + ch_versions = ch_versions.mix(CONCAT_GLIMPSE2.out.versions) + // Add results to input validate + ch_input_validate = ch_input_validate.mix(CONCAT_GLIMPSE2.out.vcf_tbi) + } + if (params.tools.split(',').contains("stitch")) { + log.info("Impute with STITCH") + + // Use provided posfile + if (params.posfile) { + ch_posfile_stitch = POSFILE_PREPARE_GAWK(ch_posfile) } - if (params.tools.split(',').contains("quilt")) { - log.info("Impute with QUILT") - - // Use previous chunks if --steps panelprep - if (params.panel && params.steps.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { - ch_chunks_quilt = VCF_CHUNK_GLIMPSE.out.chunks_quilt - // Use provided chunks if --chunks - } else if (params.chunks) { - CHUNK_PREPARE_CHANNEL(ch_chunks, "quilt") - ch_chunks_quilt = CHUNK_PREPARE_CHANNEL.out.chunks - } - - // Use previous hap_legend if --steps panelprep - if (params.steps.split(',').find { it in ["all", "panelprep"] }) { - ch_hap_legend = VCF_NORMALIZE_BCFTOOLS.out.hap_legend - } - - // Impute BAMs with QUILT - BAM_IMPUTE_QUILT( - ch_input_impute, - ch_hap_legend, - ch_chunks_quilt - ) - ch_versions = ch_versions.mix(BAM_IMPUTE_QUILT.out.versions) - - // Concatenate by chromosomes - CONCAT_QUILT(BAM_IMPUTE_QUILT.out.vcf_tbi) - ch_versions = ch_versions.mix(CONCAT_QUILT.out.versions) - - // Add results to input validate - ch_input_validate = ch_input_validate.mix(CONCAT_QUILT.out.vcf_tbi) + + // Prepare inputs + PREPARE_INPUT_STITCH(ch_input_impute, ch_posfile_stitch, ch_region) + ch_versions = ch_versions.mix(PREPARE_INPUT_STITCH.out.versions) + + // Impute with STITCH + BAM_IMPUTE_STITCH ( + PREPARE_INPUT_STITCH.out.stitch_parameters, + PREPARE_INPUT_STITCH.out.stitch_samples, + ch_fasta + ) + ch_versions = 
ch_versions.mix(BAM_IMPUTE_STITCH.out.versions) + + // Concatenate by chromosomes + CONCAT_STITCH(BAM_IMPUTE_STITCH.out.vcf_tbi) + ch_versions = ch_versions.mix(CONCAT_STITCH.out.versions) + + // Separate by samples + VCF_SAMPLES_BCFTOOLS(CONCAT_STITCH.out.vcf_tbi) + ch_versions = ch_versions.mix(VCF_SAMPLES_BCFTOOLS.out.versions) + + // Add results to input validate + ch_input_validate = ch_input_validate.mix(VCF_SAMPLES_BCFTOOLS.out.vcf_tbi) + + } + if (params.tools.split(',').contains("quilt")) { + log.info("Impute with QUILT") + + // Use previous chunks if --steps panelprep + if (params.panel && params.steps.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { + ch_chunks_quilt = VCF_CHUNK_GLIMPSE.out.chunks_quilt + // Use provided chunks if --chunks + } else if (params.chunks) { + CHUNK_PREPARE_CHANNEL(ch_chunks, "quilt") + ch_chunks_quilt = CHUNK_PREPARE_CHANNEL.out.chunks } - // Create CSV from imputation step - CHANNEL_IMPUTE_CREATE_CSV(ch_input_validate, params.outdir) + + // Use previous hap_legend if --steps panelprep + if (params.steps.split(',').find { it in ["all", "panelprep"] }) { + ch_hap_legend = VCF_NORMALIZE_BCFTOOLS.out.hap_legend + } + + // Impute BAMs with QUILT + BAM_IMPUTE_QUILT( + ch_input_impute, + ch_hap_legend, + ch_chunks_quilt + ) + ch_versions = ch_versions.mix(BAM_IMPUTE_QUILT.out.versions) + + // Concatenate by chromosomes + CONCAT_QUILT(BAM_IMPUTE_QUILT.out.vcf_tbi) + ch_versions = ch_versions.mix(CONCAT_QUILT.out.versions) + + // Add results to input validate + ch_input_validate = ch_input_validate.mix(CONCAT_QUILT.out.vcf_tbi) } + // Create CSV from imputation step + CHANNEL_IMPUTE_CREATE_CSV(ch_input_validate, params.outdir) + + // Compute stats on imputed files + BCFTOOLS_STATS_TOOLS( + ch_input_validate, + [[],[]], + [[],[]], + [[],[]], + [[],[]], + ch_fasta.map{ [it[0], it[1]] }) + ch_versions = ch_versions.mix(BCFTOOLS_STATS_TOOLS.out.versions) + ch_multiqc_files = 
ch_multiqc_files.mix(BCFTOOLS_STATS_TOOLS.out.stats.map{ [it[1]] }) + } if (params.steps.split(',').contains("validate") || params.steps.split(',').contains("all")) { @@ -349,6 +352,17 @@ workflow PHASEIMPUTE { ch_versions = ch_versions.mix(CONCAT_PANEL.out.versions) ch_panel_sites = CONCAT_PANEL.out.vcf_tbi + // Compute stats on panel + BCFTOOLS_STATS_PANEL( + ch_panel_sites, + [[],[]], + [[],[]], + [[],[]], + [[],[]], + ch_fasta.map{ [it[0], it[1]] }) + ch_versions = ch_versions.mix(BCFTOOLS_STATS_PANEL.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_STATS_PANEL.out.stats.map{ [it[1]] }) + ch_truth_vcf = Channel.empty() // Get extension of input files @@ -367,7 +381,6 @@ workflow PHASEIMPUTE { ch_posfile_glimpse, ch_fasta ) - ch_multiqc_files = ch_multiqc_files.mix(GL_TRUTH.out.multiqc_files) ch_versions = ch_versions.mix(GL_TRUTH.out.versions) // Mix the original vcf and the computed vcf @@ -379,6 +392,17 @@ workflow PHASEIMPUTE { CONCAT_TRUTH(ch_truth_vcf) ch_versions = ch_versions.mix(CONCAT_TRUTH.out.versions) + // Compute stats on truth files + BCFTOOLS_STATS_TRUTH( + CONCAT_TRUTH.out.vcf_tbi, + [[],[]], + [[],[]], + [[],[]], + [[],[]], + ch_fasta.map{ [it[0], it[1]] }) + ch_versions = ch_versions.mix(BCFTOOLS_STATS_TRUTH.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_STATS_TRUTH.out.stats.map{ [it[1]] }) + // Compute concordance analysis VCF_CONCORDANCE_GLIMPSE2( ch_input_validate, From af47ed51660ec8fbb93db001ca96ae0427a49140 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Tue, 11 Jun 2024 16:53:39 +0200 Subject: [PATCH 18/63] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0895a46b..f0a9db96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co - [#78](https://github.com/nf-core/phaseimpute/pull/78) - Separate validate step from panel preparation. 
- [#84](https://github.com/nf-core/phaseimpute/pull/84) - Change depth computation to use SAMTOOLS_DEPTH and make separation by chromosome only if regions are specified. - [#85](https://github.com/nf-core/phaseimpute/pull/85) - Use external params in individual tests for tools. +- [#88](https://github.com/nf-core/phaseimpute/pull/88) - Improve multiqc report with more information. ### `Fixed` From 3ffdcdae1e28d0491243823676a120a21737c5b5 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Tue, 11 Jun 2024 16:57:03 +0200 Subject: [PATCH 19/63] Fix linting --- workflows/phaseimpute/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index d8ae5170..730c34da 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -206,7 +206,7 @@ workflow PHASEIMPUTE { if (params.posfile) { ch_posfile_glimpse = ch_posfile.map {meta, vcf, csi, txt -> [ meta, vcf, txt ]} } - + // Use panel from parameters if provided if (params.panel && !params.steps.split(',').find { it in ["all", "panelprep"] }) { ch_panel_phased = ch_panel From 535bc0e4e2781f77543884761d872f59d4bde8b2 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Tue, 11 Jun 2024 17:04:03 +0200 Subject: [PATCH 20/63] Add patch for bcftools pluginsplit --- conf/steps/imputation_stitch.config | 1 + modules.json | 169 +++++++++++++----- .../pluginsplit/bcftools-pluginsplit.diff | 25 ++- modules/nf-core/bcftools/pluginsplit/main.nf | 3 +- 4 files changed, 154 insertions(+), 44 deletions(-) diff --git a/conf/steps/imputation_stitch.config b/conf/steps/imputation_stitch.config index 7fd74708..e0e2de03 100644 --- a/conf/steps/imputation_stitch.config +++ b/conf/steps/imputation_stitch.config @@ -60,6 +60,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SAMPLES_BCFTOOLS:BCFTOOLS_PLUGINSPLIT' { ext.args = "-Oz" + ext.suffix = "_stitch" } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SAMPLES_BCFTOOLS:BCFTOOLS_INDEX' { 
diff --git a/modules.json b/modules.json index 29e7469b..7ff4b677 100644 --- a/modules.json +++ b/modules.json @@ -8,194 +8,269 @@ "bcftools/annotate": { "branch": "master", "git_sha": "2ad29c2aed06d815d9f68ad7ba20b3b1c574ce9c", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bcftools/annotate/bcftools-annotate.diff" }, "bcftools/concat": { "branch": "master", "git_sha": "b42fec6f7c6e5d0716685cabb825ef6bf6e386b5", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bcftools/concat/bcftools-concat.diff" }, "bcftools/convert": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/index": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": ["multiple_impute_glimpse2", "vcf_impute_glimpse", "vcf_phase_shapeit5"] + "installed_by": [ + "multiple_impute_glimpse2", + "vcf_impute_glimpse", + "vcf_phase_shapeit5" + ] }, "bcftools/mpileup": { "branch": "master", "git_sha": "e7df38a545d7d72083eededabd8849f731a01502", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bcftools/mpileup/bcftools-mpileup.diff" }, "bcftools/norm": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/pluginsplit": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ], + "patch": "modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff" }, "bcftools/query": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/stats": { "branch": "master", "git_sha": "a5ba4d59c2b248c0379b0f8aeb4e7e754566cd1f", - "installed_by": ["modules"] + 
"installed_by": [ + "modules" + ] }, "bcftools/view": { "branch": "master", "git_sha": "1013101da4252623fd7acf19cc581bae91d4f839", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bcftools/view/bcftools-view.diff" }, "bedtools/makewindows": { "branch": "master", "git_sha": "3b248b84694d1939ac4bb33df84bf6233a34d668", - "installed_by": ["vcf_phase_shapeit5"] + "installed_by": [ + "vcf_phase_shapeit5" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "de45447d060b8c8b98575bc637a4a575fd0638e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gawk": { "branch": "master", "git_sha": "b42fec6f7c6e5d0716685cabb825ef6bf6e386b5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "glimpse/chunk": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": ["vcf_impute_glimpse"] + "installed_by": [ + "vcf_impute_glimpse" + ] }, "glimpse/ligate": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": ["vcf_impute_glimpse"] + "installed_by": [ + "vcf_impute_glimpse" + ] }, "glimpse/phase": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": ["vcf_impute_glimpse"] + "installed_by": [ + "vcf_impute_glimpse" + ] }, "glimpse2/chunk": { "branch": "master", "git_sha": "14ba46490cae3c78ed8e8f48d2c0f8f3be1e7c03", - "installed_by": ["multiple_impute_glimpse2"], + "installed_by": [ + "multiple_impute_glimpse2" + ], "patch": "modules/nf-core/glimpse2/chunk/glimpse2-chunk.diff" }, "glimpse2/concordance": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "glimpse2/ligate": { "branch": "master", "git_sha": "09d793219114004f268b98663b12f8062097a8c5", - "installed_by": ["multiple_impute_glimpse2"] + "installed_by": [ + "multiple_impute_glimpse2" + ] }, "glimpse2/phase": { 
"branch": "master", "git_sha": "9c71d32e372650e8bb3e1fb15339017aad5e3f7f", - "installed_by": ["multiple_impute_glimpse2"], + "installed_by": [ + "multiple_impute_glimpse2" + ], "patch": "modules/nf-core/glimpse2/phase/glimpse2-phase.diff" }, "glimpse2/splitreference": { "branch": "master", "git_sha": "fa12139827a18b324bd63fce654818586a8e9cc7", - "installed_by": ["multiple_impute_glimpse2"] + "installed_by": [ + "multiple_impute_glimpse2" + ] }, "gunzip": { "branch": "master", "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "quilt/quilt": { "branch": "master", "git_sha": "46265545d61e7f482adf40de941cc9a94e479bbe", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/coverage": { "branch": "master", "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/samtools/coverage/samtools-coverage.diff" }, "samtools/depth": { "branch": "master", "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/samtools/depth/samtools-depth.diff" }, "samtools/faidx": { "branch": "master", "git_sha": "f153f1f10e1083c49935565844cccb7453021682", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/index": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/merge": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/samtools/merge/samtools-merge.diff" }, "samtools/view": { "branch": "master", "git_sha": 
"0bd7d2333a88483aa0476acea172e9f5f6dd83bb", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/samtools/view/samtools-view.diff" }, "shapeit5/ligate": { "branch": "master", "git_sha": "dcf17cc0ed8fd5ea57e61a13e0147cddb5c1ee30", - "installed_by": ["vcf_phase_shapeit5"] + "installed_by": [ + "vcf_phase_shapeit5" + ] }, "shapeit5/phasecommon": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["vcf_phase_shapeit5"], + "installed_by": [ + "vcf_phase_shapeit5" + ], "patch": "modules/nf-core/shapeit5/phasecommon/shapeit5-phasecommon.diff" }, "stitch": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/stitch/stitch.diff" }, "tabix/bgzip": { "branch": "master", "git_sha": "09d3c8c29b31a2dfd610305b10550f0e1dbcd4a9", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tabix/tabix": { "branch": "master", "git_sha": "9502adb23c0b97ed8e616bbbdfa73b4585aec9a1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "vcflib/vcffixup": { "branch": "master", "git_sha": "072cf562e143252953a1b5c5ed38ec55eaa930c8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -204,30 +279,40 @@ "multiple_impute_glimpse2": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": 
"5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "vcf_impute_glimpse": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff b/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff index 505fb035..64dccbba 100644 --- a/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff +++ b/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff @@ -1,7 +1,21 @@ Changes in module 'nf-core/bcftools/pluginsplit' --- modules/nf-core/bcftools/pluginsplit/main.nf +++ modules/nf-core/bcftools/pluginsplit/main.nf -@@ -32,6 +32,7 @@ +@@ -24,14 +24,21 @@ + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" ++ def suffix = task.ext.suffix ?: "" + + def samples_arg = samples ? "--samples-file ${samples}" : "" + def groups_arg = groups ? "--groups-file ${groups}" : "" + def regions_arg = regions ? "--regions-file ${regions}" : "" + def targets_arg = targets ? "--targets-file ${targets}" : "" ++ def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : ++ args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : ++ args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : ++ args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : ++ "vcf" """ bcftools plugin split \\ @@ -9,5 +23,14 @@ Changes in module 'nf-core/bcftools/pluginsplit' ${vcf} \\ ${samples_arg} \\ ${groups_arg} \\ +@@ -39,7 +46,7 @@ + ${targets_arg} \\ + --output ${prefix} + +- mv ${prefix}/* . 
++ for i in ${prefix}/*; do cp "\$i" "./\$(basename "\$i" .${extension})${suffix}.${extension}"; done + + cat <<-END_VERSIONS > versions.yml + "${task.process}": ************************************************************ diff --git a/modules/nf-core/bcftools/pluginsplit/main.nf b/modules/nf-core/bcftools/pluginsplit/main.nf index 8b910ab7..4540f15b 100644 --- a/modules/nf-core/bcftools/pluginsplit/main.nf +++ b/modules/nf-core/bcftools/pluginsplit/main.nf @@ -24,6 +24,7 @@ process BCFTOOLS_PLUGINSPLIT { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "" def samples_arg = samples ? "--samples-file ${samples}" : "" def groups_arg = groups ? "--groups-file ${groups}" : "" @@ -45,7 +46,7 @@ process BCFTOOLS_PLUGINSPLIT { ${targets_arg} \\ --output ${prefix} - for i in ${prefix}/*; do cp "\$i" "./\$(basename "\$i" .${extension})_stitch.${extension}"; done + for i in ${prefix}/*; do cp "\$i" "./\$(basename "\$i" .${extension})${suffix}.${extension}"; done cat <<-END_VERSIONS > versions.yml "${task.process}": From 2c990267728633493c9ffe8b04c5b3f998a7f102 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Tue, 11 Jun 2024 17:12:16 +0200 Subject: [PATCH 21/63] Fix linting --- modules.json | 168 +++++++++++++-------------------------------------- 1 file changed, 42 insertions(+), 126 deletions(-) diff --git a/modules.json b/modules.json index 7ff4b677..d7f85d35 100644 --- a/modules.json +++ b/modules.json @@ -8,269 +8,195 @@ "bcftools/annotate": { "branch": "master", "git_sha": "2ad29c2aed06d815d9f68ad7ba20b3b1c574ce9c", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bcftools/annotate/bcftools-annotate.diff" }, "bcftools/concat": { "branch": "master", "git_sha": "b42fec6f7c6e5d0716685cabb825ef6bf6e386b5", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bcftools/concat/bcftools-concat.diff" }, "bcftools/convert": { 
"branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/index": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "multiple_impute_glimpse2", - "vcf_impute_glimpse", - "vcf_phase_shapeit5" - ] + "installed_by": ["multiple_impute_glimpse2", "vcf_impute_glimpse", "vcf_phase_shapeit5"] }, "bcftools/mpileup": { "branch": "master", "git_sha": "e7df38a545d7d72083eededabd8849f731a01502", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bcftools/mpileup/bcftools-mpileup.diff" }, "bcftools/norm": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/pluginsplit": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff" }, "bcftools/query": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/stats": { "branch": "master", "git_sha": "a5ba4d59c2b248c0379b0f8aeb4e7e754566cd1f", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/view": { "branch": "master", "git_sha": "1013101da4252623fd7acf19cc581bae91d4f839", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bcftools/view/bcftools-view.diff" }, "bedtools/makewindows": { "branch": "master", "git_sha": "3b248b84694d1939ac4bb33df84bf6233a34d668", - "installed_by": [ - "vcf_phase_shapeit5" - ] + "installed_by": ["vcf_phase_shapeit5"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "de45447d060b8c8b98575bc637a4a575fd0638e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, 
"gawk": { "branch": "master", "git_sha": "b42fec6f7c6e5d0716685cabb825ef6bf6e386b5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "glimpse/chunk": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": [ - "vcf_impute_glimpse" - ] + "installed_by": ["vcf_impute_glimpse"] }, "glimpse/ligate": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": [ - "vcf_impute_glimpse" - ] + "installed_by": ["vcf_impute_glimpse"] }, "glimpse/phase": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": [ - "vcf_impute_glimpse" - ] + "installed_by": ["vcf_impute_glimpse"] }, "glimpse2/chunk": { "branch": "master", "git_sha": "14ba46490cae3c78ed8e8f48d2c0f8f3be1e7c03", - "installed_by": [ - "multiple_impute_glimpse2" - ], + "installed_by": ["multiple_impute_glimpse2"], "patch": "modules/nf-core/glimpse2/chunk/glimpse2-chunk.diff" }, "glimpse2/concordance": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "glimpse2/ligate": { "branch": "master", "git_sha": "09d793219114004f268b98663b12f8062097a8c5", - "installed_by": [ - "multiple_impute_glimpse2" - ] + "installed_by": ["multiple_impute_glimpse2"] }, "glimpse2/phase": { "branch": "master", "git_sha": "9c71d32e372650e8bb3e1fb15339017aad5e3f7f", - "installed_by": [ - "multiple_impute_glimpse2" - ], + "installed_by": ["multiple_impute_glimpse2"], "patch": "modules/nf-core/glimpse2/phase/glimpse2-phase.diff" }, "glimpse2/splitreference": { "branch": "master", "git_sha": "fa12139827a18b324bd63fce654818586a8e9cc7", - "installed_by": [ - "multiple_impute_glimpse2" - ] + "installed_by": ["multiple_impute_glimpse2"] }, "gunzip": { "branch": "master", "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", 
"git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "quilt/quilt": { "branch": "master", "git_sha": "46265545d61e7f482adf40de941cc9a94e479bbe", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/coverage": { "branch": "master", "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/samtools/coverage/samtools-coverage.diff" }, "samtools/depth": { "branch": "master", "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/samtools/depth/samtools-depth.diff" }, "samtools/faidx": { "branch": "master", "git_sha": "f153f1f10e1083c49935565844cccb7453021682", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/index": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/samtools/merge/samtools-merge.diff" }, "samtools/view": { "branch": "master", "git_sha": "0bd7d2333a88483aa0476acea172e9f5f6dd83bb", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/samtools/view/samtools-view.diff" }, "shapeit5/ligate": { "branch": "master", "git_sha": "dcf17cc0ed8fd5ea57e61a13e0147cddb5c1ee30", - "installed_by": [ - "vcf_phase_shapeit5" - ] + "installed_by": ["vcf_phase_shapeit5"] }, "shapeit5/phasecommon": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "vcf_phase_shapeit5" - ], + "installed_by": ["vcf_phase_shapeit5"], "patch": "modules/nf-core/shapeit5/phasecommon/shapeit5-phasecommon.diff" }, "stitch": { 
"branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/stitch/stitch.diff" }, "tabix/bgzip": { "branch": "master", "git_sha": "09d3c8c29b31a2dfd610305b10550f0e1dbcd4a9", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tabix/tabix": { "branch": "master", "git_sha": "9502adb23c0b97ed8e616bbbdfa73b4585aec9a1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "vcflib/vcffixup": { "branch": "master", "git_sha": "072cf562e143252953a1b5c5ed38ec55eaa930c8", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -279,40 +205,30 @@ "multiple_impute_glimpse2": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "vcf_impute_glimpse": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} From 5b02a2c0fc99fee2ccc1fd5d90e5ace76fe45201 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Tue, 11 Jun 2024 17:58:51 +0200 Subject: [PATCH 22/63] Update snapshot --- workflows/phaseimpute/tests/test_all.nf.test.snap | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git 
a/workflows/phaseimpute/tests/test_all.nf.test.snap b/workflows/phaseimpute/tests/test_all.nf.test.snap index 82562f27..c018fc50 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test.snap +++ b/workflows/phaseimpute/tests/test_all.nf.test.snap @@ -164,20 +164,14 @@ "Check test_sim": { "content": [ [ - "simulation/NA12878.bam", - "simulation/NA12878.bam.bai", - "simulation/NA19401.bam", - "simulation/NA19401.bam.bai", - "simulation/NA20359.bam", - "simulation/NA20359.bam.bai", - "simulation/csv/simulate.csv" + ] ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-05-24T14:48:45.584042427" + "timestamp": "2024-06-11T17:29:33.926536569" }, "Check test_glimpse2": { "content": [ From 7f36c8c7b2c3f1149ff47dd39e0b3a1372cc8760 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sat, 8 Jun 2024 19:07:17 +0000 Subject: [PATCH 23/63] move bcftools_convert to vcf_sites_extract --- conf/steps/panel_prep.config | 19 ++++++++------- .../local/vcf_normalize_bcftools/main.nf | 23 ++++-------------- .../local/vcf_sites_extract_bcftools/main.nf | 24 ++++++++++++++----- workflows/phaseimpute/main.nf | 4 ++-- 4 files changed, 35 insertions(+), 35 deletions(-) diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index b949c03e..51dadefb 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -99,15 +99,6 @@ process { ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_CONVERT' { - ext.args = {"--haplegendsample ${meta.id}_${meta.chr}"} - publishDir = [ - path: { "${params.outdir}/prep_panel/haplegend/" }, - mode: params.publish_dir_mode, - enabled: true - ] - } - // Subworkflow: VCF_PHASE_SHAPEIT5 withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_SHAPEIT5:.*' { publishDir = [ @@ -126,10 +117,20 @@ process { } // Subworkflow: VCF_SITES_EXTRACT_BCFTOOLS + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:.*' { publishDir = [ 
enabled: false ] } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_CONVERT' { + ext.args = {"--haplegendsample ${meta.id}_${meta.chr}"} + publishDir = [ + path: { "${params.outdir}/prep_panel/haplegend/" }, + mode: params.publish_dir_mode, + enabled: true + ] + } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_VIEW' { ext.args = [ "-G", diff --git a/subworkflows/local/vcf_normalize_bcftools/main.nf b/subworkflows/local/vcf_normalize_bcftools/main.nf index a1441984..4528eada 100644 --- a/subworkflows/local/vcf_normalize_bcftools/main.nf +++ b/subworkflows/local/vcf_normalize_bcftools/main.nf @@ -5,7 +5,6 @@ include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_3 } from '../../../modules/nf-core include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_4 } from '../../../modules/nf-core/bcftools/index' include { BCFTOOLS_VIEW as BCFTOOLS_DEL_MLT_ALL } from '../../../modules/nf-core/bcftools/view' include { BCFTOOLS_VIEW as BCFTOOLS_DEL_SPL } from '../../../modules/nf-core/bcftools/view' -include { BCFTOOLS_CONVERT } from '../../../modules/nf-core/bcftools/convert' include { VCFLIB_VCFFIXUP } from '../../../modules/nf-core/vcflib/vcffixup/main' @@ -49,14 +48,12 @@ workflow VCF_NORMALIZE_BCFTOOLS { BCFTOOLS_INDEX_3(BCFTOOLS_DEL_SPL.out.vcf) ch_versions = ch_versions.mix(BCFTOOLS_INDEX_3.out.versions) - ch_biallelic_vcf_tbi_spl = BCFTOOLS_DEL_SPL.out.vcf.join(BCFTOOLS_INDEX_3.out.tbi) - } else { - ch_biallelic_vcf_tbi_spl = ch_biallelic_vcf_tbi + ch_biallelic_vcf_tbi = BCFTOOLS_DEL_SPL.out.vcf.join(BCFTOOLS_INDEX_3.out.tbi) } + // (Optional) Fix panel (When AC/AN INFO fields in VCF are inconsistent with GT field) if (params.compute_freq == true) { - // Fix panel (AC/AN INFO fields in VCF are inconsistent with GT field) - VCFLIB_VCFFIXUP(ch_biallelic_vcf_tbi_spl) + VCFLIB_VCFFIXUP(ch_biallelic_vcf_tbi) ch_versions = ch_versions.mix(VCFLIB_VCFFIXUP.out.versions) // Index fixed panel @@ -64,20 +61,10 @@ workflow VCF_NORMALIZE_BCFTOOLS 
{ ch_versions = ch_versions.mix(BCFTOOLS_INDEX_4.out.versions) // Join fixed vcf and tbi - ch_biallelic_vcf_tbi_freq = VCFLIB_VCFFIXUP.out.vcf.join(BCFTOOLS_INDEX_4.out.tbi) - } else { - ch_biallelic_vcf_tbi_freq = ch_biallelic_vcf_tbi_spl + ch_biallelic_vcf_tbi = VCFLIB_VCFFIXUP.out.vcf.join(BCFTOOLS_INDEX_4.out.tbi) } - // Convert VCF to Hap and Legend files - BCFTOOLS_CONVERT(ch_biallelic_vcf_tbi_freq, ch_fasta, []) - ch_versions = ch_versions.mix(BCFTOOLS_CONVERT.out.versions) - - // Output hap and legend files - ch_hap_legend = BCFTOOLS_CONVERT.out.hap.join(BCFTOOLS_CONVERT.out.legend) - emit: - vcf_tbi = ch_biallelic_vcf_tbi_freq // channel: [ [id, chr], vcf, tbi ] - hap_legend = ch_hap_legend // channel: [ [id, chr], '.hap', '.legend' ] + vcf_tbi = ch_biallelic_vcf_tbi // channel: [ [id, chr], vcf, tbi ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/vcf_sites_extract_bcftools/main.nf b/subworkflows/local/vcf_sites_extract_bcftools/main.nf index e19838c3..9113381d 100644 --- a/subworkflows/local/vcf_sites_extract_bcftools/main.nf +++ b/subworkflows/local/vcf_sites_extract_bcftools/main.nf @@ -1,17 +1,28 @@ -include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view' -include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index' -include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip' -include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix' -include { BCFTOOLS_QUERY } from '../../../modules/nf-core/bcftools/query' -include { GAWK } from '../../../modules/nf-core/gawk' +include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view' +include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index' +include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip' +include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix' +include { BCFTOOLS_QUERY } from '../../../modules/nf-core/bcftools/query' +include { GAWK } from 
'../../../modules/nf-core/gawk' +include { BCFTOOLS_CONVERT } from '../../../modules/nf-core/bcftools/convert' + workflow VCF_SITES_EXTRACT_BCFTOOLS { take: ch_vcf // channel: [ [id, chr], vcf, index ] + ch_fasta // channel: [ [genome], fasta, fai ] main: ch_versions = Channel.empty() + ch_fasta = ch_fasta.map { meta, fasta, fai -> [meta, fasta] } + + // Convert VCF to Hap and Legend files + BCFTOOLS_CONVERT(ch_vcf, ch_fasta, []) + ch_versions = ch_versions.mix(BCFTOOLS_CONVERT.out.versions) + + // Output hap and legend files + ch_hap_legend = BCFTOOLS_CONVERT.out.hap.join(BCFTOOLS_CONVERT.out.legend) // Extract sites positions BCFTOOLS_VIEW(ch_vcf, [], [], []) @@ -44,6 +55,7 @@ workflow VCF_SITES_EXTRACT_BCFTOOLS { ch_glimpse_posfile = ch_posfile.map{ metaPC, sites, s_index, tsv -> [metaPC, sites, tsv]} emit: + hap_legend = ch_hap_legend // channel: [ [id, chr], '.hap', '.legend' ] panel_tsv_stitch = GAWK.out.output // channel: [ [id, chr], txt ] panel_sites = ch_panel_sites // channel: [ [id, chr], vcf, csi ] posfile = ch_posfile // channel: [ [id, chr], vcf, csi, tsv.gz ] diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 730c34da..0c2e0e84 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -156,7 +156,7 @@ workflow PHASEIMPUTE { ch_versions = ch_versions.mix(VCF_NORMALIZE_BCFTOOLS.out.versions) // Extract sites from normalized vcf - VCF_SITES_EXTRACT_BCFTOOLS(VCF_NORMALIZE_BCFTOOLS.out.vcf_tbi) + VCF_SITES_EXTRACT_BCFTOOLS(VCF_NORMALIZE_BCFTOOLS.out.vcf_tbi, ch_fasta) ch_versions = ch_versions.mix(VCF_SITES_EXTRACT_BCFTOOLS.out.versions) // Generate all necessary channels @@ -186,7 +186,7 @@ workflow PHASEIMPUTE { CHANNEL_POSFILE_CREATE_CSV(VCF_SITES_EXTRACT_BCFTOOLS.out.panel_tsv_stitch, params.outdir) CHANNEL_CHUNKS_CREATE_CSV(VCF_CHUNK_GLIMPSE.out.chunks, params.outdir) CHANNEL_PANEL_CREATE_CSV(ch_panel_phased, - VCF_NORMALIZE_BCFTOOLS.out.hap_legend, + VCF_SITES_EXTRACT_BCFTOOLS.out.hap_legend, 
params.outdir) } From 581240f0ca6f4f86750ac4fcf19fbd1e7741aac6 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sat, 8 Jun 2024 19:30:24 +0000 Subject: [PATCH 24/63] move channel definition to panelprep --- workflows/phaseimpute/main.nf | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 0c2e0e84..38436f4d 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -182,12 +182,15 @@ workflow PHASEIMPUTE { VCF_CHUNK_GLIMPSE(ch_panel_phased, ch_map) ch_versions = ch_versions.mix(VCF_CHUNK_GLIMPSE.out.versions) + // Assign chunks channels + ch_chunks_glimpse1 = VCF_CHUNK_GLIMPSE.out.chunks_glimpse1 + ch_chunks_glimpse2 = VCF_CHUNK_GLIMPSE.out.chunks_glimpse2 + ch_chunks_quilt = VCF_CHUNK_GLIMPSE.out.chunks_quilt + // Create CSVs from panelprep step CHANNEL_POSFILE_CREATE_CSV(VCF_SITES_EXTRACT_BCFTOOLS.out.panel_tsv_stitch, params.outdir) CHANNEL_CHUNKS_CREATE_CSV(VCF_CHUNK_GLIMPSE.out.chunks, params.outdir) - CHANNEL_PANEL_CREATE_CSV(ch_panel_phased, - VCF_SITES_EXTRACT_BCFTOOLS.out.hap_legend, - params.outdir) + CHANNEL_PANEL_CREATE_CSV(ch_panel_phased, ch_hap_legend, params.outdir) } From f3beb183c9e349c626b596bdc893c030a923c21b Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 9 Jun 2024 16:26:32 +0000 Subject: [PATCH 25/63] move channel haplegend from panel to posfile --- assets/schema_input_panel.json | 10 ------ assets/schema_posfile.json | 10 ++++++ conf/test_quilt.config | 2 +- .../utils_nfcore_phaseimpute_pipeline/main.nf | 32 ++++--------------- tests/csv/panel_2.csv | 4 +-- tests/csv/posfile.csv | 4 +-- 6 files changed, 22 insertions(+), 40 deletions(-) diff --git a/assets/schema_input_panel.json b/assets/schema_input_panel.json index 40b7e3b2..242a4136 100644 --- a/assets/schema_input_panel.json +++ b/assets/schema_input_panel.json @@ -28,16 +28,6 @@ "type": 
"string", "pattern": "^\\S+\\.(vcf|bcf)(\\.gz)?\\.(tbi|csi)$", "errorMessage": "Panel index file must be provided, cannot contain spaces and must have extension '.vcf' or '.bcf' with optional '.gz' extension and with '.csi' or '.tbi' extension" - }, - "hap": { - "type": "string", - "pattern": "^\\S+\\.(hap)(\\.gz)?$", - "errorMessage": "Hap file can be provided, cannot contain spaces and must have extension '.hap' with '.gz' extension" - }, - "legend": { - "type": "string", - "pattern": "^\\S+\\.(legend)(\\.gz)?$", - "errorMessage": "Legend file can be provided, cannot contain spaces and must have extension '.hap' with '.gz' extension" } }, "required": ["panel", "chr", "vcf", "index"] diff --git a/assets/schema_posfile.json b/assets/schema_posfile.json index c44247ff..7d2e2473 100644 --- a/assets/schema_posfile.json +++ b/assets/schema_posfile.json @@ -33,6 +33,16 @@ "type": "string", "pattern": "^\\S+\\.(txt|tsv)(\\.gz)?$", "errorMessage": "TXT with sites (position file) per chromosome must be provided. 
Must have .txt or .tsv extension with optional .gz" + }, + "hap": { + "type": "string", + "pattern": "^\\S+\\.(hap)(\\.gz)?$", + "errorMessage": "Hap file can be provided, cannot contain spaces and must have extension '.hap' with optional '.gz' extension" + }, + "legend": { + "type": "string", + "pattern": "^\\S+\\.(legend)(\\.gz)?$", + "errorMessage": "Legend file can be provided, cannot contain spaces and must have extension '.legend' with optional '.gz' extension" + } }, "required": ["panel", "chr", "vcf", "index", "txt"] diff --git a/conf/test_quilt.config b/conf/test_quilt.config index b077b0ed..41ff4738 100644 --- a/conf/test_quilt.config +++ b/conf/test_quilt.config @@ -32,7 +32,7 @@ params { // External params chunks = "${projectDir}/tests/csv/chunks.csv" - panel = "${projectDir}/tests/csv/panel_2.csv" + posfile = "${projectDir}/tests/csv/posfile.csv" // Impute tools tools = "quilt" diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index 5ab1cf6a..67ce8377 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -148,9 +148,6 @@ workflow PIPELINE_INITIALISATION { if (params.panel.endsWith("csv")) { print("Panel file provided as input is a samplesheet") ch_panel = Channel.fromSamplesheet("panel") - .map { meta, bcf, csi, hap, legend -> [meta, bcf, csi] } - ch_hap_legend = Channel.fromSamplesheet("panel") - .map { meta, bcf, csi, hap, legend -> [["panel": meta.id, "chr": meta.chr], hap, legend] } } else { // #TODO Wait for `oneOf()` to be supported in the nextflow_schema.json error "Panel file provided is of another format than CSV (not yet supported). Please separate your panel by chromosome and use the samplesheet format."
@@ -158,7 +155,6 @@ workflow PIPELINE_INITIALISATION { } else { // #TODO check if panel is required ch_panel = Channel.of([[],[],[]]) - ch_hap_legend = Channel.empty() } // @@ -216,9 +212,13 @@ workflow PIPELINE_INITIALISATION { // if (params.posfile) { ch_posfile = Channel - .fromSamplesheet("posfile") + .fromSamplesheet("posfile") // ["panel", "chr", "vcf", "index", "txt"] + + ch_hap_legend = Channel.fromSamplesheet("posfile") + .map { meta, vcf, index, txt, hap, legend -> [meta, hap, legend] } } else { ch_posfile = [[],[]] + ch_hap_legend = Channel.empty() } // @@ -315,8 +315,8 @@ def validateInputParameters() { // Check that posfile and chunks are provided when running impute only. Steps with panelprep generate those files. if (params.steps.split(',').contains("impute") && !params.steps.split(',').find { it in ["all", "panelprep"] }) { - // Required by all tools except glimpse2 and quilt - if (!params.tools.split(',').find { it in ["glimpse2", "quilt"] }) { + // Required by all tools except glimpse2 + if (!params.tools.split(',').find { it in ["glimpse2"] }) { assert params.posfile, "No --posfile provided for --steps impute" } // Required by all tools except STITCH @@ -379,24 +379,6 @@ def getAllFilesExtension(ch_input) { } } -// -// Validate haplegend from panel channel -// - -def checkHapLegend(ch_hap_legend) { - ch_hap_legend.map { channel -> - def meta = channel[0] - def hap = channel[1] - def legend = channel[2] - - if (hap != [] || legend != []) { - log.warn "Hap or Legend files are not empty for panel ${meta.panel}, chromosome ${meta.chr}" - } - - return channel - } -} - // // Validate channels from input samplesheet diff --git a/tests/csv/panel_2.csv b/tests/csv/panel_2.csv index f06d9a19..de76a31c 100644 --- a/tests/csv/panel_2.csv +++ b/tests/csv/panel_2.csv @@ -1,2 +1,2 @@ -panel,chr,vcf,index,hap,legend 
-1000GP.s.norel,chr22,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22_fixed.vcf.gz,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22_fixed.vcf.gz.tbi,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22.hap.gz,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22.legend.gz +panel,chr,vcf,index +1000GP.s.norel,chr22,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22_fixed.vcf.gz,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22_fixed.vcf.gz.tbi diff --git a/tests/csv/posfile.csv b/tests/csv/posfile.csv index b2a113bd..04065504 100644 --- a/tests/csv/posfile.csv +++ b/tests/csv/posfile.csv @@ -1,3 +1,3 @@ -panel,chr,vcf,index,txt +panel,chr,vcf,index,txt,hap,legend 1000GP.s.norel,chr21,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.tsv.gz" -1000GP.s.norel,chr22,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.tsv.gz" 
+1000GP.s.norel,chr22,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.tsv.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22.hap.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22.legend.gz" From 5dcd43e631032c92eadca873be58f8147062663c Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 9 Jun 2024 16:26:59 +0000 Subject: [PATCH 26/63] remove haplegend check in panel --- workflows/phaseimpute/main.nf | 4 ---- 1 file changed, 4 deletions(-) diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 38436f4d..29565704 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -13,7 +13,6 @@ include { paramsSummaryMultiqc } from '../../subworkflows/nf-core/utils_n include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../../subworkflows/local/utils_nfcore_phaseimpute_pipeline' include { getAllFilesExtension } from '../../subworkflows/local/utils_nfcore_phaseimpute_pipeline' -include { checkHapLegend } from '../../subworkflows/local/utils_nfcore_phaseimpute_pipeline' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -148,9 +147,6 @@ workflow PHASEIMPUTE { VCF_CHR_CHECK(ch_panel, ch_fasta) ch_versions = ch_versions.mix(VCF_CHR_CHECK.out.versions) - // Emit a warning if hap_legend files are provided in the panel with `--steps panelprep` - checkHapLegend(ch_hap_legend) - // Normalize indels in panel VCF_NORMALIZE_BCFTOOLS(VCF_CHR_CHECK.out.vcf, ch_fasta) ch_versions = 
ch_versions.mix(VCF_NORMALIZE_BCFTOOLS.out.versions) From 7a2fe378f171b362dc1926307593507814dcecf4 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 9 Jun 2024 17:04:34 +0000 Subject: [PATCH 27/63] update docs --- docs/usage.md | 158 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 127 insertions(+), 31 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index b8d166a7..e30798f8 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -53,19 +53,51 @@ You will need to create a samplesheet with information about the reference panel A final samplesheet file for the reference panel may look something like the one below. This is for 3 chromosomes. ```console -chr,vcf -1,ALL.chr1.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz -2,ALL.chr2.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz -3,ALL.chr3.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz +panel,chr,vcf,index +1000G,chr1,ALL.chr1.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,ALL.chr1.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G,chr2,ALL.chr2.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,ALL.chr2.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G,chr3,ALL.chr3.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,ALL.chr3.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi ``` -| Column | Description | -| ------ | --------------------------------------------------------------------------------------------------------- | -| `chr` | Name of the chromosome. Use the prefix 'chr' if the panel uses the prefix. | -| `vcf` | Full path to a VCF file for that chromosome. File has to be gzipped and have the extension ".vcf.gz".gz".
| +| Column | Description | +| ------- | -------------------------------------------------------------------------------------------------------------- | +| `panel` | Name of the reference panel used. | +| `chr` | Name of the chromosome. Use the prefix 'chr' if the panel uses the prefix. | +| `vcf` | Full path to a VCF file for that chromosome. File has to be gzipped and have the extension ".vcf.gz". | +| `index` | Full path to the index for VCF file for that chromosome. File has to be gzipped and have the extension ".tbi". | An [example samplesheet](../assets/samplesheet_reference.csv) has been provided with the pipeline. +## Samplesheet posfile + +You will need a samplesheet with information about the reference panel sites for using the `--steps [impute,validate]`. You can generate this samplesheet from `--steps panelprep`. Use this parameter to specify its location. It has to be a comma-separated file with at least 5 columns, and a header row as shown in the examples below. + +```bash +--posfile '[path to samplesheet file]' +``` + +### Structure + +A final samplesheet file for the posfile may look something like the one below. This is for 2 chromosomes. + +```console +panel,chr,vcf,index,txt,hap,legend +1000GP.s.norel,chr21,1000GP.chr21.s.norel.sites.vcf.gz,1000GP.chr21.s.norel.sites.vcf.gz.csi,1000GP.chr21.s.norel.tsv.gz,, +1000GP.s.norel,chr22,1000GP.chr22.s.norel.sites.vcf.gz,1000GP.chr22.s.norel.sites.vcf.gz.csi,1000GP.chr22.s.norel.tsv.gz,1000GP.s.norel_chr22.hap.gz,1000GP.s.norel_chr22.legend.gz +``` + +| Column | Description | +| -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `panel` | Name of the reference panel used. | +| `chr` | Name of the chromosome. Use the prefix 'chr' if the panel uses the prefix. | +| `vcf` | Full path to a VCF containing the sites for that chromosome. 
File has to be gzipped and have the extension ".vcf.gz". | +| `index` | Full path to the index for the VCF file for that chromosome. File has to be gzipped and have the extension ".tbi". | +| `txt` | Full path to the ".tsv.gz" file containing the reference panel sites from the VCF file for that chromosome. File has to be gzipped. | +| `hap` | Full path to the ".hap" file containing the reference panel sites from the VCF file for that chromosome. (Required by QUILT) | +| `legend` | Full path to the ".legend.gz" file containing the reference panel sites from the VCF file for that chromosome in "legend" format. File has to be gzipped. (Required by QUILT) | + +## Genome reference + Remember to use the same reference genome for all the files. You can specify the [reference genome](https://nf-co.re/docs/usage/reference_genomes) using: ```bash @@ -83,10 +115,15 @@ or you can specify a custom genome using: The typical command for running the pre-processing of the panel and imputation of samples is as follows: ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --outdir results --genome GRCh37 -profile docker --steps panelprep,impute +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --steps panelprep,impute \ + --outdir results \ + --genome GRCh37 \ + -profile docker ``` -This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. +This will launch the pipeline, preparing the reference panel and performing imputation, with the `docker` configuration profile. See below for more information about profiles. Note that the pipeline will create the following files in your working directory: @@ -131,7 +168,13 @@ nf-core/phaseimpute can be started at different points in the analysis by settin This steps of the pipeline allows to create synthetic low-coverage input files by downsizing high density input data. A typical use case is to obtain low-coverage input data from a sequenced sample.
This method is useful for comparing the imputation results to the truth and evaluate the quality of the imputation. You can skip this steps if you already have low-pass genome sequencing data. A sample command for this steps is: ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --steps simulate --depth 1 --outdir results --genome GRCh37 -profile docker +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --steps simulate \ + --depth 1 \ + --outdir results \ + --genome GRCh37 \ + -profile docker ``` The required flags for this mode are: @@ -141,7 +184,7 @@ The required flags for this mode are: - `--depth`: The final depth of the file [default: 1]. - `--genome` or `--fasta`: The reference genome of the samples. -You can find an overview of the results produced by this steps in the [Output](output.md). +You can find an overview of the results produced by this step in the [Output](output.md). ### Start with panel preparation `--steps panelprep` @@ -150,7 +193,11 @@ This steps pre-processes the reference panel in order to be ready for imputation For starting from panel preparation, the required flags are `--steps panelprep` and `--panel samplesheet_reference.csv`. ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --panel samplesheet_reference.csv --steps panelprep --outdir results --genome GRCh37 -profile docker +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --panel samplesheet_reference.csv \ + --steps panelprep --outdir results \ + --genome GRCh37 -profile docker ``` The required flags for this mode are: @@ -173,25 +220,22 @@ For starting from the imputation steps, the required flags are: - `--tools [glimpse1, quilt, stitch]`: A selection of one or more of the available imputation tools. Each imputation tool has their own set of specific flags and input files. 
These required files are produced by `--steps panelprep` and used as input in: - `--chunks chunks.csv`: A samplesheet containing chunks per chromosome. These are produced by `--steps panelprep` using `GLIMPSE1`. - - `--posfile posfile.csv`: A samplesheet containing a TSV with the list of positions to genotype per chromosome. These are required by tools (for STITCH/GLIMPSE1). The posfile can be generated with `--steps panelprep`. - - `--panel panel.csv`: A samplesheet containing the post-processed VCF (required by GLIMPSE1 and GLIMPSE2) or the hap_legend files (required by QUILT). These files can be obtained with `--steps panelprep`. + - `--posfile posfile.csv`: A samplesheet containing a TSV with the list of positions to genotype per chromosome. These are required by tools (for STITCH/GLIMPSE1). It can also contain the hap_legend files (required by QUILT). The posfile can be generated with `--steps panelprep`. + - `--panel panel.csv`: A samplesheet containing the post-processed reference panel VCF (required by GLIMPSE1 and GLIMPSE2). These files can be obtained with `--steps panelprep`. 
#### Summary table of required parameters in `--steps impute` | | `--steps impute` | `--input` | `--genome` or `--fasta` | `--panel` | `--chunks` | `--posfile` | | ---------- | ---------------- | --------- | ----------------------- | --------- | ---------- | ----------- | -| `GLIMPSE1` | ✅ | ✅ | ✅ | ✅ ¹ | ✅ | ✅ ³ | +| `GLIMPSE1` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ ¹ | | `GLIMPSE2` | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | -| `QUILT` | ✅ | ✅ | ✅ | ✅ ² | ✅ | ❌ | +| `QUILT` | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ ² | | `STITCH` | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | #### Details: -¹ `GLIMPSE1` & `GLIMPSE2`: Should be a CSV with columns [panel, chr, vcf, index] - -² `QUILT`: Should be a CSV with columns [panel, chr, hap, legend] - -³ `GLIMPSE1`: Should be a CSV with columns [panel, chr, vcf, txt] +¹ `GLIMPSE1`: Should be a CSV with columns [panel id, chr, vcf, txt] +² `QUILT`: Should be a CSV with columns [panel id, chr, hap, legend] ### Imputation tools `--steps impute --tools [glimpse1, glimpse2, quilt, stitch]` @@ -202,13 +246,21 @@ You can choose different software to perform the imputation. In the following se [QUILT](https://github.com/rwdavies/QUILT) is an R and C++ program for rapid genotype imputation from low-coverage sequence using a large reference panel. The required inputs for this program are bam samples provided in the input samplesheet (`--input`) and a csv file with the genomic chunks (`--chunks`). ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --panel panel_haplegend.csv --chunks chunks.csv --steps impute --tool quilt --outdir results --genome GRCh37 -profile docker +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --posfile haplegend.csv \ + --chunks chunks.csv \ + --steps impute \ + --tools quilt \ + --outdir results \ + --genome GRCh37 \ + -profile docker ``` -The csv provided in `--panel` must contain at least four columns [panel, chr, hap, legend].
The first column is the name of the panel, the second is the chromosome, then the hap and legend files produced by `--steps panelprep` unique to each chromosome. The hap and legend files are mandatory to use QUILT. +The csv provided in `--posfile` must contain at least four columns [panel, chr, hap, legend]. The first column is the name of the panel, the second is the chromosome, then the hap and legend files produced by `--steps panelprep` unique to each chromosome. The hap and legend files are mandatory to use QUILT. ```console -panel,chr,vcf,index,hap,legend +panel,chr,vcf,index,txt,hap,legend 1000GP.s.norel,chr22,,,1000GP.s.norel_chr22.hap.gz,1000GP.s.norel_chr22.legend.gz ``` @@ -226,7 +278,14 @@ The file column should contain a TSV obtained from GLIMPSE1 with the following [ If you do not have a csv with chunks, you can provide a reference panel to run the `--steps panelprep` which produces a csv with these chunks, which is then used as input for QUILT. You can choose to run both steps sequentially as `--steps panelprep,impute` or simply collect the files produced by `--steps panelprep`. ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --steps panelprep,impute --panel samplesheet_reference.csv --outdir results --genome GRCh37 -profile docker --tools quilt +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --steps panelprep,impute \ + --tools quilt \ + --panel samplesheet_reference.csv \ + --outdir results \ + --genome GRCh37 \ + -profile docker ``` #### STITCH @@ -236,13 +295,26 @@ nextflow run nf-core/phaseimpute --input samplesheet.csv --steps panelprep,imput If you do not have a list of position to genotype, you can provide a reference panel to run the `--steps panelprep` which produces a tsv with this list. 
```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --steps panelprep --panel samplesheet_reference.csv --outdir results --genome GRCh37 -profile docker +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --steps panelprep \ + --panel samplesheet_reference.csv \ + --outdir results \ + --genome GRCh37 \ + -profile docker ``` Otherwise, you can provide your own position file in the `--steps impute` with STITCH using the the `--posfile` parameter. ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --steps impute --posfile samplesheet_posfile.csv --tool stitch --outdir results --genome GRCh37 -profile docker +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --steps impute \ + --posfile samplesheet_posfile.csv \ + --tools stitch \ + --outdir results \ + --genome GRCh37 \ + -profile docker ``` The csv provided in `--posfile` must contain four columns [panel, chr, vcf, txt]. @@ -288,7 +360,16 @@ bcftools query -f'%CHROM\t%POS\t%REF,%ALT\n' ${vcf} [GLIMPSE1](https://github.com/odelaneau/GLIMPSE/tree/glimpse1) is a set of tools for phasing and imputation for low-coverage sequencing datasets. Recommended for many samples at >0.5x coverage and small reference panels. This is an example command to run this tool from the `--steps impute`: ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --panel samplesheet_reference.csv --steps impute --tool glimpse1 --outdir results --genome GRCh37 -profile docker --posfile posfile.csv --chunks chunks.csv +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --panel samplesheet_reference.csv \ + --steps impute \ + --tools glimpse1 \ + --outdir results \ + --genome GRCh37 \ + -profile docker \ + --posfile posfile.csv \ + --chunks chunks.csv ``` The csv provided in `--posfile` must contain four columns [panel, chr, vcf, txt].
@@ -312,7 +393,16 @@ The csv provided in `--panel` must be prepared with `--steps panelprep` and must [GLIMPSE2](https://github.com/odelaneau/GLIMPSE) is a set of tools for phasing and imputation for low-coverage sequencing datasets. This is an example command to run this tool from the `--steps impute`: ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --panel samplesheet_reference.csv --steps impute --tool glimpse2 --outdir results --genome GRCh37 -profile docker --posfile posfile.csv --chunks chunks.csv +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --panel samplesheet_reference.csv \ + --steps impute \ + --tools glimpse2 \ + --outdir results \ + --posfile posfile.csv \ + --chunks chunks.csv \ + --genome GRCh37 \ + -profile docker ``` Make sure the csv with the input panel is the output from `--step panelprep` or has been previously prepared. @@ -323,7 +413,13 @@ This steps compares a _truth_ VCF to an _imputed_ VCF in order to compute imputa This also needs the frequency of the alleles. They can be computed from the reference panel by running the `--steps panelprep` and using the `--panel` with the `--compute_freq` flag ; or by using `--posfile samplesheet.csv`.
```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --input_truth truth.csv --steps validate --outdir results --genome GRCh37 -profile docker +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --input_truth truth.csv \ + --steps validate \ + --outdir results \ + --genome GRCh37 \ + -profile docker ``` The required flags for this mode only are: From 0566d6da0eb73e6ed895736b314c2d92ea27226b Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 9 Jun 2024 17:06:51 +0000 Subject: [PATCH 28/63] update docs --- docs/usage.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index e30798f8..15c0375b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -35,8 +35,8 @@ SAMPLE6,AEG588A6.bam,AEG588A6.bai | Column | Description | | -------- | -------------------------------------------------------------------------------------------- | | `sample` | Custom sample name. Spaces in sample names are automatically converted to underscores (`_`). | -| `bam` | Full path to a BAM file. File has to be gzipped and have the extension ".bam.gz".gz". | -| `bai` | Full path to a BAI file. File has to be gzipped and have the extension ".bam" or ".fq.gz". | +| `bam` | Full path to a BAM file. File has to have the extension ".bam". | +| `bai` | Full path to a BAI file. File has to have the extension ".bai". | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
From 0a295be8bb68915ea9cc86a825bca2cccd28425d Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 9 Jun 2024 17:45:37 +0000 Subject: [PATCH 29/63] correctly define ch_posfile --- subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf | 1 + tests/csv/posfile.csv | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index 67ce8377..5f584162 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -213,6 +213,7 @@ workflow PIPELINE_INITIALISATION { if (params.posfile) { ch_posfile = Channel .fromSamplesheet("posfile") // ["panel", "chr", "vcf", "index", "txt"] + .map { meta, vcf, index, txt, hap, legend -> [meta, vcf, index, txt] } ch_hap_legend = Channel.fromSamplesheet("posfile") .map { meta, vcf, index, txt, hap, legend -> [meta, hap, legend] } diff --git a/tests/csv/posfile.csv b/tests/csv/posfile.csv index 04065504..38165955 100644 --- a/tests/csv/posfile.csv +++ b/tests/csv/posfile.csv @@ -1,3 +1,3 @@ panel,chr,vcf,index,txt,hap,legend -1000GP.s.norel,chr21,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.tsv.gz" +1000GP.s.norel,chr21,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.tsv.gz",,, 
1000GP.s.norel,chr22,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.tsv.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22.hap.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22.legend.gz" From 51a5eb7a4945b7763be0b563b5d09b802c98f0a0 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 9 Jun 2024 18:14:31 +0000 Subject: [PATCH 30/63] add tags to print id and chr when running --- conf/steps/imputation_glimpse1.config | 1 + conf/steps/imputation_glimpse2.config | 1 + conf/steps/imputation_quilt.config | 1 + conf/steps/imputation_stitch.config | 7 ++++--- conf/steps/panel_prep.config | 5 +++++ conf/steps/simulation.config | 5 +++++ conf/steps/validation.config | 3 +++ 7 files changed, 20 insertions(+), 3 deletions(-) diff --git a/conf/steps/imputation_glimpse1.config b/conf/steps/imputation_glimpse1.config index 0e3b8a24..e52802d0 100644 --- a/conf/steps/imputation_glimpse1.config +++ b/conf/steps/imputation_glimpse1.config @@ -16,6 +16,7 @@ process { // Impute with GLIMPSE1 withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:.*' { publishDir = [ enabled: false ] + tag = {"${meta.id} ${meta.chr}"} } // Call the variants before imputation diff --git a/conf/steps/imputation_glimpse2.config b/conf/steps/imputation_glimpse2.config index 8f31d5e1..8b705c6a 100644 --- a/conf/steps/imputation_glimpse2.config +++ b/conf/steps/imputation_glimpse2.config @@ -14,6 +14,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:.*' { publishDir = [ enabled: false ] + tag = {"${meta.id} ${meta.chr}"} } 
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:GLIMPSE2_PHASE' { diff --git a/conf/steps/imputation_quilt.config b/conf/steps/imputation_quilt.config index a1fddd16..c7ca81cb 100644 --- a/conf/steps/imputation_quilt.config +++ b/conf/steps/imputation_quilt.config @@ -17,6 +17,7 @@ process { path: { "${params.outdir}/imputation/quilt/" }, mode: params.publish_dir_mode, ] + tag = {"${meta.id} ${meta.chr}"} } // Impute quilt diff --git a/conf/steps/imputation_stitch.config b/conf/steps/imputation_stitch.config index e0e2de03..981e94ee 100644 --- a/conf/steps/imputation_stitch.config +++ b/conf/steps/imputation_stitch.config @@ -16,16 +16,17 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:POSFILE_PREPARE_GAWK:GUNZIP' { - ext.prefix = { "${meta.panel}_${meta.chr}_original_posfile" } + ext.prefix = { "${meta.id}_${meta.chr}_original_posfile" } publishDir = [enabled: false] - tag = {"${meta.panel}_${meta.chr}"} + tag = {"${meta.id} ${meta.chr}"} } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:POSFILE_PREPARE_GAWK:GAWK' { ext.args = "'{ gsub(\",\", \"\\t\") ; key = \$1 FS \$2 } !seen[key]++'" // Remove duplicates - ext.prefix = { "${meta.panel}_${meta.chr}_posfile_stitch" } + ext.prefix = { "${meta.id}_${meta.chr}_posfile_stitch" } ext.suffix = "txt" publishDir = [enabled: false] + tag = {"${meta.id} ${meta.chr}"} } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_STITCH:STITCH' { diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index 51dadefb..615810e7 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -17,6 +17,7 @@ process { mode: params.publish_dir_mode, enabled: false ] + tag = {"${meta.id} ${meta.chr}"} } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHR_CHECK:VCF_CHR_RENAME:BCFTOOLS_ANNOTATE' { @@ -35,6 +36,7 @@ process { // Subworkflow: VCF_NORMALIZE_BCFTOOLS withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:.*' { publishDir = [ enabled: false ] + tag = {"${meta.id} 
${meta.chr}"} } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_NORM' { @@ -106,6 +108,7 @@ process { mode: params.publish_dir_mode, enabled: false ] + tag = {"${meta.id} ${meta.chr}"} } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_SHAPEIT5:GLIMPSE2_CHUNK' { @@ -120,6 +123,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:.*' { publishDir = [ enabled: false ] + tag = {"${meta.id} ${meta.chr}"} } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_CONVERT' { @@ -208,6 +212,7 @@ process { mode: params.publish_dir_mode, enabled: true ] + tag = {"${meta.id} ${meta.chr}"} } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE_CHUNK' { diff --git a/conf/steps/simulation.config b/conf/steps/simulation.config index f3355993..f2fbcfda 100644 --- a/conf/steps/simulation.config +++ b/conf/steps/simulation.config @@ -13,6 +13,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_REGION:.*' { publishDir = [ enabled: false ] + tag = {"${meta.id} ${meta.chr}"} } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_REGION:SAMTOOLS_VIEW' { ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}" } @@ -23,10 +24,12 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] + tag = {"${meta.id} ${meta.chr}"} } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_DEPTH' { publishDir = [enabled: false] ext.prefix = { "${meta.id}_C${meta.chr ?: "all"}.stats" } + tag = {"${meta.id}"} } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_VIEW' { ext.prefix = { params.input_region ? 
@@ -48,9 +51,11 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_MERGE' { ext.prefix = { "${meta.id}" } + tag = {"${meta.id}"} } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_INDEX_2' { ext.args = "" + tag = {"${meta.id}"} } // Coverage process diff --git a/conf/steps/validation.config b/conf/steps/validation.config index 198ebe3c..39e29927 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -16,6 +16,7 @@ process { // Compute genotype likelihoods for the truth set withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:.*' { publishDir = [ enabled: false ] + tag = {"${meta.id} ${meta.chr}"} } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:BCFTOOLS_MPILEUP' { @@ -65,6 +66,7 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] + tag = {"${meta.id} ${meta.panel} ${meta.tools}"} } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GLIMPSE2_CONCORDANCE' { @@ -85,5 +87,6 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GAWK' { ext.args = "'(NR == 1) || (FNR > 1)'" // Skip header line ext.suffix = { "txt" } + tag = {"Test Quality"} } } From a29ae2836934515135629747d8831e440e60274b Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 9 Jun 2024 18:23:21 +0000 Subject: [PATCH 31/63] correct samtools depth tag --- conf/steps/simulation.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/steps/simulation.config b/conf/steps/simulation.config index f2fbcfda..b7a84639 100644 --- a/conf/steps/simulation.config +++ b/conf/steps/simulation.config @@ -29,7 +29,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_DEPTH' { publishDir = [enabled: false] ext.prefix = { "${meta.id}_C${meta.chr ?: "all"}.stats" } - tag = {"${meta.id}"} + tag = {"${meta1.id}"} } withName: 
'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_VIEW' { ext.prefix = { params.input_region ? From 1d3574a02a357e6f0da76dcf8b1c1b74af8ca061 Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 11 Jun 2024 18:48:51 -0300 Subject: [PATCH 32/63] Update conf/steps/validation.config Co-authored-by: Louis LE NEZET <58640615+LouisLeNezet@users.noreply.github.com> --- conf/steps/validation.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/steps/validation.config b/conf/steps/validation.config index 39e29927..6313a483 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -66,7 +66,7 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - tag = {"${meta.id} ${meta.panel} ${meta.tools}"} + tag = {"${meta.id} ${meta.panel}"} } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GLIMPSE2_CONCORDANCE' { From 5ca6ea3eb8dd1b0e807f910aa729899dc809c250 Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 11 Jun 2024 18:51:54 -0300 Subject: [PATCH 33/63] Update conf/steps/simulation.config Co-authored-by: Louis LE NEZET <58640615+LouisLeNezet@users.noreply.github.com> --- conf/steps/simulation.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/steps/simulation.config b/conf/steps/simulation.config index b7a84639..f5d05c2f 100644 --- a/conf/steps/simulation.config +++ b/conf/steps/simulation.config @@ -28,7 +28,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_DEPTH' { publishDir = [enabled: false] - ext.prefix = { "${meta.id}_C${meta.chr ?: "all"}.stats" } + ext.prefix = { "${meta1.id}_C${meta1.chr ?: "all"}.depth" } tag = {"${meta1.id}"} } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_VIEW' { From cbefc66cbcfb0e37b66eb2f28653d4c7ff7058c3 Mon Sep 17 00:00:00 2001 
From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 11 Jun 2024 22:06:11 +0000 Subject: [PATCH 34/63] add haplegend for chr21 --- tests/csv/posfile.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/csv/posfile.csv b/tests/csv/posfile.csv index 38165955..d1a245b7 100644 --- a/tests/csv/posfile.csv +++ b/tests/csv/posfile.csv @@ -1,3 +1,3 @@ panel,chr,vcf,index,txt,hap,legend -1000GP.s.norel,chr21,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.tsv.gz",,, +1000GP.s.norel,chr21,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.tsv.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.s.norel_chr21.hap.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.s.norel_chr21.legend.gz" 1000GP.s.norel,chr22,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.tsv.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22.hap.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22.legend.gz" 
From ea8e48997749726eb62fbdbd5f22c196a0e871d6 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 7 Jun 2024 21:45:49 +0200 Subject: [PATCH 35/63] Add stats to multiqc --- conf/steps/panel_prep.config | 9 +++++++++ workflows/phaseimpute/main.nf | 11 +++++++++++ 2 files changed, 20 insertions(+) diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index 615810e7..d97a425b 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -242,4 +242,13 @@ process { ] } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_STATS:.*' { + prefix = { "${meta.id}_${meta.chr}_panel" } + publishDir = [ + path: { "${params.outdir}/prep_panel/stats/" }, + mode: params.publish_dir_mode, + enabled: true + ] + } + } diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 29565704..29c563a3 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -174,6 +174,17 @@ workflow PHASEIMPUTE { ch_versions = ch_versions.mix(VCF_PHASE_SHAPEIT5.out.versions) } + // Compute stats on panel + BCFTOOLS_STATS( + ch_panel_phased, + [[],[]], + [[],[]], + [[],[]], + [[],[]], + ch_fasta.map{ [it[0], it[1]] }) + ch_versions = ch_versions.mix(BCFTOOLS_STATS.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_STATS.out.stats.map{ [it[1]] }) + // Create chunks from reference VCF VCF_CHUNK_GLIMPSE(ch_panel_phased, ch_map) ch_versions = ch_versions.mix(VCF_CHUNK_GLIMPSE.out.versions) From 15ea394a0dce20e1183aeb54a4676a2a870cd743 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 7 Jun 2024 17:27:27 +0200 Subject: [PATCH 36/63] Add parameter sim_by_chr and dinamically publish downsampled file USe samtools depth instead of coverage --- conf/steps/simulation.config | 19 ++++++++++++----- nextflow.config | 1 + nextflow_schema.json | 6 ++++++ subworkflows/local/bam_downsample/main.nf | 15 +++++++------ workflows/phaseimpute/main.nf | 26 +++++++++++++++-------- 5 files changed, 47 insertions(+), 20 
deletions(-) diff --git a/conf/steps/simulation.config b/conf/steps/simulation.config index f5d05c2f..36d8ff8a 100644 --- a/conf/steps/simulation.config +++ b/conf/steps/simulation.config @@ -28,25 +28,24 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_DEPTH' { publishDir = [enabled: false] - ext.prefix = { "${meta1.id}_C${meta1.chr ?: "all"}.depth" } - tag = {"${meta1.id}"} + ext.prefix = { "${meta.id}_C${meta.chr ?: "all"}.stats" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_VIEW' { - ext.prefix = { params.input_region ? + ext.prefix = { params.sim_by_chr ? "${meta.id}_D${meta.depth}_C${meta.chr ?: "all"}" : "${meta.id}" } publishDir = [ path: { "${params.outdir}/simulation/" }, mode: params.publish_dir_mode, - saveAs: { params.input_region ? null : it } + saveAs: { params.sim_by_chr ? null : it } ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_INDEX_1' { publishDir = [ path: { "${params.outdir}/simulation/" }, mode: params.publish_dir_mode, - saveAs: { params.input_region ? null : it } + saveAs: { params.sim_by_chr ? 
null : it } ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_MERGE' { @@ -67,4 +66,14 @@ process { ext.prefix = { "${meta.id}" } publishDir = [ enabled: false ] } + + // Coverage process + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_TRT' { + ext.prefix = { "${meta.id}_truth" } + publishDir = [ enabled: false ] + } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_SIM' { + ext.prefix = { "${meta.id}" } + publishDir = [ enabled: false ] + } } diff --git a/nextflow.config b/nextflow.config index b8ae5b48..919f061f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,6 +42,7 @@ params { // Simulate depth = 1 genotype = null + sim_by_chr = true // Validation input_truth = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 66127d46..d1ac56d3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -84,6 +84,12 @@ "default": 1, "fa_icon": "fas fa-list-ol" }, + "sim_by_chr": { + "type": "boolean", + "description": "Simulate data by chromosome then merge it or simulate all data at once", + "default": true, + "pattern": "true|false" + }, "genotype": { "type": "string", "description": "Genotype position to use to simulate the data", diff --git a/subworkflows/local/bam_downsample/main.nf b/subworkflows/local/bam_downsample/main.nf index 5edd5f87..304af995 100644 --- a/subworkflows/local/bam_downsample/main.nf +++ b/subworkflows/local/bam_downsample/main.nf @@ -57,13 +57,16 @@ workflow BAM_DOWNSAMPLE { ch_bam_emul = SAMTOOLS_VIEW.out.bam .combine(SAMTOOLS_INDEX_1.out.bai, by:0) - if (params.input_region) { + if (params.sim_by_chr == true) { SAMTOOLS_MERGE( ch_bam_emul .map{ metaICRD, bam, index -> [metaICRD.subMap("id", "depth"), bam, index] } - .groupTuple(), + .groupTuple() + .map{ metaID, bam, index -> + [ metaID + ["chr": "all"], bam, index ] + }, ch_fasta ) ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions.first()) @@ -71,13 +74,13 @@ workflow BAM_DOWNSAMPLE { 
SAMTOOLS_INDEX_2(SAMTOOLS_MERGE.out.bam) ch_versions = ch_versions.mix(SAMTOOLS_INDEX_2.out.versions.first()) - ch_bam_emul = SAMTOOLS_MERGE.out.bam + ch_bam_emul_all = SAMTOOLS_MERGE.out.bam .combine(SAMTOOLS_INDEX_2.out.bai, by:0) + } else { + ch_bam_emul_all = ch_bam_emul } - ch_bam_emul = ch_bam_emul - .map{ meta, bam, index -> [meta + [chr: "all"], bam, index]} emit: - bam_emul = ch_bam_emul // channel: [ [id, chr, region, depth], bam, bai ] + bam_emul = ch_bam_emul_all // channel: [ [id, chr, region, depth], bam, bai ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 29c563a3..f7b857a4 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -109,25 +109,33 @@ workflow PHASEIMPUTE { // Compute coverage of input files SAMTOOLS_COVERAGE_TRT(ch_input_sim, ch_fasta) - ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_TRT.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_TRT.out.versions) ch_multiqc_files = ch_multiqc_files.mix(SAMTOOLS_COVERAGE_TRT.out.coverage.map{it[1]}) - if (params.input_region) { - // Split the bam into the regions specified + if (params.sim_by_chr == true) { + // Split the bam into the region specified BAM_REGION(ch_input_sim, ch_region, ch_fasta) - ch_versions = ch_versions.mix(BAM_REGION.out.versions) - ch_input_sim = BAM_REGION.out.bam_region + ch_versions = ch_versions.mix(BAM_REGION.out.versions) + ch_input_dwn = BAM_REGION.out.bam_region + } else { + ch_input_dwn = ch_input_sim + .map{ meta, bam, index -> [ meta + [chr: "all"], bam, index ] } } if (params.depth) { // Downsample input to desired depth - BAM_DOWNSAMPLE(ch_input_sim, ch_depth, ch_fasta) - ch_versions = ch_versions.mix(BAM_DOWNSAMPLE.out.versions) - ch_input_impute = BAM_DOWNSAMPLE.out.bam_emul + BAM_DOWNSAMPLE( + ch_input_dwn, + ch_depth, + ch_fasta + ) + ch_versions = ch_versions.mix(BAM_DOWNSAMPLE.out.versions) + ch_input_impute = 
BAM_DOWNSAMPLE.out.bam_emul + ch_input_validate_truth = ch_input_sim // Compute coverage of input files SAMTOOLS_COVERAGE_SIM(BAM_DOWNSAMPLE.out.bam_emul, ch_fasta) - ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_SIM.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_SIM.out.versions) ch_multiqc_files = ch_multiqc_files.mix(SAMTOOLS_COVERAGE_SIM.out.coverage.map{it[1]}) } From d63350ee321cb243471759505fae6508ee9d29d3 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 7 Jun 2024 17:42:26 +0200 Subject: [PATCH 37/63] Add description and change parameter name --- conf/steps/simulation.config | 6 +++--- nextflow.config | 2 +- nextflow_schema.json | 4 ++-- subworkflows/local/bam_downsample/main.nf | 2 +- workflows/phaseimpute/main.nf | 14 +++++++++----- 5 files changed, 16 insertions(+), 12 deletions(-) diff --git a/conf/steps/simulation.config b/conf/steps/simulation.config index 36d8ff8a..aa48cfb4 100644 --- a/conf/steps/simulation.config +++ b/conf/steps/simulation.config @@ -31,21 +31,21 @@ process { ext.prefix = { "${meta.id}_C${meta.chr ?: "all"}.stats" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_VIEW' { - ext.prefix = { params.sim_by_chr ? + ext.prefix = { params.sim_by_reg ? "${meta.id}_D${meta.depth}_C${meta.chr ?: "all"}" : "${meta.id}" } publishDir = [ path: { "${params.outdir}/simulation/" }, mode: params.publish_dir_mode, - saveAs: { params.sim_by_chr ? null : it } + saveAs: { params.sim_by_reg ? null : it } ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_INDEX_1' { publishDir = [ path: { "${params.outdir}/simulation/" }, mode: params.publish_dir_mode, - saveAs: { params.sim_by_chr ? null : it } + saveAs: { params.sim_by_reg ? 
null : it } ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_MERGE' { diff --git a/nextflow.config b/nextflow.config index 919f061f..4602c687 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,7 +42,7 @@ params { // Simulate depth = 1 genotype = null - sim_by_chr = true + sim_by_reg = true // Validation input_truth = null diff --git a/nextflow_schema.json b/nextflow_schema.json index d1ac56d3..19b6f113 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -84,9 +84,9 @@ "default": 1, "fa_icon": "fas fa-list-ol" }, - "sim_by_chr": { + "sim_by_reg": { "type": "boolean", - "description": "Simulate data by chromosome then merge it or simulate all data at once", + "description": "Simulate data only for the given region then merge it or simulate all data at once", "default": true, "pattern": "true|false" }, diff --git a/subworkflows/local/bam_downsample/main.nf b/subworkflows/local/bam_downsample/main.nf index 304af995..09b04b1f 100644 --- a/subworkflows/local/bam_downsample/main.nf +++ b/subworkflows/local/bam_downsample/main.nf @@ -57,7 +57,7 @@ workflow BAM_DOWNSAMPLE { ch_bam_emul = SAMTOOLS_VIEW.out.bam .combine(SAMTOOLS_INDEX_1.out.bai, by:0) - if (params.sim_by_chr == true) { + if (params.sim_by_reg == true) { SAMTOOLS_MERGE( ch_bam_emul .map{ diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index f7b857a4..8a4c9a33 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -101,6 +101,11 @@ workflow PHASEIMPUTE { // Simulate data if asked // if (params.steps.split(',').contains("simulate") || params.steps.split(',').contains("all")) { + // Output channel of simulate process + ch_sim_output = Channel.empty() + // Set truth channel + ch_input_validate_truth = ch_input_sim + // Test if the input are all bam files getAllFilesExtension(ch_input_sim) .map{ if (it != "bam") { @@ -109,13 +114,13 @@ workflow PHASEIMPUTE { // Compute coverage of input files 
SAMTOOLS_COVERAGE_TRT(ch_input_sim, ch_fasta) - ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_TRT.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_TRT.out.versions) ch_multiqc_files = ch_multiqc_files.mix(SAMTOOLS_COVERAGE_TRT.out.coverage.map{it[1]}) - if (params.sim_by_chr == true) { + if (params.sim_by_reg == true) { // Split the bam into the region specified BAM_REGION(ch_input_sim, ch_region, ch_fasta) - ch_versions = ch_versions.mix(BAM_REGION.out.versions) + ch_versions = ch_versions.mix(BAM_REGION.out.versions) ch_input_dwn = BAM_REGION.out.bam_region } else { ch_input_dwn = ch_input_sim @@ -131,11 +136,10 @@ workflow PHASEIMPUTE { ) ch_versions = ch_versions.mix(BAM_DOWNSAMPLE.out.versions) ch_input_impute = BAM_DOWNSAMPLE.out.bam_emul - ch_input_validate_truth = ch_input_sim // Compute coverage of input files SAMTOOLS_COVERAGE_SIM(BAM_DOWNSAMPLE.out.bam_emul, ch_fasta) - ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_SIM.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_SIM.out.versions) ch_multiqc_files = ch_multiqc_files.mix(SAMTOOLS_COVERAGE_SIM.out.coverage.map{it[1]}) } From ebac4b202edb06b92c05be15ca5e32f976dd8bd1 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Sat, 8 Jun 2024 15:11:50 +0200 Subject: [PATCH 38/63] Change to used input_region --- conf/steps/simulation.config | 6 +++--- nextflow.config | 1 - nextflow_schema.json | 6 ------ subworkflows/local/bam_downsample/main.nf | 2 +- workflows/phaseimpute/main.nf | 4 +--- 5 files changed, 5 insertions(+), 14 deletions(-) diff --git a/conf/steps/simulation.config b/conf/steps/simulation.config index aa48cfb4..9943f45b 100644 --- a/conf/steps/simulation.config +++ b/conf/steps/simulation.config @@ -31,21 +31,21 @@ process { ext.prefix = { "${meta.id}_C${meta.chr ?: "all"}.stats" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_VIEW' { - ext.prefix = { params.sim_by_reg ? + ext.prefix = { params.input_region ? 
"${meta.id}_D${meta.depth}_C${meta.chr ?: "all"}" : "${meta.id}" } publishDir = [ path: { "${params.outdir}/simulation/" }, mode: params.publish_dir_mode, - saveAs: { params.sim_by_reg ? null : it } + saveAs: { params.input_region ? null : it } ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_INDEX_1' { publishDir = [ path: { "${params.outdir}/simulation/" }, mode: params.publish_dir_mode, - saveAs: { params.sim_by_reg ? null : it } + saveAs: { params.input_region ? null : it } ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_MERGE' { diff --git a/nextflow.config b/nextflow.config index 4602c687..b8ae5b48 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,7 +42,6 @@ params { // Simulate depth = 1 genotype = null - sim_by_reg = true // Validation input_truth = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 19b6f113..66127d46 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -84,12 +84,6 @@ "default": 1, "fa_icon": "fas fa-list-ol" }, - "sim_by_reg": { - "type": "boolean", - "description": "Simulate data only for the given region then merge it or simulate all data at once", - "default": true, - "pattern": "true|false" - }, "genotype": { "type": "string", "description": "Genotype position to use to simulate the data", diff --git a/subworkflows/local/bam_downsample/main.nf b/subworkflows/local/bam_downsample/main.nf index 09b04b1f..e8464cd8 100644 --- a/subworkflows/local/bam_downsample/main.nf +++ b/subworkflows/local/bam_downsample/main.nf @@ -57,7 +57,7 @@ workflow BAM_DOWNSAMPLE { ch_bam_emul = SAMTOOLS_VIEW.out.bam .combine(SAMTOOLS_INDEX_1.out.bai, by:0) - if (params.sim_by_reg == true) { + if (params.input_region) { SAMTOOLS_MERGE( ch_bam_emul .map{ diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 8a4c9a33..7a22f951 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -103,8 +103,6 @@ workflow PHASEIMPUTE { if 
(params.steps.split(',').contains("simulate") || params.steps.split(',').contains("all")) { // Output channel of simulate process ch_sim_output = Channel.empty() - // Set truth channel - ch_input_validate_truth = ch_input_sim // Test if the input are all bam files getAllFilesExtension(ch_input_sim) @@ -117,7 +115,7 @@ workflow PHASEIMPUTE { ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_TRT.out.versions) ch_multiqc_files = ch_multiqc_files.mix(SAMTOOLS_COVERAGE_TRT.out.coverage.map{it[1]}) - if (params.sim_by_reg == true) { + if (params.input_region) { // Split the bam into the region specified BAM_REGION(ch_input_sim, ch_region, ch_fasta) ch_versions = ch_versions.mix(BAM_REGION.out.versions) From 043680752dcd8099cec0fc04b2c7a7ecaf4a8c5f Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Sat, 8 Jun 2024 18:48:28 +0200 Subject: [PATCH 39/63] Simplify channels creations --- subworkflows/local/bam_downsample/main.nf | 13 +++++-------- workflows/phaseimpute/main.nf | 20 +++++--------------- 2 files changed, 10 insertions(+), 23 deletions(-) diff --git a/subworkflows/local/bam_downsample/main.nf b/subworkflows/local/bam_downsample/main.nf index e8464cd8..5edd5f87 100644 --- a/subworkflows/local/bam_downsample/main.nf +++ b/subworkflows/local/bam_downsample/main.nf @@ -63,10 +63,7 @@ workflow BAM_DOWNSAMPLE { .map{ metaICRD, bam, index -> [metaICRD.subMap("id", "depth"), bam, index] } - .groupTuple() - .map{ metaID, bam, index -> - [ metaID + ["chr": "all"], bam, index ] - }, + .groupTuple(), ch_fasta ) ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions.first()) @@ -74,13 +71,13 @@ workflow BAM_DOWNSAMPLE { SAMTOOLS_INDEX_2(SAMTOOLS_MERGE.out.bam) ch_versions = ch_versions.mix(SAMTOOLS_INDEX_2.out.versions.first()) - ch_bam_emul_all = SAMTOOLS_MERGE.out.bam + ch_bam_emul = SAMTOOLS_MERGE.out.bam .combine(SAMTOOLS_INDEX_2.out.bai, by:0) - } else { - ch_bam_emul_all = ch_bam_emul } + ch_bam_emul = ch_bam_emul + .map{ meta, bam, index -> [meta + [chr: "all"], bam, 
index]} emit: - bam_emul = ch_bam_emul_all // channel: [ [id, chr, region, depth], bam, bai ] + bam_emul = ch_bam_emul // channel: [ [id, chr, region, depth], bam, bai ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 7a22f951..29c563a3 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -101,9 +101,6 @@ workflow PHASEIMPUTE { // Simulate data if asked // if (params.steps.split(',').contains("simulate") || params.steps.split(',').contains("all")) { - // Output channel of simulate process - ch_sim_output = Channel.empty() - // Test if the input are all bam files getAllFilesExtension(ch_input_sim) .map{ if (it != "bam") { @@ -116,24 +113,17 @@ workflow PHASEIMPUTE { ch_multiqc_files = ch_multiqc_files.mix(SAMTOOLS_COVERAGE_TRT.out.coverage.map{it[1]}) if (params.input_region) { - // Split the bam into the region specified + // Split the bam into the regions specified BAM_REGION(ch_input_sim, ch_region, ch_fasta) ch_versions = ch_versions.mix(BAM_REGION.out.versions) - ch_input_dwn = BAM_REGION.out.bam_region - } else { - ch_input_dwn = ch_input_sim - .map{ meta, bam, index -> [ meta + [chr: "all"], bam, index ] } + ch_input_sim = BAM_REGION.out.bam_region } if (params.depth) { // Downsample input to desired depth - BAM_DOWNSAMPLE( - ch_input_dwn, - ch_depth, - ch_fasta - ) - ch_versions = ch_versions.mix(BAM_DOWNSAMPLE.out.versions) - ch_input_impute = BAM_DOWNSAMPLE.out.bam_emul + BAM_DOWNSAMPLE(ch_input_sim, ch_depth, ch_fasta) + ch_versions = ch_versions.mix(BAM_DOWNSAMPLE.out.versions) + ch_input_impute = BAM_DOWNSAMPLE.out.bam_emul // Compute coverage of input files SAMTOOLS_COVERAGE_SIM(BAM_DOWNSAMPLE.out.bam_emul, ch_fasta) From eb7a42ebe84d7b4846514e8bc7f5f3d7a401eb22 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 7 Jun 2024 22:40:59 +0000 Subject: [PATCH 40/63] emit final prepared vcf --- 
conf/steps/panel_prep.config | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index d97a425b..4ed60136 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -101,6 +101,14 @@ process { ] } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_CONVERT' { + ext.args = {"--haplegendsample ${meta.id}_${meta.chr}"} + publishDir = [ + path: { "${params.outdir}/prep_panel/compute_freq" }, + enabled: true + ] + } + // Subworkflow: VCF_PHASE_SHAPEIT5 withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_SHAPEIT5:.*' { publishDir = [ From 5d8cc46dbf8f86d5a471400669077aa3eb0f82c5 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 7 Jun 2024 22:44:39 +0000 Subject: [PATCH 41/63] accept external chunks in glimpse2 --- conf/test_glimpse2.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/test_glimpse2.config b/conf/test_glimpse2.config index 5c3ac281..39f93681 100644 --- a/conf/test_glimpse2.config +++ b/conf/test_glimpse2.config @@ -32,6 +32,7 @@ params { // External params chunks = "${projectDir}/tests/csv/chunks.csv" + posfile = "${projectDir}/tests/csv/posfile.csv" // Impute tools tools = "glimpse2" From f8755eb7034d5e578c60c0d9270760a35749cf95 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sat, 8 Jun 2024 11:50:30 +0000 Subject: [PATCH 42/63] use input panel from panelprep --- tests/csv/panel_2.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/csv/panel_2.csv b/tests/csv/panel_2.csv index de76a31c..f06d9a19 100644 --- a/tests/csv/panel_2.csv +++ b/tests/csv/panel_2.csv @@ -1,2 +1,2 @@ -panel,chr,vcf,index 
-1000GP.s.norel,chr22,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22_fixed.vcf.gz,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22_fixed.vcf.gz.tbi +panel,chr,vcf,index,hap,legend +1000GP.s.norel,chr22,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22_fixed.vcf.gz,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22_fixed.vcf.gz.tbi,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22.hap.gz,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22.legend.gz From 0a369f1331a56b3c4b338ef3bd6a268083de1f19 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Sun, 9 Jun 2024 19:17:01 +0200 Subject: [PATCH 43/63] Update multiqc report --- conf/steps/panel_prep.config | 9 --------- conf/steps/simulation.config | 10 ---------- workflows/phaseimpute/main.nf | 11 ----------- 3 files changed, 30 deletions(-) diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index 4ed60136..5c5a70c7 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -250,13 +250,4 @@ process { ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_STATS:.*' { - prefix = { "${meta.id}_${meta.chr}_panel" } - publishDir = [ - path: { "${params.outdir}/prep_panel/stats/" }, - mode: params.publish_dir_mode, - enabled: true - ] - } - } diff --git a/conf/steps/simulation.config b/conf/steps/simulation.config index 9943f45b..44bcb76b 100644 --- a/conf/steps/simulation.config +++ b/conf/steps/simulation.config @@ -66,14 +66,4 @@ process { ext.prefix = { "${meta.id}" } publishDir = [ enabled: false ] } - - // Coverage process - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_TRT' { - ext.prefix = { "${meta.id}_truth" } - publishDir = [ enabled: false ] - 
} - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_SIM' { - ext.prefix = { "${meta.id}" } - publishDir = [ enabled: false ] - } } diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 29c563a3..29565704 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -174,17 +174,6 @@ workflow PHASEIMPUTE { ch_versions = ch_versions.mix(VCF_PHASE_SHAPEIT5.out.versions) } - // Compute stats on panel - BCFTOOLS_STATS( - ch_panel_phased, - [[],[]], - [[],[]], - [[],[]], - [[],[]], - ch_fasta.map{ [it[0], it[1]] }) - ch_versions = ch_versions.mix(BCFTOOLS_STATS.out.versions) - ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_STATS.out.stats.map{ [it[1]] }) - // Create chunks from reference VCF VCF_CHUNK_GLIMPSE(ch_panel_phased, ch_map) ch_versions = ch_versions.mix(VCF_CHUNK_GLIMPSE.out.versions) From 7c92b3c30900ca093cced979630720395abc0d86 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 12 Jun 2024 15:05:06 +0200 Subject: [PATCH 44/63] Change names for multiqc --- conf/modules.config | 4 ++-- conf/steps/imputation_glimpse1.config | 1 - conf/steps/imputation_glimpse2.config | 1 - conf/steps/simulation.config | 10 ---------- conf/steps/validation.config | 2 +- 5 files changed, 3 insertions(+), 15 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 9bd02bf3..093c7249 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -31,11 +31,11 @@ process { // Simulation // Coverage process withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_TRT' { - ext.prefix = { "${meta.id}_truth" } + ext.prefix = { "${meta.id}_A-truth" } publishDir = [ enabled: false ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_SIM' { - ext.prefix = { "${meta.id}_sim" } + ext.prefix = { "${meta.id}_B-sim" } publishDir = [ enabled: false ] } diff --git a/conf/steps/imputation_glimpse1.config b/conf/steps/imputation_glimpse1.config index e52802d0..ef1e69a4 100644 --- 
a/conf/steps/imputation_glimpse1.config +++ b/conf/steps/imputation_glimpse1.config @@ -84,6 +84,5 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE1:BCFTOOLS_INDEX' { ext.args = "--tbi" - ext.prefix = { "${meta.id}_glimpse1" } } } diff --git a/conf/steps/imputation_glimpse2.config b/conf/steps/imputation_glimpse2.config index 8b705c6a..f6710ef8 100644 --- a/conf/steps/imputation_glimpse2.config +++ b/conf/steps/imputation_glimpse2.config @@ -55,7 +55,6 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE2:BCFTOOLS_INDEX' { ext.args = "--tbi" - ext.prefix = { "${meta.id}_glimpse2" } } } diff --git a/conf/steps/simulation.config b/conf/steps/simulation.config index 44bcb76b..45b93356 100644 --- a/conf/steps/simulation.config +++ b/conf/steps/simulation.config @@ -56,14 +56,4 @@ process { ext.args = "" tag = {"${meta.id}"} } - - // Coverage process - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_TRT' { - ext.prefix = { "${meta.id}_truth" } - publishDir = [ enabled: false ] - } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_SIM' { - ext.prefix = { "${meta.id}" } - publishDir = [ enabled: false ] - } } diff --git a/conf/steps/validation.config b/conf/steps/validation.config index 6313a483..31e8dc59 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -43,7 +43,6 @@ process { // Concatenate the truth set withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:.*' { - ext.prefix = { "${meta.id}_truth" } publishDir = [ path: { "${params.outdir}/validation/concat" }, mode: params.publish_dir_mode, @@ -53,6 +52,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:BCFTOOLS_CONCAT' { ext.args = ["--ligate", "--output-type z",].join(' ') + ext.prefix = { "${meta.id}_A-truth" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:BCFTOOLS_INDEX' { From 50dd78a32341ce983894c7f60f862b0bce694696 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 12 Jun 2024 15:05:59 
+0200 Subject: [PATCH 45/63] Move to bcf for glimpse2 --- conf/steps/imputation_glimpse2.config | 4 ++-- subworkflows/local/vcf_impute_glimpse2/main.nf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/steps/imputation_glimpse2.config b/conf/steps/imputation_glimpse2.config index f6710ef8..a902cdb3 100644 --- a/conf/steps/imputation_glimpse2.config +++ b/conf/steps/imputation_glimpse2.config @@ -20,12 +20,12 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:GLIMPSE2_PHASE' { ext.prefix = { "${meta.id}_${meta.chunk.replace(':','_')}_glimpse2" } ext.args = "--keep-monomorphic-ref-sites" - ext.suffix = "vcf.gz" + ext.suffix = "bcf" publishDir = [ enabled: false ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_1' { - ext.args = "--tbi" + ext.args = "--csi" publishDir = [ enabled: false ] } diff --git a/subworkflows/local/vcf_impute_glimpse2/main.nf b/subworkflows/local/vcf_impute_glimpse2/main.nf index cadc1b4a..19bdbbc8 100644 --- a/subworkflows/local/vcf_impute_glimpse2/main.nf +++ b/subworkflows/local/vcf_impute_glimpse2/main.nf @@ -49,7 +49,7 @@ workflow VCF_IMPUTE_GLIMPSE2 { // Ligate all phased files in one and index it ligate_input = GLIMPSE2_PHASE.out.phased_variants - .join( BCFTOOLS_INDEX_1.out.tbi ) + .join( BCFTOOLS_INDEX_1.out.csi ) .map{ metaIPCR, vcf, index -> [metaIPCR.subMap("id", "panel", "chr"), vcf, index] } .groupTuple() From bb8e1839f643d62674e1032ae8cd3698b1324658 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 12 Jun 2024 15:06:46 +0200 Subject: [PATCH 46/63] Fix indent --- .../local/utils_nfcore_phaseimpute_pipeline/main.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index 5f584162..b462b8ab 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ 
b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -329,10 +329,10 @@ def validateInputParameters() { assert params.panel, "No --panel provided for imputation with GLIMPSE" } - // Check that input_truth is provided when running validate - if (params.steps.split(',').find { it in ["all", "validate"] } ) { - assert params.input_truth, "No --input_truth was provided for --steps validate" - } + // Check that input_truth is provided when running validate + if (params.steps.split(',').find { it in ["all", "validate"] } ) { + assert params.input_truth, "No --input_truth was provided for --steps validate" + } } // Emit a warning if both panel and (chunks || posfile) are used as input @@ -347,7 +347,7 @@ def validateInputParameters() { if (params.panel && params.steps.split(',').find { it in ["impute"] } && !params.steps.split(',').find { it in ["all", "panelprep"] } ) { log.info("Provided `--panel` will be used in `--steps impute`. Make sure it has been previously prepared with `--steps panelprep`") } - } +} // // Check if all input files have the same extension From 71e655d973904972a7214673a7252590e0f6d5a3 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 12 Jun 2024 15:08:52 +0200 Subject: [PATCH 47/63] Update snap with stats --- .../phaseimpute/tests/test_all.nf.test.snap | 102 ++++++++++++------ 1 file changed, 69 insertions(+), 33 deletions(-) diff --git a/workflows/phaseimpute/tests/test_all.nf.test.snap b/workflows/phaseimpute/tests/test_all.nf.test.snap index c018fc50..d38fc737 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test.snap +++ b/workflows/phaseimpute/tests/test_all.nf.test.snap @@ -53,12 +53,25 @@ "imputation/quilt/concat/NA20359_quilt.vcf.gz", "imputation/quilt/concat/NA20359_quilt.vcf.gz.tbi", "imputation/quilt/concat/versions.yml", - "imputation/stitch/concat/NA12878.vcf.gz", - "imputation/stitch/concat/NA12878.vcf.gz.tbi", - "imputation/stitch/concat/NA19401.vcf.gz", - "imputation/stitch/concat/NA19401.vcf.gz.tbi", - 
"imputation/stitch/concat/NA20359.vcf.gz", - "imputation/stitch/concat/NA20359.vcf.gz.tbi", + "imputation/stats/NA12878_glimpse1.bcftools_stats.txt", + "imputation/stats/NA12878_glimpse2.bcftools_stats.txt", + "imputation/stats/NA12878_quilt.bcftools_stats.txt", + "imputation/stats/NA12878_stitch_stitch.bcftools_stats.txt", + "imputation/stats/NA19401_glimpse1.bcftools_stats.txt", + "imputation/stats/NA19401_glimpse2.bcftools_stats.txt", + "imputation/stats/NA19401_quilt.bcftools_stats.txt", + "imputation/stats/NA19401_stitch_stitch.bcftools_stats.txt", + "imputation/stats/NA20359_glimpse1.bcftools_stats.txt", + "imputation/stats/NA20359_glimpse2.bcftools_stats.txt", + "imputation/stats/NA20359_quilt.bcftools_stats.txt", + "imputation/stats/NA20359_stitch_stitch.bcftools_stats.txt", + "imputation/stats/versions.yml", + "imputation/stitch/concat/NA12878_stitch.vcf.gz", + "imputation/stitch/concat/NA12878_stitch.vcf.gz.tbi", + "imputation/stitch/concat/NA19401_stitch.vcf.gz", + "imputation/stitch/concat/NA19401_stitch.vcf.gz.tbi", + "imputation/stitch/concat/NA20359_stitch.vcf.gz", + "imputation/stitch/concat/NA20359_stitch.vcf.gz.tbi", "imputation/stitch/concat/versions.yml" ], [ @@ -90,36 +103,39 @@ "prep_panel/sites/vcf/1000GP.s.norel_chr21_glimpse1_sites.vcf.gz.csi", "prep_panel/sites/vcf/1000GP.s.norel_chr22_glimpse1_sites.vcf.gz", "prep_panel/sites/vcf/1000GP.s.norel_chr22_glimpse1_sites.vcf.gz.csi", - "prep_panel/sites/vcf/versions.yml" + "prep_panel/sites/vcf/versions.yml", + "prep_panel/stats/1000GP.s.norel.bcftools_stats.txt", + "prep_panel/stats/versions.yml" ], [ "validation/NA12878_P1000GP.s.norel_Tglimpse1_SNP.txt", "validation/NA12878_P1000GP.s.norel_Tglimpse2_SNP.txt", "validation/NA12878_P1000GP.s.norel_Tquilt_SNP.txt", - "validation/NA12878_P1000GP.s.norel_Tstitch_SNP.txt", "validation/NA19401_P1000GP.s.norel_Tglimpse1_SNP.txt", "validation/NA19401_P1000GP.s.norel_Tglimpse2_SNP.txt", "validation/NA19401_P1000GP.s.norel_Tquilt_SNP.txt", - 
"validation/NA19401_P1000GP.s.norel_Tstitch_SNP.txt", "validation/NA20359_P1000GP.s.norel_Tglimpse1_SNP.txt", "validation/NA20359_P1000GP.s.norel_Tglimpse2_SNP.txt", "validation/NA20359_P1000GP.s.norel_Tquilt_SNP.txt", - "validation/NA20359_P1000GP.s.norel_Tstitch_SNP.txt", "validation/TestQuality.txt", - "validation/concat/NA12878_truth_concat.vcf.gz", - "validation/concat/NA12878_truth_concat.vcf.gz.tbi", - "validation/concat/NA19401_truth_concat.vcf.gz", - "validation/concat/NA19401_truth_concat.vcf.gz.tbi", - "validation/concat/NA20359_truth_concat.vcf.gz", - "validation/concat/NA20359_truth_concat.vcf.gz.tbi" + "validation/concat/NA12878_A-truth.vcf.gz", + "validation/concat/NA12878_A-truth.vcf.gz.tbi", + "validation/concat/NA19401_A-truth.vcf.gz", + "validation/concat/NA19401_A-truth.vcf.gz.tbi", + "validation/concat/NA20359_A-truth.vcf.gz", + "validation/concat/NA20359_A-truth.vcf.gz.tbi", + "validation/stats/NA12878_truth.bcftools_stats.txt", + "validation/stats/NA19401_truth.bcftools_stats.txt", + "validation/stats/NA20359_truth.bcftools_stats.txt", + "validation/stats/versions.yml" ], 1779 ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nextflow": "23.10.1" }, - "timestamp": "2024-06-08T12:22:26.975587517" + "timestamp": "2024-06-12T13:02:50.1244856" }, "Check test_validate": { "content": [ @@ -128,19 +144,23 @@ "validation/NA19401_Pnull_Tnull_SNP.txt", "validation/NA20359_Pnull_Tnull_SNP.txt", "validation/TestQuality.txt", - "validation/concat/NA12878_truth_concat.vcf.gz", - "validation/concat/NA12878_truth_concat.vcf.gz.tbi", - "validation/concat/NA19401_truth_concat.vcf.gz", - "validation/concat/NA19401_truth_concat.vcf.gz.tbi", - "validation/concat/NA20359_truth_concat.vcf.gz", - "validation/concat/NA20359_truth_concat.vcf.gz.tbi" + "validation/concat/NA12878_A-truth.vcf.gz", + "validation/concat/NA12878_A-truth.vcf.gz.tbi", + "validation/concat/NA19401_A-truth.vcf.gz", + "validation/concat/NA19401_A-truth.vcf.gz.tbi", + 
"validation/concat/NA20359_A-truth.vcf.gz", + "validation/concat/NA20359_A-truth.vcf.gz.tbi", + "validation/stats/NA12878_truth.bcftools_stats.txt", + "validation/stats/NA19401_truth.bcftools_stats.txt", + "validation/stats/NA20359_truth.bcftools_stats.txt", + "validation/stats/versions.yml" ] ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-05-24T15:09:05.11577274" + "timestamp": "2024-06-12T12:54:36.243451019" }, "Check test_quilt": { "content": [ @@ -164,51 +184,67 @@ "Check test_sim": { "content": [ [ - + "simulation/NA12878.bam", + "simulation/NA12878.bam.bai", + "simulation/NA19401.bam", + "simulation/NA19401.bam.bai", + "simulation/NA20359.bam", + "simulation/NA20359.bam.bai", + "simulation/csv/simulate.csv" ] ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-06-11T17:29:33.926536569" + "timestamp": "2024-06-12T12:53:09.565138506" }, "Check test_glimpse2": { "content": [ [ + "imputation/csv/impute.csv", "imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz", "imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz.tbi", "imputation/glimpse2/concat/NA19401_glimpse2.vcf.gz", "imputation/glimpse2/concat/NA19401_glimpse2.vcf.gz.tbi", "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz", "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz.tbi", - "imputation/glimpse2/concat/versions.yml" + "imputation/glimpse2/concat/versions.yml", + "imputation/stats/NA12878_glimpse2.bcftools_stats.txt", + "imputation/stats/NA19401_glimpse2.bcftools_stats.txt", + "imputation/stats/NA20359_glimpse2.bcftools_stats.txt", + "imputation/stats/versions.yml" ], - 1777 + 936 ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-05-24T15:12:55.355916727" + "timestamp": "2024-06-12T14:22:47.18990097" }, "Check test": { "content": [ [ + "imputation/csv/impute.csv", "imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz", "imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz.tbi", 
"imputation/glimpse1/concat/NA19401_glimpse1.vcf.gz", "imputation/glimpse1/concat/NA19401_glimpse1.vcf.gz.tbi", "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz", "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz.tbi", - "imputation/glimpse1/concat/versions.yml" + "imputation/glimpse1/concat/versions.yml", + "imputation/stats/NA12878_glimpse1.bcftools_stats.txt", + "imputation/stats/NA19401_glimpse1.bcftools_stats.txt", + "imputation/stats/NA20359_glimpse1.bcftools_stats.txt", + "imputation/stats/versions.yml" ], - 1779 + 930 ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-05-24T14:30:09.449862457" + "timestamp": "2024-06-12T12:47:57.195765127" } } \ No newline at end of file From 06abcb9960d4f987dc10ef23a7888d0ea6657add Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 12 Jun 2024 17:24:32 +0200 Subject: [PATCH 48/63] Update stitch samples id to correctly assess it from filename --- assets/schema_input.json | 4 ++-- subworkflows/local/vcf_samples_bcftools/main.nf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 971c3fb3..f7083b29 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -9,8 +9,8 @@ "properties": { "sample": { "type": "string", - "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces", + "pattern": "^[a-zA-Z0-9]+$", + "errorMessage": "Sample name must be provided and cannot contain spaces nor special characters '_' or '.' 
.", "meta": ["id"] }, "file": { diff --git a/subworkflows/local/vcf_samples_bcftools/main.nf b/subworkflows/local/vcf_samples_bcftools/main.nf index 51e42bc1..b2f1382e 100644 --- a/subworkflows/local/vcf_samples_bcftools/main.nf +++ b/subworkflows/local/vcf_samples_bcftools/main.nf @@ -14,7 +14,7 @@ workflow VCF_SAMPLES_BCFTOOLS { ch_vcf_samples = BCFTOOLS_PLUGINSPLIT.out.vcf .transpose() - .map{metaITC, vcf -> [metaITC + [id: vcf.getBaseName().tokenize(".")[0]], vcf]} + .map{metaITC, vcf -> [metaITC + [id: vcf.getBaseName().tokenize(".")[0].tokenize("_")[0]], vcf]} BCFTOOLS_INDEX(ch_vcf_samples) ch_versions = ch_versions.mix(BCFTOOLS_INDEX.out.versions.first()) From 762b791f92f7cccb0e96744b4dab703b5a35b09f Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 12 Jun 2024 17:25:38 +0200 Subject: [PATCH 49/63] Remove versions.yml from output --- conf/modules.config | 7 +++++-- conf/steps/imputation_glimpse1.config | 1 + conf/steps/imputation_glimpse2.config | 1 + conf/steps/imputation_quilt.config | 1 + conf/steps/imputation_stitch.config | 1 + conf/steps/panel_prep.config | 25 +++++++++++++++++-------- 6 files changed, 26 insertions(+), 10 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 093c7249..e1846284 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -46,7 +46,8 @@ process { publishDir = [ path: { "${params.outdir}/prep_panel/stats/" }, mode: params.publish_dir_mode, - enabled: true + enabled: true, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } @@ -57,6 +58,7 @@ process { path: { "${params.outdir}/validation/stats" }, mode: params.publish_dir_mode, enabled: true, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } // IMPUTE @@ -65,7 +67,8 @@ process { publishDir = [ path: { "${params.outdir}/imputation/stats" }, mode: params.publish_dir_mode, - enabled: true + enabled: true, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } } diff --git a/conf/steps/imputation_glimpse1.config b/conf/steps/imputation_glimpse1.config index ef1e69a4..351008d0 100644 --- a/conf/steps/imputation_glimpse1.config +++ b/conf/steps/imputation_glimpse1.config @@ -74,6 +74,7 @@ process { publishDir = [ path: { "${params.outdir}/imputation/glimpse1/concat" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } diff --git a/conf/steps/imputation_glimpse2.config b/conf/steps/imputation_glimpse2.config index a902cdb3..c87a7c7f 100644 --- a/conf/steps/imputation_glimpse2.config +++ b/conf/steps/imputation_glimpse2.config @@ -45,6 +45,7 @@ process { publishDir = [ path: { "${params.outdir}/imputation/glimpse2/concat" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } diff --git a/conf/steps/imputation_quilt.config b/conf/steps/imputation_quilt.config index c7ca81cb..932ab755 100644 --- a/conf/steps/imputation_quilt.config +++ b/conf/steps/imputation_quilt.config @@ -48,6 +48,7 @@ process { publishDir = [ path: { "${params.outdir}/imputation/quilt/concat" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } diff --git a/conf/steps/imputation_stitch.config b/conf/steps/imputation_stitch.config index 981e94ee..b452d954 100644 --- a/conf/steps/imputation_stitch.config +++ b/conf/steps/imputation_stitch.config @@ -56,6 +56,7 @@ process { publishDir = [ path: { "${params.outdir}/imputation/stitch/concat" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index 5c5a70c7..46f170a5 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -15,6 +15,7 @@ process { publishDir = [ path: { "${params.outdir}/prep_panel/" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: false ] tag = {"${meta.id} ${meta.chr}"} @@ -55,6 +56,7 @@ process { ext.prefix = { "${meta.id}_${meta.chr}_biallelic_snps" } publishDir = [ path: { "${params.outdir}/prep_panel/normalized" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true ] } @@ -63,6 +65,7 @@ process { ext.args = "--tbi" publishDir = [ path: { "${params.outdir}/prep_panel/normalized" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true ] } @@ -73,6 +76,7 @@ process { ext.prefix = { "${meta.id}_${meta.chr}_biallelic_removed_samples" } publishDir = [ path: { "${params.outdir}/prep_panel/normalized" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true ] } @@ -81,6 +85,7 @@ process { ext.args = "--tbi" publishDir = [ path: { "${params.outdir}/prep_panel/normalized" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true ] } @@ -89,6 +94,7 @@ process { ext.prefix = { "${meta.id}_${meta.chr}" } publishDir = [ path: { "${params.outdir}/prep_panel/compute_freq" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, enabled: true ] } @@ -97,14 +103,7 @@ process { ext.args = "--tbi" publishDir = [ path: { "${params.outdir}/prep_panel/compute_freq" }, - enabled: true - ] - } - - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_CONVERT' { - ext.args = {"--haplegendsample ${meta.id}_${meta.chr}"} - publishDir = [ - path: { "${params.outdir}/prep_panel/compute_freq" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true ] } @@ -114,6 +113,7 @@ process { publishDir = [ path: { "${params.outdir}/prep_panel/phasing" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: false ] tag = {"${meta.id} ${meta.chr}"} @@ -139,6 +139,7 @@ process { publishDir = [ path: { "${params.outdir}/prep_panel/haplegend/" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true ] } @@ -156,6 +157,7 @@ process { publishDir = [ path: { "${params.outdir}/prep_panel/sites/vcf/" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true ] } @@ -165,6 +167,7 @@ process { publishDir = [ path: { "${params.outdir}/prep_panel/sites/vcf/" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true ] } @@ -186,6 +189,7 @@ process { publishDir = [ path: { "${params.outdir}/prep_panel/sites/tsv/" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true ] } @@ -200,6 +204,7 @@ process { publishDir = [ path: { "${params.outdir}/prep_panel/sites/tsv/" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, enabled: true ] } @@ -218,6 +223,7 @@ process { publishDir = [ path: { "${params.outdir}/prep_panel/chunks/" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true ] tag = {"${meta.id} ${meta.chr}"} @@ -228,6 +234,7 @@ process { publishDir = [ path: { "${params.outdir}/prep_panel/chunks/glimpse1/" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true ] } @@ -237,6 +244,7 @@ process { publishDir = [ path: { "${params.outdir}/prep_panel/chunks/glimpse2/" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true ] } @@ -246,6 +254,7 @@ process { publishDir = [ path: { "${params.outdir}/prep_panel/chunks/glimpse2/" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true ] } From 1138ee98437044cee9a083f36d0ed7e72888987f Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 12 Jun 2024 17:26:30 +0200 Subject: [PATCH 50/63] Add indent --- conf/test_quilt.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_quilt.config b/conf/test_quilt.config index 41ff4738..07db7668 100644 --- a/conf/test_quilt.config +++ b/conf/test_quilt.config @@ -31,7 +31,7 @@ params { steps = "impute" // External params - chunks = "${projectDir}/tests/csv/chunks.csv" + chunks = "${projectDir}/tests/csv/chunks.csv" posfile = "${projectDir}/tests/csv/posfile.csv" // Impute tools From acc92945518ebdee05ea2df1a358f39ec3782824 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 12 Jun 2024 17:26:53 +0200 Subject: [PATCH 51/63] Add chunks for chr 21 --- tests/csv/chunks.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/csv/chunks.csv b/tests/csv/chunks.csv index e77b3b76..d78c3858 100644 --- a/tests/csv/chunks.csv +++ b/tests/csv/chunks.csv @@ -1,2 +1,3 @@ panel,chr,file 
1000GP.s.norel,chr22,"https://github.com/nf-core/test-datasets/raw/phaseimpute/data/panel/22/1000GP.s.norel_chr22_chunks_glimpse1.txt" +1000GP.s.norel,chr21,"https://github.com/nf-core/test-datasets/raw/phaseimpute/data/panel/21/1000GP.s.norel_chr21_chunks_glimpse1.txt" From 32df3c7ecca72820a71391943b99e3a94a27ba14 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 12 Jun 2024 17:27:21 +0200 Subject: [PATCH 52/63] Fix main script --- workflows/phaseimpute/main.nf | 33 ++++++++------------------------- 1 file changed, 8 insertions(+), 25 deletions(-) diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 29565704..ff5f71f7 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -160,6 +160,7 @@ workflow PHASEIMPUTE { ch_posfile_stitch = VCF_SITES_EXTRACT_BCFTOOLS.out.panel_tsv_stitch ch_panel_sites = VCF_SITES_EXTRACT_BCFTOOLS.out.panel_sites ch_panel_phased = VCF_NORMALIZE_BCFTOOLS.out.vcf_tbi + ch_hap_legend = VCF_SITES_EXTRACT_BCFTOOLS.out.hap_legend // Phase panel with Shapeit5 if (params.phased == false) { @@ -191,6 +192,11 @@ workflow PHASEIMPUTE { } if (params.steps.split(',').contains("impute") || params.steps.split(',').contains("all")) { + // Use panel from parameters if provided + if (params.panel && !params.steps.split(',').find { it in ["all", "panelprep"] }) { + ch_panel_phased = ch_panel + } + if (params.tools.split(',').contains("glimpse1")) { log.info("Impute with GLIMPSE1") @@ -198,19 +204,12 @@ workflow PHASEIMPUTE { if (params.chunks) { CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse") ch_chunks_glimpse1 = CHUNK_PREPARE_CHANNEL.out.chunks - } else if (params.panel && params.steps.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { - ch_chunks_glimpse1 = VCF_CHUNK_GLIMPSE.out.chunks_glimpse1 } if (params.posfile) { ch_posfile_glimpse = ch_posfile.map {meta, vcf, csi, txt -> [ meta, vcf, txt ]} } - // Use panel from parameters if provided - if (params.panel && 
!params.steps.split(',').find { it in ["all", "panelprep"] }) { - ch_panel_phased = ch_panel - } - // Run imputation VCF_IMPUTE_GLIMPSE1( ch_input_impute, @@ -232,19 +231,11 @@ workflow PHASEIMPUTE { if (params.tools.split(',').contains("glimpse2")) { log.info("Impute with GLIMPSE2") - // Use chunks from parameters if provided or use previous chunks from panelprep - if (params.panel && params.steps.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { - ch_chunks_glimpse2 = VCF_CHUNK_GLIMPSE.out.chunks_glimpse2 - } else if (params.chunks) { + if (params.chunks) { CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse") ch_chunks_glimpse2 = CHUNK_PREPARE_CHANNEL.out.chunks } - // Use panel from parameters if provided - if (params.panel && !params.steps.split(',').find { it in ["all", "panelprep"] }) { - ch_panel_phased = ch_panel - } - // Run imputation VCF_IMPUTE_GLIMPSE2( ch_input_impute, @@ -295,20 +286,12 @@ workflow PHASEIMPUTE { if (params.tools.split(',').contains("quilt")) { log.info("Impute with QUILT") - // Use previous chunks if --steps panelprep - if (params.panel && params.steps.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { - ch_chunks_quilt = VCF_CHUNK_GLIMPSE.out.chunks_quilt // Use provided chunks if --chunks - } else if (params.chunks) { + if (params.chunks) { CHUNK_PREPARE_CHANNEL(ch_chunks, "quilt") ch_chunks_quilt = CHUNK_PREPARE_CHANNEL.out.chunks } - // Use previous hap_legend if --steps panelprep - if (params.steps.split(',').find { it in ["all", "panelprep"] }) { - ch_hap_legend = VCF_NORMALIZE_BCFTOOLS.out.hap_legend - } - // Impute BAMs with QUILT BAM_IMPUTE_QUILT( ch_input_impute, From 428b8195c150f153942d01b79eacdfb8c18df3d8 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 12 Jun 2024 17:28:17 +0200 Subject: [PATCH 53/63] Update nf-test --- workflows/phaseimpute/tests/test_all.nf.test | 2 +- .../phaseimpute/tests/test_all.nf.test.snap | 67 ++++++++----------- 2 files changed, 28 insertions(+), 41 deletions(-) 
diff --git a/workflows/phaseimpute/tests/test_all.nf.test b/workflows/phaseimpute/tests/test_all.nf.test index 0565a8ea..fdf57bd0 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test +++ b/workflows/phaseimpute/tests/test_all.nf.test @@ -110,7 +110,7 @@ nextflow_pipeline { .list() .collect { getRecursiveFileNames(it, outputDir) } .flatten(), - path("$outputDir/imputation/stitch/concat/NA12878.vcf.gz").linesGzip.size() + path("$outputDir/imputation/stitch/concat/NA12878_stitch.vcf.gz").linesGzip.size() ).match() } ) diff --git a/workflows/phaseimpute/tests/test_all.nf.test.snap b/workflows/phaseimpute/tests/test_all.nf.test.snap index d38fc737..c76ae845 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test.snap +++ b/workflows/phaseimpute/tests/test_all.nf.test.snap @@ -3,21 +3,23 @@ "content": [ [ "imputation/csv/impute.csv", - "imputation/stitch/concat/NA12878.vcf.gz", - "imputation/stitch/concat/NA12878.vcf.gz.tbi", - "imputation/stitch/concat/NA19401.vcf.gz", - "imputation/stitch/concat/NA19401.vcf.gz.tbi", - "imputation/stitch/concat/NA20359.vcf.gz", - "imputation/stitch/concat/NA20359.vcf.gz.tbi", - "imputation/stitch/concat/versions.yml" + "imputation/stats/NA12878_stitch.bcftools_stats.txt", + "imputation/stats/NA19401_stitch.bcftools_stats.txt", + "imputation/stats/NA20359_stitch.bcftools_stats.txt", + "imputation/stitch/concat/NA12878_stitch.vcf.gz", + "imputation/stitch/concat/NA12878_stitch.vcf.gz.tbi", + "imputation/stitch/concat/NA19401_stitch.vcf.gz", + "imputation/stitch/concat/NA19401_stitch.vcf.gz.tbi", + "imputation/stitch/concat/NA20359_stitch.vcf.gz", + "imputation/stitch/concat/NA20359_stitch.vcf.gz.tbi" ], 1786 ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nextflow": "23.10.1" }, - "timestamp": "2024-06-02T21:02:42.358049222" + "timestamp": "2024-06-12T17:20:52.933808288" }, "Check test_all": { "content": [ @@ -38,21 +40,18 @@ "imputation/glimpse1/concat/NA19401_glimpse1.vcf.gz.tbi", 
"imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz", "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz.tbi", - "imputation/glimpse1/concat/versions.yml", "imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz", "imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz.tbi", "imputation/glimpse2/concat/NA19401_glimpse2.vcf.gz", "imputation/glimpse2/concat/NA19401_glimpse2.vcf.gz.tbi", "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz", "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz.tbi", - "imputation/glimpse2/concat/versions.yml", "imputation/quilt/concat/NA12878_quilt.vcf.gz", "imputation/quilt/concat/NA12878_quilt.vcf.gz.tbi", "imputation/quilt/concat/NA19401_quilt.vcf.gz", "imputation/quilt/concat/NA19401_quilt.vcf.gz.tbi", "imputation/quilt/concat/NA20359_quilt.vcf.gz", "imputation/quilt/concat/NA20359_quilt.vcf.gz.tbi", - "imputation/quilt/concat/versions.yml", "imputation/stats/NA12878_glimpse1.bcftools_stats.txt", "imputation/stats/NA12878_glimpse2.bcftools_stats.txt", "imputation/stats/NA12878_quilt.bcftools_stats.txt", @@ -65,22 +64,18 @@ "imputation/stats/NA20359_glimpse2.bcftools_stats.txt", "imputation/stats/NA20359_quilt.bcftools_stats.txt", "imputation/stats/NA20359_stitch_stitch.bcftools_stats.txt", - "imputation/stats/versions.yml", "imputation/stitch/concat/NA12878_stitch.vcf.gz", "imputation/stitch/concat/NA12878_stitch.vcf.gz.tbi", "imputation/stitch/concat/NA19401_stitch.vcf.gz", "imputation/stitch/concat/NA19401_stitch.vcf.gz.tbi", "imputation/stitch/concat/NA20359_stitch.vcf.gz", - "imputation/stitch/concat/NA20359_stitch.vcf.gz.tbi", - "imputation/stitch/concat/versions.yml" + "imputation/stitch/concat/NA20359_stitch.vcf.gz.tbi" ], [ "prep_panel/chunks/glimpse1/1000GP.s.norel_chr21_chunks_glimpse1.txt", "prep_panel/chunks/glimpse1/1000GP.s.norel_chr22_chunks_glimpse1.txt", - "prep_panel/chunks/glimpse1/versions.yml", "prep_panel/chunks/glimpse2/1000GP.s.norel_chr21_chunks_glimpse2.txt", 
"prep_panel/chunks/glimpse2/1000GP.s.norel_chr22_chunks_glimpse2.txt", - "prep_panel/chunks/glimpse2/versions.yml", "prep_panel/csv/chunks.csv", "prep_panel/csv/panel.csv", "prep_panel/csv/posfile.csv", @@ -90,22 +85,17 @@ "prep_panel/haplegend/1000GP.s.norel_chr22.hap.gz", "prep_panel/haplegend/1000GP.s.norel_chr22.legend.gz", "prep_panel/haplegend/1000GP.s.norel_chr22.samples", - "prep_panel/haplegend/versions.yml", "prep_panel/normalized/1000GP.s.norel_chr21_biallelic_snps.vcf.gz", "prep_panel/normalized/1000GP.s.norel_chr21_biallelic_snps.vcf.gz.tbi", "prep_panel/normalized/1000GP.s.norel_chr22_biallelic_snps.vcf.gz", "prep_panel/normalized/1000GP.s.norel_chr22_biallelic_snps.vcf.gz.tbi", - "prep_panel/normalized/versions.yml", "prep_panel/sites/tsv/1000GP.s.norel_chr21_glimpse1_sites_tsv.txt.gz", "prep_panel/sites/tsv/1000GP.s.norel_chr22_glimpse1_sites_tsv.txt.gz", - "prep_panel/sites/tsv/versions.yml", "prep_panel/sites/vcf/1000GP.s.norel_chr21_glimpse1_sites.vcf.gz", "prep_panel/sites/vcf/1000GP.s.norel_chr21_glimpse1_sites.vcf.gz.csi", "prep_panel/sites/vcf/1000GP.s.norel_chr22_glimpse1_sites.vcf.gz", "prep_panel/sites/vcf/1000GP.s.norel_chr22_glimpse1_sites.vcf.gz.csi", - "prep_panel/sites/vcf/versions.yml", - "prep_panel/stats/1000GP.s.norel.bcftools_stats.txt", - "prep_panel/stats/versions.yml" + "prep_panel/stats/1000GP.s.norel.bcftools_stats.txt" ], [ "validation/NA12878_P1000GP.s.norel_Tglimpse1_SNP.txt", @@ -126,8 +116,7 @@ "validation/concat/NA20359_A-truth.vcf.gz.tbi", "validation/stats/NA12878_truth.bcftools_stats.txt", "validation/stats/NA19401_truth.bcftools_stats.txt", - "validation/stats/NA20359_truth.bcftools_stats.txt", - "validation/stats/versions.yml" + "validation/stats/NA20359_truth.bcftools_stats.txt" ], 1779 ], @@ -135,7 +124,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-06-12T13:02:50.1244856" + "timestamp": "2024-06-12T17:05:02.964658867" }, "Check test_validate": { "content": [ @@ -152,26 +141,28 @@ 
"validation/concat/NA20359_A-truth.vcf.gz.tbi", "validation/stats/NA12878_truth.bcftools_stats.txt", "validation/stats/NA19401_truth.bcftools_stats.txt", - "validation/stats/NA20359_truth.bcftools_stats.txt", - "validation/stats/versions.yml" + "validation/stats/NA20359_truth.bcftools_stats.txt" ] ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-06-12T12:54:36.243451019" + "timestamp": "2024-06-12T16:28:39.822160588" }, "Check test_quilt": { "content": [ [ + "imputation/csv/impute.csv", "imputation/quilt/concat/NA12878_quilt.vcf.gz", "imputation/quilt/concat/NA12878_quilt.vcf.gz.tbi", "imputation/quilt/concat/NA19401_quilt.vcf.gz", "imputation/quilt/concat/NA19401_quilt.vcf.gz.tbi", "imputation/quilt/concat/NA20359_quilt.vcf.gz", "imputation/quilt/concat/NA20359_quilt.vcf.gz.tbi", - "imputation/quilt/concat/versions.yml" + "imputation/stats/NA12878_quilt.bcftools_stats.txt", + "imputation/stats/NA19401_quilt.bcftools_stats.txt", + "imputation/stats/NA20359_quilt.bcftools_stats.txt" ], 1779 ], @@ -179,7 +170,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-05-24T14:32:54.985163559" + "timestamp": "2024-06-12T16:53:00.958242697" }, "Check test_sim": { "content": [ @@ -209,19 +200,17 @@ "imputation/glimpse2/concat/NA19401_glimpse2.vcf.gz.tbi", "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz", "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz.tbi", - "imputation/glimpse2/concat/versions.yml", "imputation/stats/NA12878_glimpse2.bcftools_stats.txt", "imputation/stats/NA19401_glimpse2.bcftools_stats.txt", - "imputation/stats/NA20359_glimpse2.bcftools_stats.txt", - "imputation/stats/versions.yml" + "imputation/stats/NA20359_glimpse2.bcftools_stats.txt" ], - 936 + 1798 ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-06-12T14:22:47.18990097" + "timestamp": "2024-06-12T16:49:29.063030563" }, "Check test": { "content": [ @@ -233,11 +222,9 @@ 
"imputation/glimpse1/concat/NA19401_glimpse1.vcf.gz.tbi", "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz", "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz.tbi", - "imputation/glimpse1/concat/versions.yml", "imputation/stats/NA12878_glimpse1.bcftools_stats.txt", "imputation/stats/NA19401_glimpse1.bcftools_stats.txt", - "imputation/stats/NA20359_glimpse1.bcftools_stats.txt", - "imputation/stats/versions.yml" + "imputation/stats/NA20359_glimpse1.bcftools_stats.txt" ], 930 ], @@ -245,6 +232,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-06-12T12:47:57.195765127" + "timestamp": "2024-06-12T16:47:23.10046931" } } \ No newline at end of file From 61c5a8b45484a70318885eb28e734baa8a60a15f Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 12 Jun 2024 18:19:01 +0200 Subject: [PATCH 54/63] Update test --- conf/steps/panel_prep.config | 1 + workflows/phaseimpute/tests/test_all.nf.test.snap | 11 +++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index a18289a8..16d46ddd 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -138,6 +138,7 @@ process { publishDir = [ path: { "${params.outdir}/prep_panel/haplegend/" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, enabled: true ] } diff --git a/workflows/phaseimpute/tests/test_all.nf.test.snap b/workflows/phaseimpute/tests/test_all.nf.test.snap index c76ae845..88f302c4 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test.snap +++ b/workflows/phaseimpute/tests/test_all.nf.test.snap @@ -55,15 +55,15 @@ "imputation/stats/NA12878_glimpse1.bcftools_stats.txt", "imputation/stats/NA12878_glimpse2.bcftools_stats.txt", "imputation/stats/NA12878_quilt.bcftools_stats.txt", - "imputation/stats/NA12878_stitch_stitch.bcftools_stats.txt", + "imputation/stats/NA12878_stitch.bcftools_stats.txt", "imputation/stats/NA19401_glimpse1.bcftools_stats.txt", "imputation/stats/NA19401_glimpse2.bcftools_stats.txt", "imputation/stats/NA19401_quilt.bcftools_stats.txt", - "imputation/stats/NA19401_stitch_stitch.bcftools_stats.txt", + "imputation/stats/NA19401_stitch.bcftools_stats.txt", "imputation/stats/NA20359_glimpse1.bcftools_stats.txt", "imputation/stats/NA20359_glimpse2.bcftools_stats.txt", "imputation/stats/NA20359_quilt.bcftools_stats.txt", - "imputation/stats/NA20359_stitch_stitch.bcftools_stats.txt", + "imputation/stats/NA20359_stitch.bcftools_stats.txt", "imputation/stitch/concat/NA12878_stitch.vcf.gz", "imputation/stitch/concat/NA12878_stitch.vcf.gz.tbi", "imputation/stitch/concat/NA19401_stitch.vcf.gz", @@ -101,12 +101,15 @@ "validation/NA12878_P1000GP.s.norel_Tglimpse1_SNP.txt", "validation/NA12878_P1000GP.s.norel_Tglimpse2_SNP.txt", "validation/NA12878_P1000GP.s.norel_Tquilt_SNP.txt", + "validation/NA12878_P1000GP.s.norel_Tstitch_SNP.txt", "validation/NA19401_P1000GP.s.norel_Tglimpse1_SNP.txt", "validation/NA19401_P1000GP.s.norel_Tglimpse2_SNP.txt", "validation/NA19401_P1000GP.s.norel_Tquilt_SNP.txt", + "validation/NA19401_P1000GP.s.norel_Tstitch_SNP.txt", "validation/NA20359_P1000GP.s.norel_Tglimpse1_SNP.txt", "validation/NA20359_P1000GP.s.norel_Tglimpse2_SNP.txt", "validation/NA20359_P1000GP.s.norel_Tquilt_SNP.txt", + 
"validation/NA20359_P1000GP.s.norel_Tstitch_SNP.txt", "validation/TestQuality.txt", "validation/concat/NA12878_A-truth.vcf.gz", "validation/concat/NA12878_A-truth.vcf.gz.tbi", @@ -124,7 +127,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-06-12T17:05:02.964658867" + "timestamp": "2024-06-12T17:51:48.063603304" }, "Check test_validate": { "content": [ From ed5fae9290e02655d50eff2ff600fa6a0f789c5c Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 12 Jun 2024 18:26:40 +0200 Subject: [PATCH 55/63] Update test and align params --- conf/test.config | 8 +++---- conf/test_all.config | 23 +++++++++---------- conf/test_full.config | 11 +++++---- conf/test_glimpse2.config | 6 ++--- conf/test_panelprep.config | 2 +- conf/test_panelprep_fullchr.config | 8 +++---- conf/test_quilt.config | 10 ++++---- conf/test_sim.config | 10 ++++---- conf/test_stitch.config | 2 +- conf/test_validate.config | 12 +++++----- .../phaseimpute/tests/test_all.nf.test.snap | 4 ++-- 11 files changed, 48 insertions(+), 48 deletions(-) diff --git a/conf/test.config b/conf/test.config index 5231aa59..156e3416 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,18 +25,18 @@ params { // Genome references fasta = params.pipelines_testdata_base_path + "reference_genome/21_22/hs38DH.chr21_22.fa" - panel = "${projectDir}/tests/csv/panel_2.csv" + panel = "${projectDir}/tests/csv/panel.csv" phased = true // Pipeline steps - steps = "impute" + steps = "impute" // External params - chunks = "${projectDir}/tests/csv/chunks.csv" + chunks = "${projectDir}/tests/csv/chunks.csv" posfile = "${projectDir}/tests/csv/posfile.csv" // Impute tools - tools = "glimpse1" + tools = "glimpse1" } diff --git a/conf/test_all.config b/conf/test_all.config index e837d54b..872c731d 100644 --- a/conf/test_all.config +++ b/conf/test_all.config @@ -15,39 +15,38 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub 
Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' // Input data - input = "${projectDir}/tests/csv/sample_sim.csv" - input_region = "${projectDir}/tests/csv/region.csv" - depth = 1 + input = "${projectDir}/tests/csv/sample_sim.csv" + input_region = "${projectDir}/tests/csv/region.csv" + depth = 1 // Genome references fasta = params.pipelines_testdata_base_path + "reference_genome/21_22/hs38DH.chr21_22.fa" panel = "${projectDir}/tests/csv/panel.csv" phased = false compute_freq = false - //map = "${projectDir}/tests/csv/map.csv" // Pipeline steps - steps = "all" + steps = "all" // Impute tools - tools = "glimpse1,glimpse2,stitch,quilt" + tools = "glimpse1,glimpse2,stitch,quilt" } process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE_CHUNK' { - ext.args = ["--window-size 10000", "--window-count 400", "--buffer-size 5000", "--buffer-count 30"].join(' ') + ext.args = ["--window-size 10000", "--window-count 400", "--buffer-size 5000", "--buffer-count 30"].join(' ') } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE2_CHUNK' { - ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') + ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_SHAPEIT5:GLIMPSE2_CHUNK' { - ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') + ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') } } diff --git a/conf/test_full.config b/conf/test_full.config index 0e15401a..3c45a62d 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -24,15 +24,16 @@ 
params { max_time = '6.h' // Input data - input = "${projectDir}/tests/csv/sample_sim_full.csv" - panel = "${projectDir}/tests/csv/panel_full.csv" + input = "${projectDir}/tests/csv/sample_sim_full.csv" + panel = "${projectDir}/tests/csv/panel_full.csv" + map = "${projectDir}/tests/csv/map.csv" // Pipeline steps - steps = "all" + steps = "all" // Panelprep optional args - remove_samples = "NA12878,NA12891,NA12892" + remove_samples = "NA12878,NA12891,NA12892" // Impute tools - tools = "glimpse1" + tools = "glimpse1" } diff --git a/conf/test_glimpse2.config b/conf/test_glimpse2.config index 39f93681..d654ec6b 100644 --- a/conf/test_glimpse2.config +++ b/conf/test_glimpse2.config @@ -20,7 +20,7 @@ params { max_time = '1.h' // Input data - input = "${projectDir}/tests/csv/sample_bam.csv" + input = "${projectDir}/tests/csv/sample_bam.csv" // Genome references fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/21_22/hs38DH.chr21_22.fa" @@ -28,10 +28,10 @@ params { phased = true // Pipeline steps - steps = "impute" + steps = "impute" // External params - chunks = "${projectDir}/tests/csv/chunks.csv" + chunks = "${projectDir}/tests/csv/chunks.csv" posfile = "${projectDir}/tests/csv/posfile.csv" // Impute tools diff --git a/conf/test_panelprep.config b/conf/test_panelprep.config index 4316a610..b986e0f1 100644 --- a/conf/test_panelprep.config +++ b/conf/test_panelprep.config @@ -31,6 +31,6 @@ params { process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_SHAPEIT5:GLIMPSE2_CHUNK' { - ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') + ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') } } diff --git a/conf/test_panelprep_fullchr.config b/conf/test_panelprep_fullchr.config index 227685cc..c36f48c3 100644 --- 
a/conf/test_panelprep_fullchr.config +++ b/conf/test_panelprep_fullchr.config @@ -20,10 +20,10 @@ params { max_time = '4.h' // Genome references - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/21_22/hs38DH.chr21_22.fa" - panel = "${projectDir}/tests/csv/panel_fullchr.csv" - phased = false + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/21_22/hs38DH.chr21_22.fa" + panel = "${projectDir}/tests/csv/panel_fullchr.csv" + phased = false // Pipeline steps - steps = "panelprep" + steps = "panelprep" } diff --git a/conf/test_quilt.config b/conf/test_quilt.config index 07db7668..ec7a0ee3 100644 --- a/conf/test_quilt.config +++ b/conf/test_quilt.config @@ -24,16 +24,16 @@ params { input_region = "${projectDir}/tests/csv/region.csv" // Genome references - fasta = params.pipelines_testdata_base_path + "reference_genome/21_22/hs38DH.chr21_22.fa" - phased = true + fasta = params.pipelines_testdata_base_path + "reference_genome/21_22/hs38DH.chr21_22.fa" + phased = true // Pipeline steps steps = "impute" // External params - chunks = "${projectDir}/tests/csv/chunks.csv" - posfile = "${projectDir}/tests/csv/posfile.csv" + chunks = "${projectDir}/tests/csv/chunks.csv" + posfile = "${projectDir}/tests/csv/posfile.csv" // Impute tools - tools = "quilt" + tools = "quilt" } diff --git a/conf/test_sim.config b/conf/test_sim.config index 42418f06..37be4a5b 100644 --- a/conf/test_sim.config +++ b/conf/test_sim.config @@ -20,13 +20,13 @@ params { max_time = '6.h' // Input data - input = "${projectDir}/tests/csv/sample_sim.csv" - input_region = "${projectDir}/tests/csv/region.csv" - depth = 1 + input = "${projectDir}/tests/csv/sample_sim.csv" + input_region = "${projectDir}/tests/csv/region.csv" + depth = 1 // Genome references - fasta = params.pipelines_testdata_base_path + "reference_genome/21_22/hs38DH.chr21_22.fa" + fasta = params.pipelines_testdata_base_path + 
"reference_genome/21_22/hs38DH.chr21_22.fa" // Pipeline steps - steps = "simulate" + steps = "simulate" } diff --git a/conf/test_stitch.config b/conf/test_stitch.config index 9520980b..01612313 100644 --- a/conf/test_stitch.config +++ b/conf/test_stitch.config @@ -31,5 +31,5 @@ params { steps = "impute" // Impute tools - tools = "stitch" + tools = "stitch" } diff --git a/conf/test_validate.config b/conf/test_validate.config index 98641844..96a49960 100644 --- a/conf/test_validate.config +++ b/conf/test_validate.config @@ -15,14 +15,14 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' // Input data - input = "${projectDir}/tests/csv/sample_validate_imputed.csv" - input_truth = "${projectDir}/tests/csv/sample_validate_truth.csv" - input_region = "${projectDir}/tests/csv/region.csv" + input = "${projectDir}/tests/csv/sample_validate_imputed.csv" + input_truth = "${projectDir}/tests/csv/sample_validate_truth.csv" + input_region = "${projectDir}/tests/csv/region.csv" // Genome references fasta = params.pipelines_testdata_base_path + "reference_genome/21_22/hs38DH.chr21_22.fa" diff --git a/workflows/phaseimpute/tests/test_all.nf.test.snap b/workflows/phaseimpute/tests/test_all.nf.test.snap index 88f302c4..e9e314dd 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test.snap +++ b/workflows/phaseimpute/tests/test_all.nf.test.snap @@ -229,12 +229,12 @@ "imputation/stats/NA19401_glimpse1.bcftools_stats.txt", "imputation/stats/NA20359_glimpse1.bcftools_stats.txt" ], - 930 + 1800 ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-06-12T16:47:23.10046931" + "timestamp": "2024-06-12T18:24:52.810087967" } } \ No newline at end of file From 6351deb45ac7a8455fa7f3b734c1726133942dd8 Mon Sep 17 00:00:00 2001 From: LouisBzh Date: Thu, 13 Jun 2024 
12:43:39 +0200 Subject: [PATCH 56/63] Move to dot --- assets/schema_input.json | 4 ++-- conf/modules.config | 10 +++++----- conf/steps/imputation_glimpse1.config | 2 +- conf/steps/imputation_glimpse2.config | 2 +- conf/steps/imputation_quilt.config | 3 +-- conf/steps/imputation_stitch.config | 6 ++---- conf/steps/panel_prep.config | 4 ++++ conf/steps/validation.config | 2 +- subworkflows/local/vcf_samples_bcftools/main.nf | 2 +- 9 files changed, 18 insertions(+), 17 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index f7083b29..405e0250 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -9,8 +9,8 @@ "properties": { "sample": { "type": "string", - "pattern": "^[a-zA-Z0-9]+$", - "errorMessage": "Sample name must be provided and cannot contain spaces nor special characters '_' or '.' .", + "pattern": "^[a-zA-Z0-9_]+$", + "errorMessage": "Sample name must be provided and cannot contain spaces nor special character '.' .", "meta": ["id"] }, "file": { diff --git a/conf/modules.config b/conf/modules.config index e1846284..f8d7ca14 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -31,18 +31,18 @@ process { // Simulation // Coverage process withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_TRT' { - ext.prefix = { "${meta.id}_A-truth" } + ext.prefix = { "${meta.id}.A-truth" } publishDir = [ enabled: false ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_SIM' { - ext.prefix = { "${meta.id}_B-sim" } + ext.prefix = { "${meta.id}.B-sim" } publishDir = [ enabled: false ] } // VCF // PANEL withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_STATS_PANEL' { - ext.prefix = { "${meta.id}" } + ext.prefix = { "${meta.id}.panel" } publishDir = [ path: { "${params.outdir}/prep_panel/stats/" }, mode: params.publish_dir_mode, @@ -53,7 +53,7 @@ process { // TRUTH withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_STATS_TRUTH' { - ext.prefix = { "${meta.id}_truth" } + ext.prefix = { "${meta.id}.truth" } 
publishDir = [ path: { "${params.outdir}/validation/stats" }, mode: params.publish_dir_mode, @@ -63,7 +63,7 @@ process { } // IMPUTE withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_STATS_TOOLS' { - ext.prefix = { "${meta.id}_${meta.tools}" } + ext.prefix = { "${meta.id}.${meta.tools}" } publishDir = [ path: { "${params.outdir}/imputation/stats" }, mode: params.publish_dir_mode, diff --git a/conf/steps/imputation_glimpse1.config b/conf/steps/imputation_glimpse1.config index 351008d0..74d1bea7 100644 --- a/conf/steps/imputation_glimpse1.config +++ b/conf/steps/imputation_glimpse1.config @@ -80,7 +80,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE1:BCFTOOLS_CONCAT' { ext.args = ["--ligate", "--output-type z"].join(' ') - ext.prefix = { "${meta.id}_glimpse1" } + ext.prefix = { "${meta.id}.glimpse1" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE1:BCFTOOLS_INDEX' { diff --git a/conf/steps/imputation_glimpse2.config b/conf/steps/imputation_glimpse2.config index c87a7c7f..f1c9ac88 100644 --- a/conf/steps/imputation_glimpse2.config +++ b/conf/steps/imputation_glimpse2.config @@ -51,7 +51,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE2:BCFTOOLS_CONCAT' { ext.args = ["--ligate", "--output-type z"].join(' ') - ext.prefix = { "${meta.id}_glimpse2" } + ext.prefix = { "${meta.id}.glimpse2" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE2:BCFTOOLS_INDEX' { diff --git a/conf/steps/imputation_quilt.config b/conf/steps/imputation_quilt.config index 932ab755..3bb18cf9 100644 --- a/conf/steps/imputation_quilt.config +++ b/conf/steps/imputation_quilt.config @@ -54,12 +54,11 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_QUILT:BCFTOOLS_CONCAT' { ext.args = ["--ligate", "--output-type z",].join(' ') - ext.prefix = { "${meta.id}_quilt" } + ext.prefix = { "${meta.id}.quilt" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_QUILT:BCFTOOLS_INDEX' { ext.args = "--tbi" - ext.prefix = { 
"${meta.id}_quilt" } } } diff --git a/conf/steps/imputation_stitch.config b/conf/steps/imputation_stitch.config index b452d954..447f6f03 100644 --- a/conf/steps/imputation_stitch.config +++ b/conf/steps/imputation_stitch.config @@ -30,7 +30,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_STITCH:STITCH' { - ext.prefix = { "${meta.id}_stitch" } + ext.prefix = { "${meta.id}.stitch" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_STITCH:BCFTOOLS_INDEX' { @@ -44,12 +44,10 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_STITCH:BCFTOOLS_CONCAT' { ext.args = ["--ligate", "--output-type z"].join(' ') - ext.prefix = { "${meta.id}_stitch" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_STITCH:BCFTOOLS_INDEX' { ext.args = "--tbi" - ext.prefix = { "${meta.id}_stitch" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SAMPLES_BCFTOOLS:.*' { @@ -62,7 +60,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SAMPLES_BCFTOOLS:BCFTOOLS_PLUGINSPLIT' { ext.args = "-Oz" - ext.suffix = "_stitch" + ext.suffix = ".stitch" } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SAMPLES_BCFTOOLS:BCFTOOLS_INDEX' { diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index 16d46ddd..e9d7c975 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -213,6 +213,10 @@ process { publishDir = [ enabled: false ] } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_PANEL:BCFTOOLS_CONCAT' { + ext.prefix = { "${meta.id}.panel" } + } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_PANEL:BCFTOOLS_INDEX' { ext.args = "--tbi" } diff --git a/conf/steps/validation.config b/conf/steps/validation.config index 31e8dc59..65c2186a 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -52,7 +52,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:BCFTOOLS_CONCAT' { ext.args = ["--ligate", "--output-type z",].join(' ') - ext.prefix = { "${meta.id}_A-truth" } + ext.prefix = { 
"${meta.id}.A-truth" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:BCFTOOLS_INDEX' { diff --git a/subworkflows/local/vcf_samples_bcftools/main.nf b/subworkflows/local/vcf_samples_bcftools/main.nf index b2f1382e..51e42bc1 100644 --- a/subworkflows/local/vcf_samples_bcftools/main.nf +++ b/subworkflows/local/vcf_samples_bcftools/main.nf @@ -14,7 +14,7 @@ workflow VCF_SAMPLES_BCFTOOLS { ch_vcf_samples = BCFTOOLS_PLUGINSPLIT.out.vcf .transpose() - .map{metaITC, vcf -> [metaITC + [id: vcf.getBaseName().tokenize(".")[0].tokenize("_")[0]], vcf]} + .map{metaITC, vcf -> [metaITC + [id: vcf.getBaseName().tokenize(".")[0]], vcf]} BCFTOOLS_INDEX(ch_vcf_samples) ch_versions = ch_versions.mix(BCFTOOLS_INDEX.out.versions.first()) From ab8b036b1ccd674e2171f6238ab1c19cd667332c Mon Sep 17 00:00:00 2001 From: LouisBzh Date: Thu, 13 Jun 2024 12:44:02 +0200 Subject: [PATCH 57/63] Remove dot from panel --- tests/csv/chunks.csv | 4 ++-- tests/csv/panel.csv | 4 ++-- tests/csv/panel_full.csv | 44 ++++++++++++++++++++-------------------- tests/csv/posfile.csv | 4 ++-- 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/tests/csv/chunks.csv b/tests/csv/chunks.csv index d78c3858..bf308029 100644 --- a/tests/csv/chunks.csv +++ b/tests/csv/chunks.csv @@ -1,3 +1,3 @@ panel,chr,file -1000GP.s.norel,chr22,"https://github.com/nf-core/test-datasets/raw/phaseimpute/data/panel/22/1000GP.s.norel_chr22_chunks_glimpse1.txt" -1000GP.s.norel,chr21,"https://github.com/nf-core/test-datasets/raw/phaseimpute/data/panel/21/1000GP.s.norel_chr21_chunks_glimpse1.txt" +1000GP,chr22,"https://github.com/nf-core/test-datasets/raw/phaseimpute/data/panel/22/1000GP_chr22_chunks_glimpse1.txt" +1000GP,chr21,"https://github.com/nf-core/test-datasets/raw/phaseimpute/data/panel/21/1000GP_chr21_chunks_glimpse1.txt" diff --git a/tests/csv/panel.csv b/tests/csv/panel.csv index 8a5c58b1..7a009fa8 100644 --- a/tests/csv/panel.csv +++ b/tests/csv/panel.csv @@ -1,3 +1,3 @@ panel,chr,vcf,index 
-1000GP.s.norel,chr21,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.bcf,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.bcf.csi -1000GP.s.norel,chr22,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.bcf,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.bcf.csi +1000GP,chr21,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.bcf,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.bcf.csi +1000GP,chr22,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.bcf,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.bcf.csi diff --git a/tests/csv/panel_full.csv b/tests/csv/panel_full.csv index 782b4a78..a2a18ee0 100644 --- a/tests/csv/panel_full.csv +++ b/tests/csv/panel_full.csv @@ -1,23 +1,23 @@ panel,chr,vcf,index -1000GP.s.norel,chr1,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr1.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr1.filtered.shapeit2-duohmm-phased.vcf.gz.tbi -1000GP.s.norel,chr2,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr2.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr2.filtered.shapeit2-duohmm-phased.vcf.gz.tbi 
-1000GP.s.norel,chr3,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr3.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr3.filtered.shapeit2-duohmm-phased.vcf.gz.tbi -1000GP.s.norel,chr4,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr4.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr4.filtered.shapeit2-duohmm-phased.vcf.gz.tbi -1000GP.s.norel,chr5,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr5.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr5.filtered.shapeit2-duohmm-phased.vcf.gz.tbi -1000GP.s.norel,chr6,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr6.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr6.filtered.shapeit2-duohmm-phased.vcf.gz.tbi 
-1000GP.s.norel,chr7,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr7.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr7.filtered.shapeit2-duohmm-phased.vcf.gz.tbi -1000GP.s.norel,chr8,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr8.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr8.filtered.shapeit2-duohmm-phased.vcf.gz.tbi -1000GP.s.norel,chr9,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr9.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr9.filtered.shapeit2-duohmm-phased.vcf.gz.tbi -1000GP.s.norel,chr10,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr10.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr10.filtered.shapeit2-duohmm-phased.vcf.gz.tbi 
-1000GP.s.norel,chr11,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr11.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr11.filtered.shapeit2-duohmm-phased.vcf.gz.tbi -1000GP.s.norel,chr12,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr12.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr12.filtered.shapeit2-duohmm-phased.vcf.gz.tbi -1000GP.s.norel,chr13,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr13.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr13.filtered.shapeit2-duohmm-phased.vcf.gz.tbi -1000GP.s.norel,chr14,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr14.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr14.filtered.shapeit2-duohmm-phased.vcf.gz.tbi 
-1000GP.s.norel,chr15,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr15.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr15.filtered.shapeit2-duohmm-phased.vcf.gz.tbi -1000GP.s.norel,chr16,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr16.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr16.filtered.shapeit2-duohmm-phased.vcf.gz.tbi -1000GP.s.norel,chr17,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr17.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr17.filtered.shapeit2-duohmm-phased.vcf.gz.tbi -1000GP.s.norel,chr18,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr18.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr18.filtered.shapeit2-duohmm-phased.vcf.gz.tbi 
-1000GP.s.norel,chr19,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr19.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr19.filtered.shapeit2-duohmm-phased.vcf.gz.tbi -1000GP.s.norel,chr20,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr20.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr20.filtered.shapeit2-duohmm-phased.vcf.gz.tbi -1000GP.s.norel,chr21,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr21.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr21.filtered.shapeit2-duohmm-phased.vcf.gz.tbi -1000GP.s.norel,chr22,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr22.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr22.filtered.shapeit2-duohmm-phased.vcf.gz.tbi 
+1000GP,chr1,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr1.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr1.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr2,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr2.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr2.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr3,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr3.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr3.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr4,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr4.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr4.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr5,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr5.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr5.filtered.shapeit2-duohmm-phased.vcf.gz.tbi 
+1000GP,chr6,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr6.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr6.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr7,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr7.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr7.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr8,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr8.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr8.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr9,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr9.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr9.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr10,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr10.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr10.filtered.shapeit2-duohmm-phased.vcf.gz.tbi 
+1000GP,chr11,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr11.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr11.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr12,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr12.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr12.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr13,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr13.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr13.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr14,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr14.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr14.filtered.shapeit2-duohmm-phased.vcf.gz.tbi 
+1000GP,chr15,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr15.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr15.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr16,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr16.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr16.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr17,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr17.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr17.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr18,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr18.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr18.filtered.shapeit2-duohmm-phased.vcf.gz.tbi 
+1000GP,chr19,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr19.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr19.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr20,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr20.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr20.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr21,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr21.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr21.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr22,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr22.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr22.filtered.shapeit2-duohmm-phased.vcf.gz.tbi diff --git a/tests/csv/posfile.csv b/tests/csv/posfile.csv index d1a245b7..91ea4dca 100644 --- a/tests/csv/posfile.csv +++ b/tests/csv/posfile.csv @@ -1,3 +1,3 @@ panel,chr,vcf,index,txt,hap,legend 
-1000GP.s.norel,chr21,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.tsv.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.s.norel_chr21.hap.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.s.norel_chr21.legend.gz" -1000GP.s.norel,chr22,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.tsv.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22.hap.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22.legend.gz" +1000GP,chr21,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.tsv.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP_chr21.hap.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP_chr21.legend.gz" 
+1000GP,chr22,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.tsv.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP_chr22.hap.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP_chr22.legend.gz" From 8b04ba6d4ca4d2c597e620bb58fa6ca816aaa6f4 Mon Sep 17 00:00:00 2001 From: LouisBzh Date: Thu, 13 Jun 2024 12:47:03 +0200 Subject: [PATCH 58/63] Update multiqc config --- assets/multiqc_config.yml | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index b7bcd51a..e569adf5 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,24 +1,14 @@ report_comment: > - This report has been generated by the nf-core/phaseimpute - analysis pipeline. For information about how to interpret these results, please see the - documentation. + This report has been generated by the nf-core/phaseimpute + analysis pipeline. For information about how to interpret these results, please see the + documentation. 
report_section_order: - "nf-core-phaseimpute-methods-description": - order: -1000 - software_versions: - order: -1001 - "nf-core-phaseimpute-summary": - order: -1002 - -top_modules: - - "samtools": - name: "Samtools coverage before and after downsampling" - path_filters: - - "*_.stats" - - "bcftools": - name: "BCFtools stats of phased reference panel" - path_filters: - - "*_bcftools_stats.txt" + "nf-core-phaseimpute-methods-description": + order: -1000 + software_versions: + order: -1001 + "nf-core-phaseimpute-summary": + order: -1002 export_plots: true From 636107c11212de1db60eb7865062d9af3c197aa0 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Thu, 13 Jun 2024 12:58:13 +0200 Subject: [PATCH 59/63] Fix csv --- tests/csv/chunks.csv | 4 ++-- tests/csv/posfile.csv | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/csv/chunks.csv b/tests/csv/chunks.csv index bf308029..3f76ee76 100644 --- a/tests/csv/chunks.csv +++ b/tests/csv/chunks.csv @@ -1,3 +1,3 @@ panel,chr,file -1000GP,chr22,"https://github.com/nf-core/test-datasets/raw/phaseimpute/data/panel/22/1000GP_chr22_chunks_glimpse1.txt" -1000GP,chr21,"https://github.com/nf-core/test-datasets/raw/phaseimpute/data/panel/21/1000GP_chr21_chunks_glimpse1.txt" +1000GP,chr22,"https://github.com/nf-core/test-datasets/raw/phaseimpute/data/panel/22/1000GP.s.norel_chr22_chunks_glimpse1.txt" +1000GP,chr21,"https://github.com/nf-core/test-datasets/raw/phaseimpute/data/panel/21/1000GP.s.norel_chr21_chunks_glimpse1.txt" diff --git a/tests/csv/posfile.csv b/tests/csv/posfile.csv index 91ea4dca..c0627df6 100644 --- a/tests/csv/posfile.csv +++ b/tests/csv/posfile.csv @@ -1,3 +1,3 @@ panel,chr,vcf,index,txt,hap,legend 
-1000GP,chr21,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.tsv.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP_chr21.hap.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP_chr21.legend.gz" -1000GP,chr22,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.tsv.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP_chr22.hap.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP_chr22.legend.gz" +1000GP,chr21,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.tsv.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.s.norel_chr21.hap.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.s.norel_chr21.legend.gz" 
+1000GP,chr22,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.tsv.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22.hap.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.s.norel_chr22.legend.gz" From d109b0bdb5c370d356d78ede3c17ae5d3757dbca Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Thu, 13 Jun 2024 12:58:33 +0200 Subject: [PATCH 60/63] Order tools --- assets/multiqc_config.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index e569adf5..58d0e839 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -10,6 +10,10 @@ report_section_order: "nf-core-phaseimpute-summary": order: -1002 +top_module: + - samtools + - bcftools + export_plots: true disable_version_detection: true From 8684f31892a09c9c6e3e7763cee60a7559c190dd Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Thu, 13 Jun 2024 12:58:52 +0200 Subject: [PATCH 61/63] Add check for all imputation tools --- workflows/phaseimpute/tests/test_all.nf.test | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/workflows/phaseimpute/tests/test_all.nf.test b/workflows/phaseimpute/tests/test_all.nf.test index fdf57bd0..a807936d 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test +++ b/workflows/phaseimpute/tests/test_all.nf.test @@ -201,7 +201,10 @@ nextflow_pipeline { .list() .collect { getRecursiveFileNames(it, outputDir) } .flatten(), - path("$outputDir/imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz").linesGzip.size() + path("$outputDir/imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz").linesGzip.size(), + 
path("$outputDir/imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz").linesGzip.size(), + path("$outputDir/imputation/stitch/concat/NA12878_stitch.vcf.gz").linesGzip.size(), + path("$outputDir/imputation/quilt/concat/NA12878_quilt.vcf.gz").linesGzip.size(), ).match() } ) From b81c002ad5199bb6a7e67eba8b0411f8837be2b9 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Thu, 13 Jun 2024 14:10:20 +0200 Subject: [PATCH 62/63] Update snapshot --- assets/multiqc_config.yml | 2 +- conf/steps/panel_prep.config | 6 + workflows/phaseimpute/tests/test_all.nf.test | 16 +- .../phaseimpute/tests/test_all.nf.test.snap | 263 +++++++++--------- 4 files changed, 149 insertions(+), 138 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 58d0e839..992e4eff 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -10,7 +10,7 @@ report_section_order: "nf-core-phaseimpute-summary": order: -1002 -top_module: +top_modules: - samtools - bcftools diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index e9d7c975..1537fe1b 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -181,6 +181,12 @@ process { ext.args = "'{ gsub(\",\", \"\\t\") ; key = \$1 FS \$2 } !seen[key]++'" // Remove duplicates ext.prefix = { "${meta.id}_${meta.chr}_posfile_stitch" } ext.suffix = "txt" + publishDir = [ + path: { "${params.outdir}/prep_panel/posfile/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: true + ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:TABIX_BGZIP' { diff --git a/workflows/phaseimpute/tests/test_all.nf.test b/workflows/phaseimpute/tests/test_all.nf.test index a807936d..3392881b 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test +++ b/workflows/phaseimpute/tests/test_all.nf.test @@ -27,7 +27,7 @@ nextflow_pipeline { .list() .collect { getRecursiveFileNames(it, outputDir) } .flatten(), - path("$outputDir/imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz").linesGzip.size() + path("$outputDir/imputation/glimpse1/concat/NA12878.glimpse1.vcf.gz").linesGzip.size() ).match() } ) @@ -55,7 +55,7 @@ nextflow_pipeline { .list() .collect { getRecursiveFileNames(it, outputDir) } .flatten(), - path("$outputDir/imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz").linesGzip.size() + path("$outputDir/imputation/glimpse2/concat/NA12878.glimpse2.vcf.gz").linesGzip.size() ).match() } ) @@ -82,7 +82,7 @@ nextflow_pipeline { .list() .collect { getRecursiveFileNames(it, outputDir) } .flatten(), - path("$outputDir/imputation/quilt/concat/NA12878_quilt.vcf.gz").linesGzip.size() + path("$outputDir/imputation/quilt/concat/NA12878.quilt.vcf.gz").linesGzip.size() ).match() } ) @@ -110,7 +110,7 @@ nextflow_pipeline { .list() .collect { getRecursiveFileNames(it, outputDir) } .flatten(), - path("$outputDir/imputation/stitch/concat/NA12878_stitch.vcf.gz").linesGzip.size() + path("$outputDir/imputation/stitch/concat/NA12878.stitch.vcf.gz").linesGzip.size() ).match() } ) @@ -201,10 +201,10 @@ nextflow_pipeline { .list() .collect { getRecursiveFileNames(it, outputDir) } .flatten(), - path("$outputDir/imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz").linesGzip.size(), - path("$outputDir/imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz").linesGzip.size(), - path("$outputDir/imputation/stitch/concat/NA12878_stitch.vcf.gz").linesGzip.size(), - 
path("$outputDir/imputation/quilt/concat/NA12878_quilt.vcf.gz").linesGzip.size(), + path("$outputDir/imputation/glimpse1/concat/NA12878.glimpse1.vcf.gz").linesGzip.size(), + path("$outputDir/imputation/glimpse2/concat/NA12878.glimpse2.vcf.gz").linesGzip.size(), + path("$outputDir/imputation/stitch/concat/NA12878.stitch.vcf.gz").linesGzip.size(), + path("$outputDir/imputation/quilt/concat/NA12878.quilt.vcf.gz").linesGzip.size(), ).match() } ) diff --git a/workflows/phaseimpute/tests/test_all.nf.test.snap b/workflows/phaseimpute/tests/test_all.nf.test.snap index e9e314dd..dd92288a 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test.snap +++ b/workflows/phaseimpute/tests/test_all.nf.test.snap @@ -3,15 +3,15 @@ "content": [ [ "imputation/csv/impute.csv", - "imputation/stats/NA12878_stitch.bcftools_stats.txt", - "imputation/stats/NA19401_stitch.bcftools_stats.txt", - "imputation/stats/NA20359_stitch.bcftools_stats.txt", - "imputation/stitch/concat/NA12878_stitch.vcf.gz", - "imputation/stitch/concat/NA12878_stitch.vcf.gz.tbi", - "imputation/stitch/concat/NA19401_stitch.vcf.gz", - "imputation/stitch/concat/NA19401_stitch.vcf.gz.tbi", - "imputation/stitch/concat/NA20359_stitch.vcf.gz", - "imputation/stitch/concat/NA20359_stitch.vcf.gz.tbi" + "imputation/stats/NA12878.stitch.bcftools_stats.txt", + "imputation/stats/NA19401.stitch.bcftools_stats.txt", + "imputation/stats/NA20359.stitch.bcftools_stats.txt", + "imputation/stitch/concat/NA12878.stitch.vcf.gz", + "imputation/stitch/concat/NA12878.stitch.vcf.gz.tbi", + "imputation/stitch/concat/NA19401.stitch.vcf.gz", + "imputation/stitch/concat/NA19401.stitch.vcf.gz.tbi", + "imputation/stitch/concat/NA20359.stitch.vcf.gz", + "imputation/stitch/concat/NA20359.stitch.vcf.gz.tbi" ], 1786 ], @@ -19,7 +19,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-06-12T17:20:52.933808288" + "timestamp": "2024-06-13T13:24:05.918523634" }, "Check test_all": { "content": [ @@ -34,100 +34,105 @@ ], [ 
"imputation/csv/impute.csv", - "imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz", - "imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz.tbi", - "imputation/glimpse1/concat/NA19401_glimpse1.vcf.gz", - "imputation/glimpse1/concat/NA19401_glimpse1.vcf.gz.tbi", - "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz", - "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz.tbi", - "imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz", - "imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz.tbi", - "imputation/glimpse2/concat/NA19401_glimpse2.vcf.gz", - "imputation/glimpse2/concat/NA19401_glimpse2.vcf.gz.tbi", - "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz", - "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz.tbi", - "imputation/quilt/concat/NA12878_quilt.vcf.gz", - "imputation/quilt/concat/NA12878_quilt.vcf.gz.tbi", - "imputation/quilt/concat/NA19401_quilt.vcf.gz", - "imputation/quilt/concat/NA19401_quilt.vcf.gz.tbi", - "imputation/quilt/concat/NA20359_quilt.vcf.gz", - "imputation/quilt/concat/NA20359_quilt.vcf.gz.tbi", - "imputation/stats/NA12878_glimpse1.bcftools_stats.txt", - "imputation/stats/NA12878_glimpse2.bcftools_stats.txt", - "imputation/stats/NA12878_quilt.bcftools_stats.txt", - "imputation/stats/NA12878_stitch.bcftools_stats.txt", - "imputation/stats/NA19401_glimpse1.bcftools_stats.txt", - "imputation/stats/NA19401_glimpse2.bcftools_stats.txt", - "imputation/stats/NA19401_quilt.bcftools_stats.txt", - "imputation/stats/NA19401_stitch.bcftools_stats.txt", - "imputation/stats/NA20359_glimpse1.bcftools_stats.txt", - "imputation/stats/NA20359_glimpse2.bcftools_stats.txt", - "imputation/stats/NA20359_quilt.bcftools_stats.txt", - "imputation/stats/NA20359_stitch.bcftools_stats.txt", - "imputation/stitch/concat/NA12878_stitch.vcf.gz", - "imputation/stitch/concat/NA12878_stitch.vcf.gz.tbi", - "imputation/stitch/concat/NA19401_stitch.vcf.gz", - "imputation/stitch/concat/NA19401_stitch.vcf.gz.tbi", - "imputation/stitch/concat/NA20359_stitch.vcf.gz", - 
"imputation/stitch/concat/NA20359_stitch.vcf.gz.tbi" + "imputation/glimpse1/concat/NA12878.glimpse1.vcf.gz", + "imputation/glimpse1/concat/NA12878.glimpse1.vcf.gz.tbi", + "imputation/glimpse1/concat/NA19401.glimpse1.vcf.gz", + "imputation/glimpse1/concat/NA19401.glimpse1.vcf.gz.tbi", + "imputation/glimpse1/concat/NA20359.glimpse1.vcf.gz", + "imputation/glimpse1/concat/NA20359.glimpse1.vcf.gz.tbi", + "imputation/glimpse2/concat/NA12878.glimpse2.vcf.gz", + "imputation/glimpse2/concat/NA12878.glimpse2.vcf.gz.tbi", + "imputation/glimpse2/concat/NA19401.glimpse2.vcf.gz", + "imputation/glimpse2/concat/NA19401.glimpse2.vcf.gz.tbi", + "imputation/glimpse2/concat/NA20359.glimpse2.vcf.gz", + "imputation/glimpse2/concat/NA20359.glimpse2.vcf.gz.tbi", + "imputation/quilt/concat/NA12878.quilt.vcf.gz", + "imputation/quilt/concat/NA12878.quilt.vcf.gz.tbi", + "imputation/quilt/concat/NA19401.quilt.vcf.gz", + "imputation/quilt/concat/NA19401.quilt.vcf.gz.tbi", + "imputation/quilt/concat/NA20359.quilt.vcf.gz", + "imputation/quilt/concat/NA20359.quilt.vcf.gz.tbi", + "imputation/stats/NA12878.glimpse1.bcftools_stats.txt", + "imputation/stats/NA12878.glimpse2.bcftools_stats.txt", + "imputation/stats/NA12878.quilt.bcftools_stats.txt", + "imputation/stats/NA12878.stitch.bcftools_stats.txt", + "imputation/stats/NA19401.glimpse1.bcftools_stats.txt", + "imputation/stats/NA19401.glimpse2.bcftools_stats.txt", + "imputation/stats/NA19401.quilt.bcftools_stats.txt", + "imputation/stats/NA19401.stitch.bcftools_stats.txt", + "imputation/stats/NA20359.glimpse1.bcftools_stats.txt", + "imputation/stats/NA20359.glimpse2.bcftools_stats.txt", + "imputation/stats/NA20359.quilt.bcftools_stats.txt", + "imputation/stats/NA20359.stitch.bcftools_stats.txt", + "imputation/stitch/concat/NA12878.stitch.vcf.gz", + "imputation/stitch/concat/NA12878.stitch.vcf.gz.tbi", + "imputation/stitch/concat/NA19401.stitch.vcf.gz", + "imputation/stitch/concat/NA19401.stitch.vcf.gz.tbi", + 
"imputation/stitch/concat/NA20359.stitch.vcf.gz", + "imputation/stitch/concat/NA20359.stitch.vcf.gz.tbi" ], [ - "prep_panel/chunks/glimpse1/1000GP.s.norel_chr21_chunks_glimpse1.txt", - "prep_panel/chunks/glimpse1/1000GP.s.norel_chr22_chunks_glimpse1.txt", - "prep_panel/chunks/glimpse2/1000GP.s.norel_chr21_chunks_glimpse2.txt", - "prep_panel/chunks/glimpse2/1000GP.s.norel_chr22_chunks_glimpse2.txt", + "prep_panel/chunks/glimpse1/1000GP_chr21_chunks_glimpse1.txt", + "prep_panel/chunks/glimpse1/1000GP_chr22_chunks_glimpse1.txt", + "prep_panel/chunks/glimpse2/1000GP_chr21_chunks_glimpse2.txt", + "prep_panel/chunks/glimpse2/1000GP_chr22_chunks_glimpse2.txt", "prep_panel/csv/chunks.csv", "prep_panel/csv/panel.csv", "prep_panel/csv/posfile.csv", - "prep_panel/haplegend/1000GP.s.norel_chr21.hap.gz", - "prep_panel/haplegend/1000GP.s.norel_chr21.legend.gz", - "prep_panel/haplegend/1000GP.s.norel_chr21.samples", - "prep_panel/haplegend/1000GP.s.norel_chr22.hap.gz", - "prep_panel/haplegend/1000GP.s.norel_chr22.legend.gz", - "prep_panel/haplegend/1000GP.s.norel_chr22.samples", - "prep_panel/normalized/1000GP.s.norel_chr21_biallelic_snps.vcf.gz", - "prep_panel/normalized/1000GP.s.norel_chr21_biallelic_snps.vcf.gz.tbi", - "prep_panel/normalized/1000GP.s.norel_chr22_biallelic_snps.vcf.gz", - "prep_panel/normalized/1000GP.s.norel_chr22_biallelic_snps.vcf.gz.tbi", - "prep_panel/sites/tsv/1000GP.s.norel_chr21_glimpse1_sites_tsv.txt.gz", - "prep_panel/sites/tsv/1000GP.s.norel_chr22_glimpse1_sites_tsv.txt.gz", - "prep_panel/sites/vcf/1000GP.s.norel_chr21_glimpse1_sites.vcf.gz", - "prep_panel/sites/vcf/1000GP.s.norel_chr21_glimpse1_sites.vcf.gz.csi", - "prep_panel/sites/vcf/1000GP.s.norel_chr22_glimpse1_sites.vcf.gz", - "prep_panel/sites/vcf/1000GP.s.norel_chr22_glimpse1_sites.vcf.gz.csi", - "prep_panel/stats/1000GP.s.norel.bcftools_stats.txt" + "prep_panel/haplegend/1000GP_chr21.hap.gz", + "prep_panel/haplegend/1000GP_chr21.legend.gz", + "prep_panel/haplegend/1000GP_chr21.samples", + 
"prep_panel/haplegend/1000GP_chr22.hap.gz", + "prep_panel/haplegend/1000GP_chr22.legend.gz", + "prep_panel/haplegend/1000GP_chr22.samples", + "prep_panel/normalized/1000GP_chr21_biallelic_snps.vcf.gz", + "prep_panel/normalized/1000GP_chr21_biallelic_snps.vcf.gz.tbi", + "prep_panel/normalized/1000GP_chr22_biallelic_snps.vcf.gz", + "prep_panel/normalized/1000GP_chr22_biallelic_snps.vcf.gz.tbi", + "prep_panel/posfile/1000GP_chr21_posfile_stitch.txt", + "prep_panel/posfile/1000GP_chr22_posfile_stitch.txt", + "prep_panel/sites/tsv/1000GP_chr21_glimpse1_sites_tsv.txt.gz", + "prep_panel/sites/tsv/1000GP_chr22_glimpse1_sites_tsv.txt.gz", + "prep_panel/sites/vcf/1000GP_chr21_glimpse1_sites.vcf.gz", + "prep_panel/sites/vcf/1000GP_chr21_glimpse1_sites.vcf.gz.csi", + "prep_panel/sites/vcf/1000GP_chr22_glimpse1_sites.vcf.gz", + "prep_panel/sites/vcf/1000GP_chr22_glimpse1_sites.vcf.gz.csi", + "prep_panel/stats/1000GP.panel.bcftools_stats.txt" ], [ - "validation/NA12878_P1000GP.s.norel_Tglimpse1_SNP.txt", - "validation/NA12878_P1000GP.s.norel_Tglimpse2_SNP.txt", - "validation/NA12878_P1000GP.s.norel_Tquilt_SNP.txt", - "validation/NA12878_P1000GP.s.norel_Tstitch_SNP.txt", - "validation/NA19401_P1000GP.s.norel_Tglimpse1_SNP.txt", - "validation/NA19401_P1000GP.s.norel_Tglimpse2_SNP.txt", - "validation/NA19401_P1000GP.s.norel_Tquilt_SNP.txt", - "validation/NA19401_P1000GP.s.norel_Tstitch_SNP.txt", - "validation/NA20359_P1000GP.s.norel_Tglimpse1_SNP.txt", - "validation/NA20359_P1000GP.s.norel_Tglimpse2_SNP.txt", - "validation/NA20359_P1000GP.s.norel_Tquilt_SNP.txt", - "validation/NA20359_P1000GP.s.norel_Tstitch_SNP.txt", + "validation/NA12878_P1000GP_Tglimpse1_SNP.txt", + "validation/NA12878_P1000GP_Tglimpse2_SNP.txt", + "validation/NA12878_P1000GP_Tquilt_SNP.txt", + "validation/NA12878_P1000GP_Tstitch_SNP.txt", + "validation/NA19401_P1000GP_Tglimpse1_SNP.txt", + "validation/NA19401_P1000GP_Tglimpse2_SNP.txt", + "validation/NA19401_P1000GP_Tquilt_SNP.txt", + 
"validation/NA19401_P1000GP_Tstitch_SNP.txt", + "validation/NA20359_P1000GP_Tglimpse1_SNP.txt", + "validation/NA20359_P1000GP_Tglimpse2_SNP.txt", + "validation/NA20359_P1000GP_Tquilt_SNP.txt", + "validation/NA20359_P1000GP_Tstitch_SNP.txt", "validation/TestQuality.txt", - "validation/concat/NA12878_A-truth.vcf.gz", - "validation/concat/NA12878_A-truth.vcf.gz.tbi", - "validation/concat/NA19401_A-truth.vcf.gz", - "validation/concat/NA19401_A-truth.vcf.gz.tbi", - "validation/concat/NA20359_A-truth.vcf.gz", - "validation/concat/NA20359_A-truth.vcf.gz.tbi", - "validation/stats/NA12878_truth.bcftools_stats.txt", - "validation/stats/NA19401_truth.bcftools_stats.txt", - "validation/stats/NA20359_truth.bcftools_stats.txt" + "validation/concat/NA12878.A-truth.vcf.gz", + "validation/concat/NA12878.A-truth.vcf.gz.tbi", + "validation/concat/NA19401.A-truth.vcf.gz", + "validation/concat/NA19401.A-truth.vcf.gz.tbi", + "validation/concat/NA20359.A-truth.vcf.gz", + "validation/concat/NA20359.A-truth.vcf.gz.tbi", + "validation/stats/NA12878.truth.bcftools_stats.txt", + "validation/stats/NA19401.truth.bcftools_stats.txt", + "validation/stats/NA20359.truth.bcftools_stats.txt" ], + 1779, + 1777, + 1776, 1779 ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-06-12T17:51:48.063603304" + "timestamp": "2024-06-13T13:34:14.32940498" }, "Check test_validate": { "content": [ @@ -136,36 +141,36 @@ "validation/NA19401_Pnull_Tnull_SNP.txt", "validation/NA20359_Pnull_Tnull_SNP.txt", "validation/TestQuality.txt", - "validation/concat/NA12878_A-truth.vcf.gz", - "validation/concat/NA12878_A-truth.vcf.gz.tbi", - "validation/concat/NA19401_A-truth.vcf.gz", - "validation/concat/NA19401_A-truth.vcf.gz.tbi", - "validation/concat/NA20359_A-truth.vcf.gz", - "validation/concat/NA20359_A-truth.vcf.gz.tbi", - "validation/stats/NA12878_truth.bcftools_stats.txt", - "validation/stats/NA19401_truth.bcftools_stats.txt", - "validation/stats/NA20359_truth.bcftools_stats.txt" + 
"validation/concat/NA12878.A-truth.vcf.gz", + "validation/concat/NA12878.A-truth.vcf.gz.tbi", + "validation/concat/NA19401.A-truth.vcf.gz", + "validation/concat/NA19401.A-truth.vcf.gz.tbi", + "validation/concat/NA20359.A-truth.vcf.gz", + "validation/concat/NA20359.A-truth.vcf.gz.tbi", + "validation/stats/NA12878.truth.bcftools_stats.txt", + "validation/stats/NA19401.truth.bcftools_stats.txt", + "validation/stats/NA20359.truth.bcftools_stats.txt" ] ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-06-12T16:28:39.822160588" + "timestamp": "2024-06-13T13:27:52.970989783" }, "Check test_quilt": { "content": [ [ "imputation/csv/impute.csv", - "imputation/quilt/concat/NA12878_quilt.vcf.gz", - "imputation/quilt/concat/NA12878_quilt.vcf.gz.tbi", - "imputation/quilt/concat/NA19401_quilt.vcf.gz", - "imputation/quilt/concat/NA19401_quilt.vcf.gz.tbi", - "imputation/quilt/concat/NA20359_quilt.vcf.gz", - "imputation/quilt/concat/NA20359_quilt.vcf.gz.tbi", - "imputation/stats/NA12878_quilt.bcftools_stats.txt", - "imputation/stats/NA19401_quilt.bcftools_stats.txt", - "imputation/stats/NA20359_quilt.bcftools_stats.txt" + "imputation/quilt/concat/NA12878.quilt.vcf.gz", + "imputation/quilt/concat/NA12878.quilt.vcf.gz.tbi", + "imputation/quilt/concat/NA19401.quilt.vcf.gz", + "imputation/quilt/concat/NA19401.quilt.vcf.gz.tbi", + "imputation/quilt/concat/NA20359.quilt.vcf.gz", + "imputation/quilt/concat/NA20359.quilt.vcf.gz.tbi", + "imputation/stats/NA12878.quilt.bcftools_stats.txt", + "imputation/stats/NA19401.quilt.bcftools_stats.txt", + "imputation/stats/NA20359.quilt.bcftools_stats.txt" ], 1779 ], @@ -173,7 +178,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-06-12T16:53:00.958242697" + "timestamp": "2024-06-13T13:21:00.392342738" }, "Check test_sim": { "content": [ @@ -197,15 +202,15 @@ "content": [ [ "imputation/csv/impute.csv", - "imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz", - 
"imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz.tbi", - "imputation/glimpse2/concat/NA19401_glimpse2.vcf.gz", - "imputation/glimpse2/concat/NA19401_glimpse2.vcf.gz.tbi", - "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz", - "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz.tbi", - "imputation/stats/NA12878_glimpse2.bcftools_stats.txt", - "imputation/stats/NA19401_glimpse2.bcftools_stats.txt", - "imputation/stats/NA20359_glimpse2.bcftools_stats.txt" + "imputation/glimpse2/concat/NA12878.glimpse2.vcf.gz", + "imputation/glimpse2/concat/NA12878.glimpse2.vcf.gz.tbi", + "imputation/glimpse2/concat/NA19401.glimpse2.vcf.gz", + "imputation/glimpse2/concat/NA19401.glimpse2.vcf.gz.tbi", + "imputation/glimpse2/concat/NA20359.glimpse2.vcf.gz", + "imputation/glimpse2/concat/NA20359.glimpse2.vcf.gz.tbi", + "imputation/stats/NA12878.glimpse2.bcftools_stats.txt", + "imputation/stats/NA19401.glimpse2.bcftools_stats.txt", + "imputation/stats/NA20359.glimpse2.bcftools_stats.txt" ], 1798 ], @@ -213,21 +218,21 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-06-12T16:49:29.063030563" + "timestamp": "2024-06-13T13:17:20.333655444" }, "Check test": { "content": [ [ "imputation/csv/impute.csv", - "imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz", - "imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz.tbi", - "imputation/glimpse1/concat/NA19401_glimpse1.vcf.gz", - "imputation/glimpse1/concat/NA19401_glimpse1.vcf.gz.tbi", - "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz", - "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz.tbi", - "imputation/stats/NA12878_glimpse1.bcftools_stats.txt", - "imputation/stats/NA19401_glimpse1.bcftools_stats.txt", - "imputation/stats/NA20359_glimpse1.bcftools_stats.txt" + "imputation/glimpse1/concat/NA12878.glimpse1.vcf.gz", + "imputation/glimpse1/concat/NA12878.glimpse1.vcf.gz.tbi", + "imputation/glimpse1/concat/NA19401.glimpse1.vcf.gz", + "imputation/glimpse1/concat/NA19401.glimpse1.vcf.gz.tbi", + 
"imputation/glimpse1/concat/NA20359.glimpse1.vcf.gz", + "imputation/glimpse1/concat/NA20359.glimpse1.vcf.gz.tbi", + "imputation/stats/NA12878.glimpse1.bcftools_stats.txt", + "imputation/stats/NA19401.glimpse1.bcftools_stats.txt", + "imputation/stats/NA20359.glimpse1.bcftools_stats.txt" ], 1800 ], @@ -235,6 +240,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-06-12T18:24:52.810087967" + "timestamp": "2024-06-13T14:07:47.885146064" } } \ No newline at end of file From 8964fe269a299259fde0ef1333c4e65d434de70e Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Thu, 13 Jun 2024 14:12:55 +0200 Subject: [PATCH 63/63] Set yml to 2 indent --- assets/multiqc_config.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 992e4eff..9bf18992 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,18 +1,18 @@ report_comment: > - This report has been generated by the nf-core/phaseimpute - analysis pipeline. For information about how to interpret these results, please see the - documentation. + This report has been generated by the nf-core/phaseimpute + analysis pipeline. For information about how to interpret these results, please see the + documentation. report_section_order: - "nf-core-phaseimpute-methods-description": - order: -1000 - software_versions: - order: -1001 - "nf-core-phaseimpute-summary": - order: -1002 + "nf-core-phaseimpute-methods-description": + order: -1000 + software_versions: + order: -1001 + "nf-core-phaseimpute-summary": + order: -1002 top_modules: - - samtools - - bcftools + - samtools + - bcftools export_plots: true