From 92bc1be0da868d6bfa6c5ad8f950bbe1f8002ca7 Mon Sep 17 00:00:00 2001 From: bjlang <> Date: Mon, 17 Jul 2023 18:50:11 +0200 Subject: [PATCH 1/2] Allow empty lines in input samplesheet --- bin/check_samplesheet.py | 161 ++++++++++++++++++++------------------- 1 file changed, 82 insertions(+), 79 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index b7b285ab..7005bb9e 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -59,96 +59,99 @@ def check_samplesheet(file_in, file_out): ## Check sample entries for line in fin: - lspl = [x.strip().strip('"') for x in line.strip().split(",")] - - # Check valid number of columns per row - if len(lspl) < len(HEADER): - print_error( - "Invalid number of columns (minimum = {})!".format(len(HEADER)), - "Line", - line, - ) - num_cols = len([x for x in lspl if x]) - if num_cols < MIN_COLS: - print_error( - "Invalid number of populated columns (minimum = {})!".format(MIN_COLS), - "Line", - line, - ) - - ## Check sample name entries - sample, fastq_1, fastq_2, replicate, antibody, control, control_replicate = lspl[: len(HEADER)] - if sample.find(" ") != -1: - print(f"WARNING: Spaces have been replaced by underscores for sample: {sample}") - sample = sample.replace(" ", "_") - if not sample: - print_error("Sample entry has not been specified!", "Line", line) - - ## Check FastQ file extension - for fastq in [fastq_1, fastq_2]: - if fastq: - if fastq.find(" ") != -1: - print_error("FastQ file contains spaces!", "Line", line) - if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"): - print_error( - "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!", - "Line", - line, - ) + if line.strip(): + lspl = [x.strip().strip('"') for x in line.strip().split(",")] - ## Check replicate column is integer - if not replicate.isdecimal(): - print_error("Replicate id not an integer!", "Line", line) - sys.exit(1) - - ## Check antibody and control columns have valid values - if antibody: - if antibody.find(" ") != -1: - print(f"WARNING: Spaces have been replaced by underscores for antibody: {antibody}") - antibody = antibody.replace(" ", "_") - if not control: + # Check valid number of columns per row + if len(lspl) < len(HEADER): print_error( - "Both antibody and control columns must be specified!", + "Invalid number of columns (minimum = {})!".format(len(HEADER)), "Line", line, ) - - if control: - if control.find(" ") != -1: - print(f"WARNING: Spaces have been replaced by underscores for control: {control}") - control = control.replace(" ", "_") - if not control_replicate.isdecimal(): - print_error("Control replicate id not an integer!", "Line", line) - sys.exit(1) - control = "{}_REP{}".format(control, control_replicate) - if not antibody: + num_cols = len([x for x in lspl[: len(HEADER)] if x]) + if num_cols < MIN_COLS: print_error( - "Both antibody and control columns must be specified!", + "Invalid number of populated columns (minimum = {})!".format(MIN_COLS), "Line", line, ) - ## Auto-detect paired-end/single-end - sample_info = [] ## [single_end, fastq_1, fastq_2, replicate, antibody, control] - if sample and fastq_1 and fastq_2: ## Paired-end short reads - sample_info = ["0", fastq_1, fastq_2, replicate, antibody, control] - elif sample and fastq_1 and not fastq_2: ## Single-end short reads - sample_info = ["1", fastq_1, fastq_2, replicate, antibody, control] - else: - print_error("Invalid combination of columns provided!", "Line", line) - - ## Create sample mapping dictionary = {sample: [[ single_end, fastq_1, fastq_2, replicate, antibody, control ]]} - replicate = int(replicate) - sample_info = sample_info + lspl[len(HEADER) :] - if sample not in sample_mapping_dict: - sample_mapping_dict[sample] = {} - if replicate not in sample_mapping_dict[sample]: - sample_mapping_dict[sample][replicate] = [sample_info] - else: - if sample_info in sample_mapping_dict[sample][replicate]: - print_error("Samplesheet contains duplicate rows!", "Line", line) + ## Check sample name entries + sample, fastq_1, fastq_2, replicate, antibody, control, control_replicate = lspl[: len(HEADER)] + if sample.find(" ") != -1: + print(f"WARNING: Spaces have been replaced by underscores for sample: {sample}") + sample = sample.replace(" ", "_") + if not sample: + print_error("Sample entry has not been specified!", "Line", line) + + ## Check FastQ file extension + for fastq in [fastq_1, fastq_2]: + if fastq: + if fastq.find(" ") != -1: + print_error("FastQ file contains spaces!", "Line", line) + if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"): + print_error( + "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!", + "Line", + line, + ) + + ## Check replicate column is integer + if not replicate.isdecimal(): + print_error("Replicate id not an integer!", "Line", line) + sys.exit(1) + + ## Check antibody and control columns have valid values + if antibody: + if antibody.find(" ") != -1: + print(f"WARNING: Spaces have been replaced by underscores for antibody: {antibody}") + antibody = antibody.replace(" ", "_") + if not control: + print_error( + "Both antibody and control columns must be specified!", + "Line", + line, + ) + + if control: + if control.find(" ") != -1: + print(f"WARNING: Spaces have been replaced by underscores for control: {control}") + control = control.replace(" ", "_") + if not control_replicate.isdecimal(): + print_error("Control replicate id not an integer!", "Line", line) + sys.exit(1) + control = "{}_REP{}".format(control, control_replicate) + if not antibody: + print_error( + "Both antibody and control columns must be specified!", + "Line", + line, + ) + + ## Auto-detect paired-end/single-end + sample_info = [] ## [single_end, fastq_1, fastq_2, replicate, antibody, control] + ## Paired-end short reads + if sample and fastq_1 and fastq_2: + sample_info = ["0", fastq_1, fastq_2, replicate, antibody, control] + ## Single-end short reads + elif sample and fastq_1 and not fastq_2: + sample_info = ["1", fastq_1, fastq_2, replicate, antibody, control] + else: + print_error("Invalid combination of columns provided!", "Line", line) + + ## Create sample mapping dictionary = {sample: [[ single_end, fastq_1, fastq_2, replicate, antibody, control ]]} + replicate = int(replicate) + sample_info = sample_info + lspl[len(HEADER) :] + if sample not in sample_mapping_dict: + sample_mapping_dict[sample] = {} + if replicate not in sample_mapping_dict[sample]: + sample_mapping_dict[sample][replicate] = [sample_info] else: - sample_mapping_dict[sample][replicate].append(sample_info) + if sample_info in sample_mapping_dict[sample][replicate]: + print_error("Samplesheet contains duplicate rows!", "Line", line) + else: + sample_mapping_dict[sample][replicate].append(sample_info) ## Write validated samplesheet with appropriate columns if len(sample_mapping_dict) > 0: From fa3221eb05e03e1e9a230642f239a35abfa4983a Mon Sep 17 00:00:00 2001 From: bjlang <> Date: Wed, 19 Jul 2023 12:01:46 +0200 Subject: [PATCH 2/2] Remove quay.io from biocontainer address --- modules/local/bam_remove_orphans.nf | 2 +- modules/local/bamtools_filter.nf | 2 +- modules/local/bedtools_genomecov.nf | 2 +- modules/local/deseq2_qc.nf | 2 +- modules/local/frip_score.nf | 2 +- modules/local/genome_blacklist_regions.nf | 2 +- modules/local/gtf2bed.nf | 2 +- modules/local/igv.nf | 2 +- modules/local/macs2_consensus.nf | 2 +- modules/local/multiqc.nf | 2 +- modules/local/multiqc_custom_peaks.nf | 4 +++- modules/local/multiqc_custom_phantompeakqualtools.nf | 2 +- modules/local/plot_homer_annotatepeaks.nf | 2 +- modules/local/plot_macs2_qc.nf | 2 +- modules/local/star_align.nf | 2 +- modules/local/star_genomegenerate.nf | 2 +- 16 files changed, 18 insertions(+), 16 deletions(-) diff --git a/modules/local/bam_remove_orphans.nf b/modules/local/bam_remove_orphans.nf index d6e221aa..2bfb7dfd 100644 --- a/modules/local/bam_remove_orphans.nf +++ b/modules/local/bam_remove_orphans.nf @@ -8,7 +8,7 @@ process BAM_REMOVE_ORPHANS { conda "bioconda::pysam=0.19.0 bioconda::samtools=1.15.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-57736af1eb98c01010848572c9fec9fff6ffaafd:402e865b8f6af2f3e58c6fc8d57127ff0144b2c7-0' : - 'quay.io/biocontainers/mulled-v2-57736af1eb98c01010848572c9fec9fff6ffaafd:402e865b8f6af2f3e58c6fc8d57127ff0144b2c7-0' }" + 'biocontainers/mulled-v2-57736af1eb98c01010848572c9fec9fff6ffaafd:402e865b8f6af2f3e58c6fc8d57127ff0144b2c7-0' }" input: tuple val(meta), path(bam) diff --git a/modules/local/bamtools_filter.nf b/modules/local/bamtools_filter.nf index c1e750d7..a1e8feed 100644 --- a/modules/local/bamtools_filter.nf +++ b/modules/local/bamtools_filter.nf @@ -5,7 +5,7 @@ process BAMTOOLS_FILTER { conda "bioconda::bamtools=2.5.2 bioconda::samtools=1.15.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-0560a8046fc82aa4338588eca29ff18edab2c5aa:5687a7da26983502d0a8a9a6b05ed727c740ddc4-0' : - 'quay.io/biocontainers/mulled-v2-0560a8046fc82aa4338588eca29ff18edab2c5aa:5687a7da26983502d0a8a9a6b05ed727c740ddc4-0' }" + 'biocontainers/mulled-v2-0560a8046fc82aa4338588eca29ff18edab2c5aa:5687a7da26983502d0a8a9a6b05ed727c740ddc4-0' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/local/bedtools_genomecov.nf b/modules/local/bedtools_genomecov.nf index 28dcc5dd..fb274631 100644 --- a/modules/local/bedtools_genomecov.nf +++ b/modules/local/bedtools_genomecov.nf @@ -5,7 +5,7 @@ process BEDTOOLS_GENOMECOV { conda "bioconda::bedtools=2.30.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0': - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + 'biocontainers/bedtools:2.30.0--hc088bd4_0' }" input: tuple val(meta), path(bam), path(flagstat) diff --git a/modules/local/deseq2_qc.nf b/modules/local/deseq2_qc.nf index d9b96e11..14b57f69 100644 --- a/modules/local/deseq2_qc.nf +++ b/modules/local/deseq2_qc.nf @@ -7,7 +7,7 @@ process DESEQ2_QC { conda "conda-forge::r-base bioconda::bioconductor-deseq2 bioconda::bioconductor-biocparallel bioconda::bioconductor-tximport bioconda::bioconductor-complexheatmap conda-forge::r-optparse conda-forge::r-ggplot2 conda-forge::r-rcolorbrewer conda-forge::r-pheatmap" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-8849acf39a43cdd6c839a369a74c0adc823e2f91:ab110436faf952a33575c64dd74615a84011450b-0' : - 'quay.io/biocontainers/mulled-v2-8849acf39a43cdd6c839a369a74c0adc823e2f91:ab110436faf952a33575c64dd74615a84011450b-0' }" + 'biocontainers/mulled-v2-8849acf39a43cdd6c839a369a74c0adc823e2f91:ab110436faf952a33575c64dd74615a84011450b-0' }" input: tuple val(meta), path(counts) diff --git a/modules/local/frip_score.nf b/modules/local/frip_score.nf index 28133e0c..77f856bc 100644 --- a/modules/local/frip_score.nf +++ b/modules/local/frip_score.nf @@ -5,7 +5,7 @@ process FRIP_SCORE { conda "bioconda::bedtools=2.30.0 bioconda::samtools=1.15.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-8186960447c5cb2faa697666dc1e6d919ad23f3e:3127fcae6b6bdaf8181e21a26ae61231030a9fcb-0': - 'quay.io/biocontainers/mulled-v2-8186960447c5cb2faa697666dc1e6d919ad23f3e:3127fcae6b6bdaf8181e21a26ae61231030a9fcb-0' }" + 'biocontainers/mulled-v2-8186960447c5cb2faa697666dc1e6d919ad23f3e:3127fcae6b6bdaf8181e21a26ae61231030a9fcb-0' }" input: tuple val(meta), path(bam), path(peak) diff --git a/modules/local/genome_blacklist_regions.nf b/modules/local/genome_blacklist_regions.nf index fcb8451c..c365c8a7 100644 --- a/modules/local/genome_blacklist_regions.nf +++ b/modules/local/genome_blacklist_regions.nf @@ -7,7 +7,7 @@ process GENOME_BLACKLIST_REGIONS { conda "bioconda::bedtools=2.30.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0': - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + 'biocontainers/bedtools:2.30.0--hc088bd4_0' }" input: path sizes diff --git a/modules/local/gtf2bed.nf b/modules/local/gtf2bed.nf index 8a8e7bf9..bc0eef54 100644 --- a/modules/local/gtf2bed.nf +++ b/modules/local/gtf2bed.nf @@ -5,7 +5,7 @@ process GTF2BED { conda "conda-forge::perl=5.26.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/perl:5.26.2': - 'quay.io/biocontainers/perl:5.26.2' }" + 'biocontainers/perl:5.26.2' }" input: path gtf diff --git a/modules/local/igv.nf b/modules/local/igv.nf index cf02fd9e..863af845 100644 --- a/modules/local/igv.nf +++ b/modules/local/igv.nf @@ -6,7 +6,7 @@ process IGV { conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3': - 'quay.io/biocontainers/python:3.8.3' }" + 'biocontainers/python:3.8.3' }" input: val aligner_dir diff --git a/modules/local/macs2_consensus.nf b/modules/local/macs2_consensus.nf index eb373d2c..6b2006e4 100644 --- a/modules/local/macs2_consensus.nf +++ b/modules/local/macs2_consensus.nf @@ -8,7 +8,7 @@ process MACS2_CONSENSUS { conda "conda-forge::biopython conda-forge::r-optparse=1.7.1 conda-forge::r-upsetr=1.4.0 bioconda::bedtools=2.30.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-2f48cc59b03027e31ead6d383fe1b8057785dd24:5d182f583f4696f4c4d9f3be93052811b383341f-0': - 'quay.io/biocontainers/mulled-v2-2f48cc59b03027e31ead6d383fe1b8057785dd24:5d182f583f4696f4c4d9f3be93052811b383341f-0' }" + 'biocontainers/mulled-v2-2f48cc59b03027e31ead6d383fe1b8057785dd24:5d182f583f4696f4c4d9f3be93052811b383341f-0' }" input: tuple val(meta), path(peaks) diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf index d23d2cbe..32c35df7 100644 --- a/modules/local/multiqc.nf +++ b/modules/local/multiqc.nf @@ -4,7 +4,7 @@ process MULTIQC { conda "bioconda::multiqc=1.13a" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1': - 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" + 'biocontainers/multiqc:1.13a--pyhdfd78af_1' }" input: path multiqc_config diff --git a/modules/local/multiqc_custom_peaks.nf b/modules/local/multiqc_custom_peaks.nf index c19d81a5..eb3e5cbb 100644 --- a/modules/local/multiqc_custom_peaks.nf +++ b/modules/local/multiqc_custom_peaks.nf @@ -1,9 +1,10 @@ process MULTIQC_CUSTOM_PEAKS { tag "$meta.id" + conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/library/ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(peak), path(frip) @@ -13,6 +14,7 @@ process MULTIQC_CUSTOM_PEAKS { output: tuple val(meta), path("*.peak_count_mqc.tsv"), emit: count tuple val(meta), path("*.FRiP_mqc.tsv") , emit: frip + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/multiqc_custom_phantompeakqualtools.nf b/modules/local/multiqc_custom_phantompeakqualtools.nf index 9e59d057..c9239bde 100644 --- a/modules/local/multiqc_custom_phantompeakqualtools.nf +++ b/modules/local/multiqc_custom_phantompeakqualtools.nf @@ -3,7 +3,7 @@ process MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS { conda "conda-forge::r-base=3.5.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/r-base:3.5.1': - 'quay.io/biocontainers/r-base:3.5.1' }" + 'biocontainers/r-base:3.5.1' }" input: tuple val(meta), path(spp), path(rdata) diff --git a/modules/local/plot_homer_annotatepeaks.nf b/modules/local/plot_homer_annotatepeaks.nf index 36c4730e..ef4b1ce6 100644 --- a/modules/local/plot_homer_annotatepeaks.nf +++ b/modules/local/plot_homer_annotatepeaks.nf @@ -4,7 +4,7 @@ process PLOT_HOMER_ANNOTATEPEAKS { conda "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0': - 'quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" + 'biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" input: path annos diff --git a/modules/local/plot_macs2_qc.nf b/modules/local/plot_macs2_qc.nf index 7599c9f8..d514db46 100644 --- a/modules/local/plot_macs2_qc.nf +++ b/modules/local/plot_macs2_qc.nf @@ -4,7 +4,7 @@ process PLOT_MACS2_QC { conda "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0': - 'quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" + 'biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" input: path peaks diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf index ec7fc2af..6a45b731 100644 --- a/modules/local/star_align.nf +++ b/modules/local/star_align.nf @@ -6,7 +6,7 @@ process STAR_ALIGN { conda "bioconda::star=2.6.1d" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/star:2.6.1d--0' : - 'quay.io/biocontainers/star:2.6.1d--0' }" + 'biocontainers/star:2.6.1d--0' }" input: tuple val(meta) , path(reads) diff --git a/modules/local/star_genomegenerate.nf b/modules/local/star_genomegenerate.nf index 285df97c..a36af0a8 100644 --- a/modules/local/star_genomegenerate.nf +++ b/modules/local/star_genomegenerate.nf @@ -6,7 +6,7 @@ process STAR_GENOMEGENERATE { conda "bioconda::star=2.6.1d bioconda::samtools=1.10 conda-forge::gawk=5.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0' : - 'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0' }" + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0' }" input: path fasta