From 80a12ace5266de5529f0e84d633991a9550e7389 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Steffen=20M=C3=B6ller?=
Date: Mon, 1 Jul 2024 14:35:49 +0200
Subject: [PATCH] check: Adding line number to error messages.

Co-authored-by: Jose Espinosa-Carrasco
---
Review notes (not part of the commit message):
- NOTE(review): `enumerate(fin, start=1)` numbers the rows this loop iterates
  over. If the header row has already been consumed from `fin` before the
  loop starts, the reported number is the data-row index rather than the
  line number in the file - confirm which one is intended.

 CHANGELOG.md             |  1 +
 bin/check_samplesheet.py | 28 ++++++++++++++--------------
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5677e847..777d00b5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [[#370](https://github.com/nf-core/chipseq/issues/370)] - Fix stack overflow exceptions in phantompeakqualtools ([see here](https://github.com/kundajelab/phantompeakqualtools/issues/3)).
 - [[#387](https://github.com/nf-core/chipseq/issues/387)] - Get rid of the `lib` folder and rearrange the pipeline accordingly.
 - [[#385](https://github.com/nf-core/chipseq/issues/385)] - Fix `--save_unaligned` description in schema.
+- [[PR #392](https://github.com/nf-core/chipseq/pull/392)] - Adding line numbers to warning/error messages in `bin/check_samplesheet.py`.
 
 ### Software dependencies
 
diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index 7005bb9e..36351a71 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -58,22 +58,22 @@ def check_samplesheet(file_in, file_out):
             sys.exit(1)
 
         ## Check sample entries
-        for line in fin:
+        for line_number, line in enumerate(fin, start=1):
             if line.strip():
                 lspl = [x.strip().strip('"') for x in line.strip().split(",")]
 
                 # Check valid number of columns per row
                 if len(lspl) < len(HEADER):
                     print_error(
-                        "Invalid number of columns (minimum = {})!".format(len(HEADER)),
-                        "Line",
+                        "Invalid number of columns (found = {}, minimum = {})!".format(len(lspl), len(HEADER)),
+                        "Line {}".format(line_number),
                         line,
                     )
                 num_cols = len([x for x in lspl[: len(HEADER)] if x])
                 if num_cols < MIN_COLS:
                     print_error(
-                        "Invalid number of populated columns (minimum = {})!".format(MIN_COLS),
-                        "Line",
+                        "Invalid number of populated columns (found = {}, minimum = {})!".format(num_cols, MIN_COLS),
+                        "Line {}".format(line_number),
                         line,
                     )
 
@@ -83,23 +83,23 @@ def check_samplesheet(file_in, file_out):
                     print(f"WARNING: Spaces have been replaced by underscores for sample: {sample}")
                     sample = sample.replace(" ", "_")
                 if not sample:
-                    print_error("Sample entry has not been specified!", "Line", line)
+                    print_error("Sample entry has not been specified!", "Line {}".format(line_number), line)
 
                 ## Check FastQ file extension
                 for fastq in [fastq_1, fastq_2]:
                     if fastq:
                         if fastq.find(" ") != -1:
-                            print_error("FastQ file contains spaces!", "Line", line)
+                            print_error("FastQ file contains spaces!", "Line {}".format(line_number), line)
                         if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"):
                             print_error(
                                 "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!",
-                                "Line",
+                                "Line {}".format(line_number),
                                 line,
                             )
 
                 ## Check replicate column is integer
                 if not replicate.isdecimal():
-                    print_error("Replicate id not an integer!", "Line", line)
+                    print_error("Replicate id not an integer!", "Line {}".format(line_number), line)
                     sys.exit(1)
 
                 ## Check antibody and control columns have valid values
@@ -110,7 +110,7 @@ def check_samplesheet(file_in, file_out):
                     if not control:
                         print_error(
                             "Both antibody and control columns must be specified!",
-                            "Line",
+                            "Line {}".format(line_number),
                             line,
                         )
 
@@ -119,13 +119,13 @@ def check_samplesheet(file_in, file_out):
                         print(f"WARNING: Spaces have been replaced by underscores for control: {control}")
                         control = control.replace(" ", "_")
                     if not control_replicate.isdecimal():
-                        print_error("Control replicate id not an integer!", "Line", line)
+                        print_error("Control replicate id not an integer!", "Line {}".format(line_number), line)
                         sys.exit(1)
                     control = "{}_REP{}".format(control, control_replicate)
                     if not antibody:
                         print_error(
                             "Both antibody and control columns must be specified!",
-                            "Line",
+                            "Line {}".format(line_number),
                             line,
                         )
 
@@ -138,7 +138,7 @@ def check_samplesheet(file_in, file_out):
                 elif sample and fastq_1 and not fastq_2:
                     sample_info = ["1", fastq_1, fastq_2, replicate, antibody, control]
                 else:
-                    print_error("Invalid combination of columns provided!", "Line", line)
+                    print_error("Invalid combination of columns provided!", "Line {}".format(line_number), line)
 
                 ## Create sample mapping dictionary = {sample: [[ single_end, fastq_1, fastq_2, replicate, antibody, control ]]}
                 replicate = int(replicate)
@@ -149,7 +149,7 @@ def check_samplesheet(file_in, file_out):
                     sample_mapping_dict[sample][replicate] = [sample_info]
                 else:
                     if sample_info in sample_mapping_dict[sample][replicate]:
-                        print_error("Samplesheet contains duplicate rows!", "Line", line)
+                        print_error("Samplesheet contains duplicate rows!", "Line {}".format(line_number), line)
                     else:
                         sample_mapping_dict[sample][replicate].append(sample_info)
 