From 5a94eb3d3a1d225ff00e2be6a95fa810d97a99ce Mon Sep 17 00:00:00 2001 From: Sigve Nakken Date: Thu, 4 Jan 2024 14:12:38 +0100 Subject: [PATCH] remove_file fix and type fix --- pcgr/arg_checker.py | 12 ++++++------ pcgr/cpsr.py | 6 +++--- pcgr/utils.py | 2 +- pcgr/variant.py | 4 ++-- scripts/cpsr_validate_input.py | 8 ++++---- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pcgr/arg_checker.py b/pcgr/arg_checker.py index 24154b55..af13ad8c 100644 --- a/pcgr/arg_checker.py +++ b/pcgr/arg_checker.py @@ -75,18 +75,18 @@ def check_args(arg_dict): err_msg = f"Minimum sequencing depth tumor - TMB calculation ('tmb_dp_min' = {arg_dict['tmb_dp_min']}) must be >= 0" error_message(err_msg, logger) - if int(arg_dict['tmb_dp_min']) < int(arg_dict['tumor_dp_min']): - err_msg = f"Minimum sequencing depth (tumor) for TMB calculation ('tmb_dp_min' = {arg_dict['tmb_dp_min']}) must be ", - err_msg += f"greater or equal to minimum sequencing depth tumor {arg_dict['tumor_dp_min']} (i.e. filter for variant inclusion in report)" + if int(arg_dict['tmb_dp_min']) > 0 and (int(arg_dict['tmb_dp_min']) < int(arg_dict['tumor_dp_min'])): + err_msg = f"Minimum sequencing depth (tumor) for TMB calculation ('tmb_dp_min' = {str(arg_dict['tmb_dp_min'])}) must be ", + err_msg += f"greater or equal to minimum sequencing depth tumor {str(arg_dict['tumor_dp_min'])} (i.e. filter for variant inclusion in report)" error_message(err_msg, logger) if float(arg_dict['tmb_af_min']) < 0 or float(arg_dict['tmb_af_min']) > 1: err_msg = f"Minimum AF (tumor) for TMB calculation ('tmb_af_min' = {arg_dict['tmb_af_min']}) must be within [0, 1]" error_message(err_msg, logger) - if float(arg_dict['tmb_af_min']) < float(arg_dict['tumor_af_min']): - err_msg = f"Minimum AF (tumor) for TMB calculation ('tmb_af_min' = {arg_dict['tmb_af_min']}) must be ", - err_msg += f"greater or equal to minimum AF tumor {arg_dict['tumor_dp_min']} (i.e. filter for variant inclusion in report)" + if float(arg_dict['tmb_af_min']) > 0 and (float(arg_dict['tmb_af_min']) < float(arg_dict['tumor_af_min'])): + err_msg = f"Minimum AF (tumor) for TMB calculation ('tmb_af_min' = {str(arg_dict['tmb_af_min'])}) must be ", + err_msg += f"greater or equal to minimum AF tumor {str(arg_dict['tumor_dp_min'])} (i.e. filter for variant inclusion in report)" error_message(err_msg, logger) # Check that coding target size region of sequencing assay is set correctly diff --git a/pcgr/cpsr.py b/pcgr/cpsr.py index dbd297a3..b14ca629 100755 --- a/pcgr/cpsr.py +++ b/pcgr/cpsr.py @@ -15,7 +15,7 @@ from glob import glob from argparse import RawTextHelpFormatter from pcgr import pcgr_vars, arg_checker, utils, vep, config, variant -from pcgr.utils import check_subprocess, getlogger, error_message, warn_message +from pcgr.utils import check_subprocess, getlogger, error_message, warn_message, remove_file from pcgr.config import populate_config_data def get_args(): @@ -267,7 +267,7 @@ def run_cpsr(conf_options, cpsr_paths): # do not delete if debugging if not debug: for fn in delete_files: - utils.remove(fn) + remove_file(fn) logger.info('Finished cpsr-summarise main command') # CPSR|vcf2tsvpy - convert VCF to TSV with https://github.com/sigven/vcf2tsvpy @@ -291,7 +291,7 @@ def run_cpsr(conf_options, cpsr_paths): variant_set.to_csv(output_pass_tsv_gz, sep="\t", compression="gzip", index=False) if not debug: - utils.remove(output_pass_vcf2tsv_gz) + remove_file(output_pass_vcf2tsv_gz) logger.info('Finished cpsr-summarise') diff --git a/pcgr/utils.py b/pcgr/utils.py index 36d0c017..c73d987c 100644 --- a/pcgr/utils.py +++ b/pcgr/utils.py @@ -169,7 +169,7 @@ def sort_bed(unsorted_bed_fname: str, sorted_bed_fname: str, debug = False, logg check_subprocess(logger, cmd_sort_custom_bed1, debug) check_subprocess(logger, cmd_sort_custom_bed2, debug) if not debug: - remove(str(unsorted_bed_fname)) + remove_file(str(unsorted_bed_fname)) else: err_msg = 'File ' + str(unsorted_bed_fname) + ' does not exist or is empty' error_message(err_msg, logger) diff --git a/pcgr/variant.py b/pcgr/variant.py index 120ff105..8c05d8e9 100644 --- a/pcgr/variant.py +++ b/pcgr/variant.py @@ -286,10 +286,10 @@ def clean_annotations(variant_set: pd.DataFrame, yaml_data: dict, germline: bool for tag in ['AN','AC','NHOMALT']: vcf_info_tag = 'gnomADe_non_cancer_' + str(pop) + '_' + str(tag) if vcf_info_tag in variant_set.columns: - variant_set[vcf_info_tag] = variant_set[vcf_info_tag].astype(str) + #variant_set[vcf_info_tag] = variant_set[vcf_info_tag].astype(str) variant_set.loc[variant_set[vcf_info_tag].notna(), vcf_info_tag] = \ - variant_set.loc[variant_set[vcf_info_tag].notna(), vcf_info_tag].astype(float).astype(int) + variant_set.loc[variant_set[vcf_info_tag].notna(), vcf_info_tag].astype(str).astype(float).astype(int) for elem in ['NUM_SUBMITTERS','ALLELE_ID','ENTREZGENE','REVIEW_STATUS_STARS']: vcf_info_tag = 'CLINVAR_' + str(elem) diff --git a/scripts/cpsr_validate_input.py b/scripts/cpsr_validate_input.py index 18367b2c..166cb0ce 100755 --- a/scripts/cpsr_validate_input.py +++ b/scripts/cpsr_validate_input.py @@ -12,7 +12,7 @@ from cyvcf2 import VCF from pcgr import utils, annoutils, vcf, pcgr_vars -from pcgr.utils import error_message, check_subprocess, random_id_generator, sort_bed, check_file_exists +from pcgr.utils import error_message, check_subprocess, random_id_generator, sort_bed, check_file_exists, remove_file def __main__(): @@ -262,10 +262,10 @@ def simplify_vcf(input_vcf, validated_vcf, vcf, custom_bed, pcgr_directory, geno bcftools_simplify_log, vt_decompose_log]: #print(f"Deleting {fn}") - utils.remove(fn) + remove_file(fn) - utils.remove(temp_files["vcf_2"] + str('.tbi')) - utils.remove(temp_files["vcf_3"] + str('.tbi')) + remove_file(temp_files["vcf_2"] + str('.tbi')) + remove_file(temp_files["vcf_3"] + str('.tbi')) if check_file_exists(f'{validated_vcf}.gz'): vcf = VCF(validated_vcf + '.gz')