adaptions to VEP 110

sigven · Dec 15, 2023 · f20a0e7 · f20a0e7
1 parent cccaf42
commit f20a0e7
Show file tree

Hide file tree

Showing 6 changed files with 22 additions and 13 deletions.
diff --git a/pcgr/arg_checker.py b/pcgr/arg_checker.py
@@ -187,15 +187,15 @@ def check_args(arg_dict):
     # Check that VEP pick criteria is formatted correctly
     if not arg_dict['vep_pick_order'] is None:
         values = str(arg_dict['vep_pick_order']).split(',')
-        permitted_sources = ['canonical', 'appris', 'tsl', 'biotype', 'ccds', 'rank', 'length', 'mane']
+        permitted_sources = ['canonical', 'appris', 'tsl', 'biotype', 'ccds', 'rank', 'length', 'mane_select','mane_plus_clinical']
         num_permitted_sources = 0
         for v in values:
             if v in permitted_sources:
                 num_permitted_sources += 1
 
-        if num_permitted_sources != 8:
+        if num_permitted_sources != 9:
             err_msg = (f"'--vep_pick_order' = {arg_dict['vep_pick_order']} is formatted incorrectly, should be "
-                "a comma-separated string of the following values: mane,canonical,appris,tsl,biotype,ccds,rank,length")
+                "a comma-separated string of the following values: mane_select,mane_plus_clinical,canonical,appris,tsl,biotype,ccds,rank,length")
             error_message(err_msg, logger)
     return
 

diff --git a/pcgr/config.py b/pcgr/config.py
@@ -158,7 +158,7 @@ def populate_config_data(conf_options: dict, db_dir: str, workflow = "PCGR", log
     conf_data['molecular_data']['fname_mut_vcf'] = conf_options['annotated_vcf']
     conf_data['molecular_data']['fname_mut_tsv'] = conf_options['annotated_tsv']
     conf_data['molecular_data']['fname_cna_tsv'] = "None"
-    if workflow == "PCGR" and conf_options['annotated_cna'] is not "None":
+    if workflow == "PCGR" and conf_options['annotated_cna'] != "None":
         conf_data['molecular_data']['fname_cna_tsv'] = conf_options['annotated_cna']
         del conf_options['annotated_cna']
 
@@ -187,7 +187,7 @@ def populate_config_data(conf_options: dict, db_dir: str, workflow = "PCGR", log
         if check_file_exists(metadata_fname, logger):
             metadata_df = pd.read_csv(metadata_fname, sep="\t", na_values=".")
             metadata_df["source_type"] = dtype
-            metadata_pd = metadata_pd.append(metadata_df, ignore_index=True)
+            metadata_pd = metadata_pd._append(metadata_df, ignore_index=True)
 
     conf_data['reference_data']['source_metadata'] = metadata_pd.to_dict(orient='records')
 

diff --git a/pcgr/cpsr.py b/pcgr/cpsr.py
@@ -64,7 +64,7 @@ def get_args():
     optional_vep.add_argument('--vep_buffer_size', default = 500, type = int, help="Variant buffer size (variants read into memory simultaneously, option '--buffer_size' in VEP) " + \
        "\n- set lower to reduce memory usage, default: %(default)s")
     optional_vep.add_argument("--vep_gencode_basic", action="store_true", help = "Consider basic GENCODE transcript set only with Variant Effect Predictor (VEP) (option '--gencode_basic' in VEP).")
-    optional_vep.add_argument('--vep_pick_order', default = "canonical,appris,biotype,ccds,rank,tsl,length,mane", help="Comma-separated string " + \
+    optional_vep.add_argument('--vep_pick_order', default = "mane_select,mane_plus_clinical,canonical,appris,tsl,biotype,ccds,rank,length", help="Comma-separated string " + \
        "of ordered transcript properties for primary variant pick\n ( option '--pick_order' in VEP), default: %(default)s")
     optional_vep.add_argument('--vep_no_intergenic', action = "store_true", help="Skip intergenic variants during processing (option '--no_intergenic' in VEP), default: %(default)s")
 

diff --git a/pcgr/main.py b/pcgr/main.py
@@ -97,7 +97,8 @@ def cli():
     optional_vcfanno.add_argument("--vcfanno_n_proc", default=4, type=int, help="Number of vcfanno processes (option '-p' in vcfanno), default: %(default)s")
     optional_vep.add_argument("--vep_n_forks", default=4, type=int, help="Number of forks (VEP option '--fork'), default: %(default)s")
     optional_vep.add_argument("--vep_buffer_size", default=500, type=int, help=f"Variant buffer size (variants read into memory simultaneously, VEP option '--buffer_size')\n- set lower to reduce memory usage, default: %(default)s")
-    optional_vep.add_argument("--vep_pick_order", default="mane,canonical,appris,tsl,biotype,ccds,rank,length", help=f"Comma-separated string of ordered transcript/variant properties for selection of primary variant consequence\n(option '--pick_order' in VEP), default: %(default)s")
+    optional_vep.add_argument("--vep_pick_order", default="mane_select,mane_plus_clinical,canonical,appris,tsl,biotype,ccds,rank,length", help=f"Comma-separated string " + \
+        "of ordered transcript/variant properties for selection of primary variant consequence\n(option '--pick_order' in VEP), default: %(default)s")
     optional_vep.add_argument("--vep_no_intergenic", action="store_true", help="Skip intergenic variants during processing (VEP option '--no_intergenic' in VEP), default: %(default)s")
     optional_vep.add_argument("--vep_regulatory", action="store_true", help="Add VEP regulatory annotations (VEP option '--regulatory') or non-coding interpretation, default: %(default)s")
     optional_vep.add_argument("--vep_gencode_basic", action="store_true", help = "Consider basic GENCODE transcript set only with Variant Effect Predictor (VEP) (VEP option '--gencode_basic').")
@@ -329,6 +330,7 @@ def run_pcgr(pcgr_paths, conf_options):
         check_subprocess(logger, vep_command['tabix'], debug)
         logger.info('Finished pcgr-vep')
         print('----')
+        exit(0)
 
         # PCGR|vcf2maf - if option set, convert VCF to MAF with https://github.com/mskcc/vcf2maf
         if run_vcf2maf:

diff --git a/pcgr/pcgr_vars.py b/pcgr/pcgr_vars.py
@@ -12,13 +12,13 @@
 RECOMMENDED_N_MUT_SIGNATURE = 200
 
 ## GENCODE
-GENCODE_VERSION = {'grch38': 39,'grch37': 19}
+GENCODE_VERSION = {'grch38': 44,'grch37': 19}
 
 ## vcfanno
 VCFANNO_MAX_PROC = 15
 
 ## VEP settings/versions
-VEP_VERSION = '105'
+VEP_VERSION = '110'
 VEP_ASSEMBLY = {'grch38': 'GRCh38','grch37': 'GRCh37'}
 VEP_MIN_FORKS = 1
 VEP_MAX_FORKS = 8

diff --git a/pcgr/vep.py b/pcgr/vep.py
@@ -160,7 +160,9 @@ def get_csq_record_annotations(csq_fields, varkey, logger, vep_csq_fields_map, t
     return(csq_record)
 
 
-def pick_single_gene_csq(vep_csq_results, pick_criteria_ordered = "mane,canonical,appris,tsl,biotype,ccds,rank,length", logger = None):
+def pick_single_gene_csq(vep_csq_results, 
+                         pick_criteria_ordered = "mane_select,mane_plus_clinical,canonical,appris,tsl,biotype,ccds,rank,length", 
+                         logger = None):
 
 
     csq_candidates = []
@@ -171,7 +173,8 @@ def pick_single_gene_csq(vep_csq_results, pick_criteria_ordered = "mane,canonica
         csq_candidate = {}
 
         ## default values (undefined properties)
-        csq_candidate['mane'] = 1
+        csq_candidate['mane_select'] = 1
+        csq_candidate['mane_plus_clinical'] = 1        
         csq_candidate['canonical'] = 1
         csq_candidate['appris'] = 8
         csq_candidate['biotype'] = 1
@@ -183,9 +186,13 @@ def pick_single_gene_csq(vep_csq_results, pick_criteria_ordered = "mane,canonica
         csq_candidate['PICKED'] = True
         csq_candidate['varkey'] = csq_elem['VARKEY']
 
-        ## MANE status - lower value prioritized
+        ## MANE select status - lower value prioritized
         if not csq_elem['MANE_SELECT'] is None:
-            csq_candidate['mane'] = 0
+            csq_candidate['mane_select'] = 0
+
+        ## MANE PLUS clinical status - lower value prioritized
+        if not csq_elem['MANE_PLUS_CLINICAL'] is None:
+            csq_candidate['mane_plus_clinical'] = 0
 
         ## CANONICAL status - lower value prioritized
         if not csq_elem['CANONICAL'] is None: