Skip to content

Commit

Permalink
adaptions to VEP 110
Browse files Browse the repository at this point in the history
  • Loading branch information
sigven committed Dec 15, 2023
1 parent cccaf42 commit f20a0e7
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 13 deletions.
6 changes: 3 additions & 3 deletions pcgr/arg_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,15 +187,15 @@ def check_args(arg_dict):
# Check that VEP pick criteria is formatted correctly
if not arg_dict['vep_pick_order'] is None:
values = str(arg_dict['vep_pick_order']).split(',')
permitted_sources = ['canonical', 'appris', 'tsl', 'biotype', 'ccds', 'rank', 'length', 'mane']
permitted_sources = ['canonical', 'appris', 'tsl', 'biotype', 'ccds', 'rank', 'length', 'mane_select','mane_plus_clinical']
num_permitted_sources = 0
for v in values:
if v in permitted_sources:
num_permitted_sources += 1

if num_permitted_sources != 8:
if num_permitted_sources != 9:
err_msg = (f"'--vep_pick_order' = {arg_dict['vep_pick_order']} is formatted incorrectly, should be "
"a comma-separated string of the following values: mane,canonical,appris,tsl,biotype,ccds,rank,length")
"a comma-separated string of the following values: mane_select,mane_plus_clinical,canonical,appris,tsl,biotype,ccds,rank,length")
error_message(err_msg, logger)
return

Expand Down
4 changes: 2 additions & 2 deletions pcgr/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def populate_config_data(conf_options: dict, db_dir: str, workflow = "PCGR", log
conf_data['molecular_data']['fname_mut_vcf'] = conf_options['annotated_vcf']
conf_data['molecular_data']['fname_mut_tsv'] = conf_options['annotated_tsv']
conf_data['molecular_data']['fname_cna_tsv'] = "None"
if workflow == "PCGR" and conf_options['annotated_cna'] is not "None":
if workflow == "PCGR" and conf_options['annotated_cna'] != "None":
conf_data['molecular_data']['fname_cna_tsv'] = conf_options['annotated_cna']
del conf_options['annotated_cna']

Expand Down Expand Up @@ -187,7 +187,7 @@ def populate_config_data(conf_options: dict, db_dir: str, workflow = "PCGR", log
if check_file_exists(metadata_fname, logger):
metadata_df = pd.read_csv(metadata_fname, sep="\t", na_values=".")
metadata_df["source_type"] = dtype
metadata_pd = metadata_pd.append(metadata_df, ignore_index=True)
metadata_pd = metadata_pd._append(metadata_df, ignore_index=True)

conf_data['reference_data']['source_metadata'] = metadata_pd.to_dict(orient='records')

Expand Down
2 changes: 1 addition & 1 deletion pcgr/cpsr.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def get_args():
optional_vep.add_argument('--vep_buffer_size', default = 500, type = int, help="Variant buffer size (variants read into memory simultaneously, option '--buffer_size' in VEP) " + \
"\n- set lower to reduce memory usage, default: %(default)s")
optional_vep.add_argument("--vep_gencode_basic", action="store_true", help = "Consider basic GENCODE transcript set only with Variant Effect Predictor (VEP) (option '--gencode_basic' in VEP).")
optional_vep.add_argument('--vep_pick_order', default = "canonical,appris,biotype,ccds,rank,tsl,length,mane", help="Comma-separated string " + \
optional_vep.add_argument('--vep_pick_order', default = "mane_select,mane_plus_clinical,canonical,appris,tsl,biotype,ccds,rank,length", help="Comma-separated string " + \
"of ordered transcript properties for primary variant pick\n ( option '--pick_order' in VEP), default: %(default)s")
optional_vep.add_argument('--vep_no_intergenic', action = "store_true", help="Skip intergenic variants during processing (option '--no_intergenic' in VEP), default: %(default)s")

Expand Down
4 changes: 3 additions & 1 deletion pcgr/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ def cli():
optional_vcfanno.add_argument("--vcfanno_n_proc", default=4, type=int, help="Number of vcfanno processes (option '-p' in vcfanno), default: %(default)s")
optional_vep.add_argument("--vep_n_forks", default=4, type=int, help="Number of forks (VEP option '--fork'), default: %(default)s")
optional_vep.add_argument("--vep_buffer_size", default=500, type=int, help=f"Variant buffer size (variants read into memory simultaneously, VEP option '--buffer_size')\n- set lower to reduce memory usage, default: %(default)s")
optional_vep.add_argument("--vep_pick_order", default="mane,canonical,appris,tsl,biotype,ccds,rank,length", help=f"Comma-separated string of ordered transcript/variant properties for selection of primary variant consequence\n(option '--pick_order' in VEP), default: %(default)s")
optional_vep.add_argument("--vep_pick_order", default="mane_select,mane_plus_clinical,canonical,appris,tsl,biotype,ccds,rank,length", help=f"Comma-separated string " + \
"of ordered transcript/variant properties for selection of primary variant consequence\n(option '--pick_order' in VEP), default: %(default)s")
optional_vep.add_argument("--vep_no_intergenic", action="store_true", help="Skip intergenic variants during processing (VEP option '--no_intergenic' in VEP), default: %(default)s")
optional_vep.add_argument("--vep_regulatory", action="store_true", help="Add VEP regulatory annotations (VEP option '--regulatory') or non-coding interpretation, default: %(default)s")
optional_vep.add_argument("--vep_gencode_basic", action="store_true", help = "Consider basic GENCODE transcript set only with Variant Effect Predictor (VEP) (VEP option '--gencode_basic').")
Expand Down Expand Up @@ -329,6 +330,7 @@ def run_pcgr(pcgr_paths, conf_options):
check_subprocess(logger, vep_command['tabix'], debug)
logger.info('Finished pcgr-vep')
print('----')
exit(0)

# PCGR|vcf2maf - if option set, convert VCF to MAF with https://github.com/mskcc/vcf2maf
if run_vcf2maf:
Expand Down
4 changes: 2 additions & 2 deletions pcgr/pcgr_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@
RECOMMENDED_N_MUT_SIGNATURE = 200

## GENCODE
GENCODE_VERSION = {'grch38': 39,'grch37': 19}
GENCODE_VERSION = {'grch38': 44,'grch37': 19}

## vcfanno
VCFANNO_MAX_PROC = 15

## VEP settings/versions
VEP_VERSION = '105'
VEP_VERSION = '110'
VEP_ASSEMBLY = {'grch38': 'GRCh38','grch37': 'GRCh37'}
VEP_MIN_FORKS = 1
VEP_MAX_FORKS = 8
Expand Down
15 changes: 11 additions & 4 deletions pcgr/vep.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,9 @@ def get_csq_record_annotations(csq_fields, varkey, logger, vep_csq_fields_map, t
return(csq_record)


def pick_single_gene_csq(vep_csq_results, pick_criteria_ordered = "mane,canonical,appris,tsl,biotype,ccds,rank,length", logger = None):
def pick_single_gene_csq(vep_csq_results,
pick_criteria_ordered = "mane_select,mane_plus_clinical,canonical,appris,tsl,biotype,ccds,rank,length",
logger = None):


csq_candidates = []
Expand All @@ -171,7 +173,8 @@ def pick_single_gene_csq(vep_csq_results, pick_criteria_ordered = "mane,canonica
csq_candidate = {}

## default values (undefined properties)
csq_candidate['mane'] = 1
csq_candidate['mane_select'] = 1
csq_candidate['mane_plus_clinical'] = 1
csq_candidate['canonical'] = 1
csq_candidate['appris'] = 8
csq_candidate['biotype'] = 1
Expand All @@ -183,9 +186,13 @@ def pick_single_gene_csq(vep_csq_results, pick_criteria_ordered = "mane,canonica
csq_candidate['PICKED'] = True
csq_candidate['varkey'] = csq_elem['VARKEY']

## MANE status - lower value prioritized
## MANE select status - lower value prioritized
if not csq_elem['MANE_SELECT'] is None:
csq_candidate['mane'] = 0
csq_candidate['mane_select'] = 0

## MANE PLUS clinical status - lower value prioritized
if not csq_elem['MANE_PLUS_CLINICAL'] is None:
csq_candidate['mane_plus_clinical'] = 0

## CANONICAL status - lower value prioritized
if not csq_elem['CANONICAL'] is None:
Expand Down

0 comments on commit f20a0e7

Please sign in to comment.