Skip to content

Commit

Permalink
update reading vcf function
Browse files (browse the repository at this point in the history)
  • Loading branch information
ketringjoni committed Dec 12, 2023
1 parent 59b6319 commit b64fc47
Showing 1 changed file with 9 additions and 6 deletions.
15 changes: 9 additions & 6 deletions scripts/reading_utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -90,12 +90,15 @@ def read_input(in_file, var_set):

if any(['END' in x and 'SVLEN' in x for x in variants.INFO]): # BNDs don't have 'END'

variants.loc[variants.INFO.str.startswith('END='),
'END'] = variants.loc[variants.INFO.str.startswith('END='),
'INFO'].str.split('END=').str[1].str.split(';').str[0]
variants.loc[~(variants.INFO.str.startswith('END=')),
'END'] = variants.loc[~(variants.INFO.str.startswith('END=')),
'INFO'].str.split(';END=').str[1].str.split(';').str[0]
if any([x.startswith('END=') for x in variants.INFO]):
variants.loc[variants.INFO.str.startswith('END='),
'END'] = variants.loc[variants.INFO.str.startswith('END='),
'INFO'].str.split('END=').str[1].str.split(';').str[0]
if any([';END' in x for x in variants.INFO]):
variants.loc[~(variants.INFO.str.startswith('END=')),
'END'] = variants.loc[~(variants.INFO.str.startswith('END=')),
'INFO'].str.split(';END=').str[1].str.split(';').str[0]


variants.loc[~pd.isnull(variants.END), 'END'] = variants.loc[~pd.isnull(variants.END), 'END'].astype('int')
variants['SVLEN'] = variants.INFO.str.split('SVLEN=').str[1].str.split(';').str[0] # this SVLEN (END-POS) would be 0 for SNPs
Expand Down

0 comments on commit b64fc47

Please sign in to comment.