Skip to content

Commit

Permalink
The -c option can be omitted when a VEP subfield is used in filtering…
Browse files Browse the repository at this point in the history
… expressions

Note that this is an experimental feature.
  • Loading branch information
pd3 committed Feb 10, 2023
1 parent f0ad6aa commit ab5de54
Show file tree
Hide file tree
Showing 6 changed files with 331 additions and 189 deletions.
4 changes: 4 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ Changes affecting specific commands:
VEP subfields. The +split-vep plugin can now work with such files, replacing the offending
commas with slash (/) characters. See also https://github.com/Ensembl/ensembl-vep/issues/1351

- The `-c, --columns` option can now be omitted when a subfield is used in an `-i/-e` filtering
expression. Note that `-c` may still have to be given when it is not possible to infer the
type of the subfield. Note that this is an experimental feature.

* bcftools stats

- The per-sample stats (PSC) would not be computed when `-i/-e` filtering options and
Expand Down
53 changes: 50 additions & 3 deletions filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ struct _filter_t
#if ENABLE_PERL_FILTERS
PerlInterpreter *perl;
#endif
char **undef_tag;
int nundef_tag;
int status, exit_on_error;
};


Expand Down Expand Up @@ -304,6 +307,28 @@ static int filters_next_token(char **str, int *len)
return TOK_VAL;
}

// Values of filter_t.status. The error values are OR-ed into the status word,
// FILTER_OK means that no error bit is set.
#define FILTER_OK 0
// The expression refers to a tag which is not defined in the VCF header
#define FILTER_ERR_UNKN_TAGS 1
// NOTE(review): not set anywhere in the visible code, presumably reserved for other errors
#define FILTER_ERR_OTHER 2

/*
    Append `str` to the filter's list of undefined tags unless an identical
    name is already present. The list keeps its own copy of the string.
 */
static void filter_add_undef_tag(filter_t *filter, char *str)
{
    int idx;
    for (idx = 0; idx < filter->nundef_tag; idx++)
    {
        // already recorded, nothing to do
        if ( strcmp(str, filter->undef_tag[idx]) == 0 ) return;
    }

    // grow the list by one slot
    filter->nundef_tag++;
    filter->undef_tag = (char**)realloc(filter->undef_tag, sizeof(*filter->undef_tag) * filter->nundef_tag);
    if ( !filter->undef_tag ) error("Could not allocate memory\n");

    // fill the new last slot with a private copy of the name
    char **slot = &filter->undef_tag[filter->nundef_tag - 1];
    *slot = strdup(str);
    if ( !*slot ) error("Could not allocate memory\n");
}
/*
    Return the filter's array of undefined tag names and store the number of
    entries in *ntags. The array is the filter's own storage, not a copy.
 */
const char **filter_list_undef_tags(filter_t *filter, int *ntags)
{
    const char **tags = (const char**)filter->undef_tag;
    *ntags = filter->nundef_tag;
    return tags;
}


/*
Simple path expansion, expands ~/, ~user, $var. The result must be freed by the caller.
Expand Down Expand Up @@ -3063,15 +3088,20 @@ static int filters_init1(filter_t *filter, char *str, int len, token_t *tok)
{
errno = 0;
tok->threshold = strtod(tmp.s, &end); // float?
if ( errno!=0 || end!=tmp.s+len ) error("[%s:%d %s] Error: the tag \"%s\" is not defined in the VCF header\n", __FILE__,__LINE__,__FUNCTION__,tmp.s);
if ( errno!=0 || end!=tmp.s+len )
{
if ( filter->exit_on_error )
error("[%s:%d %s] Error: the tag \"%s\" is not defined in the VCF header\n", __FILE__,__LINE__,__FUNCTION__,tmp.s);
filter->status |= FILTER_ERR_UNKN_TAGS;
filter_add_undef_tag(filter,tmp.s);
}
}
tok->is_constant = 1;

if ( tmp.s ) free(tmp.s);
return 0;
}


static void filter_debug_print(token_t *toks, token_t **tok_ptrs, int ntoks)
{
int i;
Expand Down Expand Up @@ -3221,12 +3251,13 @@ static void perl_destroy(filter_t *filter)


// Parse filter expression and convert to reverse polish notation. Dijkstra's shunting-yard algorithm
filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
static filter_t *filter_init_(bcf_hdr_t *hdr, const char *str, int exit_on_error)
{
filter_t *filter = (filter_t *) calloc(1,sizeof(filter_t));
filter->str = strdup(str);
filter->hdr = hdr;
filter->max_unpack |= BCF_UN_STR;
filter->exit_on_error = exit_on_error;

int nops = 0, mops = 0; // operators stack
int nout = 0, mout = 0; // filter tokens, RPN
Expand Down Expand Up @@ -3608,6 +3639,14 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
filter->flt_stack = (token_t **)malloc(sizeof(token_t*)*nout);
return filter;
}
// Parse the expression via filter_init_() with the exit_on_error flag switched off
filter_t *filter_parse(bcf_hdr_t *hdr, const char *str)
{
    const int exit_on_error = 0;
    return filter_init_(hdr, str, exit_on_error);
}
// Parse the expression via filter_init_() with the exit_on_error flag switched on
filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
{
    const int exit_on_error = 1;
    return filter_init_(hdr, str, exit_on_error);
}

void filter_destroy(filter_t *filter)
{
Expand All @@ -3629,6 +3668,8 @@ void filter_destroy(filter_t *filter)
free(filter->filters[i].regex);
}
}
for (i=0; i<filter->nundef_tag; i++) free(filter->undef_tag[i]);
free(filter->undef_tag);
free(filter->cached_GT.buf);
free(filter->cached_GT.mask);
free(filter->filters);
Expand All @@ -3642,6 +3683,7 @@ void filter_destroy(filter_t *filter)

int filter_test(filter_t *filter, bcf1_t *line, const uint8_t **samples)
{
if ( filter->status != FILTER_OK ) error("Error: the caller did not check the filter status\n");
bcf_unpack(line, filter->max_unpack);

int i, nstack = 0;
Expand Down Expand Up @@ -3804,3 +3846,8 @@ void filter_set_samples(filter_t *filter, const uint8_t *samples)
}
}

// Return the status word stored in the filter
int filter_status(filter_t *filter)
{
    int status = filter->status;
    return status;
}

21 changes: 20 additions & 1 deletion filter.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* filter.h -- filter expressions.
Copyright (C) 2013-2021 Genome Research Ltd.
Copyright (C) 2013-2023 Genome Research Ltd.
Author: Petr Danecek <pd3@sanger.ac.uk>
Expand Down Expand Up @@ -32,6 +32,8 @@ typedef struct _filter_t filter_t;
/**
* @hdr: BCF header file
* @str: see the bcftools filter command help for description
*
* Same as filter_parse() but exits on errors
*/
filter_t *filter_init(bcf_hdr_t *hdr, const char *str);

Expand Down Expand Up @@ -61,4 +63,21 @@ const double *filter_get_doubles(filter_t *filter, int *nval, int *nval1);
void filter_expression_info(FILE *fp);
int filter_max_unpack(filter_t *filter);

/**
* Same as filter_init() but may not exit on some types of errors. The caller
* must check that the returned value is not NULL and that a subsequent call
* of filter_status() returns FILTER_OK before filter_test() can be called.
*/
filter_t *filter_parse(bcf_hdr_t *hdr, const char *str);

/* Values returned by filter_status() */
#define FILTER_OK 0
/* NOTE(review): presumably set when the expression uses tags missing from the header; see filter_list_undef_tags() */
#define FILTER_ERR_UNKN_TAGS 1
/* NOTE(review): meaning not documented here; confirm against filter.c */
#define FILTER_ERR_OTHER 2

/**
* Check if filter_parse() was successful
*/
int filter_status(filter_t *filter);
/**
* Returns the list of undefined tags and stores their number in @nundef.
* NOTE(review): the strings appear to be owned by the filter; confirm that
* the caller must not free them.
*/
const char **filter_list_undef_tags(filter_t *filter, int *nundef);

#endif
Loading

0 comments on commit ab5de54

Please sign in to comment.