Skip to content

Commit

Permalink
adding start of changes to parse values and fields
Browse files Browse the repository at this point in the history
Signed-off-by: vsoch <vsochat@stanford.edu>
  • Loading branch information
vsoch committed Mar 7, 2020
1 parent 115a9c2 commit 6a487ca
Show file tree
Hide file tree
Showing 13 changed files with 456 additions and 59 deletions.
64 changes: 54 additions & 10 deletions deid/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,28 +91,40 @@ def _get_section(self, name):
"""
section = None
if self.deid is not None:
if name in self.deid:
section = self.deid[name]
section = self.deid.get(name)
return section

# Get Sections

def get_format(self):
    """return the format of the loaded deid, if one exists. This is the
    value of the "format" section as returned by _get_section, passed
    through unchanged.
    """
    return self._get_section("format")

def _get_named_section(self, section_name, name=None):
"""a helper function to return an entire section, or if a name is
provided, a named section under it. If the section is not
defined, we appropriately return None.
"""
section = self._get_section(section_name)
if name is not None and section is not None:
section = section.get(name, [])
return section

def get_filters(self, name=None):
"""return all filters for a deid recipe, or a set based on a name
"""
filters = self._get_section("filter")
if name is not None and filters is not None:
filters = filters[name]
return filters
return self._get_named_section("filter", name)

def ls_filters(self):
"""list names of filter groups
def get_values_lists(self, name=None):
"""return a values list by name
"""
filters = self._get_section("filter")
return list(filters.keys())
return self._get_named_section("values", name)

def get_fields_lists(self, name=None):
    """return all fields lists for a deid recipe, or a single fields list
    by name (looked up in the "fields" section)
    """
    return self._get_named_section("fields", name)

def get_actions(self, action=None, field=None):
"""get deid actions to perform on a header, or a subset based on a type
Expand All @@ -137,6 +149,38 @@ def get_actions(self, action=None, field=None):

return header

# Boolean properties

def _has_list_content(self, name):
return len(self.deid.get(name, [])) > 0

def has_fields_lists(self):
    """return a boolean to indicate if the "fields" section has content,
    as determined by _has_list_content
    """
    return self._has_list_content("fields")

def has_values_lists(self):
    """return a boolean to indicate if the "values" section has content,
    as determined by _has_list_content
    """
    return self._has_list_content("values")

def has_actions(self):
    """return a boolean to indicate if the "header" section (where actions
    are looked up by this method) has content, as determined by
    _has_list_content
    """
    return self._has_list_content("header")

# Listing

def listof(self, section):
    """return a list of keys for a section

    Parameters
    ==========
    section: the name of the section to list (e.g., "filter")

    Returns the keys of the section as a list. If the section is not
    defined (the section lookup returns None) an empty list is returned
    instead of raising an error.
    """
    listing = self._get_section(section)

    # An undefined section has nothing to list
    if listing is None:
        return []
    return list(listing.keys())

def ls_filters(self):
    """list names of filter groups (the keys of the "filter" section)
    """
    return self.listof("filter")

def ls_valuelists(self):
    """list names of values lists (the keys of the "values" section)
    """
    return self.listof("values")

def ls_fieldlists(self):
    """list names of fields lists (the keys of the "fields" section)
    """
    return self.listof("fields")

# Init

def _init_deid(self, deid=None, base=False, default_base="dicom"):
"""initialize the recipe with one or more deids, optionally including
the default. This function is called at init time. If you need to add
Expand Down
6 changes: 3 additions & 3 deletions deid/config/standards.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@
# Supported Header Actions
actions = ("ADD", "BLANK", "JITTER", "KEEP", "REPLACE", "REMOVE", "LABEL")

# Supported Group actions
fields_actions = ["FIELD"]
values_actions = ["FIELD", "SPLIT"]
# Supported Group actions (SPLIT only supported for values)
groups = ["values", "fields"]
group_actions = ("FIELD", "SPLIT")

# Valid actions for a filter action
filters = (
Expand Down
139 changes: 111 additions & 28 deletions deid/config/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@
actions,
sections,
filters,
fields_actions,
values_actions,
groups,
group_actions,
)
from collections import OrderedDict
import os
Expand Down Expand Up @@ -145,15 +145,9 @@ def load_deid(path=None):
if line.startswith("#"):
continue

# Starts with Format?
elif bool(re.match("format", line, re.I)):
fmt = re.sub("FORMAT|(\s+)", "", line).lower()
if fmt not in formats:
bot.exit("%s is not a valid format." % fmt)

# Set format
config["format"] = fmt
bot.debug("FORMAT set to %s" % fmt)
# Set format
elif bool(re.match("^format", line, re.I)):
config["format"] = parse_format(line)

# A new section?
elif line.startswith("%"):
Expand All @@ -174,24 +168,20 @@ def load_deid(path=None):
config=config, section=section, section_name=section_name
)

# An action (replace, blank, remove, keep, jitter)
# A %fields action (only field allowed), %values allows split
elif line.upper().startswith(group_actions) and section in groups:
print("SECTION %s" % section)
print(config)
config = parse_group_action(
section=section, section_name=section_name, line=line, config=config
)

# An action (ADD, BLANK, JITTER, KEEP, REPLACE, REMOVE, LABEL)
elif line.upper().startswith(actions):

# Start of a filter group
if line.upper().startswith("LABEL") and section == "filter":
members = []
keep_going = True
while keep_going is True:
next_line = spec[0]
if next_line.upper().strip().startswith("LABEL"):
keep_going = False
elif next_line.upper().strip().startswith("%"):
keep_going = False
else:
new_member = spec.pop(0)
members.append(new_member)
if len(spec) == 0:
keep_going = False
members = parse_filter_group(spec)

# Add the filter label to the config
config = parse_label(
Expand Down Expand Up @@ -244,6 +234,48 @@ def find_deid(path=None):
return path


def parse_format(line):
    """given a line that starts with FORMAT, parse the format of the
    file and check that it is supported. If not, exit on error. If yes,
    return the format (lowercase, with all whitespace removed).

    Parameters
    ==========
    line: the line that starts with format (in any casing).
    """
    # The FORMAT keyword is detected case insensitively by the caller, so it
    # must also be removed case insensitively (e.g., "format dicom"). Only
    # the leading keyword is removed so the value itself is left intact.
    remainder = re.sub(r"^\s*format", "", line, count=1, flags=re.I)
    fmt = re.sub(r"\s+", "", remainder).lower()

    if fmt not in formats:
        bot.exit("%s is not a valid format." % fmt)
    bot.debug("FORMAT set to %s" % fmt)
    return fmt


def parse_filter_group(spec):
    """given the specification (a list of lines) continue parsing lines
    until the filter group ends, as indicated by the start of a new LABEL,
    (case 1), the start of a new section (case 2) or the end of the spec
    file (case 3). Returns a list of members (lines) that belong to the
    filter group. The members are removed from the front of spec in place,
    so the list in the calling function is updated.

    Parameters
    ==========
    spec: unparsed lines of the deid recipe file
    """
    # Find the first line that closes the group; default is the end of spec
    stop = len(spec)
    for index, candidate in enumerate(spec):
        if candidate.strip().upper().startswith(("LABEL", "%")):
            stop = index
            break

    # Take the members and remove them from spec with a single slice
    # deletion, instead of popping from the front once per line.
    members = spec[:stop]
    del spec[:stop]
    return members


def parse_label(section, config, section_name, members, label=None):
"""parse label will add a (optionally named) label to the filter
section, including one or more criteria
Expand Down Expand Up @@ -295,7 +327,10 @@ def parse_label(section, config, section_name, members, label=None):


def parse_member(members, operator=None):

"""a parsing function for a filter member. Will return a single member
with fields, values, and an operator. In the case of multiple and/or
statements that are chained, will instead return a list.
"""
main_operator = operator

actions = []
Expand Down Expand Up @@ -388,7 +423,7 @@ def add_section(config, section, section_name=None):
if section is None:
bot.exit("You must define a section (e.g. %header) before any action.")

if section == "filter" and section_name is None:
if section in ["filter", "values", "fields"] and section_name is None:
bot.exit("You must provide a name for a filter section.")

if section not in sections:
Expand Down Expand Up @@ -421,6 +456,55 @@ def _remove_comments(parts):
return value.split("#")[0] # remove comments


def parse_group_action(section, line, config, section_name):
    """parse a group action, either FIELD or SPLIT, which must belong to
    either a fields or values section. The parsed action is appended to
    config[section][section_name] as a dictionary with keys "action" and
    "field", plus "value" when a values action has additional arguments.
    The (updated) config is returned.

    Parameters
    =========
    section: a valid section name from the deid config file
    line: the line content to parse for the section/action
    config: the growing/current config dictionary
    section_name: optionally, a section name
    """
    if not line.upper().startswith(group_actions):
        bot.exit("%s is not a valid group action." % line)

    if not line.upper().startswith("FIELD") and section == "fields":
        bot.exit("%fields only supports FIELD actions.")

    bot.debug("%s: adding %s" % (section, line))
    parts = line.split(" ")
    action = parts.pop(0)

    # Repeated spaces after the action produce empty tokens; skip them so
    # the field is not parsed as an empty string.
    while parts and not parts[0]:
        parts.pop(0)

    # Both require some parts
    if not parts:
        bot.exit("%s action %s requires additional arguments" % (section, action))

    # For both, the second is always a field or field expander
    field = parts.pop(0)
    entry = {"action": action, "field": field}

    # Only values actions take a third set of arguments; the remaining
    # parts are kept as a list because _remove_comments expects the parts.
    if section == "values" and parts:
        entry["value"] = _remove_comments(parts)

    # Other sections are left untouched, as before
    if section in ("fields", "values"):
        config[section][section_name].append(entry)

    return config


def parse_config_action(section, line, config, section_name=None):
"""add action will take a line from a deid config file, a config (dictionary), and
an active section name (eg header) and add an entry to the config file to perform
Expand All @@ -434,7 +518,6 @@ def parse_config_action(section, line, config, section_name=None):
section_name: optionally, a section name
"""

if not line.upper().startswith(actions):
bot.exit("%s is not a valid action line." % line)

Expand Down
2 changes: 0 additions & 2 deletions deid/dicom/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,5 @@
)

from .utils import get_files

from .fields import extract_sequence

from .pixels import has_burned_pixels, DicomCleaner
30 changes: 26 additions & 4 deletions deid/dicom/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from deid.logger import bot
from deid.config.standards import actions as valid_actions

from .fields import expand_field_expression
from .fields import expand_field_expression, find_by_values

from deid.utils import get_timestamp, parse_value

Expand All @@ -51,7 +51,7 @@ def perform_action(dicom, action, item=None, fields=None, return_seen=False):
"action" (eg, REPLACE) what to do with the field
"value": if needed, the field from the response to replace with
"""
field = action.get("field") # e.g: PatientID, endswith:ID
field = action.get("field") # e.g: PatientID, endswith:ID, values:name, fields:name
value = action.get("value") # "suid" or "var:field"
action = action.get("action") # "REPLACE"

Expand All @@ -60,8 +60,30 @@ def perform_action(dicom, action, item=None, fields=None, return_seen=False):
bot.warning("%s in not a valid choice. Defaulting to blanked." % action)
action = "BLANK"

# If there is an expander applied to field, we iterate over
fields = expand_field_expression(field=field, dicom=dicom, contenders=fields)
# If a values or fields list is referenced, an item lookup is required
if re.search("^(values|fields)", field):
if not item:
bot.exit(
"An item lookup must be provided to reference a list of values or fields."
)

# A values list returns fields with the value
if re.search("^values", field):
values = item.get(re.sub("^values:", "", field), [])
fields = find_by_values(values=values, dicom=dicom)

# A fields list is used verbatim
elif re.search("^fields", field):
listing = []
for contender in item.get(re.sub("^fields:", "", field), []):
listing += expand_field_expression(
field=contender, dicom=dicom, contenders=fields
)
fields = listing

else:
# If there is an expander applied to field, we iterate over
fields = expand_field_expression(field=field, dicom=dicom, contenders=fields)

# Keep track of fields we have seen
seen = []
Expand Down
19 changes: 17 additions & 2 deletions deid/dicom/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,22 @@ def extract_sequence(sequence, prefix=None):
return items


def find_by_values(values, dicom):
    """Given a list of values, find fields in the dicom that contain any
    of those values, as determined by a regular expression search. If
    no (non-empty) values are provided, no fields are returned.

    Parameters
    ==========
    values: a list of values to search for, each used as a regular expression
    dicom: the dicom to get fields from (passed on to get_fields)
    """
    # Values must be strings to be joined; None and empty strings are
    # skipped because an empty alternative matches every value.
    patterns = [str(value) for value in values or [] if value not in (None, "")]

    # Without this guard the expression would be "()", which matches
    # everything and would select every field in the dicom.
    if not patterns:
        return []

    # Create single regular expression to search by
    # NOTE(review): values are not escaped, so regular expression special
    # characters in a value are interpreted as such - confirm this is intended.
    regexp = re.compile("(%s)" % "|".join(patterns))

    contenders = get_fields(dicom)
    return [field for field, value in contenders.items() if regexp.search(value)]


def expand_field_expression(field, dicom, contenders=None):
"""Get a list of fields based on an expression. If
no expression found, return single field. Options for fields include:
Expand All @@ -95,8 +111,7 @@ def expand_field_expression(field, dicom, contenders=None):
startswith: filter to fields that start with the expression
contains: filter to fields that contain the expression
allfields: include all fields
exceptfields: filter to all fields except those listed ( | separated)
exceptfields: filter to all fields except those listed ( | separated)
"""
# Expanders that don't have a : must be checked for
expanders = ["all"]
Expand Down
Loading

0 comments on commit 6a487ca

Please sign in to comment.