Skip to content

Commit

Permalink
Misc fixes and warnings from data ingestion testing
Browse files (browse the repository at this point in the history)
  • Loading branch information
pipliggins committed Aug 29, 2024
1 parent cd1257d commit 9300fc2
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 17 deletions.
3 changes: 3 additions & 0 deletions fhirflat/flat2fhir.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,9 @@ def create_single_extension(k: str, v: dict | str | float | bool) -> dict:
else:
raise e # pragma: no cover

except KeyError:
continue

raise RuntimeError(f"extension not created from {k, v}") # pragma: no cover


Expand Down
42 changes: 31 additions & 11 deletions fhirflat/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import argparse
import hashlib
import logging
import os
import shutil
import timeit
Expand All @@ -25,6 +26,8 @@
import fhirflat
from fhirflat.util import get_local_resource, group_keys

logger = logging.getLogger(__name__)

# 1:1 (single row, single resource) mapping: Patient, Encounter
# 1:M (single row, multiple resources) mapping: Observation, Condition, Procedure, ...

Expand Down Expand Up @@ -95,12 +98,12 @@ def find_field_value(
return return_val


def format_dates(date_str: str, date_format: str, timezone: str) -> str:
def format_dates(date_str: str | float, date_format: str, timezone: str) -> str:
"""
Converts dates into ISO8601 format with timezone information.
"""

if date_str is None:
if date_str is None or date_str is np.nan:
return date_str

new_tz = ZoneInfo(timezone)
Expand All @@ -124,7 +127,7 @@ def format_dates(date_str: str, date_format: str, timezone: str) -> str:
f"Date {date_str} could not be converted using date format"
f" {date_format}",
UserWarning,
stacklevel=2,
stacklevel=1,
)
return date_str

Expand Down Expand Up @@ -167,7 +170,7 @@ def create_dict_wide(
warnings.warn(
f"No mapping for column {column} response {response}",
UserWarning,
stacklevel=2,
stacklevel=1,
)
continue
else:
Expand Down Expand Up @@ -261,11 +264,15 @@ def create_dict_long(
except KeyError:
# No mapping found for this column and response despite presence
# in mapping file
warnings.warn(
f"No mapping for column {column} response {response}",
UserWarning,
stacklevel=2,
)
if response == 0.0:
# mostly this is ignoring unfilled responses
logger.info(f"No mapping for column {column} response {response}")
else:
warnings.warn(
f"No mapping for column {column} response {response}",
UserWarning,
stacklevel=1,
)
return None
return None

Expand Down Expand Up @@ -329,14 +336,20 @@ def condense(x):
# If the column contains a single non-nan value, return it
non_nan_values = x.dropna()
if non_nan_values.nunique() == 1:
return non_nan_values
return (
non_nan_values
if len(non_nan_values) == 1
else non_nan_values.unique()[0]
)
elif non_nan_values.empty:
return np.nan
else:
raise ValueError("Multiple values found in one-to-one mapping")
else:
if len(x) == 1:
return x
elif x.nunique() == 1:
return x.unique()[0]
else:
raise ValueError("Multiple values found in one-to-one mapping")

Expand Down Expand Up @@ -364,6 +377,13 @@ def condense(x):

# Set multi-index for easier access
map_df.set_index(["raw_variable", "raw_response"], inplace=True)
map_df.sort_index(inplace=True) # for performance improvements

if not map_df.index.is_unique:
raise ValueError(
f"Mapping file for the {resource} resource has duplicate entries "
f"{map_df.index[map_df.index.duplicated()]}"
)

# Generate the flat_like dictionary
if one_to_one:
Expand Down Expand Up @@ -522,7 +542,7 @@ def convert_resource(
date_format=date_format,
timezone=timezone,
)
if df is None:
if df is None or df.empty:
return None
else:
raise ValueError(f"Unknown mapping type {t}")
Expand Down
1 change: 1 addition & 0 deletions fhirflat/resources/diagnosticreport.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def cleanup(cls, data: dict) -> dict:
{
"basedOn",
"subject",
"encounter",
"performer",
"resultsInterpreter",
"specimen",
Expand Down
17 changes: 11 additions & 6 deletions fhirflat/resources/encounter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,13 @@
from pydantic.v1 import Field, validator

from .base import FHIRFlatBase
from .extension_types import relativePeriodType, timingPhaseDetailType, timingPhaseType
from .extensions import relativePeriod, timingPhase, timingPhaseDetail
from .extension_types import (
durationType,
relativePeriodType,
timingPhaseDetailType,
timingPhaseType,
)
from .extensions import Duration, relativePeriod, timingPhase, timingPhaseDetail

JsonString: TypeAlias = str

Expand All @@ -26,6 +31,7 @@ class Encounter(_Encounter, FHIRFlatBase):
relativePeriodType,
timingPhaseType,
timingPhaseDetailType,
durationType,
fhirtypes.ExtensionType,
]
] = Field(
Expand Down Expand Up @@ -71,11 +77,10 @@ def validate_extension_contents(cls, extensions):
rel_phase_count = sum(isinstance(item, relativePeriod) for item in extensions)
timing_count = sum(isinstance(item, timingPhase) for item in extensions)
detail_count = sum(isinstance(item, timingPhaseDetail) for item in extensions)
dur_count = sum(isinstance(item, Duration) for item in extensions)

if rel_phase_count > 1 or timing_count > 1 or detail_count > 1:
raise ValueError(
"relativePeriod, timingPhase and timingPhaseDetail can only appear once." # noqa E501
)
if rel_phase_count > 1 or timing_count > 1 or detail_count > 1 or dur_count > 1:
raise ValueError("Each extension can only appear once.")

if timing_count > 0 and detail_count > 0:
raise ValueError(
Expand Down

0 comments on commit 9300fc2

Please sign in to comment.