diff --git a/fhirflat/flat2fhir.py b/fhirflat/flat2fhir.py index 977317e..a821199 100644 --- a/fhirflat/flat2fhir.py +++ b/fhirflat/flat2fhir.py @@ -251,6 +251,9 @@ def create_single_extension(k: str, v: dict | str | float | bool) -> dict: else: raise e # pragma: no cover + except KeyError: + continue + raise RuntimeError(f"extension not created from {k, v}") # pragma: no cover diff --git a/fhirflat/ingest.py b/fhirflat/ingest.py index 8969a67..3a911b3 100644 --- a/fhirflat/ingest.py +++ b/fhirflat/ingest.py @@ -5,6 +5,7 @@ import argparse import hashlib +import logging import os import shutil import timeit @@ -25,6 +26,8 @@ import fhirflat from fhirflat.util import get_local_resource, group_keys +logger = logging.getLogger(__name__) + # 1:1 (single row, single resource) mapping: Patient, Encounter # 1:M (single row, multiple resources) mapping: Observation, Condition, Procedure, ... @@ -95,12 +98,12 @@ def find_field_value( return return_val -def format_dates(date_str: str, date_format: str, timezone: str) -> str: +def format_dates(date_str: str | float, date_format: str, timezone: str) -> str: """ Converts dates into ISO8601 format with timezone information. """ - if date_str is None: + if date_str is None or date_str is np.nan: return date_str new_tz = ZoneInfo(timezone) @@ -124,7 +127,7 @@ def format_dates(date_str: str, date_format: str, timezone: str) -> str: f"Date {date_str} could not be converted using date format" f" {date_format}", UserWarning, - stacklevel=2, + stacklevel=1, ) return date_str @@ -167,7 +170,7 @@ def create_dict_wide( warnings.warn( f"No mapping for column {column} response {response}", UserWarning, - stacklevel=2, + stacklevel=1, ) continue else: @@ -261,11 +264,15 @@ def create_dict_long( except KeyError: # No mapping found for this column and response despite presence # in mapping file - warnings.warn( - f"No mapping for column {column} response {response}", - UserWarning, - stacklevel=2, - ) + if response == 0.0: + # mostly this is ignoring unfilled responses + logger.info(f"No mapping for column {column} response {response}") + else: + warnings.warn( + f"No mapping for column {column} response {response}", + UserWarning, + stacklevel=1, + ) return None return None @@ -329,7 +336,11 @@ def condense(x): # If the column contains a single non-nan value, return it non_nan_values = x.dropna() if non_nan_values.nunique() == 1: - return non_nan_values + return ( + non_nan_values + if len(non_nan_values) == 1 + else non_nan_values.unique()[0] + ) elif non_nan_values.empty: return np.nan else: @@ -337,6 +348,8 @@ def condense(x): else: if len(x) == 1: return x + elif x.nunique() == 1: + return x.unique()[0] else: raise ValueError("Multiple values found in one-to-one mapping") @@ -364,6 +377,13 @@ def condense(x): # Set multi-index for easier access map_df.set_index(["raw_variable", "raw_response"], inplace=True) + map_df.sort_index(inplace=True) # for performance improvements + + if not map_df.index.is_unique: + raise ValueError( + f"Mapping file for the {resource} resource has duplicate entries " + f"{map_df.index[map_df.index.duplicated()]}" + ) # Generate the flat_like dictionary if one_to_one: @@ -522,7 +542,7 @@ def convert_resource( date_format=date_format, timezone=timezone, ) - if df is None: + if df is None or df.empty: return None else: raise ValueError(f"Unknown mapping type {t}") diff --git a/fhirflat/resources/diagnosticreport.py b/fhirflat/resources/diagnosticreport.py index 8de5640..323326e 100644 --- a/fhirflat/resources/diagnosticreport.py +++ b/fhirflat/resources/diagnosticreport.py @@ -79,6 +79,7 @@ def cleanup(cls, data: dict) -> dict: { "basedOn", "subject", + "encounter", "performer", "resultsInterpreter", "specimen", diff --git a/fhirflat/resources/encounter.py b/fhirflat/resources/encounter.py index 6270d75..65d4e65 100644 --- a/fhirflat/resources/encounter.py +++ b/fhirflat/resources/encounter.py @@ -14,8 +14,13 @@ from pydantic.v1 import Field, validator from .base import FHIRFlatBase -from .extension_types import relativePeriodType, timingPhaseDetailType, timingPhaseType -from .extensions import relativePeriod, timingPhase, timingPhaseDetail +from .extension_types import ( + durationType, + relativePeriodType, + timingPhaseDetailType, + timingPhaseType, +) +from .extensions import Duration, relativePeriod, timingPhase, timingPhaseDetail JsonString: TypeAlias = str @@ -26,6 +31,7 @@ class Encounter(_Encounter, FHIRFlatBase): relativePeriodType, timingPhaseType, timingPhaseDetailType, + durationType, fhirtypes.ExtensionType, ] ] = Field( @@ -71,11 +77,10 @@ def validate_extension_contents(cls, extensions): rel_phase_count = sum(isinstance(item, relativePeriod) for item in extensions) timing_count = sum(isinstance(item, timingPhase) for item in extensions) detail_count = sum(isinstance(item, timingPhaseDetail) for item in extensions) + dur_count = sum(isinstance(item, Duration) for item in extensions) - if rel_phase_count > 1 or timing_count > 1 or detail_count > 1: - raise ValueError( - "relativePeriod, timingPhase and timingPhaseDetail can only appear once." # noqa E501 - ) + if rel_phase_count > 1 or timing_count > 1 or detail_count > 1 or dur_count > 1: + raise ValueError("Each extension can only appear once.") if timing_count > 0 and detail_count > 0: raise ValueError(