Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add historic curves to the History Matching results metadata #394

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ CHANGELOG
2024.2 (unreleased)
===================

*
* Added support to read historic data curves directly from the results of History Matching analyses.


2024.1 (2024-05-27)
Expand Down
124 changes: 91 additions & 33 deletions src/alfasim_sdk/result_reader/aggregator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import functools
import json
import os
Expand Down Expand Up @@ -28,6 +30,9 @@
HISTORY_MATCHING_DETERMINISTIC_DSET_NAME,
)
from alfasim_sdk.result_reader.aggregator_constants import HISTORY_MATCHING_GROUP_NAME
from alfasim_sdk.result_reader.aggregator_constants import (
HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME,
)
from alfasim_sdk.result_reader.aggregator_constants import (
HISTORY_MATCHING_PROBABILISTIC_DSET_NAME,
)
Expand Down Expand Up @@ -219,44 +224,71 @@ def map_data(
)


@attr.define(slots=True, hash=True)
class HistoricDataCurveMetadata:
    """
    Describes one historic data curve used as input for a History Matching analysis.
    """

    #: Unique identifier of the curve in the result file.
    curve_id: str
    #: Human-readable name of the curve.
    curve_name: str
    #: Unit of the curve's domain (x) values.
    domain_unit: str
    #: Unit of the curve's image (y) values.
    image_unit: str
    #: Category of the image quantity (e.g. the kind of physical property).
    image_category: str

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> Self:
        """
        Build an instance from a plain metadata dict.

        Only the known keys are read; extra keys in ``data`` are ignored, and a
        missing key raises ``KeyError``.
        """
        field_names = (
            "curve_id",
            "curve_name",
            "domain_unit",
            "image_unit",
            "image_category",
        )
        return cls(**{name: data[name] for name in field_names})


@attr.s(slots=True, hash=False)
class HistoryMatchingMetadata:
"""
Holder for the History Matching results metadata.
:ivar hm_items:
Map of the data id and its associated metadata.
:ivar objective_functions:
Map of observed curve id to a dict of Quantity of Interest data, populated with keys
'trend_id' and 'property_id'. This represents the setup for this HM analysis.
:ivar parametric_vars:
Map of parametric vars to the values that represent the analysis, including all existing vars.
Values are either the optimal values (deterministic) or the base values (probabilistic).
:ivar result_directory:
The directory in which the result is saved.
"""

#: Map of the data id and its associated metadata.
hm_items: Dict[str, HMItem] = attr.ib(validator=attr.validators.instance_of(Dict))
#: Map of observed curve id to a dict of Quantity of Interest data, populated with keys
#: 'trend_id' and 'property_id'. This represents the setup for this HM analysis.
objective_functions: Dict[str, Dict[str, str]] = attr.ib(
validator=attr.validators.instance_of(Dict)
)
#: Map of parametric vars to the values that represent the analysis, including all existing vars.
#: Values are either the optimal values (deterministic) or the base values (probabilistic).
parametric_vars: Dict[str, float] = attr.ib(
validator=attr.validators.instance_of(Dict)
)
#: The directory in which the result is saved.
result_directory: Path = attr.ib(validator=attr.validators.instance_of(Path))
#: Metadata of the historic curves present in the results. Optional as this was introduced
#: later (ASIM-5713).
historic_data_curve_infos: Optional[List[HistoricDataCurveMetadata]] = attr.ib(
validator=attr.validators.optional(attr.validators.instance_of(list)),
default=None,
)

@attr.s(slots=True, hash=False)
class HMItem:
"""
Metadata associated with each item of the HM results.
:ivar parametric_var_id:
The id of the associated parametric var.
:ivar parametric_var_name:
The name of the associated parametric var.
:ivar min_value:
Lower limit of the specified range for the parametric var.
:ivar max_value:
Upper limit of the specified range for the parametric var.
:ivar data_index:
The index of the data in the result datasets.
"""

#: The id of the associated parametric var.
parametric_var_id: str = attr.ib(validator=attr.validators.instance_of(str))
#: The name of the associated parametric var.
parametric_var_name: str = attr.ib(validator=attr.validators.instance_of(str))
#: Lower limit of the specified range for the parametric var.
min_value: float = attr.ib(validator=attr.validators.instance_of(float))
#: Upper limit of the specified range for the parametric var.
max_value: float = attr.ib(validator=attr.validators.instance_of(float))
#: The index of the data in the result datasets.
data_index: int = attr.ib(validator=attr.validators.instance_of(int))

@classmethod
Expand All @@ -274,15 +306,6 @@ def from_dict(cls, data: Dict[str, Any]) -> Self:
data_index=data["data_index"],
)

hm_items: Dict[str, HMItem] = attr.ib(validator=attr.validators.instance_of(Dict))
objective_functions: Dict[str, Dict[str, str]] = attr.ib(
validator=attr.validators.instance_of(Dict)
)
parametric_vars: Dict[str, float] = attr.ib(
validator=attr.validators.instance_of(Dict)
)
result_directory: Path = attr.ib(validator=attr.validators.instance_of(Path))

@classmethod
def empty(cls, result_directory: Path) -> Self:
return cls(
Expand All @@ -300,12 +323,19 @@ def from_result_directory(cls, result_directory: Path) -> Self:
If result file is not ready or doesn't exist, return an empty metadata.
"""

def map_data(hm_metadata: Dict) -> Dict[str, HistoryMatchingMetadata.HMItem]:
def map_meta_items(
hm_metadata: Dict,
) -> Dict[str, HistoryMatchingMetadata.HMItem]:
return {
key: HistoryMatchingMetadata.HMItem.from_dict(data)
for key, data in hm_metadata.items()
}

def map_historic_data_infos(
infos: List[Dict[str, Any]]
) -> List[HistoricDataCurveMetadata]:
return [HistoricDataCurveMetadata.from_dict(info) for info in infos]

with open_result_file(result_directory) as result_file:
if not result_file:
return cls.empty(result_directory=result_directory)
Expand All @@ -321,10 +351,14 @@ def map_data(hm_metadata: Dict) -> Dict[str, HistoryMatchingMetadata.HMItem]:

objective_functions = some_item_metadata["objective_functions"]
parametric_vars = some_item_metadata["parametric_vars"]
historic_curve_infos = some_item_metadata.get("historic_data_curves_info")
if historic_curve_infos is not None:
historic_curve_infos = map_historic_data_infos(historic_curve_infos)

return cls(
hm_items=map_data(loaded_metadata),
hm_items=map_meta_items(loaded_metadata),
objective_functions=objective_functions,
historic_data_curve_infos=historic_curve_infos,
parametric_vars=parametric_vars,
result_directory=result_directory,
)
Expand Down Expand Up @@ -1776,6 +1810,30 @@ def read_history_matching_result(
return result_map


def read_history_matching_historic_data_curves(
    metadata: HistoryMatchingMetadata,
) -> Dict[str, np.ndarray]:
    """
    Read the historic data curves saved alongside a History Matching result.

    :param metadata:
        The metadata of the History Matching result to read the curves from.
    :return:
        Map of historic data curve id to the actual curve, represented as an array of points in
        the form [[y1, y2, ..., yn], [x1, x2, ..., xn]]. An empty map is returned when the
        result file is absent or not ready, or when it holds no historic data (old results).
    """
    with open_result_file(metadata.result_directory) as result_file:
        if not result_file:
            return {}

        result = result_file.get(HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME)

        if result is None:
            # Old result files may not have this data group.
            return {}

        if metadata.historic_data_curve_infos is None:
            # Metadata loaded from an old result file carries no curve info, so there
            # is nothing to look up even if the data group happens to exist.
            return {}

        return {
            info.curve_id: result[info.curve_id][:]
            for info in metadata.historic_data_curve_infos
        }


@contextmanager
def open_result_file(
result_directory: Path, result_filename: str = "result"
Expand Down
1 change: 1 addition & 0 deletions src/alfasim_sdk/result_reader/aggregator_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# Name of the result-file group holding Global Sensitivity Analysis data.
GLOBAL_SENSITIVITY_ANALYSIS_GROUP_NAME = "global_sensitivity_analysis"

# Names of the result-file groups/datasets used by History Matching analyses
# (presumably HDF5 group and dataset keys — confirm against the result writer).
HISTORY_MATCHING_GROUP_NAME = "history_matching"
# Group holding the historic (observed) data curves; absent in old result files.
HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME = "history_matching_historic_data"
HISTORY_MATCHING_DETERMINISTIC_DSET_NAME = "history_matching_deterministic"
HISTORY_MATCHING_PROBABILISTIC_DSET_NAME = "history_matching_probabilistic"

Expand Down
78 changes: 67 additions & 11 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import shutil
import textwrap
from pathlib import Path
from typing import Dict
from typing import List

import h5py
Expand All @@ -11,6 +12,9 @@
from _pytest.fixtures import FixtureRequest
from _pytest.monkeypatch import MonkeyPatch

from alfasim_sdk.result_reader.aggregator import (
HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME,
)
from alfasim_sdk.result_reader.aggregator_constants import (
GLOBAL_SENSITIVITY_ANALYSIS_GROUP_NAME,
)
Expand Down Expand Up @@ -272,21 +276,17 @@ def global_sa_results_dir(datadir: Path) -> Path:
def _create_and_populate_hm_result_file(
result_dir: Path,
result: np.ndarray,
dataset_key: str,
result_dataset_key: str,
historic_data_curves: Dict[str, np.ndarray],
) -> None:
result_dir.mkdir(parents=True, exist_ok=True)
result_filepath = result_dir / "result"

with h5py.File(result_filepath, "x", libver="latest", locking=False) as file:
meta_group = file.create_group(META_GROUP_NAME, track_order=True)
data_group = file.create_group(HISTORY_MATCHING_GROUP_NAME, track_order=True)
result_group = file.create_group(HISTORY_MATCHING_GROUP_NAME, track_order=True)

dataset = data_group.create_dataset(
dataset_key,
shape=result.shape,
dtype=np.float64,
maxshape=tuple(None for _ in result.shape),
)
result_group.create_dataset(result_dataset_key, data=result)

objective_functions = {
"observed_curve_1": {"trend_id": "trend_1", "property_id": "holdup"},
Expand Down Expand Up @@ -314,9 +314,34 @@ def _create_and_populate_hm_result_file(
"data_index": 1,
},
}
if historic_data_curves:
historic_curves_group = file.create_group(
HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME
)
for curve_id, curve in historic_data_curves.items():
historic_curves_group.create_dataset(curve_id, data=curve)

historic_curves_meta = [
{
"curve_id": "observed_curve_1",
"curve_name": "curve 1",
"domain_unit": "s",
"image_unit": "m3/m3",
"image_category": "volume fraction",
},
{
"curve_id": "observed_curve_2",
"curve_name": "curve 2",
"domain_unit": "s",
"image_unit": "Pa",
"image_category": "pressure",
},
]
meta_entries = list(fake_meta.values())
for entry in meta_entries:
entry["historic_data_curves_info"] = historic_curves_meta

meta_group.attrs[HISTORY_MATCHING_GROUP_NAME] = json.dumps(fake_meta)
dataset[:] = result

file.swmr_mode = True

Expand All @@ -332,11 +357,16 @@ def hm_probabilistic_results_dir(datadir: Path) -> Path:
probabilistic_result = np.array(
[[0.1, 0.22, 1.0, 0.8, 0.55], [3.0, 6.0, 5.1, 4.7, 6.3]]
)
historic_data_curves = {
"observed_curve_1": np.array([[0.1, 0.5, 0.9], [1.1, 2.2, 3.3]]),
"observed_curve_2": np.array([[1.0, 5.0, 9.0, 3.1], [1.2, 2.3, 3.4, 4.5]]),
}

_create_and_populate_hm_result_file(
result_dir=result_dir,
result=probabilistic_result,
dataset_key=HISTORY_MATCHING_PROBABILISTIC_DSET_NAME,
result_dataset_key=HISTORY_MATCHING_PROBABILISTIC_DSET_NAME,
historic_data_curves=historic_data_curves,
)

return result_dir
Expand All @@ -349,13 +379,39 @@ def hm_deterministic_results_dir(datadir: Path) -> Path:
"""
import numpy as np

result_dir = datadir / "main-HM-deterministic"
deterministic_result = np.array([0.1, 3.2])
historic_data_curves = {
"observed_curve_1": np.array([[0.1, 0.5, 0.9], [1.1, 2.2, 3.3]]),
"observed_curve_2": np.array([[1.0, 5.0, 9.0, 3.1], [1.2, 2.3, 3.4, 4.5]]),
}

_create_and_populate_hm_result_file(
result_dir=result_dir,
result=deterministic_result,
result_dataset_key=HISTORY_MATCHING_DETERMINISTIC_DSET_NAME,
historic_data_curves=historic_data_curves,
)

return result_dir


@pytest.fixture()
def hm_results_dir_without_historic_data(datadir: Path) -> Path:
"""
Create a History Matching Deterministic result folder with a populated HDF5 file in the old
format, i.e. without historic data curves.
"""
import numpy as np

result_dir = datadir / "main-HM-deterministic"
deterministic_result = np.array([0.1, 3.2])

_create_and_populate_hm_result_file(
result_dir=result_dir,
result=deterministic_result,
dataset_key=HISTORY_MATCHING_DETERMINISTIC_DSET_NAME,
result_dataset_key=HISTORY_MATCHING_DETERMINISTIC_DSET_NAME,
historic_data_curves={},
)

return result_dir
Loading
Loading