diff --git a/pypesto/result/__init__.py b/pypesto/result/__init__.py
index 9be25cd78..a6814bc2e 100644
--- a/pypesto/result/__init__.py
+++ b/pypesto/result/__init__.py
@@ -7,7 +7,7 @@
 optimization, profiling, sampling.
 """
 
-from .optimize import OptimizeResult, OptimizerResult
+from .optimize import LazyOptimizerResult, OptimizeResult, OptimizerResult
 from .predict import PredictionConditionResult, PredictionResult
 from .profile import ProfileResult, ProfilerResult
 from .result import Result
diff --git a/pypesto/result/optimize.py b/pypesto/result/optimize.py
index 6f1d69964..fdc6ed47c 100644
--- a/pypesto/result/optimize.py
+++ b/pypesto/result/optimize.py
@@ -7,6 +7,7 @@
 from copy import deepcopy
 from typing import Union
 
+import h5py
 import numpy as np
 import pandas as pd
 
@@ -425,3 +426,277 @@ def get_by_id(self, ores_id: str):
                 return res
         else:
             raise ValueError(f"no optimization result with id={ores_id}")
+
+
+class LazyOptimizerResult(OptimizerResult):
+    """
+    A class to handle lazy loading of optimizer results from an HDF5 file.
+
+    This class extends the OptimizerResult class and overrides methods to
+    load data only when it is accessed, improving memory usage and performance
+    for large datasets.
+
+    Attributes
+    ----------
+    filename : str
+        The path to the HDF5 file containing the optimizer results.
+    group_name : str
+        The name of the group in the HDF5 file where the results are stored.
+    _data : dict
+        A dictionary to store loaded data.
+    """
+
+    def __init__(self, filename, group_name):
+        """
+        Initialize a LazyOptimizerResult instance.
+
+        Parameters
+        ----------
+        filename : str
+            The path to the HDF5 file containing the optimizer results.
+        group_name : str
+            The name of the group in the HDF5 file where the results are stored.
+        """
+        super().__init__()
+        self.filename = filename
+        self.group_name = group_name
+        self._data = {}
+
+    def _get_value(self, key):
+        """
+        Get the value of a key.
+
+        Parameters
+        ----------
+        key : str
+            The key to get the value of.
+
+        Returns
+        -------
+        value
+            The value of the key.
+        """
+        if key not in self._data:
+            with h5py.File(self.filename, "r") as f:
+                if key in f[self.group_name]:
+                    self._data[key] = f[f"{self.group_name}/{key}"][()]
+                elif key in f[self.group_name].attrs:
+                    self._data[key] = f[self.group_name].attrs[key]
+                else:
+                    raise AttributeError(f"{key} not found in the HDF5 file.")
+        return self._data[key]
+
+    @property
+    def id(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("id")
+
+    @property
+    def x(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("x")
+
+    @x.setter
+    def x(self, value):
+        """Setter for the x property."""
+        self._data["x"] = value
+
+    @property
+    def fval(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("fval")
+
+    @fval.setter
+    def fval(self, value):
+        """Setter for the fval property."""
+        self._data["fval"] = value
+
+    @property
+    def grad(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("grad")
+
+    @grad.setter
+    def grad(self, value):
+        """Setter for the grad property."""
+        self._data["grad"] = value
+
+    @property
+    def hess(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("hess")
+
+    @hess.setter
+    def hess(self, value):
+        """Setter for the hess property."""
+        self._data["hess"] = value
+
+    @property
+    def res(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("res")
+
+    @res.setter
+    def res(self, value):
+        """Setter for the res property."""
+        self._data["res"] = value
+
+    @property
+    def sres(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("sres")
+
+    @sres.setter
+    def sres(self, value):
+        """Setter for the sres property."""
+        self._data["sres"] = value
+
+    @property
+    def n_fval(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("n_fval")
+
+    @n_fval.setter
+    def n_fval(self, value):
+        """Setter for the n_fval property."""
+        self._data["n_fval"] = value
+
+    @property
+    def n_grad(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("n_grad")
+
+    @n_grad.setter
+    def n_grad(self, value):
+        """Setter for the n_grad property."""
+        self._data["n_grad"] = value
+
+    @property
+    def n_hess(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("n_hess")
+
+    @n_hess.setter
+    def n_hess(self, value):
+        """Setter for the n_hess property."""
+        self._data["n_hess"] = value
+
+    @property
+    def n_res(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("n_res")
+
+    @n_res.setter
+    def n_res(self, value):
+        """Setter for the n_res property."""
+        self._data["n_res"] = value
+
+    @property
+    def n_sres(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("n_sres")
+
+    @n_sres.setter
+    def n_sres(self, value):
+        """Setter for the n_sres property."""
+        self._data["n_sres"] = value
+
+    @property
+    def x0(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("x0")
+
+    @x0.setter
+    def x0(self, value):
+        """Setter for the x0 property."""
+        self._data["x0"] = value
+
+    @property
+    def fval0(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("fval0")
+
+    @fval0.setter
+    def fval0(self, value):
+        """Setter for the fval0 property."""
+        self._data["fval0"] = value
+
+    @property
+    def history(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("history")
+
+    @history.setter
+    def history(self, value):
+        """Setter for the history property."""
+        self._data["history"] = value
+
+    @property
+    def exitflag(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("exitflag")
+
+    @exitflag.setter
+    def exitflag(self, value):
+        """Setter for the exitflag property."""
+        self._data["exitflag"] = value
+
+    @property
+    def time(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("time")
+
+    @time.setter
+    def time(self, value):
+        """Setter for the time property."""
+        self._data["time"] = value
+
+    @property
+    def message(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("message")
+
+    @message.setter
+    def message(self, value):
+        """Setter for the message property."""
+        self._data["message"] = value
+
+    @property
+    def optimizer(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("optimizer")
+
+    @optimizer.setter
+    def optimizer(self, value):
+        """Setter for the optimizer property."""
+        self._data["optimizer"] = value
+
+    @property
+    def free_indices(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("free_indices")
+
+    @free_indices.setter
+    def free_indices(self, value):
+        """Setter for the free_indices property."""
+        self._data["free_indices"] = value
+
+    @property
+    def inner_parameters(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("inner_parameters")
+
+    @inner_parameters.setter
+    def inner_parameters(self, value):
+        """Setter for the inner_parameters property."""
+        self._data["inner_parameters"] = value
+
+    @property
+    def spline_knots(self):
+        """See :class:`OptimizerResult`."""
+        return self._get_value("spline_knots")
+
+    @spline_knots.setter
+    def spline_knots(self, value):
+        """Setter for the spline_knots property."""
+        self._data["spline_knots"] = value
diff --git a/pypesto/store/read_from_hdf5.py b/pypesto/store/read_from_hdf5.py
index 2d4e2d26b..8f011d114 100644
--- a/pypesto/store/read_from_hdf5.py
+++ b/pypesto/store/read_from_hdf5.py
@@ -49,9 +49,7 @@ def read_hdf5_profile(
 
 
 def read_hdf5_optimization(
-    f: h5py.File,
-    file_name: Union[Path, str],
-    opt_id: str,
+    f: h5py.File, file_name: Union[Path, str], opt_id: str, lazy: bool = False
 ) -> "OptimizerResult":
     """Read HDF5 results per start.
 
@@ -63,9 +61,15 @@ def read_hdf5_optimization(
         The name of the HDF5 file, needed to create HDF5History
     opt_id:
         Specifies the start that is read from the HDF5 file
+    lazy:
+        Whether to use lazy loading for optimizer results
     """
-    result = OptimizerResult()
+    if lazy:
+        from ..result import LazyOptimizerResult
+
+        return LazyOptimizerResult(file_name, f"optimization/results/{opt_id}")
 
+    result = OptimizerResult()
     for optimization_key in result.keys():
         if optimization_key == "history":
             if optimization_key in f:
@@ -152,9 +156,11 @@ class OptimizationResultHDF5Reader:
     ----------
     storage_filename:
         HDF5 result file name
+    lazy:
+        Whether to use lazy loading for optimizer results
     """
 
-    def __init__(self, storage_filename: Union[str, Path]):
+    def __init__(self, storage_filename: Union[str, Path], lazy: bool = False):
         """
         Initialize reader.
 
@@ -162,16 +168,19 @@ def __init__(self, storage_filename: Union[str, Path]):
         ----------
         storage_filename:
             HDF5 result file name
+        lazy:
+            Whether to use lazy loading for optimizer results
         """
         self.storage_filename = storage_filename
         self.results = Result()
+        self.lazy = lazy
 
     def read(self) -> Result:
         """Read HDF5 result file and return pyPESTO result object."""
         with h5py.File(self.storage_filename, "r") as f:
             for opt_id in f["/optimization/results"]:
                 result = read_hdf5_optimization(
-                    f, self.storage_filename, opt_id
+                    f, self.storage_filename, opt_id, lazy=self.lazy
                 )
                 self.results.optimize_result.append(result)
             self.results.optimize_result.sort()
@@ -267,6 +276,7 @@ def read_result(
     optimize: bool = False,
     profile: bool = False,
     sample: bool = False,
+    lazy: bool = False,
 ) -> Result:
-    """Save the whole pypesto.Result object in an HDF5 file.
+    """Read the whole pypesto.Result object from an HDF5 file.
 
@@ -285,6 +295,8 @@ def read_result(
         Read the profile result.
     sample:
         Read the sample result.
+    lazy:
+        Whether to use lazy loading for optimizer results
 
     Returns
     -------
@@ -302,7 +314,7 @@ def read_result(
         result.problem = pypesto_problem_reader.read()
 
     if optimize:
-        pypesto_opt_reader = OptimizationResultHDF5Reader(filename)
+        pypesto_opt_reader = OptimizationResultHDF5Reader(filename, lazy=lazy)
         try:
             temp_result = pypesto_opt_reader.read()
             result.optimize_result = temp_result.optimize_result
diff --git a/test/base/test_lazy_result.py b/test/base/test_lazy_result.py
new file mode 100644
index 000000000..0e6be8964
--- /dev/null
+++ b/test/base/test_lazy_result.py
@@ -0,0 +1,83 @@
+import os
+import unittest
+
+import h5py
+import numpy as np
+
+from pypesto.result import LazyOptimizerResult, OptimizerResult, Result
+from pypesto.store import read_result, write_result
+
+from ..visualize import create_optimization_result
+
+
+class TestLazyOptimizerResult(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        # Create a temporary HDF5 file with sample data
+        cls.filename = "test_optimization_results.h5"
+        cls.result = create_optimization_result()
+
+        write_result(cls.result, cls.filename, overwrite=True)
+
+    @classmethod
+    def tearDownClass(cls):
+        # Remove the temporary file after tests
+        if os.path.exists(cls.filename):
+            os.remove(cls.filename)
+
+    def setUp(self):
+        # Load the results lazily for each test
+        self.lazy_results = []
+        with h5py.File(self.filename, "r") as f:
+            for group_name in f["optimization/results"]:
+                self.lazy_results.append(
+                    LazyOptimizerResult(
+                        self.filename, f"optimization/results/{group_name}"
+                    )
+                )
+
+    def test_initialization(self):
+        # Test that the lazy results are initialized correctly
+        for lazy_result in self.lazy_results:
+            self.assertEqual(lazy_result.filename, self.filename)
+            self.assertEqual(lazy_result._data, {})
+            self.assertEqual(
+                lazy_result.group_name,
+                f"optimization/results/{lazy_result.id}",
+            )
+
+    def test_lazy_loading(self):
+        # Test that data is loaded lazily
+        for lazy_result in self.lazy_results:
+            self.assertIsNone(lazy_result._data.get("x"))
+            x = lazy_result.x
+            self.assertIsNotNone(lazy_result._data.get("x"))
+            np.testing.assert_array_equal(x, lazy_result._data["x"])
+
+    def test_attribute_access(self):
+        # Test accessing attributes
+        for lazy_result in self.lazy_results:
+            np.testing.assert_array_equal(
+                lazy_result.x, lazy_result._data["x"]
+            )
+
+    def test_read_result_lazy(self):
+        # Test reading results using the lazy loading option
+        result = read_result(self.filename, optimize=True, lazy=True)
+        self.assertIsInstance(result, Result)
+        self.assertTrue(result.optimize_result)
+
+        # Check if the optimize results are instances of LazyOptimizerResult
+        for opt_result in result.optimize_result:
+            self.assertIsInstance(opt_result, LazyOptimizerResult)
+
+    def test_read_result_non_lazy(self):
+        # Test reading results without using the lazy loading option
+        result = read_result(self.filename, optimize=True, lazy=False)
+        self.assertIsInstance(result, Result)
+        self.assertTrue(result.optimize_result)
+
+        # Check if the optimize results are instances of OptimizerResult
+        for opt_result in result.optimize_result:
+            self.assertIsInstance(opt_result, OptimizerResult)
+            self.assertNotIsInstance(opt_result, LazyOptimizerResult)