Feature/auto compress pickle study (#107)

* move study save and load to io module
* lzma compress study pickle

---------

Signed-off-by: Grossberger Lukas (CR/AIR2.2) <Lukas.Grossberger@de.bosch.com>
boschresearch · Jun 5, 2023 · 5050e9f
1 parent 96b2ac5
commit 5050e9f
Show file tree

Hide file tree

Showing 6 changed files with 196 additions and 178 deletions.
diff --git a/blackboxopt/__init__.py b/blackboxopt/__init__.py
@@ -2,6 +2,7 @@
 
 from parameterspace import ParameterSpace
 
+from . import io
 from .base import (
     ConstraintsError,
     ContextError,

diff --git a/blackboxopt/io.py b/blackboxopt/io.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2023 - for information on the respective copyright owner
+# see the NOTICE file and/or the repository https://github.com/boschresearch/blackboxopt
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import lzma
+import os
+import pickle
+from pathlib import Path
+from typing import List, Tuple
+
+import parameterspace as ps
+
+from blackboxopt.base import Objective
+from blackboxopt.evaluation import Evaluation
+
+
+def save_study_as_json(
+    search_space: ps.ParameterSpace,
+    objectives: List[Objective],
+    evaluations: List[Evaluation],
+    json_file_path: os.PathLike,
+    overwrite: bool = False,
+):
+    """Save space, objectives and evaluations as json at `json_file_path`."""
+    _file_path = Path(json_file_path)
+    if not _file_path.parent.exists():
+        raise IOError(
+            f"The parent directory for {_file_path} does not exist, please create it."
+        )
+    if _file_path.exists() and not overwrite:
+        raise IOError(f"{_file_path} exists and overwrite is False")
+
+    with open(_file_path, "w", encoding="UTF-8") as fh:
+        json.dump(
+            {
+                "search_space": search_space.to_dict(),
+                "objectives": [o.__dict__ for o in objectives],
+                "evaluations": [e.__dict__ for e in evaluations],
+            },
+            fh,
+        )
+
+
+def load_study_from_json(
+    json_file_path: os.PathLike,
+) -> Tuple[ps.ParameterSpace, List[Objective], List[Evaluation]]:
+    """Load space, objectives and evaluations from a given `json_file_path`."""
+    with open(json_file_path, "r", encoding="UTF-8") as fh:
+        study = json.load(fh)
+
+    search_space = ps.ParameterSpace.from_dict(study["search_space"])
+    objectives = [Objective(**o) for o in study["objectives"]]
+    evaluations = [Evaluation(**e) for e in study["evaluations"]]
+
+    return search_space, objectives, evaluations
+
+
+def save_study_as_pickle(
+    search_space: ps.ParameterSpace,
+    objectives: List[Objective],
+    evaluations: List[Evaluation],
+    pickle_file_path: os.PathLike,
+    overwrite: bool = False,
+):
+    """Save space, objectives and evaluations as an lzma compressed pickle."""
+    _file_path = Path(pickle_file_path)
+    if not _file_path.parent.exists():
+        raise IOError(
+            f"The parent directory for {_file_path} does not exist, please create it."
+        )
+    if _file_path.exists() and not overwrite:
+        raise IOError(f"{_file_path} exists and overwrite is False")
+
+    with lzma.open(_file_path, "wb") as fh:
+        pickle.dump(
+            {
+                "search_space": search_space,
+                "objectives": objectives,
+                "evaluations": evaluations,
+            },
+            fh,
+        )
+
+
+def load_study_from_pickle(
+    pickle_file_path: os.PathLike,
+) -> Tuple[ps.ParameterSpace, List[Objective], List[Evaluation]]:
+    """Load space, objectives and evaluations from a given lzma compressed pickle."""
+    with lzma.open(pickle_file_path, "rb") as fh:
+        study = pickle.load(fh)
+
+    return study["search_space"], study["objectives"], study["evaluations"]
diff --git a/blackboxopt/utils.py b/blackboxopt/utils.py
@@ -4,15 +4,11 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import hashlib
-import json
-import os
 import pickle
 from itertools import compress
-from pathlib import Path
-from typing import Dict, Iterable, List, Optional, Sequence, Tuple
+from typing import Dict, Iterable, List, Optional, Sequence
 
 import numpy as np
-import parameterspace as ps
 
 from blackboxopt.base import Objective
 from blackboxopt.evaluation import Evaluation
@@ -103,81 +99,3 @@ def sort_evaluations(evaluations: Iterable[Evaluation]) -> Iterable[Evaluation]:
             )
         ).hexdigest(),
     )
-
-
-def save_study_as_json(
-    search_space: ps.ParameterSpace,
-    objectives: List[Objective],
-    evaluations: List[Evaluation],
-    json_file_path: os.PathLike,
-    overwrite: bool = False,
-):
-    """Save space, objectives and evaluations as json at `json_file_path`."""
-    _file_path = Path(json_file_path)
-    if not _file_path.parent.exists():
-        raise IOError(
-            f"The parent directory for {_file_path} does not exist, please create it."
-        )
-    if _file_path.exists() and not overwrite:
-        raise IOError(f"{_file_path} exists and overwrite is False")
-
-    with open(_file_path, "w", encoding="UTF-8") as fh:
-        json.dump(
-            {
-                "search_space": search_space.to_dict(),
-                "objectives": [o.__dict__ for o in objectives],
-                "evaluations": [e.__dict__ for e in evaluations],
-            },
-            fh,
-        )
-
-
-def load_study_from_json(
-    json_file_path: os.PathLike,
-) -> Tuple[ps.ParameterSpace, List[Objective], List[Evaluation]]:
-    """Load space, objectives and evaluations from a given `json_file_path`."""
-    with open(json_file_path, "r", encoding="UTF-8") as fh:
-        study = json.load(fh)
-
-    search_space = ps.ParameterSpace.from_dict(study["search_space"])
-    objectives = [Objective(**o) for o in study["objectives"]]
-    evaluations = [Evaluation(**e) for e in study["evaluations"]]
-
-    return search_space, objectives, evaluations
-
-
-def save_study_as_pickle(
-    search_space: ps.ParameterSpace,
-    objectives: List[Objective],
-    evaluations: List[Evaluation],
-    pickle_file_path: os.PathLike,
-    overwrite: bool = False,
-):
-    """Save space, objectives and evaluations as pickle at `pickle_file_path`."""
-    _file_path = Path(pickle_file_path)
-    if not _file_path.parent.exists():
-        raise IOError(
-            f"The parent directory for {_file_path} does not exist, please create it."
-        )
-    if _file_path.exists() and not overwrite:
-        raise IOError(f"{_file_path} exists and overwrite is False")
-
-    with open(_file_path, "wb") as fh:
-        pickle.dump(
-            {
-                "search_space": search_space,
-                "objectives": objectives,
-                "evaluations": evaluations,
-            },
-            fh,
-        )
-
-
-def load_study_from_pickle(
-    pickle_file_path: os.PathLike,
-) -> Tuple[ps.ParameterSpace, List[Objective], List[Evaluation]]:
-    """Load space, objectives and evaluations from a given `pickle_file_path`."""
-    with open(pickle_file_path, "rb") as fh:
-        study = pickle.load(fh)
-
-    return study["search_space"], study["objectives"], study["evaluations"]
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "blackboxopt"
-version = "4.15.1"
+version = "5.0.0"
 description = "A common interface for blackbox optimization algorithms along with useful helpers like parallel optimization loops, analysis and visualization scripts."
 readme = "README.md"
 repository = "https://github.com/boschresearch/blackboxopt"

diff --git a/tests/io_test.py b/tests/io_test.py
@@ -0,0 +1,99 @@
+# Copyright (c) 2023 - for information on the respective copyright owner
+# see the NOTICE file and/or the repository https://github.com/boschresearch/blackboxopt
+#
+# SPDX-License-Identifier: Apache-2.0
+
+
+import parameterspace as ps
+import pytest
+
+import blackboxopt as bbo
+
+
+def test_save_and_load_study_pickle(tmp_path):
+    tmp_file = tmp_path / "out.json"
+
+    search_space = ps.ParameterSpace()
+    search_space.add(ps.IntegerParameter("p", (-10, 10)))
+    objectives = [bbo.Objective("loss", False), bbo.Objective("score", True)]
+    evaluations = [
+        bbo.Evaluation(configuration={"p": 1}, objectives={"loss": 1.0, "score": 3.0}),
+        bbo.Evaluation(configuration={"p": 2}, objectives={"loss": 0.1, "score": 2.0}),
+        bbo.Evaluation(configuration={"p": 3}, objectives={"loss": 0.0, "score": 1.0}),
+    ]
+    bbo.io.save_study_as_pickle(search_space, objectives, evaluations, tmp_file)
+
+    # Check that default overwrite=False causes IOError on existing file
+    with pytest.raises(IOError, match=str(tmp_file)):
+        bbo.io.save_study_as_pickle(search_space, objectives, evaluations, tmp_file)
+
+    loaded_study = bbo.io.load_study_from_pickle(tmp_file)
+
+    assert loaded_study[1] == objectives
+    assert loaded_study[2] == evaluations
+    for _ in range(128):
+        assert search_space.sample() == loaded_study[0].sample()
+
+
+def test_save_and_load_study_pickle_fails_on_missing_output_directory():
+    pickle_file_path = "/this/directory/does/not/exist/pickles/out.pkl"
+    with pytest.raises(IOError, match=pickle_file_path):
+        bbo.io.save_study_as_pickle(
+            search_space=ps.ParameterSpace(),
+            objectives=[],
+            evaluations=[],
+            pickle_file_path=pickle_file_path,
+        )
+
+
+def test_save_and_load_study_json(tmp_path):
+    tmp_file = tmp_path / "out.json"
+
+    search_space = ps.ParameterSpace()
+    search_space.add(ps.IntegerParameter("p", (-10.0, 10.0)))
+    objectives = [bbo.Objective("loss", False), bbo.Objective("score", True)]
+    evaluations = [
+        bbo.Evaluation(configuration={"p": 1}, objectives={"loss": 1.0, "score": 3.0}),
+        bbo.Evaluation(configuration={"p": 2}, objectives={"loss": 0.1, "score": 2.0}),
+        bbo.Evaluation(configuration={"p": 3}, objectives={"loss": 0.0, "score": 1.0}),
+    ]
+    bbo.io.save_study_as_json(search_space, objectives, evaluations, tmp_file)
+
+    # Check that default overwrite=False causes IOError on existing file
+    with pytest.raises(IOError):
+        bbo.io.save_study_as_json(search_space, objectives, evaluations, tmp_file)
+
+    loaded_study = bbo.io.load_study_from_json(tmp_file)
+
+    assert loaded_study[1] == objectives
+    assert loaded_study[2] == evaluations
+    for _ in range(128):
+        assert search_space.sample() == loaded_study[0].sample()
+
+
+def test_save_and_load_study_json_fails_on_missing_output_directory():
+    json_file_path = "/this/directory/does/not/exist/jsons/out.json"
+    with pytest.raises(IOError, match=json_file_path):
+        bbo.io.save_study_as_json(
+            search_space=ps.ParameterSpace(),
+            objectives=[],
+            evaluations=[],
+            json_file_path=json_file_path,
+        )
+
+
+def test_save_and_load_study_json_fails_with_complex_type_in_evaluation(tmp_path):
+    tmp_file = tmp_path / "out.json"
+
+    search_space = ps.ParameterSpace()
+    objectives = []
+    evaluations = [
+        bbo.Evaluation(
+            configuration={},
+            objectives={},
+            user_info={"complex typed value": ps.ParameterSpace()},
+        ),
+    ]
+
+    with pytest.raises(TypeError):
+        bbo.io.save_study_as_json(search_space, objectives, evaluations, tmp_file)