Commit 1a49326: removed mpc formulation

FilippoAiraldi committed Dec 8, 2023 (1 parent: e160207)

Showing 9 changed files with 19 additions and 90 deletions.
12 changes: 2 additions & 10 deletions benchmarking/run.py
@@ -67,7 +67,6 @@ def run_problem(
"""Solves the given problem with the given algorithm (based on the specified
horizon), and saves as result the performance of the run in terms of best-so-far
and total cost."""
rollout = True
c1, c2, eps = 1.0, 0.5, 1.0
bsf_callback = BestSoFarCallback()
dp_callback = DpStageCostCallback()
@@ -85,7 +84,7 @@ def run_problem(
"seed": seed,
"callback": callbacks,
"pso_kwargs": {
"swarmsize": 5 * problem.dim * (rollout or horizon),
"swarmsize": 5 * problem.dim,
"xtol": 1e-9,
"ftol": 1e-9,
"maxiter": 300,
@@ -95,14 +94,7 @@ def run_problem(
if horizon == 1:
go(**kwargs)
else:
nmgo(
horizon=horizon,
discount=1.0,
rollout=rollout,
mc_iters=0,
parallel=None,
**kwargs,
)
nmgo(horizon=horizon, discount=1.0, mc_iters=0, parallel=None, **kwargs)
cost = sum(dp_callback)
bests = ",".join(map(str, bsf_callback))
with shm_lock(shm_name), open(output_csv, "a") as f:
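Read together, the hunks above mean the benchmark no longer builds a `rollout` flag at all: the swarm size depends only on the problem dimension, and the solver is selected purely by the horizon. A rough, hedged reconstruction of the updated call site follows; only names visible in the hunks are used, and `problem`, `seed`, `callbacks`, `go` and `nmgo` are assumed to come from the surrounding script.

# --- illustrative sketch, not part of the diff ---------------------------------
c1, c2, eps = 1.0, 0.5, 1.0
kwargs = {
    # entries hidden in the diff are omitted here
    "seed": seed,
    "callback": callbacks,
    "pso_kwargs": {
        "swarmsize": 5 * problem.dim,  # no longer scaled by `rollout or horizon`
        "xtol": 1e-9,
        "ftol": 1e-9,
        "maxiter": 300,
    },
}
if horizon == 1:
    go(**kwargs)  # myopic global optimization
else:
    # non-myopic global optimization; the `rollout` argument is gone
    nmgo(horizon=horizon, discount=1.0, mc_iters=0, parallel=None, **kwargs)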
1 change: 0 additions & 1 deletion examples/non_myopic_acquisition.py
@@ -39,7 +39,6 @@
"ub": ub,
"c1": c1,
"c2": c2,
"rollout": True,
"seed": 0,
}
nonmyopic_deterministic_a = nonmyopic_acquisition(**kwargs, mc_iters=0)
6 changes: 1 addition & 5 deletions examples/non_myopic_global_optimization_1.py
@@ -49,7 +49,7 @@ def save_history(algorithm: Literal["go", "nmgo"], locals: dict[str, Any]) -> No
y_hat = predict(mdl, x_)
if algorithm == "go":
acq = myopic_acquisition(x_, mdl, c1, c2, y_hat, None).squeeze()
elif locals["rollout"]:
else:
acq = nonmyopic_acquisition(
x_.transpose(1, 0, 2),
mdl,
@@ -59,7 +59,6 @@ def save_history(algorithm: Literal["go", "nmgo"], locals: dict[str, Any]) -> No
ub,
c1,
c2,
locals["rollout"],
locals["mc_iters"],
locals["quasi_mc"],
locals["common_random_numbers"],
@@ -69,8 +68,6 @@ def save_history(algorithm: Literal["go", "nmgo"], locals: dict[str, Any]) -> No
0,
{"n_jobs": -1, "backend": "loky"},
)
else:
acq = np.full(x.size, np.nan)
history.append(
(
locals["x_new"],
@@ -113,7 +110,6 @@ def save_history(algorithm: Literal["go", "nmgo"], locals: dict[str, Any]) -> No
init_points=x0,
c1=c1,
c2=c2,
rollout=True,
mc_iters=0,
maxiter=ITERS,
seed=1909,
6 changes: 1 addition & 5 deletions examples/non_myopic_global_optimization_2.py
@@ -43,7 +43,7 @@ def save_history(algorithm: Literal["go", "nmgo"], locals: dict[str, Any]) -> No
y_hat = predict(mdl, x_)
if algorithm == "go":
acq = myopic_acquisition(x_, mdl, c1, c2, y_hat, None).squeeze()
elif locals["rollout"]:
else:
acq = nonmyopic_acquisition(
x_.transpose(1, 0, 2),
mdl,
@@ -53,7 +53,6 @@ def save_history(algorithm: Literal["go", "nmgo"], locals: dict[str, Any]) -> No
ub,
c1,
c2,
locals["rollout"],
locals["mc_iters"],
locals["quasi_mc"],
locals["common_random_numbers"],
@@ -63,8 +62,6 @@ def save_history(algorithm: Literal["go", "nmgo"], locals: dict[str, Any]) -> No
0,
{"n_jobs": -1, "backend": "loky"},
)
else:
acq = np.full(x.size, np.nan)
history.append(
(
locals["x_new"],
@@ -115,7 +112,6 @@ def save_history(algorithm: Literal["go", "nmgo"], locals: dict[str, Any]) -> No
init_points=x0,
c1=c1,
c2=c2,
rollout=True,
mc_iters=2**10,
parallel={"n_jobs": -1, "backend": "loky", "verbose": 1},
maxiter=ITERS,
1 change: 0 additions & 1 deletion examples/variance_reduction.py
@@ -38,7 +38,6 @@
"discount": discount,
"c1": c1,
"c2": c2,
"rollout": True,
"lb": lb,
"ub": ub,
}
40 changes: 7 additions & 33 deletions src/globopt/nonmyopic/acquisition.py
@@ -125,25 +125,17 @@ def _advance(


def _next_query_point(
x: Array3d,
mdl: RegressorType,
current_step: int, # > 0
c1: float,
c2: float,
dym: Array3d,
lb: Array2d,
ub: Array2d,
rollout: bool,
pso_kwargs: dict[str, Any],
np_random: np.random.Generator,
) -> tuple[Array3d, Array1d]:
"""Computes the next point to query and its associated cost. If the strategy is
`"mpc"`, then the next point is just the next point in the trajectory. If the
strategy is `"rollout"`, then the next point is the minimizer of the myopic
acquisition function, i.e., base policy."""
if not rollout:
x_next = x[:, current_step, np.newaxis, :] # ∈ (n_samples, 1, dim)
return x_next, myopic_acquisition(x_next, mdl, c1, c2, None, dym)[:, 0, 0]
"""Computes the next point to query and its associated cost, i.e., the next point is
the minimizer of the myopic acquisition function, a.k.a., the base policy."""

def func(q: Array3d) -> Array2d:
return myopic_acquisition(q, mdl, c1, c2, None, dym)[:, :, 0]
@@ -177,7 +169,6 @@ def _compute_nonmyopic_cost(
c2: float,
lb: Array2d,
ub: Array2d,
rollout: bool,
terminal_cost: bool,
pso_kwargs: dict[str, Any],
prediction_rng: Optional[Array2d],
@@ -197,7 +188,7 @@
if h >= horizon:
break
x_next, current_cost = _next_query_point(
x_trajectory, mdl, h, c1, c2, dym, lb, ub, rollout, pso_kwargs, np_random
mdl, c1, c2, dym, lb, ub, pso_kwargs, np_random
)
cost += (discount**h) * current_cost # accumulate in-place
if terminal_cost:
@@ -246,7 +237,6 @@ def acquisition(
ub: Array1d,
c1: float = 1.5078,
c2: float = 1.4246,
rollout: bool = True,
#
mc_iters: int = 1024,
quasi_mc: bool = True,
@@ -265,13 +255,10 @@
Parameters
----------
x : array of shape (n_samples, horizon, dim) or (n_samples, 1, dim)
x : array of shape (n_samples, 1, dim)
Array of points for which to compute the acquisition. `n_samples` is the number
of target points for which to compute the acquisition, and `dim` is the number
of features/variables of each point. In case of `rollout = False`, `horizon` is
the length of the predicted trajectory of sampled points; while in case of
`rollout == True`, this dimension has to be 1, since only the first sample
point is optimized over.
of features/variables of each point.
mdl : Idw or Rbf
Fitted model to use for computing the acquisition function.
horizon : int
@@ -286,12 +273,6 @@
Weight of the contribution of the variance function, by default `1.5078`.
c2 : float, optional
Weight of the contribution of the distance function, by default `1.4246`.
rollout : bool, optional
The strategy to be used for approximately solving the optimal control problem.
If `True`, rollout is used where only the first sample point is optimized and
then the remaining samples in the horizon are computed via the myopic
acquisition base policy. If `False`, the whole horizon trajectory is optimized
in an MPC fashion.
mc_iters : int, optional
Number of Monte Carlo iterations, by default `1024`. For better sampling, the
iterations should be a power of 2. If `0`, the acquisition is computed
@@ -327,20 +308,15 @@
The non-myopic acquisition function for each target point (per MC iteration,
if `return_as_list == True`)
"""
n_samples, h_, _ = x.shape
if check:
assert mdl.Xm_.shape[0] == 1, "regression model must be non-batched"
if rollout:
assert h_ == 1, "x must have only one time step for rollout"
else:
assert (
h_ == horizon
), "x must have the same number of time steps as the horizon"
assert x.shape[1] == 1, "x must have only one time step"
if pso_kwargs is None:
pso_kwargs = {}

# compute the first (myopic) iteration of the acquisition function - this is in
# common across all MC iterations
n_samples = x.shape[0]
myopic_cost, mdl, lb, ub, y_min, y_max = _compute_myopic_cost(
x, mdl, n_samples, c1, c2, lb, ub
)
@@ -360,7 +336,6 @@
c2,
lb,
ub,
rollout,
terminal_cost,
pso_kwargs,
None,
@@ -399,7 +374,6 @@
c2,
lb,
ub,
rollout,
terminal_cost,
pso_kwargs,
prediction_random_numbers[i],
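After this change, callers of `acquisition` must pass `x` with a single time step, i.e. shape `(n_samples, 1, dim)`, and drop the `rollout` keyword. A hedged usage sketch follows: the argument names are taken from the docstring above and from `tests/test_non_myopic.py` further down, while the fitted model `mdl` and the bounds `lb`/`ub` are assumed to come from earlier fitting code that is not part of this diff.

# --- illustrative sketch, not part of the diff ---------------------------------
import numpy as np

# `mdl` (a fitted Idw or Rbf regressor), `lb` and `ub` (per-dimension bounds) are
# assumed to be available from earlier code; they are not constructed here.
n_samples, dim = 7, 3
x = np.random.default_rng(0).standard_normal(size=(n_samples, 1, dim))

a = acquisition(
    x=x,                 # must now have exactly one time step
    mdl=mdl,
    horizon=5,           # lookahead horizon
    discount=0.9,
    lb=lb,
    ub=ub,
    c1=1.5078,           # default weight of the variance contribution
    c2=1.4246,           # default weight of the distance contribution
    mc_iters=0,          # 0 -> deterministic evaluation, no Monte Carlo
)
# `a` holds one non-myopic acquisition value per target point.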
27 changes: 3 additions & 24 deletions src/globopt/nonmyopic/algorithm.py
@@ -31,7 +31,6 @@ def _next_query_point(
discount: float,
c1: float,
c2: float,
rollout: bool,
mc_iters: int,
quasi_mc: bool,
common_random_numbers: bool,
@@ -43,27 +42,18 @@
pso_kwargs: Optional[dict[str, Any]],
) -> tuple[Array1d, float]:
"""Computes the next point to query by minimizing the acquisition function."""
if rollout:
h_ = 1
lb_acquisition = lb[0]
ub_acquisition = ub[0]
else:
h_ = horizon
lb = lb[:, : h_ * dim] # due to shrinking horizon
ub = ub[:, : h_ * dim]
lb_acquisition = lb[0, :dim]
ub_acquisition = ub[0, :dim]
lb_acquisition = lb[0]
ub_acquisition = ub[0]
check_acquisition = iteration == 1
vpso_func = lambda x: acquisition(
x.reshape(-1, h_, dim),
x.reshape(-1, 1, dim),
mdl,
horizon,
discount,
lb_acquisition,
ub_acquisition,
c1,
c2,
rollout,
mc_iters,
quasi_mc,
common_random_numbers,
Expand All @@ -90,7 +80,6 @@ def nmgo(
c1: float = 1.5078,
c2: float = 1.4246,
#
rollout: bool = True,
mc_iters: int = 1024,
quasi_mc: bool = True,
common_random_numbers: bool = True,
@@ -133,12 +122,6 @@ def nmgo(
c2 : float, optional
Weight of the contribution of the distance function in the algorithm's
acquisition function, by default `1.4246`.
rollout : bool, optional
The strategy to be used for approximately solving the optimal control problem.
If `True`, rollout is used where only the first sample point is optimized and
then the remaining samples in the horizon are computed via the myopic
acquisition base policy. If `False`, the whole horizon trajectory is optimized
in an MPC fashion.
mc_iters : int, optional
Number of Monte Carlo iterations, by default `1024`. For better sampling, the
iterations should be a power of 2. If `0`, the acquisition is computed
@@ -186,9 +169,6 @@ def nmgo(
if callback is not None:
callback("nmgo", locals())

if not rollout:
lb = np.tile(lb, (1, horizon)) # due to increased dimension of search space
ub = np.tile(ub, (1, horizon))
if parallel is None:
parallel = Parallel(n_jobs=1, verbose=0)
elif isinstance(parallel, dict):
@@ -205,7 +185,6 @@
discount,
c1,
c2,
rollout,
mc_iters,
quasi_mc,
common_random_numbers,
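The same applies to `nmgo`: the `rollout` keyword and the horizon-tiling of the bounds disappear, so the acquisition is always optimized over the original search-space dimensionality. A hedged call sketch under the updated signature is shown below; the problem-specific arguments (objective, bounds, regression model, initial points and so on) are collected in `kwargs` as in the benchmarking and example scripts, since their exact keyword names are not all visible in this diff.

# --- illustrative sketch, not part of the diff ---------------------------------
# `kwargs` is assumed to gather the problem-specific arguments (objective,
# bounds, fitted model, initial points, ...) as the scripts above do.
nmgo(
    horizon=2,        # lookahead length
    discount=0.9,     # discount factor for future stage costs
    c1=1.5078,        # default weight of the variance contribution
    c2=1.4246,        # default weight of the distance contribution
    mc_iters=0,       # 0 -> deterministic acquisition, no Monte Carlo
    parallel=None,    # or e.g. {"n_jobs": -1, "backend": "loky"}
    maxiter=50,       # illustrative iteration budget
    seed=1909,
    **kwargs,
)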
12 changes: 4 additions & 8 deletions tests/test_non_myopic.py
@@ -1,6 +1,5 @@
import pickle
import unittest
from itertools import product

import numpy as np
from parameterized import parameterized
@@ -13,8 +12,8 @@


class TestAcquisition(unittest.TestCase):
@parameterized.expand(product((0, 2**3), (False, True)))
def test__returns_correct_values(self, mc_iters: int, rollout: bool):
@parameterized.expand([(0,), (2**3,)])
def test__returns_correct_values(self, mc_iters: int):
np_random = np.random.default_rng(1909)
n_samples = 7
horizon = 5
@@ -25,9 +24,7 @@ def test__returns_correct_values(self, mc_iters: int, rollout: bool):
discount = np_random.random()
c1 = np_random.random() * 2 + 1
c2 = np_random.random() * 2 + 1
x = np_random.standard_normal(
(n_samples * 2, 1, dim) if rollout else (n_samples * 2, horizon, dim)
)
x = np_random.standard_normal(size=(n_samples * 2, 1, dim))

a = acquisition(
x=x,
@@ -36,7 +33,6 @@ def test__returns_correct_values(self, mc_iters: int, rollout: bool):
discount=discount,
c1=c1,
c2=c2,
rollout=rollout,
lb=X.min(1).squeeze() - 0.1,
ub=X.max(1).squeeze() + 0.1,
mc_iters=mc_iters,
@@ -46,7 +42,7 @@ def test__returns_correct_values(self, mc_iters: int, rollout: bool):
pso_kwargs={"maxiter": 2000, "ftol": 1e-12, "xtol": 1e-12},
)

expected = RESULTS[(mc_iters, rollout)]
expected = RESULTS[(mc_iters, True)]
np.testing.assert_allclose(a, expected, atol=1e0, rtol=1e-1)


4 changes: 1 addition & 3 deletions tests/test_util.py
@@ -109,9 +109,7 @@ def test__dp_stage_cost_callback(self, algorithm: Literal["go", "nmgo"]):
0.0719124302371252,
]
func = nmgo
kwargs.update(
{"horizon": 2, "discount": 0.9, "rollout": True, "mc_iters": 0}
)
kwargs.update({"horizon": 2, "discount": 0.9, "mc_iters": 0})

func(**kwargs, callback=callback)

