diff --git a/benchmarking/run.py b/benchmarking/run.py index 89a77fd..70e9b02 100644 --- a/benchmarking/run.py +++ b/benchmarking/run.py @@ -67,7 +67,6 @@ def run_problem( """Solves the given problem with the given algorithm (based on the specified horizon), and saves as result the performance of the run in terms of best-so-far and total cost.""" - rollout = True c1, c2, eps = 1.0, 0.5, 1.0 bsf_callback = BestSoFarCallback() dp_callback = DpStageCostCallback() @@ -85,7 +84,7 @@ def run_problem( "seed": seed, "callback": callbacks, "pso_kwargs": { - "swarmsize": 5 * problem.dim * (rollout or horizon), + "swarmsize": 5 * problem.dim, "xtol": 1e-9, "ftol": 1e-9, "maxiter": 300, @@ -95,14 +94,7 @@ def run_problem( if horizon == 1: go(**kwargs) else: - nmgo( - horizon=horizon, - discount=1.0, - rollout=rollout, - mc_iters=0, - parallel=None, - **kwargs, - ) + nmgo(horizon=horizon, discount=1.0, mc_iters=0, parallel=None, **kwargs) cost = sum(dp_callback) bests = ",".join(map(str, bsf_callback)) with shm_lock(shm_name), open(output_csv, "a") as f: diff --git a/examples/non_myopic_acquisition.py b/examples/non_myopic_acquisition.py index e1c5326..a047f61 100644 --- a/examples/non_myopic_acquisition.py +++ b/examples/non_myopic_acquisition.py @@ -39,7 +39,6 @@ "ub": ub, "c1": c1, "c2": c2, - "rollout": True, "seed": 0, } nonmyopic_deterministic_a = nonmyopic_acquisition(**kwargs, mc_iters=0) diff --git a/examples/non_myopic_global_optimization_1.py b/examples/non_myopic_global_optimization_1.py index 4912f70..1b320bb 100644 --- a/examples/non_myopic_global_optimization_1.py +++ b/examples/non_myopic_global_optimization_1.py @@ -49,7 +49,7 @@ def save_history(algorithm: Literal["go", "nmgo"], locals: dict[str, Any]) -> No y_hat = predict(mdl, x_) if algorithm == "go": acq = myopic_acquisition(x_, mdl, c1, c2, y_hat, None).squeeze() - elif locals["rollout"]: + else: acq = nonmyopic_acquisition( x_.transpose(1, 0, 2), mdl, @@ -59,7 +59,6 @@ def save_history(algorithm: Literal["go", "nmgo"], locals: dict[str, Any]) -> No ub, c1, c2, - locals["rollout"], locals["mc_iters"], locals["quasi_mc"], locals["common_random_numbers"], @@ -69,8 +68,6 @@ def save_history(algorithm: Literal["go", "nmgo"], locals: dict[str, Any]) -> No 0, {"n_jobs": -1, "backend": "loky"}, ) - else: - acq = np.full(x.size, np.nan) history.append( ( locals["x_new"], @@ -113,7 +110,6 @@ def save_history(algorithm: Literal["go", "nmgo"], locals: dict[str, Any]) -> No init_points=x0, c1=c1, c2=c2, - rollout=True, mc_iters=0, maxiter=ITERS, seed=1909, diff --git a/examples/non_myopic_global_optimization_2.py b/examples/non_myopic_global_optimization_2.py index 95a1997..f8e72c6 100644 --- a/examples/non_myopic_global_optimization_2.py +++ b/examples/non_myopic_global_optimization_2.py @@ -43,7 +43,7 @@ def save_history(algorithm: Literal["go", "nmgo"], locals: dict[str, Any]) -> No y_hat = predict(mdl, x_) if algorithm == "go": acq = myopic_acquisition(x_, mdl, c1, c2, y_hat, None).squeeze() - elif locals["rollout"]: + else: acq = nonmyopic_acquisition( x_.transpose(1, 0, 2), mdl, @@ -53,7 +53,6 @@ def save_history(algorithm: Literal["go", "nmgo"], locals: dict[str, Any]) -> No ub, c1, c2, - locals["rollout"], locals["mc_iters"], locals["quasi_mc"], locals["common_random_numbers"], @@ -63,8 +62,6 @@ def save_history(algorithm: Literal["go", "nmgo"], locals: dict[str, Any]) -> No 0, {"n_jobs": -1, "backend": "loky"}, ) - else: - acq = np.full(x.size, np.nan) history.append( ( locals["x_new"], @@ -115,7 +112,6 @@ def save_history(algorithm: Literal["go", "nmgo"], locals: dict[str, Any]) -> No init_points=x0, c1=c1, c2=c2, - rollout=True, mc_iters=2**10, parallel={"n_jobs": -1, "backend": "loky", "verbose": 1}, maxiter=ITERS, diff --git a/examples/variance_reduction.py b/examples/variance_reduction.py index a5e01eb..e1564d2 100644 --- a/examples/variance_reduction.py +++ b/examples/variance_reduction.py @@ -38,7 +38,6 @@ "discount": discount, "c1": c1, "c2": c2, - "rollout": True, "lb": lb, "ub": ub, } diff --git a/src/globopt/nonmyopic/acquisition.py b/src/globopt/nonmyopic/acquisition.py index 083287c..51a849f 100644 --- a/src/globopt/nonmyopic/acquisition.py +++ b/src/globopt/nonmyopic/acquisition.py @@ -125,25 +125,17 @@ def _advance( def _next_query_point( - x: Array3d, mdl: RegressorType, - current_step: int, # > 0 c1: float, c2: float, dym: Array3d, lb: Array2d, ub: Array2d, - rollout: bool, pso_kwargs: dict[str, Any], np_random: np.random.Generator, ) -> tuple[Array3d, Array1d]: - """Computes the next point to query and its associated cost. If the strategy is - `"mpc"`, then the next point is just the next point in the trajectory. If the - strategy is `"rollout"`, then the next point is the minimizer of the myopic - acquisition function, i.e., base policy.""" - if not rollout: - x_next = x[:, current_step, np.newaxis, :] # ∈ (n_samples, 1, dim) - return x_next, myopic_acquisition(x_next, mdl, c1, c2, None, dym)[:, 0, 0] + """Computes the next point to query and its associated cost, i.e., the next point is + the minimizer of the myopic acquisition function, a.k.a., the base policy.""" def func(q: Array3d) -> Array2d: return myopic_acquisition(q, mdl, c1, c2, None, dym)[:, :, 0] @@ -177,7 +169,6 @@ def _compute_nonmyopic_cost( c2: float, lb: Array2d, ub: Array2d, - rollout: bool, terminal_cost: bool, pso_kwargs: dict[str, Any], prediction_rng: Optional[Array2d], @@ -197,7 +188,7 @@ def _compute_nonmyopic_cost( if h >= horizon: break x_next, current_cost = _next_query_point( - x_trajectory, mdl, h, c1, c2, dym, lb, ub, rollout, pso_kwargs, np_random + mdl, c1, c2, dym, lb, ub, pso_kwargs, np_random ) cost += (discount**h) * current_cost # accumulate in-place if terminal_cost: @@ -246,7 +237,6 @@ def acquisition( ub: Array1d, c1: float = 1.5078, c2: float = 1.4246, - rollout: bool = True, # mc_iters: int = 1024, quasi_mc: bool = True, @@ -265,13 +255,10 @@ def acquisition( Parameters ---------- - x : array of shape (n_samples, horizon, dim) or (n_samples, 1, dim) + x : array of shape (n_samples, 1, dim) Array of points for which to compute the acquisition. `n_samples` is the number of target points for which to compute the acquisition, and `dim` is the number - of features/variables of each point. In case of `rollout = False`, `horizon` is - the length of the prediced trajectory of sampled points; while in case of - `rollout == True`, this dimension has to be 1, since only the first sample - point is optimized over. + of features/variables of each point. mdl : Idw or Rbf Fitted model to use for computing the acquisition function. horizon : int @@ -286,12 +273,6 @@ def acquisition( Weight of the contribution of the variance function, by default `1.5078`. c2 : float, optional Weight of the contribution of the distance function, by default `1.4246`. - rollout : bool, optional - The strategy to be used for approximately solving the optimal control problem. - If `True`, rollout is used where only the first sample point is optimized and - then the remaining samples in the horizon are computed via the myopic - acquisition base policy. If `False`, the whole horizon trajectory is optimized - in an MPC fashion. mc_iters : int, optional Number of Monte Carlo iterations, by default `1024`. For better sampling, the iterations should be a power of 2. If `0`, the acquisition is computed @@ -327,20 +308,15 @@ def acquisition( The non-myopic acquisition function for each target point (per MC iteration, if `return_as_list == True`) """ - n_samples, h_, _ = x.shape if check: assert mdl.Xm_.shape[0] == 1, "regression model must be non-batched" - if rollout: - assert h_ == 1, "x must have only one time step for rollout" - else: - assert ( - h_ == horizon - ), "x must have the same number of time steps as the horizon" + assert x.shape[1] == 1, "x must have only one time step" if pso_kwargs is None: pso_kwargs = {} # compute the first (myopic) iteration of the acquisition function - this is in # common across all MC iterations + n_samples = x.shape[0] myopic_cost, mdl, lb, ub, y_min, y_max = _compute_myopic_cost( x, mdl, n_samples, c1, c2, lb, ub ) @@ -360,7 +336,6 @@ def acquisition( c2, lb, ub, - rollout, terminal_cost, pso_kwargs, None, @@ -399,7 +374,6 @@ def acquisition( c2, lb, ub, - rollout, terminal_cost, pso_kwargs, prediction_random_numbers[i], diff --git a/src/globopt/nonmyopic/algorithm.py b/src/globopt/nonmyopic/algorithm.py index d05c4db..30485ef 100644 --- a/src/globopt/nonmyopic/algorithm.py +++ b/src/globopt/nonmyopic/algorithm.py @@ -31,7 +31,6 @@ def _next_query_point( discount: float, c1: float, c2: float, - rollout: bool, mc_iters: int, quasi_mc: bool, common_random_numbers: bool, @@ -43,19 +42,11 @@ def _next_query_point( pso_kwargs: Optional[dict[str, Any]], ) -> tuple[Array1d, float]: """Computes the next point to query by minimizing the acquisition function.""" - if rollout: - h_ = 1 - lb_acquisition = lb[0] - ub_acquisition = ub[0] - else: - h_ = horizon - lb = lb[:, : h_ * dim] # due to shrinking horizon - ub = ub[:, : h_ * dim] - lb_acquisition = lb[0, :dim] - ub_acquisition = ub[0, :dim] + lb_acquisition = lb[0] + ub_acquisition = ub[0] check_acquisition = iteration == 1 vpso_func = lambda x: acquisition( - x.reshape(-1, h_, dim), + x.reshape(-1, 1, dim), mdl, horizon, discount, @@ -63,7 +54,6 @@ def _next_query_point( ub_acquisition, c1, c2, - rollout, mc_iters, quasi_mc, common_random_numbers, @@ -90,7 +80,6 @@ def nmgo( c1: float = 1.5078, c2: float = 1.4246, # - rollout: bool = True, mc_iters: int = 1024, quasi_mc: bool = True, common_random_numbers: bool = True, @@ -133,12 +122,6 @@ def nmgo( c2 : float, optional Weight of the contribution of the distance function in the algorithm's acquisition function, by default `1.4246`. - rollout : bool, optional - The strategy to be used for approximately solving the optimal control problem. - If `True`, rollout is used where only the first sample point is optimized and - then the remaining samples in the horizon are computed via the myopic - acquisition base policy. If `False`, the whole horizon trajectory is optimized - in an MPC fashion. mc_iters : int, optional Number of Monte Carlo iterations, by default `1024`. For better sampling, the iterations should be a power of 2. If `0`, the acquisition is computed @@ -186,9 +169,6 @@ def nmgo( if callback is not None: callback("nmgo", locals()) - if not rollout: - lb = np.tile(lb, (1, horizon)) # due to increased dimension of search space - ub = np.tile(ub, (1, horizon)) if parallel is None: parallel = Parallel(n_jobs=1, verbose=0) elif isinstance(parallel, dict): @@ -205,7 +185,6 @@ def nmgo( discount, c1, c2, - rollout, mc_iters, quasi_mc, common_random_numbers, diff --git a/tests/test_non_myopic.py b/tests/test_non_myopic.py index 2534a9d..677386e 100644 --- a/tests/test_non_myopic.py +++ b/tests/test_non_myopic.py @@ -1,6 +1,5 @@ import pickle import unittest -from itertools import product import numpy as np from parameterized import parameterized @@ -13,8 +12,8 @@ class TestAcquisition(unittest.TestCase): - @parameterized.expand(product((0, 2**3), (False, True))) - def test__returns_correct_values(self, mc_iters: int, rollout: bool): + @parameterized.expand([(0,), (2**3,)]) + def test__returns_correct_values(self, mc_iters: int): np_random = np.random.default_rng(1909) n_samples = 7 horizon = 5 @@ -25,9 +24,7 @@ def test__returns_correct_values(self, mc_iters: int, rollout: bool): discount = np_random.random() c1 = np_random.random() * 2 + 1 c2 = np_random.random() * 2 + 1 - x = np_random.standard_normal( - (n_samples * 2, 1, dim) if rollout else (n_samples * 2, horizon, dim) - ) + x = np_random.standard_normal(size=(n_samples * 2, 1, dim)) a = acquisition( x=x, @@ -36,7 +33,6 @@ def test__returns_correct_values(self, mc_iters: int, rollout: bool): discount=discount, c1=c1, c2=c2, - rollout=rollout, lb=X.min(1).squeeze() - 0.1, ub=X.max(1).squeeze() + 0.1, mc_iters=mc_iters, @@ -46,7 +42,7 @@ def test__returns_correct_values(self, mc_iters: int, rollout: bool): pso_kwargs={"maxiter": 2000, "ftol": 1e-12, "xtol": 1e-12}, ) - expected = RESULTS[(mc_iters, rollout)] + expected = RESULTS[(mc_iters, True)] np.testing.assert_allclose(a, expected, atol=1e0, rtol=1e-1) diff --git a/tests/test_util.py b/tests/test_util.py index 23b4bd2..ecfa112 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -109,9 +109,7 @@ def test__dp_stage_cost_callback(self, algorithm: Literal["go", "nmgo"]): 0.0719124302371252, ] func = nmgo - kwargs.update( - {"horizon": 2, "discount": 0.9, "rollout": True, "mc_iters": 0} - ) + kwargs.update({"horizon": 2, "discount": 0.9, "mc_iters": 0}) func(**kwargs, callback=callback)