implemented MC myopic acquisition

FilippoAiraldi · Dec 16, 2023 · 791f78a · 791f78a
1 parent c91e481
commit 791f78a
Show file tree

Hide file tree

Showing 2 changed files with 94 additions and 22 deletions.
diff --git a/examples_bt/myopic_acquisitions_examples.py b/examples_bt/myopic_acquisitions_examples.py
@@ -15,61 +15,65 @@
 import matplotlib.pyplot as plt
 import torch
 from botorch.optim import optimize_acqf
+from botorch.sampling import SobolQMCNormalSampler
 
 from globopt.myopic_acquisitions import (
     MyopicAcquisitionFunction,
     _idw_distance,
     acquisition_function,
+    qMcMyopicAcquisitionFunction,
 )
 from globopt.problems import SimpleProblem
 from globopt.regression import Rbf
 
 plt.style.use("bmh")
 
-# define the function and its domain
+# define the evaluation function and its domain
 problem = SimpleProblem()
 lb, ub = problem._bounds[0]
 
-# create data points - X has shape (batch, n_samples, dim)
+# create data points
 dv = "cpu"
 train_X = torch.as_tensor([-2.61, -1.92, -0.63, 0.38, 2], device=dv).view(-1, 1)
 train_Y = problem(train_X)
 
 # create regressor and fit it
 mdl = Rbf(train_X, train_Y, 0.5)
 
-# predict the (normal) posterior over all domain via fitted model
+# predict the posterior over all domain via fitted model
 X = torch.linspace(lb, ub, 1000).view(1, -1, 1)
 y_hat, s, W_sum_recipr, _ = mdl(X)
 
-# compute acquisition function by components
-z = _idw_distance(W_sum_recipr)
-
-# compute the overall acquisition function
+# compute the overall analytical acquisition function, component by component
 c1 = 1.0
 c2 = 0.5
 y_span = train_Y.amax(-2, keepdim=True) - train_Y.amin(-2, keepdim=True)
+z = _idw_distance(W_sum_recipr)
 a = acquisition_function(y_hat, s, y_span, W_sum_recipr, c1, c2).squeeze()
 
 # compute minimizer of analytic myopic acquisition function
+bounds = torch.as_tensor([[lb], [ub]])
 myopic_analytic_optimizer, myopic_analitic_opt = optimize_acqf(
     acq_function=MyopicAcquisitionFunction(mdl, c1, c2),
-    bounds=torch.as_tensor([[lb], [ub]]),
+    bounds=bounds,
     q=1,  # mc iterations - not supported for the analytical acquisition function
-    num_restarts=10,  # number of optimization restarts
-    raw_samples=20,  # initial samples to start the first `num_restarts` points
+    num_restarts=16,  # number of optimization restarts
+    raw_samples=32,  # initial samples to start the first `num_restarts` points
     options={"seed": 0},
 )
 
-# # compute minimizer of MC myopic acquisition function
-# myopic_mc_optimizer, myopic_mc_opt = optimize_acqf(
-#     acq_function=qMcMyopicAcquisitionFunction(mdl, c1, c2),
-#     bounds=torch.as_tensor([[lb], [ub]]),
-#     q=2**10,
-#     num_restarts=10,
-#     raw_samples=20,
-#     options={"seed": 0},
-# )
+# for the monte carlo version, we can directly use the forward method
+sampler = SobolQMCNormalSampler(sample_shape=256, seed=0)
+MCMAF = qMcMyopicAcquisitionFunction(mdl, c1, c2, sampler)
+a_mc = MCMAF(X.view(-1, 1, 1))
+myopic_mc_optimizer, myopic_mc_opt = optimize_acqf(
+    acq_function=MCMAF,
+    bounds=bounds,
+    q=1,  # must be one for plotting reasons
+    num_restarts=64,
+    raw_samples=128,
+    options={"seed": 0},
+)
 
 # plot
 _, ax = plt.subplots(1, 1, constrained_layout=True, figsize=(6, 2.5))
@@ -86,7 +90,7 @@
     alpha=0.2,
 )
 ax.plot(X, z.squeeze(), label="$z(x)$", color="C2")
-ax.plot(X, a - a.min(), "--", lw=2.5, label="Analitycal $a(x)$", color="C3")
+ax.plot(X, a - a.amin(), "--", lw=1, label="Analitycal $a(x)$", color="C3")
 ax.plot(
     myopic_analytic_optimizer.squeeze(),
     myopic_analitic_opt - a.min(),
@@ -95,6 +99,15 @@
     markersize=17,
     color="C3",
 )
+ax.plot(X, a_mc - a_mc.min(), "--", lw=1, label="Monte Carlo $a(x)$", color="C4")
+ax.plot(
+    myopic_mc_optimizer.squeeze(),
+    myopic_mc_opt - a_mc.amin(),
+    "*",
+    label=None,
+    markersize=17,
+    color="C4",
+)
 ax.set_xlim(lb, ub)
 ax.set_ylim(0, 2.5)
 ax.legend()

diff --git a/src/globopt/myopic_acquisitions.py b/src/globopt/myopic_acquisitions.py
@@ -8,14 +8,16 @@
     functions. Computational Optimization and Applications, 77(2):571–595, 2020
 """
 
-from typing import Union
+from typing import Optional, Union
 
 import torch
 from botorch.acquisition.analytic import AnalyticAcquisitionFunction
+from botorch.acquisition.monte_carlo import MCAcquisitionFunction
+from botorch.sampling.base import MCSampler
 from botorch.utils import t_batch_mode_transform
 from torch import Tensor
 
-from globopt.regression import Idw, Rbf
+from globopt.regression import Idw, Rbf, _idw_scale
 
 
 def _idw_distance(W_sum_recipr: Tensor) -> Tensor:
@@ -152,3 +154,60 @@ def forward(self, X: Tensor) -> Tensor:
             self.c1,
             self.c2,
         ).squeeze((0, 2))
+
+
+class qMcMyopicAcquisitionFunction(MCAcquisitionFunction, AcquisitionFunctionMixin):
+    """Monte Carlo-based myopic acquisition function for Global Optimization based on
+    RBF/IDW regression.
+
+    In contrast to `qAnalyticalMyopicAcquisitionFunction`, this acquisition function
+    does not approximate the expectation w.r.t. the IDW variance, and instead uses
+    Monte Carlo sampling to compute the expectation of the acquisition values
+
+    Example
+    -------
+    >>> model = Idw(train_X, train_Y)
+    >>> sampler = SobolQMCNormalSampler(1024)
+    >>> McMAF = qMcMyopicAcquisitionFunction(model, sampler)
+    >>> af = McMAF(test_X)
+    """
+
+    def __init__(
+        self,
+        model: Union[Idw, Rbf],
+        c1: Union[float, Tensor],
+        c2: Union[float, Tensor],
+        sampler: Optional[MCSampler],
+    ) -> None:
+        """Instantiates the myopic acquisition function.
+
+        Parameters
+        ----------
+        model : Idw or Rbf
+            BoTorch model based on IDW or RBF regression.
+        c1 : float or scalar Tensor
+            Weight of the contribution of the variance function.
+        c2 : float or scalar Tensor
+            Weight of the contribution of the distance function.
+        sampler : MCSampler, optional
+            The sampler used to draw base samples.
+        """
+        super().__init__(model, sampler)
+        self._setup(c1, c2, accept_batch_regression=False)
+
+    @t_batch_mode_transform()
+    def forward(self, X: Tensor) -> Tensor:
+        # input of this forward is `b x q x d`, and output `b`. See the note in
+        # regression.py to understand these shapes.
+        # first, compute the posterior for X, and sample from it
+        mdl = self.model
+        posterior = mdl.posterior(X)
+        samples = self.get_posterior_samples(posterior)
+
+        # then, for each sample, compute its mean prediction and its IDW scale, and
+        # finally, compute acquisition and reduce in t- and q-batches
+        scale = _idw_scale(samples, mdl.train_Y, posterior._V)
+        acqvals = acquisition_function(
+            samples, scale, self.span_Y, posterior._W_sum_recipr, self.c1, self.c2
+        )
+        return acqvals.amax((-2, -1)).mean(0)  # tuple(range(acqvals.ndim - 2))