From c91e4813fa6b588aae83dd7ac1446eb0ace46aa7 Mon Sep 17 00:00:00 2001
From: Filippo Airaldi
Date: Sat, 16 Dec 2023 00:10:21 +0100
Subject: [PATCH] added V to regressor forward

---
 examples_bt/myopic_acquisitions_examples.py |  2 +-
 src/globopt/regression.py                   | 44 +++++++++++++--------
 tests/test_myopic.py                        |  2 +-
 3 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/examples_bt/myopic_acquisitions_examples.py b/examples_bt/myopic_acquisitions_examples.py
index 3325f14..66c3af6 100644
--- a/examples_bt/myopic_acquisitions_examples.py
+++ b/examples_bt/myopic_acquisitions_examples.py
@@ -40,7 +40,7 @@
 
 # predict the (normal) posterior over the whole domain via the fitted model
 X = torch.linspace(lb, ub, 1000).view(1, -1, 1)
-y_hat, s, W_sum_recipr = mdl(X)
+y_hat, s, W_sum_recipr, _ = mdl(X)
 
 # compute acquisition function by components
 z = _idw_distance(W_sum_recipr)
diff --git a/src/globopt/regression.py b/src/globopt/regression.py
index 6b35dac..18a1e9a 100644
--- a/src/globopt/regression.py
+++ b/src/globopt/regression.py
@@ -18,8 +18,11 @@
 # Conventions
 # ------------
 # botorch uses the convention `b0 x b1 x ... x q x d`, where
-# * `b-i` is the number of batches of candidates to evaluate in parallel
-# * `q` is the number of candidates to consider jointly
+# * `b-i` is the number of batches of candidates to evaluate in parallel; minimizing
+#   the acquisition function amounts to minimizing its sum over the batches and
+#   finally keeping the best batch
+# * `q` is the number of candidates to consider jointly per batch; often, only the
+#   best of these is kept per batch
 # * `d` is the dimension of the design space of each `q`-th candidate.
 # Note that while there might be more than one batch dimension, usually we need just one
 # in important methods.
@@ -38,13 +41,17 @@
 # Interfacing
 # -----------
 # Here we distinguish between the myopic and the non-myopic case.
-# * myopic case: for the simplest cases, i.e., the analytical acquisition functions, we
-#   expect `q = 1`. This means that the `b` dimension is botorch can be swapped in
-#   second place and used as the `m`. Usually, we call it `n` to distinguish the `m`
-#   training points from the `n` prediction points.
-#   For the Monte Carlo myopic case, TODO
-# * non-myopic case: TODO: would `b x q x 1 x d` work for regressors as repeated as
-#   `b x q x m x d`?
+# * myopic case:
+#   * `MyopicAcquisitionFunction`: here `q = 1`, so the `b` dimension in botorch is
+#     automatically swapped into second place and used as the prediction-points
+#     dimension (we use `n` for prediction points and `m` for training points).
+#   * `MyopicAcquisitionFunctionInExpectation`: TODO
+#   * `qMcMyopicAcquisitionFunction`: here, `q > 1` is the number of points considered
+#     in parallel, while `b` is the number of batches of these. The acquisition
+#     function is minimized over the sum of its batches, and for each batch the best
+#     candidate out of the `q` is taken.
+# * non-myopic case:
+#   TODO: would `b x q x 1 x d` work for regressors as repeated as `b x q x m x d`?
 
 from typing import Any, Optional, Union
 
@@ -89,7 +96,7 @@ def posterior(self, X: Tensor, **_: Any) -> GPyTorchPosterior:
         self.eval()
         # NOTE: do not modify input/output shapes here. It is the responsibility of the
         # acquisition function calling this method to do so.
-        mean, scale, W_sum_recipr = self.forward(X)
+        mean, scale, W_sum_recipr, V = self.forward(X)
         # NOTE: it's a bit sketchy, but `W_sum_recipr` is needed by the acquisition
         # functions. It is first computed here, so it is convenient to attach it to
         # the posterior for later re-use.
@@ -99,6 +106,7 @@ def posterior(self, X: Tensor, **_: Any) -> GPyTorchPosterior:
         posterior = GPyTorchPosterior(distribution)
         posterior._scale = scale
         posterior._W_sum_recipr = W_sum_recipr
+        posterior._V = V
         return posterior
 
 
@@ -125,20 +133,20 @@ def _idw_scale(Y: Tensor, train_Y: Tensor, V: Tensor) -> Tensor:
 
 def _idw_predict(
     train_X: Tensor, train_Y: Tensor, X: Tensor
-) -> tuple[Tensor, Tensor, Tensor]:
+) -> tuple[Tensor, Tensor, Tensor, Tensor]:
     """Mean and scale for IDW regression."""
     W = torch.cdist(X, train_X).square().clamp_min(DELTA).reciprocal()
     W_sum_recipr = W.sum(-1, keepdim=True).reciprocal()
     V = W.mul(W_sum_recipr)
     mean = V.matmul(train_Y)
     std = _idw_scale(mean, train_Y, V)
-    return mean, std, W_sum_recipr
+    return mean, std, W_sum_recipr, V
 
 
 class Idw(BaseRegression):
     """Inverse Distance Weighting regression model in Global Optimization."""
 
-    def forward(self, X: Tensor) -> tuple[Tensor, Tensor, Tensor]:
+    def forward(self, X: Tensor) -> tuple[Tensor, Tensor, Tensor, Tensor]:
         """Computes the IDW regression model.
 
         Parameters
@@ -155,6 +163,8 @@
             - the mean estimate `(b0 x b1 x ...) x n x 1`
             - the standard deviation of the estimate `(b0 x b1 x ...) x n x 1`
             - the reciprocal of the sum of the IDW weights `(b0 x b1 x ...) x n x 1`
+            - the normalized IDW weights `(b0 x b1 x ...) x n x m`, where `m` is
+              the number of training points.
         """
         return _idw_predict(self.train_X, self.train_Y, X)
 
@@ -205,7 +215,7 @@ def _rbf_partial_fit(
 
 def _rbf_predict(
     train_X: Tensor, train_Y: Tensor, eps: Tensor, coeffs: Tensor, X: Tensor
-) -> tuple[Tensor, Tensor, Tensor]:
+) -> tuple[Tensor, Tensor, Tensor, Tensor]:
     """Predicts mean and scale for RBF regression."""
     # NOTE: here, we do not use `KernelLinearOperator` so as to avoid computing the
     # distance from `X` to `train_X` twice, once in the linear operator and once in the
@@ -216,7 +226,7 @@ def _rbf_predict(
     W_sum_recipr = W.sum(-1, keepdim=True).reciprocal()
     V = W.mul(W_sum_recipr)
     std = _idw_scale(mean, train_Y, V)
-    return mean, std, W_sum_recipr
+    return mean, std, W_sum_recipr, V
 
 
 class Rbf(BaseRegression):
@@ -268,7 +278,7 @@ def Minv_and_coeffs(self) -> tuple[Tensor, Tensor]:
         coefficients. Use this to partially fit a new regressor (see `__init__`)"""
         return self.Minv, self.coeffs
 
-    def forward(self, X: Tensor) -> tuple[Tensor, Tensor, Tensor]:
+    def forward(self, X: Tensor) -> tuple[Tensor, Tensor, Tensor, Tensor]:
         """Computes the RBF regression model.
 
         Parameters
@@ -285,5 +295,7 @@
             - the mean estimate `(b0 x b1 x ...) x n x 1`
             - the standard deviation of the estimate `(b0 x b1 x ...) x n x 1`
             - the reciprocal of the sum of the IDW weights `(b0 x b1 x ...) x n x 1`
+            - the normalized IDW weights `(b0 x b1 x ...) x n x m`, where `m` is
+              the number of training points.
""" return _rbf_predict(self.train_X, self.train_Y, self.eps, self.coeffs, X) diff --git a/tests/test_myopic.py b/tests/test_myopic.py index bfae960..077f28e 100644 --- a/tests/test_myopic.py +++ b/tests/test_myopic.py @@ -26,7 +26,7 @@ def test__methods__returns_correct_values(self): MAF = MyopicAcquisitionFunction(mdl, 1.0, 0.5) a1 = MAF(x.transpose(1, 0)).squeeze().neg() - y_hat, s, W_sum_recipr = mdl(x) + y_hat, s, W_sum_recipr, _ = mdl(x) dym = Y.amax(-2) - Y.amin(-2) z = _idw_distance(W_sum_recipr) a2 = acquisition_function(y_hat, s, dym, W_sum_recipr, MAF.c1, MAF.c2).neg()