diff --git a/tests/unit/models/gpflow/test_interface.py b/tests/unit/models/gpflow/test_interface.py
index 1de0f94b19..f81c6c2a8e 100644
--- a/tests/unit/models/gpflow/test_interface.py
+++ b/tests/unit/models/gpflow/test_interface.py
@@ -31,6 +31,9 @@ class _QuadraticPredictor(GPflowPredictor):
     def model(self) -> GPModel:
         return _QuadraticGPModel()
 
+    def optimize(self, dataset: Dataset) -> None:
+        self.optimizer.optimize(self.model, dataset)
+
     def update(self, dataset: Dataset) -> None:
         return
 
diff --git a/tests/unit/models/gpflow/test_models.py b/tests/unit/models/gpflow/test_models.py
index 195093d5ca..d477d7163a 100644
--- a/tests/unit/models/gpflow/test_models.py
+++ b/tests/unit/models/gpflow/test_models.py
@@ -150,9 +150,13 @@ def test_gpflow_wrappers_default_optimize(
         args = {}
     loss = internal_model.training_loss(**args)
 
-    model.optimize(Dataset(*data))
+    model.optimize_and_save_result(Dataset(*data))
 
-    assert internal_model.training_loss(**args) < loss
+    new_loss = internal_model.training_loss(**args)
+    assert new_loss < loss
+    if not isinstance(internal_model, SVGP):
+        assert model.last_result is not None
+        npt.assert_allclose(new_loss, model.last_result.fun)
 
 
 def test_gpflow_wrappers_ref_optimize(gpflow_interface_factory: ModelFactoryType) -> None:
diff --git a/tests/unit/models/gpflux/test_models.py b/tests/unit/models/gpflux/test_models.py
index dff7c48374..17e83fb90e 100644
--- a/tests/unit/models/gpflux/test_models.py
+++ b/tests/unit/models/gpflux/test_models.py
@@ -297,9 +297,10 @@ def test_deep_gaussian_process_with_lr_scheduler(
     optimizer = KerasOptimizer(tf.optimizers.Adam(lr_schedule), fit_args)
     model = DeepGaussianProcess(two_layer_model(x), optimizer)
 
-    model.optimize(Dataset(x, y))
+    model.optimize_and_save_result(Dataset(x, y))
 
-    assert len(model.model_keras.history.history["loss"]) == epochs
+    assert model.last_result is not None
+    assert len(model.last_result.history["loss"]) == epochs
 
 
 def test_deep_gaussian_process_default_optimizer_is_correct(
diff --git a/tests/unit/models/keras/test_models.py b/tests/unit/models/keras/test_models.py
index 1bcee8b594..3cba666197 100644
--- a/tests/unit/models/keras/test_models.py
+++ b/tests/unit/models/keras/test_models.py
@@ -215,9 +215,10 @@ def scheduler(epoch: int, lr: float) -> float:
 
     npt.assert_allclose(model.model.optimizer.lr.numpy(), init_lr, rtol=1e-6)
 
-    model.optimize(example_data)
+    model.optimize_and_save_result(example_data)
 
-    npt.assert_allclose(model.model.history.history["lr"], [0.5, 0.25])
+    assert model.last_result is not None
+    npt.assert_allclose(model.last_result.history["lr"], [0.5, 0.25])
 
     npt.assert_allclose(model.model.optimizer.lr.numpy(), init_lr, rtol=1e-6)
diff --git a/tests/unit/models/test_interfaces.py b/tests/unit/models/test_interfaces.py
index 6a8784feac..648dcb1724 100644
--- a/tests/unit/models/test_interfaces.py
+++ b/tests/unit/models/test_interfaces.py
@@ -193,7 +193,8 @@ def _assert_data(self, dataset: Dataset) -> None:
     stack = TrainableModelStack((model01, 2), (model2, 1), (model3, 1))
     data = Dataset(tf.random.uniform([5, 7, 3]), tf.random.uniform([5, 7, 4]))
     stack.update(data)
-    stack.optimize(data)
+    stack.optimize_and_save_result(data)
+    assert stack.last_result == [None] * 3
 
 
 def test_model_stack_reparam_sampler_raises_for_submodels_without_reparam_sampler() -> None:
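Note: the updated tests above all exercise the same pattern, so the intended usage reads roughly as below. This is a minimal sketch with toy data, assuming a scipy-backed GaussianProcessRegression wrapper; the model and data here are illustrative, not taken from this diff.

    import gpflow
    import tensorflow as tf

    from trieste.data import Dataset
    from trieste.models.gpflow import GaussianProcessRegression

    # toy training data (float64, as gpflow expects by default)
    x = tf.random.uniform([20, 1], dtype=tf.float64)
    y = tf.sin(x)
    dataset = Dataset(x, y)

    model = GaussianProcessRegression(
        gpflow.models.GPR((x, y), kernel=gpflow.kernels.RBF())
    )

    # optimize_and_save_result stores whatever optimize() returns on the model
    model.optimize_and_save_result(dataset)

    # for a scipy-based optimizer the saved result exposes the final loss as .fun
    if model.last_result is not None:
        print(model.last_result.fun)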
diff --git a/tests/util/models/gpflow/models.py b/tests/util/models/gpflow/models.py
index 2f9e9f888a..efae92f91c 100644
--- a/tests/util/models/gpflow/models.py
+++ b/tests/util/models/gpflow/models.py
@@ -204,7 +204,7 @@ def mock_data() -> tuple[tf.Tensor, tf.Tensor]:
 
 
 class QuadraticMeanAndRBFKernelWithSamplers(
-    QuadraticMeanAndRBFKernel, HasTrajectorySampler, HasReparamSampler
+    QuadraticMeanAndRBFKernel, HasTrajectorySampler, HasReparamSampler, TrainableProbabilisticModel
 ):
     r"""
     A Gaussian process with scalar quadratic mean, an RBF kernel and
diff --git a/trieste/ask_tell_optimization.py b/trieste/ask_tell_optimization.py
index 3b5c973963..f3528ed943 100644
--- a/trieste/ask_tell_optimization.py
+++ b/trieste/ask_tell_optimization.py
@@ -233,7 +233,7 @@ def __init__(
         for tag, model in self._models.items():
             dataset = datasets[tag]
             model.update(dataset)
-            model.optimize(dataset)
+            model.optimize_and_save_result(dataset)
 
         summary_writer = logging.get_tensorboard_writer()
         if summary_writer:
@@ -434,7 +434,7 @@ def tell(self, new_data: Mapping[Tag, Dataset] | Dataset) -> None:
         for tag, model in self._models.items():
             dataset = self._datasets[tag]
             model.update(dataset)
-            model.optimize(dataset)
+            model.optimize_and_save_result(dataset)
 
         summary_writer = logging.get_tensorboard_writer()
         if summary_writer:
diff --git a/trieste/bayesian_optimizer.py b/trieste/bayesian_optimizer.py
index 9a1fb5652e..2d8f3833ab 100644
--- a/trieste/bayesian_optimizer.py
+++ b/trieste/bayesian_optimizer.py
@@ -719,7 +719,7 @@ def optimize(
             for tag, model in models.items():
                 dataset = datasets[tag]
                 model.update(dataset)
-                model.optimize(dataset)
+                model.optimize_and_save_result(dataset)
 
             if summary_writer:
                 logging.set_step_number(0)
                 with summary_writer.as_default(step=0):
@@ -752,7 +752,7 @@ def optimize(
                 for tag, model in models.items():
                     dataset = datasets[tag]
                     model.update(dataset)
-                    model.optimize(dataset)
+                    model.optimize_and_save_result(dataset)
 
                 if summary_writer:
                     with summary_writer.as_default(step=step):
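Note: with the ask-tell and Bayesian optimization loops switched to optimize_and_save_result, the most recent fit result stays readable on each model between steps. A rough sketch of inspecting it mid-loop, assuming search_space, observer, initial_data, model and num_steps are set up as in the trieste tutorials (hypothetical setup, not part of this diff):

    from trieste.ask_tell_optimization import AskTellOptimizer

    ask_tell = AskTellOptimizer(search_space, initial_data, model)

    for _ in range(num_steps):
        query_points = ask_tell.ask()
        # tell() runs model.update() followed by model.optimize_and_save_result()
        ask_tell.tell(observer(query_points))
        # the optimizer-specific result of the most recent fit is now available
        print(model.last_result)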
+ # """ + # self.optimizer.optimize(self.model, dataset) def log(self, dataset: Optional[Dataset] = None) -> None: """ diff --git a/trieste/models/gpflow/models.py b/trieste/models/gpflow/models.py index 76a16ba305..153bf25ed2 100644 --- a/trieste/models/gpflow/models.py +++ b/trieste/models/gpflow/models.py @@ -48,7 +48,7 @@ TrainableProbabilisticModel, TrajectorySampler, ) -from ..optimizer import BatchOptimizer, Optimizer +from ..optimizer import BatchOptimizer, Optimizer, OptimizeResult from .inducing_point_selectors import InducingPointSelector from .interface import GPflowPredictor, SupportsCovarianceBetweenPoints from .sampler import DecoupledTrajectorySampler, RandomFourierFeatureTrajectorySampler @@ -64,7 +64,6 @@ class GaussianProcessRegression( GPflowPredictor, - TrainableProbabilisticModel, FastUpdateModel, SupportsCovarianceBetweenPoints, SupportsGetInternalData, @@ -247,7 +246,7 @@ def covariance_between_points( return cov - def optimize(self, dataset: Dataset) -> None: + def optimize(self, dataset: Dataset) -> OptimizeResult: """ Optimize the model with the specified `dataset`. @@ -282,8 +281,9 @@ def optimize(self, dataset: Dataset) -> None: self.find_best_model_initialization( self._num_kernel_samples * num_trainable_params_with_priors_or_constraints ) - self.optimizer.optimize(self.model, dataset) + result = self.optimizer.optimize(self.model, dataset) self.update_posterior_cache() + return result def find_best_model_initialization(self, num_kernel_samples: int) -> None: """ @@ -521,7 +521,6 @@ def conditional_predict_y( class SparseGaussianProcessRegression( GPflowPredictor, - TrainableProbabilisticModel, SupportsCovarianceBetweenPoints, SupportsGetInducingVariables, SupportsGetInternalData, @@ -634,14 +633,15 @@ def _ensure_variable_model_data(self) -> None: if not is_variable(self._model.num_data): self._model.num_data = tf.Variable(self._model.num_data, trainable=False) - def optimize(self, dataset: Dataset) -> None: + def optimize(self, dataset: Dataset) -> OptimizeResult: """ Optimize the model with the specified `dataset`. :param dataset: The data with which to optimize the `model`. """ - self.optimizer.optimize(self.model, dataset) + result = self.optimizer.optimize(self.model, dataset) self.update_posterior_cache() + return result def update(self, dataset: Dataset) -> None: self._ensure_variable_model_data() @@ -833,7 +833,6 @@ def get_internal_data(self) -> Dataset: class SparseVariational( GPflowPredictor, - TrainableProbabilisticModel, SupportsCovarianceBetweenPoints, SupportsGetInducingVariables, HasTrajectorySampler, @@ -975,14 +974,15 @@ def update(self, dataset: Dataset) -> None: self._update_inducing_variables(new_inducing_points) self.update_posterior_cache() - def optimize(self, dataset: Dataset) -> None: + def optimize(self, dataset: Dataset) -> OptimizeResult: """ Optimize the model with the specified `dataset`. :param dataset: The data with which to optimize the `model`. 
""" - self.optimizer.optimize(self.model, dataset) + result = self.optimizer.optimize(self.model, dataset) self.update_posterior_cache() + return result def _update_inducing_variables(self, new_inducing_points: TensorType) -> None: """ @@ -1094,7 +1094,6 @@ def trajectory_sampler(self) -> TrajectorySampler[SparseVariational]: class VariationalGaussianProcess( GPflowPredictor, - TrainableProbabilisticModel, SupportsCovarianceBetweenPoints, SupportsGetInducingVariables, HasTrajectorySampler, @@ -1257,7 +1256,7 @@ def update(self, dataset: Dataset, *, jitter: float = DEFAULTS.JITTER) -> None: update_vgp_data(self.model, (dataset.query_points, dataset.observations)) self.update_posterior_cache() - def optimize(self, dataset: Dataset) -> None: + def optimize(self, dataset: Dataset) -> Optional[OptimizeResult]: """ :class:`VariationalGaussianProcess` has a custom `optimize` method that (optionally) permits alternating between standard optimization steps (for kernel parameters) and natural gradient @@ -1292,13 +1291,14 @@ def perform_optimization_step() -> None: # alternate with natgrad optimizations for _ in range(base_optimizer.max_iter): # type: ignore perform_optimization_step() - gpflow.set_trainable(model.q_mu, True) # revert varitional params to trainable + gpflow.set_trainable(model.q_mu, True) # revert variational params to trainable gpflow.set_trainable(model.q_sqrt, True) - + result = None # TODO: find something useful to return else: - self.optimizer.optimize(model, dataset) + result = self.optimizer.optimize(model, dataset) self.update_posterior_cache() + return result def get_inducing_variables(self) -> Tuple[TensorType, TensorType, TensorType, bool]: """ diff --git a/trieste/models/gpflux/models.py b/trieste/models/gpflux/models.py index 9560680845..8f685f580d 100644 --- a/trieste/models/gpflux/models.py +++ b/trieste/models/gpflux/models.py @@ -18,6 +18,7 @@ import dill import gpflow +import keras.callbacks import tensorflow as tf from gpflow.inducing_variables import InducingPoints from gpflux.layers import GPLayer, LatentVariableLayer @@ -338,7 +339,7 @@ def update(self, dataset: Dataset) -> None: inputs = layer(inputs) - def optimize(self, dataset: Dataset) -> None: + def optimize(self, dataset: Dataset) -> keras.callbacks.History: """ Optimize the model with the specified `dataset`. :param dataset: The data with which to optimize the `model`. @@ -370,6 +371,8 @@ def optimize(self, dataset: Dataset) -> None: ): self.optimizer.optimizer.lr.assign(self.original_lr) + return hist + def log(self, dataset: Optional[Dataset] = None) -> None: """ Log model training information at a given optimization step to the Tensorboard. diff --git a/trieste/models/interfaces.py b/trieste/models/interfaces.py index ed83e1c007..d3e5917741 100644 --- a/trieste/models/interfaces.py +++ b/trieste/models/interfaces.py @@ -15,7 +15,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Any, Callable, Generic, Optional, TypeVar +from typing import Any, Callable, Generic, Optional, Sequence, TypeVar import gpflow import tensorflow as tf @@ -128,15 +128,29 @@ def update(self, dataset: Dataset) -> None: raise NotImplementedError @abstractmethod - def optimize(self, dataset: Dataset) -> None: + def optimize(self, dataset: Dataset) -> Any: """ Optimize the model objective with respect to (hyper)parameters given the specified ``dataset``. :param dataset: The data with which to train the model. + :return: Any (optimizer-specific) optimization result. 
""" raise NotImplementedError + def optimize_and_save_result(self, dataset: Dataset) -> None: + """ + Optimize the model objective and save the optimization result in last_result. + """ + setattr(self, "_last_optimization_result", self.optimize(dataset)) + + @property + def last_result(self) -> Optional[Any]: + """ + The last saved (optimizer-specific) optimization result. + """ + return getattr(self, "_last_optimization_result") + @runtime_checkable class SupportsPredictJoint(ProbabilisticModel, Protocol): @@ -428,7 +442,7 @@ def update(self, dataset: Dataset) -> None: for model, obs in zip(self._models, observations): model.update(Dataset(dataset.query_points, obs)) - def optimize(self, dataset: Dataset) -> None: + def optimize(self, dataset: Dataset) -> Sequence[Any]: """ Optimize all the wrapped models on their corresponding data. The data for each model is extracted by splitting the observations in ``dataset`` along the event axis according to the @@ -437,9 +451,12 @@ def optimize(self, dataset: Dataset) -> None: :param dataset: The query points and observations for *all* the wrapped models. """ observations = tf.split(dataset.observations, self._event_sizes, axis=-1) + results = [] for model, obs in zip(self._models, observations): - model.optimize(Dataset(dataset.query_points, obs)) + results.append(model.optimize(Dataset(dataset.query_points, obs))) + + return results class HasReparamSamplerModelStack(ModelStack[HasReparamSampler], HasReparamSampler): diff --git a/trieste/models/keras/models.py b/trieste/models/keras/models.py index 2020653636..05b82e49b1 100644 --- a/trieste/models/keras/models.py +++ b/trieste/models/keras/models.py @@ -18,6 +18,7 @@ from typing import Any, Dict, Optional import dill +import keras.callbacks import tensorflow as tf import tensorflow_probability as tfp import tensorflow_probability.python.distributions as tfd @@ -349,7 +350,7 @@ def update(self, dataset: Dataset) -> None: """ return - def optimize(self, dataset: Dataset) -> None: + def optimize(self, dataset: Dataset) -> keras.callbacks.History: """ Optimize the underlying Keras ensemble model with the specified ``dataset``. @@ -393,6 +394,8 @@ def optimize(self, dataset: Dataset) -> None: ): self.optimizer.optimizer.lr.assign(self.original_lr) + return history + def log(self, dataset: Optional[Dataset] = None) -> None: """ Log model training information at a given optimization step to the Tensorboard.