Merge pull request #63 from MSDLLCpapers/surrogate-models
Improved DNN surrogate model
kstone40 authored Sep 17, 2024
2 parents ca96f31 + b712c9a commit 1a44adf
Showing 11 changed files with 230 additions and 35 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -1,6 +1,13 @@
# Changelog

## [Untracked Changes]
### Added
- Improved methods for fitting PyTorch surrogates, including auto-stopping by parameter value norm

### Modified
- Greatly reduced the number of samples for DNN posterior, speeding up optimization
- Stabilized the mean estimate of ensemble surrogates by avoiding resampling
- Disabled root caching for ensemble surrogates during optimization

## [0.8.5]
### Added
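For context: the changelog's "auto-stopping by parameter value norm" refers to the new `fit_pytorch` helper in `obsidian/surrogates/utils.py`, which this diff imports but does not show. A minimal sketch of the idea, assuming training stops once the norm of the parameter update falls below a tolerance (the real signature, learning rate, and defaults may differ):

```python
import torch
import torch.nn as nn
import torch.optim as optim

def fit_pytorch(model: nn.Module, X: torch.Tensor, y: torch.Tensor,
                loss_fcn: nn.Module | None = None,
                max_iter: int = 1000, tol: float = 1e-4,
                verbose: bool = False) -> None:
    """Hypothetical sketch of the fit helper, not the repo's actual implementation."""
    loss_fcn = nn.MSELoss() if loss_fcn is None else loss_fcn
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    model.train()
    prev = torch.cat([p.detach().flatten() for p in model.parameters()])
    for epoch in range(max_iter):
        optimizer.zero_grad()
        loss = loss_fcn(model(X), y)
        loss.backward()
        optimizer.step()
        curr = torch.cat([p.detach().flatten() for p in model.parameters()])
        # Auto-stop when the parameters effectively stop moving
        if torch.norm(curr - prev) < tol:
            break
        prev = curr
        if verbose and epoch % 50 == 0:
            print(f'Epoch {epoch}: Loss {loss.item()}')
    model.eval()
```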
7 changes: 5 additions & 2 deletions obsidian/acquisition/custom.py
@@ -64,10 +64,11 @@ class qSpaceFill(MCAcquisitionFunction):
"""
def __init__(self,
model: Model,
X_baseline: Tensor,
sampler: MCSampler | None = None,
objective: MCAcquisitionObjective | None = None,
posterior_transform: PosteriorTransform | None = None,
X_pending: Tensor | None = None):
X_pending: Tensor | None = None,):

if sampler is None:
sampler = SobolQMCNormalSampler(sample_shape=torch.Size([512]))
@@ -80,6 +81,8 @@ def __init__(self,

super().__init__(model=model, sampler=sampler, objective=objective,
posterior_transform=posterior_transform, X_pending=X_pending)

self.register_buffer('X_baseline', X_baseline)

@t_batch_mode_transform()
def forward(self,
@@ -88,7 +91,7 @@
Evaluate the acquisition function on the candidate set x
"""
# x dimensions: b * q * d
x_train = self.model.train_inputs[0][0] # train_inputs is a list of tuples
x_train = self.X_baseline

# For sequential mode, add pending data points to "train"
if self.X_pending is not None:
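For context: `qSpaceFill` previously recovered its training inputs from `model.train_inputs`, a GP-specific attribute that ensemble surrogates like the new DNN may not expose; the baseline is now passed explicitly and registered as a buffer. A hypothetical standalone usage sketch (the GP surrogate and the shapes are placeholders):

```python
import torch
from botorch.models import SingleTaskGP
from obsidian.acquisition.custom import qSpaceFill

X = torch.rand(20, 3, dtype=torch.float64)   # placeholder training inputs
Y = torch.rand(20, 1, dtype=torch.float64)
model = SingleTaskGP(X, Y)                   # any BoTorch Model should work here

aq = qSpaceFill(model=model, X_baseline=X)   # baseline is now explicit
scores = aq(torch.rand(5, 1, 3, dtype=torch.float64))  # candidates, b x q x d
```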
27 changes: 20 additions & 7 deletions obsidian/optimizer/bayesian.py
@@ -3,7 +3,7 @@
from .base import Optimizer

from obsidian.parameters import ParamSpace, Target, Task
from obsidian.surrogates import SurrogateBoTorch, DNN
from obsidian.surrogates import SurrogateBoTorch, EnsembleModel
from obsidian.acquisition import aq_class_dict, aq_defaults, aq_hp_defaults, valid_aqs
from obsidian.surrogates import model_class_dict
from obsidian.objectives import Index_Objective, Objective_Sequence
@@ -18,7 +18,7 @@
from botorch.sampling.index_sampler import IndexSampler
from botorch.models.model_list_gp_regression import ModelListGP
from botorch.models.gpytorch import GPyTorchModel
from botorch.models.model import ModelList
from botorch.models.model import ModelList, Model
from botorch.utils.sampling import draw_sobol_samples
from botorch.utils.multi_objective.box_decompositions.non_dominated import NondominatedPartitioning

@@ -478,6 +478,7 @@ def _parse_aq_kwargs(self,
hps: dict,
m_batch: int,
target_locs: list[int],
model: Model,
X_t_pending: Tensor | None = None,
objective: MCAcquisitionObjective | None = None) -> dict:
"""
@@ -533,7 +534,9 @@
# Noisy aqs require X_train reference
if aq in ['NEI', 'NEHVI', 'NParEGO']:
aq_kwargs['X_baseline'] = X_baseline

if any(isinstance(m, EnsembleModel) for m in model.models):
aq_kwargs['cache_root'] = False

# Hypervolume requires reference point
if aq in ['EHVI', 'NEHVI']:

@@ -570,6 +573,9 @@
w = w/torch.sum(torch.abs(w))
aq_kwargs['scalarization_weights'] = w

if aq == 'SF':
aq_kwargs['X_baseline'] = X_baseline

return aq_kwargs

def suggest(self,
@@ -712,7 +718,7 @@
if not isinstance(model, ModelListGP):
samplers = []
for m in model.models:
if isinstance(m, DNN):
if isinstance(m, EnsembleModel):
sampler_i = IndexSampler(sample_shape=torch.Size([optim_samples]), seed=self.seed)
else:
sampler_i = SobolQMCNormalSampler(sample_shape=torch.Size([optim_samples]), seed=self.seed)
@@ -757,7 +763,9 @@
# Use aq_kwargs so that extra unnecessary ones in hps get removed for certain aq funcs
aq_kwargs = {'model': model, 'sampler': sampler, 'X_pending': X_t_pending}

aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, m_batch, target_locs, X_t_pending, objective))
aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, m_batch,
target_locs, model,
X_t_pending, objective))

# Raise errors related to certain constraints
if aq_str in ['UCB', 'Mean', 'TS', 'SF', 'SR', 'NIPV']:
@@ -812,7 +820,10 @@

# Hypervolume aqs fail with X_t_pending when optim_sequential=True
if aq_str in ['NEHVI', 'EHVI']:
optim_sequential = False
if optim_sequential and X_t_pending is not None:
warnings.warn('Hypervolume aqs with X_pending require joint optimization. \
Setting optim_sequential to False', UserWarning)
optim_sequential = False

# If it's random search, no need to do optimization; Otherwise, initialize the aq function and optimize
if aq_str == 'RS':
@@ -978,7 +989,9 @@ def evaluate(self,
# Use aq_kwargs so that extra unnecessary ones in hps get removed for certain aq funcs
aq_kwargs = {'model': model, 'sampler': None, 'X_pending': X_t_pending}

aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, X_suggest.shape[0], target_locs, X_t_pending, objective))
aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, X_suggest.shape[0],
target_locs, model,
X_t_pending, objective))

# If it's random search, no need to evaluate aq
if aq_str == 'RS':
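For context: two ensemble-specific branches are added above. Noisy acquisition functions cache a root decomposition of the baseline posterior covariance by default, which presumes a GP-style posterior, so `cache_root` is disabled whenever a sub-model is an `EnsembleModel`; likewise, ensemble posteriors are sampled by indexing ensemble members, hence `IndexSampler` rather than `SobolQMCNormalSampler`. A condensed sketch of the wiring for a single surrogate (the diff's actual plumbing goes through `aq_class_dict` and per-model sampler lists):

```python
import torch
from botorch.acquisition.monte_carlo import qNoisyExpectedImprovement
from botorch.models.ensemble import EnsembleModel
from botorch.sampling.index_sampler import IndexSampler
from botorch.sampling.normal import SobolQMCNormalSampler

def make_noisy_ei(model, X_baseline, optim_samples=512, seed=None):
    """Hypothetical helper mirroring the diff's ensemble-aware choices."""
    is_ensemble = isinstance(model, EnsembleModel)
    sampler_cls = IndexSampler if is_ensemble else SobolQMCNormalSampler
    sampler = sampler_cls(sample_shape=torch.Size([optim_samples]), seed=seed)
    return qNoisyExpectedImprovement(
        model=model, X_baseline=X_baseline, sampler=sampler,
        cache_root=not is_ensemble,  # root caching assumes a GP posterior
    )
```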
7 changes: 6 additions & 1 deletion obsidian/plotting/plotly.py
@@ -10,7 +10,6 @@
import plotly.graph_objects as go
from plotly.graph_objects import Figure
from plotly.subplots import make_subplots
from sklearn.manifold import MDS

import pandas as pd
import numpy as np
@@ -99,6 +98,12 @@ def MDS_plot(campaign: Campaign) -> Figure:
Returns:
fig (Figure): The MDS plot
"""
try:
from sklearn.manifold import MDS
except ImportError:
raise ImportError('The `sklearn` package (>1.0) is required for the MDS plot. \
Please install it using `pip install scikit-learn`')

mds = MDS(n_components=2)
X_mds = mds.fit_transform(campaign.X_space.encode(campaign.X))

1 change: 1 addition & 0 deletions obsidian/surrogates/__init__.py
@@ -4,3 +4,4 @@
from .custom_GP import *
from .custom_torch import *
from .config import *
from .utils import *
27 changes: 11 additions & 16 deletions obsidian/surrogates/botorch.py
@@ -2,6 +2,7 @@

from .base import SurrogateModel
from .config import model_class_dict
from .utils import fit_pytorch

from obsidian.utils import tensordict_to_dict, dict_to_tensordict
from obsidian.exceptions import SurrogateFitError
@@ -10,11 +11,11 @@
from botorch.fit import fit_gpytorch_mll
from botorch.optim.fit import fit_gpytorch_mll_torch, fit_gpytorch_mll_scipy
from botorch.models.gpytorch import GPyTorchModel
from botorch.models.ensemble import EnsembleModel
from gpytorch.mlls import ExactMarginalLogLikelihood

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import warnings
@@ -156,20 +157,7 @@ def fit(self,
raise SurrogateFitError('BoTorch model failed to fit')
else:
self.loss_fcn = nn.MSELoss()
self.optimizer = optim.Adam(self.torch_model.parameters(), lr=1e-2)

self.torch_model.train()
for epoch in range(200):
self.optimizer.zero_grad()
output = self.torch_model(X_p)
loss = self.loss_fcn(output, y_p)
loss.backward()
self.optimizer.step()

if (epoch % 50 == 0 and self.verbose):
print(f'Epoch {epoch}: Loss {loss.item()}')

self.torch_model.eval()
fit_pytorch(self.torch_model, X_p, y_p, loss_fcn=self.loss_fcn, verbose=self.verbose)

self.is_fit = True

@@ -229,7 +217,14 @@ def predict(self,
X_p = self._prepare(X)

pred_posterior = self.torch_model.posterior(X_p)
mu = pred_posterior.mean.detach().cpu().squeeze(-1)

# We would prefer to have stability in the mean of ensemble models,
# So, we will not re-sample for prediction but use forward methods
if isinstance(self.torch_model, EnsembleModel):
mu = self.torch_model.forward(X_p).detach()
else:
mu = pred_posterior.mean.detach().cpu().squeeze(-1)

if q is not None:
if (q < 0) or (q > 1):
raise ValueError('Quantile must be between 0 and 1')
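For context on the `predict` change: for an ensemble surrogate, `posterior(X).mean` averages a fresh batch of stochastic forward draws, so repeated calls return slightly different means, while a direct `forward` pass in eval mode is deterministic. A small sketch of the distinction, assuming `dnn` is a fitted DNN surrogate from this PR:

```python
import torch

X_p = torch.rand(10, 3, dtype=torch.float64)

mu_mc = dnn.posterior(X_p).mean      # mean over MC-dropout draws; varies call to call
mu_det = dnn.forward(X_p).detach()   # single eval-mode pass; stable across calls
```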
74 changes: 71 additions & 3 deletions obsidian/surrogates/custom_torch.py
@@ -1,11 +1,21 @@
"""Custom implementations of PyTorch surrogate models using BoTorch API"""

from botorch.models.model import Model
from .utils import fit_pytorch

from obsidian.config import TORCH_DTYPE

from botorch.models.model import FantasizeMixin
from botorch.models.ensemble import EnsembleModel, Model
from botorch.posteriors.ensemble import Posterior, EnsemblePosterior

import torch
import torch.nn as nn
from torch.nn import Module
from torch import Tensor

from typing import TypeVar
TFantasizeMixin = TypeVar("TFantasizeMixin", bound="FantasizeMixin")


class DNNPosterior(EnsemblePosterior):

@@ -17,7 +27,7 @@ def quantile(self, value: Tensor) -> Tensor:
return self.values.quantile(q=value.to(self.values), dim=-3, interpolation='linear')


class DNN(Model):
class DNN(EnsembleModel, FantasizeMixin):
def __init__(self,
train_X: Tensor,
train_Y: Tensor,
@@ -33,6 +43,13 @@ def __init__(self,
if p_dropout < 0 or p_dropout > 1:
raise ValueError("p_dropout must be in [0, 1]")

self.register_buffer('train_X', train_X)
self.register_buffer('train_Y', train_Y)
self.register_buffer('p_dropout', torch.tensor(p_dropout, dtype=TORCH_DTYPE))
self.register_buffer('h_width', torch.tensor(h_width, dtype=torch.int))
self.register_buffer('h_layers', torch.tensor(h_layers, dtype=torch.int))
self.register_buffer('num_outputs', torch.tensor(num_outputs, dtype=torch.int))

self.input_layer = nn.Sequential(
nn.Linear(train_X.shape[-1], h_width),
nn.PReLU(),
@@ -49,6 +66,7 @@

self.outer_layer = nn.Linear(h_width, num_outputs)
self._num_outputs = num_outputs
self.to(TORCH_DTYPE)

def forward(self,
x: Tensor) -> Tensor:
@@ -60,7 +78,7 @@

def posterior(self,
X: Tensor,
n_sample: int = 16384,
n_sample: int = 512,
output_indices: list[int] = None,
observation_noise: bool | Tensor = False) -> Posterior:
"""Calculates the posterior distribution of the model at X"""
@@ -86,3 +104,53 @@ def posterior(self,
def num_outputs(self) -> int:
"""Number of outputs of the model"""
return self._num_outputs

def transform_inputs(self,
X: Tensor,
input_transform: Module = None) -> Tensor:
"""
Transform inputs.
Args:
X: A tensor of inputs
input_transform: A Module that performs the input transformation.
Returns:
A tensor of transformed inputs
"""
if input_transform is not None:
input_transform.to(X)
return input_transform(X)
try:
return self.input_transform(X)
except AttributeError:
return X

def condition_on_observations(self,
X: Tensor,
Y: Tensor) -> TFantasizeMixin:
"""
Condition the model to new observations, returning a fantasy model
"""

X_c = torch.concat((self.train_X, X), axis=0)
Y_c = torch.concat((self.train_Y, Y), axis=0)

# Create a new model based on the current one
fantasy = self.__class__(train_X=X_c, train_Y=Y_c,
p_dropout=float(self.p_dropout),
h_width=int(self.h_width), h_layers=int(self.h_layers),
num_outputs=int(self.num_outputs))

# Fit to the new data
fit_pytorch(fantasy, X_c, Y_c)

return fantasy

def fantasize(self,
X: Tensor) -> Model:

Y_f = self.forward(X).detach()
fantasy = self.condition_on_observations(X, Y_f)

return fantasy
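
For context: subclassing `EnsembleModel` and `FantasizeMixin` gives the DNN the lookahead interface BoTorch expects, and registering the training data and hyperparameters as buffers lets `condition_on_observations` rebuild and refit the network from its own state. A hypothetical end-to-end sketch (hyperparameter defaults are assumed; see the `__init__` signature above):

```python
import torch
from obsidian.surrogates.custom_torch import DNN
from obsidian.surrogates.utils import fit_pytorch

train_X = torch.rand(16, 3, dtype=torch.float64)
train_Y = torch.rand(16, 1, dtype=torch.float64)

dnn = DNN(train_X, train_Y)        # assumes usable defaults for p_dropout, h_width, h_layers
fit_pytorch(dnn, train_X, train_Y)

post = dnn.posterior(train_X[:4])  # EnsemblePosterior over n_sample forward draws
X_new = torch.rand(2, 3, dtype=torch.float64)
fantasy = dnn.fantasize(X_new)     # conditions on the model's own predictions and refits
```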