Merge pull request #63 from MSDLLCpapers/surrogate-models
Improved DNN surrogate model
kstone40 authored Sep 17, 2024
2 parents ca96f31 + b712c9a commit 1a44adf
Showing 11 changed files with 230 additions and 35 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -1,6 +1,13 @@
# Changelog

## [Untracked Changes]
### Added
- Improved methods for fitting PyTorch surrogates, including auto-stopping by parameter value norm

### Modified
- Greatly reduced the number of samples for DNN posterior, speeding up optimization
- Stabilized the mean estimate of ensemble surrogates by avoiding resampling
- Disabled root caching for ensemble surrogates during optimization

## [0.8.5]
### Added
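For context: the changelog's "auto-stopping by parameter value norm" refers to the new `fit_pytorch` helper in `obsidian/surrogates/utils.py`, which this diff imports but does not show. A minimal sketch of the idea, assuming training stops once the norm of the parameter update falls below a tolerance (the real signature, learning rate, and defaults may differ):

```python
import torch
import torch.nn as nn
import torch.optim as optim

def fit_pytorch(model: nn.Module, X: torch.Tensor, y: torch.Tensor,
                loss_fcn: nn.Module | None = None,
                max_iter: int = 1000, tol: float = 1e-4,
                verbose: bool = False) -> None:
    """Hypothetical sketch of the fit helper, not the repo's actual implementation."""
    loss_fcn = nn.MSELoss() if loss_fcn is None else loss_fcn
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    model.train()
    prev = torch.cat([p.detach().flatten() for p in model.parameters()])
    for epoch in range(max_iter):
        optimizer.zero_grad()
        loss = loss_fcn(model(X), y)
        loss.backward()
        optimizer.step()
        curr = torch.cat([p.detach().flatten() for p in model.parameters()])
        # Auto-stop when the parameters effectively stop moving
        if torch.norm(curr - prev) < tol:
            break
        prev = curr
        if verbose and epoch % 50 == 0:
            print(f'Epoch {epoch}: Loss {loss.item()}')
    model.eval()
```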
7 changes: 5 additions & 2 deletions obsidian/acquisition/custom.py
@@ -64,10 +64,11 @@ class qSpaceFill(MCAcquisitionFunction):
"""
def __init__(self,
model: Model,
X_baseline: Tensor,
sampler: MCSampler | None = None,
objective: MCAcquisitionObjective | None = None,
posterior_transform: PosteriorTransform | None = None,
X_pending: Tensor | None = None):
X_pending: Tensor | None = None,):

if sampler is None:
sampler = SobolQMCNormalSampler(sample_shape=torch.Size([512]))
@@ -80,6 +81,8 @@ def __init__(self,

super().__init__(model=model, sampler=sampler, objective=objective,
posterior_transform=posterior_transform, X_pending=X_pending)

self.register_buffer('X_baseline', X_baseline)

@t_batch_mode_transform()
def forward(self,
@@ -88,7 +91,7 @@
Evaluate the acquisition function on the candidate set x
"""
# x dimensions: b * q * d
x_train = self.model.train_inputs[0][0] # train_inputs is a list of tuples
x_train = self.X_baseline

# For sequential mode, add pending data points to "train"
if self.X_pending is not None:
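For context: `qSpaceFill` previously recovered its training inputs from `model.train_inputs`, a GP-specific attribute that ensemble surrogates like the new DNN may not expose; the baseline is now passed explicitly and registered as a buffer. A hypothetical standalone usage sketch (the GP surrogate and the shapes are placeholders):

```python
import torch
from botorch.models import SingleTaskGP
from obsidian.acquisition.custom import qSpaceFill

X = torch.rand(20, 3, dtype=torch.float64)   # placeholder training inputs
Y = torch.rand(20, 1, dtype=torch.float64)
model = SingleTaskGP(X, Y)                   # any BoTorch Model should work here

aq = qSpaceFill(model=model, X_baseline=X)   # baseline is now explicit
scores = aq(torch.rand(5, 1, 3, dtype=torch.float64))  # candidates, b x q x d
```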
27 changes: 20 additions & 7 deletions obsidian/optimizer/bayesian.py
@@ -3,7 +3,7 @@
from .base import Optimizer

from obsidian.parameters import ParamSpace, Target, Task
from obsidian.surrogates import SurrogateBoTorch, DNN
from obsidian.surrogates import SurrogateBoTorch, EnsembleModel
from obsidian.acquisition import aq_class_dict, aq_defaults, aq_hp_defaults, valid_aqs
from obsidian.surrogates import model_class_dict
from obsidian.objectives import Index_Objective, Objective_Sequence
@@ -18,7 +18,7 @@
from botorch.sampling.index_sampler import IndexSampler
from botorch.models.model_list_gp_regression import ModelListGP
from botorch.models.gpytorch import GPyTorchModel
from botorch.models.model import ModelList
from botorch.models.model import ModelList, Model
from botorch.utils.sampling import draw_sobol_samples
from botorch.utils.multi_objective.box_decompositions.non_dominated import NondominatedPartitioning

@@ -478,6 +478,7 @@ def _parse_aq_kwargs(self,
hps: dict,
m_batch: int,
target_locs: list[int],
model: Model,
X_t_pending: Tensor | None = None,
objective: MCAcquisitionObjective | None = None) -> dict:
"""
@@ -533,7 +534,9 @@
# Noisy aqs require X_train reference
if aq in ['NEI', 'NEHVI', 'NParEGO']:
aq_kwargs['X_baseline'] = X_baseline

if any(isinstance(m, EnsembleModel) for m in model.models):
aq_kwargs['cache_root'] = False

# Hypervolume requires reference point
if aq in ['EHVI', 'NEHVI']:

@@ -570,6 +573,9 @@
w = w/torch.sum(torch.abs(w))
aq_kwargs['scalarization_weights'] = w

if aq == 'SF':
aq_kwargs['X_baseline'] = X_baseline

return aq_kwargs

def suggest(self,
@@ -712,7 +718,7 @@
if not isinstance(model, ModelListGP):
samplers = []
for m in model.models:
if isinstance(m, DNN):
if isinstance(m, EnsembleModel):
sampler_i = IndexSampler(sample_shape=torch.Size([optim_samples]), seed=self.seed)
else:
sampler_i = SobolQMCNormalSampler(sample_shape=torch.Size([optim_samples]), seed=self.seed)
@@ -757,7 +763,9 @@
# Use aq_kwargs so that extra unnecessary ones in hps get removed for certain aq funcs
aq_kwargs = {'model': model, 'sampler': sampler, 'X_pending': X_t_pending}

aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, m_batch, target_locs, X_t_pending, objective))
aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, m_batch,
target_locs, model,
X_t_pending, objective))

# Raise errors related to certain constraints
if aq_str in ['UCB', 'Mean', 'TS', 'SF', 'SR', 'NIPV']:
@@ -812,7 +820,10 @@

# Hypervolume aqs fail with X_t_pending when optim_sequential=True
if aq_str in ['NEHVI', 'EHVI']:
optim_sequential = False
if optim_sequential and X_t_pending is not None:
warnings.warn('Hypervolume aqs with X_pending require joint optimization. \
Setting optim_sequential to False', UserWarning)
optim_sequential = False

# If it's random search, no need to do optimization; Otherwise, initialize the aq function and optimize
if aq_str == 'RS':
@@ -978,7 +989,9 @@ def evaluate(self,
# Use aq_kwargs so that extra unnecessary ones in hps get removed for certain aq funcs
aq_kwargs = {'model': model, 'sampler': None, 'X_pending': X_t_pending}

aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, X_suggest.shape[0], target_locs, X_t_pending, objective))
aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, X_suggest.shape[0],
target_locs, model,
X_t_pending, objective))

# If it's random search, no need to evaluate aq
if aq_str == 'RS':
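For context: two ensemble-specific branches are added above. Noisy acquisition functions cache a root decomposition of the baseline posterior covariance by default, which presumes a GP-style posterior, so `cache_root` is disabled whenever a sub-model is an `EnsembleModel`; likewise, ensemble posteriors are sampled by indexing ensemble members, hence `IndexSampler` rather than `SobolQMCNormalSampler`. A condensed sketch of the wiring for a single surrogate (the diff's actual plumbing goes through `aq_class_dict` and per-model sampler lists):

```python
import torch
from botorch.acquisition.monte_carlo import qNoisyExpectedImprovement
from botorch.models.ensemble import EnsembleModel
from botorch.sampling.index_sampler import IndexSampler
from botorch.sampling.normal import SobolQMCNormalSampler

def make_noisy_ei(model, X_baseline, optim_samples=512, seed=None):
    """Hypothetical helper mirroring the diff's ensemble-aware choices."""
    is_ensemble = isinstance(model, EnsembleModel)
    sampler_cls = IndexSampler if is_ensemble else SobolQMCNormalSampler
    sampler = sampler_cls(sample_shape=torch.Size([optim_samples]), seed=seed)
    return qNoisyExpectedImprovement(
        model=model, X_baseline=X_baseline, sampler=sampler,
        cache_root=not is_ensemble,  # root caching assumes a GP posterior
    )
```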
7 changes: 6 additions & 1 deletion obsidian/plotting/plotly.py
@@ -10,7 +10,6 @@
import plotly.graph_objects as go
from plotly.graph_objects import Figure
from plotly.subplots import make_subplots
from sklearn.manifold import MDS

import pandas as pd
import numpy as np
@@ -99,6 +98,12 @@ def MDS_plot(campaign: Campaign) -> Figure:
Returns:
fig (Figure): The MDS plot
"""
try:
from sklearn.manifold import MDS
except ImportError:
raise ImportError('The `sklearn` package (>1.0) is required for the MDS plot. \
Please install it using `pip install scikit-learn`')

mds = MDS(n_components=2)
X_mds = mds.fit_transform(campaign.X_space.encode(campaign.X))

1 change: 1 addition & 0 deletions obsidian/surrogates/__init__.py
@@ -4,3 +4,4 @@
from .custom_GP import *
from .custom_torch import *
from .config import *
from .utils import *
27 changes: 11 additions & 16 deletions obsidian/surrogates/botorch.py
@@ -2,6 +2,7 @@

from .base import SurrogateModel
from .config import model_class_dict
from .utils import fit_pytorch

from obsidian.utils import tensordict_to_dict, dict_to_tensordict
from obsidian.exceptions import SurrogateFitError
@@ -10,11 +11,11 @@
from botorch.fit import fit_gpytorch_mll
from botorch.optim.fit import fit_gpytorch_mll_torch, fit_gpytorch_mll_scipy
from botorch.models.gpytorch import GPyTorchModel
from botorch.models.ensemble import EnsembleModel
from gpytorch.mlls import ExactMarginalLogLikelihood

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import warnings
@@ -156,20 +157,7 @@ def fit(self,
raise SurrogateFitError('BoTorch model failed to fit')
else:
self.loss_fcn = nn.MSELoss()
self.optimizer = optim.Adam(self.torch_model.parameters(), lr=1e-2)

self.torch_model.train()
for epoch in range(200):
self.optimizer.zero_grad()
output = self.torch_model(X_p)
loss = self.loss_fcn(output, y_p)
loss.backward()
self.optimizer.step()

if (epoch % 50 == 0 and self.verbose):
print(f'Epoch {epoch}: Loss {loss.item()}')

self.torch_model.eval()
fit_pytorch(self.torch_model, X_p, y_p, loss_fcn=self.loss_fcn, verbose=self.verbose)

self.is_fit = True

@@ -229,7 +217,14 @@ def predict(self,
X_p = self._prepare(X)

pred_posterior = self.torch_model.posterior(X_p)
mu = pred_posterior.mean.detach().cpu().squeeze(-1)

# We would prefer to have stability in the mean of ensemble models,
# So, we will not re-sample for prediction but use forward methods
if isinstance(self.torch_model, EnsembleModel):
mu = self.torch_model.forward(X_p).detach()
else:
mu = pred_posterior.mean.detach().cpu().squeeze(-1)

if q is not None:
if (q < 0) or (q > 1):
raise ValueError('Quantile must be between 0 and 1')
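For context on the `predict` change: for an ensemble surrogate, `posterior(X).mean` averages a fresh batch of stochastic forward draws, so repeated calls return slightly different means, while a direct `forward` pass in eval mode is deterministic. A small sketch of the distinction, assuming `dnn` is a fitted DNN surrogate from this PR:

```python
import torch

X_p = torch.rand(10, 3, dtype=torch.float64)

mu_mc = dnn.posterior(X_p).mean      # mean over MC-dropout draws; varies call to call
mu_det = dnn.forward(X_p).detach()   # single eval-mode pass; stable across calls
```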
74 changes: 71 additions & 3 deletions obsidian/surrogates/custom_torch.py
@@ -1,11 +1,21 @@
"""Custom implementations of PyTorch surrogate models using BoTorch API"""

from botorch.models.model import Model
from .utils import fit_pytorch

from obsidian.config import TORCH_DTYPE

from botorch.models.model import FantasizeMixin
from botorch.models.ensemble import EnsembleModel, Model
from botorch.posteriors.ensemble import Posterior, EnsemblePosterior

import torch
import torch.nn as nn
from torch.nn import Module
from torch import Tensor

from typing import TypeVar
TFantasizeMixin = TypeVar("TFantasizeMixin", bound="FantasizeMixin")


class DNNPosterior(EnsemblePosterior):

@@ -17,7 +27,7 @@ def quantile(self, value: Tensor) -> Tensor:
return self.values.quantile(q=value.to(self.values), dim=-3, interpolation='linear')


class DNN(Model):
class DNN(EnsembleModel, FantasizeMixin):
def __init__(self,
train_X: Tensor,
train_Y: Tensor,
@@ -33,6 +43,13 @@ def __init__(self,
if p_dropout < 0 or p_dropout > 1:
raise ValueError("p_dropout must be in [0, 1]")

self.register_buffer('train_X', train_X)
self.register_buffer('train_Y', train_Y)
self.register_buffer('p_dropout', torch.tensor(p_dropout, dtype=TORCH_DTYPE))
self.register_buffer('h_width', torch.tensor(h_width, dtype=torch.int))
self.register_buffer('h_layers', torch.tensor(h_layers, dtype=torch.int))
self.register_buffer('num_outputs', torch.tensor(num_outputs, dtype=torch.int))

self.input_layer = nn.Sequential(
nn.Linear(train_X.shape[-1], h_width),
nn.PReLU(),
@@ -49,6 +66,7 @@

self.outer_layer = nn.Linear(h_width, num_outputs)
self._num_outputs = num_outputs
self.to(TORCH_DTYPE)

def forward(self,
x: Tensor) -> Tensor:
@@ -60,7 +78,7 @@

def posterior(self,
X: Tensor,
n_sample: int = 16384,
n_sample: int = 512,
output_indices: list[int] = None,
observation_noise: bool | Tensor = False) -> Posterior:
"""Calculates the posterior distribution of the model at X"""
@@ -86,3 +104,53 @@ def posterior(self,
def num_outputs(self) -> int:
"""Number of outputs of the model"""
return self._num_outputs

def transform_inputs(self,
X: Tensor,
input_transform: Module = None) -> Tensor:
"""
Transform inputs.
Args:
X: A tensor of inputs
input_transform: A Module that performs the input transformation.
Returns:
A tensor of transformed inputs
"""
if input_transform is not None:
input_transform.to(X)
return input_transform(X)
try:
return self.input_transform(X)
except AttributeError:
return X

def condition_on_observations(self,
X: Tensor,
Y: Tensor) -> TFantasizeMixin:
"""
Condition the model to new observations, returning a fantasy model
"""

X_c = torch.concat((self.train_X, X), axis=0)
Y_c = torch.concat((self.train_Y, Y), axis=0)

# Create a new model based on the current one
fantasy = self.__class__(train_X=X_c, train_Y=Y_c,
p_dropout=float(self.p_dropout),
h_width=int(self.h_width), h_layers=int(self.h_layers),
num_outputs=int(self.num_outputs))

# Fit to the new data
fit_pytorch(fantasy, X_c, Y_c)

return fantasy

def fantasize(self,
X: Tensor) -> Model:

Y_f = self.forward(X).detach()
fantasy = self.condition_on_observations(X, Y_f)

return fantasy
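
For context: subclassing `EnsembleModel` and `FantasizeMixin` gives the DNN the lookahead interface BoTorch expects, and registering the training data and hyperparameters as buffers lets `condition_on_observations` rebuild and refit the network from its own state. A hypothetical end-to-end sketch (hyperparameter defaults are assumed; see the `__init__` signature above):

```python
import torch
from obsidian.surrogates.custom_torch import DNN
from obsidian.surrogates.utils import fit_pytorch

train_X = torch.rand(16, 3, dtype=torch.float64)
train_Y = torch.rand(16, 1, dtype=torch.float64)

dnn = DNN(train_X, train_Y)        # assumes usable defaults for p_dropout, h_width, h_layers
fit_pytorch(dnn, train_X, train_Y)

post = dnn.posterior(train_X[:4])  # EnsemblePosterior over n_sample forward draws
X_new = torch.rand(2, 3, dtype=torch.float64)
fantasy = dnn.fantasize(X_new)     # conditions on the model's own predictions and refits
```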