Sync plotting branch #65

Merged · 17 commits · Sep 17, 2024
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -1,6 +1,13 @@
# Changelog

## [Untracked Changes]
### Added
- Improved methods for fitting PyTorch surrogates, including auto-stopping by parameter value norm

### Modified
- Greatly reduced the number of samples for DNN posterior, speeding up optimization
- Stabilized the mean estimate of ensemble surrogates by avoiding resampling
- Disabled root caching for ensemble surrogates during optimization

## [0.8.5]
### Added
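The "auto-stopping by parameter value norm" entry above refers to the new `fit_pytorch` helper that `obsidian/surrogates/botorch.py` and `obsidian/surrogates/custom_torch.py` import from `obsidian.surrogates.utils` later in this diff. Its body is not part of the diff, so the following is only a sketch of such a training loop; the names `max_epochs` and `tol` and their defaults are assumptions.

```python
# Hypothetical sketch of a fit_pytorch-style training loop with auto-stopping
# on the norm of the parameter update; the real obsidian.surrogates.utils
# implementation is not shown in this diff, so names and defaults are assumed.
import torch
import torch.nn as nn
import torch.optim as optim


def fit_pytorch_sketch(model: nn.Module,
                       X: torch.Tensor,
                       y: torch.Tensor,
                       loss_fcn: nn.Module | None = None,
                       max_epochs: int = 200,
                       tol: float = 1e-4,
                       verbose: bool = False) -> None:
    """Train a torch model, stopping early once the parameters stop moving."""
    loss_fcn = nn.MSELoss() if loss_fcn is None else loss_fcn
    optimizer = optim.Adam(model.parameters(), lr=1e-2)

    model.train()
    prev = torch.cat([p.detach().flatten() for p in model.parameters()])
    for epoch in range(max_epochs):
        optimizer.zero_grad()
        loss = loss_fcn(model(X), y)
        loss.backward()
        optimizer.step()

        # Auto-stop when the parameter vector barely changes between epochs
        params = torch.cat([p.detach().flatten() for p in model.parameters()])
        d_norm = torch.norm(params - prev)
        prev = params
        if verbose and epoch % 50 == 0:
            print(f'Epoch {epoch}: Loss {loss.item():.4f}, |d_theta| = {d_norm:.2e}')
        if d_norm < tol:
            break
    model.eval()
```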
6 changes: 3 additions & 3 deletions demo/Constrained multi-output min-max.ipynb
@@ -143,7 +143,7 @@
"metadata": {},
"outputs": [],
"source": [
"from obsidian.constraints import InConstraint_Generic"
"from obsidian.constraints import Linear_Constraint"
]
},
{
@@ -163,7 +163,7 @@
"source": [
"X_suggest, eval_suggest = campaign.optimizer.suggest(acquisition = [{'NEHVI':{'ref_point':[-350, -20]}}, 'SF'],\n",
" # X1 + X2 <= 6, written as -X1 - X2 >= -6\n",
" ineq_constraints = [InConstraint_Generic(X_space, indices=[0,1], coeff=[-1,-1], rhs=-6)])"
" ineq_constraints = [Linear_Constraint(X_space, ind=[0,1], weights=[-1,-1], rhs=-6, equality=True)])"
]
},
{
@@ -210,7 +210,7 @@
"for iter in range(5):\n",
" campaign.fit()\n",
" X_suggest, eval_suggest = campaign.optimizer.suggest(acquisition = [{'NEHVI':{'ref_point':[-350, -20]}}, 'SF'],\n",
" ineq_constraints = [InConstraint_Generic(X_space, indices=[0,1], coeff=[-1,-1], rhs=-6)])\n",
" ineq_constraints = [Linear_Constraint(X_space, ind=[0,1], weights=[-1,-1], rhs=-6, equality=True)])\n",
" y_iter = pd.DataFrame(simulator.simulate(X_suggest))\n",
" Z_iter = pd.concat([X_suggest, y_iter, eval_suggest], axis=1)\n",
" campaign.add_data(Z_iter)"
7 changes: 5 additions & 2 deletions obsidian/acquisition/custom.py
@@ -64,10 +64,11 @@ class qSpaceFill(MCAcquisitionFunction):
"""
def __init__(self,
model: Model,
X_baseline: Tensor,
sampler: MCSampler | None = None,
objective: MCAcquisitionObjective | None = None,
posterior_transform: PosteriorTransform | None = None,
X_pending: Tensor | None = None):
X_pending: Tensor | None = None,):

if sampler is None:
sampler = SobolQMCNormalSampler(sample_shape=torch.Size([512]))
@@ -80,6 +81,8 @@ def __init__(self,

super().__init__(model=model, sampler=sampler, objective=objective,
posterior_transform=posterior_transform, X_pending=X_pending)

self.register_buffer('X_baseline', X_baseline)

@t_batch_mode_transform()
def forward(self,
@@ -88,7 +91,7 @@ def forward(self,
Evaluate the acquisition function on the candidate set x
"""
# x dimensions: b * q * d
x_train = self.model.train_inputs[0][0] # train_inputs is a list of tuples
x_train = self.X_baseline

# For sequential mode, add pending data points to "train"
if self.X_pending is not None:
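For reference, `qSpaceFill` now takes the training inputs explicitly as `X_baseline` (registered as a buffer) instead of reading `model.train_inputs`, which ensemble surrogates do not necessarily expose. A minimal usage sketch, assuming a plain BoTorch GP and random data purely for illustration, and assuming the default objective handling hidden in this hunk applies when `objective=None`:

```python
# Minimal usage sketch for the updated qSpaceFill signature. The GP and data
# here are purely illustrative; the default objective (hidden in this hunk)
# is assumed to be used when objective=None.
import torch
from botorch.models import SingleTaskGP
from botorch.fit import fit_gpytorch_mll
from gpytorch.mlls import ExactMarginalLogLikelihood
from obsidian.acquisition.custom import qSpaceFill

train_X = torch.rand(8, 3, dtype=torch.float64)
train_Y = train_X.sum(dim=-1, keepdim=True)

model = SingleTaskGP(train_X, train_Y)
fit_gpytorch_mll(ExactMarginalLogLikelihood(model.likelihood, model))

# X_baseline is now passed explicitly rather than pulled from model.train_inputs,
# so surrogates that do not expose train_inputs (e.g. the DNN ensemble) also work
aq = qSpaceFill(model=model, X_baseline=train_X)
X_cand = torch.rand(4, 2, 3, dtype=torch.float64)  # b x q x d candidate batch
scores = aq(X_cand)
```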
27 changes: 20 additions & 7 deletions obsidian/optimizer/bayesian.py
@@ -3,7 +3,7 @@
from .base import Optimizer

from obsidian.parameters import ParamSpace, Target, Task
from obsidian.surrogates import SurrogateBoTorch, DNN
from obsidian.surrogates import SurrogateBoTorch, EnsembleModel
from obsidian.acquisition import aq_class_dict, aq_defaults, aq_hp_defaults, valid_aqs
from obsidian.surrogates import model_class_dict
from obsidian.objectives import Index_Objective, Objective_Sequence
@@ -18,7 +18,7 @@
from botorch.sampling.index_sampler import IndexSampler
from botorch.models.model_list_gp_regression import ModelListGP
from botorch.models.gpytorch import GPyTorchModel
from botorch.models.model import ModelList
from botorch.models.model import ModelList, Model
from botorch.utils.sampling import draw_sobol_samples
from botorch.utils.multi_objective.box_decompositions.non_dominated import NondominatedPartitioning

@@ -478,6 +478,7 @@ def _parse_aq_kwargs(self,
hps: dict,
m_batch: int,
target_locs: list[int],
model: Model,
X_t_pending: Tensor | None = None,
objective: MCAcquisitionObjective | None = None) -> dict:
"""
@@ -533,7 +534,9 @@
# Noisy aqs require X_train reference
if aq in ['NEI', 'NEHVI', 'NParEGO']:
aq_kwargs['X_baseline'] = X_baseline

if any(isinstance(m, EnsembleModel) for m in model.models):
aq_kwargs['cache_root'] = False

# Hypervolume requires reference point
if aq in ['EHVI', 'NEHVI']:

@@ -570,6 +573,9 @@
w = w/torch.sum(torch.abs(w))
aq_kwargs['scalarization_weights'] = w

if aq == 'SF':
aq_kwargs['X_baseline'] = X_baseline

return aq_kwargs

def suggest(self,
@@ -712,7 +718,7 @@ def suggest(self,
if not isinstance(model, ModelListGP):
samplers = []
for m in model.models:
if isinstance(m, DNN):
if isinstance(m, EnsembleModel):
sampler_i = IndexSampler(sample_shape=torch.Size([optim_samples]), seed=self.seed)
else:
sampler_i = SobolQMCNormalSampler(sample_shape=torch.Size([optim_samples]), seed=self.seed)
@@ -757,7 +763,9 @@
# Use aq_kwargs so that extra unnecessary ones in hps get removed for certain aq funcs
aq_kwargs = {'model': model, 'sampler': sampler, 'X_pending': X_t_pending}

aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, m_batch, target_locs, X_t_pending, objective))
aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, m_batch,
target_locs, model,
X_t_pending, objective))

# Raise errors related to certain constraints
if aq_str in ['UCB', 'Mean', 'TS', 'SF', 'SR', 'NIPV']:
@@ -812,7 +820,10 @@ def suggest(self,

# Hypervolume aqs fail with X_t_pending when optim_sequential=True
if aq_str in ['NEHVI', 'EHVI']:
optim_sequential = False
if optim_sequential and X_t_pending is not None:
warnings.warn('Hypervolume aqs with X_pending require joint optimization. \
Setting optim_sequential to False', UserWarning)
optim_sequential = False

# If it's random search, no need to do optimization; Otherwise, initialize the aq function and optimize
if aq_str == 'RS':
@@ -978,7 +989,9 @@ def evaluate(self,
# Use aq_kwargs so that extra unnecessary ones in hps get removed for certain aq funcs
aq_kwargs = {'model': model, 'sampler': None, 'X_pending': X_t_pending}

aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, X_suggest.shape[0], target_locs, X_t_pending, objective))
aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, X_suggest.shape[0],
target_locs, model,
X_t_pending, objective))

# If it's random search, no need to evaluate aq
if aq_str == 'RS':
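The two ensemble-related branches above follow one pattern: ensemble posteriors are discrete, so they are drawn with an `IndexSampler` rather than Sobol QMC normal samples, and the Cholesky-root caching used by the noisy acquisition functions assumes a GP posterior, so it is disabled whenever an ensemble member is present. A condensed, standalone sketch of that dispatch (the function names here are illustrative, not part of the obsidian API):

```python
# Condensed sketch of the dispatch pattern used in bayesian.py: ensemble
# members get an IndexSampler, and root caching is disabled for the "noisy"
# acquisition functions. Function names here are illustrative only.
import torch
from botorch.models.ensemble import EnsembleModel
from botorch.sampling.index_sampler import IndexSampler
from botorch.sampling.normal import SobolQMCNormalSampler


def make_sampler(m, optim_samples: int, seed: int | None = None):
    """Ensemble posteriors are discrete, so they are sampled by index."""
    if isinstance(m, EnsembleModel):
        return IndexSampler(sample_shape=torch.Size([optim_samples]), seed=seed)
    return SobolQMCNormalSampler(sample_shape=torch.Size([optim_samples]), seed=seed)


def noisy_aq_kwargs(models, X_baseline: torch.Tensor) -> dict:
    """Noisy aqs (NEI/NEHVI/NParEGO) need X_baseline; Cholesky-root caching
    assumes a GP posterior, so turn it off if any member is an ensemble."""
    kwargs = {'X_baseline': X_baseline}
    if any(isinstance(m, EnsembleModel) for m in models):
        kwargs['cache_root'] = False
    return kwargs
```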
19 changes: 11 additions & 8 deletions obsidian/plotting/plotly.py
@@ -10,7 +10,6 @@
import plotly.graph_objects as go
from plotly.graph_objects import Figure
from plotly.subplots import make_subplots
from sklearn.manifold import MDS

import pandas as pd
import numpy as np
@@ -99,6 +98,12 @@ def MDS_plot(campaign: Campaign) -> Figure:
Returns:
fig (Figure): The MDS plot
"""
try:
from sklearn.manifold import MDS
except ImportError:
raise ImportError('The `sklearn` package (>1.0) is required for the MDS plot. \
Please install it using `pip install scikit-learn`')

mds = MDS(n_components=2)
X_mds = mds.fit_transform(campaign.X_space.encode(campaign.X))

@@ -280,13 +285,12 @@ def factor_plot(optimizer: Optimizer,
# Create a dataframe of test samples for plotting
n_samples = 100
if X_ref is None:
df_mean = optimizer.X_best_f
X_test = pd.concat([df_mean]*n_samples, axis=0).reset_index(drop=True)
X_ref = optimizer.X_best_f
else:
if not isinstance(X_ref, pd.DataFrame):
raise TypeError('X_ref must be a DataFrame')
X_test = pd.concat([X_ref]*n_samples, axis=0).reset_index(drop=True)

X_test = pd.concat([X_ref]*n_samples, axis=0).reset_index(drop=True)
# Vary the indicated column
X_name = X_test.columns[feature_id]
param_i = optimizer.X_space.params[feature_id]
@@ -316,7 +320,7 @@
line={'color': obsidian_colors.teal},
name='Mean'),
)
if (X_ref is not None) and plotRef:
if plotRef:
Y_pred_ref = optimizer.predict(X_ref, return_f_inv=not f_transform)
Y_mu_ref = Y_pred_ref[y_name+('_t (pred)' if f_transform else ' (pred)')].values
fig.add_trace(go.Scatter(x=X_ref.iloc[:, feature_id].values, y=Y_mu_ref,
@@ -382,8 +386,7 @@ def surface_plot(optimizer: Optimizer,

# Create a dataframe of test samples for plotting
n_grid = 100
df_mean = optimizer.X_best_f
X_test = pd.concat([df_mean]*(n_grid**2), axis=0).reset_index(drop=True)
X_test = pd.concat([optimizer.X_best_f]*(n_grid**2), axis=0).reset_index(drop=True)

# Create a mesh grid which is necessary for the 3D plot
X0_name = X_test.columns[feature_ids[0]]
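With `sklearn` now imported lazily inside `MDS_plot`, callers only hit the `ImportError` when they actually request that plot. A small usage sketch, assuming `campaign` is an obsidian `Campaign` that already has data:

```python
# Usage sketch: MDS_plot now degrades gracefully when scikit-learn is absent.
# `campaign` is assumed to be an obsidian Campaign with data already added.
from obsidian.plotting.plotly import MDS_plot

try:
    fig = MDS_plot(campaign)
    fig.show()
except ImportError:
    # scikit-learn (>1.0) is an optional dependency for this plot only
    print('Install scikit-learn to enable the MDS plot: pip install scikit-learn')
```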
1 change: 1 addition & 0 deletions obsidian/surrogates/__init__.py
@@ -4,3 +4,4 @@
from .custom_GP import *
from .custom_torch import *
from .config import *
from .utils import *
27 changes: 11 additions & 16 deletions obsidian/surrogates/botorch.py
@@ -2,6 +2,7 @@

from .base import SurrogateModel
from .config import model_class_dict
from .utils import fit_pytorch

from obsidian.utils import tensordict_to_dict, dict_to_tensordict
from obsidian.exceptions import SurrogateFitError
@@ -10,11 +11,11 @@
from botorch.fit import fit_gpytorch_mll
from botorch.optim.fit import fit_gpytorch_mll_torch, fit_gpytorch_mll_scipy
from botorch.models.gpytorch import GPyTorchModel
from botorch.models.ensemble import EnsembleModel
from gpytorch.mlls import ExactMarginalLogLikelihood

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import warnings
@@ -156,20 +157,7 @@ def fit(self,
raise SurrogateFitError('BoTorch model failed to fit')
else:
self.loss_fcn = nn.MSELoss()
self.optimizer = optim.Adam(self.torch_model.parameters(), lr=1e-2)

self.torch_model.train()
for epoch in range(200):
self.optimizer.zero_grad()
output = self.torch_model(X_p)
loss = self.loss_fcn(output, y_p)
loss.backward()
self.optimizer.step()

if (epoch % 50 == 0 and self.verbose):
print(f'Epoch {epoch}: Loss {loss.item()}')

self.torch_model.eval()
fit_pytorch(self.torch_model, X_p, y_p, loss_fcn=self.loss_fcn, verbose=self.verbose)

self.is_fit = True

@@ -229,7 +217,14 @@ def predict(self,
X_p = self._prepare(X)

pred_posterior = self.torch_model.posterior(X_p)
mu = pred_posterior.mean.detach().cpu().squeeze(-1)

# We would prefer to have stability in the mean of ensemble models,
# So, we will not re-sample for prediction but use forward methods
if isinstance(self.torch_model, EnsembleModel):
mu = self.torch_model.forward(X_p).detach()
else:
mu = pred_posterior.mean.detach().cpu().squeeze(-1)

if q is not None:
if (q < 0) or (q > 1):
raise ValueError('Quantile must be between 0 and 1')
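On the stabilized ensemble mean: a mean estimated by averaging a finite number of posterior samples carries Monte-Carlo noise, so it shifts every time it is recomputed, whereas the deterministic `forward` pass does not. A standalone toy illustration (the numbers are synthetic, not obsidian output):

```python
# Toy, standalone illustration of the motivation for using forward() as the
# ensemble mean: a mean over a finite number of posterior samples changes on
# every call, while the deterministic forward pass is reproducible.
import torch

torch.manual_seed(0)
true_mean = torch.tensor(1.0)

def sampled_mean(n_sample: int) -> torch.Tensor:
    # stand-in for averaging n_sample stochastic forward passes of an ensemble
    return (true_mean + 0.1 * torch.randn(n_sample)).mean()

print(sampled_mean(512), sampled_mean(512))  # two calls, two slightly different values
print(true_mean)                             # the forward-pass mean is reproducible
```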
74 changes: 71 additions & 3 deletions obsidian/surrogates/custom_torch.py
@@ -1,11 +1,21 @@
"""Custom implementations of PyTorch surrogate models using BoTorch API"""

from botorch.models.model import Model
from .utils import fit_pytorch

from obsidian.config import TORCH_DTYPE

from botorch.models.model import FantasizeMixin
from botorch.models.ensemble import EnsembleModel, Model
from botorch.posteriors.ensemble import Posterior, EnsemblePosterior

import torch
import torch.nn as nn
from torch.nn import Module
from torch import Tensor

from typing import TypeVar
TFantasizeMixin = TypeVar("TFantasizeMixin", bound="FantasizeMixin")


class DNNPosterior(EnsemblePosterior):

@@ -17,7 +27,7 @@ def quantile(self, value: Tensor) -> Tensor:
return self.values.quantile(q=value.to(self.values), dim=-3, interpolation='linear')


class DNN(Model):
class DNN(EnsembleModel, FantasizeMixin):
def __init__(self,
train_X: Tensor,
train_Y: Tensor,
@@ -33,6 +43,13 @@ def __init__(self,
if p_dropout < 0 or p_dropout > 1:
raise ValueError("p_dropout must be in [0, 1]")

self.register_buffer('train_X', train_X)
self.register_buffer('train_Y', train_Y)
self.register_buffer('p_dropout', torch.tensor(p_dropout, dtype=TORCH_DTYPE))
self.register_buffer('h_width', torch.tensor(h_width, dtype=torch.int))
self.register_buffer('h_layers', torch.tensor(h_layers, dtype=torch.int))
self.register_buffer('num_outputs', torch.tensor(num_outputs, dtype=torch.int))

self.input_layer = nn.Sequential(
nn.Linear(train_X.shape[-1], h_width),
nn.PReLU(),
@@ -49,6 +66,7 @@ def __init__(self,

self.outer_layer = nn.Linear(h_width, num_outputs)
self._num_outputs = num_outputs
self.to(TORCH_DTYPE)

def forward(self,
x: Tensor) -> Tensor:
@@ -60,7 +78,7 @@ def forward(self,

def posterior(self,
X: Tensor,
n_sample: int = 16384,
n_sample: int = 512,
output_indices: list[int] = None,
observation_noise: bool | Tensor = False) -> Posterior:
"""Calculates the posterior distribution of the model at X"""
@@ -86,3 +104,53 @@
def num_outputs(self) -> int:
"""Number of outputs of the model"""
return self._num_outputs

def transform_inputs(self,
X: Tensor,
input_transform: Module = None) -> Tensor:
"""
Transform inputs.

Args:
X: A tensor of inputs
input_transform: A Module that performs the input transformation.

Returns:
A tensor of transformed inputs
"""
if input_transform is not None:
input_transform.to(X)
return input_transform(X)
try:
return self.input_transform(X)
except AttributeError:
return X

def condition_on_observations(self,
X: Tensor,
Y: Tensor) -> TFantasizeMixin:
"""
Condition the model to new observations, returning a fantasy model
"""

X_c = torch.concat((self.train_X, X), axis=0)
Y_c = torch.concat((self.train_Y, Y), axis=0)

# Create a new model based on the current one
fantasy = self.__class__(train_X=X_c, train_Y=Y_c,
p_dropout=float(self.p_dropout),
h_width=int(self.h_width), h_layers=int(self.h_layers),
num_outputs=int(self.num_outputs))

# Fit to the new data
fit_pytorch(fantasy, X_c, Y_c)

return fantasy

def fantasize(self,
X: Tensor) -> Model:

Y_f = self.forward(X).detach()
fantasy = self.condition_on_observations(X, Y_f)

return fantasy
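
Putting the new `EnsembleModel`/`FantasizeMixin` support together, a hedged usage sketch: shapes and hyperparameters below are illustrative, the keyword names follow the constructor call in `condition_on_observations`, and `fit_pytorch` is assumed to accept `(model, X, y)` as it is called elsewhere in this diff.

```python
# Usage sketch for the new fantasy-model support on the DNN surrogate.
# Shapes and hyperparameters are illustrative; keyword names mirror the
# constructor call in condition_on_observations above.
import torch
from obsidian.config import TORCH_DTYPE
from obsidian.surrogates.custom_torch import DNN
from obsidian.surrogates.utils import fit_pytorch

train_X = torch.rand(20, 3, dtype=TORCH_DTYPE)
train_Y = train_X.sum(dim=-1, keepdim=True)

dnn = DNN(train_X=train_X, train_Y=train_Y, p_dropout=0.1,
          h_width=16, h_layers=2, num_outputs=1)
fit_pytorch(dnn, train_X, train_Y)

# Fantasize at candidate points: the model is conditioned on its own
# predictions there and refit, returning a copy that can be queried as usual
X_new = torch.rand(4, 3, dtype=TORCH_DTYPE)
fantasy = dnn.fantasize(X_new)
post = fantasy.posterior(X_new)
print(post.mean.shape)
```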