diff --git a/.flake8 b/.flake8 index efbf644..fb314fa 100644 --- a/.flake8 +++ b/.flake8 @@ -1,9 +1,11 @@ [flake8] + # VScode adds blank whitespaces extend-ignore = W293 +# More manageable for desktop editing max-line-length = 140 -per-file-ignores = +per-file-ignores = # Imported but unused / Unable to detect names __init__.py: F401, F403 obsidian/dash/*: F401, F403 @@ -16,6 +18,4 @@ exclude = demo/ logs/ dev/ - source/ - docs/ - build/ \ No newline at end of file + docs/ \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index d26f43a..73b019e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,12 +5,16 @@ ### Added - Campaign.Explainer now added to PyTests - Docstrings and typing to Explainer methods +- Campaign.out property to dynamically capture measured responses "y" or objectives as appropriate +- Campaign.evaluate method to map optimizer.evaluate method ### Modified - Fixed SHAP explainer analysis and visualization functions - Changed SHAP visualization colors to use obsidian branding - Moved sensitivity method from campaign.analysis to campaign.explainer - Moved Explainer testing from optimizer pytests to campaign pytests +- Generalized plotting function MOO_results and renamed optim_progress +- Campaign analysis and plotting methods fixed for ### Removed - Removed code chunks regarding unused optional inputs to PDP ICE function imported from SHAP GitHub diff --git a/docs/_static/obsidian_logo_dark.svg b/docs/_static/obsidian_logo_dark.svg new file mode 100644 index 0000000..cd365bc --- /dev/null +++ b/docs/_static/obsidian_logo_dark.svg @@ -0,0 +1,316 @@ + + + + + + + + 2024-08-13T00:44:17.498278 + image/svg+xml + + + Matplotlib v3.9.2, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/conf.py b/docs/conf.py index 35114ae..af61474 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -86,7 +86,7 @@ "alt_text": "obsidian - Home", "text": "obsidian APO", "image_light": "_static/obsidian_logo.svg", - "image_dark": "_static/obsidian_logo.svg", + "image_dark": "_static/obsidian_logo_dark.svg", }, "icon_links": [ { diff --git a/obsidian/campaign/analysis.py b/obsidian/campaign/analysis.py index 07c663e..48e8ff2 100644 --- a/obsidian/campaign/analysis.py +++ b/obsidian/campaign/analysis.py @@ -1,112 +1,24 @@ "Analysis utility functions for examining metrics over the context of an optimization campaign" from obsidian.parameters import Param_Continuous - +from obsidian.optimizer import Optimizer import numpy as np import pandas as pd -import matplotlib.pyplot as plt - - -def plot_ofat_ranges(optimizer, ofat_ranges): - """ - Plots each parameter's 1D OFAT acceptable range - - Args: - optimizer (BayesianOptimizer): The optimizer object which contains a surrogate that has been fit to data - and can be used to make predictions. - ofat_ranges (pd.DataFrame): A DataFrame containing the acceptable range values for each parameter. - - Returns: - fig (matplotlib.figure.Figure): The parameter OFAT acceptable-range plot - """ - - fig = plt.figure(figsize=(8, 4)) - colors = plt.rcParams['axes.prop_cycle'].by_key()['color'] - for i, (index, row) in enumerate(ofat_ranges.iloc[0:10, :].iterrows()): - color = colors[i] - - plt.plot([index, index], [row['Min_LB'], row['Max_LB']], - linewidth=6, linestyle='solid', color=color, label='High Confidence' if i == 0 else None) - if row['Min_LB'] > row['Min_Mu']: - plt.annotate( - f'{(row["Min_LB"]*optimizer.X_space.X_range[index].iloc[0]+optimizer.X_space.X_min[index].iloc[0]):.2f}', - xy=(i, row['Min_LB']), xytext=(i + 0.25, row['Min_LB']), - fontsize=8, ha='left', va='center', rotation=0, arrowprops=dict(arrowstyle='-', color=color, lw=1)) - if row['Max_LB'] < row['Max_Mu']: - plt.annotate( - f'{(row["Max_LB"]*optimizer.X_space.X_range[index].iloc[0]+optimizer.X_space.X_min[index].iloc[0]):.2f}', - xy=(i, row['Max_LB']), xytext=(i + 0.25, row['Max_LB']), - fontsize=8, ha='left', va='center', rotation=0, arrowprops=dict(arrowstyle='-', color=color, lw=1)) - - plt.plot([index, index], [row['Min_Mu'], row['Max_Mu']], linewidth=3, - linestyle='solid', color=color, label='Average' if i == 0 else None) - plt.annotate( - f'{(row["Min_Mu"]*optimizer.X_space.X_range[index].iloc[0]+optimizer.X_space.X_min[index].iloc[0]):.2f}', - xy=(i, row['Min_Mu']), xytext=(i + 0.25, row['Min_Mu']), - fontsize=8, ha='left', va='center', rotation=0, arrowprops=dict(arrowstyle='-', color=color, lw=1)) - plt.annotate( - f'{(row["Max_Mu"]*optimizer.X_space.X_range[index].iloc[0]+optimizer.X_space.X_min[index].iloc[0]):.2f}', - xy=(i, row['Max_Mu']), xytext=(i + 0.25, row['Max_Mu']), - fontsize=8, ha='left', va='center', rotation=0, arrowprops=dict(arrowstyle='-', color=color, lw=1)) - - if row['Min_UB'] < row['Min_Mu']: - plt.plot([index, index], [row['Min_UB'], row['Min_Mu']], linewidth=1, linestyle=':', color=color) - if row['Max_UB'] > row['Max_Mu']: - plt.plot([index, index], [row['Max_UB'], row['Max_Mu']], linewidth=1, linestyle=':', color=color) - plt.plot([0], [0], linewidth=1, linestyle=':', color=color, label='Low Confidence' if i == 0 else None) - - plt.xticks(rotation=90) - plt.ylabel('Parameter Value (Scaled)') - plt.ylim([-0.15, 1.15]) - plt.xlim([-1, i+1]) - plt.title(f'Univeriate Range (OFAT) Estimates from APO Model \n Ranges exceeding {row["Threshold"]} {row["Response"]}', - fontsize=10) - plt.legend(bbox_to_anchor=(1.1, 1.05)) - - return fig - - -def plot_interactions(optimizer, cor, clamp=False): - """ - Plots the parameter interaction matrix - Args: - optimizer (BayesianOptimizer): The optimizer object which contains a surrogate that has been fit to data - and can be used to make predictions. - cor (np.ndarray): The correlation matrix representing the parameter interactions. - clamp (bool, optional): Whether to clamp the colorbar range to (0, 1). Defaults to ``False``. - Returns: - matplotlib.figure.Figure: The parameter interaction plot - """ - fig = plt.figure(figsize=(5, 5)) - ax = fig.gca() - cax = ax.matshow(cor) - if clamp: - cax.set_clim(0, 1) - axis = np.arange(len(optimizer.X_space.X_names)) - names = optimizer.X_space.X_names - ax.set_xticks(axis) - ax.set_xticklabels(names, rotation=90) - ax.set_yticks(axis) - ax.set_yticklabels(names, rotation=0) - cbar = fig.colorbar(cax) - ax.set_title('Parameter Interactions') - cbar.ax.set_ylabel('Range Shrinkage') - for (i, j), z in np.ndenumerate(cor): - if z > 0.05: - ax.text(j, i, '{:0.2f}'.format(z), ha='center', va='center', fontsize=8) - return fig - - -def calc_ofat_ranges(optimizer, threshold, X_ref, PI_range=0.7, - steps=100, response_id=0, calc_interacts=True): +def calc_ofat_ranges(optimizer: Optimizer, + threshold: float, + X_ref: pd.DataFrame | pd.Series | None = None, + PI_range: float = 0.95, + steps: int = 100, + response_id: int = 0, + calc_interacts: bool = True): """ Calculates an OFAT design space using confidence bounds around the optimizer prediction. Also includes a matrix of interaction scores. Args: - optimizer (BayesianOptimizer): The optimizer object which contains a surrogate that has been fit to data + optimizer (Optimizer): The optimizer object which contains a surrogate that has been fit to data and can be used to make predictions. X_ref (pd.DataFrame): The reference data point from which the OFAT variations are calculated. threshold (float): The response value threshold (minimum value) which would be considered passing for OFAT variations. @@ -125,19 +37,22 @@ def calc_ofat_ranges(optimizer, threshold, X_ref, PI_range=0.7, in comparison to the corresponding two independent 1-factor variations. As such, diagonal elements are 0. """ - threshold = 0.4 ofat_ranges = [] response_name = optimizer.target[response_id].name + if X_ref is None: + X_ref = optimizer.X_space.mean() + if isinstance(X_ref, pd.Series): + X_ref = X_ref.to_frame().T + + # Calculate 1D OFAT ranges for p in optimizer.X_space: if isinstance(p, Param_Continuous): - X_min = p.min - X_max = p.max - X_range = p.range - X_span = np.linspace(X_min, X_max, steps) - + + X_span = np.linspace(0, 1, steps) X_sim = pd.DataFrame(np.repeat(X_ref.values, repeats=steps, axis=0), columns=X_ref.columns) - X_sim[p.name] = X_span + X_sim[p.name] = p.unit_demap(X_span) + df_pred = optimizer.predict(X_sim, PI_range=PI_range) lb = df_pred[response_name + ' lb'] ub = df_pred[response_name + ' ub'] @@ -147,47 +62,59 @@ def calc_ofat_ranges(optimizer, threshold, X_ref, PI_range=0.7, labels = ['Mu', 'LB', 'UB'] for label, y in zip(labels, [pred_mu, lb, ub]): - pass_ids = np.where(pred_mu > threshold) + pass_ids = np.where(y > threshold) pass_vals = X_sim[p.name].iloc[pass_ids] - row['Min_'+label] = (pass_vals.min()-X_min)/X_range - row['Max_'+label] = (pass_vals.max()-X_min)/X_range + row['Min_'+label] = p.encode(pass_vals.min()) + row['Max_'+label] = p.encode(pass_vals.max()) ofat_ranges.append(row) ofat_ranges = pd.DataFrame(ofat_ranges).set_index('Name') + # Calculate the correlation matrix as 2-FI range / diagional of 1-FI box if calc_interacts: cor = [] - for i, pi in enumerate(optimizer.X_space.X_names): + # Calculate with a nested loop of parameters + for pi in optimizer.X_space: cor_j = [] - Xi_pass_min = optimizer.X_space.X_min[pi] + optimizer.X_space.X_range[pi]*ofat_ranges['Min_Mu'][pi] - Xi_pass_max = optimizer.X_space.X_min[pi] + optimizer.X_space.X_range[pi]*ofat_ranges['Max_Mu'][pi] - Xi_pass_span = np.linspace(Xi_pass_min, Xi_pass_max, steps) - - for j, pj in enumerate(optimizer.X_space.X_names): - Xj_pass_min = optimizer.X_space.X_min[pj] + optimizer.X_space.X_range[pj]*ofat_ranges['Min_Mu'][pj] - Xj_pass_max = optimizer.X_space.X_min[pj] + optimizer.X_space.X_range[pj]*ofat_ranges['Max_Mu'][pj] - Xj_pass_span = np.linspace(Xj_pass_min, Xj_pass_max, steps) + if np.isnan(ofat_ranges['Min_Mu'][pi.name]): + cor.append([np.nan]*len(optimizer.X_space)) + continue + + # Enumerate a grid over the passing range at the MEAN + Xi_pass_span = pi.unit_demap(np.linspace(ofat_ranges['Min_Mu'][pi.name], + ofat_ranges['Max_Mu'][pi.name], steps)) - X_sim_cor = pd.DataFrame(np.repeat(X_ref.values, repeats=steps, axis=0), columns=X_ref.columns) + for pj in optimizer.X_space: - X_sim_cor[pj] = Xj_pass_span - if not pi == pj: - X_sim_cor[pi] = Xi_pass_span + if np.isnan(ofat_ranges['Min_Mu'][pj.name]): + cor_j.append([np.nan]*len(optimizer.X_space)) + continue + + Xj_pass_span = pi.unit_demap(np.linspace(ofat_ranges['Min_Mu'][pj.name], + ofat_ranges['Max_Mu'][pj.name], steps)) + + # Set up a simulation dataframe where these parameters will co-vary + X_sim_cor = pd.DataFrame(np.repeat(X_ref.values, repeats=steps, axis=0), columns=X_ref.columns) + X_sim_cor[pj.name] = Xj_pass_span + X_sim_cor[pi.name] = Xi_pass_span - pred_mu_cor_all, _ = optimizer.predict(X_sim_cor) + # Predict the responses, and extract the target one + pred_mu_cor_all = optimizer.predict(X_sim_cor) pred_mu_cor = pred_mu_cor_all.iloc[:, response_id] cor_passing = np.where(pred_mu_cor > threshold)[0] + # Want to calculate the number of steps along the diagonal which pass + # A value of 0 for cor_j means that the two parameters are independent if len(cor_passing) > 0: start = cor_passing[0] stop = cor_passing[-1] - cor_ij = 1-(stop-start)/(steps-1) - cor_j.append(cor_ij) + pass_ij = (stop-start)/(steps-1) else: - cor_j.append(0) + pass_ij = 0 + cor_j.append(1 - pass_ij) cor.append(cor_j) cor = np.array(cor) diff --git a/obsidian/campaign/campaign.py b/obsidian/campaign/campaign.py index da7a9a3..2981605 100644 --- a/obsidian/campaign/campaign.py +++ b/obsidian/campaign/campaign.py @@ -28,8 +28,10 @@ class Campaign(): Properties: m_exp (int): The number of observations in campaign.data y (pd.Series): The response data in campaign.data + y_names (list): The names of the response data columns f (pd.Series): The transformed response data o (pd.Series): The objective function evaluated on f + o_names (list): The names of the objective function columns X (pd.DataFrame): The input features of campaign.data response_max (float | pd.Series): The maximum for each response target (Target | list[Target]): The target(s) for optimization. @@ -126,6 +128,7 @@ def _eval_objective(self): df_o = self.o for col in df_o.columns: self.data[col] = df_o[col].values + self.o_names = [col for col in self.data.columns if 'Objective' in col] def set_objective(self, objective: Objective | None): """(Re)sets the campaign objective function""" @@ -138,6 +141,10 @@ def set_objective(self, objective: Objective | None): if self.optimizer.is_fit: self._analyze() + def clear_objective(self): + """Clears the campaign objective function""" + self._objective = None + @property def target(self): """Campaign experimental target(s)""" @@ -206,6 +213,9 @@ def f(self) -> pd.Series | pd.DataFrame: @property def o(self) -> pd.Series | pd.DataFrame: + """ + Objective function evaluated on f + """ if self.objective: try: x = self.X_space.encode(self.X).values @@ -219,7 +229,17 @@ def o(self) -> pd.Series | pd.DataFrame: raise IncompatibleObjectiveError('Objective(s) did not successfully execute on sample') else: return None - + + @property + def out(self) -> pd.Series | pd.DataFrame: + """ + Returns the objective function as appropriate, else the response data + """ + if self.objective and self.optimizer.is_fit: + return self.o + else: + return self.y + @property def X(self) -> pd.DataFrame: """ @@ -287,8 +307,7 @@ def __repr__(self): """String representation of object""" return f"obsidian Campaign for {getattr(self,'y_names', None)}; {getattr(self,'m_exp', 0)} observations" - def initialize(self, - design_kwargs={}): + def initialize(self, **design_kwargs): """ Maps ExpDesigner.initialize method """ @@ -307,8 +326,7 @@ def fit(self): self.optimizer.fit(self.data, target=self.target) - def suggest(self, - optim_kwargs={}): + def suggest(self, **optim_kwargs): """ Maps Optimizer.suggest method """ @@ -324,6 +342,12 @@ def suggest(self, X0 = self.initialize() return X0 + def evaluate(self, X_suggest: pd.DataFrame): + """ + Maps Optimizer.evaluate method + """ + return self.optimizer.evaluate(X_suggest, objective=self.objective) + def _profile_hv(self): """ Calculate and assign the hypervolume values to each iteration in the data. @@ -334,19 +358,14 @@ def _profile_hv(self): iters = self.data['Iteration'].unique() hv = {} - if self.objective: - out = self.o - else: - out = self.y - for i in iters: iter_index = self.data.query(f'Iteration <= {i}').index - out_iter = out.loc[iter_index, :] + out_iter = self.out.loc[iter_index, :] out_iter = torch.tensor(out_iter.values).to(self.optimizer.device) hv[i] = self.optimizer.hypervolume(out_iter) self.data['Hypervolume (iter)'] = self.data.apply(lambda x: hv[x['Iteration']], axis=1) - self.data['Pareto Front'] = self.optimizer.pareto(torch.tensor(out.values).to(self.optimizer.device)) + self.data['Pareto Front'] = self.optimizer.pareto(torch.tensor(self.out.values).to(self.optimizer.device)) return @@ -363,12 +382,7 @@ def _profile_max(self): columns=[col for col in self.data.columns if '(max) (iter)' in col] ) - if self.objective: - out_names = [col for col in self.data.columns if 'Objective' in col] - else: - out_names = self.y_names - - for out in out_names: + for out in self.out.columns: self.data[out+' (max) (iter)'] = self.data.apply( lambda x: self.data.query(f'Iteration<={x["Iteration"]}')[out].max(), axis=1 ) diff --git a/obsidian/plotting/mpl.py b/obsidian/plotting/mpl.py index cd4f9b6..71b4767 100644 --- a/obsidian/plotting/mpl.py +++ b/obsidian/plotting/mpl.py @@ -1,8 +1,14 @@ +"""Matplotlib figure-generating functions""" + +from obsidian.campaign import Campaign +from obsidian.optimizer import Optimizer + import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec from matplotlib.pyplot import Figure -from obsidian.campaign import Campaign + import numpy as np +import pandas as pd def visualize_inputs(campaign: Campaign) -> Figure: @@ -41,3 +47,126 @@ def visualize_inputs(campaign: Campaign) -> Figure: plt.title('Correlation Plot') return fig + + +def plot_ofat_ranges(optimizer: Optimizer, + ofat_ranges: pd.DataFrame) -> Figure: + """ + Plots each parameter's 1D OFAT acceptable range + + Args: + optimizer (Optimizer): The optimizer object which contains a surrogate + that has been fit to data and can be used to make predictions. + ofat_ranges (pd.DataFrame): A DataFrame containing the acceptable range + values for each parameter, at the low bound, average, and high bound. + + Returns: + Figure: The parameter OFAT acceptable-range plot + """ + + fig = plt.figure(figsize=(2*len(ofat_ranges), 4)) + colors = plt.rcParams['axes.prop_cycle'].by_key()['color'] + + # Iterate over the parameteres + for i, (p_name, row) in enumerate(ofat_ranges.iterrows()): + color = colors[i] + + # Plot as a bar chart; x-axis is the parameter name, y-axis is the scaled value + plt.plot([p_name, p_name], [row['Min_LB'], row['Max_LB']], + linewidth=6, linestyle='solid', color=color, label='High Confidence' if i == 0 else None) + + # If the edges of LB are too close to mean, only annotate LB (higher conf) + if row['Min_LB'] > row['Min_Mu']: + plt.annotate( + f'{(optimizer.X_space[i].unit_demap(row["Min_LB"])):.2f}', + xy=(i, row['Min_LB']), xytext=(i + 0.25, row['Min_LB']), + fontsize=8, ha='left', va='center', rotation=0, arrowprops=dict(arrowstyle='-', color=color, lw=1)) + if row['Max_LB'] < row['Max_Mu']: + plt.annotate( + f'{(optimizer.X_space[i].unit_demap(row["Max_LB"])):.2f}', + xy=(i, row['Max_LB']), xytext=(i + 0.25, row['Max_LB']), + fontsize=8, ha='left', va='center', rotation=0, arrowprops=dict(arrowstyle='-', color=color, lw=1)) + + plt.plot([p_name, p_name], [row['Min_Mu'], row['Max_Mu']], linewidth=3, + linestyle='solid', color=color, label='Average' if i == 0 else None) + + # If the edges of the mean are too close to the UB, only annotate mean (higher conf) + plt.annotate( + f'{(optimizer.X_space[i].unit_demap(row["Min_Mu"])):.2f}', + xy=(i, row['Min_Mu']), xytext=(i + 0.25, row['Min_Mu']), + fontsize=8, ha='left', va='center', rotation=0, arrowprops=dict(arrowstyle='-', color=color, lw=1)) + plt.annotate( + f'{(optimizer.X_space[i].unit_demap(row["Max_Mu"])):.2f}', + xy=(i, row['Max_Mu']), xytext=(i + 0.25, row['Max_Mu']), + fontsize=8, ha='left', va='center', rotation=0, arrowprops=dict(arrowstyle='-', color=color, lw=1)) + + # Only plot UB if it isn't already encompassed by higher-confidence ranges + if row['Min_UB'] < row['Min_Mu']: + plt.plot([p_name, p_name], [row['Min_UB'], row['Min_Mu']], linewidth=1, linestyle=':', color=color) + if row['Max_UB'] > row['Max_Mu']: + plt.plot([p_name, p_name], [row['Max_UB'], row['Max_Mu']], linewidth=1, linestyle=':', color=color) + plt.plot([0], [0], linewidth=1, linestyle=':', color=color, label='Low Confidence' if i == 0 else None) + + # Never annotate UB (low confidence) + + alpha = ofat_ranges['PI Range'].mode().iloc[0] + LCL = (1 - alpha) / 2 + UCL = 1 - LCL + + plt.xticks(rotation=90) + plt.ylabel('Parameter Value (Scaled)') + plt.ylim([-0.15, 1.15]) + plt.xlim([-1, len(ofat_ranges)]) + plt.title('Univariate Range (OFAT) Estimates from APO Model \n' + + f'Ranges Exceeding {row["Response"]} > {row["Threshold"]} \n' + + f'Confidence Range: {LCL*100:.1f} - {UCL*100:.1f}%', + fontsize=10) + plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left') + plt.close(fig) + + return fig + + +def plot_interactions(optimizer: Optimizer, + cor: np.ndarray, + clamp: bool = False): + """ + Plots the parameter interaction matrix + + Args: + optimizer (ptimizer): The optimizer object which contains a surrogate + that has been fit to data and can be used to make predictions. + cor (np.ndarray): The correlation matrix representing the parameter interactions. + clamp (bool, optional): Whether to clamp the colorbar range to (0, 1). + Defaults to ``False``. + + Returns: + Figure: The parameter interaction plot + """ + + fig = plt.figure(figsize=(4, 4)) + ax = fig.gca() + + # Use matrix imshow to plot correlation matrix + cax = ax.matshow(cor) + if clamp: + cax.set_clim(0, 1) + + # Set axis labels and ticks + axis = np.arange(len(optimizer.X_space.X_names)) + names = optimizer.X_space.X_names + ax.set_xticks(axis) + ax.set_xticklabels(names, rotation=90) + ax.set_yticks(axis) + ax.set_yticklabels(names, rotation=0) + cbar = fig.colorbar(cax) + ax.set_title('Parameter Interactions') + cbar.ax.set_ylabel('Range Shrinkage') + + # Add text annotations if correlation is greater than 0.05 + for (i, j), z in np.ndenumerate(cor): + if z > 0.05: + ax.text(j, i, '{:0.2f}'.format(z), ha='center', va='center', fontsize=8) + plt.close(fig) + + return fig diff --git a/obsidian/plotting/plotly.py b/obsidian/plotting/plotly.py index 55b1a20..8a34307 100644 --- a/obsidian/plotting/plotly.py +++ b/obsidian/plotting/plotly.py @@ -321,50 +321,61 @@ def surface_plot(optimizer: Optimizer, return fig -def MOO_results(campaign: Campaign, - response_ids: list[int] = [0, 1], - color_feature_id: int | None = None, - y_suggest: pd.DataFrame | None = None) -> Figure: +def optim_progress(campaign: Campaign, + response_ids: int | tuple[int] | None = None, + color_feature_id: int | None | str = 'Iteration', + X_suggest: pd.DataFrame | None = None) -> Figure: """ - Generates a plotly figure to visualize multi-objective optimization (MOO) results. + Generates a plotly figure to visualize optimization progress Args: campaign (Campaign): The campaign object containing the data. response_ids (list[int], optional): The indices of the responses to plot. Defaults to ``[0, 1]``. color_feature_id (int | None, optional): The index of the feature to use for coloring the markers. - Defaults to ``None``. - y_suggest (pd.DataFrame | None, optional): The suggested data for the responses. + Defaults to ``None``, which will color by iteration. + X_suggest (pd.DataFrame | None, optional): The suggested next experiments to evaluate. Defaults to ``None``. Returns: Figure: The plotly figure. - Raises: - ValueError: If the campaign has less than two responses. - ValueError: If the response ID is out of range. - ValueError: If the color feature ID is out of range. - ValueError: If the suggested data does not contain all responses. """ fig = go.Figure() - if not campaign._is_moo: - raise ValueError('Campaign must have at least two responses for MOO results') + if response_ids is None: + if campaign._is_mo: + response_ids = (0, 1) + else: + response_ids = (0) + if isinstance(response_ids, int): + response_ids = (response_ids,) + + # Extract input and output names + out_names = [] + for id in response_ids: + out_names.append(campaign.out.columns[id]) + X_names = list(campaign.X.columns) + for id in response_ids: if id >= campaign.n_response: raise ValueError(f'Response ID {id} is out of range') - if color_feature_id is not None: + if isinstance(color_feature_id, int): if color_feature_id >= len(campaign.X_space): raise ValueError(f'Color feature ID {color_feature_id} is out of range') - - response_0 = campaign.y_names[response_ids[0]] - response_1 = campaign.y_names[response_ids[1]] - X_names = list(campaign.X_space.X_names) - - yexp_0 = campaign.data[response_0] - yexp_1 = campaign.data[response_1] + x_color_name = X_names[color_feature_id] + if isinstance(color_feature_id, str): + if color_feature_id not in campaign.data.columns: + raise ValueError(f'Color feature {color_feature_id} is not in the data') + x_color_name = color_feature_id + + # Unpack experimental data to plot progress + out_exp = campaign.out[out_names] + if not campaign._is_mo: + # In this case, we only have 1 response to plot, so use the index on x-axis + out_exp = out_exp.reset_index(drop=False).rename(columns={'index': 'Experiment'}) + out_names.insert(0, 'Experiment') if color_feature_id is not None: - x_color_name = campaign.X_space.X_names[color_feature_id] x_color = campaign.data[x_color_name] marker_dict = dict(color=x_color, colorscale=[[0, obsidian_colors.rich_blue], @@ -372,13 +383,12 @@ def MOO_results(campaign: Campaign, [1, obsidian_colors.lemon]], showscale=True, colorbar=dict(title=x_color_name)) - else: x_color = None marker_dict = dict(color=obsidian_colors.primary.teal) fig.add_trace(go.Scatter( - x=yexp_0, y=yexp_1, + x=out_exp.iloc[:, 0], y=out_exp.iloc[:, 1], mode='markers', marker=marker_dict, customdata=campaign.data[X_names], @@ -386,41 +396,68 @@ def MOO_results(campaign: Campaign, template = [""+str(name)+": "+" %{customdata["+str(i)+"]:.3G}
" for i, name in enumerate(X_names)] - fig.update_traces(hovertemplate=''.join(template)) + fig.update_traces(hovertemplate=''.join(template) + out_names[0] + + ": %{x:.3G}
" + out_names[1] + ": %{y:.3G}
") - if y_suggest is not None: - if not all(y+' (pred)' in y_suggest.columns for y in campaign.y_names): + if X_suggest is not None: + if not all(x in X_suggest.columns for x in campaign.X.columns): raise ValueError('Suggested data must contain all responses') - y_0 = y_suggest[response_0 + ' (pred)'] - y_1 = y_suggest[response_1 + ' (pred)'] - lb_0 = y_suggest[response_0 + ' lb'] - ub_0 = y_suggest[response_0 + ' ub'] - lb_1 = y_suggest[response_1 + ' lb'] - ub_1 = y_suggest[response_1 + ' ub'] + eval_suggest = campaign.evaluate(X_suggest) + + if campaign.objective is None: + y_mu = [] + lb = [] + ub = [] + for response in out_names: + y_mu.append(eval_suggest[response + ' (pred)']) + lb.append(eval_suggest[response + ' lb']) + ub.append(eval_suggest[response + ' ub']) + error_y_plus = ub[1] - y_mu[1] + error_y_minus = y_mu[1] - lb[1] + error_x_plus = ub[0] - y_mu[0] + error_x_minus = y_mu[0] - lb[0] + + y_mu = pd.concat(y_mu, axis=1) + + hovertext = out_names[0] + ' :%{x:.3G} +%{error_x.array:.2G}/- \ + %{error_x.arrayminus:.2G}
' + out_names[1] + ': %{y:.3G} \ + +%{error_y.array:.2G}/-%{error_y.arrayminus:.2G}' + + else: + if campaign._is_mo: + y_mu = eval_suggest[out_names] + else: + y_mu = eval_suggest[out_names[-1]] + m_data = len(out_exp) + m_suggest = len(X_suggest) + y_mu = pd.concat([pd.DataFrame(np.arange(m_data, m_data+m_suggest), columns=['Experiment']), + y_mu], axis=1) + error_y_plus = error_y_minus = error_x_minus = error_x_plus = None + + hovertext = out_names[0] + ' :%{x:.3G}' + '
' \ + + out_names[1] + ' :%{y:.3G}' fig.add_trace(go.Scatter( - x=y_0, - y=y_1, + x=y_mu.iloc[:, 0], + y=y_mu.iloc[:, 1], mode='markers', marker=dict(color=obsidian_colors.accent.pastel_blue, symbol='diamond-open', size=7, line=dict(width=2)), name='Suggested', - error_y={'array': ub_1 - y_1, - 'arrayminus': y_1 - lb_1, + error_y={'array': error_y_plus, + 'arrayminus': error_y_minus, 'color': 'gray', 'thickness': 1}, - error_x={'array': ub_0 - y_0, - 'arrayminus': y_0 - lb_0, + error_x={'array': error_x_plus, + 'arrayminus': error_x_minus, 'color': 'gray', 'thickness': 1}, - hovertemplate=response_0 + ' :%{x:.3G} +%{error_x.array:.2G}/- \ - %{error_x.arrayminus:.2G}
' + response_1 + ': %{y:.3G} \ - +%{error_y.array:.2G}/-%{error_y.arrayminus:.2G}')) + hovertemplate=hovertext)) fig.update_layout( - xaxis_title=response_0, - yaxis_title=response_1, + xaxis_title=out_names[0], + yaxis_title=out_names[1], title='Optimization Results' ) @@ -433,6 +470,6 @@ def MOO_results(campaign: Campaign, x=0.95 )) - fig.update_layout(coloraxis_colorbar_title_text='your title') fig.update_layout(width=500, height=400, template='ggplot2') + return fig diff --git a/obsidian/tests/test_campaign.py b/obsidian/tests/test_campaign.py index 37efa79..8722cb2 100644 --- a/obsidian/tests/test_campaign.py +++ b/obsidian/tests/test_campaign.py @@ -3,9 +3,11 @@ from obsidian.parameters import Target from obsidian.experiment import Simulator from obsidian.experiment.benchmark import two_leaves, shifted_parab -from obsidian.campaign import Campaign, Explainer +from obsidian.campaign import Campaign, Explainer, calc_ofat_ranges from obsidian.objectives import Identity_Objective, Scalar_WeightedNorm, Feature_Objective, \ Objective_Sequence, Utopian_Distance, Index_Objective, Bounded_Target +from obsidian.plotting import plot_interactions, plot_ofat_ranges + from obsidian.tests.utils import DEFAULT_MOO_PATH import json @@ -71,6 +73,7 @@ def test_campaign_objectives(obj): campaign2 = Campaign.load_state(obj_dict) campaign2.save_state() campaign2.__repr__() + campaign2.clear_objective() def test_explain(): @@ -92,5 +95,17 @@ def test_explain(): df_sens = exp.sensitivity(X_ref=X_ref) +X_ref_test = [None, + campaign.X.iloc[campaign.y.idxmax()['Response 1'], :]] + + +@pytest.mark.parametrize('X_ref', X_ref_test) +def test_analysis(X_ref): + ofat_ranges, cor = calc_ofat_ranges(campaign.optimizer, threshold=0.5, X_ref=X_ref) + + plot_interactions(campaign.optimizer, cor) + plot_ofat_ranges(campaign.optimizer, ofat_ranges) + + if __name__ == '__main__': pytest.main([__file__, '-m', 'not slow']) diff --git a/obsidian/tests/test_plotting.py b/obsidian/tests/test_plotting.py index e53c716..7c60728 100644 --- a/obsidian/tests/test_plotting.py +++ b/obsidian/tests/test_plotting.py @@ -1,5 +1,13 @@ from obsidian import Campaign -from obsidian.plotting import parity_plot, factor_plot, surface_plot, visualize_inputs +from obsidian.plotting import ( + parity_plot, + factor_plot, + surface_plot, + visualize_inputs, + optim_progress +) + +from obsidian.objectives import Scalar_WeightedSum import pytest from obsidian.tests.utils import DEFAULT_MOO_PATH @@ -54,5 +62,15 @@ def test_surface_plot_options(): fig = surface_plot(optimizer, f_transform=True) +@pytest.mark.fast +def test_optim_progress_plot(): + campaign.clear_objective() + X_suggest, eval_suggest = campaign.suggest(optim_samples=2, optim_restarts=1) + fig = optim_progress(campaign, X_suggest=X_suggest) + obj = Scalar_WeightedSum(weights=[1, 1]) + campaign.set_objective(obj) + fig = optim_progress(campaign, X_suggest=X_suggest) + + if __name__ == '__main__': pytest.main([__file__, '-m', 'not slow']) diff --git a/pyproject.toml b/pyproject.toml index 5b2a7da..84d3e00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,10 +30,10 @@ keywords = [ ] [project.urls] -Homepage = "https://obsidian-apo.readthedocs.io/" -Documentation = "https://obsidian-apo.readthedocs.io/en/latest/stubs/api_docs.html" +Homepage = "https://msdllcpapers.github.io/obsidian/" +Documentation = "https://msdllcpapers.github.io/obsidian/stubs/api_docs.html" Repository = "https://github.com/MSDLLCpapers/obsidian/" -Changelog = "https://obsidian-apo.readthedocs.io/en/latest/stubs/changelog.html" +Changelog = "https://msdllcpapers.github.io/obsidian/stubs/changelog.html" [tool.poetry.dependencies] python = "^3.10" @@ -62,11 +62,12 @@ sphinx = { version = "^7.3.7", optional = true} myst-parser = { version = "^3.0.1", optional = true} pydata-sphinx-theme = { version = "^0.15.4", optional = true} linkify-it-py = { version = "^2.0.3", optional = true} +tcl = { version = "^0.2", optional = true} [tool.poetry.extras] app = ["flask", "dash", "dash-daq", "dash-bootstrap-components"] -dev = ["pytest", "xlrd", "ipykernel", "jupyterlab", "flake8", "pytest-cov"] +dev = ["pytest", "xlrd", "ipykernel", "jupyterlab", "flake8", "pytest-cov", "tcl"] docs = ["sphinx", "myst-parser", "pydata-sphinx-theme", "linkify-it-py"] diff --git a/readme.md b/readme.md index dc9bd92..d0ed0a4 100644 --- a/readme.md +++ b/readme.md @@ -3,10 +3,11 @@ obsidian ReadMe --> +

obsidian logo

- +
@@ -16,7 +17,7 @@ ReadMe [![License](https://img.shields.io/badge/license-GPLv3-teal.svg)](https://github.com/MSDLLCpapers/obsidian/blob/main/LICENSE) [![Issues](https://img.shields.io/github/issues/msdllcpapers/obsidian?color=teal)](https://github.com/MSDLLCpapers/obsidian/issues) [![PyPI](https://img.shields.io/pypi/v/obsidian-apo.svg?color=teal)](https://pypi.org/project/obsidian-apo/) -[![Docs](https://img.shields.io/badge/read-docs-teal)](https://obsidian-apo.readthedocs.io/en/latest/index.html) +[![Docs](https://img.shields.io/badge/read-docs-teal)](https://msdllcpapers.github.io/obsidian/) [![Codecov](https://img.shields.io/codecov/c/github/kstone40/obsidian?color=teal)](https://codecov.io/github/kstone40/obsidian) __obsidian__ is a library for algorithmic process design and black-box optimization using AI-guided experiment design