From 24fb9946ad1d01bf8b615622c051a41af403d0ad Mon Sep 17 00:00:00 2001 From: Chandan Singh Date: Wed, 24 Aug 2022 22:00:45 -0700 Subject: [PATCH] minor version bump --- docs/tree/cart_ccp.html | 6 +- docs/tree/figs.html | 24 +++- docs/tree/hierarchical_shrinkage.html | 170 +++++++++---------------- imodels/tree/hierarchical_shrinkage.py | 1 + setup.py | 2 +- 5 files changed, 83 insertions(+), 120 deletions(-) diff --git a/docs/tree/cart_ccp.html b/docs/tree/cart_ccp.html index 9777d938..ebc115fd 100644 --- a/docs/tree/cart_ccp.html +++ b/docs/tree/cart_ccp.html @@ -822,7 +822,8 @@

Methods

https://arxiv.org/abs/2202.00858

Params

estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting) -Defaults to CART Classification Tree with 20 max leaf ndoes

+Defaults to CART Classification Tree with 20 max leaf nodes +Note: this estimator will be directly modified

reg_param: float Higher is more regularization (can be arbitrarily large, should not be < 0)

shrinkage_scheme: str @@ -898,7 +899,8 @@

Methods

https://arxiv.org/abs/2202.00858

Params

estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting) -Defaults to CART Classification Tree with 20 max leaf ndoes

+Defaults to CART Classification Tree with 20 max leaf nodes +Note: this estimator will be directly modified

reg_param: float Higher is more regularization (can be arbitrarily large, should not be < 0)

shrinkage_scheme: str diff --git a/docs/tree/figs.html b/docs/tree/figs.html index a52cfa2e..75315661 100644 --- a/docs/tree/figs.html +++ b/docs/tree/figs.html @@ -215,10 +215,14 @@ if isinstance(self, ClassifierMixin): self.classes_, y = np.unique(y, return_inverse=True) # deals with str inputs + if feature_names is None: + if isinstance(X, pd.DataFrame): + self.feature_names_ = X.columns + else: + self.feature_names_ = feature_names + X, y = check_X_y(X, y) y = y.astype(float) - if feature_names is not None: - self.feature_names_ = feature_names if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X) @@ -693,10 +697,14 @@

Params

if isinstance(self, ClassifierMixin): self.classes_, y = np.unique(y, return_inverse=True) # deals with str inputs + if feature_names is None: + if isinstance(X, pd.DataFrame): + self.feature_names_ = X.columns + else: + self.feature_names_ = feature_names + X, y = check_X_y(X, y) y = y.astype(float) - if feature_names is not None: - self.feature_names_ = feature_names if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X) @@ -972,10 +980,14 @@

Methods

if isinstance(self, ClassifierMixin): self.classes_, y = np.unique(y, return_inverse=True) # deals with str inputs + if feature_names is None: + if isinstance(X, pd.DataFrame): + self.feature_names_ = X.columns + else: + self.feature_names_ = feature_names + X, y = check_X_y(X, y) y = y.astype(float) - if feature_names is not None: - self.feature_names_ = feature_names if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X) diff --git a/docs/tree/hierarchical_shrinkage.html b/docs/tree/hierarchical_shrinkage.html index 77ef8c24..743d559b 100644 --- a/docs/tree/hierarchical_shrinkage.html +++ b/docs/tree/hierarchical_shrinkage.html @@ -31,6 +31,7 @@ from sklearn.model_selection import cross_val_score from sklearn.model_selection import train_test_split from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier, export_text +from sklearn.ensemble import GradientBoostingClassifier from imodels.util import checks from imodels.util.tree import compute_tree_complexity @@ -49,6 +50,7 @@ ------ estimator_: sklearn tree or tree ensemble model (e.g. 
RandomForest or GradientBoosting) Defaults to CART Classification Tree with 20 max leaf ndoes + Note: this estimator will be directly modified reg_param: float Higher is more regularization (can be arbitrarily large, should not be < 0) @@ -100,22 +102,16 @@ left = tree.children_left[i] right = tree.children_right[i] is_leaf = left == right - n_samples = tree.n_node_samples[i] - if isinstance(self, RegressorMixin): - val = tree.value[i][0, 0] - else: - if len(tree.value[i][0]) == 1: - val = tree.value[i][0, 0] - else: - val = tree.value[i][0, 1] / (tree.value[i][0, 0] + tree.value[i][0, 1]) # binary classification + n_samples = tree.weighted_n_node_samples[i] + if isinstance(self, RegressorMixin) or isinstance(self.estimator_, GradientBoostingClassifier): + val = deepcopy(tree.value[i, :, :]) + else: # If classification, normalize to probability vector + val = tree.value[i, :, :] / n_samples + # Step 1: Update cum_sum # if root if parent_val is None and parent_num is None: - if not is_leaf: - self._shrink_tree(tree, reg_param, left, - parent_val=val, parent_num=n_samples, cum_sum=val) - self._shrink_tree(tree, reg_param, right, - parent_val=val, parent_num=n_samples, cum_sum=val) + cum_sum = val # if has parent else: @@ -123,51 +119,28 @@ val_new = (val - parent_val) / (1 + reg_param / parent_num) elif self.shrinkage_scheme_ == 'constant': val_new = (val - parent_val) / (1 + reg_param) - else: - val_new = val + else: # leaf_based + val_new = 0 cum_sum += val_new - if is_leaf: - if isinstance(self, RegressorMixin): - if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant': - tree.value[i, 0, 0] = cum_sum - else: - # tree.value[i, 0, 0] = cum_sum/(1 + reg_param/n_samples) - tree.value[i, 0, 0] = tree.value[0][0, 0] + (val - tree.value[0][0, 0]) / ( - 1 + reg_param / n_samples) - else: - if len(tree.value[i][0]) == 1: - if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant': - tree.value[i, 0, 0,] = cum_sum - else: - 
tree.value[i, 0, 0,] = tree.value[0][0, 0] + (val - tree.value[0][0, 0]) / ( - 1 + reg_param / n_samples) - else: - if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant': - tree.value[i, 0, 1] = cum_sum - tree.value[i, 0, 0] = 1.0 - cum_sum - else: - root_prediction = tree.value[0][0, 1] / (tree.value[0][0, 0] + tree.value[0][0, 1]) - tree.value[i, 0, 1] = root_prediction + (val - root_prediction) / ( - 1 + reg_param / n_samples) - tree.value[i, 0, 0] = 1.0 - tree.value[i, 0, 1] + + # Step 2: Update node values + if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant': + tree.value[i, :, :] = cum_sum + else: # leaf_based + if is_leaf: # update node values if leaf_based + root_val = tree.value[0, :, :] + tree.value[i, :, :] = root_val + (val - root_val) / (1 + reg_param / n_samples) else: - if isinstance(self, RegressorMixin): - tree.value[i][0, 0] = parent_val + val_new - else: - if len(tree.value[i][0]) == 1: - tree.value[i][0, 0] = parent_val + val_new - else: - tree.value[i][0, 1] = parent_val + val_new - tree.value[i][0, 0] = 1.0 - parent_val + val_new - - self._shrink_tree(tree, reg_param, left, - parent_val=val, parent_num=n_samples, cum_sum=cum_sum) - self._shrink_tree(tree, reg_param, right, - parent_val=val, parent_num=n_samples, cum_sum=cum_sum) + tree.value[i, :, :] = val - # edit the non-leaf nodes for later visualization (doesn't effect predictions) + # Step 3: Recurse if not leaf + if not is_leaf: + self._shrink_tree(tree, reg_param, left, + parent_val=val, parent_num=n_samples, cum_sum=deepcopy(cum_sum)) + self._shrink_tree(tree, reg_param, right, + parent_val=val, parent_num=n_samples, cum_sum=deepcopy(cum_sum)) - # pass # not sure exactly what to put here + # edit the non-leaf nodes for later visualization (doesn't effect predictions) return tree @@ -363,7 +336,8 @@

Classes

https://arxiv.org/abs/2202.00858

Params

estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting) -Defaults to CART Classification Tree with 20 max leaf ndoes

+Defaults to CART Classification Tree with 20 max leaf nodes +Note: this estimator will be directly modified

reg_param: float Higher is more regularization (can be arbitrarily large, should not be < 0)

shrinkage_scheme: str @@ -388,6 +362,7 @@

Params

------ estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting) Defaults to CART Classification Tree with 20 max leaf ndoes + Note: this estimator will be directly modified reg_param: float Higher is more regularization (can be arbitrarily large, should not be < 0) @@ -439,22 +414,16 @@

Params

left = tree.children_left[i] right = tree.children_right[i] is_leaf = left == right - n_samples = tree.n_node_samples[i] - if isinstance(self, RegressorMixin): - val = tree.value[i][0, 0] - else: - if len(tree.value[i][0]) == 1: - val = tree.value[i][0, 0] - else: - val = tree.value[i][0, 1] / (tree.value[i][0, 0] + tree.value[i][0, 1]) # binary classification + n_samples = tree.weighted_n_node_samples[i] + if isinstance(self, RegressorMixin) or isinstance(self.estimator_, GradientBoostingClassifier): + val = deepcopy(tree.value[i, :, :]) + else: # If classification, normalize to probability vector + val = tree.value[i, :, :] / n_samples + # Step 1: Update cum_sum # if root if parent_val is None and parent_num is None: - if not is_leaf: - self._shrink_tree(tree, reg_param, left, - parent_val=val, parent_num=n_samples, cum_sum=val) - self._shrink_tree(tree, reg_param, right, - parent_val=val, parent_num=n_samples, cum_sum=val) + cum_sum = val # if has parent else: @@ -462,51 +431,28 @@

Params

val_new = (val - parent_val) / (1 + reg_param / parent_num) elif self.shrinkage_scheme_ == 'constant': val_new = (val - parent_val) / (1 + reg_param) - else: - val_new = val + else: # leaf_based + val_new = 0 cum_sum += val_new - if is_leaf: - if isinstance(self, RegressorMixin): - if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant': - tree.value[i, 0, 0] = cum_sum - else: - # tree.value[i, 0, 0] = cum_sum/(1 + reg_param/n_samples) - tree.value[i, 0, 0] = tree.value[0][0, 0] + (val - tree.value[0][0, 0]) / ( - 1 + reg_param / n_samples) - else: - if len(tree.value[i][0]) == 1: - if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant': - tree.value[i, 0, 0,] = cum_sum - else: - tree.value[i, 0, 0,] = tree.value[0][0, 0] + (val - tree.value[0][0, 0]) / ( - 1 + reg_param / n_samples) - else: - if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant': - tree.value[i, 0, 1] = cum_sum - tree.value[i, 0, 0] = 1.0 - cum_sum - else: - root_prediction = tree.value[0][0, 1] / (tree.value[0][0, 0] + tree.value[0][0, 1]) - tree.value[i, 0, 1] = root_prediction + (val - root_prediction) / ( - 1 + reg_param / n_samples) - tree.value[i, 0, 0] = 1.0 - tree.value[i, 0, 1] + + # Step 2: Update node values + if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant': + tree.value[i, :, :] = cum_sum + else: # leaf_based + if is_leaf: # update node values if leaf_based + root_val = tree.value[0, :, :] + tree.value[i, :, :] = root_val + (val - root_val) / (1 + reg_param / n_samples) else: - if isinstance(self, RegressorMixin): - tree.value[i][0, 0] = parent_val + val_new - else: - if len(tree.value[i][0]) == 1: - tree.value[i][0, 0] = parent_val + val_new - else: - tree.value[i][0, 1] = parent_val + val_new - tree.value[i][0, 0] = 1.0 - parent_val + val_new - - self._shrink_tree(tree, reg_param, left, - parent_val=val, parent_num=n_samples, cum_sum=cum_sum) - self._shrink_tree(tree, 
reg_param, right, - parent_val=val, parent_num=n_samples, cum_sum=cum_sum) + tree.value[i, :, :] = val - # edit the non-leaf nodes for later visualization (doesn't effect predictions) + # Step 3: Recurse if not leaf + if not is_leaf: + self._shrink_tree(tree, reg_param, left, + parent_val=val, parent_num=n_samples, cum_sum=deepcopy(cum_sum)) + self._shrink_tree(tree, reg_param, right, + parent_val=val, parent_num=n_samples, cum_sum=deepcopy(cum_sum)) - # pass # not sure exactly what to put here + # edit the non-leaf nodes for later visualization (doesn't effect predictions) return tree @@ -658,7 +604,8 @@

Methods

https://arxiv.org/abs/2202.00858

Params

estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting) -Defaults to CART Classification Tree with 20 max leaf ndoes

+Defaults to CART Classification Tree with 20 max leaf nodes +Note: this estimator will be directly modified

reg_param: float Higher is more regularization (can be arbitrarily large, should not be < 0)

shrinkage_scheme: str @@ -787,7 +734,8 @@

Methods

https://arxiv.org/abs/2202.00858

Params

estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting) -Defaults to CART Classification Tree with 20 max leaf ndoes

+Defaults to CART Classification Tree with 20 max leaf nodes +Note: this estimator will be directly modified

reg_param: float Higher is more regularization (can be arbitrarily large, should not be < 0)

shrinkage_scheme: str diff --git a/imodels/tree/hierarchical_shrinkage.py b/imodels/tree/hierarchical_shrinkage.py index 21b09928..7932ab8e 100644 --- a/imodels/tree/hierarchical_shrinkage.py +++ b/imodels/tree/hierarchical_shrinkage.py @@ -27,6 +27,7 @@ def __init__(self, estimator_: BaseEstimator = DecisionTreeClassifier(max_leaf_n ------ estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting) Defaults to CART Classification Tree with 20 max leaf ndoes + Note: this estimator will be directly modified reg_param: float Higher is more regularization (can be arbitrarily large, should not be < 0) diff --git a/setup.py b/setup.py index 115cca60..188d8f81 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ setuptools.setup( name="imodels", - version="1.3.3", + version="1.3.4", author="Chandan Singh, Keyan Nasseri, Bin Yu, and others", author_email="chandan_singh@berkeley.edu", description="Implementations of various interpretable models",