Skip to content

Commit

Permalink
minor version bump
Browse files Browse the repository at this point in the history
  • Loading branch information
csinva committed Aug 25, 2022
1 parent 9b856d7 commit 24fb994
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 120 deletions.
6 changes: 4 additions & 2 deletions docs/tree/cart_ccp.html
Original file line number Diff line number Diff line change
Expand Up @@ -822,7 +822,8 @@ <h3>Methods</h3>
<a href="https://arxiv.org/abs/2202.00858">https://arxiv.org/abs/2202.00858</a></p>
<h2 id="params">Params</h2>
<p>estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
Defaults to CART Classification Tree with 20 max leaf nodes</p>
Defaults to CART Classification Tree with 20 max leaf nodes
Note: this estimator will be directly modified</p>
<p>reg_param: float
Higher is more regularization (can be arbitrarily large, should not be &lt; 0)</p>
<p>shrinkage_scheme: str
Expand Down Expand Up @@ -898,7 +899,8 @@ <h3>Methods</h3>
<a href="https://arxiv.org/abs/2202.00858">https://arxiv.org/abs/2202.00858</a></p>
<h2 id="params">Params</h2>
<p>estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
Defaults to CART Classification Tree with 20 max leaf nodes</p>
Defaults to CART Classification Tree with 20 max leaf nodes
Note: this estimator will be directly modified</p>
<p>reg_param: float
Higher is more regularization (can be arbitrarily large, should not be &lt; 0)</p>
<p>shrinkage_scheme: str
Expand Down
24 changes: 18 additions & 6 deletions docs/tree/figs.html
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,14 @@
if isinstance(self, ClassifierMixin):
self.classes_, y = np.unique(y, return_inverse=True) # deals with str inputs

if feature_names is None:
if isinstance(X, pd.DataFrame):
self.feature_names_ = X.columns
else:
self.feature_names_ = feature_names

X, y = check_X_y(X, y)
y = y.astype(float)
if feature_names is not None:
self.feature_names_ = feature_names
if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X)

Expand Down Expand Up @@ -693,10 +697,14 @@ <h2 id="params">Params</h2>
if isinstance(self, ClassifierMixin):
self.classes_, y = np.unique(y, return_inverse=True) # deals with str inputs

if feature_names is None:
if isinstance(X, pd.DataFrame):
self.feature_names_ = X.columns
else:
self.feature_names_ = feature_names

X, y = check_X_y(X, y)
y = y.astype(float)
if feature_names is not None:
self.feature_names_ = feature_names
if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X)

Expand Down Expand Up @@ -972,10 +980,14 @@ <h3>Methods</h3>
if isinstance(self, ClassifierMixin):
self.classes_, y = np.unique(y, return_inverse=True) # deals with str inputs

if feature_names is None:
if isinstance(X, pd.DataFrame):
self.feature_names_ = X.columns
else:
self.feature_names_ = feature_names

X, y = check_X_y(X, y)
y = y.astype(float)
if feature_names is not None:
self.feature_names_ = feature_names
if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X)

Expand Down
170 changes: 59 additions & 111 deletions docs/tree/hierarchical_shrinkage.html
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier, export_text
from sklearn.ensemble import GradientBoostingClassifier

from imodels.util import checks
from imodels.util.tree import compute_tree_complexity
Expand All @@ -49,6 +50,7 @@
------
estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
Defaults to CART Classification Tree with 20 max leaf nodes
Note: this estimator will be directly modified

reg_param: float
Higher is more regularization (can be arbitrarily large, should not be &lt; 0)
Expand Down Expand Up @@ -100,74 +102,45 @@
left = tree.children_left[i]
right = tree.children_right[i]
is_leaf = left == right
n_samples = tree.n_node_samples[i]
if isinstance(self, RegressorMixin):
val = tree.value[i][0, 0]
else:
if len(tree.value[i][0]) == 1:
val = tree.value[i][0, 0]
else:
val = tree.value[i][0, 1] / (tree.value[i][0, 0] + tree.value[i][0, 1]) # binary classification
n_samples = tree.weighted_n_node_samples[i]
if isinstance(self, RegressorMixin) or isinstance(self.estimator_, GradientBoostingClassifier):
val = deepcopy(tree.value[i, :, :])
else: # If classification, normalize to probability vector
val = tree.value[i, :, :] / n_samples

# Step 1: Update cum_sum
# if root
if parent_val is None and parent_num is None:
if not is_leaf:
self._shrink_tree(tree, reg_param, left,
parent_val=val, parent_num=n_samples, cum_sum=val)
self._shrink_tree(tree, reg_param, right,
parent_val=val, parent_num=n_samples, cum_sum=val)
cum_sum = val

# if has parent
else:
if self.shrinkage_scheme_ == &#39;node_based&#39;:
val_new = (val - parent_val) / (1 + reg_param / parent_num)
elif self.shrinkage_scheme_ == &#39;constant&#39;:
val_new = (val - parent_val) / (1 + reg_param)
else:
val_new = val
else: # leaf_based
val_new = 0
cum_sum += val_new
if is_leaf:
if isinstance(self, RegressorMixin):
if self.shrinkage_scheme_ == &#39;node_based&#39; or self.shrinkage_scheme_ == &#39;constant&#39;:
tree.value[i, 0, 0] = cum_sum
else:
# tree.value[i, 0, 0] = cum_sum/(1 + reg_param/n_samples)
tree.value[i, 0, 0] = tree.value[0][0, 0] + (val - tree.value[0][0, 0]) / (
1 + reg_param / n_samples)
else:
if len(tree.value[i][0]) == 1:
if self.shrinkage_scheme_ == &#39;node_based&#39; or self.shrinkage_scheme_ == &#39;constant&#39;:
tree.value[i, 0, 0,] = cum_sum
else:
tree.value[i, 0, 0,] = tree.value[0][0, 0] + (val - tree.value[0][0, 0]) / (
1 + reg_param / n_samples)
else:
if self.shrinkage_scheme_ == &#39;node_based&#39; or self.shrinkage_scheme_ == &#39;constant&#39;:
tree.value[i, 0, 1] = cum_sum
tree.value[i, 0, 0] = 1.0 - cum_sum
else:
root_prediction = tree.value[0][0, 1] / (tree.value[0][0, 0] + tree.value[0][0, 1])
tree.value[i, 0, 1] = root_prediction + (val - root_prediction) / (
1 + reg_param / n_samples)
tree.value[i, 0, 0] = 1.0 - tree.value[i, 0, 1]

# Step 2: Update node values
if self.shrinkage_scheme_ == &#39;node_based&#39; or self.shrinkage_scheme_ == &#39;constant&#39;:
tree.value[i, :, :] = cum_sum
else: # leaf_based
if is_leaf: # update node values if leaf_based
root_val = tree.value[0, :, :]
tree.value[i, :, :] = root_val + (val - root_val) / (1 + reg_param / n_samples)
else:
if isinstance(self, RegressorMixin):
tree.value[i][0, 0] = parent_val + val_new
else:
if len(tree.value[i][0]) == 1:
tree.value[i][0, 0] = parent_val + val_new
else:
tree.value[i][0, 1] = parent_val + val_new
tree.value[i][0, 0] = 1.0 - parent_val + val_new

self._shrink_tree(tree, reg_param, left,
parent_val=val, parent_num=n_samples, cum_sum=cum_sum)
self._shrink_tree(tree, reg_param, right,
parent_val=val, parent_num=n_samples, cum_sum=cum_sum)
tree.value[i, :, :] = val

# edit the non-leaf nodes for later visualization (doesn&#39;t affect predictions)
# Step 3: Recurse if not leaf
if not is_leaf:
self._shrink_tree(tree, reg_param, left,
parent_val=val, parent_num=n_samples, cum_sum=deepcopy(cum_sum))
self._shrink_tree(tree, reg_param, right,
parent_val=val, parent_num=n_samples, cum_sum=deepcopy(cum_sum))

# pass # not sure exactly what to put here
# edit the non-leaf nodes for later visualization (doesn&#39;t affect predictions)

return tree

Expand Down Expand Up @@ -363,7 +336,8 @@ <h2 class="section-title" id="header-classes">Classes</h2>
<a href="https://arxiv.org/abs/2202.00858">https://arxiv.org/abs/2202.00858</a></p>
<h2 id="params">Params</h2>
<p>estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
Defaults to CART Classification Tree with 20 max leaf nodes</p>
Defaults to CART Classification Tree with 20 max leaf nodes
Note: this estimator will be directly modified</p>
<p>reg_param: float
Higher is more regularization (can be arbitrarily large, should not be &lt; 0)</p>
<p>shrinkage_scheme: str
Expand All @@ -388,6 +362,7 @@ <h2 id="params">Params</h2>
------
estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
Defaults to CART Classification Tree with 20 max leaf nodes
Note: this estimator will be directly modified

reg_param: float
Higher is more regularization (can be arbitrarily large, should not be &lt; 0)
Expand Down Expand Up @@ -439,74 +414,45 @@ <h2 id="params">Params</h2>
left = tree.children_left[i]
right = tree.children_right[i]
is_leaf = left == right
n_samples = tree.n_node_samples[i]
if isinstance(self, RegressorMixin):
val = tree.value[i][0, 0]
else:
if len(tree.value[i][0]) == 1:
val = tree.value[i][0, 0]
else:
val = tree.value[i][0, 1] / (tree.value[i][0, 0] + tree.value[i][0, 1]) # binary classification
n_samples = tree.weighted_n_node_samples[i]
if isinstance(self, RegressorMixin) or isinstance(self.estimator_, GradientBoostingClassifier):
val = deepcopy(tree.value[i, :, :])
else: # If classification, normalize to probability vector
val = tree.value[i, :, :] / n_samples

# Step 1: Update cum_sum
# if root
if parent_val is None and parent_num is None:
if not is_leaf:
self._shrink_tree(tree, reg_param, left,
parent_val=val, parent_num=n_samples, cum_sum=val)
self._shrink_tree(tree, reg_param, right,
parent_val=val, parent_num=n_samples, cum_sum=val)
cum_sum = val

# if has parent
else:
if self.shrinkage_scheme_ == &#39;node_based&#39;:
val_new = (val - parent_val) / (1 + reg_param / parent_num)
elif self.shrinkage_scheme_ == &#39;constant&#39;:
val_new = (val - parent_val) / (1 + reg_param)
else:
val_new = val
else: # leaf_based
val_new = 0
cum_sum += val_new
if is_leaf:
if isinstance(self, RegressorMixin):
if self.shrinkage_scheme_ == &#39;node_based&#39; or self.shrinkage_scheme_ == &#39;constant&#39;:
tree.value[i, 0, 0] = cum_sum
else:
# tree.value[i, 0, 0] = cum_sum/(1 + reg_param/n_samples)
tree.value[i, 0, 0] = tree.value[0][0, 0] + (val - tree.value[0][0, 0]) / (
1 + reg_param / n_samples)
else:
if len(tree.value[i][0]) == 1:
if self.shrinkage_scheme_ == &#39;node_based&#39; or self.shrinkage_scheme_ == &#39;constant&#39;:
tree.value[i, 0, 0,] = cum_sum
else:
tree.value[i, 0, 0,] = tree.value[0][0, 0] + (val - tree.value[0][0, 0]) / (
1 + reg_param / n_samples)
else:
if self.shrinkage_scheme_ == &#39;node_based&#39; or self.shrinkage_scheme_ == &#39;constant&#39;:
tree.value[i, 0, 1] = cum_sum
tree.value[i, 0, 0] = 1.0 - cum_sum
else:
root_prediction = tree.value[0][0, 1] / (tree.value[0][0, 0] + tree.value[0][0, 1])
tree.value[i, 0, 1] = root_prediction + (val - root_prediction) / (
1 + reg_param / n_samples)
tree.value[i, 0, 0] = 1.0 - tree.value[i, 0, 1]

# Step 2: Update node values
if self.shrinkage_scheme_ == &#39;node_based&#39; or self.shrinkage_scheme_ == &#39;constant&#39;:
tree.value[i, :, :] = cum_sum
else: # leaf_based
if is_leaf: # update node values if leaf_based
root_val = tree.value[0, :, :]
tree.value[i, :, :] = root_val + (val - root_val) / (1 + reg_param / n_samples)
else:
if isinstance(self, RegressorMixin):
tree.value[i][0, 0] = parent_val + val_new
else:
if len(tree.value[i][0]) == 1:
tree.value[i][0, 0] = parent_val + val_new
else:
tree.value[i][0, 1] = parent_val + val_new
tree.value[i][0, 0] = 1.0 - parent_val + val_new

self._shrink_tree(tree, reg_param, left,
parent_val=val, parent_num=n_samples, cum_sum=cum_sum)
self._shrink_tree(tree, reg_param, right,
parent_val=val, parent_num=n_samples, cum_sum=cum_sum)
tree.value[i, :, :] = val

# edit the non-leaf nodes for later visualization (doesn&#39;t affect predictions)
# Step 3: Recurse if not leaf
if not is_leaf:
self._shrink_tree(tree, reg_param, left,
parent_val=val, parent_num=n_samples, cum_sum=deepcopy(cum_sum))
self._shrink_tree(tree, reg_param, right,
parent_val=val, parent_num=n_samples, cum_sum=deepcopy(cum_sum))

# pass # not sure exactly what to put here
# edit the non-leaf nodes for later visualization (doesn&#39;t affect predictions)

return tree

Expand Down Expand Up @@ -658,7 +604,8 @@ <h3>Methods</h3>
<a href="https://arxiv.org/abs/2202.00858">https://arxiv.org/abs/2202.00858</a></p>
<h2 id="params">Params</h2>
<p>estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
Defaults to CART Classification Tree with 20 max leaf nodes</p>
Defaults to CART Classification Tree with 20 max leaf nodes
Note: this estimator will be directly modified</p>
<p>reg_param: float
Higher is more regularization (can be arbitrarily large, should not be &lt; 0)</p>
<p>shrinkage_scheme: str
Expand Down Expand Up @@ -787,7 +734,8 @@ <h3>Methods</h3>
<a href="https://arxiv.org/abs/2202.00858">https://arxiv.org/abs/2202.00858</a></p>
<h2 id="params">Params</h2>
<p>estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
Defaults to CART Classification Tree with 20 max leaf nodes</p>
Defaults to CART Classification Tree with 20 max leaf nodes
Note: this estimator will be directly modified</p>
<p>reg_param: float
Higher is more regularization (can be arbitrarily large, should not be &lt; 0)</p>
<p>shrinkage_scheme: str
Expand Down
1 change: 1 addition & 0 deletions imodels/tree/hierarchical_shrinkage.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def __init__(self, estimator_: BaseEstimator = DecisionTreeClassifier(max_leaf_n
------
estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
Defaults to CART Classification Tree with 20 max leaf nodes
Note: this estimator will be directly modified
reg_param: float
Higher is more regularization (can be arbitrarily large, should not be < 0)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

setuptools.setup(
name="imodels",
version="1.3.3",
version="1.3.4",
author="Chandan Singh, Keyan Nasseri, Bin Yu, and others",
author_email="chandan_singh@berkeley.edu",
description="Implementations of various interpretable models",
Expand Down

0 comments on commit 24fb994

Please sign in to comment.