From 24fb9946ad1d01bf8b615622c051a41af403d0ad Mon Sep 17 00:00:00 2001
From: Chandan Singh
Date: Wed, 24 Aug 2022 22:00:45 -0700
Subject: [PATCH] minor version bump
---
docs/tree/cart_ccp.html | 6 +-
docs/tree/figs.html | 24 +++-
docs/tree/hierarchical_shrinkage.html | 170 +++++++++----------------
imodels/tree/hierarchical_shrinkage.py | 1 +
setup.py | 2 +-
5 files changed, 83 insertions(+), 120 deletions(-)
diff --git a/docs/tree/cart_ccp.html b/docs/tree/cart_ccp.html
index 9777d938..ebc115fd 100644
--- a/docs/tree/cart_ccp.html
+++ b/docs/tree/cart_ccp.html
@@ -822,7 +822,8 @@ Methods
https://arxiv.org/abs/2202.00858
Params
estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
-Defaults to CART Classification Tree with 20 max leaf ndoes
+Defaults to CART Classification Tree with 20 max leaf nodes
+Note: this estimator will be directly modified
reg_param: float
Higher is more regularization (can be arbitrarily large, should not be < 0)
shrinkage_scheme: str
@@ -898,7 +899,8 @@
Methods
https://arxiv.org/abs/2202.00858
Params
estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
-Defaults to CART Classification Tree with 20 max leaf ndoes
+Defaults to CART Classification Tree with 20 max leaf nodes
+Note: this estimator will be directly modified
reg_param: float
Higher is more regularization (can be arbitrarily large, should not be < 0)
shrinkage_scheme: str
diff --git a/docs/tree/figs.html b/docs/tree/figs.html
index a52cfa2e..75315661 100644
--- a/docs/tree/figs.html
+++ b/docs/tree/figs.html
@@ -215,10 +215,14 @@
if isinstance(self, ClassifierMixin):
self.classes_, y = np.unique(y, return_inverse=True) # deals with str inputs
+ if feature_names is None:
+ if isinstance(X, pd.DataFrame):
+ self.feature_names_ = X.columns
+ else:
+ self.feature_names_ = feature_names
+
X, y = check_X_y(X, y)
y = y.astype(float)
- if feature_names is not None:
- self.feature_names_ = feature_names
if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X)
@@ -693,10 +697,14 @@
Params
if isinstance(self, ClassifierMixin):
self.classes_, y = np.unique(y, return_inverse=True) # deals with str inputs
+ if feature_names is None:
+ if isinstance(X, pd.DataFrame):
+ self.feature_names_ = X.columns
+ else:
+ self.feature_names_ = feature_names
+
X, y = check_X_y(X, y)
y = y.astype(float)
- if feature_names is not None:
- self.feature_names_ = feature_names
if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X)
@@ -972,10 +980,14 @@ Methods
if isinstance(self, ClassifierMixin):
self.classes_, y = np.unique(y, return_inverse=True) # deals with str inputs
+ if feature_names is None:
+ if isinstance(X, pd.DataFrame):
+ self.feature_names_ = X.columns
+ else:
+ self.feature_names_ = feature_names
+
X, y = check_X_y(X, y)
y = y.astype(float)
- if feature_names is not None:
- self.feature_names_ = feature_names
if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X)
diff --git a/docs/tree/hierarchical_shrinkage.html b/docs/tree/hierarchical_shrinkage.html
index 77ef8c24..743d559b 100644
--- a/docs/tree/hierarchical_shrinkage.html
+++ b/docs/tree/hierarchical_shrinkage.html
@@ -31,6 +31,7 @@
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier, export_text
+from sklearn.ensemble import GradientBoostingClassifier
from imodels.util import checks
from imodels.util.tree import compute_tree_complexity
@@ -49,6 +50,7 @@
------
estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
Defaults to CART Classification Tree with 20 max leaf ndoes
+ Note: this estimator will be directly modified
reg_param: float
Higher is more regularization (can be arbitrarily large, should not be < 0)
@@ -100,22 +102,16 @@
left = tree.children_left[i]
right = tree.children_right[i]
is_leaf = left == right
- n_samples = tree.n_node_samples[i]
- if isinstance(self, RegressorMixin):
- val = tree.value[i][0, 0]
- else:
- if len(tree.value[i][0]) == 1:
- val = tree.value[i][0, 0]
- else:
- val = tree.value[i][0, 1] / (tree.value[i][0, 0] + tree.value[i][0, 1]) # binary classification
+ n_samples = tree.weighted_n_node_samples[i]
+ if isinstance(self, RegressorMixin) or isinstance(self.estimator_, GradientBoostingClassifier):
+ val = deepcopy(tree.value[i, :, :])
+ else: # If classification, normalize to probability vector
+ val = tree.value[i, :, :] / n_samples
+ # Step 1: Update cum_sum
# if root
if parent_val is None and parent_num is None:
- if not is_leaf:
- self._shrink_tree(tree, reg_param, left,
- parent_val=val, parent_num=n_samples, cum_sum=val)
- self._shrink_tree(tree, reg_param, right,
- parent_val=val, parent_num=n_samples, cum_sum=val)
+ cum_sum = val
# if has parent
else:
@@ -123,51 +119,28 @@
val_new = (val - parent_val) / (1 + reg_param / parent_num)
elif self.shrinkage_scheme_ == 'constant':
val_new = (val - parent_val) / (1 + reg_param)
- else:
- val_new = val
+ else: # leaf_based
+ val_new = 0
cum_sum += val_new
- if is_leaf:
- if isinstance(self, RegressorMixin):
- if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant':
- tree.value[i, 0, 0] = cum_sum
- else:
- # tree.value[i, 0, 0] = cum_sum/(1 + reg_param/n_samples)
- tree.value[i, 0, 0] = tree.value[0][0, 0] + (val - tree.value[0][0, 0]) / (
- 1 + reg_param / n_samples)
- else:
- if len(tree.value[i][0]) == 1:
- if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant':
- tree.value[i, 0, 0,] = cum_sum
- else:
- tree.value[i, 0, 0,] = tree.value[0][0, 0] + (val - tree.value[0][0, 0]) / (
- 1 + reg_param / n_samples)
- else:
- if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant':
- tree.value[i, 0, 1] = cum_sum
- tree.value[i, 0, 0] = 1.0 - cum_sum
- else:
- root_prediction = tree.value[0][0, 1] / (tree.value[0][0, 0] + tree.value[0][0, 1])
- tree.value[i, 0, 1] = root_prediction + (val - root_prediction) / (
- 1 + reg_param / n_samples)
- tree.value[i, 0, 0] = 1.0 - tree.value[i, 0, 1]
+
+ # Step 2: Update node values
+ if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant':
+ tree.value[i, :, :] = cum_sum
+ else: # leaf_based
+ if is_leaf: # update node values if leaf_based
+ root_val = tree.value[0, :, :]
+ tree.value[i, :, :] = root_val + (val - root_val) / (1 + reg_param / n_samples)
else:
- if isinstance(self, RegressorMixin):
- tree.value[i][0, 0] = parent_val + val_new
- else:
- if len(tree.value[i][0]) == 1:
- tree.value[i][0, 0] = parent_val + val_new
- else:
- tree.value[i][0, 1] = parent_val + val_new
- tree.value[i][0, 0] = 1.0 - parent_val + val_new
-
- self._shrink_tree(tree, reg_param, left,
- parent_val=val, parent_num=n_samples, cum_sum=cum_sum)
- self._shrink_tree(tree, reg_param, right,
- parent_val=val, parent_num=n_samples, cum_sum=cum_sum)
+ tree.value[i, :, :] = val
- # edit the non-leaf nodes for later visualization (doesn't effect predictions)
+ # Step 3: Recurse if not leaf
+ if not is_leaf:
+ self._shrink_tree(tree, reg_param, left,
+ parent_val=val, parent_num=n_samples, cum_sum=deepcopy(cum_sum))
+ self._shrink_tree(tree, reg_param, right,
+ parent_val=val, parent_num=n_samples, cum_sum=deepcopy(cum_sum))
- # pass # not sure exactly what to put here
+ # edit the non-leaf nodes for later visualization (doesn't affect predictions)
return tree
@@ -363,7 +336,8 @@
https://arxiv.org/abs/2202.00858
Params
estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
-Defaults to CART Classification Tree with 20 max leaf ndoes
+Defaults to CART Classification Tree with 20 max leaf nodes
+Note: this estimator will be directly modified
reg_param: float
Higher is more regularization (can be arbitrarily large, should not be < 0)
shrinkage_scheme: str
@@ -388,6 +362,7 @@
Params
------
estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
Defaults to CART Classification Tree with 20 max leaf ndoes
+ Note: this estimator will be directly modified
reg_param: float
Higher is more regularization (can be arbitrarily large, should not be < 0)
@@ -439,22 +414,16 @@ Params
left = tree.children_left[i]
right = tree.children_right[i]
is_leaf = left == right
- n_samples = tree.n_node_samples[i]
- if isinstance(self, RegressorMixin):
- val = tree.value[i][0, 0]
- else:
- if len(tree.value[i][0]) == 1:
- val = tree.value[i][0, 0]
- else:
- val = tree.value[i][0, 1] / (tree.value[i][0, 0] + tree.value[i][0, 1]) # binary classification
+ n_samples = tree.weighted_n_node_samples[i]
+ if isinstance(self, RegressorMixin) or isinstance(self.estimator_, GradientBoostingClassifier):
+ val = deepcopy(tree.value[i, :, :])
+ else: # If classification, normalize to probability vector
+ val = tree.value[i, :, :] / n_samples
+ # Step 1: Update cum_sum
# if root
if parent_val is None and parent_num is None:
- if not is_leaf:
- self._shrink_tree(tree, reg_param, left,
- parent_val=val, parent_num=n_samples, cum_sum=val)
- self._shrink_tree(tree, reg_param, right,
- parent_val=val, parent_num=n_samples, cum_sum=val)
+ cum_sum = val
# if has parent
else:
@@ -462,51 +431,28 @@ Params
val_new = (val - parent_val) / (1 + reg_param / parent_num)
elif self.shrinkage_scheme_ == 'constant':
val_new = (val - parent_val) / (1 + reg_param)
- else:
- val_new = val
+ else: # leaf_based
+ val_new = 0
cum_sum += val_new
- if is_leaf:
- if isinstance(self, RegressorMixin):
- if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant':
- tree.value[i, 0, 0] = cum_sum
- else:
- # tree.value[i, 0, 0] = cum_sum/(1 + reg_param/n_samples)
- tree.value[i, 0, 0] = tree.value[0][0, 0] + (val - tree.value[0][0, 0]) / (
- 1 + reg_param / n_samples)
- else:
- if len(tree.value[i][0]) == 1:
- if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant':
- tree.value[i, 0, 0,] = cum_sum
- else:
- tree.value[i, 0, 0,] = tree.value[0][0, 0] + (val - tree.value[0][0, 0]) / (
- 1 + reg_param / n_samples)
- else:
- if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant':
- tree.value[i, 0, 1] = cum_sum
- tree.value[i, 0, 0] = 1.0 - cum_sum
- else:
- root_prediction = tree.value[0][0, 1] / (tree.value[0][0, 0] + tree.value[0][0, 1])
- tree.value[i, 0, 1] = root_prediction + (val - root_prediction) / (
- 1 + reg_param / n_samples)
- tree.value[i, 0, 0] = 1.0 - tree.value[i, 0, 1]
+
+ # Step 2: Update node values
+ if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant':
+ tree.value[i, :, :] = cum_sum
+ else: # leaf_based
+ if is_leaf: # update node values if leaf_based
+ root_val = tree.value[0, :, :]
+ tree.value[i, :, :] = root_val + (val - root_val) / (1 + reg_param / n_samples)
else:
- if isinstance(self, RegressorMixin):
- tree.value[i][0, 0] = parent_val + val_new
- else:
- if len(tree.value[i][0]) == 1:
- tree.value[i][0, 0] = parent_val + val_new
- else:
- tree.value[i][0, 1] = parent_val + val_new
- tree.value[i][0, 0] = 1.0 - parent_val + val_new
-
- self._shrink_tree(tree, reg_param, left,
- parent_val=val, parent_num=n_samples, cum_sum=cum_sum)
- self._shrink_tree(tree, reg_param, right,
- parent_val=val, parent_num=n_samples, cum_sum=cum_sum)
+ tree.value[i, :, :] = val
- # edit the non-leaf nodes for later visualization (doesn't effect predictions)
+ # Step 3: Recurse if not leaf
+ if not is_leaf:
+ self._shrink_tree(tree, reg_param, left,
+ parent_val=val, parent_num=n_samples, cum_sum=deepcopy(cum_sum))
+ self._shrink_tree(tree, reg_param, right,
+ parent_val=val, parent_num=n_samples, cum_sum=deepcopy(cum_sum))
- # pass # not sure exactly what to put here
+ # edit the non-leaf nodes for later visualization (doesn't affect predictions)
return tree
@@ -658,7 +604,8 @@ Methods
https://arxiv.org/abs/2202.00858
Params
estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
-Defaults to CART Classification Tree with 20 max leaf ndoes
+Defaults to CART Classification Tree with 20 max leaf nodes
+Note: this estimator will be directly modified
reg_param: float
Higher is more regularization (can be arbitrarily large, should not be < 0)
shrinkage_scheme: str
@@ -787,7 +734,8 @@
Methods
https://arxiv.org/abs/2202.00858
Params
estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
-Defaults to CART Classification Tree with 20 max leaf ndoes
+Defaults to CART Classification Tree with 20 max leaf nodes
+Note: this estimator will be directly modified
reg_param: float
Higher is more regularization (can be arbitrarily large, should not be < 0)
shrinkage_scheme: str
diff --git a/imodels/tree/hierarchical_shrinkage.py b/imodels/tree/hierarchical_shrinkage.py
index 21b09928..7932ab8e 100644
--- a/imodels/tree/hierarchical_shrinkage.py
+++ b/imodels/tree/hierarchical_shrinkage.py
@@ -27,6 +27,7 @@ def __init__(self, estimator_: BaseEstimator = DecisionTreeClassifier(max_leaf_n
------
estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
Defaults to CART Classification Tree with 20 max leaf ndoes
+ Note: this estimator will be directly modified
reg_param: float
Higher is more regularization (can be arbitrarily large, should not be < 0)
diff --git a/setup.py b/setup.py
index 115cca60..188d8f81 100644
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@
setuptools.setup(
name="imodels",
- version="1.3.3",
+ version="1.3.4",
author="Chandan Singh, Keyan Nasseri, Bin Yu, and others",
author_email="chandan_singh@berkeley.edu",
description="Implementations of various interpretable models",