Commit

update boostedrules + arg handling
csinva committed Dec 10, 2022
1 parent 8e034ce commit 8244701
Showing 33 changed files with 555 additions and 935 deletions.
docs/index.html (2 changes: 1 addition & 1 deletion)
@@ -282,7 +282,7 @@ <h2 id="support-for-different-tasks">Support for different tasks</h2>
<tr>
<td style="text-align: left;">Boosted rule set</td>
<td style="text-align: center;"><a href="https://csinva.io/imodels/rule_set/boosted_rules.html#imodels.rule_set.boosted_rules.BoostedRulesClassifier">BoostedRulesClassifier</a></td>
-<td style="text-align: center;"></td>
+<td style="text-align: center;"><a href="https://csinva.io/imodels/rule_set/boosted_rules.html#imodels.rule_set.boosted_rules.BoostedRulesRegressor">BoostedRulesRegressor</a></td>
<td></td>
</tr>
<tr>
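
The table row above adds a regressor counterpart to the boosted rule set. A minimal usage sketch, assuming both classes are importable from the package root as the linked docs suggest (constructor defaults and fit options may differ):

    import numpy as np
    from imodels import BoostedRulesClassifier, BoostedRulesRegressor  # assumed top-level exports

    rng = np.random.default_rng(0)
    X = rng.random((200, 3))
    y_clf = (X[:, 0] + X[:, 1] > 1).astype(int)  # binary target for the classifier
    y_reg = X[:, 0] - X[:, 2]                    # continuous target for the regressor

    clf = BoostedRulesClassifier()
    clf.fit(X, y_clf)
    reg = BoostedRulesRegressor()
    reg.fit(X, y_reg)
    print(clf.predict(X[:5]), reg.predict(X[:5]))
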
docs/rule_list/greedy_rule_list.html (31 changes: 4 additions & 27 deletions)
@@ -42,6 +42,7 @@
from sklearn.utils.validation import check_array, check_is_fitted
from sklearn.tree import DecisionTreeClassifier
from imodels.rule_list.rule_list import RuleList
+from imodels.util.arguments import check_fit_arguments


class GreedyRuleListClassifier(BaseEstimator, RuleList, ClassifierMixin):
@@ -73,15 +74,7 @@
depth
the depth of the current layer (used to recurse)
"""
-
-if feature_names is None:
-    if isinstance(X, pd.DataFrame):
-        self.feature_names_ = X.columns
-    else:
-        self.feature_names_ = ['X' + str(i) for i in range(X.shape[1])]
-else:
-    self.feature_names_ = feature_names
-X, y = check_X_y(X, y)
+X, y, feature_names = check_fit_arguments(self, X, y, feature_names)
return self.fit_node_recursive(X, y, depth=0, verbose=verbose)

def fit_node_recursive(self, X, y, depth: int, verbose):
@@ -422,15 +415,7 @@ <h2 id="params">Params</h2>
depth
the depth of the current layer (used to recurse)
"""
-
-if feature_names is None:
-    if isinstance(X, pd.DataFrame):
-        self.feature_names_ = X.columns
-    else:
-        self.feature_names_ = ['X' + str(i) for i in range(X.shape[1])]
-else:
-    self.feature_names_ = feature_names
-X, y = check_X_y(X, y)
+X, y, feature_names = check_fit_arguments(self, X, y, feature_names)
return self.fit_node_recursive(X, y, depth=0, verbose=verbose)

def fit_node_recursive(self, X, y, depth: int, verbose):
@@ -751,15 +736,7 @@ <h3>Methods</h3>
depth
the depth of the current layer (used to recurse)
"""
-
-if feature_names is None:
-    if isinstance(X, pd.DataFrame):
-        self.feature_names_ = X.columns
-    else:
-        self.feature_names_ = ['X' + str(i) for i in range(X.shape[1])]
-else:
-    self.feature_names_ = feature_names
-X, y = check_X_y(X, y)
+X, y, feature_names = check_fit_arguments(self, X, y, feature_names)
return self.fit_node_recursive(X, y, depth=0, verbose=verbose)</code></pre>
</details>
</dd>
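
The repeated edit in this file swaps per-method pandas and feature-name handling for the shared check_fit_arguments helper. A rough sketch of what such a helper plausibly does, reconstructed from the code it replaces; the real implementation in imodels/util/arguments.py may differ:

    import numpy as np
    import pandas as pd
    from sklearn.utils.validation import check_X_y

    def check_fit_arguments_sketch(model, X, y, feature_names):
        """Hypothetical stand-in for imodels.util.arguments.check_fit_arguments."""
        # feature names: explicit argument > DataFrame columns > generated placeholders
        if feature_names is None:
            if isinstance(X, pd.DataFrame):
                feature_names = list(X.columns)
            else:
                feature_names = ['X' + str(i) for i in range(np.asarray(X).shape[1])]
        model.feature_names_ = list(feature_names)

        # classifiers may also record their class labels here
        # (the OneR diff below drops its unique_labels call)
        if getattr(model, '_estimator_type', None) == 'classifier':
            model.classes_ = np.unique(y)

        # convert pandas objects to arrays and run sklearn's standard validation
        X, y = check_X_y(X, y)
        return X, y, feature_names
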
docs/rule_list/one_r.html (64 changes: 13 additions & 51 deletions)
@@ -39,41 +39,29 @@

from imodels import GreedyRuleListClassifier
from imodels.rule_list.rule_list import RuleList
+from imodels.util.arguments import check_fit_arguments


class OneRClassifier(GreedyRuleListClassifier):
def __init__(self, max_depth=5, class_weight=None, criterion='gini'):
self.max_depth = max_depth
-self.feature_names = None
+self.feature_names_ = None
self.class_weight = class_weight
self.criterion = criterion
self._estimator_type = 'classifier'

-def fit(self, X, y, depth=0, feature_names=None, verbose=False):
+def fit(self, X, y, feature_names=None):
"""Fit oneR
"""
-
-self.classes_ = unique_labels(y)
-
-# set self.feature_names and make sure x, y are not pandas type
-if 'pandas' in str(type(X)):
-    self.feature_names = X.columns
-    X = X.values
-else:
-    if feature_names is None:
-        self.feature_names = ['feat ' + str(i) for i in range(X.shape[1])]
-    if feature_names is not None:
-        self.feature_names = feature_names
-if 'pandas' in str(type(y)):
-    y = y.values
+X, y, feature_names = check_fit_arguments(self, X, y, feature_names)

ms = []
accs = np.zeros(X.shape[1])
for col_idx in range(X.shape[1]):
x = X[:, col_idx].reshape(-1, 1)
m = GreedyRuleListClassifier(max_depth=self.max_depth, class_weight=self.class_weight,
criterion=self.criterion)
-feat_names_single = [self.feature_names[col_idx]]
+feat_names_single = [self.feature_names_[col_idx]]
m.fit(x, y, feature_names=feat_names_single)
accs[col_idx] = np.mean(m.predict(x) == y)
ms.append(m)
@@ -122,36 +110,23 @@ <h2 id="params">Params</h2>
<pre><code class="python">class OneRClassifier(GreedyRuleListClassifier):
def __init__(self, max_depth=5, class_weight=None, criterion='gini'):
self.max_depth = max_depth
-self.feature_names = None
+self.feature_names_ = None
self.class_weight = class_weight
self.criterion = criterion
self._estimator_type = 'classifier'

-def fit(self, X, y, depth=0, feature_names=None, verbose=False):
+def fit(self, X, y, feature_names=None):
"""Fit oneR
"""
-
-self.classes_ = unique_labels(y)
-
-# set self.feature_names and make sure x, y are not pandas type
-if 'pandas' in str(type(X)):
-    self.feature_names = X.columns
-    X = X.values
-else:
-    if feature_names is None:
-        self.feature_names = ['feat ' + str(i) for i in range(X.shape[1])]
-    if feature_names is not None:
-        self.feature_names = feature_names
-if 'pandas' in str(type(y)):
-    y = y.values
+X, y, feature_names = check_fit_arguments(self, X, y, feature_names)

ms = []
accs = np.zeros(X.shape[1])
for col_idx in range(X.shape[1]):
x = X[:, col_idx].reshape(-1, 1)
m = GreedyRuleListClassifier(max_depth=self.max_depth, class_weight=self.class_weight,
criterion=self.criterion)
-feat_names_single = [self.feature_names[col_idx]]
+feat_names_single = [self.feature_names_[col_idx]]
m.fit(x, y, feature_names=feat_names_single)
accs[col_idx] = np.mean(m.predict(x) == y)
ms.append(m)
@@ -177,39 +152,26 @@ <h3>Ancestors</h3>
<h3>Methods</h3>
<dl>
<dt id="imodels.rule_list.one_r.OneRClassifier.fit"><code class="name flex">
-<span>def <span class="ident">fit</span></span>(<span>self, X, y, depth=0, feature_names=None, verbose=False)</span>
+<span>def <span class="ident">fit</span></span>(<span>self, X, y, feature_names=None)</span>
</code></dt>
<dd>
<div class="desc"><p>Fit oneR</p></div>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
-<pre><code class="python">def fit(self, X, y, depth=0, feature_names=None, verbose=False):
+<pre><code class="python">def fit(self, X, y, feature_names=None):
"""Fit oneR
"""
-
-self.classes_ = unique_labels(y)
-
-# set self.feature_names and make sure x, y are not pandas type
-if 'pandas' in str(type(X)):
-    self.feature_names = X.columns
-    X = X.values
-else:
-    if feature_names is None:
-        self.feature_names = ['feat ' + str(i) for i in range(X.shape[1])]
-    if feature_names is not None:
-        self.feature_names = feature_names
-if 'pandas' in str(type(y)):
-    y = y.values
+X, y, feature_names = check_fit_arguments(self, X, y, feature_names)

ms = []
accs = np.zeros(X.shape[1])
for col_idx in range(X.shape[1]):
x = X[:, col_idx].reshape(-1, 1)
m = GreedyRuleListClassifier(max_depth=self.max_depth, class_weight=self.class_weight,
criterion=self.criterion)
-feat_names_single = [self.feature_names[col_idx]]
+feat_names_single = [self.feature_names_[col_idx]]
m.fit(x, y, feature_names=feat_names_single)
accs[col_idx] = np.mean(m.predict(x) == y)
ms.append(m)
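
For OneRClassifier, the net effect is a simpler fit signature (the depth and verbose parameters are gone) and feature names stored on the sklearn-style feature_names_ attribute. A small usage sketch, assuming OneRClassifier is exported from the package root like the GreedyRuleListClassifier imported above:

    import numpy as np
    import pandas as pd
    from imodels import OneRClassifier  # assumed top-level export

    rng = np.random.default_rng(0)
    X = pd.DataFrame({'age': rng.integers(20, 70, 200), 'score': rng.random(200)})
    y = (X['age'] > 45).astype(int).values

    m = OneRClassifier(max_depth=3)
    m.fit(X, y)                      # new signature: no depth/verbose arguments
    print(m.feature_names_)          # populated via check_fit_arguments
    print(m.predict(X.values[:5]))
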