From b564531c44790b6472a497b8994f8473409c58ad Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 25 Jan 2024 13:53:09 -0800
Subject: [PATCH] docstring fix, fix labels of confusion matrix

---
 ISLP/__init__.py | 47 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 5 deletions(-)

diff --git a/ISLP/__init__.py b/ISLP/__init__.py
index f409c19..6cd1ee1 100644
--- a/ISLP/__init__.py
+++ b/ISLP/__init__.py
@@ -6,9 +6,11 @@
 """
 
 from os.path import join as pjoin
-import pandas as pd, numpy as np
 from importlib.resources import (as_file,
                                  files)
+import pandas as pd, numpy as np
+from sklearn.metrics import confusion_matrix as _confusion_matrix
+from sklearn.metrics._classification import unique_labels
 
 # data originally saved via: [sm.datasets.get_rdataset(n, 'ISLR').data.to_csv('../ISLP/data/%s.csv' % n, index=False) for n in ['Carseats', 'College', 'Credit', 'Default', 'Hitters', 'Auto', 'OJ', 'Portfolio', 'Smarket', 'Wage', 'Weekly', 'Caravan']]
 
@@ -42,7 +44,15 @@ def _make_categorical(dataset):
             }
 _index = {'Auto':'name'}
 
+# Sorted names of the available datasets: the keys of `_unordered` and
+# `_ordered` plus four extra names. Shown in the `load_data` docstring.
+_datasets = sorted([*_unordered.keys(),
+                    *_ordered.keys(),
+                    'NCI60', 'Khan',
+                    'Bikeshare', 'NYSE'])
+
 def load_data(dataset):
+    
     if dataset == 'NCI60':
         with as_file(files('ISLP').joinpath('data', 'NCI60data.npy')) as features:
             X = np.load(features)
@@ -103,19 +113,46 @@ def load_data(dataset):
         return df.set_index('date')
     else:
         return _make_categorical(dataset)
+load_data.__doc__ = f"""
+Load dataset from ISLP package.
 
-from sklearn.metrics import confusion_matrix as _confusion_matrix
+Choices are: {_datasets}
+
+Parameters
+----------
+dataset : str
+    Name of the dataset to load. It should be one of the choices
+    listed above.
+
+Returns
+-------
+data : pd.DataFrame or dict
+    Either a `pd.DataFrame` representing the dataset or a dictionary
+    containing different parts of the dataset.
+
+"""
 
 def confusion_table(predicted_labels,
-                    true_labels):
+                    true_labels,
+                    labels=None):
     """
     Return a data frame version of confusion 
     matrix with rows given by predicted label
     and columns the truth.
+
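+    Parameters
+    ----------
+    predicted_labels : array-like
+        These will form the rows of the confusion matrix.
+    true_labels : array-like
+        These will form the columns of the confusion matrix.
+    labels : array-like, optional
+        Labels passed to `confusion_matrix`; if None, `unique_labels` of both inputs.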
     """
 
-    labels = sorted(np.unique(list(true_labels) +
-                              list(predicted_labels)))
+    if labels is None:
+        labels = unique_labels(true_labels,
+                               predicted_labels)
     C = _confusion_matrix(true_labels,
                           predicted_labels,
                           labels=labels)