diff --git a/CompStats/__init__.py b/CompStats/__init__.py
index 4376582..e26ab70 100644
--- a/CompStats/__init__.py
+++ b/CompStats/__init__.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = '0.1.4'
+__version__ = '0.1.5'
 from CompStats.bootstrap import StatisticSamples
 from CompStats.measurements import CI, SE, difference_p_value
 from CompStats.performance import performance, difference, all_differences, plot_performance, plot_difference
diff --git a/CompStats/interface.py b/CompStats/interface.py
index 2c8aeca..3935d79 100644
--- a/CompStats/interface.py
+++ b/CompStats/interface.py
@@ -107,6 +107,7 @@ def __init__(self, y_true, *y_pred,
         self.num_samples = num_samples
         self.n_jobs = n_jobs
         self.use_tqdm = use_tqdm
+        self.sorting_func = np.linalg.norm
         self._init()
 
     def _init(self):
@@ -139,11 +140,14 @@ def __sklearn_clone__(self):
         ins = klass(**params)
         ins.predictions = dict(self.predictions)
         ins._statistic_samples._samples = self.statistic_samples._samples
+        ins.sorting_func = self.sorting_func
         return ins
 
     def __repr__(self):
         """Prediction statistics with standard error in parenthesis"""
-        return f"<{self.__class__.__name__}>\n{self}"
+        arg = 'score_func' if self.error_func is None else 'error_func'
+        func_name = self.statistic_func.__name__
+        return f"<{self.__class__.__name__}({arg}={func_name})>\n{self}"
 
     def __str__(self):
         """Prediction statistics with standard error in parenthesis"""
@@ -152,7 +156,14 @@ def __str__(self):
         output = ["Statistic with its standard error (se)"]
         output.append("statistic (se)")
         for key, value in self.statistic.items():
-            output.append(f'{value:0.4f} ({se[key]:0.4f}) <= {key}')
+            if isinstance(value, float):
+                desc = f'{value:0.4f} ({se[key]:0.4f}) <= {key}'
+            else:
+                desc = [f'{v:0.4f} ({k:0.4f})'
+                        for v, k in zip(value, se[key])]
+                desc = ', '.join(desc)
+                desc = f'{desc} <= {key}'
+            output.append(desc)
         return "\n".join(output)
 
     def __call__(self, y_pred, name=None):
@@ -202,6 +213,7 @@ def difference(self, wrt_to: str=None):
         diff_ins = Difference(statistic_samples=clone(self.statistic_samples),
                               statistic=self.statistic,
                               best=self.best[0])
+        diff_ins.sorting_func = self.sorting_func
         diff_ins.statistic_samples.calls = diff
         diff_ins.statistic_samples.info['best'] = self.best[0]
         return diff_ins
@@ -214,10 +226,20 @@ def best(self):
             return self._best
         except AttributeError:
             statistic = [(k, v) for k, v in self.statistic.items()]
-            statistic = sorted(statistic, key=lambda x: x[1],
+            statistic = sorted(statistic,
+                               key=lambda x: self.sorting_func(x[1]),
                                reverse=self.statistic_samples.BiB)
             self._best = statistic[0]
         return self._best
+
+    @property
+    def sorting_func(self):
+        """Rank systems when multiple performances are used"""
+        return self._sorting_func
+
+    @sorting_func.setter
+    def sorting_func(self, value):
+        self._sorting_func = value
 
     @property
     def statistic(self):
@@ -241,7 +263,8 @@ def statistic(self):
 
         data = sorted([(k, self.statistic_func(self.y_true, v))
                        for k, v in self.predictions.items()],
-                      key=lambda x: x[1], reverse=self.statistic_samples.BiB)
+                      key=lambda x: self.sorting_func(x[1]),
+                      reverse=self.statistic_samples.BiB)
         return dict(data)
 
     @property
@@ -419,6 +442,15 @@ class Difference:
     best:str=None
     statistic:dict=None
 
+    @property
+    def sorting_func(self):
+        """Rank systems when multiple performances are used"""
+        return self._sorting_func
+
+    @sorting_func.setter
+    def sorting_func(self, value):
+        self._sorting_func = value
+
     def __repr__(self):
         """p-value"""
         return f"<{self.__class__.__name__}>\n{self}"
@@ -426,12 +458,21 @@ def __repr__(self):
     def __str__(self):
         """p-value"""
         output = [f"difference p-values w.r.t {self.best}"]
-        for k, v in self.p_value().items():
-            output.append(f'{v:0.4f} <= {k}')
+        for key, value in self.p_value().items():
+            if isinstance(value, float):
+                output.append(f'{value:0.4f} <= {key}')
+            else:
+                desc = [f'{v:0.4f}' for v in value]
+                desc = ', '.join(desc)
+                desc = f'{desc} <= {key}'
+                output.append(desc)
         return "\n".join(output)
 
-    def p_value(self):
+    def p_value(self, right:bool=True):
         """Compute p_value of the differences
+
+        :param right: Estimate the p-value using :math:`\\text{sample} \\geq 2\\delta`
+        :type right: bool
 
         >>> from sklearn.svm import LinearSVC
         >>> from sklearn.ensemble import RandomForestClassifier
@@ -452,10 +493,20 @@ def p_value(self):
         """
         values = []
         sign = 1 if self.statistic_samples.BiB else -1
+        ndim = self.statistic[self.best].ndim
         for k, v in self.statistic_samples.calls.items():
             delta = 2 * sign * (self.statistic[self.best] - self.statistic[k])
-            values.append((k, (v > delta).mean()))
-        values.sort(key=lambda x: x[1])
+            if ndim == 0:
+                if right:
+                    values.append((k, (v >= delta).mean()))
+                else:
+                    values.append((k, (v <= 0).mean()))
+            else:
+                if right:
+                    values.append((k, (v >= delta).mean(axis=0)))
+                else:
+                    values.append((k, (v <= 0).mean(axis=0)))
+        values.sort(key=lambda x: self.sorting_func(x[1]))
         return dict(values)
 
     def plot(self, **kwargs):
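Usage sketch for the vector-valued statistics supported by the interface.py changes above (illustrative only; the dataset, the models, `num_samples=50`, and the `np.mean` ranking function are assumptions mirroring the new test added later in this patch):

```python
# Per-class F1 (average=None) makes Perf.statistic map each system to a
# numpy array instead of a float; the new sorting_func ranks those vectors.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from CompStats.metrics import f1_score

X, y = load_iris(return_X_y=True)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3)
forest = RandomForestClassifier().fit(X_train, y_train)
nb = GaussianNB().fit(X_train, y_train)

perf = f1_score(y_val, nb.predict(X_val),
                forest=forest.predict(X_val),
                num_samples=50, average=None)
# Systems are ranked with np.linalg.norm of the per-class vector by default;
# any callable that maps the vector to a scalar can be set before the
# results are read.
perf.sorting_func = np.mean
print(perf)                          # per-class "statistic (se)" per system

diff = perf.difference()
print(diff.p_value(right=False))     # one p-value per class and per system
```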
diff --git a/CompStats/metrics.py b/CompStats/metrics.py
index a7cfd8d..cbe04b2 100644
--- a/CompStats/metrics.py
+++ b/CompStats/metrics.py
@@ -11,15 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from functools import wraps
 from sklearn import metrics
 from CompStats.interface import Perf
-from CompStats.utils import perf_docs
+from CompStats.utils import metrics_docs
+
 
 ########################################################
 #################### Classification ####################
 ########################################################
-@perf_docs
+
+@metrics_docs(hy_name='y_pred', attr_name='score_func')
 def accuracy_score(y_true, *y_pred,
                    normalize=True,
                    sample_weight=None,
                    num_samples: int=500,
@@ -28,6 +31,7 @@ def accuracy_score(y_true, *y_pred,
                    **kwargs):
     """accuracy_score"""
 
+    @wraps(metrics.accuracy_score)
     def inner(y, hy):
         return metrics.accuracy_score(y, hy,
                                       normalize=normalize,
@@ -38,7 +42,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_pred', attr_name='score_func')
 def balanced_accuracy_score(y_true, *y_pred,
                             sample_weight=None, adjusted=False,
                             num_samples: int=500,
@@ -47,6 +51,7 @@ def balanced_accuracy_score(y_true, *y_pred,
                             **kwargs):
     """balanced_accuracy_score"""
 
+    @wraps(metrics.balanced_accuracy_score)
     def inner(y, hy):
         return metrics.balanced_accuracy_score(y, hy,
                                                adjusted=adjusted,
@@ -57,7 +62,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_score', attr_name='score_func')
 def top_k_accuracy_score(y_true, *y_score, k=2,
                          normalize=True,
                          sample_weight=None, labels=None,
@@ -67,6 +72,7 @@ def top_k_accuracy_score(y_true, *y_score, k=2,
                          **kwargs):
     """top_k_accuracy_score"""
 
+    @wraps(metrics.top_k_accuracy_score)
     def inner(y, hy):
         return metrics.top_k_accuracy_score(y, hy, k=k, normalize=normalize,
                                             sample_weight=sample_weight,
@@ -77,7 +83,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_score', attr_name='score_func')
 def average_precision_score(y_true, *y_score,
                             average='macro',
                             sample_weight=None,
@@ -87,6 +93,7 @@ def average_precision_score(y_true, *y_score,
                             **kwargs):
     """average_precision_score"""
 
+    @wraps(metrics.average_precision_score)
     def inner(y, hy):
         return metrics.average_precision_score(y, hy,
                                                average=average,
@@ -97,7 +104,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_proba', attr_name='error_func')
 def brier_score_loss(y_true, *y_proba,
                      sample_weight=None,
                      pos_label=None,
@@ -108,6 +115,7 @@ def brier_score_loss(y_true, *y_proba,
                      ):
     """brier_score_loss"""
 
+    @wraps(metrics.brier_score_loss)
     def inner(y, hy):
         return metrics.brier_score_loss(y, hy,
                                         sample_weight=sample_weight,
@@ -116,9 +124,9 @@ def inner(y, hy):
                 num_samples=num_samples, n_jobs=n_jobs,
                 use_tqdm=use_tqdm,
                 **kwargs)
 
-
-@perf_docs
+
+@metrics_docs(hy_name='y_pred', attr_name='score_func')
 def f1_score(y_true, *y_pred, labels=None, pos_label=1,
              average='binary', sample_weight=None, zero_division='warn',
             num_samples: int=500,
@@ -126,6 +134,7 @@ def f1_score(y_true, *y_pred, labels=None, pos_label=1,
              **kwargs):
     """f1_score"""
 
+    @wraps(metrics.f1_score)
     def inner(y, hy):
         return metrics.f1_score(y, hy, labels=labels,
                                 pos_label=pos_label,
@@ -138,7 +147,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_pred', attr_name='error_func')
 def log_loss(y_true, *y_pred,
              normalize=True,
             sample_weight=None,
@@ -148,6 +157,8 @@ def log_loss(y_true, *y_pred,
              use_tqdm=True,
              **kwargs):
     """log_loss"""
+
+    @wraps(metrics.log_loss)
     def inner(y, hy):
         return metrics.log_loss(y, hy, normalize=normalize,
                                 sample_weight=sample_weight,
@@ -158,7 +169,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_pred', attr_name='score_func')
 def precision_score(y_true,
                     *y_pred,
                     labels=None,
@@ -171,6 +182,8 @@ def precision_score(y_true,
                     use_tqdm=True,
                     **kwargs):
     """precision_score"""
+
+    @wraps(metrics.precision_score)
     def inner(y, hy):
         return metrics.precision_score(y, hy,
                                        labels=labels,
@@ -184,7 +197,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_pred', attr_name='score_func')
 def recall_score(y_true,
                  *y_pred,
                  labels=None,
@@ -197,6 +210,8 @@ def recall_score(y_true,
                  use_tqdm=True,
                  **kwargs):
     """recall_score"""
+
+    @wraps(metrics.recall_score)
     def inner(y, hy):
         return metrics.recall_score(y, hy,
                                     labels=labels,
@@ -210,7 +225,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_pred', attr_name='score_func')
 def jaccard_score(y_true,
                   *y_pred,
                   labels=None,
@@ -223,6 +238,8 @@ def jaccard_score(y_true,
                   use_tqdm=True,
                   **kwargs):
     """jaccard_score"""
+
+    @wraps(metrics.jaccard_score)
     def inner(y, hy):
         return metrics.jaccard_score(y, hy,
                                      labels=labels,
@@ -236,7 +253,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_score', attr_name='score_func')
 def roc_auc_score(y_true,
                   *y_score,
                   average='macro',
@@ -249,6 +266,8 @@ def roc_auc_score(y_true,
                   use_tqdm=True,
                   **kwargs):
     """roc_auc_score"""
+
+    @wraps(metrics.roc_auc_score)
     def inner(y, hy):
         return metrics.roc_auc_score(y, hy,
                                      average=average,
@@ -262,7 +281,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_proba', attr_name='score_func')
 def d2_log_loss_score(y_true, *y_proba,
                       sample_weight=None,
                       labels=None,
@@ -272,6 +291,7 @@ def d2_log_loss_score(y_true, *y_proba,
                       **kwargs):
     """d2_log_loss_score"""
 
+    @wraps(metrics.d2_log_loss_score)
     def inner(y, hy):
         return metrics.d2_log_loss_score(y, hy,
                                          sample_weight=sample_weight,
@@ -287,7 +307,7 @@ def inner(y, hy):
 ########################################################
 
 
-@perf_docs
+@metrics_docs(hy_name='y_pred', attr_name='score_func')
 def explained_variance_score(y_true,
                              *y_pred,
                              sample_weight=None,
@@ -298,6 +318,8 @@ def explained_variance_score(y_true,
                              use_tqdm=True,
                              **kwargs):
     """explained_variance_score"""
+
+    @wraps(metrics.explained_variance_score)
     def inner(y, hy):
         return metrics.explained_variance_score(y, hy,
                                                 sample_weight=sample_weight,
@@ -309,13 +331,15 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_pred', attr_name='error_func')
 def max_error(y_true, *y_pred,
               num_samples: int=500,
               n_jobs: int=-1,
               use_tqdm=True,
               **kwargs):
     """max_error"""
+
+    @wraps(metrics.max_error)
     def inner(y, hy):
         return metrics.max_error(y, hy)
     return Perf(y_true, *y_pred, score_func=None, error_func=inner,
@@ -324,7 +348,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_pred', attr_name='error_func')
 def mean_absolute_error(y_true,
                         *y_pred,
                         sample_weight=None,
@@ -334,6 +358,8 @@ def mean_absolute_error(y_true,
                         use_tqdm=True,
                         **kwargs):
     """mean_absolute_error"""
+
+    @wraps(metrics.mean_absolute_error)
     def inner(y, hy):
         return metrics.mean_absolute_error(y, hy,
                                            sample_weight=sample_weight,
@@ -345,7 +371,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_pred', attr_name='error_func')
 def mean_squared_error(y_true,
                        *y_pred,
                        sample_weight=None,
@@ -355,6 +381,8 @@ def mean_squared_error(y_true,
                        use_tqdm=True,
                        **kwargs):
     """mean_squared_error"""
+
+    @wraps(metrics.mean_squared_error)
     def inner(y, hy):
         return metrics.mean_squared_error(y, hy,
                                           sample_weight=sample_weight,
@@ -366,7 +394,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_pred', attr_name='error_func')
 def root_mean_squared_error(y_true,
                             *y_pred,
                             sample_weight=None,
@@ -376,6 +404,8 @@ def root_mean_squared_error(y_true,
                             use_tqdm=True,
                             **kwargs):
     """root_mean_squared_error"""
+
+    @wraps(metrics.root_mean_squared_error)
     def inner(y, hy):
         return metrics.root_mean_squared_error(y, hy,
                                                sample_weight=sample_weight,
@@ -387,7 +417,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_pred', attr_name='error_func')
 def mean_squared_log_error(y_true,
                            *y_pred,
                            sample_weight=None,
@@ -397,6 +427,8 @@ def mean_squared_log_error(y_true,
                            use_tqdm=True,
                            **kwargs):
     """mean_squared_log_error"""
+
+    @wraps(metrics.mean_squared_log_error)
     def inner(y, hy):
         return metrics.mean_squared_log_error(y, hy,
                                               sample_weight=sample_weight,
@@ -408,7 +440,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_pred', attr_name='error_func')
 def root_mean_squared_log_error(y_true,
                                 *y_pred,
                                 sample_weight=None,
@@ -418,6 +450,8 @@ def root_mean_squared_log_error(y_true,
                                 use_tqdm=True,
                                 **kwargs):
     """root_mean_squared_log_error"""
+
+    @wraps(metrics.root_mean_squared_log_error)
     def inner(y, hy):
         return metrics.root_mean_squared_log_error(y, hy,
                                                    sample_weight=sample_weight,
@@ -429,7 +463,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_pred', attr_name='error_func')
 def median_absolute_error(y_true,
                           *y_pred,
                           sample_weight=None,
@@ -439,10 +473,12 @@ def median_absolute_error(y_true,
                           use_tqdm=True,
                           **kwargs):
     """median_absolute_error"""
+
+    @wraps(metrics.median_absolute_error)
     def inner(y, hy):
         return metrics.median_absolute_error(y, hy,
-                                                sample_weight=sample_weight,
-                                                multioutput=multioutput)
+                                             sample_weight=sample_weight,
+                                             multioutput=multioutput)
     return Perf(y_true, *y_pred, score_func=None, error_func=inner,
                 num_samples=num_samples, n_jobs=n_jobs,
@@ -450,7 +486,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_pred', attr_name='score_func')
 def r2_score(y_true,
              *y_pred,
             sample_weight=None,
@@ -461,6 +497,8 @@ def r2_score(y_true,
              use_tqdm=True,
              **kwargs):
     """r2_score"""
+
+    @wraps(metrics.r2_score)
     def inner(y, hy):
         return metrics.r2_score(y, hy,
                                 sample_weight=sample_weight,
@@ -473,7 +511,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_pred', attr_name='error_func')
 def mean_poisson_deviance(y_true,
                           *y_pred,
                           sample_weight=None,
@@ -482,6 +520,8 @@ def mean_poisson_deviance(y_true,
                           use_tqdm=True,
                           **kwargs):
     """mean_poisson_deviance"""
+
+    @wraps(metrics.mean_poisson_deviance)
     def inner(y, hy):
         return metrics.mean_poisson_deviance(y, hy,
                                              sample_weight=sample_weight)
@@ -492,7 +532,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_pred', attr_name='error_func')
 def mean_gamma_deviance(y_true,
                         *y_pred,
                         sample_weight=None,
@@ -501,6 +541,8 @@ def mean_gamma_deviance(y_true,
                         use_tqdm=True,
                         **kwargs):
     """mean_gamma_deviance"""
+
+    @wraps(metrics.mean_gamma_deviance)
     def inner(y, hy):
         return metrics.mean_gamma_deviance(y, hy,
                                            sample_weight=sample_weight)
@@ -511,7 +553,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_pred', attr_name='error_func')
 def mean_absolute_percentage_error(y_true,
                                    *y_pred,
                                    sample_weight=None,
@@ -521,6 +563,8 @@ def mean_absolute_percentage_error(y_true,
                                    use_tqdm=True,
                                    **kwargs):
     """mean_absolute_percentage_error"""
+
+    @wraps(metrics.mean_absolute_percentage_error)
     def inner(y, hy):
         return metrics.mean_absolute_percentage_error(y, hy,
                                                       sample_weight=sample_weight,
@@ -532,7 +576,7 @@ def inner(y, hy):
                 **kwargs)
 
 
-@perf_docs
+@metrics_docs(hy_name='y_pred', attr_name='score_func')
 def d2_absolute_error_score(y_true,
                             *y_pred,
                             sample_weight=None,
@@ -542,6 +586,8 @@ def d2_absolute_error_score(y_true,
                             use_tqdm=True,
                             **kwargs):
     """d2_absolute_error_score"""
+
+    @wraps(metrics.d2_absolute_error_score)
     def inner(y, hy):
         return metrics.d2_absolute_error_score(y, hy,
                                                sample_weight=sample_weight,
diff --git a/CompStats/tests/test_metrics.py b/CompStats/tests/test_metrics.py
index e71b807..b0de63f 100644
--- a/CompStats/tests/test_metrics.py
+++ b/CompStats/tests/test_metrics.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import numpy as np
 from sklearn.svm import LinearSVC
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 from sklearn.naive_bayes import GaussianNB
@@ -19,6 +20,24 @@
 from sklearn import metrics
 
 
+def test_difference_f1_score():
+    """Test f1_score"""
+    from CompStats.metrics import f1_score
+
+    X, y = load_iris(return_X_y=True)
+    _ = train_test_split(X, y, test_size=0.3)
+    X_train, X_val, y_train, y_val = _
+    ens = RandomForestClassifier().fit(X_train, y_train)
+    nb = GaussianNB().fit(X_train, y_train)
+    perf = f1_score(y_val, nb.predict(X_val),
+                    forest=ens.predict(X_val),
+                    num_samples=50, average=None)
+    diff = perf.difference()
+    p_values = diff.p_value(right=False)
+    dd = list(p_values.values())[0]
+    assert isinstance(dd, np.ndarray)
+    print(diff)
+
 def test_f1_score():
     """Test f1_score"""
     from CompStats.metrics import f1_score
@@ -33,6 +52,11 @@ def test_f1_score():
     assert 'forest' in perf.statistic
     _ = metrics.f1_score(y_val, hy, average='macro')
     assert _ == perf.statistic['forest']
+    perf = f1_score(y_val, hy, average=None)
+    assert str(perf) is not None
+    nb = GaussianNB().fit(X_train, y_train)
+    perf(nb.predict(X_val))
+    assert str(perf) is not None
 
 
 def test_accuracy_score():
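A small synthetic illustration (not part of the patch) of the two estimators behind the `right` flag exercised by `test_difference_f1_score` above; here `samples` stands in for the bootstrap samples of the difference between the best system and another one that `Difference.p_value` iterates over:

```python
import numpy as np

rng = np.random.default_rng(0)
delta = 0.03                                 # observed difference: best - other
samples = rng.normal(delta, 0.02, size=500)  # bootstrapped difference samples

p_right = (samples >= 2 * delta).mean()      # right=True:  P(sample >= 2*delta)
p_left = (samples <= 0).mean()               # right=False: P(sample <= 0)
print(p_right, p_left)
```

Both quantities measure how often the bootstrap contradicts the observed advantage; with per-class (vector) statistics the same comparison is applied column-wise through `mean(axis=0)`.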
diff --git a/CompStats/utils.py b/CompStats/utils.py
index 2df855c..c40edd7 100644
--- a/CompStats/utils.py
+++ b/CompStats/utils.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from functools import wraps
 try:
     USE_TQDM = True
     from tqdm import tqdm
@@ -25,31 +26,32 @@ def progress_bar(arg, use_tqdm: bool=True, **kwargs):
     return tqdm(arg, **kwargs)
 
 
-from functools import wraps
+def metrics_docs(hy_name='y_pred', attr_name='score_func'):
+    """Decorator to set docs"""
+
+    def perf_docs(func):
+        """Decorator to Perf to write :py:class:`~sklearn.metrics` documentation"""
+
+        func.__doc__ = f""":py:class:`~CompStats.interface.Perf` with :py:func:`~sklearn.metrics.{func.__name__}` as :py:attr:`{attr_name}`. The parameters not described can be found in :py:func:`~sklearn.metrics.{func.__name__}`.
+
+        :param y_true: True measurement or could be a pandas.DataFrame where column label 'y' corresponds to the true measurement.
+        :type y_true: numpy.ndarray or pandas.DataFrame
+        :param {hy_name}: Predictions, the algorithms will be identified with alg-k where k=1 is the first argument included in :py:attr:`{hy_name}`.
+        :type {hy_name}: numpy.ndarray
+        :param kwargs: Predictions, the algorithms will be identified using the keyword
+        :type kwargs: numpy.ndarray
+        :param num_samples: Number of bootstrap samples, default=500.
+        :type num_samples: int
+        :param n_jobs: Number of jobs to compute the statistic, default=-1 corresponding to use all threads.
+        :type n_jobs: int
+        :param use_tqdm: Whether to use tqdm.tqdm to visualize the progress, default=True
+        :type use_tqdm: bool
+
+        """ + func.__doc__
+        @wraps(func)
+        def inner(*args, **kwargs):
+            return func(*args, **kwargs)
 
-def perf_docs(func):
-    """Decorator to Perf with any write :py:class:`~sklearn.metrics` documentation
-    """
-
-    func.__doc__ = f""":py:class:`~CompStats.interface.Perf` with :py:func:`~sklearn.metrics.{func.__name__}` as :py:attr:`score_func.` The parameters not described can be found in :py:func:`~sklearn.metrics.{func.__name__}`.
-
-:param y_true: True measurement or could be a pandas.DataFrame where column label 'y' corresponds to the true measurement.
-:type y_true: numpy.ndarray or pandas.DataFrame
-:param y_pred: Predictions, the algorithms will be identified with alg-k where k=1 is the first argument included in :py:attr:`y_pred.`
-:type y_pred: numpy.ndarray
-:param kwargs: Predictions, the algorithms will be identified using the keyword
-:type kwargs: numpy.ndarray
-:param num_samples: Number of bootstrap samples, default=500.
-:type num_samples: int
-:param n_jobs: Number of jobs to compute the statistic, default=-1 corresponding to use all threads.
-:type n_jobs: int
-:param use_tqdm: Whether to use tqdm.tqdm to visualize the progress, default=True
-:type use_tqdm: bool
-""" + func.__doc__
-
-    @wraps(func)
-    def inner(*args, **kwargs):
-        return func(*args, **kwargs)
-
-    return inner
\ No newline at end of file
+        return inner
+    return perf_docs
\ No newline at end of file
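For reference, a minimal sketch of the decorator pattern that metrics.py and utils.py now share (the `_bind_metric` helper below is hypothetical, not part of CompStats): `functools.wraps` copies the sklearn metric's name and docstring onto the bootstrapped callable, and the new `Perf.__repr__` reads that name via `statistic_func.__name__`.

```python
from functools import wraps

from sklearn import metrics


def _bind_metric(sk_metric, **fixed):
    """Hypothetical helper mirroring the inner functions in CompStats/metrics.py."""

    @wraps(sk_metric)               # copies __name__ and __doc__ from sk_metric
    def inner(y, hy):
        return sk_metric(y, hy, **fixed)

    return inner


scorer = _bind_metric(metrics.f1_score, average='macro')
print(scorer.__name__)                  # 'f1_score', so repr(Perf) can show score_func=f1_score
print(scorer.__doc__.splitlines()[0])   # first line of sklearn's f1_score docstring
```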