diff --git a/skcriteria/cmp/ranks_cmp.py b/skcriteria/cmp/ranks_cmp.py index 581e9a9..4bf9cf8 100644 --- a/skcriteria/cmp/ranks_cmp.py +++ b/skcriteria/cmp/ranks_cmp.py @@ -30,6 +30,7 @@ from sklearn import metrics as _skl_metrics from ..core import SKCMethodABC +from ..madm import RankResult from ..utils import AccessorABC, Bunch, unique_names @@ -49,18 +50,28 @@ class RanksComparator(SKCMethodABC): - """Clase comparadora de rankings. + """Rankings comparator object. - Esta clase tiene como proposito contener una colección de rankings - que fueran obtenidos de manera diferente y los cuales desean ser - comparados. + This class is intended to contain a collection of rankings on which you + want to do comparative analysis. - Todos los rankings tienen que tener exactamente las mismas alternativas - aunque su orden puede variar. + All rankings must have exactly the same alternatives, although their order + may vary. - Esta coleccion puede tiene que ser una interable + All methods support the ``untied`` parameter, which serves to untie + rankings in case there are results that can assign more than one + alternative to the same position (e.g.``ELECTRE2``). + Parameters + ---------- + ranks : list + List of (name, ranking) tuples of ``skcriteria.madm.RankResult`` + with the same alternatives. + See Also + -------- + skcriteria.cmp.mkrank_cmp : Convenience function for simplified + ranks comparator construction. 
""" @@ -68,21 +79,24 @@ class RanksComparator(SKCMethodABC): _skcriteria_parameters = ["ranks"] def __init__(self, ranks): - self._validate_parts(ranks) + ranks = list(ranks) + self._validate_ranks(ranks) self._ranks = ranks # INTERNALS =============================================================== - def _validate_parts(self, parts): + def _validate_ranks(self, ranks): - if len(parts) <= 1: + if len(ranks) <= 1: raise ValueError("Please provide more than one ranking") used_names = set() - first_alternatives = set(parts[0][1].alternatives) - for name, part in parts: + first_alternatives = set(ranks[0][1].alternatives) + for name, part in ranks: if not isinstance(name, str): raise ValueError("'name' must be instance of str") + if not isinstance(part, RankResult): + raise TypeError("ranks must be instance of madm.RankResult") if name in used_names: raise ValueError(f"Duplicated name {name!r}") @@ -98,7 +112,7 @@ def _validate_parts(self, parts): # PROPERTIES ============================================================== @property def ranks(self): - """List of ranks in the comparator""" + """List of ranks in the comparator.""" return list(self._ranks) @property @@ -151,6 +165,25 @@ def __hash__(self): # TO DATA ================================================================= def to_dataframe(self, *, untied=False): + """Convert the entire RanksComparator into a dataframe. + + The alternatives are the rows, and the different rankings are the + columns. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + + Returns + ------- + :py:class:`pd.DataFrame` + A RanksComparator as pandas DataFrame. 
+ + """ columns = { rank_name: rank.to_series(untied=untied) for rank_name, rank in self._ranks @@ -161,20 +194,92 @@ def to_dataframe(self, *, untied=False): return df - def corr(self, *, untied=False): - return self.to_dataframe(untied=untied).corr() + def corr(self, *, untied=False, **kwargs): + """Compute pairwise correlation of rankings, excluding NA/null values. + + By default the pearson correlation coefficient is used. + + Please check the full documentation of a ``pandas.DataFrame.corr()`` + method for details about the implementation. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + kwargs: + Other keyword arguments are passed to the + ``pandas.DataFrame.corr()`` method. + + Returns + ------- + :py:class:`pd.DataFrame` + A DataFrame with the correlation between rankings. + + """ + return self.to_dataframe(untied=untied).corr(**kwargs) + + def cov(self, *, untied=False, **kwargs): + """Compute pairwise covariance of rankings, excluding NA/null values. + + Please check the full documentation of a ``pandas.DataFrame.cov()`` + method for details about the implementation. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + kwargs: + Other keyword arguments are passed to the + ``pandas.DataFrame.cov()`` method. + + Returns + ------- + :py:class:`pd.DataFrame` + A DataFrame with the covariance between rankings. 
- def cov(self, *, untied=False): - return self.to_dataframe(untied=untied).cov() + """ + return self.to_dataframe(untied=untied).cov(**kwargs) + + def r2_score(self, *, untied=False, **kwargs): + """Compute pairwise coefficient of determination regression score \ + function of rankings, excluding NA/null values. + + Best possible score is 1.0 and it can be negative (because the + model can be arbitrarily worse). + + Please check the full documentation of a ``sklearn.metrics.r2_score`` + function for details about the implementation and the behaviour. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + kwargs: + Other keyword arguments are passed to the + ``sklearn.metrics.r2_score()`` function. + + Returns + ------- + :py:class:`pd.DataFrame` + A DataFrame with the coefficient of determination between rankings. - def r2_score(self, *, untied=False): + """ df = self.to_dataframe(untied=untied) # here we are going to create a dict of dict rows = defaultdict(dict) # combine the methods pairwise for r0, r1 in it.combinations(df.columns, 2): - r2_score = _skl_metrics.r2_score(df[r0], df[r1]) + r2_score = _skl_metrics.r2_score(df[r0], df[r1], **kwargs) # add the metrics in both directions rows[r0][r1] = r2_score @@ -189,9 +294,43 @@ def r2_score(self, *, untied=False): return r2_df - def distance(self, *, untied=False, metric="hamming"): + def distance(self, *, untied=False, metric="hamming", **kwargs): + """Compute pairwise distance between rankings. + + By default the 'hamming' distance is used, which is simply the + proportion of disagreeing components in Two rankings. + + Please check the full documentation of a + ``scipy.spatial.distance.pdist`` function for details about the + implementation and the behaviour. 
+ + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + metric: str or function, default ``"hamming"`` + The distance metric to use. The distance function can + be 'braycurtis', 'canberra', 'chebyshev', 'cityblock', + 'correlation', 'cosine', 'dice', 'euclidean', 'hamming', + 'jaccard', 'jensenshannon', 'kulczynski1', + 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', + 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', + 'sqeuclidean', 'yule'. + kwargs: + Other keyword arguments are passed to the + ``scipy.spatial.distance.pdist()`` function. + + Returns + ------- + :py:class:`pd.DataFrame` + A DataFrame with the distance between rankings. + + """ df = self.to_dataframe(untied=untied).T - dis_array = distance.pdist(df, metric=metric) + dis_array = distance.pdist(df, metric=metric, **kwargs) dis_mtx = distance.squareform(dis_array) dis_df = pd.DataFrame( dis_mtx, columns=df.index.copy(), index=df.index.copy() @@ -213,6 +352,25 @@ def plot(self): class RanksComparatorPlotter(AccessorABC): + """RanksComparator plot utilities. + + Kind of plot to produce: + + - 'flow' : Changes in the rankings of the alternatives as flow lines + (default) + - 'reg' : Pairwise rankings data and a linear regression model fit plot. + - 'heatmap' : Rankings as a color-encoded matrix. + - 'corr' : Pairwise correlation of rankings as a color-encoded matrix. + - 'cov' : Pairwise covariance of rankings as a color-encoded matrix. + - 'r2_score' : Pairwise coefficient of determination regression score \ + function of rankings as a color-encoded matrix. + - 'distance' : Pairwise distance between rankings as a color-encoded \ + matrix. 
+ - 'box' : Box-plot of rankings with respect to alternatives. + - 'bar' : Ranking of alternatives by method with vertical bars. + - 'barh' : Ranking of alternatives by method with horizontal bars. + + """ _default_kind = "flow" @@ -223,6 +381,28 @@ def __init__(self, ranks_cmp): # These plots have a much more manually orchestrated code. def flow(self, *, untied=False, grid_kws=None, **kwargs): + """Represents changes in the rankings of the alternatives as lines \ + flowing through the ranking-methods. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + grid_kws: dict or None + Dict with keyword arguments passed to + ``matplotlib.axes.Axes.grid``. + kwargs: + Other keyword arguments are passed to the ``seaborn.lineplot()`` + function, except for ``data``, ``estimator`` and ``sort``. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ df = self._ranks_cmp.to_dataframe(untied=untied) ax = sns.lineplot(data=df.T, estimator=None, sort=False, **kwargs) @@ -243,9 +423,40 @@ def reg( palette=None, legend=True, r2_fmt=".2g", + r2_kws=None, **kwargs, ): + """Plot pairwise rankings data and a linear regression model fit. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + r2 : bool, default ``True`` + If True, the coefficient of determination results are added to the + regression legend. + palette: matplotlib/seaborn color palette, default ``None`` + Set of colors for mapping the hue variable. + legend: bool, default ``True`` + If False, suppress the legend for semantic variables. 
+ r2_fmt: str, default ``".2g"`` + String formatting code to use when adding the coefficient of + determination. + r2_kws: dict or None + Dict with keyword arguments passed to the + ``sklearn.metrics.r2_score()`` function. + kwargs: + Other keyword arguments are passed to the ``seaborn.regplot()`` + function. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + """ df = self._ranks_cmp.to_dataframe(untied=untied) # Just to ensure that no manual color reaches regplot @@ -255,16 +466,17 @@ def reg( f"{cls_name}.reg() got an unexpected keyword argument 'color'" ) - # we create the infinite cycle of colors for the palette, - # so we take out as we need - colors = it.cycle(sns.color_palette(palette=palette)) - # if there is a custom axis, we take it out ax = kwargs.pop("ax", None) # r2 - if r2: - r2_df = self._ranks_cmp.r2_score(untied=untied) + if legend and r2: + r2_kws = {} if r2_kws is None else r2_kws + r2_df = self._ranks_cmp.r2_score(untied=untied, **r2_kws) + + # we create the infinite cycle of colors for the palette, + # so we take out as we need + colors = it.cycle(sns.color_palette(palette=palette)) # pairwise ranks iteration for x, y in it.combinations(df.columns, 2): @@ -272,7 +484,7 @@ def reg( # The r2 correlation index r2_label = "" - if r2: + if legend and r2: r2_score = format(r2_df[x][y], r2_fmt) r2_label = f" - $R^2={r2_score}$" @@ -293,31 +505,160 @@ def reg( # Thin wrapper around seaborn plots def heatmap(self, *, untied=False, **kwargs): + """Plot the rankings as a color-encoded matrix. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + kwargs: + Other keyword arguments are passed to the ``seaborn.heatmap()`` + function. 
+ + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ df = self._ranks_cmp.to_dataframe(untied=untied) kwargs.setdefault("annot", True) kwargs.setdefault("cbar_kws", {"label": RANKS_LABELS[untied]}) return sns.heatmap(data=df, **kwargs) - def corr(self, *, untied=False, **kwargs): - corr = self._ranks_cmp.corr(untied=untied) + def corr(self, *, untied=False, corr_kws=None, **kwargs): + """Plot the pairwise correlation of rankings as a color-encoded matrix. + + By default the pearson correlation coefficient is used. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + corr_kws: dict or None + Dict with keywords arguments passed the + ``pandas.DataFrame.corr()`` method. + kwargs: + Other keyword arguments are passed to the ``seaborn.heatmap()`` + function. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ + corr_kws = {} if corr_kws is None else corr_kws + corr = self._ranks_cmp.corr(untied=untied, **corr_kws) + kwargs.setdefault("annot", True) kwargs.setdefault("cbar_kws", {"label": "Correlation"}) return sns.heatmap(data=corr, **kwargs) - def cov(self, *, untied=False, **kwargs): - cov = self._ranks_cmp.cov(untied=untied) + def cov(self, *, untied=False, cov_kws=None, **kwargs): + """Plot the pairwise covariance of rankings as a color-encoded matrix. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + cov_kws: dict or None + Dict with keywords arguments passed the + ``pandas.DataFrame.cov()`` method. 
+ kwargs: + Other keyword arguments are passed to the ``seaborn.heatmap()`` + function. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ + cov_kws = {} if cov_kws is None else cov_kws + cov = self._ranks_cmp.cov(untied=untied, **cov_kws) + kwargs.setdefault("annot", True) kwargs.setdefault("cbar_kws", {"label": "Covariance"}) return sns.heatmap(data=cov, **kwargs) - def r2_score(self, untied=False, **kwargs): - r2 = self._ranks_cmp.r2_score(untied=untied) + def r2_score(self, untied=False, r2_kws=None, **kwargs): + """Plot the pairwise coefficient of determination regression score \ + function of rankings as a color-encoded matrix. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + r2_kws: dict or None + Dict with keyword arguments passed to the + ``sklearn.metrics.r2_score()`` function. + kwargs: + Other keyword arguments are passed to the ``seaborn.heatmap()`` + function. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ + r2_kws = {} if r2_kws is None else r2_kws + r2 = self._ranks_cmp.r2_score(untied=untied, **r2_kws) + kwargs.setdefault("annot", True) kwargs.setdefault("cbar_kws", {"label": "$R^2$"}) return sns.heatmap(data=r2, **kwargs) - def distance(self, *, untied=False, metric="hamming", **kwargs): - dis = self._ranks_cmp.distance(untied=untied, metric=metric) + def distance( + self, *, untied=False, metric="hamming", distance_kws=None, **kwargs + ): + """Plot the pairwise distance between rankings as a color-encoded \ + matrix. + + By default the 'hamming' distance is used, which is simply the + proportion of disagreeing components in two rankings. 
+ + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + metric: str or function, default ``"hamming"`` + The distance metric to use. The distance function can + be 'braycurtis', 'canberra', 'chebyshev', 'cityblock', + 'correlation', 'cosine', 'dice', 'euclidean', 'hamming', + 'jaccard', 'jensenshannon', 'kulczynski1', + 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', + 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', + 'sqeuclidean', 'yule'. + distance_kws: dict or None + Dict with keywords arguments passed the + ``scipy.spatial.distance.pdist`` function + kwargs: + Other keyword arguments are passed to the ``seaborn.heatmap()`` + function. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ + distance_kws = {} if distance_kws is None else distance_kws + dis = self._ranks_cmp.distance( + untied=untied, metric=metric, **distance_kws + ) + kwargs.setdefault("annot", True) kwargs.setdefault( "cbar_kws", {"label": f"{metric} distance".capitalize()} @@ -325,6 +666,24 @@ def distance(self, *, untied=False, metric="hamming", **kwargs): return sns.heatmap(data=dis, **kwargs) def box(self, *, untied=False, **kwargs): + """Draw a boxplot to show rankings with respect to alternatives. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + kwargs: + Other keyword arguments are passed to the ``seaborn.boxplot()`` + function. 
+ + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ df = self._ranks_cmp.to_dataframe(untied=untied) ax = sns.boxplot(data=df.T, **kwargs) @@ -340,6 +699,25 @@ def box(self, *, untied=False, **kwargs): # Thin wrapper around pandas.DataFrame.plot def bar(self, *, untied=False, **kwargs): + """Draw plot that presents ranking of alternatives by method with \ + vertical bars. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + kwargs: + Other keyword arguments are passed to the + ``pandas.DataFrame.plot.bar()`` method. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ df = self._ranks_cmp.to_dataframe(untied=untied) kwargs["ax"] = kwargs.get("ax") or plt.gca() ax = df.plot.bar(**kwargs) @@ -347,6 +725,25 @@ def bar(self, *, untied=False, **kwargs): return ax def barh(self, *, untied=False, **kwargs): + """Draw plot that presents ranking of alternatives by method with \ + horizontal bars. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + kwargs: + Other keyword arguments are passed to the + ``pandas.DataFrame.plot.barh()`` method. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ df = self._ranks_cmp.to_dataframe(untied=untied) kwargs["ax"] = kwargs.get("ax") or plt.gca() ax = df.plot.barh(**kwargs) @@ -364,7 +761,7 @@ def mkrank_cmp(*ranks): This is a shorthand for the RankComparator constructor; it does not require, and does not permit, naming the estimators. 
Instead, their names - will be set to the method attribute of the rankins automatically. + will be set to the method attribute of the rankings automatically. Parameters ---------- diff --git a/skcriteria/core/plot.py b/skcriteria/core/plot.py index e383c53..1d74ec4 100644 --- a/skcriteria/core/plot.py +++ b/skcriteria/core/plot.py @@ -29,7 +29,7 @@ class DecisionMatrixPlotter(AccessorABC): - """Make plots of DecisionMatrix. + """DecisionMatrix plot utilities. Kind of plot to produce: diff --git a/skcriteria/madm/_madm_base.py b/skcriteria/madm/_madm_base.py index 9bcffe4..fd6f7a5 100644 --- a/skcriteria/madm/_madm_base.py +++ b/skcriteria/madm/_madm_base.py @@ -285,11 +285,11 @@ def equals(self, other): return self.aequals(other, 0, 0, False) def __eq__(self, other): - """x.__eq__(y) <==> x == y""" + """x.__eq__(y) <==> x == y.""" return self.equals(other) def __ne__(self, other): - """x.__eq__(y) <==> x == y""" + """x.__ne__(y) <==> x != y.""" return not self == other # REPR ==================================================================== diff --git a/skcriteria/pipeline.py b/skcriteria/pipeline.py index 0953ecd..7e6ebf9 100644 --- a/skcriteria/pipeline.py +++ b/skcriteria/pipeline.py @@ -32,10 +32,7 @@ class SKCPipeline(SKCMethodABC): The final decision-maker only needs to implement `evaluate`. The purpose of the pipeline is to assemble several steps that can be - applied together while setting different parameters. A step's - estimator may be replaced entirely by setting the parameter with its name - to another dmaker or a transformer removed by setting it to - `'passthrough'` or `None`. + applied together while setting different parameters. 
Parameters ---------- diff --git a/skcriteria/utils/bunch.py b/skcriteria/utils/bunch.py index 91d236e..85aafbe 100644 --- a/skcriteria/utils/bunch.py +++ b/skcriteria/utils/bunch.py @@ -66,7 +66,7 @@ def __getattr__(self, a): raise AttributeError(a) def __setstate__(self, state): - """Needed for some deep copy operations""" + """Needed for some deep copy operations.""" self.__dict__.update(state) def __iter__(self): diff --git a/tests/cmp/test_ranks_cmp.py b/tests/cmp/test_ranks_cmp.py index fe426d5..b177630 100644 --- a/tests/cmp/test_ranks_cmp.py +++ b/tests/cmp/test_ranks_cmp.py @@ -48,6 +48,12 @@ def test_Ranks_name_not_str(): ranks_cmp.RanksComparator([("a", rank), (1, rank)]) +def test_Ranks_not_rank_result(): + rank = madm.RankResult("test", ["a"], [1], {}) + with pytest.raises(TypeError): + ranks_cmp.RanksComparator([("a", rank), ("b", None)]) + + def test_Ranks_duplicated_names(): rank = madm.RankResult("test", ["a"], [1], {}) with pytest.raises(ValueError):