From 6343bcab24cff12f7310e6e913fef33fbd477a7c Mon Sep 17 00:00:00 2001
From: GiulioRossetti <giulio.rossetti@gmail.com>
Date: Tue, 14 May 2024 09:52:48 +0200
Subject: [PATCH] :new: clusim integration

---
 cdlib/evaluation/comparison.py            | 644 ++++++++++++++++++++++
 cdlib/test/test_partitions_comparisons.py | 116 +++-
 docs/reference/evaluation.rst             |  25 +-
 requirements.txt                          |   3 +-
 requirements_optional.txt                 |   3 +-
 setup.py                                  |   3 +-
 6 files changed, 779 insertions(+), 15 deletions(-)

diff --git a/cdlib/evaluation/comparison.py b/cdlib/evaluation/comparison.py
index 0dcbe865..1bece79a 100644
--- a/cdlib/evaluation/comparison.py
+++ b/cdlib/evaluation/comparison.py
@@ -3,6 +3,12 @@
 from cdlib.evaluation.internal.omega import Omega
 from cdlib.evaluation.internal.NF1 import NF1
 from collections import namedtuple, defaultdict
+
+try:  # clusim is optional: every clusim import must be guarded or importing this module fails
+    import clusim.sim as sim
+    from clusim.clustering import Clustering
+except ImportError:
+    sim = Clustering = None
 
 __all__ = [
     "MatchingResult",
@@ -16,6 +22,21 @@
     "adjusted_mutual_information",
     "variation_of_information",
     "partition_closeness_simple",
+    "ecs",
+    "jaccard_index",
+    "rand_index",
+    "fowlkes_mallows_index",
+    "classification_error",
+    "czekanowski_index",
+    "dice_index",
+    "sorensen_index",
+    "rogers_tanimoto_index",
+    "southwood_index",
+    "mi",
+    "rmi",
+    "geometric_accuracy",
+    "overlap_quality",
+    "sample_expected_sim",
 ]
 
 # MatchingResult = namedtuple("MatchingResult", ['mean', 'std'])
@@ -24,6 +45,14 @@
 MatchingResult.__new__.__defaults__ = (None,) * len(MatchingResult._fields)
 
 
+def __transform_partition(partition: object):
+    """Map each node to the list of indices of the communities that contain it."""
+    memberships = defaultdict(list)
+    for idc, node in ((i, n) for i, com in enumerate(partition.communities) for n in com):
+        memberships[node].append(idc)
+    return memberships
+
+
 def __check_partition_coverage(first_partition: object, second_partition: object):
     nodes_first = {
         node: None for community in first_partition.communities for node in community
@@ -59,6 +88,7 @@ def normalized_mutual_information(
     :Example:
 
       >>> from cdlib import evaluation, algorithms
+      >>> import networkx as nx
       >>> g = nx.karate_club_graph()
       >>> louvain_communities = algorithms.louvain(g)
       >>> leiden_communities = algorithms.leiden(g)
@@ -118,6 +148,7 @@ def overlapping_normalized_mutual_information_LFK(
     :Example:
 
     >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
     >>> g = nx.karate_club_graph()
     >>> louvain_communities = algorithms.louvain(g)
     >>> leiden_communities = algorithms.leiden(g)
@@ -153,6 +184,7 @@ def overlapping_normalized_mutual_information_MGH(
     :Example:
 
     >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
     >>> g = nx.karate_club_graph()
     >>> louvain_communities = algorithms.louvain(g)
     >>> leiden_communities = algorithms.leiden(g)
@@ -191,6 +223,7 @@ def omega(first_partition: object, second_partition: object) -> MatchingResult:
     :Example:
 
     >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
     >>> g = nx.karate_club_graph()
     >>> louvain_communities = algorithms.louvain(g)
     >>> leiden_communities = algorithms.leiden(g)
@@ -221,6 +254,7 @@ def f1(first_partition: object, second_partition: object) -> MatchingResult:
     :Example:
 
     >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
     >>> g = nx.karate_club_graph()
     >>> louvain_communities = algorithms.louvain(g)
     >>> leiden_communities = algorithms.leiden(g)
@@ -250,6 +284,7 @@ def nf1(first_partition: object, second_partition: object) -> MatchingResult:
     :Example:
 
     >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
     >>> g = nx.karate_club_graph()
     >>> louvain_communities = algorithms.louvain(g)
     >>> leiden_communities = algorithms.leiden(g)
@@ -298,6 +333,7 @@ def adjusted_rand_index(
     :Example:
 
     >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
     >>> g = nx.karate_club_graph()
     >>> louvain_communities = algorithms.louvain(g)
     >>> leiden_communities = algorithms.leiden(g)
@@ -374,6 +410,7 @@ def adjusted_mutual_information(
     :Example:
 
     >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
     >>> g = nx.karate_club_graph()
     >>> louvain_communities = algorithms.louvain(g)
     >>> leiden_communities = algorithms.leiden(g)
@@ -434,6 +471,7 @@ def variation_of_information(
     :Example:
 
     >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
     >>> g = nx.karate_club_graph()
     >>> louvain_communities = algorithms.louvain(g)
     >>> leiden_communities = algorithms.leiden(g)
@@ -483,6 +521,7 @@ def partition_closeness_simple(
     :Example:
 
     >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
     >>> g = nx.karate_club_graph()
     >>> louvain_communities = algorithms.louvain(g)
     >>> leiden_communities = algorithms.leiden(g)
@@ -516,3 +555,608 @@ def partition_closeness_simple(
     closeness *= 0.5
 
     return MatchingResult(score=closeness)
+
+
+def ecs(
+    first_partition: object,
+    second_partition: object,
+    alpha: float = 0.9,
+    r: float = 1.0,
+    r2: float = None,
+    rescale_path_type: str = "max",
+    ppr_implementation: str = "prpack",
+) -> MatchingResult:
+    """
+    Compute the element-centric clustering similarity (ECS) between two clusterings.
+
+    :param first_partition: NodeClustering object
+    :param second_partition: NodeClustering object
+    :param alpha: personalized page-rank return probability, a float in [0,1]. Default 0.9
+    :param r: hierarchical scaling parameter for the first clustering. Default 1.0
+    :param r2: hierarchical scaling parameter for the second clustering. Default None
+    :param rescale_path_type: how to rescale the hierarchical height: 'max' the maximum path from the root; 'min' the minimum path from the root; 'linkage' use the linkage distances in the clustering. Default 'max'
+    :param ppr_implementation: implementation used for the personalized page-rank calculation: 'prpack' use the PPR algorithms in igraph; 'power_iteration' use the power iteration method. Default 'prpack'
+    :return: MatchingResult object
+
+    :Example:
+
+    >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
+    >>> g = nx.karate_club_graph()
+    >>> louvain_communities = algorithms.louvain(g)
+    >>> leiden_communities = algorithms.leiden(g)
+    >>> evaluation.ecs(louvain_communities,leiden_communities)
+
+    :Reference:
+
+    A.J. Gates, I.B. Wood, W.P. Hetrick, and YY Ahn [2019]. "Element-centric clustering comparison unifies overlaps and hierarchy". Scientific Reports 9, 8574
+
+    .. note:: Requires the optional clusim library (pip install clusim); a ModuleNotFoundError is raised when it is missing.
+    """
+    if sim is None:
+        raise ModuleNotFoundError(
+            "Optional dependency not satisfied: install clusim (pip install clusim) to use the selected feature."
+        )
+
+    first = Clustering(elm2clu_dict=__transform_partition(first_partition))
+    second = Clustering(elm2clu_dict=__transform_partition(second_partition))
+    similarity = sim.element_sim(
+        first, second, alpha, r, r2, rescale_path_type, ppr_implementation
+    )
+    return MatchingResult(score=similarity)
+
+
+def jaccard_index(
+    first_partition: object,
+    second_partition: object,
+) -> MatchingResult:
+    """
+    Compute the Jaccard index between two clusterings.
+
+    J = N11/(N11+N10+N01)
+
+    :param first_partition: NodeClustering object
+    :param second_partition: NodeClustering object
+    :return: MatchingResult object
+
+
+    :Example:
+
+    >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
+    >>> g = nx.karate_club_graph()
+    >>> louvain_communities = algorithms.louvain(g)
+    >>> leiden_communities = algorithms.leiden(g)
+    >>> evaluation.jaccard_index(louvain_communities,leiden_communities)
+
+    :Reference:
+
+    Paul Jaccard. The distribution of the flora in the alpine zone. New Phytologist, 11(2):37–50, 1912.
+
+    .. note:: Requires the optional clusim library (pip install clusim); a ModuleNotFoundError is raised when it is missing.
+    """
+    if sim is None:
+        raise ModuleNotFoundError(
+            "Optional dependency not satisfied: install clusim (pip install clusim) to use the selected feature."
+        )
+
+    # clusim is fed node -> community-ids mappings built from the NodeClustering objects
+    first = Clustering(elm2clu_dict=__transform_partition(first_partition))
+    second = Clustering(elm2clu_dict=__transform_partition(second_partition))
+    return MatchingResult(score=sim.jaccard_index(first, second))
+
+
+def rand_index(
+    first_partition: object,
+    second_partition: object,
+) -> MatchingResult:
+    """
+    Compute the Rand index between two clusterings.
+
+    RI = (N11 + N00) / (N11 + N10 + N01 + N00)
+
+
+    :param first_partition: NodeClustering object
+    :param second_partition: NodeClustering object
+    :return: MatchingResult object
+
+    :Example:
+
+    >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
+    >>> g = nx.karate_club_graph()
+    >>> louvain_communities = algorithms.louvain(g)
+    >>> leiden_communities = algorithms.leiden(g)
+    >>> evaluation.rand_index(louvain_communities,leiden_communities)
+
+    :Reference:
+
+    William M Rand. Objective Criteria for the Evaluation of Clustering Methods. Journal of the American Statistical Association, 66(336):846, 1971.
+
+
+    .. note:: Requires the optional clusim library (pip install clusim); a ModuleNotFoundError is raised when it is missing.
+    """
+    if sim is None:
+        raise ModuleNotFoundError(
+            "Optional dependency not satisfied: install clusim (pip install clusim) to use the selected feature."
+        )
+
+    # clusim is fed node -> community-ids mappings built from the NodeClustering objects
+    first = Clustering(elm2clu_dict=__transform_partition(first_partition))
+    second = Clustering(elm2clu_dict=__transform_partition(second_partition))
+    return MatchingResult(score=sim.rand_index(first, second))
+
+
+def fowlkes_mallows_index(
+    first_partition: object,
+    second_partition: object,
+) -> MatchingResult:
+    """
+    Compute the Fowlkes and Mallows index between two clusterings.
+
+    FM = N11 / sqrt( (N11 + N10) * (N11 + N01) )
+
+    :param first_partition: NodeClustering object
+    :param second_partition: NodeClustering object
+    :return: MatchingResult object
+
+    :Example:
+
+    >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
+    >>> g = nx.karate_club_graph()
+    >>> louvain_communities = algorithms.louvain(g)
+    >>> leiden_communities = algorithms.leiden(g)
+    >>> evaluation.fowlkes_mallows_index(louvain_communities,leiden_communities)
+
+    :Reference:
+
+    Edward B. Fowlkes and Colin L. Mallows. A method for comparing two hierarchical clusterings. Journal of the American Statistical Association, 78(383):553–569, 1983.
+
+
+    .. note:: Requires the optional clusim library (pip install clusim); a ModuleNotFoundError is raised when it is missing.
+    """
+    if sim is None:
+        raise ModuleNotFoundError(
+            "Optional dependency not satisfied: install clusim (pip install clusim) to use the selected feature."
+        )
+
+    # clusim is fed node -> community-ids mappings built from the NodeClustering objects
+    first = Clustering(elm2clu_dict=__transform_partition(first_partition))
+    second = Clustering(elm2clu_dict=__transform_partition(second_partition))
+    return MatchingResult(score=sim.fowlkes_mallows_index(first, second))
+
+
+def classification_error(
+    first_partition: object,
+    second_partition: object,
+) -> MatchingResult:
+    """
+    Compute the classification error between two clusterings.
+
+    CE = 1 - PI
+
+    :param first_partition: NodeClustering object
+    :param second_partition: NodeClustering object
+    :return: MatchingResult object
+
+    :Example:
+
+    >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
+    >>> g = nx.karate_club_graph()
+    >>> louvain_communities = algorithms.louvain(g)
+    >>> leiden_communities = algorithms.leiden(g)
+    >>> evaluation.classification_error(louvain_communities,leiden_communities)
+
+    .. note:: Requires the optional clusim library (pip install clusim); a ModuleNotFoundError is raised when it is missing.
+    """
+    if sim is None:
+        raise ModuleNotFoundError(
+            "Optional dependency not satisfied: install clusim (pip install clusim) to use the selected feature."
+        )
+
+    # clusim is fed node -> community-ids mappings built from the NodeClustering objects
+    first = Clustering(elm2clu_dict=__transform_partition(first_partition))
+    second = Clustering(elm2clu_dict=__transform_partition(second_partition))
+    return MatchingResult(score=sim.classification_error(first, second))
+
+
+def czekanowski_index(
+    first_partition: object,
+    second_partition: object,
+) -> MatchingResult:
+    """
+    Compute the Czekanowski index between two clusterings.
+
+    Also known as:
+
+    - Dice Symmetric index
+    - Sorensen index
+
+    F = 2*N11 / (2*N11 + N10 + N01)
+
+    :param first_partition: NodeClustering object
+    :param second_partition: NodeClustering object
+    :return: MatchingResult object
+
+    :Example:
+
+    >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
+    >>> g = nx.karate_club_graph()
+    >>> louvain_communities = algorithms.louvain(g)
+    >>> leiden_communities = algorithms.leiden(g)
+    >>> evaluation.czekanowski_index(louvain_communities,leiden_communities)
+
+    .. note:: Requires the optional clusim library (pip install clusim); a ModuleNotFoundError is raised when it is missing.
+    """
+    if sim is None:
+        raise ModuleNotFoundError(
+            "Optional dependency not satisfied: install clusim (pip install clusim) to use the selected feature."
+        )
+
+    # clusim is fed node -> community-ids mappings built from the NodeClustering objects
+    first = Clustering(elm2clu_dict=__transform_partition(first_partition))
+    second = Clustering(elm2clu_dict=__transform_partition(second_partition))
+    return MatchingResult(score=sim.czekanowski_index(first, second))
+
+
+def dice_index(
+    first_partition: object,
+    second_partition: object,
+) -> MatchingResult:
+    """
+    Compute the Dice index between two clusterings.
+
+    Also known as:
+
+    - Czekanowski index
+    - Sorensen index
+
+    F = 2*N11 / (2*N11 + N10 + N01)
+
+    :param first_partition: NodeClustering object
+    :param second_partition: NodeClustering object
+    :return: MatchingResult object
+
+    :Example:
+
+    >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
+    >>> g = nx.karate_club_graph()
+    >>> louvain_communities = algorithms.louvain(g)
+    >>> leiden_communities = algorithms.leiden(g)
+    >>> evaluation.dice_index(louvain_communities,leiden_communities)
+
+    .. note:: Requires the optional clusim library (pip install clusim).
+    """
+    # Alias of the Czekanowski index: the computation is delegated to czekanowski_index
+    return czekanowski_index(first_partition=first_partition, second_partition=second_partition)
+
+
+def sorensen_index(
+    first_partition: object,
+    second_partition: object,
+) -> MatchingResult:
+    """
+    Compute the Sorensen index between two clusterings.
+
+    Also known as:
+
+    - Czekanowski index
+    - Dice index
+
+    F = 2*N11 / (2*N11 + N10 + N01)
+
+    :param first_partition: NodeClustering object
+    :param second_partition: NodeClustering object
+    :return: MatchingResult object
+
+    :Example:
+
+    >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
+    >>> g = nx.karate_club_graph()
+    >>> louvain_communities = algorithms.louvain(g)
+    >>> leiden_communities = algorithms.leiden(g)
+    >>> evaluation.sorensen_index(louvain_communities,leiden_communities)
+
+    .. note:: Requires the optional clusim library (pip install clusim).
+    """
+    # Alias of the Czekanowski index: the computation is delegated to czekanowski_index
+    return czekanowski_index(first_partition=first_partition, second_partition=second_partition)
+
+
+def rogers_tanimoto_index(
+    first_partition: object,
+    second_partition: object,
+) -> MatchingResult:
+    """
+    Compute the Rogers and Tanimoto index between two clusterings.
+
+    RT = (N11 + N00)/(N11 + 2*(N10+N01) + N00)
+
+
+    :param first_partition: NodeClustering object
+    :param second_partition: NodeClustering object
+    :return: MatchingResult object
+
+    :Example:
+
+    >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
+    >>> g = nx.karate_club_graph()
+    >>> louvain_communities = algorithms.louvain(g)
+    >>> leiden_communities = algorithms.leiden(g)
+    >>> evaluation.rogers_tanimoto_index(louvain_communities,leiden_communities)
+
+    .. note:: Requires the optional clusim library (pip install clusim); a ModuleNotFoundError is raised when it is missing.
+    """
+    if sim is None:
+        raise ModuleNotFoundError(
+            "Optional dependency not satisfied: install clusim (pip install clusim) to use the selected feature."
+        )
+
+    # clusim is fed node -> community-ids mappings built from the NodeClustering objects
+    first = Clustering(elm2clu_dict=__transform_partition(first_partition))
+    second = Clustering(elm2clu_dict=__transform_partition(second_partition))
+    return MatchingResult(score=sim.rogers_tanimoto_index(first, second))
+
+
+def southwood_index(
+    first_partition: object,
+    second_partition: object,
+) -> MatchingResult:
+    """
+    Compute the Southwood index between two clusterings.
+
+    N11 / (N10 + N01)
+
+    :param first_partition: NodeClustering object
+    :param second_partition: NodeClustering object
+    :return: MatchingResult object
+
+    :Example:
+
+    >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
+    >>> g = nx.karate_club_graph()
+    >>> louvain_communities = algorithms.louvain(g)
+    >>> leiden_communities = algorithms.leiden(g)
+    >>> evaluation.southwood_index(louvain_communities,leiden_communities)
+
+    .. note:: Requires the optional clusim library (pip install clusim); a ModuleNotFoundError is raised when it is missing.
+    """
+    if sim is None:
+        raise ModuleNotFoundError(
+            "Optional dependency not satisfied: install clusim (pip install clusim) to use the selected feature."
+        )
+
+    # clusim is fed node -> community-ids mappings built from the NodeClustering objects
+    first = Clustering(elm2clu_dict=__transform_partition(first_partition))
+    second = Clustering(elm2clu_dict=__transform_partition(second_partition))
+    return MatchingResult(score=sim.southwood_index(first, second))
+
+
+def mi(
+    first_partition: object,
+    second_partition: object,
+) -> MatchingResult:
+    """
+    Compute the Mutual Information (MI) between two clusterings.
+
+    MI = (S(c1) + S(c2) - S(c1, c2))
+
+    where S(c1) is the Shannon Entropy of the clustering size distribution and S(c1, c2) is the Shannon Entropy of the joint clustering size distribution.
+
+
+    :param first_partition: NodeClustering object
+    :param second_partition: NodeClustering object
+    :return: MatchingResult object
+
+    :Example:
+
+    >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
+    >>> g = nx.karate_club_graph()
+    >>> louvain_communities = algorithms.louvain(g)
+    >>> leiden_communities = algorithms.leiden(g)
+    >>> evaluation.mi(louvain_communities,leiden_communities)
+
+    :Reference:
+
+    Leon Danon, Albert Díaz-Guilera, Jordi Duch, and Alex Arenas. Comparing community structure identification. Journal of Statistical Mechanics: Theory and Experiment, 2005(09):P09008–P09008, September 2005.
+
+    .. note:: Requires the optional clusim library (pip install clusim); a ModuleNotFoundError is raised when it is missing.
+    """
+    if sim is None:
+        raise ModuleNotFoundError(
+            "Optional dependency not satisfied: install clusim (pip install clusim) to use the selected feature."
+        )
+
+    # clusim is fed node -> community-ids mappings built from the NodeClustering objects
+    first = Clustering(elm2clu_dict=__transform_partition(first_partition))
+    second = Clustering(elm2clu_dict=__transform_partition(second_partition))
+    return MatchingResult(score=sim.mi(first, second))
+
+
+def rmi(
+    first_partition: object,
+    second_partition: object,
+    norm_type: str = "none",
+    logbase: int = 2,
+) -> MatchingResult:
+    """
+    Compute the Reduced Mutual Information (RMI) between two clusterings.
+
+    RMI = MI(c1, c2) - log Omega(a, b) / n
+
+    where MI(c1, c2) is the mutual information of the clusterings c1 and c2, and Omega(a, b) is the number of contingency tables with row and column sums equal to a and b.
+
+    :param first_partition: NodeClustering object
+    :param second_partition: NodeClustering object
+    :param norm_type: normalization to apply: 'none' returns the RMI without a normalization; 'normalized' returns the RMI with upper bound equal to 1. Default 'none'
+    :param logbase: base of the logarithm, int, default 2
+    :return: MatchingResult object
+
+    :Example:
+
+    >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
+    >>> g = nx.karate_club_graph()
+    >>> louvain_communities = algorithms.louvain(g)
+    >>> leiden_communities = algorithms.leiden(g)
+    >>> evaluation.rmi(louvain_communities,leiden_communities)
+
+    :Reference:
+
+    M. E. J. Newman, George T. Cantwell, and Jean-Gabriel Young. Improved mutual information measure for classification and community detection. arXiv:1907.12581, 2019.
+
+    .. note:: Requires the optional clusim library (pip install clusim); a ModuleNotFoundError is raised when it is missing.
+    """
+    if sim is None:
+        raise ModuleNotFoundError(
+            "Optional dependency not satisfied: install clusim (pip install clusim) to use the selected feature."
+        )
+
+    # clusim is fed node -> community-ids mappings built from the NodeClustering objects
+    first = Clustering(elm2clu_dict=__transform_partition(first_partition))
+    second = Clustering(elm2clu_dict=__transform_partition(second_partition))
+    return MatchingResult(score=sim.rmi(first, second, norm_type, logbase))
+
+
+def geometric_accuracy(
+    first_partition: object, second_partition: object
+) -> MatchingResult:
+    """
+    Compute the geometric accuracy between two (possibly overlapping) clusterings.
+
+    :param first_partition: NodeClustering object
+    :param second_partition: NodeClustering object
+    :return: MatchingResult object
+
+    :Example:
+
+    >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
+    >>> g = nx.karate_club_graph()
+    >>> louvain_communities = algorithms.louvain(g)
+    >>> leiden_communities = algorithms.leiden(g)
+    >>> evaluation.geometric_accuracy(louvain_communities,leiden_communities)
+
+    :Reference:
+
+    Tamás Nepusz, Haiyuan Yu, and Alberto Paccanaro. Detecting overlapping protein complexes in protein-protein interaction networks. Nature Methods, 9(5):471–472, 2012.
+
+    .. note:: Requires the optional clusim library (pip install clusim); a ModuleNotFoundError is raised when it is missing.
+    """
+    if sim is None:
+        raise ModuleNotFoundError(
+            "Optional dependency not satisfied: install clusim (pip install clusim) to use the selected feature."
+        )
+
+    # clusim is fed node -> community-ids mappings built from the NodeClustering objects
+    first = Clustering(elm2clu_dict=__transform_partition(first_partition))
+    second = Clustering(elm2clu_dict=__transform_partition(second_partition))
+    return MatchingResult(score=sim.geometric_accuracy(first, second))
+
+
+def overlap_quality(
+    first_partition: object, second_partition: object
+) -> MatchingResult:
+    """
+    Compute the overlap quality between two (possibly overlapping) clusterings.
+
+    :param first_partition: NodeClustering object
+    :param second_partition: NodeClustering object
+    :return: MatchingResult object
+
+    :Example:
+
+    >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
+    >>> g = nx.karate_club_graph()
+    >>> louvain_communities = algorithms.louvain(g)
+    >>> leiden_communities = algorithms.leiden(g)
+    >>> evaluation.overlap_quality(louvain_communities,leiden_communities)
+
+    :Reference:
+
+    Yong-Yeol Ahn, James P Bagrow, and Sune Lehmann. Link communities reveal multiscale complexity in networks. Nature, 466(7307):761–764, June 2010.
+
+    .. note:: Requires the optional clusim library (pip install clusim); a ModuleNotFoundError is raised when it is missing.
+    """
+    if sim is None:
+        raise ModuleNotFoundError(
+            "Optional dependency not satisfied: install clusim (pip install clusim) to use the selected feature."
+        )
+
+    # clusim is fed node -> community-ids mappings built from the NodeClustering objects
+    first = Clustering(elm2clu_dict=__transform_partition(first_partition))
+    second = Clustering(elm2clu_dict=__transform_partition(second_partition))
+    return MatchingResult(score=sim.overlap_quality(first, second))
+
+
+def sample_expected_sim(
+    first_partition: object,
+    second_partition: object,
+    measure: str = "jaccard_index",
+    random_model: str = "perm",
+    n_samples: int = 1,
+    keep_samples: bool = False,
+) -> MatchingResult:
+    """
+    Estimate the expected similarity over all pair-wise comparisons between clusterings drawn from one of six random models.
+
+    .. note:: second_partition is considered the gold-standard clustering for one-sided expectations
+
+
+    :param first_partition: NodeClustering object
+    :param second_partition: NodeClustering object
+    :param measure: name of the similarity measure to evaluate. Must be one of [ecs, jaccard_index, rand_index, fowlkes_mallows_index, classification_error, czekanowski_index, dice_index, sorensen_index, rogers_tanimoto_index, southwood_index, mi, rmi, vi, geometric_accuracy, overlap_quality, sample_expected_sim]
+    :param random_model: The random model to use:
+
+        'all' : uniform distribution over the set of all clusterings of
+                n_elements
+
+        'all1' : one-sided selection from the uniform distribution over the set
+                 of all clusterings of n_elements
+
+        'num' : uniform distribution over the set of all clusterings of
+                n_elements in n_clusters
+
+        'num1' : one-sided selection from the uniform distribution over the set
+                 of all clusterings of n_elements in n_clusters
+
+        'perm' : the permutation model for a fixed cluster size sequence
+
+        'perm1' : one-sided selection from the permutation model for a fixed
+                  cluster size sequence, same as 'perm'
+
+    :param n_samples: number of random clusterings sampled to determine the expected similarity. Default 1
+    :param keep_samples: if True, return the similarity samples themselves, otherwise return their mean. Default False
+    :return: MatchingResult object
+
+    :Example:
+
+    >>> from cdlib import evaluation, algorithms
+    >>> import networkx as nx
+    >>> g = nx.karate_club_graph()
+    >>> louvain_communities = algorithms.louvain(g)
+    >>> leiden_communities = algorithms.leiden(g)
+    >>> evaluation.sample_expected_sim(louvain_communities,leiden_communities)
+
+    .. note:: Requires the optional clusim library (pip install clusim); a ModuleNotFoundError is raised when it is missing.
+    """
+    if sim is None:
+        raise ModuleNotFoundError(
+            "Optional dependency not satisfied: install clusim (pip install clusim) to use the selected feature."
+        )
+
+    first = Clustering(elm2clu_dict=__transform_partition(first_partition))
+    second = Clustering(elm2clu_dict=__transform_partition(second_partition))
+    expected = sim.sample_expected_sim(
+        first, second, measure=measure, random_model=random_model, n_samples=n_samples, keep_samples=keep_samples
+    )
+    return MatchingResult(score=expected)
diff --git a/cdlib/test/test_partitions_comparisons.py b/cdlib/test/test_partitions_comparisons.py
index 10751e5b..51962f76 100644
--- a/cdlib/test/test_partitions_comparisons.py
+++ b/cdlib/test/test_partitions_comparisons.py
@@ -115,12 +115,114 @@ def test_closeness_simple(self):
         self.assertLessEqual(score.score, 1)
         self.assertGreaterEqual(score.score, 0)
 
-    # def test_closeness_kde(self):
-    #     g = nx.karate_club_graph()
-    #     lp_communities = label_propagation(g)
-    #     louvain_communities = louvain(g)
+    def test_clusim(self):
+        """Smoke-test every clusim-backed comparison measure on the karate club graph."""
+        g = nx.karate_club_graph()
+        louvain_communities = louvain(g)
+        lp_communities = label_propagation(g)
+
+        score = evaluation.ecs(
+            louvain_communities, lp_communities
+        )
+
+        self.assertLessEqual(score.score, 1)
+        self.assertGreaterEqual(score.score, 0)
+
+        score = evaluation.jaccard_index(
+            louvain_communities, lp_communities
+        )
+
+        self.assertLessEqual(score.score, 1)
+        self.assertGreaterEqual(score.score, 0)
+
+        score = evaluation.rand_index(
+            louvain_communities, lp_communities
+        )
+
+        self.assertLessEqual(score.score, 1)
+        self.assertGreaterEqual(score.score, 0)
+
+        score = evaluation.fowlkes_mallows_index(
+            louvain_communities, lp_communities
+        )
+
+        self.assertLessEqual(score.score, 1)
+        self.assertGreaterEqual(score.score, 0)
+
+        score = evaluation.classification_error(
+            louvain_communities, lp_communities
+        )
+
+        self.assertLessEqual(score.score, 1)
+        self.assertGreaterEqual(score.score, 0)
+
+        score = evaluation.czekanowski_index(
+            louvain_communities, lp_communities
+        )
+
+        self.assertLessEqual(score.score, 1)
+        self.assertGreaterEqual(score.score, 0)
+
+        score = evaluation.dice_index(
+            louvain_communities, lp_communities
+        )
+
+        self.assertLessEqual(score.score, 1)
+        self.assertGreaterEqual(score.score, 0)
+
+        score = evaluation.sorensen_index(
+            louvain_communities, lp_communities
+        )
+
+        self.assertLessEqual(score.score, 1)
+        self.assertGreaterEqual(score.score, 0)
+
+        score = evaluation.rogers_tanimoto_index(
+            louvain_communities, lp_communities
+        )
+
+        self.assertLessEqual(score.score, 1)
+        self.assertGreaterEqual(score.score, 0)
+
+        score = evaluation.southwood_index(
+            louvain_communities, lp_communities
+        )
+
+        self.assertGreaterEqual(score.score, 0)
+
+        score = evaluation.mi(
+            louvain_communities, lp_communities
+        )
+
+        # MI = S(c1) + S(c2) - S(c1, c2) is not normalized, so 1 is not a valid upper bound
+        self.assertGreaterEqual(score.score, 0)
+
+        score = evaluation.rmi(
+            louvain_communities, lp_communities, norm_type="normalized"
+        )
+        # the upper bound of 1 only holds for the normalized RMI, not for the default norm_type="none"
+        self.assertLessEqual(score.score, 1)
+        self.assertGreaterEqual(score.score, 0)
+
+        score = evaluation.geometric_accuracy(
+            louvain_communities, lp_communities
+        )
+
+        self.assertLessEqual(score.score, 1)
+        self.assertGreaterEqual(score.score, 0)
+
+        score = evaluation.overlap_quality(
+            louvain_communities, lp_communities
+        )
+
+        self.assertLessEqual(score.score, 1)
+        self.assertGreaterEqual(score.score, 0)
+
+        score = evaluation.sample_expected_sim(
+            louvain_communities, lp_communities
+        )
+
+        self.assertLessEqual(score.score, 1)
+        self.assertGreaterEqual(score.score, 0)
 
-    #     score = evaluation.partition_closeness_kde(louvain_communities, lp_communities)
 
-    #     self.assertLessEqual(score.score, 1)
-    #     self.assertGreaterEqual(score.score, 0)
diff --git a/docs/reference/evaluation.rst b/docs/reference/evaluation.rst
index ae2d56fa..aa7e3aaa 100644
--- a/docs/reference/evaluation.rst
+++ b/docs/reference/evaluation.rst
@@ -81,14 +81,31 @@ It is often useful to compare different graph partition to assess their resembla
     :toctree: eval/
 
     adjusted_mutual_information
-    adjusted_rand_index
-    f1
-    nf1
+    mi
+    rmi
     normalized_mutual_information
-    omega
+    normalized_mutual_information_avg
     overlapping_normalized_mutual_information_LFK
     overlapping_normalized_mutual_information_MGH
     variation_of_information
+    rand_index
+    adjusted_rand_index
+    omega
+    f1
+    nf1
+    southwood_index
+    rogers_tanimoto_index
+    sorensen_index
+    dice_index
+    czekanowski_index
+    fowlkes_mallows_index
+    jaccard_index
+    sample_expected_sim
+    overlap_quality
+    geometric_accuracy
+    classification_error
+    ecs
+
 
 
 Some measures will return an instance of ``MatchingResult`` that takes together mean and standard deviation values of the computed index.
diff --git a/requirements.txt b/requirements.txt
index e103fbbf..345f9a4c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,5 +15,4 @@ angelcommunity
 pooch
 dynetx
 thresholdclustering
-python-Levenshtein
-clusim
\ No newline at end of file
+python-Levenshtein
\ No newline at end of file
diff --git a/requirements_optional.txt b/requirements_optional.txt
index 895e0ef7..5bc6d016 100644
--- a/requirements_optional.txt
+++ b/requirements_optional.txt
@@ -3,4 +3,5 @@ networkit
 pycombo
 leidenalg
 infomap>=1.3.0
-wurlitzer>=1.0.2
\ No newline at end of file
+wurlitzer>=1.0.2
+clusim
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 1f302639..ec30b6fa 100644
--- a/setup.py
+++ b/setup.py
@@ -56,7 +56,7 @@
             "pycombo",
             "leidenalg",
         ],
-        "pypi": ["bayanpy", "pyclustering"],
+        "pypi": ["bayanpy", "pyclustering", "clusim"],
         "all": [
             "infomap>=1.3.0",
             "wurlitzer>=1.0.2",
@@ -66,6 +66,7 @@
             "leidenalg",
             "bayanpy",
             "pyclustering",
+            "clusim",
         ],
     },
     packages=find_packages(