Skip to content

Commit

Permalink
cover divide by zero edge case
Browse files Browse the repository at this point in the history
  • Loading branch information
RossKen committed Feb 12, 2025
1 parent c43a0e7 commit bac0789
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions splink/internals/graph_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def _node_degree_centralisation_sql(
SELECT
c.{composite_uid_clusters} AS composite_unique_id,
c.cluster_id AS cluster_id,
- COUNT(*) FILTER (WHERE neighbour IS NOT NULL) AS node_degree,
+ COUNT(*) FILTER (WHERE n.neighbour IS NOT NULL) AS node_degree,
COUNT(*) OVER(PARTITION BY c.cluster_id) AS cluster_size
FROM
{df_clustered.physical_name} c
Expand All @@ -101,7 +101,10 @@ def _node_degree_centralisation_sql(
composite_unique_id,
cluster_id,
node_degree,
- node_degree / (cluster_size - 1) AS node_centrality
+ CASE
+ WHEN cluster_size > 1 THEN node_degree / (cluster_size - 1)
+ ELSE 0
+ END AS node_centrality
FROM {node_degree_table_name}
"""
sql_info = {"sql": sql, "output_table_name": "__splink__graph_metrics_nodes"}
Expand Down

0 comments on commit bac0789

Please sign in to comment.