From 51e561c440b4d46f66c3aa05b71b07b1836ad3b6 Mon Sep 17 00:00:00 2001 From: Krasen Samardzhiev Date: Mon, 17 Jun 2024 17:19:34 +0200 Subject: [PATCH] Faster node density (#609) * graph node density * Merge branch 'main' into graph_node_density * Apply suggestions from code review Co-authored-by: Martin Fleischmann * code review changes * docstring change * update api docs * keep describe only in diversity --- docs/api.rst | 5 +- momepy/functional/_intensity.py | 65 +----------------- momepy/functional/tests/test_intensity.py | 72 ++++++++------------ momepy/graph.py | 80 +++++++++++++++++++++++ 4 files changed, 110 insertions(+), 112 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 0e8e07eb..693384c6 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -111,10 +111,10 @@ A set of functions to measure intensity characters: :toctree: api/ courtyards - node_density Note that additional intensity characters can be directly derived using :meth:`libpysal.graph.Graph.describe` -and functions :func:`describe_agg` and :func:`describe_reached_agg` belonging to the diversity module. +and functions :func:`describe_agg` and :func:`describe_reached_agg`. + Measuring diversity ------------------- @@ -133,6 +133,7 @@ A set of functions to measure spatial diversity of elements and their values: theil values_range +Note that additional diversity characters can be directly derived using :meth:`libpysal.graph.Graph.describe`. Underlying components of :func:`shannon` and :func:`simpson` are also exposed for direct use: diff --git a/momepy/functional/_intensity.py b/momepy/functional/_intensity.py index 09a6cf4c..0b4fe6bb 100644 --- a/momepy/functional/_intensity.py +++ b/momepy/functional/_intensity.py @@ -1,11 +1,9 @@ -import numpy as np -import pandas as pd import shapely from geopandas import GeoDataFrame, GeoSeries from libpysal.graph import Graph from pandas import Series -__all__ = ["courtyards", "node_density"] +__all__ = ["courtyards"] def courtyards(geometry: GeoDataFrame | GeoSeries, graph: Graph) -> Series: @@ -50,64 +48,3 @@ def _calculate_courtyards(group): ) return result - - -def node_density( - nodes: GeoDataFrame, edges: GeoDataFrame, graph: Graph, weighted: bool = False -) -> Series: - """Calculate the density of a node's neighbours (for all nodes) - on the street network defined in ``graph``. - - Calculated as the number of neighbouring - nodes / cumulative length of street network within neighbours. - ``node_start``, ``node_end``, is standard output of - :py:func:`momepy.nx_to_gdf` and is compulsory for ``edges`` to have - these columns. - If ``weighted``, a ``degree`` column is also required in ``nodes``. - - Adapted from :cite:`dibble2017`. - - Parameters - ---------- - nodes : GeoDataFrame - A GeoDataFrame containing nodes of a street network. - edges : GeoDataFrame - A GeoDataFrame containing edges of a street network. - graph : libpysal.graph.Graph - A spatial weights matrix capturing relationship between nodes. - weighted : bool (default False) - If ``True``, density will take into account node degree as ``k-1``. - - Returns - ------- - Series - A Series containing resulting values. - - Examples - -------- - >>> nodes['density'] = mm.node_density(nodes, edges, graph) - """ - - required_cols = ["node_start", "node_end"] - for col in required_cols: - if col not in edges.columns: - raise ValueError(f"Column {col} is needed in the edges GeoDataframe.") - - if weighted and ("degree" not in nodes.columns): - raise ValueError("Column degree is needed in nodes GeoDataframe.") - - def _calc_nodedensity(group, edges): - """Helper function to calculate group values.""" - neighbours = group.index.values - locs = np.in1d(edges["node_start"], neighbours) & np.in1d( - edges["node_end"], neighbours - ) - lengths = edges.loc[locs].geometry.length.sum() - return group.sum() / lengths if lengths else 0 - - if weighted: - summation_values = nodes["degree"] - 1 - else: - summation_values = pd.Series(np.ones(nodes.shape[0]), index=nodes.index) - - return graph.apply(summation_values, _calc_nodedensity, edges=edges) diff --git a/momepy/functional/tests/test_intensity.py b/momepy/functional/tests/test_intensity.py index 264f81a2..07f61c29 100644 --- a/momepy/functional/tests/test_intensity.py +++ b/momepy/functional/tests/test_intensity.py @@ -1,4 +1,5 @@ import geopandas as gpd +import networkx as nx import numpy as np import pandas as pd import pytest @@ -44,12 +45,12 @@ def test_courtyards(self): assert_result(courtyards, expected, self.df_buildings) def test_node_density(self): - nx = mm.gdf_to_nx(self.df_streets, integer_labels=True) - nx = mm.node_degree(nx) - nodes, edges, w = mm.nx_to_gdf(nx, spatial_weights=True) - g = Graph.from_W(w).higher_order(k=3, lower_order=True).assign_self_weight() + g = mm.gdf_to_nx(self.df_streets, integer_labels=True) + g = mm.node_degree(g) + nodes, edges, w = mm.nx_to_gdf(g, spatial_weights=True) - density = mm.node_density(nodes, edges, g) + g = mm.node_density(g, radius=3) + density = pd.Series(nx.get_node_attributes(g, "node_density")) expected_density = { "count": 29, "mean": 0.005534125924228438, @@ -58,7 +59,7 @@ def test_node_density(self): } assert_result(density, expected_density, nodes, check_names=False) - weighted = mm.node_density(nodes, edges, g, weighted=True) + weighted = pd.Series(nx.get_node_attributes(g, "node_density_weighted")) expected_weighted = { "count": 29, "mean": 0.010090861332429164, @@ -67,32 +68,18 @@ def test_node_density(self): } assert_result(weighted, expected_weighted, nodes, check_names=False) - island = mm.node_density(nodes, edges, Graph.from_W(w).assign_self_weight()) - expected_island = { - "count": 29, - "mean": 0.01026753724860306, - "max": 0.029319191032027746, - "min": 0.004808273240207287, - } - assert_result(island, expected_island, nodes, check_names=False) - - with pytest.raises( - ValueError, - match=("Column node_start is needed in the edges GeoDataframe."), - ): - mm.node_density(nodes, nodes, g) - - with pytest.raises( - ValueError, - match=("Column node_end is needed in the edges GeoDataframe."), - ): - mm.node_density(nodes, edges["node_start"].to_frame(), g) - - with pytest.raises( - ValueError, - match=("Column degree is needed in nodes GeoDataframe."), - ): - mm.node_density(edges, edges, g, weighted=True) + # two API equivalence + g = mm.gdf_to_nx(self.df_streets, integer_labels=True) + g = mm.node_degree(g) + alternative_g = mm.subgraph(g, radius=3) + alternative_density = pd.Series( + nx.get_node_attributes(alternative_g, "node_density") + ) + alternative_weighted = pd.Series( + nx.get_node_attributes(alternative_g, "node_density_weighted") + ) + assert_series_equal(alternative_density, density) + assert_series_equal(alternative_weighted, weighted) @pytest.mark.skipif( not PD_210, reason="aggregation is different in previous pandas versions" @@ -296,14 +283,15 @@ def test_density(self): ) def test_node_density(self): - nx = mm.gdf_to_nx(self.df_streets, integer_labels=True) - nx = mm.node_degree(nx) - nodes, edges, w = mm.nx_to_gdf(nx, spatial_weights=True) + g = mm.gdf_to_nx(self.df_streets, integer_labels=True) + g = mm.node_degree(g) + g = mm.node_density(g, radius=3) + nodes, edges, w = mm.nx_to_gdf(g, spatial_weights=True) + sw = mm.sw_high(k=3, weights=w) - g = Graph.from_W(w).higher_order(k=3, lower_order=True).assign_self_weight() density_old = mm.NodeDensity(nodes, edges, sw).series - density_new = mm.node_density(nodes, edges, g) + density_new = pd.Series(nx.get_node_attributes(g, "node_density")) assert_series_equal( density_old, density_new, check_names=False, check_dtype=False ) @@ -311,15 +299,7 @@ def test_node_density(self): weighted_old = mm.NodeDensity( nodes, edges, sw, weighted=True, node_degree="degree" ).series - weighted_new = mm.node_density(nodes, edges, g, weighted=True) + weighted_new = pd.Series(nx.get_node_attributes(g, "node_density_weighted")) assert_series_equal( weighted_old, weighted_new, check_names=False, check_dtype=False ) - - islands_old = mm.NodeDensity(nodes, edges, w).series - islands_new = mm.node_density( - nodes, edges, Graph.from_W(w).assign_self_weight() - ) - assert_series_equal( - islands_old, islands_new, check_names=False, check_dtype=False - ) diff --git a/momepy/graph.py b/momepy/graph.py index 2ee65916..19d0721e 100644 --- a/momepy/graph.py +++ b/momepy/graph.py @@ -7,6 +7,7 @@ import networkx as nx import numpy as np +from pandas import Series from tqdm.auto import tqdm __all__ = [ @@ -25,6 +26,7 @@ "straightness_centrality", "subgraph", "mean_nodes", + "node_density", ] @@ -988,6 +990,73 @@ def straightness_centrality( return netx +def node_density( + graph: nx.Graph, + radius: int, + length: str = "mm_len", + distance: str | None = None, + verbose: bool = True, +) -> nx.Graph: + """Calculate the density of a node's neighbours (for all nodes) + on the street network defined in ``graph``. + + Calculated as the number of neighbouring + nodes / cumulative length of street network within neighbours. + Returns two values - an unweighted and weighted density unweighted + is calculated based on the number of neigbhouring nodes, whereas weighted + density will take into account node degree as ``k-1``. + + Adapted from :cite:`dibble2017`. + + Parameters + ---------- + graph : networkx.Graph + A Graph representing a street network. + Ideally generated from GeoDataFrame using :func:`momepy.gdf_to_nx`. + radius: int + Include all neighbors of distance <= radius from ``n``. + length : str, default `mm_len` + The name of the attribute of segment length (geographical). + distance : str, optional + Use specified edge data key as distance. + For example, setting ``distance=’weight’`` will use the edge ``weight`` to + measure the distance from the node ``n`` during ``ego_graph`` generation. + verbose : bool (default True) + If ``True``, shows progress bars in loops and indication of steps. + + Returns + ------- + netx : Graph + A networkx.Graph object. + + Examples + -------- + >>> network_graph = mm.node_density(network_graph, radius=5) + """ + netx = graph.copy() + orig_nodes_degree = Series(nx.get_node_attributes(netx, "degree")) + for n in tqdm(netx, total=len(netx), disable=not verbose): + sub = nx.ego_graph( + netx, n, radius=radius, distance=distance + ) # define subgraph of steps=radius + unweighted, weighted = _node_density( + sub, length=length, orig_nodes_degree=orig_nodes_degree + ) + netx.nodes[n]["node_density"] = unweighted + netx.nodes[n]["node_density_weighted"] = weighted + return netx + + +def _node_density(sub, length, orig_nodes_degree): + """Calculates node density for a subgraph.""" + length_sum = sub.size(length) + weighted_node_data = orig_nodes_degree[list(sub.nodes)] - 1 + unweighted_node_data = Series(1, index=weighted_node_data) + unweighted = (unweighted_node_data.sum() / length_sum) if length_sum else 0 + weighted = (weighted_node_data.sum() / length_sum) if length_sum else 0 + return unweighted, weighted + + def subgraph( graph, radius=5, @@ -1004,6 +1073,7 @@ def subgraph( gamma=True, local_closeness=True, closeness_weight=None, + node_density=True, verbose=True, ): """ @@ -1064,6 +1134,9 @@ def subgraph( netx = graph.copy() + if node_density: + orig_nodes_degree = Series(nx.get_node_attributes(netx, "degree")) + for n in tqdm(netx, total=len(netx), disable=not verbose): sub = nx.ego_graph( netx, n, radius=radius, distance=distance @@ -1108,6 +1181,13 @@ def subgraph( sub, n, length=closeness_weight, len_graph=lengraph ) + if node_density: + unweighted, weighted = _node_density( + sub, length=length, orig_nodes_degree=orig_nodes_degree + ) + netx.nodes[n]["node_density"] = unweighted + netx.nodes[n]["node_density_weighted"] = weighted + return netx