From 8980b1a3bad65a8d69d24e0980f35acf5a4af16d Mon Sep 17 00:00:00 2001 From: Felipe Gonzalez Date: Thu, 24 Oct 2024 18:31:25 -0300 Subject: [PATCH] Adds overlapping feature #159 (#169) --- notebooks/KPI.ipynb | 136 +++++++++ notebooks/Overlapping.ipynb | 217 ++++++++++++++ urbantrips/carto/carto.py | 418 ++++++++++++++------------- urbantrips/carto/routes.py | 278 +++++++++++------- urbantrips/kpi/kpi.py | 2 +- urbantrips/kpi/overlapping.py | 528 ++++++++++++++++++++++++++++++++++ urbantrips/utils/utils.py | 34 +++ urbantrips/viz/overlapping.py | 237 +++++++++++++++ urbantrips/viz/viz.py | 23 +- 9 files changed, 1559 insertions(+), 314 deletions(-) create mode 100644 notebooks/KPI.ipynb create mode 100644 notebooks/Overlapping.ipynb create mode 100644 urbantrips/kpi/overlapping.py create mode 100644 urbantrips/viz/overlapping.py diff --git a/notebooks/KPI.ipynb b/notebooks/KPI.ipynb new file mode 100644 index 0000000..724f06b --- /dev/null +++ b/notebooks/KPI.ipynb @@ -0,0 +1,136 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd\n", + "from urbantrips.utils import utils\n", + "from urbantrips.kpi.kpi import compute_route_section_load\n", + "from urbantrips.viz.viz import visualize_route_section_load\n", + "from urbantrips.kpi.line_od_matrix import compute_lines_od_matrix\n", + "from urbantrips.viz.line_od_matrix import visualize_lines_od_matrix\n", + "\n", + "# Completar con el directorio donde se encuentra clonado e instalado el repositorio de UrbanTrips\n", + "URBANTRIPS_PATH = \"RUTA/DE/URBANTRIPS\"\n", + "os.chdir(URBANTRIPS_PATH)\n", + "\n", + "# Leer archivos de configuración y conexiones a las db\n", + "configs = utils.leer_configs_generales()\n", + "conn_insumos = utils.iniciar_conexion_db(tipo='insumos')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Se leen los datos de las lineas\n", + "metadata_lineas = pd.read_sql(\"select id_linea,nombre_linea, modo from metadata_lineas;\", conn_insumos)\n", + "# Se puede buscar por nombre de linea que contenga alguna palabra o numero\n", + "metadata_lineas[metadata_lineas.nombre_linea.str.contains(\"50\") #reemplazar 50 por lo que se desee\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "rango = [7,10] # Se establece un rango horario, en este caso de 7 a 10 \n", + "line_ids = [1,2] # Se establecen los ids de las lineas a analizar\n", + "day_type = 'weekday' # Se establece el tipo de día a analizar puede ser weekday, weekend o una fecha 1/2/2024\n", + "section_meters = 500 # Se establece el parámetro de metros de sección\n", + "n_sections = None # Se establece el número de secciones a analizar, si se usan metro no se necesita" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Se calculan los estadisticos de carga de las secciones de las lineas\n", + "compute_route_section_load(\n", + " line_ids=line_ids, hour_range=rango,\n", + " section_meters = section_meters,day_type=day_type)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Se visualizan los estadisticos de carga de las secciones de las lineas\n", + "visualize_route_section_load(\n", + " line_ids=line_ids, hour_range=rango,\n", + " day_type=day_type, section_meters=section_meters,\n", + " save_gdf=True, 
stat='totals', \n", + " factor=500, factor_min=10)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Se computa la matriz OD de las lineas\n", + "compute_lines_od_matrix(\n", + " line_ids=line_ids, hour_range=rango,n_sections=n_sections,\n", + " section_meters=section_meters, day_type=day_type, save_csv=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Se visualiza la matriz OD de las lineas\n", + "visualize_lines_od_matrix(\n", + " line_ids=line_ids, hour_range=rango,\n", + " day_type=day_type, n_sections=n_sections,section_meters=section_meters,\n", + " stat='totals')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# con este codigo se puede consultar la ayuda de las funciones\n", + "visualize_lines_od_matrix?" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/Overlapping.ipynb b/notebooks/Overlapping.ipynb new file mode 100644 index 0000000..c3e7feb --- /dev/null +++ b/notebooks/Overlapping.ipynb @@ -0,0 +1,217 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "import folium\n", + "import itertools\n", + "\n", + "from urbantrips.utils import utils\n", + "from urbantrips.kpi import overlapping as ovl\n", + "from urbantrips.viz import overlapping as ovl_viz\n", + "\n", + "# Completar con el directorio donde se encuentra clonado e instalado el repositorio de UrbanTrips\n", + "URBANTRIPS_PATH = \"RUTA/DE/URBANTRIPS\"\n", + "os.chdir(URBANTRIPS_PATH)\n", + "\n", + "# Leer archivos de configuración y conexiones a las db\n", + "configs = utils.leer_configs_generales()\n", + "alias = configs['alias_db_data']\n", + "conn_data = utils.iniciar_conexion_db(tipo='data')\n", + "conn_insumos = utils.iniciar_conexion_db(tipo='insumos')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Se leen los datos de las lineas\n", + "metadata_lineas = pd.read_sql(\"select id_linea,nombre_linea, modo from metadata_lineas;\", conn_insumos)\n", + "# Se puede buscar por nombre de linea que contenga alguna palabra o numero\n", + "metadata_lineas[metadata_lineas.nombre_linea.str.contains(\"50\") #reemplazar 50 por lo que se desee buscar en el nombre de la linea\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Configurar el día a procesar \n", + "day = \"weekday\"\n", + "\n", + "# La resolucion h3 (no puede ser mayor a la que aparece en las configuraciones)\n", + "h3_res_comp = 8\n", + "\n", + "# Los id de las lineas a comparar\n", + "comp_line_id = 1\n", + "base_line_id = 2" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Obtiene del archivo de configuración si se deben usar ramales o lineas\n", + "use_branches = configs['lineas_contienen_ramales']\n", + "if 
use_branches:\n", + " # Lee los datos de los ramales\n", + " metadata = pd.read_sql(f\"select id_linea,id_ramal from metadata_ramales where id_linea in ({base_line_id},{comp_line_id})\",\n", + " conn_insumos,dtype={'id_linea': int, 'id_ramal': int})\n", + " route_type = 'branches'\n", + "\n", + " # Computa todas las posibles combinaciones de ramales entre esas dos lineas\n", + " route_id_combinations = list(itertools.combinations(metadata['id_ramal'], 2))\n", + " base_route_id_combinations = list(itertools.combinations(metadata.loc[metadata.id_linea == base_line_id,'id_ramal'], 2))\n", + " comp_line_id_combinations = list(itertools.combinations(metadata.loc[metadata.id_linea == comp_line_id,'id_ramal'], 2))\n", + " route_id_combinations = [combination for combination in route_id_combinations if ((combination not in base_route_id_combinations) and (combination not in comp_line_id_combinations))]\n", + " \n", + " metadata_branches = pd.read_sql(f\"select * from metadata_ramales where id_linea in ({base_line_id},{comp_line_id})\",\n", + " conn_insumos,dtype={'id_linea': int, 'id_ramal': int})\n", + "\n", + "else:\n", + " route_type = 'lines'\n", + " route_id_combinations = [(base_line_id, comp_line_id)]\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Aca se pueden ver todos los ramales de las lineas a comparar\n", + "metadata_branches " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecciona un par de las posibles combinaciones de ramales\n", + "route_id_combination = route_id_combinations[0] \n", + "route_id_combination" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# crea un id de ruta unico de ramal o linea en funcion de si esta configurado para usar ramales o lineas\n", + "if use_branches:\n", + " if route_id_combination[0] in metadata.loc[metadata.id_linea == base_line_id,'id_ramal'].values:\n", + " base_route_id = route_id_combination[0]\n", + " comp_route_id = route_id_combination[1]\n", + "\n", + " else:\n", + " base_route_id = route_id_combination[1]\n", + " comp_route_id = route_id_combination[0]\n", + " \n", + " nombre_ramal_base = metadata_branches.loc[metadata_branches.id_ramal == base_route_id,'nombre_ramal'].item()\n", + " nombre_ramal_comp = metadata_branches.loc[metadata_branches.id_ramal == comp_route_id,'nombre_ramal'].item()\n", + "\n", + " base_route_str = f\"ramal {nombre_ramal_base} (id {base_route_id})\"\n", + " comp_route_str = f\"ramal {nombre_ramal_comp} (id {comp_route_id})\"\n", + "\n", + "else:\n", + " base_route_id,comp_route_id = route_id_combination\n", + " base_route_str = \"\"\n", + " comp_route_str = \"\"\n", + "\n", + "nombre_linea_base = metadata_lineas.loc[metadata_lineas.id_linea == base_line_id,'nombre_linea'].item()\n", + "nombre_linea_comp = metadata_lineas.loc[metadata_lineas.id_linea == comp_line_id,'nombre_linea'].item()\n", + "\n", + "print(f\"Tomando como linea base la linea {nombre_linea_base} (id {base_line_id}) \" + base_route_str)\n", + "print(f\"Tomando como linea comparacion la linea {nombre_linea_comp} (id {comp_line_id}) \" + comp_route_str)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Calcula la superposicion de la oferta de la linea base con la de la linea de comparacion\n", + "overlapping_dict = ovl.compute_supply_overlapping(day, 
base_route_id,comp_route_id,route_type,h3_res_comp) \n", + "base_gdf = overlapping_dict[\"base\"][\"h3\"]\n", + "base_route_gdf = overlapping_dict[\"base\"][\"line\"]\n", + "comp_gdf = overlapping_dict[\"comp\"][\"h3\"]\n", + "comp_route_gdf = overlapping_dict[\"comp\"][\"line\"]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Se visualiza la superposicion de la oferta de la linea base con la de la linea de comparacion\n", + "f = ovl_viz.plot_interactive_supply_overlapping(overlapping_dict)\n", + "f.save(f\"resultados/html/{alias}_supply_overlapping_base_{base_route_id}_comp_{comp_route_id}_h3_{h3_res_comp}.html\")\n", + "f" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Calcula la demanda de la linea base y la de comparacion\n", + "base_demand,comp_demand = ovl.compute_demand_overlapping(base_line_id,comp_line_id,day,\n", + " base_route_id,comp_route_id,\n", + " base_gdf,comp_gdf)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Se visualiza la superposicion de la demanda de la linea base con la de la linea de comparacion\n", + "fig = ovl_viz.plot_interactive_demand_overlapping(base_demand, comp_demand, overlapping_dict)\n", + "fig.save(f\"resultados/html/{alias}_demand_overlapping_base_{base_route_id}_comp_{comp_route_id}_h3_{h3_res_comp}.html\")\n", + "fig" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/urbantrips/carto/carto.py b/urbantrips/carto/carto.py index 2a127fb..e74de32 100644 --- a/urbantrips/carto/carto.py +++ b/urbantrips/carto/carto.py @@ -14,26 +14,39 @@ from networkx import NetworkXNoPath from pandana.loaders import osm as osm_pandana from urbantrips.geo.geo import ( - get_stop_hex_ring, h3togeo, add_geometry, - create_voronoi, normalizo_lat_lon, h3dist, bring_latlon + get_stop_hex_ring, + h3togeo, + add_geometry, + create_voronoi, + normalizo_lat_lon, + h3dist, + bring_latlon, ) from urbantrips.viz import viz from urbantrips.utils.utils import ( duracion, iniciar_conexion_db, leer_configs_generales, - leer_alias) + leer_alias, +) import subprocess + def get_library_version(library_name): - result = subprocess.run(["pip", "show", library_name], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + result = subprocess.run( + ["pip", "show", library_name], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) if result.returncode == 0: - for line in result.stdout.split('\n'): + for line in result.stdout.split("\n"): if line.startswith("Version:"): return line.split(":")[1].strip() return None + @duracion def update_stations_catchment_area(ring_size): """ @@ -42,8 +55,8 @@ def update_stations_catchment_area(ring_size): ya en la matriz """ - conn_data = iniciar_conexion_db(tipo='data') - conn_insumos = iniciar_conexion_db(tipo='insumos') + conn_data = iniciar_conexion_db(tipo="data") + conn_insumos = iniciar_conexion_db(tipo="insumos") # Leer las paradas en base a las etapas q = """ @@ -59,16 +72,14 @@ def update_stations_catchment_area(ring_size): conn_insumos, ) 
- paradas_etapas = paradas_etapas.merge(metadata_lineas[['id_linea', - 'id_linea_agg']], - how='left', - on='id_linea').drop(['id_linea'], - axis=1) + paradas_etapas = paradas_etapas.merge( + metadata_lineas[["id_linea", "id_linea_agg"]], how="left", on="id_linea" + ).drop(["id_linea"], axis=1) paradas_etapas = paradas_etapas.groupby( - ['id_linea_agg', 'parada'], as_index=False).size() - paradas_etapas = paradas_etapas[(paradas_etapas['size'] > 1)].drop([ - 'size'], axis=1) + ["id_linea_agg", "parada"], as_index=False + ).size() + paradas_etapas = paradas_etapas[(paradas_etapas["size"] > 1)].drop(["size"], axis=1) # Leer las paradas ya existentes en la matriz q = """ @@ -77,29 +88,39 @@ def update_stations_catchment_area(ring_size): paradas_en_matriz = pd.read_sql(q, conn_insumos) # Detectar que paradas son nuevas para cada linea - paradas_nuevas = paradas_etapas\ - .merge(paradas_en_matriz, - on=['id_linea_agg', 'parada'], - how='left') + paradas_nuevas = paradas_etapas.merge( + paradas_en_matriz, on=["id_linea_agg", "parada"], how="left" + ) - paradas_nuevas = paradas_nuevas.loc[paradas_nuevas.m.isna(), [ - 'id_linea_agg', 'parada']] + paradas_nuevas = paradas_nuevas.loc[ + paradas_nuevas.m.isna(), ["id_linea_agg", "parada"] + ] if len(paradas_nuevas) > 0: areas_influencia_nuevas = pd.concat( - (map(get_stop_hex_ring, np.unique(paradas_nuevas['parada']), - itertools.repeat(ring_size)))) + ( + map( + get_stop_hex_ring, + np.unique(paradas_nuevas["parada"]), + itertools.repeat(ring_size), + ) + ) + ) matriz_nueva = paradas_nuevas.merge( - areas_influencia_nuevas, how='left', on='parada') + areas_influencia_nuevas, how="left", on="parada" + ) # Subir a la db print("Subiendo matriz a db") - matriz_nueva.to_sql("matriz_validacion", conn_insumos, - if_exists="append", index=False) + matriz_nueva.to_sql( + "matriz_validacion", conn_insumos, if_exists="append", index=False + ) print("Fin actualizacion matriz de validacion") else: - print("La matriz de validacion ya tiene los datos más actuales" + - " en base a la informacion existente en la tabla de etapas") + print( + "La matriz de validacion ya tiene los datos más actuales" + + " en base a la informacion existente en la tabla de etapas" + ) return None def guardo_zonificaciones(): @@ -203,8 +224,8 @@ def create_zones_table(): for each h3 with data in etapas """ - conn_insumos = iniciar_conexion_db(tipo='insumos') - conn_data = iniciar_conexion_db(tipo='data') + conn_insumos = iniciar_conexion_db(tipo="insumos") + conn_data = iniciar_conexion_db(tipo="data") # leer origenes de la tabla etapas etapas = pd.read_sql_query( @@ -234,26 +255,26 @@ def create_zones_table(): etapas.groupby( "h3", as_index=False, - ).agg({'factor_expansion_linea': 'sum', - 'latitud': 'mean', - 'longitud': 'mean'}) - .rename(columns={'factor_expansion_linea': 'fex'}) + ) + .agg({"factor_expansion_linea": "sum", "latitud": "mean", "longitud": "mean"}) + .rename(columns={"factor_expansion_linea": "fex"}) ) # TODO: redo how geoms are created here zonas = pd.concat([zonas, zonas_ant], ignore_index=True) agg_dict = { - 'fex': 'mean', - 'latitud': 'mean', - 'longitud': 'mean', + "fex": "mean", + "latitud": "mean", + "longitud": "mean", } - zonas = zonas.groupby("h3", - as_index=False, - ).agg(agg_dict) + zonas = zonas.groupby( + "h3", + as_index=False, + ).agg(agg_dict) # Crea la latitud y la longitud en base al h3 zonas["origin"] = zonas["h3"].apply(h3togeo) - zonas["lon"] = zonas["origin"].apply(bring_latlon, latlon='lon') - zonas["lat"] = 
zonas["origin"].apply(bring_latlon, latlon='lat') + zonas["lon"] = zonas["origin"].apply(bring_latlon, latlon="lon") + zonas["lat"] = zonas["origin"].apply(bring_latlon, latlon="lat") zonas = gpd.GeoDataFrame( zonas, @@ -264,7 +285,7 @@ def create_zones_table(): # Suma a la tabla las zonificaciones del config configs = leer_configs_generales() - if configs['zonificaciones']: + if configs["zonificaciones"]: for n in range(0, 5): try: @@ -296,7 +317,7 @@ def create_voronoi_zones(res=8, max_zonas=15, show_map=False): """ alias = leer_alias() - conn_insumos = iniciar_conexion_db(tipo='insumos') + conn_insumos = iniciar_conexion_db(tipo="insumos") # Leer informacion en tabla zonas zonas = pd.read_sql_query( @@ -308,43 +329,40 @@ def create_voronoi_zones(res=8, max_zonas=15, show_map=False): ) # Si existe la columna de zona voronoi la elimina - if 'Zona_voi' in zonas.columns: - zonas.drop(['Zona_voi'], - axis=1, - inplace=True) + if "Zona_voi" in zonas.columns: + zonas.drop(["Zona_voi"], axis=1, inplace=True) # agrega datos a un hexagono mas grande - zonas['h3_r'] = zonas['h3'].apply(h3.h3_to_parent, - res=res) + zonas["h3_r"] = zonas["h3"].apply(h3.h3_to_parent, res=res) # Computa para ese hexagono el promedio ponderado de latlong zonas_for_hexs = zonas.loc[zonas.fex != 0, :] - hexs = zonas_for_hexs.groupby('h3_r', - as_index=False).fex.sum() + hexs = zonas_for_hexs.groupby("h3_r", as_index=False).fex.sum() - hexs = hexs.merge(zonas_for_hexs - .groupby('h3_r') - .apply( - lambda x: np.average( - x['longitud'], weights=x['fex'])) - .reset_index().rename(columns={0: 'longitud'}), - how='left') + hexs = hexs.merge( + zonas_for_hexs.groupby("h3_r") + .apply(lambda x: np.average(x["longitud"], weights=x["fex"])) + .reset_index() + .rename(columns={0: "longitud"}), + how="left", + ) - hexs = hexs.merge(zonas_for_hexs - .groupby('h3_r') - .apply( - lambda x: np.average(x['latitud'], weights=x['fex']) - ).reset_index().rename(columns={0: 'latitud'}), - how='left') + hexs = hexs.merge( + zonas_for_hexs.groupby("h3_r") + .apply(lambda x: np.average(x["latitud"], weights=x["fex"])) + .reset_index() + .rename(columns={0: "latitud"}), + how="left", + ) hexs = gpd.GeoDataFrame( hexs, - geometry=gpd.points_from_xy(hexs['longitud'], hexs['latitud']), + geometry=gpd.points_from_xy(hexs["longitud"], hexs["latitud"]), crs=4326, ) - cant_zonas = len(hexs)+10 + cant_zonas = len(hexs) + 10 k_ring = 1 if cant_zonas <= max_zonas: @@ -353,40 +371,41 @@ def create_voronoi_zones(res=8, max_zonas=15, show_map=False): while cant_zonas > max_zonas: # Construye un set de hexagonos aun mas grandes hexs2 = hexs.copy() - hexs2['h3_r2'] = hexs2.h3_r.apply(h3.h3_to_parent, res=res-1) - hexs2['geometry'] = hexs2.h3_r2.apply(add_geometry) - hexs2 = hexs2.sort_values(['h3_r2', 'fex'], ascending=[True, False]) - hexs2['orden'] = hexs2.groupby(['h3_r2']).cumcount() + hexs2["h3_r2"] = hexs2.h3_r.apply(h3.h3_to_parent, res=res - 1) + hexs2["geometry"] = hexs2.h3_r2.apply(add_geometry) + hexs2 = hexs2.sort_values(["h3_r2", "fex"], ascending=[True, False]) + hexs2["orden"] = hexs2.groupby(["h3_r2"]).cumcount() hexs2 = hexs2[hexs2.orden == 0] - hexs2 = hexs2.sort_values('fex', ascending=False) - hexs['cambiado'] = 0 + hexs2 = hexs2.sort_values("fex", ascending=False) + hexs["cambiado"] = 0 for i in hexs2.h3_r.tolist(): vecinos = h3.k_ring(i, k_ring) - hexs.loc[(hexs.h3_r.isin(vecinos)) & ( - hexs.cambiado == 0), 'h3_r'] = i - hexs.loc[(hexs.h3_r.isin(vecinos)) & ( - hexs.cambiado == 0), 'cambiado'] = 1 + hexs.loc[(hexs.h3_r.isin(vecinos)) 
& (hexs.cambiado == 0), "h3_r"] = i + hexs.loc[(hexs.h3_r.isin(vecinos)) & (hexs.cambiado == 0), "cambiado"] = 1 - hexs_tmp = hexs.groupby('h3_r', as_index=False).fex.sum() + hexs_tmp = hexs.groupby("h3_r", as_index=False).fex.sum() hexs_tmp = hexs_tmp.merge( hexs[hexs.fex != 0] - .groupby('h3_r') - .apply( - lambda x: np.average(x['longitud'], weights=x['fex'])) - .reset_index().rename(columns={0: 'longitud'}), - how='left') + .groupby("h3_r") + .apply(lambda x: np.average(x["longitud"], weights=x["fex"])) + .reset_index() + .rename(columns={0: "longitud"}), + how="left", + ) hexs_tmp = hexs_tmp.merge( hexs[hexs.fex != 0] - .groupby('h3_r') - .apply(lambda x: np.average(x['latitud'], weights=x['fex'])) - .reset_index().rename(columns={0: 'latitud'}), - how='left') + .groupby("h3_r") + .apply(lambda x: np.average(x["latitud"], weights=x["fex"])) + .reset_index() + .rename(columns={0: "latitud"}), + how="left", + ) hexs_tmp = gpd.GeoDataFrame( hexs_tmp, - geometry=gpd.points_from_xy(hexs_tmp['longitud'], - hexs_tmp['latitud']), - crs=4326) + geometry=gpd.points_from_xy(hexs_tmp["longitud"], hexs_tmp["latitud"]), + crs=4326, + ) hexs = hexs_tmp.copy() @@ -396,36 +415,25 @@ def create_voronoi_zones(res=8, max_zonas=15, show_map=False): cant_zonas = len(hexs) voi = create_voronoi(hexs) - voi = gpd.sjoin(voi, - hexs[['fex', 'geometry']], - how='left') - voi = voi.sort_values('fex', - ascending=False) - voi = voi.drop(['Zona', - 'index_right'], - axis=1) - voi = voi.reset_index(drop=True).reset_index().rename( - columns={'index': 'Zona_voi'}) - voi['Zona_voi'] = voi['Zona_voi']+1 - voi['Zona_voi'] = voi['Zona_voi'].astype(str) - - file = os.path.join("data", "data_ciudad", 'zona_voi.geojson') - voi[['Zona_voi', 'geometry']].to_file(file) - - zonas = zonas.drop(['h3_r'], axis=1) - zonas['geometry'] = zonas['h3'].apply(add_geometry) + voi = gpd.sjoin(voi, hexs[["fex", "geometry"]], how="left") + voi = voi.sort_values("fex", ascending=False) + voi = voi.drop(["Zona", "index_right"], axis=1) + voi = voi.reset_index(drop=True).reset_index().rename(columns={"index": "Zona_voi"}) + voi["Zona_voi"] = voi["Zona_voi"] + 1 + voi["Zona_voi"] = voi["Zona_voi"].astype(str) - zonas = gpd.GeoDataFrame( - zonas, - geometry='geometry', - crs=4326) - zonas['geometry'] = zonas['geometry'].representative_point() + file = os.path.join("data", "data_ciudad", "zona_voi.geojson") + voi[["Zona_voi", "geometry"]].to_file(file) + + zonas = zonas.drop(["h3_r"], axis=1) + zonas["geometry"] = zonas["h3"].apply(add_geometry) - zonas = gpd.sjoin(zonas, - voi[['Zona_voi', 'geometry']], - how='left') + zonas = gpd.GeoDataFrame(zonas, geometry="geometry", crs=4326) + zonas["geometry"] = zonas["geometry"].representative_point() - zonas = zonas.drop(['index_right', 'geometry'], axis=1) + zonas = gpd.sjoin(zonas, voi[["Zona_voi", "geometry"]], how="left") + + zonas = zonas.drop(["index_right", "geometry"], axis=1) zonas.to_sql("zonas", conn_insumos, if_exists="replace", index=False) conn_insumos.close() print("Graba zonas en sql lite") @@ -441,10 +449,10 @@ def create_distances_table(use_parallel=False): y calcula diferentes distancias para cada par que no tenga """ - conn_insumos = iniciar_conexion_db(tipo='insumos') - conn_data = iniciar_conexion_db(tipo='data') + conn_insumos = iniciar_conexion_db(tipo="insumos") + conn_data = iniciar_conexion_db(tipo="data") - print('Verifica viajes sin distancias calculadas') + print("Verifica viajes sin distancias calculadas") q = """ select distinct h3_o,h3_d @@ -468,61 +476,60 @@ def 
create_distances_table(use_parallel=False): # Unir pares od h desde data y desde distancias y quedarse con # los que estan en data pero no en distancias - pares_h3 = pares_h3_data\ - .merge(pares_h3_distancias, how='left') - pares_h3 = pares_h3.loc[(pares_h3.d.isna()) & ( - pares_h3.h3_o != pares_h3.h3_d), - ['h3_o', 'h3_d']] + pares_h3 = pares_h3_data.merge(pares_h3_distancias, how="left") + pares_h3 = pares_h3.loc[ + (pares_h3.d.isna()) & (pares_h3.h3_o != pares_h3.h3_d), ["h3_o", "h3_d"] + ] if len(pares_h3) > 0: pares_h3_norm = normalizo_lat_lon(pares_h3) # usa la función osmnx para distancias en caso de error con Pandana - print("Este proceso puede demorar algunas horas dependiendo del tamaño " + - " de la ciudad y si se corre por primera vez por lo que en la base" + - " de insumos no estan estos pares") + print( + "Este proceso puede demorar algunas horas dependiendo del tamaño " + + " de la ciudad y si se corre por primera vez por lo que en la base" + + " de insumos no estan estos pares" + ) - agg2_total = pares_h3_norm.groupby( - ['h3_o_norm', 'h3_d_norm'], - as_index=False).size().drop(['size'], axis=1) + agg2_total = ( + pares_h3_norm.groupby(["h3_o_norm", "h3_d_norm"], as_index=False) + .size() + .drop(["size"], axis=1) + ) - print( - f"Hay {len(agg2_total)} nuevos pares od para sumar a tabla distancias") + print(f"Hay {len(agg2_total)} nuevos pares od para sumar a tabla distancias") print(f"de los {len(pares_h3_data)} originales en la data.") - print('') - print('Procesa distancias con Pandana') - + print("") + print("Procesa distancias con Pandana") + agg2 = compute_distances_osm( agg2_total, h3_o="h3_o_norm", h3_d="h3_d_norm", processing="pandana", modes=["drive"], - use_parallel=False + use_parallel=False, ) if len(agg2) > 0: dist1 = agg2.copy() - dist1['h3_o'] = dist1['h3_o_norm'] - dist1['h3_d'] = dist1['h3_d_norm'] + dist1["h3_o"] = dist1["h3_o_norm"] + dist1["h3_d"] = dist1["h3_d_norm"] dist2 = agg2.copy() - dist2['h3_d'] = dist2['h3_o_norm'] - dist2['h3_o'] = dist2['h3_d_norm'] + dist2["h3_d"] = dist2["h3_o_norm"] + dist2["h3_o"] = dist2["h3_d_norm"] distancias_new = pd.concat([dist1, dist2], ignore_index=True) - distancias_new = distancias_new\ - .groupby(['h3_o', - 'h3_d', - 'h3_o_norm', - 'h3_d_norm'], - as_index=False)[['distance_osm_drive', - 'distance_h3']].first() + distancias_new = distancias_new.groupby( + ["h3_o", "h3_d", "h3_o_norm", "h3_d_norm"], as_index=False + )[["distance_osm_drive", "distance_h3"]].first() - distancias_new.to_sql("distancias", conn_insumos, - if_exists="append", index=False) + distancias_new.to_sql( + "distancias", conn_insumos, if_exists="append", index=False + ) else: - print('Procesa distancias con OSMNX') + print("Procesa distancias con OSMNX") # Determine the size of each chunk (500 rows in this case) chunk_size = 25000 @@ -536,7 +543,8 @@ def create_distances_table(use_parallel=False): agg2 = agg2_total.iloc[start:end].copy() # Call the process_chunk function with the selected chunk print( - f'Bajando distancias entre {start} a {end} de {len(agg2_total)} - {str(datetime.now())[:19]}') + f"Bajando distancias entre {start} a {end} de {len(agg2_total)} - {str(datetime.now())[:19]}" + ) agg2 = compute_distances_osm( agg2, @@ -544,29 +552,26 @@ def create_distances_table(use_parallel=False): h3_d="h3_d_norm", processing="osmnx", modes=["drive"], - use_parallel=use_parallel + use_parallel=use_parallel, ) dist1 = agg2.copy() - dist1['h3_o'] = dist1['h3_o_norm'] - dist1['h3_d'] = dist1['h3_d_norm'] + dist1["h3_o"] = dist1["h3_o_norm"] + 
dist1["h3_d"] = dist1["h3_d_norm"] dist2 = agg2.copy() - dist2['h3_d'] = dist2['h3_o_norm'] - dist2['h3_o'] = dist2['h3_d_norm'] + dist2["h3_d"] = dist2["h3_o_norm"] + dist2["h3_o"] = dist2["h3_d_norm"] distancias_new = pd.concat([dist1, dist2], ignore_index=True) - distancias_new = distancias_new\ - .groupby(['h3_o', - 'h3_d', - 'h3_o_norm', - 'h3_d_norm'], - as_index=False)[['distance_osm_drive', - 'distance_h3']].first() + distancias_new = distancias_new.groupby( + ["h3_o", "h3_d", "h3_o_norm", "h3_d_norm"], as_index=False + )[["distance_osm_drive", "distance_h3"]].first() - distancias_new.to_sql("distancias", conn_insumos, - if_exists="append", index=False) + distancias_new.to_sql( + "distancias", conn_insumos, if_exists="append", index=False + ) conn_insumos.close() - conn_insumos = iniciar_conexion_db(tipo='insumos') + conn_insumos = iniciar_conexion_db(tipo="insumos") conn_insumos.close() conn_data.close() @@ -580,7 +585,7 @@ def compute_distances_osmx(df, mode, use_parallel): Parameters ---------- df : pandas.DataFrame - DataFrame representing a chunk with OD pairs + DataFrame representing a chunk with OD pairs with h3 indexes modes: list list of modes to compute distances for. Must be a valid @@ -611,23 +616,21 @@ def compute_distances_osmx(df, mode, use_parallel): G = ox.add_edge_travel_times(G) nodes_from = ox.distance.nearest_nodes( - G, df['lon_o_tmp'].values, df['lat_o_tmp'].values, return_dist=True + G, df["lon_o_tmp"].values, df["lat_o_tmp"].values, return_dist=True ) nodes_to = ox.distance.nearest_nodes( - G, df['lon_d_tmp'].values, df['lat_d_tmp'].values, return_dist=True + G, df["lon_d_tmp"].values, df["lat_d_tmp"].values, return_dist=True ) nodes_from = nodes_from[0] nodes_to = nodes_to[0] if use_parallel: - results = run_network_distance_parallel( - mode, G, nodes_from, nodes_to) + results = run_network_distance_parallel(mode, G, nodes_from, nodes_to) df[f"distance_osm_{mode}"] = results else: - df = run_network_distance_not_parallel( - df, mode, G, nodes_from, nodes_to) + df = run_network_distance_not_parallel(df, mode, G, nodes_from, nodes_to) return df @@ -639,7 +642,7 @@ def compute_distances_pandana(df, mode): Parameters ---------- df : pandas.DataFrame - DataFrame representing a chunk with OD pairs + DataFrame representing a chunk with OD pairs with h3 indexes modes: list list of modes to compute distances for. Must be a valid @@ -665,24 +668,25 @@ def compute_distances_pandana(df, mode): ymax += 0.2 network = osm_pandana.pdna_network_from_bbox( - ymin, xmin, ymax, xmax, network_type=mode) - - df['node_from'] = network.get_node_ids( - df['lon_o_tmp'], df['lat_o_tmp']).values - df['node_to'] = network.get_node_ids( - df['lon_d_tmp'], df['lat_d_tmp']).values - df[f'distance_osm_{mode}'] = network.shortest_path_lengths( - df['node_to'].values, df['node_from'].values) + ymin, xmin, ymax, xmax, network_type=mode + ) + + df["node_from"] = network.get_node_ids(df["lon_o_tmp"], df["lat_o_tmp"]).values + df["node_to"] = network.get_node_ids(df["lon_d_tmp"], df["lat_d_tmp"]).values + df[f"distance_osm_{mode}"] = network.shortest_path_lengths( + df["node_to"].values, df["node_from"].values + ) return df def compute_distances_osm( - df, - h3_o="", - h3_d="", - processing="pandana", - modes=["drive", "walk"], - use_parallel=False): + df, + h3_o="", + h3_d="", + processing="pandana", + modes=["drive", "walk"], + use_parallel=False, +): """ Takes a dataframe with pairs of h3 with origins and destinations and computes distances between those pairs. 
@@ -690,7 +694,7 @@ def compute_distances_osm( Parameters ---------- df : pandas.DataFrame - DataFrame representing a chunk with OD pairs + DataFrame representing a chunk with OD pairs with h3 indexes h3_o: str (h3Index) origin h3 index @@ -714,21 +718,20 @@ def compute_distances_osm( cols = df.columns.tolist() df["origin"] = df[h3_o].apply(h3togeo) - df["lon_o_tmp"] = df["origin"].apply(bring_latlon, latlon='lon') - df["lat_o_tmp"] = df["origin"].apply(bring_latlon, latlon='lat') + df["lon_o_tmp"] = df["origin"].apply(bring_latlon, latlon="lon") + df["lat_o_tmp"] = df["origin"].apply(bring_latlon, latlon="lat") df["destination"] = df[h3_d].apply(h3togeo) - df["lon_d_tmp"] = df["destination"].apply(bring_latlon, latlon='lon') - df["lat_d_tmp"] = df["destination"].apply(bring_latlon, latlon='lat') + df["lon_d_tmp"] = df["destination"].apply(bring_latlon, latlon="lon") + df["lat_d_tmp"] = df["destination"].apply(bring_latlon, latlon="lat") var_distances = [] for mode in modes: - if processing == 'osmnx': + if processing == "osmnx": # computing distances with osmnx - df = compute_distances_osmx(df=df, mode=mode, - use_parallel=use_parallel) + df = compute_distances_osmx(df=df, mode=mode, use_parallel=use_parallel) else: try: @@ -752,19 +755,19 @@ def compute_distances_osm( return pd.DataFrame([]) var_distances += [f"distance_osm_{mode}"] - df[f"distance_osm_{mode}"] = ( - df[f"distance_osm_{mode}"] / 1000).round(2) + df[f"distance_osm_{mode}"] = (df[f"distance_osm_{mode}"] / 1000).round(2) - condition = ('distance_osm_drive' in df.columns) & ( - 'distance_osm_walk' in df.columns) + condition = ("distance_osm_drive" in df.columns) & ( + "distance_osm_walk" in df.columns + ) if condition: mask = (df.distance_osm_drive * 1.3) < df.distance_osm_walk df.loc[mask, "distance_osm_walk"] = df.loc[mask, "distance_osm_drive"] - if 'distance_osm_drive' in df.columns: + if "distance_osm_drive" in df.columns: df.loc[df.distance_osm_drive > 2000, "distance_osm_drive"] = np.nan - if 'distance_osm_walk' in df.columns: + if "distance_osm_walk" in df.columns: df.loc[df.distance_osm_walk > 2000, "distance_osm_walk"] = np.nan df = df[cols + var_distances].copy() @@ -781,7 +784,7 @@ def compute_distances_osm( axis=1, distancia_entre_hex=distance_between_hex, h3_o=h3_o, - h3_d=h3_d + h3_d=h3_d, ) return df @@ -834,8 +837,11 @@ def run_network_distance_parallel(mode, G, nodes_from, nodes_to): chunksize = int(sqrt(n) * 10) with multiprocessing.Pool(processes=n_cores) as pool: - results = pool.map(partial(get_network_distance_osmnx, G=G), zip( - nodes_from, nodes_to), chunksize=chunksize) + results = pool.map( + partial(get_network_distance_osmnx, G=G), + zip(nodes_from, nodes_to), + chunksize=chunksize, + ) return results diff --git a/urbantrips/carto/routes.py b/urbantrips/carto/routes.py index 20ab177..c0c47e5 100644 --- a/urbantrips/carto/routes.py +++ b/urbantrips/carto/routes.py @@ -11,11 +11,14 @@ from urbantrips.geo import geo from urbantrips.carto import carto -from urbantrips.utils.utils import (leer_configs_generales, - duracion, - iniciar_conexion_db, - leer_alias - ) +from urbantrips.utils.utils import ( + leer_configs_generales, + duracion, + iniciar_conexion_db, + leer_alias, + create_branch_ids_sql_filter, + create_line_ids_sql_filter, +) @duracion @@ -31,8 +34,9 @@ def process_routes_geoms(): configs = leer_configs_generales() if route_geoms_not_present(configs): - print("No hay recorridos en el archivo de config\n" - "No se procesaran recorridos") + print( + "No hay recorridos en el archivo de 
config\n" "No se procesaran recorridos" + ) return None geojson_name = configs["recorridos_geojson"] @@ -44,40 +48,44 @@ def process_routes_geoms(): # Checl columns check_route_geoms_columns(geojson_data, branches_present) - conn_insumos = iniciar_conexion_db(tipo='insumos') + conn_insumos = iniciar_conexion_db(tipo="insumos") # if data has lines and branches, split them if branches_present: - branches_routes = geojson_data\ - .reindex(columns=['id_ramal', 'geometry']) + branches_routes = geojson_data.reindex(columns=["id_ramal", "geometry"]) - branches_routes['wkt'] = branches_routes.geometry.to_wkt() - branches_routes = branches_routes\ - .reindex(columns=['id_ramal', 'wkt']) + branches_routes["wkt"] = branches_routes.geometry.to_wkt() + branches_routes = branches_routes.reindex(columns=["id_ramal", "wkt"]) branches_routes.to_sql( - "official_branches_geoms", conn_insumos, if_exists="replace", - index=False,) + "official_branches_geoms", + conn_insumos, + if_exists="replace", + index=False, + ) # produce a line from branches with lowess lines_routes = create_line_geom_from_branches(geojson_data) else: - lines_routes = geojson_data\ - .reindex(columns=['id_linea', 'geometry']) + lines_routes = geojson_data.reindex(columns=["id_linea", "geometry"]) - assert not lines_routes.id_linea.duplicated().any( + assert ( + not lines_routes.id_linea.duplicated().any() ), "id_linea duplicados en geojson de recorridos" - lines_routes['wkt'] = lines_routes.geometry.to_wkt() + lines_routes["wkt"] = lines_routes.geometry.to_wkt() - lines_routes = lines_routes.reindex(columns=['id_linea', 'wkt']) - print('Subiendo tabla de recorridos') + lines_routes = lines_routes.reindex(columns=["id_linea", "wkt"]) + print("Subiendo tabla de recorridos") # Upload geoms lines_routes.to_sql( - "official_lines_geoms", conn_insumos, if_exists="replace", - index=False,) + "official_lines_geoms", + conn_insumos, + if_exists="replace", + index=False, + ) conn_insumos.close() @@ -89,8 +97,8 @@ def infer_routes_geoms(plotear_lineas): de las lineas y lo guarda en la db """ - conn_data = iniciar_conexion_db(tipo='data') - conn_insumos = iniciar_conexion_db(tipo='insumos') + conn_data = iniciar_conexion_db(tipo="data") + conn_insumos = iniciar_conexion_db(tipo="insumos") # traer la coordenadas de las etapas con suficientes datos q = """ select e.id_linea,e.longitud,e.latitud @@ -98,26 +106,31 @@ def infer_routes_geoms(plotear_lineas): """ etapas = pd.read_sql(q, conn_data) - recorridos_lowess = etapas.groupby( - 'id_linea').apply(geo.lowess_linea).reset_index() + recorridos_lowess = etapas.groupby("id_linea").apply(geo.lowess_linea).reset_index() if plotear_lineas: - print('Imprimiento bosquejos de lineas') + print("Imprimiento bosquejos de lineas") alias = leer_alias() - [plotear_recorrido_lowess(id_linea, etapas, recorridos_lowess, alias) - for id_linea in recorridos_lowess.id_linea] + [ + plotear_recorrido_lowess(id_linea, etapas, recorridos_lowess, alias) + for id_linea in recorridos_lowess.id_linea + ] print("Subiendo recorridos a la db...") - recorridos_lowess['wkt'] = recorridos_lowess.geometry.to_wkt() + recorridos_lowess["wkt"] = recorridos_lowess.geometry.to_wkt() # Elminar geometrias invalidas validas = recorridos_lowess.geometry.map(lambda g: g.is_valid) recorridos_lowess = recorridos_lowess.loc[validas, :] - recorridos_lowess = recorridos_lowess.reindex(columns=['id_linea', 'wkt']) + recorridos_lowess = recorridos_lowess.reindex(columns=["id_linea", "wkt"]) - recorridos_lowess.to_sql("inferred_lines_geoms", - 
conn_insumos, if_exists="replace", index=False,) + recorridos_lowess.to_sql( + "inferred_lines_geoms", + conn_insumos, + if_exists="replace", + index=False, + ) conn_insumos.close() conn_data.close() @@ -126,7 +139,7 @@ def infer_routes_geoms(plotear_lineas): @duracion def build_routes_from_official_inferred(): - conn_insumos = iniciar_conexion_db(tipo='insumos') + conn_insumos = iniciar_conexion_db(tipo="insumos") # Delete old data conn_insumos.execute("DELETE FROM lines_geoms;") @@ -179,12 +192,11 @@ def create_line_geom_from_branches(geojson_data): epsg_m = geo.get_epsg_m() geojson_data = geojson_data.to_crs(epsg=epsg_m) - lines_routes = geojson_data\ - .groupby('id_linea', as_index=False)\ - .apply(get_line_lowess_from_branch_routes) - lines_routes.columns = ['id_linea', 'geometry'] - lines_routes = gpd.GeoDataFrame( - lines_routes, geometry='geometry', crs=epsg_m) + lines_routes = geojson_data.groupby("id_linea", as_index=False).apply( + get_line_lowess_from_branch_routes + ) + lines_routes.columns = ["id_linea", "geometry"] + lines_routes = gpd.GeoDataFrame(lines_routes, geometry="geometry", crs=epsg_m) lines_routes = lines_routes.to_crs(epsg=4326) @@ -214,19 +226,22 @@ def get_line_lowess_from_branch_routes(gdf): def check_route_geoms_columns(geojson_data, branches_present): # Check all columns are present - cols = ['id_linea', 'geometry'] + cols = ["id_linea", "geometry"] - assert not geojson_data.id_linea.isna().any(),\ - "id_linea vacios en geojson recorridos" + assert ( + not geojson_data.id_linea.isna().any() + ), "id_linea vacios en geojson recorridos" # assert geojson_data.dtypes['id_linea'] == int,\ # "id_linea deben ser int en geojson recorridos" if branches_present: - cols.append('id_ramal') - assert not geojson_data.id_ramal.isna().any(),\ - "id_ramal vacios en geojson recorridos" - assert not geojson_data.id_ramal.duplicated().any(),\ - "id_ramal duplicados en geojson recorridos" + cols.append("id_ramal") + assert ( + not geojson_data.id_ramal.isna().any() + ), "id_ramal vacios en geojson recorridos" + assert ( + not geojson_data.id_ramal.duplicated().any() + ), "id_ramal duplicados en geojson recorridos" # assert geojson_data.dtypes['id_ramal'] == int,\ # "id_ramal deben ser int en geojson recorridos" @@ -234,17 +249,16 @@ def check_route_geoms_columns(geojson_data, branches_present): columns_ok = cols.isin(geojson_data.columns) if not columns_ok.all(): - cols_not_ok = ','.join(cols[~columns_ok].values) + cols_not_ok = ",".join(cols[~columns_ok].values) - raise ValueError( - f'Faltan columnas en el dataset: {cols_not_ok}') + raise ValueError(f"Faltan columnas en el dataset: {cols_not_ok}") # Check geometry type geo.check_all_geoms_linestring(geojson_data) def delete_old_route_geoms_data(): - conn_insumos = iniciar_conexion_db(tipo='insumos') + conn_insumos = iniciar_conexion_db(tipo="insumos") conn_insumos.execute("DELETE FROM lines_geoms;") conn_insumos.execute("DELETE FROM branches_geoms;") @@ -277,7 +291,7 @@ def process_routes_metadata(): and uploads metadata to the db """ - conn_insumos = iniciar_conexion_db(tipo='insumos') + conn_insumos = iniciar_conexion_db(tipo="insumos") # Deletes old data conn_insumos.execute("DELETE FROM metadata_lineas;") @@ -297,8 +311,7 @@ def process_routes_metadata(): # Check modes matches config standarized modes try: modos_homologados = configs["modos"] - zipped = zip(modos_homologados.values(), - modos_homologados.keys()) + zipped = zip(modos_homologados.values(), modos_homologados.keys()) modos_homologados = {k: v for k, v in 
zipped} except KeyError: @@ -306,58 +319,67 @@ def process_routes_metadata(): # Line metadata is mandatory - print('Leyendo tabla con informacion de lineas') + print("Leyendo tabla con informacion de lineas") ruta = os.path.join("data", "data_ciudad", tabla_lineas) info = pd.read_csv(ruta) # Check all columns are present if branches_present: - cols = ['id_linea', 'nombre_linea', - 'id_ramal', 'nombre_ramal', 'modo'] + cols = ["id_linea", "nombre_linea", "id_ramal", "nombre_ramal", "modo"] else: - cols = ['id_linea', 'nombre_linea', 'modo'] + cols = ["id_linea", "nombre_linea", "modo"] - assert pd.Series(cols).isin(info.columns).all( + assert ( + pd.Series(cols).isin(info.columns).all() ), f"La tabla {ruta} debe tener los campos: {cols}" # check no missing data in line id assert not info.id_linea.isna().any(), "id_linea no debe ser NULL" - if 'id_linea_agg' not in info.columns: - info['id_linea_agg'] = info['id_linea'] - info['nombre_linea_agg'] = info['nombre_linea'] + if "id_linea_agg" not in info.columns: + info["id_linea_agg"] = info["id_linea"] + info["nombre_linea_agg"] = info["nombre_linea"] - line_cols = ["id_linea", - "nombre_linea", - "id_linea_agg", - "nombre_linea_agg", - "modo", - "empresa", - "descripcion"] + line_cols = [ + "id_linea", + "nombre_linea", + "id_linea_agg", + "nombre_linea_agg", + "modo", + "empresa", + "descripcion", + ] - assert pd.Series(info.modo.unique()).isin( - modos_homologados.keys()).all() + assert pd.Series(info.modo.unique()).isin(modos_homologados.keys()).all() - info['modo'] = info['modo'].replace(modos_homologados) + info["modo"] = info["modo"].replace(modos_homologados) # fill missing line agg - info.loc[info.id_linea_agg.isna( - ), 'nombre_linea_agg'] = info.loc[info.id_linea_agg.isna(), 'nombre_linea'] - info.loc[info.id_linea_agg.isna( - ), 'id_linea_agg'] = info.loc[info.id_linea_agg.isna(), 'id_linea'] + info.loc[info.id_linea_agg.isna(), "nombre_linea_agg"] = info.loc[ + info.id_linea_agg.isna(), "nombre_linea" + ] + info.loc[info.id_linea_agg.isna(), "id_linea_agg"] = info.loc[ + info.id_linea_agg.isna(), "id_linea" + ] # keep only line data info_lineas = info.reindex(columns=line_cols) - info_lineas = info_lineas.drop_duplicates(subset='id_linea') + info_lineas = info_lineas.drop_duplicates(subset="id_linea") # upload to db info_lineas.to_sql( - "metadata_lineas", conn_insumos, if_exists="replace", - index=False) + "metadata_lineas", conn_insumos, if_exists="replace", index=False + ) if branches_present: - ramales_cols = ['id_ramal', 'id_linea', - 'nombre_ramal', 'modo', 'empresa', 'descripcion'] + ramales_cols = [ + "id_ramal", + "id_linea", + "nombre_ramal", + "modo", + "empresa", + "descripcion", + ] info_ramales = info.reindex(columns=ramales_cols) @@ -365,12 +387,13 @@ def process_routes_metadata(): s = "Existen nulos en el campo id_ramal" assert not info_ramales.id_ramal.isna().any(), s - assert not info_ramales.id_ramal.duplicated( - ).any(), "Existen duplicados en id_ramal" + assert ( + not info_ramales.id_ramal.duplicated().any() + ), "Existen duplicados en id_ramal" info_ramales.to_sql( - "metadata_ramales", conn_insumos, if_exists="replace", - index=False) + "metadata_ramales", conn_insumos, if_exists="replace", index=False + ) conn_insumos.close() @@ -391,14 +414,18 @@ def create_line_g(line_id): Graph with the branch route id by node_id and ordered by stops order """ - conn = iniciar_conexion_db(tipo='insumos') + conn = iniciar_conexion_db(tipo="insumos") query = f"select * from stops where id_linea = {line_id}" line_stops = 
pd.read_sql(query, conn) branches_id = line_stops.id_ramal.unique() - G_line = nx.compose_all([create_branch_g_from_stops_df( - line_stops, branch_id) for branch_id in branches_id]) + G_line = nx.compose_all( + [ + create_branch_g_from_stops_df(line_stops, branch_id) + for branch_id in branches_id + ] + ) return G_line @@ -423,7 +450,7 @@ def create_branch_g_from_stops_df(line_stops, id_ramal): branch_stops = line_stops.loc[line_stops.id_ramal == id_ramal, :] # remove duplicated stops with same node_id - branch_stops = branch_stops.drop_duplicates(subset='node_id') + branch_stops = branch_stops.drop_duplicates(subset="node_id") G = create_branch_graph(branch_stops) return G @@ -448,19 +475,22 @@ def create_branch_graph(branch_stops): """ metadata = { "crs": "epsg:4326", - "id_linea": branch_stops['id_linea'].unique().item(), - "id_ramal": branch_stops['id_ramal'].unique().item() + "id_linea": branch_stops["id_linea"].unique().item(), + "id_ramal": branch_stops["id_ramal"].unique().item(), } G = nx.MultiGraph(**metadata) - branch_stops = branch_stops.sort_values( - 'branch_stop_order').reindex(columns=['node_id', 'node_x', 'node_y']) - nodes = [(int(row['node_id']), {'x': row['node_x'], 'y':row['node_y']}) - for _, row in branch_stops.iterrows()] + branch_stops = branch_stops.sort_values("branch_stop_order").reindex( + columns=["node_id", "node_x", "node_y"] + ) + nodes = [ + (int(row["node_id"]), {"x": row["node_x"], "y": row["node_y"]}) + for _, row in branch_stops.iterrows() + ] G.add_nodes_from(nodes) - edges_from = branch_stops['node_id'].iloc[:-1].map(int) - edges_to = branch_stops['node_id'].shift(-1).iloc[:-1].map(int) + edges_from = branch_stops["node_id"].iloc[:-1].map(int) + edges_to = branch_stops["node_id"].shift(-1).iloc[:-1].map(int) edges = [(i, j, 0) for i, j in zip(edges_from, edges_to)] G.add_edges_from(edges) @@ -468,3 +498,53 @@ def create_branch_graph(branch_stops): G = distance.add_edge_lengths(G) return G + + +def read_branch_routes(branch_ids): + """ + This function take a list of branch ids and returns a geodataframe + with route geoms + """ + conn_insumos = iniciar_conexion_db(tipo="insumos") + line_ids_where = create_branch_ids_sql_filter(branch_ids) + q_route_geoms = "select * from branches_geoms" + line_ids_where + route_geoms = pd.read_sql(q_route_geoms, conn_insumos) + route_geoms["geometry"] = gpd.GeoSeries.from_wkt(route_geoms.wkt) + route_geoms = gpd.GeoDataFrame( + route_geoms.drop("wkt", axis=1), geometry="geometry", crs="EPSG:4326" + ) + return route_geoms + + +def read_routes(route_ids, route_type): + """ + This function take a list of branches or lines ids and returns a geodataframe + with route geoms + + Parameters + ---------- + route_ids : list + list of branches or lines ids + route_type : str + branches or lines + + Returns + ------- + geopandas.GeoDataFrame + GeoDataFrame with route geoms + """ + conn_insumos = iniciar_conexion_db(tipo="insumos") + if route_type == "branches": + ids_where = create_branch_ids_sql_filter(route_ids) + else: + ids_where = create_line_ids_sql_filter(route_ids) + + q_route_geoms = f"select * from {route_type}_geoms" + ids_where + + route_geoms = pd.read_sql(q_route_geoms, conn_insumos) + route_geoms.columns = ["route_id", "wkt"] + route_geoms["geometry"] = gpd.GeoSeries.from_wkt(route_geoms.wkt) + route_geoms = gpd.GeoDataFrame( + route_geoms.drop("wkt", axis=1), geometry="geometry", crs="EPSG:4326" + ) + return route_geoms diff --git a/urbantrips/kpi/kpi.py b/urbantrips/kpi/kpi.py index 0952acb..57351ed 100644 --- 
a/urbantrips/kpi/kpi.py +++ b/urbantrips/kpi/kpi.py @@ -99,7 +99,7 @@ def compute_route_section_load( route id or list of route ids present in the legs dataset. Route section load will be computed for that subset of lines. If False, it will run with all routes. - rango_hrs : tuple or bool + hour_range : tuple or bool tuple holding hourly range (from,to) and from 0 to 24. Route section load will be computed for legs happening within tat time range. If False it won't filter by hour. diff --git a/urbantrips/kpi/overlapping.py b/urbantrips/kpi/overlapping.py new file mode 100644 index 0000000..e41b483 --- /dev/null +++ b/urbantrips/kpi/overlapping.py @@ -0,0 +1,528 @@ +import pandas as pd +import geopandas as gpd +import numpy as np +from shapely.geometry import Point, LineString +import h3 +from urbantrips.geo import geo +from urbantrips.kpi import kpi +from urbantrips.utils import utils +from urbantrips.carto.routes import read_routes + + +def from_linestring_to_h3(linestring, h3_res=8): + """ + This function takes a shapely linestring and + returns all h3 hecgrid cells that intersect that linestring + """ + lrs = np.arange(0, 1, 0.01) + points = [linestring.interpolate(i, normalized=True) for i in lrs] + coords = [(point.x, point.y) for point in points] + linestring_h3 = pd.Series( + [ + h3.geo_to_h3(lat=coord[1], lng=coord[0], resolution=h3_res) + for coord in coords + ] + ).drop_duplicates() + return linestring_h3 + + +def create_coarse_h3_from_line( + linestring: LineString, h3_res: int, route_id: int +) -> dict: + + # Reference to coarser H3 for those lines + linestring_h3 = from_linestring_to_h3(linestring, h3_res=h3_res) + + # Creeate geodataframes with hex geoms and index and LRS + gdf = gpd.GeoDataFrame( + {"h3": linestring_h3}, geometry=linestring_h3.map(geo.add_geometry), crs=4326 + ) + gdf["route_id"] = route_id + + # Create LRS for each hex index + gdf["h3_lrs"] = [ + kpi.floor_rounding(linestring.project(Point(p[::-1]), True)) + for p in gdf.h3.map(h3.h3_to_geo) + ] + + # Create section ids for each line + df_section_ids_LRS = kpi.create_route_section_ids(len(gdf)) + + # Create cut points for each section based on H3 LRS + df_section_ids_LRS_cut = df_section_ids_LRS.copy() + df_section_ids_LRS_cut.loc[0] = -0.001 + + # Use cut points to come up with a unique integer id + df_section_ids = list(range(1, len(df_section_ids_LRS_cut))) + + gdf["section_id"] = pd.cut( + gdf.h3_lrs, bins=df_section_ids_LRS_cut, labels=df_section_ids, right=True + ) + + # ESTO REEMPLAZA PARA ATRAS + gdf = gdf.sort_values("h3_lrs") + gdf["section_id"] = range(len(gdf)) + + return gdf + + +def get_demand_data( + supply_gdf: gpd.GeoDataFrame, + day_type: str, + line_id: int, + hour_range: ( + list | None + ) = None, # Not used as % of demand is not related to hour range +) -> pd.DataFrame: + + h3_res = h3.h3_get_resolution(supply_gdf.h3.iloc[0]) + + # get demand data + line_ids_where = kpi.create_line_ids_sql_filter(line_ids=[line_id]) + legs = kpi.read_legs_data_by_line_hours_and_day( + line_ids_where, hour_range=hour_range, day_type=day_type + ) + + # Add legs to same coarser H3 used in branch routes + legs["h3_o"] = legs["h3_o"].map(lambda h: geo.h3toparent(h, h3_res)) + legs["h3_d"] = legs["h3_d"].map(lambda h: geo.h3toparent(h, h3_res)) + + shared_h3 = supply_gdf.loc[supply_gdf.shared_h3, "h3"] + legs["leg_in_shared_h3"] = legs.h3_o.isin(shared_h3) & legs.h3_d.isin(shared_h3) + + return legs + + +def aggregate_demand_data( + legs: pd.DataFrame, + supply_gdf: gpd.GeoDataFrame, + base_line_id: int, + 
comp_line_id: int, + base_branch_id: int | str, + comp_branch_id: int | str, +) -> dict: + + # Compute total legs by h3 origin and destination + total_legs_by_h3_od = ( + legs.reindex( + columns=[ + "dia", + "id_linea", + "h3_o", + "h3_d", + "leg_in_shared_h3", + "factor_expansion_linea", + ] + ) + .groupby( + ["dia", "id_linea", "h3_o", "h3_d", "leg_in_shared_h3"], as_index=False + ) + .sum() + ) + + # Get only legs that could have been done in this branch + legs_within_branch = ( + total_legs_by_h3_od.merge( + supply_gdf.drop("geometry", axis=1), + left_on=["h3_o"], + right_on=["h3"], + how="inner", + ) + .merge( + supply_gdf.drop("geometry", axis=1), + left_on=["h3_d"], + right_on=["h3"], + how="inner", + ) + .reindex( + columns=[ + "dia", + "id_linea", + "h3_o", + "h3_d", + "leg_in_shared_h3", + "factor_expansion_linea", + "h3_lrs_x", + "h3_lrs_y", + "section_id_x", + "section_id_y", + ] + ) + .rename(columns={"section_id_x": "o_proj", "section_id_y": "d_proj"}) + ) + + total_demand = legs_within_branch.factor_expansion_linea.sum() + line_id = legs_within_branch.id_linea.iloc[0] + day = legs_within_branch.dia.iloc[0] + + configs = utils.leer_configs_generales() + use_branches = configs["lineas_contienen_ramales"] + conn_insumos = utils.iniciar_conexion_db(tipo="insumos") + + line_metadata = pd.read_sql( + f"select id_linea, nombre_linea from metadata_lineas where id_linea in ({base_line_id},{comp_line_id})", + conn_insumos, + dtype={"id_linea": int}, + ) + + base_line_name = line_metadata.loc[ + line_metadata.id_linea == base_line_id, "nombre_linea" + ].item() + comp_line_name = line_metadata.loc[ + line_metadata.id_linea == comp_line_id, "nombre_linea" + ].item() + + if use_branches: + # get line id base on branch + metadata = pd.read_sql( + f"select id_linea,id_ramal,nombre_ramal from metadata_ramales where id_ramal in ({base_branch_id},{comp_branch_id})", + conn_insumos, + dtype={"id_linea": int, "id_ramal": int}, + ) + + base_branch_name = metadata.loc[ + metadata.id_ramal == base_branch_id, "nombre_ramal" + ].item() + comp_branch_name = metadata.loc[ + metadata.id_ramal == comp_branch_id, "nombre_ramal" + ].item() + demand_base_branch_str = ( + f"que podria recorrer este ramal {base_branch_name} (id {base_branch_id}) " + ) + demand_comp_branch_str = f"ramal {comp_branch_name} (id {comp_branch_id})" + + else: + demand_base_branch_str = " " + demand_comp_branch_str = " " + conn_insumos.close() + + print( + f"La demanda total para la linea {base_line_name} (id {line_id}) {demand_base_branch_str}es: {int(total_demand)} etapas" + ) + + shared_demand = round( + legs_within_branch.loc[ + legs_within_branch.leg_in_shared_h3, "factor_expansion_linea" + ].sum() + / total_demand + * 100, + 1, + ) + print( + f"De las cuales el {shared_demand} % comparte OD con la linea {comp_line_name} (id {comp_line_id}) {demand_comp_branch_str}" + ) + update_overlapping_table_demand( + day, + base_line_id, + base_branch_id, + comp_line_id, + comp_branch_id, + res_h3=h3.h3_get_resolution(supply_gdf.h3.iloc[0]), + base_v_comp=shared_demand, + ) + return legs_within_branch + + +def demand_by_section_id(legs_within_branch): + total_demand = legs_within_branch.factor_expansion_linea.sum() + + # Add direction to use for which sections id traversed + legs_within_branch["sentido"] = [ + "ida" if row.o_proj <= row.d_proj else "vuelta" + for _, row in legs_within_branch.iterrows() + ] + + # remove legs with no origin or destination projected + legs_within_branch = legs_within_branch.dropna(subset=["o_proj", 
"d_proj"]) + + # Create df with all traversed sections + legs_dict = legs_within_branch.to_dict("records") + leg_route_sections_df = pd.concat(map(kpi.build_leg_route_sections_df, legs_dict)) + + # Compute total demand by section id + demand_by_section_id = leg_route_sections_df.groupby( + ["section_id"], as_index=False + ).agg(total_legs=("factor_expansion_linea", "sum")) + + demand_by_section_id["prop_demand"] = ( + demand_by_section_id.total_legs / total_demand * 100 + ) + + return demand_by_section_id + + +def update_overlapping_table_supply( + day, + base_line_id, + base_branch_id, + comp_line_id, + comp_branch_id, + res_h3, + base_v_comp, + comp_v_base, +): + conn_data = utils.iniciar_conexion_db(tipo="data") + # Update db + delete_q = f""" + delete from overlapping + where dia = '{day}' + and base_line_id = {base_line_id} + and base_branch_id = {base_branch_id} + and comp_line_id = {comp_line_id} + and comp_branch_id = {comp_branch_id} + and res_h3 = {res_h3} + and type_overlap = "oferta" + ; + """ + conn_data.execute(delete_q) + conn_data.commit() + + delete_q = f""" + delete from overlapping + where dia = '{day}' + and base_line_id = {comp_line_id} + and base_branch_id = {comp_branch_id} + and comp_line_id = {base_line_id} + and comp_branch_id = {base_branch_id} + and res_h3 = {res_h3} + and type_overlap = "oferta" + ; + """ + conn_data.execute(delete_q) + conn_data.commit() + + insert_q = f""" + insert into overlapping (dia,base_line_id,base_branch_id,comp_line_id,comp_branch_id,res_h3,overlap, type_overlap) + values + ('{day}',{base_line_id},{base_branch_id},{comp_line_id},{comp_branch_id},{res_h3},{base_v_comp},'oferta'), + ('{day}',{comp_line_id},{comp_branch_id},{base_line_id},{base_branch_id},{res_h3},{comp_v_base},'oferta') + ; + """ + + conn_data.execute(insert_q) + conn_data.commit() + conn_data.close() + + +def update_overlapping_table_demand( + day, base_line_id, base_branch_id, comp_line_id, comp_branch_id, res_h3, base_v_comp +): + conn_data = utils.iniciar_conexion_db(tipo="data") + # Update db + delete_q = f""" + delete from overlapping + where dia = '{day}' + and base_line_id = {base_line_id} + and base_branch_id = {base_branch_id} + and comp_line_id = {comp_line_id} + and comp_branch_id = {comp_branch_id} + and res_h3 = {res_h3} + and type_overlap = "demanda" + ; + """ + conn_data.execute(delete_q) + conn_data.commit() + + insert_q = f""" + insert into overlapping (dia,base_line_id,base_branch_id,comp_line_id,comp_branch_id,res_h3,overlap, type_overlap) + values + ('{day}',{base_line_id},{base_branch_id},{comp_line_id},{comp_branch_id},{res_h3},{base_v_comp},'demanda') + ; + """ + conn_data.execute(insert_q) + conn_data.commit() + conn_data.close() + + +def normalize_total_legs_to_dot_size(series, min_dot_size, max_dot_size): + return min_dot_size + (max_dot_size - 1) * (series - series.min()) / ( + series.max() - series.min() + ) + + +def compute_supply_overlapping( + day, base_route_id, comp_route_id, route_type, h3_res_comp +): + # Get route geoms + route_geoms = read_routes( + route_ids=[base_route_id, comp_route_id], route_type=route_type + ) + + # Crate linestring for each branch + base_route_gdf = route_geoms.loc[route_geoms.route_id == base_route_id, "geometry"] + linestring_base = base_route_gdf.item() + + # Crate linestring for each branch + comp_route_gdf = route_geoms.loc[route_geoms.route_id == comp_route_id, "geometry"] + linestring_comp = comp_route_gdf.item() + + # Turn linestring into coarse h3 indexes + base_h3 = create_coarse_h3_from_line( + 
linestring=linestring_base, h3_res=h3_res_comp, route_id=base_route_id + ) + comp_h3 = create_coarse_h3_from_line( + linestring=linestring_comp, h3_res=h3_res_comp, route_id=comp_route_id + ) + + # Compute overlapping between those h3 indexes + branch_overlapping = base_h3.reindex( + columns=["h3", "route_id", "section_id"] + ).merge( + comp_h3.reindex(columns=["h3", "route_id", "section_id"]), + on="h3", + how="outer", + suffixes=("_base", "_comp"), + ) + + # classify each h3 index as shared or not + overlapping_mask = (branch_overlapping.route_id_base.notna()) & ( + branch_overlapping.route_id_comp.notna() + ) + overlapping_indexes = overlapping_mask.sum() + overlapping_h3 = branch_overlapping.loc[overlapping_mask, "h3"] + base_h3["shared_h3"] = base_h3.h3.isin(overlapping_h3) + comp_h3["shared_h3"] = comp_h3.h3.isin(overlapping_h3) + + # Compute % of shred h3 + base_v_comp = round(overlapping_indexes / len(base_h3) * 100, 1) + comp_v_base = round(overlapping_indexes / len(comp_h3) * 100, 1) + + configs = utils.leer_configs_generales() + use_branches = configs["lineas_contienen_ramales"] + + if use_branches: + # get line id base on branch + conn_insumos = utils.iniciar_conexion_db(tipo="insumos") + metadata = pd.read_sql( + f"select id_linea,id_ramal,nombre_ramal from metadata_ramales where id_ramal in ({base_route_id},{comp_route_id})", + conn_insumos, + dtype={"id_linea": int, "id_ramal": int}, + ) + conn_insumos.close() + base_line_id = metadata.loc[ + metadata.id_ramal == base_route_id, "id_linea" + ].item() + comp_line_id = metadata.loc[ + metadata.id_ramal == comp_route_id, "id_linea" + ].item() + + base_branch_name = metadata.loc[ + metadata.id_ramal == base_route_id, "nombre_ramal" + ].item() + comp_branch_name = metadata.loc[ + metadata.id_ramal == comp_route_id, "nombre_ramal" + ].item() + + base_branch_id = base_route_id + comp_branch_id = comp_route_id + + print( + f"El {base_v_comp} % del recorrido del ramal base {base_branch_name}" + f" se superpone con el del ramal de comparación {comp_branch_name}" + ) + print( + f"Por otro lado {comp_v_base} % del recorrido del ramal {comp_branch_name}" + f" se superpone con el del ramal {base_branch_name}" + ) + + else: + base_line_id = base_route_id + comp_line_id = comp_route_id + base_branch_id = "NULL" + comp_branch_id = "NULL" + + metadata = pd.read_sql( + f"select id_linea, nombre_linea from metadata_lineas where id_linea in ({base_route_id},{comp_route_id})", + conn_insumos, + dtype={"id_linea": int}, + ) + + base_line_name = metadata.loc[ + metadata.id_linea == base_route_id, "nombre_linea" + ].item() + comp_line_name = metadata.loc[ + metadata.id_linea == comp_route_id, "nombre_linea" + ].item() + + print( + f"El {base_v_comp} % del recorrido de la linea base {base_line_name}" + " se superpone con el del ramal de comparación {comp_line_name}" + ) + print( + f"Por otro lado {comp_v_base} % del recorrido del ramal {comp_line_name}" + " se superpone con el del ramal {base_line_name}" + ) + + update_overlapping_table_supply( + day=day, + base_line_id=base_line_id, + base_branch_id=base_branch_id, + comp_line_id=comp_line_id, + comp_branch_id=comp_branch_id, + res_h3=h3_res_comp, + base_v_comp=base_v_comp, + comp_v_base=comp_v_base, + ) + + return { + "base": {"line": base_route_gdf, "h3": base_h3}, + "comp": {"line": comp_route_gdf, "h3": comp_h3}, + } + + +def compute_demand_overlapping( + base_line_id, + comp_line_id, + day_type, + base_route_id, + comp_route_id, + base_gdf, + comp_gdf, +): + configs = 
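# --- Illustrative sketch (toy H3 cells, not part of the patch) ---------------
# The supply overlap computed above boils down to: outer-merge both routes'
# coarse H3 cells on `h3`, count the cells present on both sides, and express
# that count as a share of each route's own cells.
import pandas as pd

base_h3_toy = pd.DataFrame({"h3": ["a", "b", "c", "d"], "route_id": 1})
comp_h3_toy = pd.DataFrame({"h3": ["c", "d", "e"], "route_id": 2})

merged = base_h3_toy.merge(
    comp_h3_toy, on="h3", how="outer", suffixes=("_base", "_comp")
)
overlapping_cells = (merged.route_id_base.notna() & merged.route_id_comp.notna()).sum()
print(round(overlapping_cells / len(base_h3_toy) * 100, 1))  # 50.0 (base vs comp)
print(round(overlapping_cells / len(comp_h3_toy) * 100, 1))  # 66.7 (comp vs base)
# -----------------------------------------------------------------------------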
utils.leer_configs_generales() + comp_h3_resolution = h3.h3_get_resolution(comp_gdf.h3.iloc[0]) + configs_resolution = configs["resolucion_h3"] + + if comp_h3_resolution > configs_resolution: + print( + "No puede procesarse la demanda con resolución de H3 mayor a la configurada" + ) + print("Se recomienda bajar la resolución de H3 de la línea de comparación") + print(f"Resolucion para solapamiento de demanda {comp_h3_resolution}") + print(f"Resolucion configurada {configs_resolution}") + return None, None + + use_branches = configs["lineas_contienen_ramales"] + + if use_branches: + base_branch_id = base_route_id + comp_branch_id = comp_route_id + else: + base_branch_id = "NULL" + comp_branch_id = "NULL" + + base_legs = get_demand_data( + supply_gdf=base_gdf, day_type=day_type, line_id=base_line_id + ) + comp_legs = get_demand_data( + supply_gdf=comp_gdf, day_type=day_type, line_id=comp_line_id + ) + + base_demand = aggregate_demand_data( + legs=base_legs, + supply_gdf=base_gdf, + base_line_id=base_line_id, + comp_line_id=comp_line_id, + base_branch_id=base_branch_id, + comp_branch_id=comp_branch_id, + ) + comp_demand = aggregate_demand_data( + legs=comp_legs, + supply_gdf=comp_gdf, + base_line_id=comp_line_id, + comp_line_id=base_line_id, + base_branch_id=comp_branch_id, + comp_branch_id=base_branch_id, + ) + + return base_demand, comp_demand diff --git a/urbantrips/utils/utils.py b/urbantrips/utils/utils.py index e835c02..af2144b 100644 --- a/urbantrips/utils/utils.py +++ b/urbantrips/utils/utils.py @@ -1302,6 +1302,23 @@ def create_kpi_tables(): """ ) + conn_data.execute( + """ + CREATE TABLE IF NOT EXISTS overlapping + ( + dia text not null, + base_line_id int not null, + base_branch_id int, + comp_line_id int not null, + comp_branch_id int, + res_h3 int, + overlap float, + type_overlap text + ) + ; + """ + ) + conn_data.close() @@ -1374,6 +1391,23 @@ def create_line_ids_sql_filter(line_ids): return line_ids_where +def create_branch_ids_sql_filter(branch_ids): + """ + Takes a set of branch ids and returns a where clause + to filter in sqlite + """ + if branch_ids is not None: + if isinstance(branch_ids, int): + branch_ids = [branch_ids] + branches_str = ",".join(map(str, branch_ids)) + branch_ids_where = f" where id_ramal in ({branches_str})" + + else: + branches_str = "" + branch_ids_where = " where id_ramal is not NULL" + return branch_ids_where + + def traigo_tabla_zonas(): conn_insumos = iniciar_conexion_db(tipo="insumos") diff --git a/urbantrips/viz/overlapping.py b/urbantrips/viz/overlapping.py new file mode 100644 index 0000000..3ce29b2 --- /dev/null +++ b/urbantrips/viz/overlapping.py @@ -0,0 +1,237 @@ +import numpy as np +import geopandas as gpd +import folium +import pandas as pd +from urbantrips.geo import geo +from urbantrips.kpi import overlapping as ovl +from urbantrips.utils.utils import leer_configs_generales, iniciar_conexion_db + + +def get_route_metadata(route_id): + configs = leer_configs_generales() + conn_insumos = iniciar_conexion_db(tipo="insumos") + use_branches = configs["lineas_contienen_ramales"] + if use_branches: + metadata = pd.read_sql( + f"select nombre_ramal from metadata_ramales where id_ramal == {route_id}", + conn_insumos, + ) + metadata = metadata.nombre_ramal.iloc[0] + else: + metadata = pd.read_sql( + f"select nombre_linea from metadata_lineas where id_linea == {route_id}", + conn_insumos, + ) + metadata = metadata.nombre_linea.iloc[0] + return metadata + + +def plot_interactive_supply_overlapping(overlapping_dict): + + base_route_id = 
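# --- Illustrative sketch (assumes the h3 v3 API already used in this module) --
# The guard in compute_demand_overlapping above compares the resolution of the
# comparison grid with `resolucion_h3` from the configs: if the comparison grid
# is finer than the resolution the legs were coded with, the demand overlap is
# not computed. Toy values below.
import h3

toy_cell = h3.geo_to_h3(-34.6, -58.4, 8)              # a cell at resolution 8
comp_h3_resolution = h3.h3_get_resolution(toy_cell)   # -> 8
configs_resolution = 8                                 # e.g. configs["resolucion_h3"]
print(comp_h3_resolution > configs_resolution)         # False -> demand can be processed
# ------------------------------------------------------------------------------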
overlapping_dict["base"]["h3"].route_id.unique()[0] + comp_route_id = overlapping_dict["comp"]["h3"].route_id.unique()[0] + base_route_metadata = get_route_metadata(base_route_id) + comp_route_metadata = get_route_metadata(comp_route_id) + + # extract data from overlapping dict + base_gdf = overlapping_dict["base"]["h3"] + base_route_gdf = overlapping_dict["base"]["line"] + comp_gdf = overlapping_dict["comp"]["h3"] + comp_route_gdf = overlapping_dict["comp"]["line"] + + # get mean coords to center map + mean_x = np.mean(base_route_gdf.item().coords.xy[0]) + mean_y = np.mean(base_route_gdf.item().coords.xy[1]) + + fig = folium.Figure(width=1000, height=600) + m = folium.Map(location=(mean_y, mean_x), zoom_start=11, tiles="cartodbpositron") + + base_gdf.explore( + color="black", + tiles="CartoDB positron", + m=m, + name=f"Base H3 {base_route_metadata}", + ) + base_route_gdf.explore( + color="black", + tiles="CartoDB positron", + m=m, + name=f"Base route {base_route_metadata}", + ) + + comp_gdf.explore( + color="red", + tiles="CartoDB positron", + m=m, + name=f"Comp H3 {comp_route_metadata}", + ) + comp_route_gdf.explore( + color="red", + tiles="CartoDB positron", + m=m, + name=f"Comp route {comp_route_metadata}", + ) + + folium.LayerControl(name="Legs").add_to(m) + + fig.add_child(m) + return fig + + +def plot_interactive_demand_overlapping(base_demand, comp_demand, overlapping_dict): + base_gdf = overlapping_dict["base"]["h3"] + base_route_gdf = overlapping_dict["base"]["line"] + comp_gdf = overlapping_dict["comp"]["h3"] + comp_route_gdf = overlapping_dict["comp"]["line"] + + base_route_id = base_gdf.route_id.unique()[0] + comp_route_id = comp_gdf.route_id.unique()[0] + + base_route_metadata = get_route_metadata(base_route_id) + comp_route_metadata = get_route_metadata(comp_route_id) + + # Points for O and D + base_origins = ( + base_demand.reindex(columns=["h3_o", "factor_expansion_linea"]) + .groupby("h3_o", as_index=False) + .agg(total_legs=("factor_expansion_linea", "sum")) + ) + base_destinations = ( + base_demand.reindex(columns=["h3_d", "factor_expansion_linea"]) + .groupby("h3_d", as_index=False) + .agg(total_legs=("factor_expansion_linea", "sum")) + ) + base_origins = gpd.GeoDataFrame( + base_origins, geometry=base_origins.h3_o.map(geo.create_point_from_h3), crs=4326 + ) + base_destinations = gpd.GeoDataFrame( + base_destinations, + geometry=base_destinations.h3_d.map(geo.create_point_from_h3), + crs=4326, + ) + + comp_origins = ( + comp_demand.reindex(columns=["h3_o", "factor_expansion_linea"]) + .groupby("h3_o", as_index=False) + .agg(total_legs=("factor_expansion_linea", "sum")) + ) + comp_destinations = ( + comp_demand.reindex(columns=["h3_d", "factor_expansion_linea"]) + .groupby("h3_d", as_index=False) + .agg(total_legs=("factor_expansion_linea", "sum")) + ) + comp_origins = gpd.GeoDataFrame( + comp_origins, geometry=comp_origins.h3_o.map(geo.create_point_from_h3), crs=4326 + ) + comp_destinations = gpd.GeoDataFrame( + comp_destinations, + geometry=comp_destinations.h3_d.map(geo.create_point_from_h3), + crs=4326, + ) + + # compute demand by section id + base_demand_by_section = ovl.demand_by_section_id(base_demand) + comp_demand_by_section = ovl.demand_by_section_id(comp_demand) + + # plot + base_gdf = base_gdf.merge(base_demand_by_section, on="section_id", how="left") + base_gdf.total_legs = base_gdf.total_legs.fillna(0) + base_gdf.prop_demand = base_gdf.prop_demand.fillna(0) + + comp_gdf = comp_gdf.merge(comp_demand_by_section, on="section_id", how="left") + 
comp_gdf.total_legs = comp_gdf.total_legs.fillna(0) + comp_gdf.prop_demand = comp_gdf.prop_demand.fillna(0) + + min_dot_size = 1 + max_dot_size = 20 + + base_destinations["total_legs_normalized"] = ovl.normalize_total_legs_to_dot_size( + base_destinations["total_legs"], min_dot_size, max_dot_size + ) + comp_destinations["total_legs_normalized"] = ovl.normalize_total_legs_to_dot_size( + comp_destinations["total_legs"], min_dot_size, max_dot_size + ) + base_origins["total_legs_normalized"] = ovl.normalize_total_legs_to_dot_size( + base_origins["total_legs"], min_dot_size, max_dot_size + ) + comp_origins["total_legs_normalized"] = ovl.normalize_total_legs_to_dot_size( + comp_origins["total_legs"], min_dot_size, max_dot_size + ) + + fig = folium.Figure(width=1000, height=600) + m = folium.Map(location=(-34.606, -58.436), zoom_start=11, tiles="cartodbpositron") + + base_gdf.explore( + column="total_legs", + tiles="CartoDB positron", + m=m, + name=f"Demanda ruta base - {base_route_metadata}", + cmap="Blues", + scheme="equalinterval", + ) + base_destinations.explore( + color="midnightblue", + style_kwds={ + "style_function": lambda x: { + "radius": x["properties"]["total_legs_normalized"] + } + }, + name=f"Destinos ruta base - {base_route_metadata}", + m=m, + ) + base_origins.explore( + color="cornflowerblue", + style_kwds={ + "style_function": lambda x: { + "radius": x["properties"]["total_legs_normalized"] + } + }, + name=f"Origenes ruta base - {base_route_metadata}", + m=m, + ) + base_route_gdf.explore( + color="midnightblue", + tiles="CartoDB positron", + m=m, + name=f"Ruta base - {comp_route_metadata}", + ) + + comp_gdf.explore( + column="total_legs", + tiles="CartoDB positron", + m=m, + name=f"Demanda ruta comp - {comp_route_metadata}", + cmap="Greens", + scheme="equalinterval", + ) + comp_destinations.explore( + color="darkgreen", + style_kwds={ + "style_function": lambda x: { + "radius": x["properties"]["total_legs_normalized"] + } + }, + name=f"Destinos ruta comp - {comp_route_metadata}", + m=m, + ) + comp_origins.explore( + color="limegreen", + style_kwds={ + "style_function": lambda x: { + "radius": x["properties"]["total_legs_normalized"] + } + }, + name=f"Origenes ruta comp - {comp_route_metadata}", + m=m, + ) + comp_route_gdf.explore( + color="darkgreen", + tiles="CartoDB positron", + m=m, + name=f"Ruta comparacion - {comp_route_metadata}", + ) + + folium.LayerControl(name="Leyenda").add_to(m) + + fig.add_child(m) + return fig diff --git a/urbantrips/viz/viz.py b/urbantrips/viz/viz.py index ac08e8f..bba06f5 100755 --- a/urbantrips/viz/viz.py +++ b/urbantrips/viz/viz.py @@ -79,8 +79,8 @@ def visualize_route_section_load( n_sections=10, section_meters=None, stat="totals", - factor=1, - factor_min=50, + factor=500, + factor_min=1, save_gdf=False, ): """ @@ -264,9 +264,15 @@ def load_route_section_load_data_q( q_main_data = q_main_data + ";" return q_main_data +def standarize_size(series, min_size, max_size): + if series.min() == series.max(): + return pd.Series([min_size] * len(series)) + return min_size + (max_size - min_size) * (series - series.min()) / ( + series.max() - series.min() + ) def viz_etapas_x_tramo_recorrido( - df, stat="totals", factor=1, factor_min=50, return_gdfs=False, save_gdf=False + df, stat="totals", factor=500, factor_min=10, return_gdfs=False, save_gdf=False ): """ Plots and saves a section load viz for a given route @@ -337,12 +343,13 @@ def viz_etapas_x_tramo_recorrido( print("Produciendo grafico de ocupacion por tramos", line_id) # set a expansion factor for 
viz purposes
-    df["buff_factor"] = df[indicator_col] * factor
-
+    #df["buff_factor"] = df[indicator_col] * factor
     # Set a minimum for each section to be displayed in the map
-    df["buff_factor"] = np.where(
-        df["buff_factor"] <= factor_min, factor_min, df["buff_factor"]
-    )
+    #df["buff_factor"] = np.where(df["buff_factor"] <= factor_min, factor_min, df["buff_factor"])
+
+    df['buff_factor'] = standarize_size(
+        series=df[indicator_col], min_size=factor_min, max_size=factor
+    )
     cols = [
         "id_linea",