Source code for urbanpy.geom.geom

import pandas as pd
import geopandas as gpd
import osmnx as ox
from h3 import h3
from rich.progress import track
from urbanpy.utils import geo_boundary_to_polygon
from typing import Sequence, Union

__all__ = [
    "merge_geom_downloads",
    "filter_population",
    "remove_features",
    "gen_hexagons",
    "merge_shape_hex",
    "overlay_polygons_hexs",
    "resolution_downsampling",
    "osmnx_coefficient_computation",
]


[docs] def merge_geom_downloads( gdfs: Sequence[gpd.GeoDataFrame], crs: str = "EPSG:4326" ) -> gpd.GeoDataFrame: """ Merge several GeoDataFrames from OSM download_osm Parameters ---------- dfs: array_like Array of GeoDataFrames to merge. Assumes equal CRS. crs: str Valid string to pass to crs param of the geopandas.GeoDataFrame constructor function. Returns ------- concat: GeoDataFrame Output from concatenation and unary union of geometries, providing a single geometry database for the city Examples -------- >>> lima = urbanpy.download.nominatim_osm("Lima, Peru", 2) >>> callao = urbanpy.download.nominatim_osm("Callao, Peru", 1) >>> lima = urbanpy.geom.merge_geom_downloads([lima, callao]) >>> lima.head() geometry MULTIPOLYGON (((-76.80277 -12.47562, -76.80261...))) """ concat = gpd.GeoDataFrame(geometry=[pd.concat(gdfs).unary_union], crs=crs) return concat
[docs] def filter_population( pop_df: pd.DataFrame, polygon_gdf: gpd.GeoDataFrame ) -> gpd.GeoDataFrame: """ Filter an HDX database download to the polygon bounds Parameters ---------- pop_df: DataFrame Result from download_hdx polygon_gdf: GeoDataFrame Result from download_osm or merge_geom_downloads Returns ------- filtered_points_gdf: GeoDataFrame Population DataFrame filtered to polygon bounds Examples -------- >>> lima = urbanpy.download.nominatim_osm("Lima, Peru", 2) >>> callao = urbanpy.download.nominatim_osm("Callao, Peru", 1) >>> lima = urbanpy.geom.merge_geom_downloads([lima, callao]) >>> pop = urbanpy.download.hdx_fb_population('peru', 'full') >>> urbanpy.geom.filter_population(pop, lima) latitude | longitude | population_2015 | population_2020 | geometry -12.519861 | -76.774583 | 2.633668 | 2.644757 | POINT (-76.77458 -12.51986) -12.519861 | -76.745972 | 2.633668 | 2.644757 | POINT (-76.74597 -12.51986) -12.519861 | -76.745694 | 2.633668 | 2.644757 | POINT (-76.74569 -12.51986) -12.519861 | -76.742639 | 2.633668 | 2.644757 | POINT (-76.74264 -12.51986) -12.519861 | -76.741250 | 2.633668 | 2.644757 | POINT (-76.74125 -12.51986) """ minx, miny, maxx, maxy = polygon_gdf.geometry.total_bounds limits_filter = pop_df["longitude"].between(minx, maxx) & pop_df[ "latitude" ].between(miny, maxy) filtered_points = pop_df[limits_filter] geometry_ = gpd.points_from_xy( filtered_points["longitude"], filtered_points["latitude"] ) filtered_points_gdf = gpd.GeoDataFrame( filtered_points, geometry=geometry_, crs="EPSG:4326" ) return filtered_points_gdf
[docs] def remove_features(gdf: gpd.GeoDataFrame, bounds: Sequence[float]) -> gpd.GeoDataFrame: """ Remove a set of features based on bounds Parameters ---------- gdf: GeoDataFrame Input GeoDataFrame containing the point features filtered with filter_population bounds: array_like Array input following [minx, miny, maxx, maxy] for filtering (GeoPandas total_bounds method output) Returns ------- gdf: GeoDataFrame Input DataFrame but without the desired features Examples -------- >>> lima = urbanpy.geom.filter_population(pop_lima, poly_lima) >>> removed = urbanpy.geom.remove_features(lima, [-12.2,-12, -77.2,-77.17]) #Remove San Lorenzo Island >>> print(lima.shape, removed.shape) (348434, 4) (348427, 4) """ minx, miny, maxx, maxy = bounds drop_ix = gdf.cx[minx:maxx, miny:maxy].index return gdf.drop(drop_ix)
[docs] def gen_hexagons(resolution: int, city: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """ Converts an input multipolygon layer to H3 hexagons given a resolution. Parameters ---------- resolution: int, 0:15 Hexagon resolution, higher values create smaller hexagons. city: GeoDataFrame Input city polygons to transform into hexagons. Returns ------- city_hexagons: GeoDataFrame Hexagon geometry GeoDataFrame (hex_id, geom). Examples -------- >>> lima = urbanpy.geom.filter_population(pop_lima, poly_lima) >>> lima_hex = urbanpy.geom.gen_hexagons(8, lima) hex | geometry 888e620e41fffff | POLYGON ((-76.80007 -12.46917, -76.80439 -12.4...)) 888e62c809fffff | POLYGON ((-77.22539 -12.08663, -77.22971 -12.0...)) 888e62c851fffff | POLYGON ((-77.20708 -12.08484, -77.21140 -12.0...)) 888e62c841fffff | POLYGON ((-77.22689 -12.07104, -77.23122 -12.0...)) 888e62c847fffff | POLYGON ((-77.23072 -12.07929, -77.23504 -12.0...)) """ # Polyfill the city boundaries h3_polygons = list() h3_indexes = list() # Get every polygon in Multipolygon shape city_poly = city.explode(index_parts=True).reset_index(drop=True) for _, geo in city_poly.iterrows(): hexagons = h3.polyfill( geo["geometry"].__geo_interface__, res=resolution, geo_json_conformant=True ) for hexagon in hexagons: h3_polygons.append(geo_boundary_to_polygon(hexagon)) h3_indexes.append(hexagon) # Create hexagon dataframe city_hexagons = gpd.GeoDataFrame(h3_indexes, geometry=h3_polygons).drop_duplicates() city_hexagons.crs = "EPSG:4326" city_hexagons = city_hexagons.rename( columns={0: "hex"} ) # Format column name for readability return city_hexagons
[docs] def merge_shape_hex( hexs: gpd.GeoDataFrame, shape: gpd.GeoDataFrame, agg: dict, how="inner", predicate="intersects", ) -> gpd.GeoDataFrame: """ Merges a H3 hexagon GeoDataFrame with a Point GeoDataFrame and aggregates the point gdf data. Parameters ---------- hexs: GeoDataFrame Input GeoDataFrame containing hexagon geometries shape: GeoDataFrame Input GeoDataFrame containing points and features to be aggregated agg: dict A dictionary with column names as keys and values as aggregation operations. The aggregation must be one of {'sum', 'min', 'max'}. how: str. One of {'inner', 'left', 'right'}. Default 'inner'. Determines how to merge data: 'left' uses keys from left and only retains geometry from left 'right' uses keys from right and only retains geometry from right 'inner': use intersection of keys from both dfs; retain only left geometry column op: str. One of {'intersects', 'contains', 'within'}. Default 'intersects' Determines how geometries are queried for merging. Returns ------- hexs: GeoDataFrame Result of a spatial join within hex and points. All features are aggregated based on the input parameters Examples -------- >>> lima = urbanpy.download.nominatim_osm('Lima, Peru', 2) >>> pop_lima = urbanpy.download.hdx_fb_population('peru', 'full') >>> pop_df = urbanpy.filter_population(pop_lima, lima) >>> hexs = urbanpy.geom.gen_hexagons(8, lima) >>> urbanpy.geom.merge_point_hex(hexs, pop_df, 'inner', 'within', {'population_2020':'sum'}) 0 | geometry | population_2020 888e628d8bfffff | POLYGON ((-76.66002 -12.20371, -76.66433 -12.2... | NaN 888e62c5ddfffff | POLYGON ((-76.94564 -12.16138, -76.94996 -12.1... | 14528.039097 888e62132bfffff | POLYGON ((-76.84736 -12.17523, -76.85167 -12.1... | 608.312696 888e628debfffff | POLYGON ((-76.67982 -12.18998, -76.68413 -12.1... | NaN 888e6299b3fffff | POLYGON ((-76.78876 -11.97286, -76.79307 -11.9... | 3225.658803 """ joined = gpd.sjoin(shape, hexs, how=how, predicate=predicate) # Uses index right based on the order of points and hex. Right takes hex index hex_merge = joined.groupby("index_right").agg(agg) # Avoid SpecificationError by copying the DataFrame ret_hex = hexs.copy() for key in agg: ret_hex.loc[hex_merge.index, key] = hex_merge[key].values return ret_hex
[docs] def overlay_polygons_hexs( polygons: gpd.GeoDataFrame, hexs: gpd.GeoDataFrame, hex_col: str, columns: Sequence[str], ) -> gpd.GeoDataFrame: """ Overlays a Polygon GeoDataFrame with a H3 hexagon GeoDataFrame and divide the 'columns' the values proportionally to the overlayed area. Parameters ---------- polygons: GeoDataFrame Input GeoDataFrame containing polygons and columns to be processed hexs: GeoDataFrame Input GeoDataFrame containing desired output hexagon resolution geometries hex_col: str Determines the column with the hex id. columns: list A list with column names of the columns that are going to be proportionally adjusted Returns ------- hexs: GeoDataFrame Result of a spatial join within hex and points. All columns are adjusted based on the overlayed area. Examples -------- >>> urbanpy.geom.overlay_polygons_hexs(zonas_pob, hex_lima, 'hex', pob_vulnerable) hex | POB_TOTAL | geometry 898e6200493ffff | 193.705376 | POLYGON ((-76.80695 -12.35199, -76.80812 -12.3... 898e6200497ffff | 175.749780 | POLYGON ((-76.80412 -12.35395, -76.80528 -12.3... 898e620049bffff | 32.231078 | POLYGON ((-76.81011 -12.35342, -76.81127 -12.3... 898e62004a7ffff | 74.154973 | POLYGON ((-76.79911 -12.36468, -76.80027 -12.3... 898e62004b7ffff | 46.989828 | POLYGON ((-76.79879 -12.36128, -76.79995 -12.3... """ polygons_ = polygons.copy() # Preserve data state polygons_["poly_area"] = polygons_.geometry.area # Calc polygon area # Overlay intersection overlayed = gpd.overlay(polygons_, hexs, how="intersection") # Downsample indicators using proporional overlayed area w.r.t polygon area area_prop = overlayed.geometry.area / overlayed["poly_area"] overlayed[columns] = overlayed[columns].apply(lambda col: col * area_prop) # Aggregate over Hex ID per_hexagon_data = overlayed.groupby(hex_col)[columns].sum() # Preserve data as GeoDataFrame hex_df = pd.merge( left=per_hexagon_data, right=hexs[[hex_col, "geometry"]], on=hex_col ) hex_gdf = gpd.GeoDataFrame( hex_df[[hex_col] + columns], geometry=hex_df["geometry"], crs=hexs.crs ) return hex_gdf
[docs] def resolution_downsampling( gdf: gpd.GeoDataFrame, hex_col: str, coarse_resolution: int, agg: dict ) -> gpd.GeoDataFrame: """ Downsample hexagon resolution aggregating indicated metrics (e.g. Transform hexagon resolution from 9 to 6). Parameters ---------- gdf: GeoDataFrame GeoDataFrame with hexagon geometries (output from gen_hexagons). hex_col: str Determines the column with the hex id. coarse_resolution: int, 0:15 Hexagon resolution lower than gdf actual resolution (higher values create smaller hexagons). Returns ------- gdfc: GeoDataFrame GeoDataFrame with lower resolution hexagons geometry and metrics aggregated as indicated. """ gdf_coarse = gdf.copy() coarse_hex_col = "hex_{}".format(coarse_resolution) gdf_coarse[coarse_hex_col] = gdf_coarse[hex_col].apply( lambda x: h3.h3_to_parent(x, coarse_resolution) ) dfc = gdf_coarse.groupby([coarse_hex_col]).agg(agg).reset_index() gdfc_geometry = dfc[coarse_hex_col].apply(geo_boundary_to_polygon) return gpd.GeoDataFrame(dfc, geometry=gdfc_geometry, crs=gdf.crs)
[docs] def osmnx_coefficient_computation( gdf, net_type, basic_stats, extended_stats, connectivity=False, anc=False, ecc=False, bc=False, cc=False, ): """ Apply osmnx's graph from polygon to query a city's street network within a geometry. This may be a long procedure given the hexagon layer resolution. Parameters ---------- gdf: GeoDataFrame GeoDataFrame with geometries to download graphs contained within them. net_type: str Network type to download. One of {'drive', 'drive_service', 'walk', 'bike', 'all', 'all_private'} basic_stats: list List of basic stats to compute from downloaded graph extended_stats: list List of extended stats to compute from graph connectivity: bool. Default False. Compute node and edge connectivity anc: bool. Default False. Compute avg node connectivity ecc: bool. Default False. Compute shortest paths, eccentricity and topological metric bc: bool. Default False. Compute node betweeness centrality cc: bool. Default False. Compute node closeness centrality For more detail about these parameters, see https://osmnx.readthedocs.io/en/stable/osmnx.html#module-osmnx.stats Returns ------- gdf: GeoDataFrame Input GeoDataFrame with updated columns containing the selected metrics Examples -------- >>> hexagons = urbanpy.geom.gen_hexagons(8, lima) >>> urbanpy.geom.osmnx_coefficient_computation(hexagons.head(), 'walk', ['circuity_avg'], []) On record 1: There are no nodes within the requested geometry On record 3: There are no nodes within the requested geometry hex | geometry | circuity_avg 888e62c64bfffff | POLYGON ((-76.89763 -12.03869, -76.90194 -12.0... | 1.021441 888e6212e1fffff | POLYGON ((-76.75291 -12.19727, -76.75722 -12.2... | NaN 888e62d333fffff | POLYGON ((-77.09253 -11.83762, -77.09685 -11.8... | 1.025313 888e666c2dfffff | POLYGON ((-76.93109 -11.79031, -76.93540 -11.7... | NaN 888e62d4b3fffff | POLYGON ((-76.87935 -12.03688, -76.88366 -12.0... | 1.044654 """ # May be a lengthy download depending on the amount of features for index, row in track( gdf.iterrows(), total=gdf.shape[0], description="Computing road network coefficients...", ): try: graph = ox.graph_from_polygon(row["geometry"], net_type) b_stats = ox.basic_stats(graph) ext_stats = ox.extended_stats(graph, connectivity, anc, ecc, bc, cc) for stat in basic_stats: gdf.loc[index, stat] = b_stats.get(stat) for stat in extended_stats: gdf.loc[index, stat] = ext_stats.get(stat) except Exception as err: print(f"On record {index}: ", err) return gdf