Source code for urbanpy.geom.geom

import pandas as pd
import geopandas as gpd
import osmnx as ox
from h3 import h3
from rich.progress import track
from urbanpy.utils import geo_boundary_to_polygon
from typing import Sequence, Union

__all__ = [
    "merge_geom_downloads",
    "filter_population",
    "remove_features",
    "gen_hexagons",
    "merge_shape_hex",
    "overlay_polygons_hexs",
    "resolution_downsampling",
    "osmnx_coefficient_computation",
]



[docs]
def merge_geom_downloads(
    gdfs: Sequence[gpd.GeoDataFrame], crs: str = "EPSG:4326"
) -> gpd.GeoDataFrame:
    """
    Merge several GeoDataFrames from OSM download_osm

    Parameters
    ----------
    dfs: array_like
        Array of GeoDataFrames to merge. Assumes equal CRS.
    crs: str
        Valid string to pass to crs param of the geopandas.GeoDataFrame constructor function.

    Returns
    -------

    concat: GeoDataFrame
        Output from concatenation and unary union of geometries, providing a single geometry database for the city

    Examples
    --------
    >>> lima = urbanpy.download.nominatim_osm("Lima, Peru", 2)
    >>> callao = urbanpy.download.nominatim_osm("Callao, Peru", 1)
    >>> lima = urbanpy.geom.merge_geom_downloads([lima, callao])
    >>> lima.head()
    geometry
    MULTIPOLYGON (((-76.80277 -12.47562, -76.80261...)))
    """
    concat = gpd.GeoDataFrame(geometry=[pd.concat(gdfs).unary_union], crs=crs)
    return concat




[docs]
def filter_population(
    pop_df: pd.DataFrame, polygon_gdf: gpd.GeoDataFrame
) -> gpd.GeoDataFrame:
    """
    Filter an HDX database download to the polygon bounds

    Parameters
    ----------
    pop_df: DataFrame
        Result from download_hdx

    polygon_gdf: GeoDataFrame
        Result from download_osm or merge_geom_downloads

    Returns
    -------
    filtered_points_gdf: GeoDataFrame
        Population DataFrame filtered to polygon bounds

    Examples
    --------
    >>> lima = urbanpy.download.nominatim_osm("Lima, Peru", 2)
    >>> callao = urbanpy.download.nominatim_osm("Callao, Peru", 1)
    >>> lima = urbanpy.geom.merge_geom_downloads([lima, callao])
    >>> pop = urbanpy.download.hdx_fb_population('peru', 'full')
    >>> urbanpy.geom.filter_population(pop, lima)
    latitude   | longitude  | population_2015 | population_2020 | geometry
    -12.519861 | -76.774583 | 2.633668        | 2.644757        | POINT (-76.77458 -12.51986)
    -12.519861 | -76.745972 | 2.633668        | 2.644757        | POINT (-76.74597 -12.51986)
    -12.519861 | -76.745694 | 2.633668        | 2.644757        | POINT (-76.74569 -12.51986)
    -12.519861 | -76.742639 | 2.633668        | 2.644757        | POINT (-76.74264 -12.51986)
    -12.519861 | -76.741250 | 2.633668        | 2.644757        | POINT (-76.74125 -12.51986)
    """
    minx, miny, maxx, maxy = polygon_gdf.geometry.total_bounds
    limits_filter = pop_df["longitude"].between(minx, maxx) & pop_df[
        "latitude"
    ].between(miny, maxy)
    filtered_points = pop_df[limits_filter]

    geometry_ = gpd.points_from_xy(
        filtered_points["longitude"], filtered_points["latitude"]
    )
    filtered_points_gdf = gpd.GeoDataFrame(
        filtered_points, geometry=geometry_, crs="EPSG:4326"
    )

    return filtered_points_gdf




[docs]
def remove_features(gdf: gpd.GeoDataFrame, bounds: Sequence[float]) -> gpd.GeoDataFrame:
    """
    Remove a set of features based on bounds

    Parameters
    ----------
    gdf: GeoDataFrame
        Input GeoDataFrame containing the point features filtered with filter_population

    bounds: array_like
        Array input following [minx, miny, maxx, maxy] for filtering (GeoPandas total_bounds method output)

    Returns
    -------
    gdf: GeoDataFrame
        Input DataFrame but without the desired features

    Examples
    --------
    >>> lima = urbanpy.geom.filter_population(pop_lima, poly_lima)
    >>> removed = urbanpy.geom.remove_features(lima, [-12.2,-12, -77.2,-77.17]) #Remove San Lorenzo Island
    >>> print(lima.shape, removed.shape)
    (348434, 4) (348427, 4)

    """
    minx, miny, maxx, maxy = bounds
    drop_ix = gdf.cx[minx:maxx, miny:maxy].index

    return gdf.drop(drop_ix)




[docs]
def gen_hexagons(resolution: int, city: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Converts an input multipolygon layer to H3 hexagons given a resolution.

    Parameters
    ----------
    resolution: int, 0:15
        Hexagon resolution, higher values create smaller hexagons.

    city: GeoDataFrame
        Input city polygons to transform into hexagons.

    Returns
    -------
    city_hexagons: GeoDataFrame
        Hexagon geometry GeoDataFrame (hex_id, geom).

    Examples
    --------
    >>> lima = urbanpy.geom.filter_population(pop_lima, poly_lima)
    >>> lima_hex = urbanpy.geom.gen_hexagons(8, lima)
    hex	            | geometry
    888e620e41fffff | POLYGON ((-76.80007 -12.46917, -76.80439 -12.4...))
    888e62c809fffff | POLYGON ((-77.22539 -12.08663, -77.22971 -12.0...))
    888e62c851fffff | POLYGON ((-77.20708 -12.08484, -77.21140 -12.0...))
    888e62c841fffff | POLYGON ((-77.22689 -12.07104, -77.23122 -12.0...))
    888e62c847fffff | POLYGON ((-77.23072 -12.07929, -77.23504 -12.0...))
    """
    # Polyfill the city boundaries
    h3_polygons = list()
    h3_indexes = list()

    # Get every polygon in Multipolygon shape
    city_poly = city.explode(index_parts=True).reset_index(drop=True)

    for _, geo in city_poly.iterrows():
        hexagons = h3.polyfill(
            geo["geometry"].__geo_interface__, res=resolution, geo_json_conformant=True
        )
        for hexagon in hexagons:
            h3_polygons.append(geo_boundary_to_polygon(hexagon))
            h3_indexes.append(hexagon)

    # Create hexagon dataframe
    city_hexagons = gpd.GeoDataFrame(h3_indexes, geometry=h3_polygons).drop_duplicates()
    city_hexagons.crs = "EPSG:4326"
    city_hexagons = city_hexagons.rename(
        columns={0: "hex"}
    )  # Format column name for readability

    return city_hexagons




[docs]
def merge_shape_hex(
    hexs: gpd.GeoDataFrame,
    shape: gpd.GeoDataFrame,
    agg: dict,
    how="inner",
    predicate="intersects",
) -> gpd.GeoDataFrame:
    """
    Merges a H3 hexagon GeoDataFrame with a Point GeoDataFrame and aggregates the
    point gdf data.

    Parameters
    ----------
    hexs: GeoDataFrame
        Input GeoDataFrame containing hexagon geometries

    shape: GeoDataFrame
        Input GeoDataFrame containing points and features to be aggregated

    agg: dict
        A dictionary with column names as keys and values as aggregation
        operations. The aggregation must be one of {'sum', 'min', 'max'}.

    how: str. One of {'inner', 'left', 'right'}. Default 'inner'.
        Determines how to merge data:
            'left' uses keys from left and only retains geometry from left
            'right' uses keys from right and only retains geometry from right
            'inner': use intersection of keys from both dfs; retain only left geometry column

    op: str. One of {'intersects', 'contains', 'within'}. Default 'intersects'
        Determines how geometries are queried for merging.

    Returns
    -------
    hexs: GeoDataFrame
        Result of a spatial join within hex and points. All features are aggregated
        based on the input parameters

    Examples
    --------
    >>> lima = urbanpy.download.nominatim_osm('Lima, Peru', 2)
    >>> pop_lima = urbanpy.download.hdx_fb_population('peru', 'full')
    >>> pop_df = urbanpy.filter_population(pop_lima, lima)
    >>> hexs = urbanpy.geom.gen_hexagons(8, lima)
    >>> urbanpy.geom.merge_point_hex(hexs, pop_df, 'inner', 'within', {'population_2020':'sum'})
    0               | geometry                                          | population_2020
    888e628d8bfffff | POLYGON ((-76.66002 -12.20371, -76.66433 -12.2... | NaN
    888e62c5ddfffff | POLYGON ((-76.94564 -12.16138, -76.94996 -12.1... | 14528.039097
    888e62132bfffff | POLYGON ((-76.84736 -12.17523, -76.85167 -12.1... | 608.312696
    888e628debfffff | POLYGON ((-76.67982 -12.18998, -76.68413 -12.1... | NaN
    888e6299b3fffff | POLYGON ((-76.78876 -11.97286, -76.79307 -11.9... | 3225.658803
    """
    joined = gpd.sjoin(shape, hexs, how=how, predicate=predicate)

    # Uses index right based on the order of points and hex. Right takes hex index
    hex_merge = joined.groupby("index_right").agg(agg)

    # Avoid SpecificationError by copying the DataFrame
    ret_hex = hexs.copy()

    for key in agg:
        ret_hex.loc[hex_merge.index, key] = hex_merge[key].values

    return ret_hex




[docs]
def overlay_polygons_hexs(
    polygons: gpd.GeoDataFrame,
    hexs: gpd.GeoDataFrame,
    hex_col: str,
    columns: Sequence[str],
) -> gpd.GeoDataFrame:
    """
    Overlays a Polygon GeoDataFrame with a H3 hexagon GeoDataFrame and divide the 'columns' the values proportionally to the overlayed area.

    Parameters
    ----------
    polygons: GeoDataFrame
                Input GeoDataFrame containing polygons and columns to be processed

    hexs: GeoDataFrame
             Input GeoDataFrame containing desired output hexagon resolution geometries

    hex_col: str
            Determines the column with the hex id.

    columns: list
             A list with column names of the columns that are going to be proportionally adjusted

    Returns
    -------

    hexs: GeoDataFrame
           Result of a spatial join within hex and points. All columns are adjusted
           based on the overlayed area.

    Examples
    --------

    >>> urbanpy.geom.overlay_polygons_hexs(zonas_pob, hex_lima, 'hex', pob_vulnerable)
                hex |  POB_TOTAL |  geometry
    898e6200493ffff | 193.705376 |  POLYGON ((-76.80695 -12.35199, -76.80812 -12.3...
    898e6200497ffff | 175.749780 |  POLYGON ((-76.80412 -12.35395, -76.80528 -12.3...
    898e620049bffff |  32.231078 |  POLYGON ((-76.81011 -12.35342, -76.81127 -12.3...
    898e62004a7ffff |  74.154973 |  POLYGON ((-76.79911 -12.36468, -76.80027 -12.3...
    898e62004b7ffff |  46.989828 |  POLYGON ((-76.79879 -12.36128, -76.79995 -12.3...

    """
    polygons_ = polygons.copy()  # Preserve data state
    polygons_["poly_area"] = polygons_.geometry.area  # Calc polygon area

    # Overlay intersection
    overlayed = gpd.overlay(polygons_, hexs, how="intersection")

    # Downsample indicators using proporional overlayed area w.r.t polygon area
    area_prop = overlayed.geometry.area / overlayed["poly_area"]
    overlayed[columns] = overlayed[columns].apply(lambda col: col * area_prop)

    # Aggregate over Hex ID
    per_hexagon_data = overlayed.groupby(hex_col)[columns].sum()

    # Preserve data as GeoDataFrame
    hex_df = pd.merge(
        left=per_hexagon_data, right=hexs[[hex_col, "geometry"]], on=hex_col
    )
    hex_gdf = gpd.GeoDataFrame(
        hex_df[[hex_col] + columns], geometry=hex_df["geometry"], crs=hexs.crs
    )

    return hex_gdf




[docs]
def resolution_downsampling(
    gdf: gpd.GeoDataFrame, hex_col: str, coarse_resolution: int, agg: dict
) -> gpd.GeoDataFrame:
    """
    Downsample hexagon resolution aggregating indicated metrics (e.g. Transform hexagon resolution from 9 to 6).

    Parameters
    ----------
    gdf: GeoDataFrame
        GeoDataFrame with hexagon geometries (output from gen_hexagons).

    hex_col: str
        Determines the column with the hex id.

    coarse_resolution: int, 0:15
        Hexagon resolution lower than gdf actual resolution (higher values create smaller hexagons).

    Returns
    -------
    gdfc: GeoDataFrame
        GeoDataFrame with lower resolution hexagons geometry and metrics aggregated as indicated.
    """
    gdf_coarse = gdf.copy()
    coarse_hex_col = "hex_{}".format(coarse_resolution)
    gdf_coarse[coarse_hex_col] = gdf_coarse[hex_col].apply(
        lambda x: h3.h3_to_parent(x, coarse_resolution)
    )
    dfc = gdf_coarse.groupby([coarse_hex_col]).agg(agg).reset_index()
    gdfc_geometry = dfc[coarse_hex_col].apply(geo_boundary_to_polygon)

    return gpd.GeoDataFrame(dfc, geometry=gdfc_geometry, crs=gdf.crs)




[docs]
def osmnx_coefficient_computation(
    gdf,
    net_type,
    basic_stats,
    extended_stats,
    connectivity=False,
    anc=False,
    ecc=False,
    bc=False,
    cc=False,
):
    """
    Apply osmnx's graph from polygon to query a city's street network within a geometry.
    This may be a long procedure given the hexagon layer resolution.

    Parameters
    ----------

    gdf: GeoDataFrame
        GeoDataFrame with geometries to download graphs contained within them.

    net_type: str
        Network type to download. One of {'drive', 'drive_service', 'walk', 'bike', 'all', 'all_private'}

    basic_stats: list
        List of basic stats to compute from downloaded graph

    extended_stats: list
        List of extended stats to compute from graph

    connectivity: bool. Default False.
        Compute node and edge connectivity

    anc: bool. Default False.
        Compute avg node connectivity

    ecc: bool. Default False.
        Compute shortest paths, eccentricity and topological metric

    bc: bool. Default False.
        Compute node betweeness centrality

    cc: bool. Default False.
        Compute node closeness centrality

    For more detail about these parameters, see https://osmnx.readthedocs.io/en/stable/osmnx.html#module-osmnx.stats

    Returns
    -------
    gdf: GeoDataFrame
        Input GeoDataFrame with updated columns containing the selected metrics

    Examples
    --------
    >>> hexagons = urbanpy.geom.gen_hexagons(8, lima)
    >>> urbanpy.geom.osmnx_coefficient_computation(hexagons.head(), 'walk', ['circuity_avg'], [])
    On record 1:  There are no nodes within the requested geometry
    On record 3:  There are no nodes within the requested geometry
                hex	| geometry	                                        | circuity_avg
        888e62c64bfffff	| POLYGON ((-76.89763 -12.03869, -76.90194 -12.0... | 1.021441
        888e6212e1fffff	| POLYGON ((-76.75291 -12.19727, -76.75722 -12.2... | NaN
        888e62d333fffff	| POLYGON ((-77.09253 -11.83762, -77.09685 -11.8... | 1.025313
        888e666c2dfffff	| POLYGON ((-76.93109 -11.79031, -76.93540 -11.7... | NaN
        888e62d4b3fffff	| POLYGON ((-76.87935 -12.03688, -76.88366 -12.0... | 1.044654
    """
    # May be a lengthy download depending on the amount of features
    for index, row in track(
        gdf.iterrows(),
        total=gdf.shape[0],
        description="Computing road network coefficients...",
    ):
        try:
            graph = ox.graph_from_polygon(row["geometry"], net_type)
            b_stats = ox.basic_stats(graph)
            ext_stats = ox.extended_stats(graph, connectivity, anc, ecc, bc, cc)

            for stat in basic_stats:
                gdf.loc[index, stat] = b_stats.get(stat)
            for stat in extended_stats:
                gdf.loc[index, stat] = ext_stats.get(stat)
        except Exception as err:
            print(f"On record {index}: ", err)

    return gdf