From 9d23a9539c190dda10429ba71320dee68cc0ab94 Mon Sep 17 00:00:00 2001 From: floriscalkoen Date: Tue, 20 Feb 2024 20:12:38 +0100 Subject: [PATCH] as Python CLI --- .../overture/extract_admin_bounds.py | 103 ++++++++++-------- 1 file changed, 56 insertions(+), 47 deletions(-) diff --git a/open_buildings/overture/extract_admin_bounds.py b/open_buildings/overture/extract_admin_bounds.py index 7b0c40e..41d156b 100644 --- a/open_buildings/overture/extract_admin_bounds.py +++ b/open_buildings/overture/extract_admin_bounds.py @@ -1,53 +1,62 @@ +import argparse import pathlib import duckdb import geopandas as gpd from shapely import wkb -con = duckdb.connect(database=":memory:", read_only=False) -con.execute("LOAD spatial;") - -admin_level = 2 - -OVERTURE_DIR = pathlib.Path("~/data/src/overture/2024-02-15-alpha.0").expanduser() -OUT_DIR = pathlib.Path("~/data/prc/overture/2024-02-15").expanduser() -if not OUT_DIR.exists(): - OUT_DIR.mkdir(parents=True, exist_ok=True) - -# Adjusted query to perform a join and retrieve polygons -query = f""" -WITH admins_view AS ( - SELECT * FROM read_parquet('{str(OVERTURE_DIR)}/theme=admins/type=*/*') -) -SELECT - admins.id, - admins.isoCountryCodeAlpha2, - admins.names, - admins.isoSubCountryCode, - areas.areaGeometry as geometry -FROM admins_view AS admins -INNER JOIN ( - SELECT - id as areaId, - localityId, - geometry AS areaGeometry - FROM admins_view -) AS areas ON areas.localityId = admins.id -WHERE admins.adminLevel = {admin_level}; -""" - -# Execute the query and fetch the result -admins = con.execute(query).fetchdf() - -# Convert the 'geometry' column from WKB to Shapely geometries and create a GeoDataFrame -admins = gpd.GeoDataFrame( - admins, - geometry=admins["geometry"].apply(lambda b: wkb.loads(bytes(b))), - crs="EPSG:4326", -) - -admins[f"admin_level_{admin_level}_name"] = admins.names.map(lambda r: r["primary"]) -admins = admins.drop(columns=["names"]) - -outpath = OUT_DIR/ f"admin_bounds_level_{admin_level}.parquet" -admins.to_parquet(outpath) \ No newline at end of file + +def main(admin_level, overture_dir, out_dir): + con = duckdb.connect(database=":memory:", read_only=False) + con.execute("LOAD spatial;") + + OVERTURE_DIR = pathlib.Path(overture_dir).expanduser() + OUT_DIR = pathlib.Path(out_dir).expanduser() + if not OUT_DIR.exists(): + OUT_DIR.mkdir(parents=True, exist_ok=True) + + query = f""" + WITH admins_view AS ( + SELECT * FROM read_parquet('{str(OVERTURE_DIR)}/theme=admins/type=*/*') + ) + SELECT + admins.id, + admins.isoCountryCodeAlpha2, + admins.names, + admins.isoSubCountryCode, + areas.areaGeometry as geometry + FROM admins_view AS admins + INNER JOIN ( + SELECT + id as areaId, + localityId, + geometry AS areaGeometry + FROM admins_view + ) AS areas ON areas.localityId = admins.id + WHERE admins.adminLevel = {admin_level}; + """ + + admins = con.execute(query).fetchdf() + + admins = gpd.GeoDataFrame( + admins, + geometry=admins["geometry"].apply(lambda b: wkb.loads(bytes(b))), + crs="EPSG:4326", + ) + + admins[f"admin_level_{admin_level}_name"] = admins.names.map(lambda r: r["primary"]) + admins = admins.drop(columns=["names"]) + + outpath = OUT_DIR / f"admin_bounds_level_{admin_level}.parquet" + print(f"Writing admin boundaries level {admin_level} to: {outpath}") + admins.to_parquet(outpath) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Process Overture admin level data.') + parser.add_argument('admin_level', type=int, help='The admin level to process') + parser.add_argument('-s', '--source', default='~/data/src/overture/2024-02-15-alpha.0', help='The source Overture directory') + parser.add_argument('-o', '--output', default='~/data/prc/overture/2024-02-15', help='The output directory') + + args = parser.parse_args() + + main(args.admin_level, args.source, args.output)