From a1764ff4dec677ea0dc8e6a25711f8b1593e2a3a Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Mon, 8 Jul 2024 08:29:16 +0200 Subject: [PATCH] Compatibility with geopandas version 1 (#1136) * address geopandas and pandas deprecations/warnings * compatibility with geopandas v1 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- envs/environment.yaml | 2 +- scripts/add_electricity.py | 6 +++--- scripts/base_network.py | 13 ++++++++----- scripts/build_gas_input_locations.py | 13 ++++++++----- scripts/build_gas_network.py | 6 ++++-- scripts/build_hourly_heat_demand.py | 6 +++--- scripts/build_industrial_distribution_key.py | 2 +- ...d_industrial_energy_demand_per_country_today.py | 4 ++-- scripts/build_industrial_production_per_country.py | 3 ++- scripts/build_population_layouts.py | 4 +++- scripts/build_renewable_profiles.py | 2 +- scripts/build_shapes.py | 14 ++++++++------ scripts/build_shipping_demand.py | 4 +--- scripts/cluster_gas_network.py | 6 +++--- scripts/determine_availability_matrix_MD_UA.py | 2 +- scripts/prepare_sector_network.py | 2 +- scripts/solve_network.py | 4 ++-- 17 files changed, 52 insertions(+), 41 deletions(-) diff --git a/envs/environment.yaml b/envs/environment.yaml index e57c87612..febd6ea2e 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -28,7 +28,7 @@ dependencies: - powerplantmatching>=0.5.15 - numpy - pandas>=2.1 -- geopandas>=0.11.0, <1 +- geopandas>=1 - xarray>=2023.11.0 - rioxarray - netcdf4 diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py index 49d0bdf75..f90d6c851 100755 --- a/scripts/add_electricity.py +++ b/scripts/add_electricity.py @@ -287,9 +287,9 @@ def shapes_to_shapes(orig, dest): transfer = sparse.lil_matrix((len(dest), len(orig)), dtype=float) for i, j in product(range(len(dest)), range(len(orig))): - if orig_prepped[j].intersects(dest[i]): - area = orig[j].intersection(dest[i]).area - transfer[i, j] = area / dest[i].area + if orig_prepped[j].intersects(dest.iloc[i]): + area = orig.iloc[j].intersection(dest.iloc[i]).area + transfer[i, j] = area / dest.iloc[i].area return transfer diff --git a/scripts/base_network.py b/scripts/base_network.py index df3bc2b2c..f87d666b1 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -671,7 +671,7 @@ def _set_links_underwater_fraction(n, offshore_shapes): if not hasattr(n.links, "geometry"): n.links["underwater_fraction"] = 0.0 else: - offshore_shape = gpd.read_file(offshore_shapes).unary_union + offshore_shape = gpd.read_file(offshore_shapes).union_all() links = gpd.GeoSeries(n.links.geometry.dropna().map(shapely.wkt.loads)) n.links["underwater_fraction"] = ( links.intersection(offshore_shape).length / links.length @@ -874,7 +874,8 @@ def build_bus_shapes(n, country_shapes, offshore_shapes, countries): onshore_locs.values, onshore_shape ), "country": country, - } + }, + crs=n.crs, ) ) @@ -889,12 +890,14 @@ def build_bus_shapes(n, country_shapes, offshore_shapes, countries): "y": offshore_locs["y"], "geometry": voronoi_partition_pts(offshore_locs.values, offshore_shape), "country": country, - } + }, + crs=n.crs, ) - offshore_regions_c = offshore_regions_c.loc[offshore_regions_c.area > 1e-2] + sel = offshore_regions_c.to_crs(3035).area > 10 # m2 + offshore_regions_c = offshore_regions_c.loc[sel] offshore_regions.append(offshore_regions_c) - shapes = pd.concat(onshore_regions, ignore_index=True) + shapes = pd.concat(onshore_regions, ignore_index=True).set_crs(n.crs) return onshore_regions, offshore_regions, shapes, offshore_shapes diff --git a/scripts/build_gas_input_locations.py b/scripts/build_gas_input_locations.py index 67dbc9860..ca43db3c1 100644 --- a/scripts/build_gas_input_locations.py +++ b/scripts/build_gas_input_locations.py @@ -7,6 +7,7 @@ production sites with data from SciGRID_gas and Global Energy Monitor. """ +import json import logging import geopandas as gpd @@ -19,7 +20,8 @@ def read_scigrid_gas(fn): df = gpd.read_file(fn) - df = pd.concat([df, df.param.apply(pd.Series)], axis=1) + expanded_param = df.param.apply(json.loads).apply(pd.Series) + df = pd.concat([df, expanded_param], axis=1) df.drop(["param", "uncertainty", "method"], axis=1, inplace=True) return df @@ -97,11 +99,11 @@ def build_gas_input_locations(gem_fn, entry_fn, sto_fn, countries): ~(entry.from_country.isin(countries) & entry.to_country.isin(countries)) & ~entry.name.str.contains("Tegelen") # only take non-EU entries | (entry.from_country == "NO") # malformed datapoint # entries from NO to GB - ] + ].copy() sto = read_scigrid_gas(sto_fn) remove_country = ["RU", "UA", "TR", "BY"] # noqa: F841 - sto = sto.query("country_code not in @remove_country") + sto = sto.query("country_code not in @remove_country").copy() # production sites inside the model scope prod = build_gem_prod_data(gem_fn) @@ -132,7 +134,8 @@ def build_gas_input_locations(gem_fn, entry_fn, sto_fn, countries): snakemake = mock_snakemake( "build_gas_input_locations", simpl="", - clusters="128", + clusters="5", + configfiles="config/test/config.overnight.yaml", ) configure_logging(snakemake) @@ -162,7 +165,7 @@ def build_gas_input_locations(gem_fn, entry_fn, sto_fn, countries): gas_input_nodes = gpd.sjoin(gas_input_locations, regions, how="left") - gas_input_nodes.rename(columns={"index_right": "bus"}, inplace=True) + gas_input_nodes.rename(columns={"name": "bus"}, inplace=True) gas_input_nodes.to_file(snakemake.output.gas_input_nodes, driver="GeoJSON") diff --git a/scripts/build_gas_network.py b/scripts/build_gas_network.py index 5e9a5c9af..e16096d36 100644 --- a/scripts/build_gas_network.py +++ b/scripts/build_gas_network.py @@ -7,6 +7,7 @@ (https://www.gas.scigrid.de/). """ +import json import logging import geopandas as gpd @@ -54,8 +55,9 @@ def diameter_to_capacity(pipe_diameter_mm): def load_dataset(fn): df = gpd.read_file(fn) - param = df.param.apply(pd.Series) - method = df.method.apply(pd.Series)[["diameter_mm", "max_cap_M_m3_per_d"]] + param = df.param.apply(json.loads).apply(pd.Series) + cols = ["diameter_mm", "max_cap_M_m3_per_d"] + method = df.method.apply(json.loads).apply(pd.Series)[cols] method.columns = method.columns + "_method" df = pd.concat([df, param, method], axis=1) to_drop = ["param", "uncertainty", "method", "tags"] diff --git a/scripts/build_hourly_heat_demand.py b/scripts/build_hourly_heat_demand.py index 0dcf3524a..c28860f3c 100644 --- a/scripts/build_hourly_heat_demand.py +++ b/scripts/build_hourly_heat_demand.py @@ -41,10 +41,10 @@ from _helpers import mock_snakemake snakemake = mock_snakemake( - "build_hourly_heat_demands", + "build_hourly_heat_demand", scope="total", simpl="", - clusters=48, + clusters=5, ) set_scenario_config(snakemake) @@ -85,6 +85,6 @@ heat_demand.index.name = "snapshots" - ds = heat_demand.stack().to_xarray() + ds = heat_demand.stack(future_stack=True).to_xarray() ds.to_netcdf(snakemake.output.heat_demand) diff --git a/scripts/build_industrial_distribution_key.py b/scripts/build_industrial_distribution_key.py index bfbba35e1..e66547289 100644 --- a/scripts/build_industrial_distribution_key.py +++ b/scripts/build_industrial_distribution_key.py @@ -116,7 +116,7 @@ def prepare_hotmaps_database(regions): gdf = gpd.sjoin(gdf, regions, how="inner", predicate="within") - gdf.rename(columns={"index_right": "bus"}, inplace=True) + gdf.rename(columns={"name": "bus"}, inplace=True) gdf["country"] = gdf.bus.str[:2] # the .sjoin can lead to duplicates if a geom is in two overlapping regions diff --git a/scripts/build_industrial_energy_demand_per_country_today.py b/scripts/build_industrial_energy_demand_per_country_today.py index 9c8f2e986..b77ba8d65 100644 --- a/scripts/build_industrial_energy_demand_per_country_today.py +++ b/scripts/build_industrial_energy_demand_per_country_today.py @@ -184,7 +184,7 @@ def separate_basic_chemicals(demand, production): demand.drop(columns="Basic chemicals", inplace=True) - demand["HVC"].clip(lower=0, inplace=True) + demand["HVC"] = demand["HVC"].clip(lower=0) return demand @@ -248,7 +248,7 @@ def industrial_energy_demand(countries, year): demand = add_non_eu28_industrial_energy_demand(countries, demand, production) # for format compatibility - demand = demand.stack(dropna=False).unstack(level=[0, 2]) + demand = demand.stack(future_stack=True).unstack(level=[0, 2]) # style and annotation demand.index.name = "TWh/a" diff --git a/scripts/build_industrial_production_per_country.py b/scripts/build_industrial_production_per_country.py index 458062059..ec86a78d5 100644 --- a/scripts/build_industrial_production_per_country.py +++ b/scripts/build_industrial_production_per_country.py @@ -301,7 +301,8 @@ def separate_basic_chemicals(demand, year): demand["Basic chemicals"] -= demand["Ammonia"] # EE, HR and LT got negative demand through subtraction - poor data - demand["Basic chemicals"].clip(lower=0.0, inplace=True) + col = "Basic chemicals" + demand[col] = demand[col].clip(lower=0.0) # assume HVC, methanol, chlorine production proportional to non-ammonia basic chemicals distribution_key = ( diff --git a/scripts/build_population_layouts.py b/scripts/build_population_layouts.py index dc4cf2f8c..ca664ed0a 100644 --- a/scripts/build_population_layouts.py +++ b/scripts/build_population_layouts.py @@ -92,7 +92,9 @@ # The first low density grid cells to reach rural fraction are rural asc_density_i = density_cells_ct.sort_values().index - asc_density_cumsum = pop_cells_ct[asc_density_i].cumsum() / pop_cells_ct.sum() + asc_density_cumsum = ( + pop_cells_ct.iloc[asc_density_i].cumsum() / pop_cells_ct.sum() + ) rural_fraction_ct = 1 - urban_fraction[ct] pop_ct_rural_b = asc_density_cumsum < rural_fraction_ct pop_ct_urban_b = ~pop_ct_rural_b diff --git a/scripts/build_renewable_profiles.py b/scripts/build_renewable_profiles.py index 0aef89bc0..57568f246 100644 --- a/scripts/build_renewable_profiles.py +++ b/scripts/build_renewable_profiles.py @@ -406,7 +406,7 @@ if snakemake.wildcards.technology.startswith("offwind"): logger.info("Calculate underwater fraction of connections.") - offshore_shape = gpd.read_file(snakemake.input["offshore_shapes"]).unary_union + offshore_shape = gpd.read_file(snakemake.input["offshore_shapes"]).union_all() underwater_fraction = [] for bus in buses: p = centre_of_mass.sel(bus=bus).data diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index 85afdaea4..2a3cc297a 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -124,7 +124,7 @@ def countries(naturalearth, country_list): df = df.loc[ df.name.isin(country_list) & ((df["scalerank"] == 0) | (df["scalerank"] == 5)) ] - s = df.set_index("name")["geometry"].map(_simplify_polys) + s = df.set_index("name")["geometry"].map(_simplify_polys).set_crs(df.crs) if "RS" in country_list: s["RS"] = s["RS"].union(s.pop("KV")) # cleanup shape union @@ -145,7 +145,8 @@ def eez(country_shapes, eez, country_list): lambda s: _simplify_polys(s, filterremote=False) ) s = gpd.GeoSeries( - {k: v for k, v in s.items() if v.distance(country_shapes[k]) < 1e-3} + {k: v for k, v in s.items() if v.distance(country_shapes[k]) < 1e-3}, + crs=df.crs, ) s = s.to_frame("geometry") s.index.name = "name" @@ -156,7 +157,7 @@ def country_cover(country_shapes, eez_shapes=None): shapes = country_shapes if eez_shapes is not None: shapes = pd.concat([shapes, eez_shapes]) - europe_shape = shapes.unary_union + europe_shape = shapes.union_all() if isinstance(europe_shape, MultiPolygon): europe_shape = max(europe_shape.geoms, key=attrgetter("area")) return Polygon(shell=europe_shape.exterior) @@ -235,11 +236,11 @@ def nuts3(country_shapes, nuts3, nuts3pop, nuts3gdp, ch_cantons, ch_popgdp): [["BA1", "BA", 3871.0], ["RS1", "RS", 7210.0], ["AL1", "AL", 2893.0]], columns=["NUTS_ID", "country", "pop"], geometry=gpd.GeoSeries(), + crs=df.crs, ) - manual["geometry"] = manual["country"].map(country_shapes) + manual["geometry"] = manual["country"].map(country_shapes.to_crs(df.crs)) manual = manual.dropna() manual = manual.set_index("NUTS_ID") - manual = manual.set_crs("ETRS89") df = pd.concat([df, manual], sort=False) @@ -265,7 +266,8 @@ def nuts3(country_shapes, nuts3, nuts3pop, nuts3gdp, ch_cantons, ch_popgdp): offshore_shapes.reset_index().to_file(snakemake.output.offshore_shapes) europe_shape = gpd.GeoDataFrame( - geometry=[country_cover(country_shapes, offshore_shapes.geometry)] + geometry=[country_cover(country_shapes, offshore_shapes.geometry)], + crs=country_shapes.crs, ) europe_shape.reset_index().to_file(snakemake.output.europe_shape) diff --git a/scripts/build_shipping_demand.py b/scripts/build_shipping_demand.py index b50cd3168..d8c960ae0 100644 --- a/scripts/build_shipping_demand.py +++ b/scripts/build_shipping_demand.py @@ -45,9 +45,7 @@ # assign ports to nearest region p = european_ports.to_crs(3857) r = regions.to_crs(3857) - outflows = ( - p.sjoin_nearest(r).groupby("index_right").properties_outflows.sum().div(1e3) - ) + outflows = p.sjoin_nearest(r).groupby("name").properties_outflows.sum().div(1e3) # calculate fraction of each country's port outflows countries = outflows.index.str[:2] diff --git a/scripts/cluster_gas_network.py b/scripts/cluster_gas_network.py index 19585aa94..6d4e6c48a 100755 --- a/scripts/cluster_gas_network.py +++ b/scripts/cluster_gas_network.py @@ -41,9 +41,9 @@ def build_clustered_gas_network(df, bus_regions, length_factor=1.25): for i in [0, 1]: gdf = gpd.GeoDataFrame(geometry=df[f"point{i}"], crs="EPSG:4326") - bus_mapping = gpd.sjoin( - gdf, bus_regions, how="left", predicate="within" - ).index_right + bus_mapping = gpd.sjoin(gdf, bus_regions, how="left", predicate="within")[ + "name" + ] bus_mapping = bus_mapping.groupby(bus_mapping.index).first() df[f"bus{i}"] = bus_mapping diff --git a/scripts/determine_availability_matrix_MD_UA.py b/scripts/determine_availability_matrix_MD_UA.py index 80c04083c..678ef025d 100644 --- a/scripts/determine_availability_matrix_MD_UA.py +++ b/scripts/determine_availability_matrix_MD_UA.py @@ -147,7 +147,7 @@ def get_wdpa_layer_name(wdpa_fn, layer_substring): regions_geometry = regions.to_crs(3035).geometry band, transform = shape_availability(regions_geometry, excluder) fig, ax = plt.subplots(figsize=(4, 8)) - gpd.GeoSeries(regions_geometry.unary_union).plot(ax=ax, color="none") + gpd.GeoSeries(regions_geometry.union_all()).plot(ax=ax, color="none") show(band, transform=transform, cmap="Greens", ax=ax) plt.axis("off") plt.savefig(snakemake.output.availability_map, bbox_inches="tight", dpi=500) diff --git a/scripts/prepare_sector_network.py b/scripts/prepare_sector_network.py index 6dcbc1917..820b19735 100644 --- a/scripts/prepare_sector_network.py +++ b/scripts/prepare_sector_network.py @@ -3836,7 +3836,7 @@ def lossy_bidirectional_links(n, carrier, efficiencies={}): rev_links.index = rev_links.index.map(lambda x: x + "-reversed") n.links = pd.concat([n.links, rev_links], sort=False) - n.links["reversed"] = n.links["reversed"].fillna(False) + n.links["reversed"] = n.links["reversed"].fillna(False).infer_objects(copy=False) n.links["length_original"] = n.links["length_original"].fillna(n.links.length) # do compression losses after concatenation to take electricity consumption at bus0 in either direction diff --git a/scripts/solve_network.py b/scripts/solve_network.py index ba2fac7fe..42ffe6be9 100644 --- a/scripts/solve_network.py +++ b/scripts/solve_network.py @@ -155,7 +155,7 @@ def _add_land_use_constraint(n): existing_large, "p_nom_min" ] - n.generators.p_nom_max.clip(lower=0, inplace=True) + n.generators["p_nom_max"] = n.generators["p_nom_max"].clip(lower=0) def _add_land_use_constraint_m(n, planning_horizons, config): @@ -207,7 +207,7 @@ def _add_land_use_constraint_m(n, planning_horizons, config): existing_large, "p_nom_min" ] - n.generators.p_nom_max.clip(lower=0, inplace=True) + n.generators["p_nom_max"] = n.generators["p_nom_max"].clip(lower=0) def add_solar_potential_constraints(n, config):