From 36fe57938c1363da47799dfbb127c9b19199c0cd Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 27 Sep 2023 12:47:23 +0300 Subject: [PATCH 01/15] Replace pandas.append() --- scripts/build_retro_cost.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/build_retro_cost.py b/scripts/build_retro_cost.py index c830415ed..c09e8744f 100644 --- a/scripts/build_retro_cost.py +++ b/scripts/build_retro_cost.py @@ -223,7 +223,7 @@ def prepare_building_stock_data(): usecols=[0, 1, 2, 3], encoding="ISO-8859-1", ) - area_tot = area_tot.append(area_missing.unstack(level=-1).dropna().stack()) + area_tot = pd.concat([area_tot, area_missing.unstack(level=-1).dropna().stack()]) area_tot = area_tot.loc[~area_tot.index.duplicated(keep="last")] # for still missing countries calculate floor area by population size @@ -246,7 +246,7 @@ def prepare_building_stock_data(): averaged_data.index = index averaged_data["estimated"] = 1 if ct not in area_tot.index.levels[0]: - area_tot = area_tot.append(averaged_data, sort=True) + area_tot = pd.concat([area_tot, averaged_data], sort=True) else: area_tot.loc[averaged_data.index] = averaged_data @@ -272,7 +272,7 @@ def prepare_building_stock_data(): ][x["bage"]].iloc[0], axis=1, ) - data_PL_final = data_PL_final.append(data_PL) + data_PL_final = pd.concat([data_PL_final, data_PL]) u_values = pd.concat([u_values, data_PL_final]).reset_index(drop=True) @@ -966,7 +966,7 @@ def sample_dE_costs_area( .mean(level=1) .set_index(pd.MultiIndex.from_product([[ct], cost_dE.index.levels[1]])) ) - cost_dE = cost_dE.append(averaged_data) + cost_dE = pd.concat(cost_dE, averaged_data) # weights costs after construction index if construction_index: @@ -995,12 +995,12 @@ def sample_dE_costs_area( ) ) ) - cost_dE = cost_dE.append(tot).unstack().stack() + cost_dE = pd.concat(cost_dE, tot).unstack().stack() summed_area = pd.DataFrame(area_tot.groupby("country").sum()).set_index( pd.MultiIndex.from_product([area_tot.index.unique(level="country"), ["tot"]]) ) - area_tot = area_tot.append(summed_area).unstack().stack() + area_tot = pd.concat(area_tot, summed_area).unstack().stack() cost_per_saving = cost_dE["cost"] / ( 1 - cost_dE["dE"] From 89d140cd7a10fd207e1d42ed67bdd5c225bd8677 Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 27 Sep 2023 12:51:38 +0300 Subject: [PATCH 02/15] Fix a column name in buildings data --- scripts/build_retro_cost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build_retro_cost.py b/scripts/build_retro_cost.py index c09e8744f..be06d3ad3 100644 --- a/scripts/build_retro_cost.py +++ b/scripts/build_retro_cost.py @@ -201,7 +201,7 @@ def prepare_building_stock_data(): # heated floor area ---------------------------------------------------------- area = building_data[ (building_data.type == "Heated area [Mm²]") - & (building_data.subsector != "Total") + & (building_data.detail != "Total") ] area_tot = area.groupby(["country", "sector"]).sum() area = pd.concat( From ce8b89d1e28c4a74d0f5dba927e1dc0645b961de Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 27 Sep 2023 14:41:00 +0300 Subject: [PATCH 03/15] Fix structure of the grouped dataframe --- scripts/build_retro_cost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build_retro_cost.py b/scripts/build_retro_cost.py index be06d3ad3..bccea3e45 100644 --- a/scripts/build_retro_cost.py +++ b/scripts/build_retro_cost.py @@ -203,7 +203,7 @@ def prepare_building_stock_data(): (building_data.type == "Heated area [Mm²]") & (building_data.detail != "Total") ] - area_tot = area.groupby(["country", "sector"]).sum() + area_tot = area[["country", "sector", "value"]].groupby(["country", "sector"]).sum() area = pd.concat( [ area, From 429424851805817c2f2ba51011d506a092485f7a Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 11 Oct 2023 12:26:10 +0300 Subject: [PATCH 04/15] Re-map country codes to account for "GB" --- scripts/build_retro_cost.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/build_retro_cost.py b/scripts/build_retro_cost.py index bccea3e45..c9912b2fd 100644 --- a/scripts/build_retro_cost.py +++ b/scripts/build_retro_cost.py @@ -198,6 +198,8 @@ def prepare_building_stock_data(): } ) + building_data["country_code"] = building_data["country"].map(country_iso_dic) + # heated floor area ---------------------------------------------------------- area = building_data[ (building_data.type == "Heated area [Mm²]") From f825544c3bf9b30829b97c4ce71c7d107191ce02 Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 11 Oct 2023 12:26:30 +0300 Subject: [PATCH 05/15] Remove duplication in column names --- scripts/build_retro_cost.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/build_retro_cost.py b/scripts/build_retro_cost.py index c9912b2fd..5449c4f1e 100644 --- a/scripts/build_retro_cost.py +++ b/scripts/build_retro_cost.py @@ -949,7 +949,8 @@ def sample_dE_costs_area( .rename(index=rename_sectors, level=2) .reset_index() ) - .rename(columns={"country": "country_code"}) + # if uncommented, leads to the second `country_code` column + # .rename(columns={"country": "country_code"}) .set_index(["country_code", "subsector", "bage"]) ) From 5ec00c5d9d01d32af5e3f4fd83fb4f2068b6d386 Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 11 Oct 2023 12:27:10 +0300 Subject: [PATCH 06/15] Fix type definition for a call of difference function --- scripts/build_retro_cost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build_retro_cost.py b/scripts/build_retro_cost.py index 5449c4f1e..bacb87153 100644 --- a/scripts/build_retro_cost.py +++ b/scripts/build_retro_cost.py @@ -963,7 +963,7 @@ def sample_dE_costs_area( ) # map missing countries - for ct in countries.difference(cost_dE.index.levels[0]): + for ct in set(countries).difference(cost_dE.index.levels[0]): averaged_data = ( cost_dE.reindex(index=map_for_missings[ct], level=0) .mean(level=1) From b51d908bfa5ca1b671154dc3043b4a6a90083653 Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 11 Oct 2023 12:27:31 +0300 Subject: [PATCH 07/15] Fix computation of the mean --- scripts/build_retro_cost.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/build_retro_cost.py b/scripts/build_retro_cost.py index bacb87153..cb3caeff2 100644 --- a/scripts/build_retro_cost.py +++ b/scripts/build_retro_cost.py @@ -966,7 +966,8 @@ def sample_dE_costs_area( for ct in set(countries).difference(cost_dE.index.levels[0]): averaged_data = ( cost_dE.reindex(index=map_for_missings[ct], level=0) - .mean(level=1) + .groupby(level=1) + .mean() .set_index(pd.MultiIndex.from_product([[ct], cost_dE.index.levels[1]])) ) cost_dE = pd.concat(cost_dE, averaged_data) From 5ee9df000b99ed94145bf366c60b2ce2c467b194 Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 11 Oct 2023 12:28:08 +0300 Subject: [PATCH 08/15] Make an argument of pd.concat to a list --- scripts/build_retro_cost.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/build_retro_cost.py b/scripts/build_retro_cost.py index cb3caeff2..b65c82ba3 100644 --- a/scripts/build_retro_cost.py +++ b/scripts/build_retro_cost.py @@ -970,7 +970,7 @@ def sample_dE_costs_area( .mean() .set_index(pd.MultiIndex.from_product([[ct], cost_dE.index.levels[1]])) ) - cost_dE = pd.concat(cost_dE, averaged_data) + cost_dE = pd.concat([cost_dE, averaged_data]) # weights costs after construction index if construction_index: @@ -999,12 +999,12 @@ def sample_dE_costs_area( ) ) ) - cost_dE = pd.concat(cost_dE, tot).unstack().stack() + cost_dE = pd.concat([cost_dE, tot]).unstack().stack() summed_area = pd.DataFrame(area_tot.groupby("country").sum()).set_index( pd.MultiIndex.from_product([area_tot.index.unique(level="country"), ["tot"]]) ) - area_tot = pd.concat(area_tot, summed_area).unstack().stack() + area_tot = pd.concat([area_tot, summed_area]).unstack().stack() cost_per_saving = cost_dE["cost"] / ( 1 - cost_dE["dE"] From 7ad60cb7f985dee07c515d0b795eebbc9660ecc1 Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 11 Oct 2023 12:28:34 +0300 Subject: [PATCH 09/15] Fix calculation of averages --- scripts/build_retro_cost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build_retro_cost.py b/scripts/build_retro_cost.py index b65c82ba3..04ae6ef4c 100644 --- a/scripts/build_retro_cost.py +++ b/scripts/build_retro_cost.py @@ -987,7 +987,7 @@ def sample_dE_costs_area( # drop not considered countries cost_dE = cost_dE.reindex(countries, level=0) # get share of residential and service floor area - sec_w = area_tot.value / area_tot.value.groupby(level=0).sum() + sec_w = area_tot.div(area_tot.groupby(level=0).transform('sum')) # get the total cost-energy-savings weight by sector area tot = ( cost_dE.mul(sec_w, axis=0) From fcef19456699cb696452c5dd83dbd0a996fc994e Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 11 Oct 2023 12:29:38 +0300 Subject: [PATCH 10/15] Reference to a multiindex level by number instead of name --- scripts/build_retro_cost.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/scripts/build_retro_cost.py b/scripts/build_retro_cost.py index 04ae6ef4c..41c2442bb 100644 --- a/scripts/build_retro_cost.py +++ b/scripts/build_retro_cost.py @@ -990,19 +990,22 @@ def sample_dE_costs_area( sec_w = area_tot.div(area_tot.groupby(level=0).transform('sum')) # get the total cost-energy-savings weight by sector area tot = ( - cost_dE.mul(sec_w, axis=0) - .groupby(level="country_code") + # sec_w has columns "estimated" and "value" + cost_dE.mul(sec_w.value, axis=0) + # for some reasons names of the levels were lost somewhere + #.groupby(level="country_code") + .groupby(level=0) .sum() .set_index( pd.MultiIndex.from_product( - [cost_dE.index.unique(level="country_code"), ["tot"]] + [cost_dE.index.unique(level=0), ["tot"]] ) ) ) cost_dE = pd.concat([cost_dE, tot]).unstack().stack() - summed_area = pd.DataFrame(area_tot.groupby("country").sum()).set_index( - pd.MultiIndex.from_product([area_tot.index.unique(level="country"), ["tot"]]) + summed_area = pd.DataFrame(area_tot.groupby(level=0).sum()).set_index( + pd.MultiIndex.from_product([area_tot.index.unique(level=0), ["tot"]]) ) area_tot = pd.concat([area_tot, summed_area]).unstack().stack() From 460fa00b2814d999c8b055805a704e6f5acff480 Mon Sep 17 00:00:00 2001 From: ekatef Date: Wed, 11 Oct 2023 21:43:51 +0300 Subject: [PATCH 11/15] Fix file name --- scripts/prepare_sector_network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/prepare_sector_network.py b/scripts/prepare_sector_network.py index 11406bffc..fc119b218 100644 --- a/scripts/prepare_sector_network.py +++ b/scripts/prepare_sector_network.py @@ -1960,7 +1960,7 @@ def add_heat(n, costs): # demand 'dE' [per unit of original heat demand] for each country and # different retrofitting strengths [additional insulation thickness in m] retro_data = pd.read_csv( - snakemake.input.retro_cost_energy, + snakemake.input.retro_cost, index_col=[0, 1], skipinitialspace=True, header=[0, 1], From dccfabfdac06175add0cd1b14c87169fd13e5b52 Mon Sep 17 00:00:00 2001 From: ekatef Date: Thu, 12 Oct 2023 00:15:47 +0300 Subject: [PATCH 12/15] Fix environment --- envs/environment.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/envs/environment.yaml b/envs/environment.yaml index c3af36bbd..23e617225 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -11,7 +11,7 @@ dependencies: - pip - atlite>=0.2.9 -- dask +- dask<=2023.9.1 # Dependencies of the workflow itself - xlrd @@ -27,7 +27,7 @@ dependencies: - numpy - pandas>=1.4 - geopandas>=0.11.0 -- xarray +- xarray<=2023.8.0 - rioxarray - netcdf4 - networkx From 21148e3cc5048734e21cd941384beaa2c87b3ae0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 13 Oct 2023 10:36:48 +0000 Subject: [PATCH 13/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/build_retro_cost.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/scripts/build_retro_cost.py b/scripts/build_retro_cost.py index 91bdd1be3..2176a3b6d 100644 --- a/scripts/build_retro_cost.py +++ b/scripts/build_retro_cost.py @@ -198,12 +198,11 @@ def prepare_building_stock_data(): } ) - building_data["country_code"] = building_data["country"].map(country_iso_dic) + building_data["country_code"] = building_data["country"].map(country_iso_dic) # heated floor area ---------------------------------------------------------- area = building_data[ - (building_data.type == "Heated area [Mm²]") - & (building_data.detail != "Total") + (building_data.type == "Heated area [Mm²]") & (building_data.detail != "Total") ] area_tot = area[["country", "sector", "value"]].groupby(["country", "sector"]).sum() area = pd.concat( @@ -983,20 +982,16 @@ def sample_dE_costs_area( # drop not considered countries cost_dE = cost_dE.reindex(countries, level=0) # get share of residential and service floor area - sec_w = area_tot.div(area_tot.groupby(level=0).transform('sum')) + sec_w = area_tot.div(area_tot.groupby(level=0).transform("sum")) # get the total cost-energy-savings weight by sector area tot = ( # sec_w has columns "estimated" and "value" cost_dE.mul(sec_w.value, axis=0) # for some reasons names of the levels were lost somewhere - #.groupby(level="country_code") + # .groupby(level="country_code") .groupby(level=0) .sum() - .set_index( - pd.MultiIndex.from_product( - [cost_dE.index.unique(level=0), ["tot"]] - ) - ) + .set_index(pd.MultiIndex.from_product([cost_dE.index.unique(level=0), ["tot"]])) ) cost_dE = pd.concat([cost_dE, tot]).unstack().stack() From f88ce0ea075eb0bb4d2864096cdf5e918d07805a Mon Sep 17 00:00:00 2001 From: ekatef Date: Fri, 13 Oct 2023 14:51:25 +0300 Subject: [PATCH 14/15] Adjust data structure --- rules/build_sector.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/build_sector.smk b/rules/build_sector.smk index 21a588bed..5a9e8646d 100644 --- a/rules/build_sector.smk +++ b/rules/build_sector.smk @@ -609,7 +609,7 @@ if config["sector"]["retrofitting"]["retro_endogen"]: countries=config["countries"], input: building_stock="data/retro/data_building_stock.csv", - data_tabula="data/retro/tabula-calculator-calcsetbuilding.csv", + data_tabula="data/bundle-sector/retro/tabula-calculator-calcsetbuilding.csv", air_temperature=RESOURCES + "temp_air_total_elec_s{simpl}_{clusters}.nc", u_values_PL="data/retro/u_values_poland.csv", tax_w="data/retro/electricity_taxes_eu.csv", From 6f0a73f169f270eef1c7d8c6664c1b6571aa034b Mon Sep 17 00:00:00 2001 From: ekatef Date: Fri, 13 Oct 2023 14:52:20 +0300 Subject: [PATCH 15/15] Release dask constraint --- envs/environment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/envs/environment.yaml b/envs/environment.yaml index 7dfcf1656..696eb01f6 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -11,7 +11,7 @@ dependencies: - pip - atlite>=0.2.9 -- dask<=2023.9.1 +- dask # Dependencies of the workflow itself - xlrd