Skip to content

Commit

Permalink
Add Marktstammdatenregister (MaStR) (#165)
Browse files Browse the repository at this point in the history
* fix: resolve auto downcasting warning

* feat: add MaStR data

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* improve data cleaning and performance

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix typo

* reset threshold to 1 MW for now

---------

Co-authored-by: Fabian Neumann <fabian.neumann@outlook.de>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Jan 10, 2025
1 parent b42a068 commit fa8b827
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 14 deletions.
4 changes: 3 additions & 1 deletion powerplantmatching/cleaning.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,9 @@ def aggregate_units(
df = cliques(df, duplicates)
df = df.groupby("grouped").agg(props_for_groups)

df[str_cols] = df[str_cols].replace("", pd.NA)
# Downcasting in replace is deprecated
with pd.option_context("future.no_silent_downcasting", True):
df[str_cols] = df[str_cols].replace("", pd.NA).infer_objects(copy=False)

df = (
df.assign(
Expand Down
121 changes: 121 additions & 0 deletions powerplantmatching/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2144,6 +2144,127 @@ def GEM(raw=False, update=False, config=None):
return pd.concat(data, ignore_index=True)


def MASTR(
    raw=False,
    update=False,
    config=None,
):
    """
    Get the Marktstammdatenregister (MaStR) dataset.

    Provided by the German Federal Network Agency (Bundesnetzagentur / BNetzA) and
    contains data on Germany, Austria and Switzerland.

    Parameters
    ----------
    raw : bool, default False
        Whether to return the original dataset
    update : bool, default False
        Whether to update the data from the url.
    config : dict, default None
        Add custom specific configuration,
        e.g. powerplantmatching.config.get_config(target_countries='Italy'),
        defaults to powerplantmatching.config.get_config()

    Returns
    -------
    pandas.DataFrame
        With ``raw=True`` the concatenated per-fueltype CSV tables (restricted to
        the columns used below, plus a ``Filesuffix`` column holding the fueltype
        key of the file each row came from). Otherwise the renamed, filtered and
        cleaned table.
    """
    config = get_config() if config is None else config

    RENAME_COLUMNS = {
        "EinheitMastrNummer": "projectID",
        "NameKraftwerk": "Name",
        "Land": "Country",
        "Nettonennleistung": "Capacity",
        "Inbetriebnahmedatum": "DateIn",
        "DatumEndgueltigeStilllegung": "DateOut",
        "EinheitBetriebsstatus": "Status",
        "Laengengrad": "lon",
        "Breitengrad": "lat",
    }
    COUNTRY_MAP = {
        "Deutschland": "Germany",
        "Österreich": "Austria",
        "Schweiz": "Switzerland",
    }
    PARSE_COLUMNS = [
        "ArtDerWasserkraftanlage",
        "Biomasseart",
        "Filesuffix",
        "Energietraeger",
        "Hauptbrennstoff",
        "NameStromerzeugungseinheit",
    ]
    # Columns that are neither renamed nor parsed but are read further below
    # (planned commissioning date as DateIn fallback, CHP indicators).
    EXTRA_COLUMNS = [
        "GeplantesInbetriebnahmedatum",
        "ThermischeNutzleistung",
        "KwkMastrNummer",
    ]
    # Loop-invariant: the same set of columns is requested from every file.
    target_columns = EXTRA_COLUMNS + PARSE_COLUMNS + list(RENAME_COLUMNS)

    fn = get_raw_file("MASTR", update=update, config=config)
    file_suffixes = {
        "Bioenergy": "biomass.csv",
        "Combustion": "combustion.csv",
        "Nuclear": "nuclear.csv",
        "Hydro": "hydro.csv",
        "Wind": "wind.csv",
        "Solar": "solar.csv",
    }
    data_frames = []
    with ZipFile(fn, "r") as file:
        member_names = file.namelist()
        for fueltype, suffix in file_suffixes.items():
            # Only the first archive member matching the suffix is used.
            name = next((n for n in member_names if n.endswith(suffix)), None)
            if name is None:
                continue

            # Read just the header first: not every file provides every target
            # column, and `usecols` must only name columns that exist.
            with file.open(name) as header:
                available_columns = pd.read_csv(header, nrows=0).columns
            usecols = available_columns.intersection(target_columns)

            # Explicitly close each member handle instead of leaking it.
            with file.open(name) as handle:
                df = pd.read_csv(handle, usecols=usecols).assign(
                    Filesuffix=fueltype
                )
            data_frames.append(df)
    df = pd.concat(data_frames).reset_index(drop=True)

    if raw:
        return df

    # `status_list` is referenced via `@status_list` inside `DataFrame.query`,
    # which resolves it from this function's local scope; linters do not see
    # that usage, hence the noqa.
    status_list = config["MASTR"].get("status", ["In Betrieb"])  # noqa: F841
    capacity_threshold_kw = 1000

    df = (
        df.rename(columns=RENAME_COLUMNS)
        .query("Status in @status_list")
        .loc[lambda df: df.Capacity > capacity_threshold_kw]
        .assign(
            projectID=lambda df: "MASTR-" + df.projectID,
            Country=lambda df: df.Country.map(COUNTRY_MAP),
            Capacity=lambda df: df.Capacity / 1e3,  # kW to MW
            DateIn=lambda df: pd.to_datetime(df.DateIn).dt.year,
            DateOut=lambda df: pd.to_datetime(df.DateOut).dt.year,
        )
        .assign(
            # Fall back to the planned commissioning year where no actual
            # commissioning date is given.
            DateIn=lambda df: df["DateIn"].combine_first(
                pd.to_datetime(df["GeplantesInbetriebnahmedatum"]).dt.year
            ),
        )
        .pipe(
            gather_specifications,
            config=config,
            parse_columns=PARSE_COLUMNS,
        )
        .assign(
            # Keep the parsed Set only where both CHP indicators are missing;
            # every other unit is labelled "CHP".
            Set=lambda df: df["Set"].where(
                df["KwkMastrNummer"].isna() & df["ThermischeNutzleistung"].isna(),
                "CHP",
            ),
        )
        .pipe(clean_name)
        .pipe(set_column_name, "MASTR")
        .pipe(config_filter, config)
    )

    return df


# deprecated alias for GGPT
@deprecated(
deprecated_in="0.5.5",
Expand Down
39 changes: 26 additions & 13 deletions powerplantmatching/package_data/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ matching_sources:
- BEYONDCOAL: Fueltype != 'Solar'
- WIKIPEDIA: Fueltype != 'Solar'
- GEM
- MASTR

# fully_included_sources, these sources are included even without match to the final dataset
fully_included_sources:
Expand All @@ -34,7 +35,8 @@ fully_included_sources:
- JRC: Country not in ['Switzerland', 'Albania', 'United Kingdom', 'Norway']
- OPSD: Country not in ['Switzerland', 'Italy', 'Spain', 'Norway', 'Austria']
- BEYONDCOAL
- GEM
- GEM: Country != 'Germany' or Fueltype == 'Solar'
- MASTR


parallel_duke_processes: false
Expand Down Expand Up @@ -202,6 +204,14 @@ GHPT:
status: ["operating", "retired", "construction"]
fn: Global-Hydropower-Tracker-April-2024.xlsx
url: https://tubcloud.tu-berlin.de/s/sEztyBLdJS5sNHY/download/Global-Hydropower-Tracker-April-2024.xlsx

MASTR:
net_capacity: true
reliability_score: 8
status: ["In Betrieb", "In Planung", "Endgültig stillgelegt"]
fn: bnetza_open_mastr_2023-08-08_B.zip
url: https://zenodo.org/records/8225106/files/bnetza_open_mastr_2023-08-08_B.zip

# ---------------------------------------------------------------------------- #
# Data Structure Config #
# ---------------------------------------------------------------------------- #
Expand Down Expand Up @@ -269,8 +279,8 @@ target_fueltypes:
# given by the list. An empty string results in a regex expression containing only the key.
# Matches for representatives at the top may be overwritten by matches for representatives further below.
Other: ".*"
Solid Biomass: [biological, bioenergy, agricultural, wood, biomass]
Biogas: [biogas]
Solid Biomass: [biological, bioenergy, agricultural, wood, biomass, feste biomasse]
Biogas: [biogas, biomethan, gasförmige biomasse]
Nuclear: [nuclear]
Natural Gas:
[
Expand All @@ -282,6 +292,8 @@ target_fueltypes:
combined cycle,
fossil gas,
mixed fossil fuels,
erdgas,
andere gase,
]
Hydro:
[
Expand All @@ -293,13 +305,14 @@ target_fueltypes:
hydro,
hydroelectric,
wasserkraft,
wasser,
]
Hard Coal: [coal, coke]
Lignite: [brown coal, lignite, peat]
Oil: [oil, diesel]
Hard Coal: [coal, coke, steinkohle]
Lignite: [brown coal, lignite, peat, braunkohle]
Oil: [oil, diesel, mineralölprodukte]
Geothermal: ""
Solar: ""
Waste: ""
Waste: ["abfall.*", "waste"]
Wind: ""
Battery: [Electro-chemical, battery]
target_sets:
Expand Down Expand Up @@ -328,12 +341,12 @@ target_technologies:
# A list will be converted to a regex expression matching all words (case-insensitive)
# given by the list. An empty string results in a regex expression containing only the key.
# Matches for representatives at the top may be overwritten by matches for representatives further below.
CCGT: [ccgt, gas, natural gas]
OCGT: [ocgt]
Steam Turbine: [steam, turbine]
Combustion Engine: [combustion engine]
Run-Of-River: [run-off, run off, run of river, run-of-river, ror]
Pumped Storage: [pumped hydro, pumped]
CCGT: [ccgt, gas, natural gas, gasturbinen mit abhitzekessel]
OCGT: [ocgt, gasturbinen ohne abhitzekessel]
Steam Turbine: [steam, turbine, kondensationsmaschine, gegendruckmaschine, dampfmotor]
Combustion Engine: [combustion engine, verbrennungsmotor, stirlingmotor]
Run-Of-River: [run-off, run off, run of river, run-of-river, ror, laufwasseranlage]
Pumped Storage: [pumped hydro, pumped, speicherwasseranlage]
Reservoir: ""
Marine: ""
Onshore: ""
Expand Down

0 comments on commit fa8b827

Please sign in to comment.