Add Marktstammdatenregister (MaStR) (#165)

* fix: resolve auto downcasting warning * feat: add MaStR data * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * improve data cleaning and performance * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix typo * reset threshold to 1 MW for now --------- Co-authored-by: Fabian Neumann <fabian.neumann@outlook.de> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
PyPSA · Jan 10, 2025 · fa8b827 · fa8b827
1 parent b42a068
commit fa8b827
Show file tree

Hide file tree

Showing 3 changed files with 150 additions and 14 deletions.
diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py
@@ -445,7 +445,9 @@ def aggregate_units(
     df = cliques(df, duplicates)
     df = df.groupby("grouped").agg(props_for_groups)
 
-    df[str_cols] = df[str_cols].replace("", pd.NA)
+    # Downcasting in replace is deprecated
+    with pd.option_context("future.no_silent_downcasting", True):
+        df[str_cols] = df[str_cols].replace("", pd.NA).infer_objects(copy=False)
 
     df = (
         df.assign(

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
@@ -2144,6 +2144,127 @@ def GEM(raw=False, update=False, config=None):
     return pd.concat(data, ignore_index=True)
 
 
+def MASTR(
+    raw=False,
+    update=False,
+    config=None,
+):
+    """
+    Get the Marktstammdatenregister (MaStR) dataset.
+
+    Provided by the German Federal Network Agency (Bundesnetzagentur / BNetza) and
+    contains data on Germany, Austria and Switzerland.
+
+    Parameters
+    ----------
+    raw : Boolean, default False
+        Whether to return the original dataset
+    update: bool, default False
+        Whether to update the data from the url.
+    config : dict, default None
+        Add custom specific configuration,
+        e.g. powerplantmatching.config.get_config(target_countries='Italy'),
+        defaults to powerplantmatching.config.get_config()
+
+    """
+    config = get_config() if config is None else config
+
+    RENAME_COLUMNS = {
+        "EinheitMastrNummer": "projectID",
+        "NameKraftwerk": "Name",
+        "Land": "Country",
+        "Nettonennleistung": "Capacity",
+        "Inbetriebnahmedatum": "DateIn",
+        "DatumEndgueltigeStilllegung": "DateOut",
+        "EinheitBetriebsstatus": "Status",
+        "Laengengrad": "lon",
+        "Breitengrad": "lat",
+    }
+    COUNTRY_MAP = {
+        "Deutschland": "Germany",
+        "Österreich": "Austria",
+        "Schweiz": "Switzerland",
+    }
+    PARSE_COLUMNS = [
+        "ArtDerWasserkraftanlage",
+        "Biomasseart",
+        "Filesuffix",
+        "Energietraeger",
+        "Hauptbrennstoff",
+        "NameStromerzeugungseinheit",
+    ]
+
+    fn = get_raw_file("MASTR", update=update, config=config)
+    file_suffixes = {
+        "Bioenergy": "biomass.csv",
+        "Combustion": "combustion.csv",
+        "Nuclear": "nuclear.csv",
+        "Hydro": "hydro.csv",
+        "Wind": "wind.csv",
+        "Solar": "solar.csv",
+    }
+    data_frames = []
+    with ZipFile(fn, "r") as file:
+        for fueltype, suffix in file_suffixes.items():
+            for name in file.namelist():
+                if name.endswith(suffix):
+                    available_columns = pd.read_csv(file.open(name), nrows=0).columns
+                    target_columns = [
+                        "GeplantesInbetriebnahmedatum",
+                        "ThermischeNutzleistung",
+                        "KwkMastrNummer",
+                    ]
+                    target_columns = (
+                        target_columns + PARSE_COLUMNS + list(RENAME_COLUMNS.keys())
+                    )
+                    usecols = available_columns.intersection(target_columns)
+                    df = pd.read_csv(file.open(name), usecols=usecols).assign(
+                        Filesuffix=fueltype
+                    )
+                    data_frames.append(df)
+                    break
+    df = pd.concat(data_frames).reset_index(drop=True)
+
+    if raw:
+        return df
+
+    status_list = config["MASTR"].get("status", ["In Betrieb"])  # noqa: F841
+    capacity_threshold_kw = 1000
+
+    df = (
+        df.rename(columns=RENAME_COLUMNS)
+        .query("Status in @status_list")
+        .loc[lambda df: df.Capacity > capacity_threshold_kw]
+        .assign(
+            projectID=lambda df: "MASTR-" + df.projectID,
+            Country=lambda df: df.Country.map(COUNTRY_MAP),
+            Capacity=lambda df: df.Capacity / 1e3,  # kW to MW
+            DateIn=lambda df: pd.to_datetime(df.DateIn).dt.year,
+            DateOut=lambda df: pd.to_datetime(df.DateOut).dt.year,
+        )
+        .assign(
+            DateIn=lambda df: df["DateIn"].combine_first(
+                pd.to_datetime(df["GeplantesInbetriebnahmedatum"]).dt.year
+            ),
+        )
+        .pipe(
+            gather_specifications,
+            config=config,
+            parse_columns=PARSE_COLUMNS,
+        )
+        .assign(
+            Set=lambda df: df["Set"].where(
+                df["KwkMastrNummer"].isna() & df["ThermischeNutzleistung"].isna(), "CHP"
+            ),
+        )
+        .pipe(clean_name)
+        .pipe(set_column_name, "MASTR")
+        .pipe(config_filter, config)
+    )
+
+    return df
+
+
 # deprecated alias for GGPT
 @deprecated(
     deprecated_in="0.5.5",

diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
@@ -24,6 +24,7 @@ matching_sources:
   - BEYONDCOAL: Fueltype != 'Solar'
   - WIKIPEDIA: Fueltype != 'Solar'
   - GEM
+  - MASTR
 
 # fully_included_sources, these sources are included even without match to the final dataset
 fully_included_sources:
@@ -34,7 +35,8 @@ fully_included_sources:
   - JRC: Country not in ['Switzerland', 'Albania', 'United Kingdom', 'Norway']
   - OPSD: Country not in ['Switzerland', 'Italy', 'Spain', 'Norway', 'Austria']
   - BEYONDCOAL
-  - GEM
+  - GEM: Country != 'Germany' or Fueltype == 'Solar'
+  - MASTR
 
 
 parallel_duke_processes: false
@@ -202,6 +204,14 @@ GHPT:
   status: ["operating", "retired", "construction"]
   fn: Global-Hydropower-Tracker-April-2024.xlsx
   url: https://tubcloud.tu-berlin.de/s/sEztyBLdJS5sNHY/download/Global-Hydropower-Tracker-April-2024.xlsx
+
+MASTR:
+  net_capacity: true
+  reliability_score: 8
+  status: ["In Betrieb", "In Planung", "Endgültig stillgelegt"]
+  fn: bnetza_open_mastr_2023-08-08_B.zip
+  url: https://zenodo.org/records/8225106/files/bnetza_open_mastr_2023-08-08_B.zip
+
 # ---------------------------------------------------------------------------- #
 #                             Data Structure Config                            #
 # ---------------------------------------------------------------------------- #
@@ -269,8 +279,8 @@ target_fueltypes:
   # given by the list. An empty string results in a regex expression containing only the key.
   # Representatives parsed at the top may be overwritten by representatives further below.
   Other: ".*"
-  Solid Biomass: [biological, bioenergy, agricultural, wood, biomass]
-  Biogas: [biogas]
+  Solid Biomass: [biological, bioenergy, agricultural, wood, biomass, feste biomasse]
+  Biogas: [biogas, biomethan, gasförmige biomasse]
   Nuclear: [nuclear]
   Natural Gas:
     [
@@ -282,6 +292,8 @@ target_fueltypes:
       combined cycle,
       fossil gas,
       mixed fossil fuels,
+      erdgas,
+      andere gase,
     ]
   Hydro:
     [
@@ -293,13 +305,14 @@ target_fueltypes:
       hydro,
       hydroelectric,
       wasserkraft,
+      wasser,
     ]
-  Hard Coal: [coal, coke]
-  Lignite: [brown coal, lignite, peat]
-  Oil: [oil, diesel]
+  Hard Coal: [coal, coke, steinkohle]
+  Lignite: [brown coal, lignite, peat, braunkohle]
+  Oil: [oil, diesel, mineralölprodukte]
   Geothermal: ""
   Solar: ""
-  Waste: ""
+  Waste: ["abfall.*", "waste"]
   Wind: ""
   Battery: [Electro-chemical, battery]
 target_sets:
@@ -328,12 +341,12 @@ target_technologies:
   # A list will be converted to a regex expression matching all words (case-insensitive)
   # given by the list. An empty string results in a regex expression containing only the key.
   # Representatives parsed at the top may be overwritten by representatives further below.
-  CCGT: [ccgt, gas, natural gas]
-  OCGT: [ocgt]
-  Steam Turbine: [steam, turbine]
-  Combustion Engine: [combustion engine]
-  Run-Of-River: [run-off, run off, run of river, run-of-river, ror]
-  Pumped Storage: [pumped hydro, pumped]
+  CCGT: [ccgt, gas, natural gas, gasturbinen mit abhitzekessel]
+  OCGT: [ocgt, gasturbinen ohne abhitzekessel]
+  Steam Turbine: [steam, turbine, kondensationsmaschine, gegendruckmaschine, dampfmotor]
+  Combustion Engine: [combustion engine, verbrennungsmotor, stirlingmotor]
+  Run-Of-River: [run-off, run off, run of river, run-of-river, ror, laufwasseranlage]
+  Pumped Storage: [pumped hydro, pumped, speicherwasseranlage]
   Reservoir: ""
   Marine: ""
   Onshore: ""