From ab4dac0eebc65b74ad31fadb79ea2073c68bac90 Mon Sep 17 00:00:00 2001
From: FelixMau <fmau@posteo.de>
Date: Mon, 10 Jun 2024 11:33:49 +0200
Subject: [PATCH] Include nan value handler

---
 data_adapter_oemof/build_datapackage.py | 19 ++-----
 data_adapter_oemof/calculations.py      | 66 +++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 16 deletions(-)
diff --git a/data_adapter_oemof/build_datapackage.py b/data_adapter_oemof/build_datapackage.py
index a4c653e..d6532b9 100644
--- a/data_adapter_oemof/build_datapackage.py
+++ b/data_adapter_oemof/build_datapackage.py
@@ -11,6 +11,7 @@
 
 from data_adapter_oemof.adapters import FACADE_ADAPTERS
 from data_adapter_oemof.adapters import Adapter as FacadeAdapter
+from data_adapter_oemof.calculations import handle_nans
 from data_adapter_oemof.settings import BUS_MAP, PARAMETER_MAP, PROCESS_ADAPTER_MAP
 from data_adapter_oemof.utils import convert_mixed_types_to_same_length
 
@@ -48,10 +49,11 @@ def _listify_to_periodic(group_df) -> pd.Series:
 
 
     """
-
+    handle_nans(group_df)
     if "year" not in group_df.columns:
         return group_df
     unique_values = pd.Series(dtype=object)
+
     for col in group_df.columns:
         if isinstance(group_df[col][group_df.index[0]], dict):
             # Unique input/output parameters are not allowed per period
@@ -64,21 +66,6 @@ def _listify_to_periodic(group_df) -> pd.Series:
         ):
             values = group_df[col].explode().unique()
         else:
-            # FIXME: Hotfix "if not" statement to replace nan values from lists:
-            #   in final data only complete datasets are expected.
-            if not all(group_df[col].isna()) and any(group_df[col].isna()):
-                group_df.loc[group_df[col].isna(), col] = (
-                    group_df[col]
-                    .dropna()
-                    .sample(
-                        group_df[col]
-                        .isna()
-                        .sum(),  # get the same number of values as are missing
-                        replace=True,
-                        random_state=0,
-                    )
-                    .values
-                )  # throw out the index
             values = group_df[col].unique()
         if len(values) > 1:
             if isinstance(group_df[col].iloc[0], list):
diff --git a/data_adapter_oemof/calculations.py b/data_adapter_oemof/calculations.py
index 40934d3..c180d1f 100644
--- a/data_adapter_oemof/calculations.py
+++ b/data_adapter_oemof/calculations.py
@@ -3,6 +3,7 @@
 import warnings
 
 import numpy as np
+import pandas as pd
 from oemof.tools.economics import annuity
 
 
@@ -188,3 +189,68 @@ def floor_lifetime(mapped_defaults):
         warnings.warn("Lifetime cannot change in Multi-period modeling")
         mapped_defaults["lifetime"] = int(np.floor(mapped_defaults["lifetime"][0]))
     return mapped_defaults
+
+
+def handle_nans(group_df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Fill gaps in partially missing columns of `group_df` in place.
+
+    A column is only touched if it holds NaN in some but not all rows;
+    completely empty columns and the free-text columns "method", "source",
+    "comment" and "bandwidth_type" are left unchanged.
+
+    Missing max value is set to 9999999999999.
+    Missing min value, and a gap in any other column, is set to 0.
+
+    Min values:
+    capacity_p_min
+    capacity_e_min
+    capacity_w_min
+    flow_share_min_<commodity>
+
+    Max values:
+    potential_annual_max
+    capacity_p_max
+    capacity_e_max
+    capacity_w_max
+    capacity_p_abs_new_max
+    capacity_e_abs_new_max
+    capacity_w_abs_new_max
+    availability_timeseries_max
+    capacity_tra_connection_max
+    flow_share_max_<commodity>
+    sto_cycles_max
+    sto_max_timeseries
+
+    Returns
+    -------
+    pd.DataFrame
+        The same `group_df` object, with the gaps filled in place.
+    """
+    max_value = 9999999999999
+    min_value = 0
+    skipped = {"method", "source", "comment", "bandwidth_type"}
+    max_columns = {
+        "potential_annual_max",
+        "capacity_p_max",
+        "capacity_e_max",
+        "capacity_w_max",
+        "capacity_p_abs_new_max",
+        "capacity_e_abs_new_max",
+        "capacity_w_abs_new_max",
+        "availability_timeseries_max",
+        "capacity_tra_connection_max",
+        "sto_cycles_max",
+        "sto_max_timeseries",
+    }
+    for column in group_df.columns:
+        if column in skipped:
+            continue
+        # isna() instead of nunique(): also works for dict/list cells.
+        missing = group_df[column].isna()
+        if not missing.any() or missing.all():
+            continue
+        is_max = column in max_columns or str(column).startswith("flow_share_max_")
+        # .loc mutates group_df itself; chained fillna(inplace=True) may not.
+        group_df.loc[missing, column] = max_value if is_max else min_value
+    return group_df