From ab4dac0eebc65b74ad31fadb79ea2073c68bac90 Mon Sep 17 00:00:00 2001 From: FelixMau Date: Mon, 10 Jun 2024 11:33:49 +0200 Subject: [PATCH] Include nan value handler --- data_adapter_oemof/build_datapackage.py | 19 ++----- data_adapter_oemof/calculations.py | 66 +++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 16 deletions(-) diff --git a/data_adapter_oemof/build_datapackage.py b/data_adapter_oemof/build_datapackage.py index a4c653e..d6532b9 100644 --- a/data_adapter_oemof/build_datapackage.py +++ b/data_adapter_oemof/build_datapackage.py @@ -11,6 +11,7 @@ from data_adapter_oemof.adapters import FACADE_ADAPTERS from data_adapter_oemof.adapters import Adapter as FacadeAdapter +from data_adapter_oemof.calculations import handle_nans from data_adapter_oemof.settings import BUS_MAP, PARAMETER_MAP, PROCESS_ADAPTER_MAP from data_adapter_oemof.utils import convert_mixed_types_to_same_length @@ -48,10 +49,11 @@ def _listify_to_periodic(group_df) -> pd.Series: """ - + handle_nans(group_df) if "year" not in group_df.columns: return group_df unique_values = pd.Series(dtype=object) + for col in group_df.columns: if isinstance(group_df[col][group_df.index[0]], dict): # Unique input/output parameters are not allowed per period @@ -64,21 +66,6 @@ def _listify_to_periodic(group_df) -> pd.Series: ): values = group_df[col].explode().unique() else: - # FIXME: Hotfix "if not" statement to replace nan values from lists: - # in final data only complete datasets are expected. 
- if not all(group_df[col].isna()) and any(group_df[col].isna()): - group_df.loc[group_df[col].isna(), col] = ( - group_df[col] - .dropna() - .sample( - group_df[col] - .isna() - .sum(), # get the same number of values as are missing - replace=True, - random_state=0, - ) - .values - ) # throw out the index values = group_df[col].unique() if len(values) > 1: if isinstance(group_df[col].iloc[0], list): diff --git a/data_adapter_oemof/calculations.py b/data_adapter_oemof/calculations.py index 40934d3..c180d1f 100644 --- a/data_adapter_oemof/calculations.py +++ b/data_adapter_oemof/calculations.py @@ -3,6 +3,7 @@ import warnings import numpy as np +import pandas as pd from oemof.tools.economics import annuity @@ -188,3 +189,68 @@ def floor_lifetime(mapped_defaults): warnings.warn("Lifetime cannot change in Multi-period modeling") mapped_defaults["lifetime"] = int(np.floor(mapped_defaults["lifetime"][0])) return mapped_defaults + + +def handle_nans(group_df: pd.DataFrame) -> None: + """ + Fill missing (NaN) values of partially filled columns of ``group_df`` in place. + + Missing min value (and any other missing value) is set to 0. + Missing max value is set to 9999999999999. 
+ + Min values: + capacity_p_min + capacity_e_min + capacity_w_min + flow_share_min_ + + Max values: + potential_annual_max + capacity_p_max + capacity_e_max + capacity_w_max + capacity_p_abs_new_max + capacity_e_abs_new_max + capacity_w_abs_new_max + availability_timeseries_max + capacity_tra_connection_max + flow_share_max_ + sto_cycles_max + sto_max_timeseries + + Returns + ------- + None; ``group_df`` is modified in place. + """ + + max_value = 9999999999999 + min_value = 0 + + min_cols = ["capacity_p_min", "capacity_e_min", "capacity_w_min", "flow_share_min_"] + + max_cols = [ + "potential_annual_max", + "capacity_p_max", + "capacity_e_max", + "capacity_w_max", + "capacity_p_abs_new_max", + "capacity_e_abs_new_max", + "capacity_w_abs_new_max", + "availability_timeseries_max", + "capacity_tra_connection_max", + "flow_share_max_", # NOTE(review): prefix? matched exactly below - confirm + "sto_cycles_max", + "sto_max_timeseries", + ] + + for column in group_df.columns: + if column in ["method", "source", "comment", "bandwidth_type"]: + continue + + if group_df[column].isna().any() and not group_df[column].isna().all(): + if column in max_cols: + group_df[column] = group_df[column].fillna(max_value) + elif column in min_cols: + group_df[column] = group_df[column].fillna(min_value) + else: # every other partially filled column also falls back to min_value + group_df[column] = group_df[column].fillna(min_value)