Skip to content

Commit

Permalink
Include nan value handler
Browse files Browse the repository at this point in the history
  • Loading branch information
FelixMau committed Jun 10, 2024
1 parent d9e872d commit ab4dac0
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 16 deletions.
19 changes: 3 additions & 16 deletions data_adapter_oemof/build_datapackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from data_adapter_oemof.adapters import FACADE_ADAPTERS
from data_adapter_oemof.adapters import Adapter as FacadeAdapter
from data_adapter_oemof.calculations import handle_nans
from data_adapter_oemof.settings import BUS_MAP, PARAMETER_MAP, PROCESS_ADAPTER_MAP
from data_adapter_oemof.utils import convert_mixed_types_to_same_length

Expand Down Expand Up @@ -48,10 +49,11 @@ def _listify_to_periodic(group_df) -> pd.Series:
"""

handle_nans(group_df)
if "year" not in group_df.columns:
return group_df
unique_values = pd.Series(dtype=object)

for col in group_df.columns:
if isinstance(group_df[col][group_df.index[0]], dict):
# Unique input/output parameters are not allowed per period
Expand All @@ -64,21 +66,6 @@ def _listify_to_periodic(group_df) -> pd.Series:
):
values = group_df[col].explode().unique()
else:
# FIXME: Hotfix "if not" statement to replace nan values from lists:
# in final data only complete datasets are expected.
if not all(group_df[col].isna()) and any(group_df[col].isna()):
group_df.loc[group_df[col].isna(), col] = (
group_df[col]
.dropna()
.sample(
group_df[col]
.isna()
.sum(), # get the same number of values as are missing
replace=True,
random_state=0,
)
.values
) # throw out the index
values = group_df[col].unique()
if len(values) > 1:
if isinstance(group_df[col].iloc[0], list):
Expand Down
66 changes: 66 additions & 0 deletions data_adapter_oemof/calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import warnings

import numpy as np
import pandas as pd
from oemof.tools.economics import annuity


Expand Down Expand Up @@ -188,3 +189,68 @@ def floor_lifetime(mapped_defaults):
warnings.warn("Lifetime cannot change in Multi-period modeling")
mapped_defaults["lifetime"] = int(np.floor(mapped_defaults["lifetime"][0]))
return mapped_defaults


def handle_nans(group_df: pd.DataFrame) -> pd.DataFrame:
    """
    Fill missing values in partially populated columns of ``group_df``.

    Only columns that contain at least one missing and at least one
    non-missing value are touched; columns that are complete or entirely
    missing are left unchanged. The descriptive columns ``method``,
    ``source``, ``comment`` and ``bandwidth_type`` are never modified.

    Missing values are replaced as follows:

    * Upper-bound ("max") columns are filled with ``9999999999999``:
      potential_annual_max, capacity_p_max, capacity_e_max,
      capacity_w_max, capacity_p_abs_new_max, capacity_e_abs_new_max,
      capacity_w_abs_new_max, availability_timeseries_max,
      capacity_tra_connection_max, sto_cycles_max, sto_max_timeseries
      and every column named ``flow_share_max_<commodity>``.
    * Lower-bound ("min") columns are filled with ``0``:
      capacity_p_min, capacity_e_min, capacity_w_min and every column
      named ``flow_share_min_<commodity>``.
    * Every other column is filled with ``0`` as well.

    Parameters
    ----------
    group_df : pd.DataFrame
        Data to complete. The frame is modified in place.

    Returns
    -------
    pd.DataFrame
        The same ``group_df`` object, returned for convenience.
    """
    max_value = 9999999999999
    min_value = 0

    skipped_columns = ("method", "source", "comment", "bandwidth_type")

    max_columns = (
        "potential_annual_max",
        "capacity_p_max",
        "capacity_e_max",
        "capacity_w_max",
        "capacity_p_abs_new_max",
        "capacity_e_abs_new_max",
        "capacity_w_abs_new_max",
        "availability_timeseries_max",
        "capacity_tra_connection_max",
        "sto_cycles_max",
        "sto_max_timeseries",
    )
    # Commodity-specific columns carry the commodity as a suffix, so they
    # have to be recognised by prefix rather than by exact name. The bare
    # prefix itself is still accepted, as it was before.
    max_prefixes = ("flow_share_max_",)

    for column in group_df.columns:
        if column in skipped_columns:
            continue

        missing = group_df[column].isna()
        # Nothing to fill, or nothing to fill from: leave the column alone.
        # Checking the NaN mask (instead of counting unique values) also
        # works for columns holding unhashable objects such as dicts/lists.
        if not missing.any() or missing.all():
            continue

        is_max_column = column in max_columns or (
            isinstance(column, str) and column.startswith(max_prefixes)
        )
        # Min columns and all remaining columns share the same fill value.
        fill_value = max_value if is_max_column else min_value

        # Assign the filled column back instead of using an in-place fill on
        # the selected column: the latter operates on a temporary object and
        # does not reliably update ``group_df``.
        group_df[column] = group_df[column].fillna(fill_value)

    return group_df

0 comments on commit ab4dac0

Please sign in to comment.