Skip to content

Commit

Permalink
neuralforecast rmse
Browse files Browse the repository at this point in the history
  • Loading branch information
americast committed Oct 31, 2023
1 parent 70088d2 commit 2c28b81
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 30 deletions.
46 changes: 42 additions & 4 deletions evadb/executor/create_function_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
from pathlib import Path
from typing import Dict, List

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

from evadb.catalog.catalog_utils import get_metadata_properties
from evadb.catalog.models.function_catalog import FunctionCatalogEntry
Expand Down Expand Up @@ -534,6 +536,25 @@ def get_optuna_config(trial):
with set_env(CUDA_VISIBLE_DEVICES=cuda_devices_here):
model.fit(df=data, val_size=horizon)
model.save(model_path, overwrite=True)
rmses = []
crossvalidation_df = model.cross_validation(
df=data, val_size=horizon
)
for uid in crossvalidation_df.unique_id.unique():
crossvalidation_df_here = crossvalidation_df[
crossvalidation_df.unique_id == uid
]
rmses.append(
mean_squared_error(
crossvalidation_df_here.y,
crossvalidation_df_here[arg_map["model"] + "-median"],
squared=False,
)
/ np.mean(crossvalidation_df_here.y)
)
mean_rmse = np.mean(rmses)
with open(model_path + "_rmse", "w") as f:
f.write(str(mean_rmse) + "\n")
else:
# The following lines of code help eliminate the math error encountered in statsforecast when only one data point is available in a time series.
for col in data["unique_id"].unique():
Expand All @@ -546,9 +567,29 @@ def get_optuna_config(trial):
f = open(model_path, "wb")
pickle.dump(model, f)
f.close()
crossvalidation_df = model.cross_validation(
df=self.training_data[["ds", "y", "unique_id"]],
h=self.horizon,
step_size=24,
n_windows=1,
).reset_index()
for uid in crossvalidation_df.unique_id.unique():
crossvalidation_df_here = crossvalidation_df[
crossvalidation_df.unique_id == uid
]
rmses.append(
mean_squared_error(
crossvalidation_df_here.y,
crossvalidation_df_here[arg_map["model"]],
squared=False,
)
/ np.mean(crossvalidation_df_here.y)
)
mean_rmse = np.mean(rmses)
with open(model_path + "_rmse", "w") as f:
f.write(str(mean_rmse) + "\n")
elif not Path(model_path).exists():
model_path = os.path.join(model_dir, existing_model_files[-1])

io_list = self._resolve_function_io(None)
data["ds"] = data.ds.astype(str)
metadata_here = [
Expand All @@ -566,9 +607,6 @@ def get_optuna_config(trial):
FunctionMetadataCatalogEntry("horizon", horizon),
FunctionMetadataCatalogEntry("library", library),
FunctionMetadataCatalogEntry("conf", conf),
FunctionMetadataCatalogEntry(
"data", data.to_json(path_or_buf=None, orient="split")
),
]

return (
Expand Down
29 changes: 3 additions & 26 deletions evadb/functions/forecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@

import pickle

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

from evadb.functions.abstract.abstract_function import AbstractFunction
from evadb.functions.decorators.decorators import setup
Expand All @@ -40,7 +38,6 @@ def setup(
horizon: int,
library: str,
conf: int,
data: pd.DataFrame,
):
self.library = library
if "neuralforecast" in self.library:
Expand All @@ -62,8 +59,8 @@ def setup(
1: "Predictions are flat. Consider using LIBRARY 'neuralforecast' for more accrate predictions.",
}
self.conf = conf
self.training_data = pd.read_json(data, orient="split")
self.training_data.ds = pd.to_datetime(self.training_data.ds)
with open(model_path + "_rmse", "r") as f:
self.rmse = float(f.readline())

def forward(self, data) -> pd.DataFrame:
if self.library == "statsforecast":
Expand Down Expand Up @@ -92,27 +89,7 @@ def forward(self, data) -> pd.DataFrame:
print("\nSUGGESTION: " + self.suggestion_dict[suggestion])

# Metrics
if self.library == "statsforecast":
crossvalidation_df = self.model.cross_validation(
df=self.training_data[["ds", "y", "unique_id"]],
h=self.horizon,
step_size=24,
n_windows=1,
).reset_index()
rmses = []
for uid in crossvalidation_df.unique_id.unique():
crossvalidation_df_here = crossvalidation_df[
crossvalidation_df.unique_id == uid
]
rmses.append(
mean_squared_error(
crossvalidation_df_here.y,
crossvalidation_df_here[self.model_name],
squared=False,
)
/ np.mean(crossvalidation_df_here.y)
)
print("\nMean normalized RMSE: " + str(np.mean(rmses)))
print("\nMean normalized RMSE: " + str(self.rmse))

forecast_df = forecast_df.rename(
columns={
Expand Down

0 comments on commit 2c28b81

Please sign in to comment.