Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 25 additions & 20 deletions ads/opctl/operator/lowcode/forecast/model_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,23 +121,26 @@ def run_all_models(self, datasets: ForecastDatasets, operator_config: ForecastOp
from .model.factory import ForecastOperatorModelFactory
metrics[model] = {}
for i in range(len(cut_offs)):
backtest_historical_data = train_sets[i]
backtest_additional_data = additional_data[i]
backtest_test_data = test_sets[i]
backtest_operator_config = self.create_operator_config(operator_config, i, model,
backtest_historical_data,
backtest_additional_data,
backtest_test_data)
datasets = ForecastDatasets(backtest_operator_config)
ForecastOperatorModelFactory.get_model(
backtest_operator_config, datasets
).generate_report()
test_metrics_filename = backtest_operator_config.spec.test_metrics_filename
metrics_df = pd.read_csv(
f"{backtest_operator_config.spec.output_directory.url}/{test_metrics_filename}")
metrics_df["average_across_series"] = metrics_df.drop('metrics', axis=1).mean(axis=1)
metrics_average_dict = dict(zip(metrics_df['metrics'].str.lower(), metrics_df['average_across_series']))
metrics[model][i] = metrics_average_dict[operator_config.spec.metric]
try:
backtest_historical_data = train_sets[i]
backtest_additional_data = additional_data[i]
backtest_test_data = test_sets[i]
backtest_operator_config = self.create_operator_config(operator_config, i, model,
backtest_historical_data,
backtest_additional_data,
backtest_test_data)
datasets = ForecastDatasets(backtest_operator_config)
ForecastOperatorModelFactory.get_model(
backtest_operator_config, datasets
).generate_report()
test_metrics_filename = backtest_operator_config.spec.test_metrics_filename
metrics_df = pd.read_csv(
f"{backtest_operator_config.spec.output_directory.url}/{test_metrics_filename}")
metrics_df["average_across_series"] = metrics_df.drop('metrics', axis=1).mean(axis=1)
metrics_average_dict = dict(zip(metrics_df['metrics'].str.lower(), metrics_df['average_across_series']))
metrics[model][i] = metrics_average_dict[operator_config.spec.metric]
except:
logger.warn(f"Failed to calculate metrics for {model} and {i} backtest")
return metrics

def find_best_model(self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig):
Expand All @@ -147,10 +150,12 @@ def find_best_model(self, datasets: ForecastDatasets, operator_config: ForecastO
model = SupportedModels.Prophet
logger.error(f"Running {model} model as auto-select failed with the following error: {e.message}")
return model
avg_backtests_metrics = {key: sum(value.values()) / len(value.values()) for key, value in metrics.items()}
best_model = min(avg_backtests_metrics, key=avg_backtests_metrics.get)
nonempty_metrics = {model: metric for model, metric in metrics.items() if metric != {}}
avg_backtests_metric = {model: sum(value.values()) / len(value.values())
for model, value in nonempty_metrics.items()}
best_model = min(avg_backtests_metric, key=avg_backtests_metric.get)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if the metric is set to accuracy "r-squared"? Do we prevent this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The r-squared metric is not present in the forecast/schema.yaml file as an allowed optimization metric, so it is not used for selecting the best model. It is still calculated when test data is provided, but only for reporting purposes.

logger.info(f"Among models {self.models}, {best_model} model shows better performance during backtesting.")
backtest_stats = pd.DataFrame(metrics).rename_axis('backtest')
backtest_stats = pd.DataFrame(nonempty_metrics).rename_axis('backtest')
backtest_stats.reset_index(inplace=True)
output_dir = operator_config.spec.output_directory.url
backtest_report_name = "backtest_stats.csv"
Expand Down
7 changes: 3 additions & 4 deletions ads/opctl/operator/lowcode/forecast/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,9 @@
mean_absolute_percentage_error,
mean_squared_error,
)
try:
from scipy.stats import linregress
except:
from sklearn.metrics import r2_score

from scipy.stats import linregress
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is scipy in the pyproject.toml?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not explicitly; statsmodels is listed there, and it depends on scipy.

from sklearn.metrics import r2_score

from ads.common.object_storage_details import ObjectStorageDetails
from ads.dataset.label_encoder import DataFrameLabelEncoder
Expand Down