Skip to content
Open
2 changes: 2 additions & 0 deletions ads/opctl/operator/lowcode/common/transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,8 @@ def build_fforms_meta_features(self, data, target_col=None, group_cols=None):
if target_col not in data.columns:
raise ValueError(f"Target column '{target_col}' not found in DataFrame")

data[target_col] = data[target_col].fillna(0)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why fill NaNs with 0 rather than backfilling? Did we discuss this?
Isn't this already covered by the pre-processing steps? What do we gain from doing it here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is independent of the pre-processing; we want it to be done even if pre-processing is not set or enabled. It lets us create the meta-features in case the dataset contains NaNs. Filling with other values has not been validated yet; it can be experimented with and documented to see the impact.


# Check if group_cols are provided and valid
if group_cols is not None:
if not isinstance(group_cols, list):
Expand Down
25 changes: 19 additions & 6 deletions ads/opctl/operator/lowcode/forecast/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,13 @@
import sys
from typing import Dict, List

import pandas as pd
import yaml

from ads.opctl import logger
from ads.opctl.operator.common.const import ENV_OPERATOR_ARGS
from ads.opctl.operator.common.utils import _parse_input_args

from .const import AUTO_SELECT_SERIES
from .const import AUTO_SELECT, AUTO_SELECT_SERIES
from .model.forecast_datasets import ForecastDatasets, ForecastResults
from .operator_config import ForecastOperatorConfig
from .whatifserve import ModelDeploymentManager
Expand All @@ -29,8 +28,10 @@ def operate(operator_config: ForecastOperatorConfig) -> ForecastResults:
datasets = ForecastDatasets(operator_config)
model = ForecastOperatorModelFactory.get_model(operator_config, datasets)

if operator_config.spec.model == AUTO_SELECT_SERIES and hasattr(
operator_config.spec, "meta_features"
if (
operator_config.spec.model == AUTO_SELECT_SERIES
and hasattr(operator_config.spec, "meta_features")
and operator_config.spec.target_category_columns
):
# For AUTO_SELECT_SERIES, handle each series with its specific model
meta_features = operator_config.spec.meta_features
Expand Down Expand Up @@ -64,8 +65,6 @@ def operate(operator_config: ForecastOperatorConfig) -> ForecastResults:
)
sub_results_list.append(sub_results)

# results_df = pd.concat([results_df, sub_result_df], ignore_index=True, axis=0)
# elapsed_time += sub_elapsed_time
# Merge all sub_results into a single ForecastResults object
if sub_results_list:
results = sub_results_list[0]
Expand All @@ -75,6 +74,20 @@ def operate(operator_config: ForecastOperatorConfig) -> ForecastResults:
results = None

else:
# When AUTO_SELECT_SERIES is specified but target_category_columns is not,
# we fall back to AUTO_SELECT behavior.
if (
operator_config.spec.model == AUTO_SELECT_SERIES
and not operator_config.spec.target_category_columns
):

logger.warning(
"AUTO_SELECT_SERIES cannot be run with a single-series dataset or when "
"'target_category_columns' is not provided. Falling back to AUTO_SELECT."
)

operator_config.spec.model = AUTO_SELECT
model = ForecastOperatorModelFactory.get_model(operator_config, datasets)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice!

Can we reflect this in the report? Make sure it's still saying "auto-select-series".

Can we add a unit test for this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a log message stating that we are falling back to auto-select. Populating the report with the method as auto-select-series would give the user a wrong impression, so I feel it should be avoided. Enabled the test case.

# For other cases, use the single selected model
results = model.generate_report()
# saving to model catalog
Expand Down
40 changes: 40 additions & 0 deletions tests/operators/forecast/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import pandas as pd
import pytest
import yaml
import numpy as np

from ads.opctl.operator.cmd import run
from ads.opctl.operator.lowcode.forecast.__main__ import operate as forecast_operate
Expand Down Expand Up @@ -413,5 +414,44 @@ def run_operator(
# generate_train_metrics = True


def test_missing_data_autoselect_series():
    """Run the forecast operator in auto-select-series mode on data with a NaN target.

    Builds a ten-row daily frame (2023-01-01 through 2023-01-10) with a single
    category "A" whose third target value is missing, configures the operator
    from a copy of TEMPLATE_YAML, runs it with a horizon of 2, and then checks
    the output directory for errors.
    """
    # Same ten date strings as a literal list, generated instead of spelled out.
    day_labels = [f"2023-01-{day:02d}" for day in range(1, 11)]
    target_values = [1, 2, np.nan, 4, 5, 6, 7, 8, 9, 10]
    df = pd.DataFrame(
        {
            "Date": pd.to_datetime(day_labels),
            "Y": target_values,
            "Category": ["A"] * len(target_values),
        }
    )

    with tempfile.TemporaryDirectory() as tmpdirname:
        output_data_path = f"{tmpdirname}/results"

        yaml_i = deepcopy(TEMPLATE_YAML)
        spec = yaml_i["spec"]
        spec["model"] = "auto-select-series"

        # Feed the in-memory frame instead of the template's URL source.
        historical = spec["historical_data"]
        historical.pop("url")
        historical["data"] = df

        spec["target_column"] = "Y"
        spec["datetime_column"]["name"] = "Date"
        spec["target_category_columns"] = ["Category"]
        spec["horizon"] = 2
        spec["output_directory"]["url"] = output_data_path

        forecast_operate(ForecastOperatorConfig.from_dict(yaml_i))
        check_output_for_errors(output_data_path)


# Running this file directly as a script performs no work: the guard body is only `pass`.
if __name__ == "__main__":
pass
Loading