-
Notifications
You must be signed in to change notification settings - Fork 59
[ODSC-76829/76830] : improve auto-select logic and handle missing data #1259
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
a57b872
aa4dedf
0b1a389
041e3c4
5f58a65
6aca817
d9eb158
22c3291
d73ba64
197985d
4a64be2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,14 +9,13 @@ | |
| import sys | ||
| from typing import Dict, List | ||
|
|
||
| import pandas as pd | ||
| import yaml | ||
|
|
||
| from ads.opctl import logger | ||
| from ads.opctl.operator.common.const import ENV_OPERATOR_ARGS | ||
| from ads.opctl.operator.common.utils import _parse_input_args | ||
|
|
||
| from .const import AUTO_SELECT_SERIES | ||
| from .const import AUTO_SELECT, AUTO_SELECT_SERIES | ||
| from .model.forecast_datasets import ForecastDatasets, ForecastResults | ||
| from .operator_config import ForecastOperatorConfig | ||
| from .whatifserve import ModelDeploymentManager | ||
|
|
@@ -29,8 +28,10 @@ def operate(operator_config: ForecastOperatorConfig) -> ForecastResults: | |
| datasets = ForecastDatasets(operator_config) | ||
| model = ForecastOperatorModelFactory.get_model(operator_config, datasets) | ||
|
|
||
| if operator_config.spec.model == AUTO_SELECT_SERIES and hasattr( | ||
| operator_config.spec, "meta_features" | ||
| if ( | ||
| operator_config.spec.model == AUTO_SELECT_SERIES | ||
| and hasattr(operator_config.spec, "meta_features") | ||
| and operator_config.spec.target_category_columns | ||
| ): | ||
| # For AUTO_SELECT_SERIES, handle each series with its specific model | ||
| meta_features = operator_config.spec.meta_features | ||
|
|
@@ -64,8 +65,6 @@ def operate(operator_config: ForecastOperatorConfig) -> ForecastResults: | |
| ) | ||
| sub_results_list.append(sub_results) | ||
|
|
||
| # results_df = pd.concat([results_df, sub_result_df], ignore_index=True, axis=0) | ||
| # elapsed_time += sub_elapsed_time | ||
| # Merge all sub_results into a single ForecastResults object | ||
| if sub_results_list: | ||
| results = sub_results_list[0] | ||
|
|
@@ -75,6 +74,20 @@ def operate(operator_config: ForecastOperatorConfig) -> ForecastResults: | |
| results = None | ||
|
|
||
| else: | ||
| # When AUTO_SELECT_SERIES is specified but target_category_columns is not, | ||
| # we fall back to AUTO_SELECT behavior. | ||
| if ( | ||
| operator_config.spec.model == AUTO_SELECT_SERIES | ||
| and not operator_config.spec.target_category_columns | ||
| ): | ||
|
|
||
| logger.warning( | ||
| "AUTO_SELECT_SERIES cannot be run with a single-series dataset or when " | ||
| "'target_category_columns' is not provided. Falling back to AUTO_SELECT." | ||
| ) | ||
|
|
||
| operator_config.spec.model = AUTO_SELECT | ||
| model = ForecastOperatorModelFactory.get_model(operator_config, datasets) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nice! Can we reflect this in the report? Make sure it's still saying "auto-select-series". Can we add a unit test for this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added log message we are falling back to auto select, populating the report with method as auto-select-series would give a wrong impression to the user and I feel it should be avoided , enabled the test-case |
||
| # For other cases, use the single selected model | ||
| results = model.generate_report() | ||
| # saving to model catalog | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why fillna with 0? why no backfill? Did we discuss this?
Don't we already have this covered in pre-processing steps? What are we gaining from this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is independent of the pre-processing, we want this to be done even if the pre-processing is not set/enabled. This helps us to create the meta-features incase we have nan's in the dataset. Filling with other values is not validated yet, can be experimented and documented to see the impact.