From 8db96d191694c738ba8d1655a3e7582ca251b1e7 Mon Sep 17 00:00:00 2001 From: enya-yx Date: Thu, 27 Oct 2022 16:49:49 +0800 Subject: [PATCH 1/2] Add checking and error message if materialize features defined on INPUT_CONTEXT --- feathr_project/feathr/client.py | 12 +++++++++-- .../test/test_feature_materialization.py | 21 ++++++++++++++++++- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/feathr_project/feathr/client.py b/feathr_project/feathr/client.py index dd39a70fa..4cd83afae 100644 --- a/feathr_project/feathr/client.py +++ b/feathr_project/feathr/client.py @@ -32,6 +32,7 @@ from feathr.utils._file_utils import write_to_file from feathr.utils.feature_printer import FeaturePrinter from feathr.utils.spark_job_params import FeatureGenerationJobParams, FeatureJoinJobParams +from feathr.definition.source import InputContext class FeathrClient(object): @@ -619,8 +620,15 @@ def materialize_features(self, settings: MaterializationSettings, execution_conf allow_materialize_non_agg_feature: Materializing non-aggregated features (the features without WindowAggTransformation) doesn't output meaningful results so it's by default set to False, but if you really want to materialize non-aggregated features, set this to True. """ feature_list = settings.feature_names - if len(feature_list) > 0 and not self._valid_materialize_keys(feature_list): - raise RuntimeError(f"Invalid materialization features: {feature_list}, since they have different keys. Currently Feathr only supports materializing features of the same keys.") + if len(feature_list) > 0: + if 'anchor_list' in dir(self): + anchors = [anchor for anchor in self.anchor_list if isinstance(anchor.source, InputContext)] + anchor_feature_names = set(feature.name for anchor in anchors for feature in anchor.features) + for feature in feature_list: + if feature in anchor_feature_names: + raise RuntimeError(f"Materializing features that are defined on INPUT_CONTEXT is not supported. {feature} is defined on INPUT_CONTEXT so you should remove it from the feature list in MaterializationSettings.") + if self._valid_materialize_keys(feature_list): + raise RuntimeError(f"Invalid materialization features: {feature_list}, since they have different keys. Currently Feathr only supports materializing features of the same keys.") if not allow_materialize_non_agg_feature: # Check if there are non-aggregation features in the list diff --git a/feathr_project/test/test_feature_materialization.py b/feathr_project/test/test_feature_materialization.py index e8100578c..754c12ebb 100644 --- a/feathr_project/test/test_feature_materialization.py +++ b/feathr_project/test/test_feature_materialization.py @@ -18,6 +18,8 @@ from test_fixture import basic_test_setup from test_fixture import get_online_test_table_name from test_utils.constants import Constants +from logging import raiseExceptions +import pytest def test_feature_materialization_config(): backfill_time = BackfillTime(start=datetime(2020, 5, 20), end=datetime(2020, 5,20), step=timedelta(days=1)) @@ -255,4 +257,21 @@ def test_delete_feature_from_redis(): res = client.get_online_features(online_test_table, '265', ['f_location_avg_fare']) assert len(res) == 1 - assert res[0] == None \ No newline at end of file + assert res[0] == None + +def test_feature_list_on_input_context(): + with pytest.raises(RuntimeError) as e_info: + test_workspace_dir = Path(__file__).parent.resolve() / "test_user_workspace" + + client: FeathrClient = basic_test_setup(os.path.join(test_workspace_dir, "feathr_config.yaml")) + online_test_table = get_online_test_table_name('nycTaxiCITableDeletion') + redisSink = RedisSink(table_name=online_test_table) + settings = MaterializationSettings(name="py_udf", + sinks=[redisSink], + feature_names=[ + "f_location_avg_fare", + "f_day_of_week" + ]) + client.materialize_features(settings, allow_materialize_non_agg_feature=True) + assert e_info is not None + assert e_info.value.args[0] == "Materializing features that are defined on INPUT_CONTEXT is not supported. f_day_of_week is defined on INPUT_CONTEXT so you should remove it from the feature list in MaterializationSettings." \ No newline at end of file From 863e567e47599ba375b1f777e19e1e50485b3ebd Mon Sep 17 00:00:00 2001 From: enya-yx Date: Thu, 3 Nov 2022 21:46:43 +0800 Subject: [PATCH 2/2] feathr_project/feathr/client.py --- feathr_project/feathr/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/feathr_project/feathr/client.py b/feathr_project/feathr/client.py index 21604ee6b..63cd07c1e 100644 --- a/feathr_project/feathr/client.py +++ b/feathr_project/feathr/client.py @@ -627,7 +627,7 @@ def materialize_features(self, settings: MaterializationSettings, execution_conf for feature in feature_list: if feature in anchor_feature_names: raise RuntimeError(f"Materializing features that are defined on INPUT_CONTEXT is not supported. {feature} is defined on INPUT_CONTEXT so you should remove it from the feature list in MaterializationSettings.") - if self._valid_materialize_keys(feature_list): + if not self._valid_materialize_keys(feature_list): raise RuntimeError(f"Invalid materialization features: {feature_list}, since they have different keys. Currently Feathr only supports materializing features of the same keys.") if not allow_materialize_non_agg_feature: