Skip to content

Commit

Permalink
Add exception if materialize features defined on 'INPUT_CONTEXT' (#785)
Browse files Browse the repository at this point in the history
* Add checking and error message if materialize features defined on INPUT_CONTEXT
  • Loading branch information
enya-yx authored Nov 6, 2022
1 parent c203d69 commit ca9aa21
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 3 deletions.
12 changes: 10 additions & 2 deletions feathr_project/feathr/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from feathr.utils._file_utils import write_to_file
from feathr.utils.feature_printer import FeaturePrinter
from feathr.utils.spark_job_params import FeatureGenerationJobParams, FeatureJoinJobParams
from feathr.definition.source import InputContext


class FeathrClient(object):
Expand Down Expand Up @@ -619,8 +620,15 @@ def materialize_features(self, settings: MaterializationSettings, execution_conf
allow_materialize_non_agg_feature: Materializing non-aggregated features (the features without WindowAggTransformation) doesn't output meaningful results so it's by default set to False, but if you really want to materialize non-aggregated features, set this to True.
"""
feature_list = settings.feature_names
if len(feature_list) > 0 and not self._valid_materialize_keys(feature_list):
raise RuntimeError(f"Invalid materialization features: {feature_list}, since they have different keys. Currently Feathr only supports materializing features of the same keys.")
if len(feature_list) > 0:
if 'anchor_list' in dir(self):
anchors = [anchor for anchor in self.anchor_list if isinstance(anchor.source, InputContext)]
anchor_feature_names = set(feature.name for anchor in anchors for feature in anchor.features)
for feature in feature_list:
if feature in anchor_feature_names:
raise RuntimeError(f"Materializing features that are defined on INPUT_CONTEXT is not supported. {feature} is defined on INPUT_CONTEXT so you should remove it from the feature list in MaterializationSettings.")
if not self._valid_materialize_keys(feature_list):
raise RuntimeError(f"Invalid materialization features: {feature_list}, since they have different keys. Currently Feathr only supports materializing features of the same keys.")

if not allow_materialize_non_agg_feature:
# Check if there are non-aggregation features in the list
Expand Down
21 changes: 20 additions & 1 deletion feathr_project/test/test_feature_materialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from test_fixture import basic_test_setup
from test_fixture import get_online_test_table_name
from test_utils.constants import Constants
from logging import raiseExceptions
import pytest

def test_feature_materialization_config():
backfill_time = BackfillTime(start=datetime(2020, 5, 20), end=datetime(2020, 5,20), step=timedelta(days=1))
Expand Down Expand Up @@ -255,4 +257,21 @@ def test_delete_feature_from_redis():
res = client.get_online_features(online_test_table, '265', ['f_location_avg_fare'])

assert len(res) == 1
assert res[0] == None
assert res[0] == None

def test_feature_list_on_input_context():
with pytest.raises(RuntimeError) as e_info:
test_workspace_dir = Path(__file__).parent.resolve() / "test_user_workspace"

client: FeathrClient = basic_test_setup(os.path.join(test_workspace_dir, "feathr_config.yaml"))
online_test_table = get_online_test_table_name('nycTaxiCITableDeletion')
redisSink = RedisSink(table_name=online_test_table)
settings = MaterializationSettings(name="py_udf",
sinks=[redisSink],
feature_names=[
"f_location_avg_fare",
"f_day_of_week"
])
client.materialize_features(settings, allow_materialize_non_agg_feature=True)
assert e_info is not None
assert e_info.value.args[0] == "Materializing features that are defined on INPUT_CONTEXT is not supported. f_day_of_week is defined on INPUT_CONTEXT so you should remove it from the feature list in MaterializationSettings."

0 comments on commit ca9aa21

Please sign in to comment.