From eb4d0a514ead03b246717ef57e44071b59e6f892 Mon Sep 17 00:00:00 2001 From: Felix Wang Date: Mon, 21 Mar 2022 12:50:02 -0700 Subject: [PATCH 1/5] Remove labels parameter from Entity Signed-off-by: Felix Wang --- sdk/python/feast/entity.py | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/sdk/python/feast/entity.py b/sdk/python/feast/entity.py index efac8c17da..b14997e979 100644 --- a/sdk/python/feast/entity.py +++ b/sdk/python/feast/entity.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import warnings from datetime import datetime from typing import Dict, Optional @@ -23,8 +22,6 @@ from feast.usage import log_exceptions from feast.value_type import ValueType -warnings.simplefilter("once", DeprecationWarning) - class Entity: """ @@ -61,8 +58,7 @@ def __init__( value_type: ValueType = ValueType.UNKNOWN, description: str = "", join_key: Optional[str] = None, - tags: Dict[str, str] = None, - labels: Optional[Dict[str, str]] = None, + tags: Optional[Dict[str, str]] = None, owner: str = "", ): """Creates an Entity object.""" @@ -70,19 +66,7 @@ def __init__( self.value_type = value_type self.join_key = join_key if join_key else name self.description = description - - if labels is not None: - self.tags = labels - warnings.warn( - ( - "The parameter 'labels' is being deprecated. Please use 'tags' instead. " - "Feast 0.20 and onwards will not support the parameter 'labels'." - ), - DeprecationWarning, - ) - else: - self.tags = labels or tags or {} - + self.tags = tags if tags is not None else {} self.owner = owner self.created_timestamp = None self.last_updated_timestamp = None From a370c2d5dfc4c70a82407c7ebce177d9279d0b2b Mon Sep 17 00:00:00 2001 From: Felix Wang Date: Mon, 21 Mar 2022 13:04:40 -0700 Subject: [PATCH 2/5] Remove input parameter and deprecate Duration type for ttl parameter in FeatureView Signed-off-by: Felix Wang --- sdk/python/feast/feature_view.py | 41 ++++++++----------- .../infra/offline_stores/offline_utils.py | 10 ++--- ..._repo_with_duplicated_featureview_names.py | 6 +-- ...ample_feature_repo_with_entity_join_key.py | 4 +- .../example_feature_repo_with_inference.py | 20 --------- .../test_universal_historical_retrieval.py | 5 ++- .../scaffolding/test_partial_apply.py | 5 ++- 7 files changed, 34 insertions(+), 57 deletions(-) delete mode 100644 sdk/python/tests/example_repos/example_feature_repo_with_inference.py diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 2c1d0675d4..3e7561d338 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -58,9 +58,7 @@ class FeatureView(BaseFeatureView): ttl: The amount of time this group of features lives. A ttl of 0 indicates that this group of features lives forever. Note that large ttl's or a ttl of 0 can result in extremely computationally intensive queries. - input: The source of data where this group of features is stored. - batch_source (optional): The batch source of data where this group of features - is stored. + batch_source: The batch source of data where this group of features is stored. stream_source (optional): The stream source of data where this group of features is stored. features (optional): The set of features defined as part of this FeatureView. @@ -72,7 +70,6 @@ class FeatureView(BaseFeatureView): tags: Optional[Dict[str, str]] ttl: timedelta online: bool - input: DataSource batch_source: DataSource stream_source: Optional[DataSource] materialization_intervals: List[Tuple[datetime, datetime]] @@ -83,8 +80,7 @@ def __init__( name: str, entities: List[str], ttl: Union[Duration, timedelta], - input: Optional[DataSource] = None, - batch_source: Optional[DataSource] = None, + batch_source: DataSource, stream_source: Optional[DataSource] = None, features: Optional[List[Feature]] = None, tags: Optional[Dict[str, str]] = None, @@ -96,26 +92,17 @@ def __init__( Raises: ValueError: A field mapping conflicts with an Entity or a Feature. """ - if input is not None: - warnings.warn( - ( - "The argument 'input' is being deprecated. Please use 'batch_source' " - "instead. Feast 0.13 and onwards will not support the argument 'input'." - ), - DeprecationWarning, - ) - - _input = input or batch_source - assert _input is not None - _features = features or [] cols = [entity for entity in entities] + [feat.name for feat in _features] for col in cols: - if _input.field_mapping is not None and col in _input.field_mapping.keys(): + if ( + batch_source.field_mapping is not None + and col in batch_source.field_mapping.keys() + ): raise ValueError( - f"The field {col} is mapped to {_input.field_mapping[col]} for this data source. " - f"Please either remove this field mapping or use {_input.field_mapping[col]} as the " + f"The field {col} is mapped to {batch_source.field_mapping[col]} for this data source. " + f"Please either remove this field mapping or use {batch_source.field_mapping[col]} as the " f"Entity or Feature name." ) @@ -125,12 +112,19 @@ def __init__( if isinstance(ttl, Duration): self.ttl = timedelta(seconds=int(ttl.seconds)) + warnings.warn( + ( + "The option to pass a Duration object to the ttl parameter is being deprecated. " + "Please pass a timedelta object instead. Feast 0.21 and onwards will not support " + "Duration objects." + ), + DeprecationWarning, + ) else: self.ttl = ttl self.online = online - self.input = _input - self.batch_source = _input + self.batch_source = batch_source self.stream_source = stream_source self.materialization_intervals = [] @@ -144,7 +138,6 @@ def __copy__(self): name=self.name, entities=self.entities, ttl=self.ttl, - input=self.input, batch_source=self.batch_source, stream_source=self.stream_source, features=self.features, diff --git a/sdk/python/feast/infra/offline_stores/offline_utils.py b/sdk/python/feast/infra/offline_stores/offline_utils.py index eaf4925266..68440df205 100644 --- a/sdk/python/feast/infra/offline_stores/offline_utils.py +++ b/sdk/python/feast/infra/offline_stores/offline_utils.py @@ -115,7 +115,7 @@ def get_feature_view_query_context( join_keys = [] entity_selections = [] reverse_field_mapping = { - v: k for k, v in feature_view.input.field_mapping.items() + v: k for k, v in feature_view.batch_source.field_mapping.items() } for entity_name in feature_view.entities: entity = registry.get_entity(entity_name, project) @@ -130,8 +130,8 @@ def get_feature_view_query_context( else: ttl_seconds = 0 - event_timestamp_column = feature_view.input.event_timestamp_column - created_timestamp_column = feature_view.input.created_timestamp_column + event_timestamp_column = feature_view.batch_source.event_timestamp_column + created_timestamp_column = feature_view.batch_source.created_timestamp_column min_event_timestamp = None if feature_view.ttl: @@ -148,7 +148,7 @@ def get_feature_view_query_context( features=[ reverse_field_mapping.get(feature, feature) for feature in features ], - field_mapping=feature_view.input.field_mapping, + field_mapping=feature_view.batch_source.field_mapping, event_timestamp_column=reverse_field_mapping.get( event_timestamp_column, event_timestamp_column ), @@ -156,7 +156,7 @@ def get_feature_view_query_context( created_timestamp_column, created_timestamp_column ), # TODO: Make created column optional and not hardcoded - table_subquery=feature_view.input.get_table_query_string(), + table_subquery=feature_view.batch_source.get_table_query_string(), entity_selections=entity_selections, min_event_timestamp=min_event_timestamp, max_event_timestamp=max_event_timestamp, diff --git a/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py b/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py index 84d57bf038..20ff666bd9 100644 --- a/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py +++ b/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py @@ -1,4 +1,4 @@ -from google.protobuf.duration_pb2 import Duration +from datetime import timedelta from feast import FeatureView, FileSource @@ -11,7 +11,7 @@ entities=["driver_id"], online=False, batch_source=driver_hourly_stats, - ttl=Duration(seconds=10), + ttl=timedelta(days=1), tags={}, ) @@ -20,6 +20,6 @@ entities=["driver_id"], online=False, batch_source=driver_hourly_stats, - ttl=Duration(seconds=10), + ttl=timedelta(days=1), tags={}, ) diff --git a/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py b/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py index f89df1404c..bdabd14d2a 100644 --- a/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py +++ b/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py @@ -1,4 +1,4 @@ -from google.protobuf.duration_pb2 import Duration +from datetime import timedelta from feast import Entity, Feature, FeatureView, FileSource, ValueType @@ -21,7 +21,7 @@ driver_hourly_stats_view = FeatureView( name="driver_hourly_stats", entities=["driver_id"], - ttl=Duration(seconds=86400 * 1), + ttl=timedelta(days=1), features=[ Feature(name="conv_rate", dtype=ValueType.FLOAT), Feature(name="acc_rate", dtype=ValueType.FLOAT), diff --git a/sdk/python/tests/example_repos/example_feature_repo_with_inference.py b/sdk/python/tests/example_repos/example_feature_repo_with_inference.py deleted file mode 100644 index f2be472f55..0000000000 --- a/sdk/python/tests/example_repos/example_feature_repo_with_inference.py +++ /dev/null @@ -1,20 +0,0 @@ -from google.protobuf.duration_pb2 import Duration - -from feast import Entity, FeatureView, FileSource - -driver_hourly_stats = FileSource( - path="%PARQUET_PATH%", # placeholder to be replaced by the test - created_timestamp_column="created", -) - -driver = Entity(name="driver_id", description="driver id",) - -# features are inferred from columns of data source -driver_hourly_stats_view = FeatureView( - name="driver_hourly_stats", - entities=["driver_id"], - ttl=Duration(seconds=86400 * 1), - online=True, - batch_source=driver_hourly_stats, - tags={}, -) diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py index ab9b9515f3..541033433f 100644 --- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py @@ -219,7 +219,10 @@ def get_expected_training_df( ( f"field_mapping__{feature}" if full_feature_names else feature ): field_mapping_record.get(column, None) - for (column, feature) in field_mapping_fv.input.field_mapping.items() + for ( + column, + feature, + ) in field_mapping_fv.batch_source.field_mapping.items() } ) diff --git a/sdk/python/tests/integration/scaffolding/test_partial_apply.py b/sdk/python/tests/integration/scaffolding/test_partial_apply.py index bfd078c7da..ce06e26e4f 100644 --- a/sdk/python/tests/integration/scaffolding/test_partial_apply.py +++ b/sdk/python/tests/integration/scaffolding/test_partial_apply.py @@ -1,5 +1,6 @@ +from datetime import timedelta + import pytest -from google.protobuf.duration_pb2 import Duration from feast import BigQuerySource, Feature, FeatureView, ValueType from tests.utils.cli_utils import CliRunner, get_example_repo @@ -27,7 +28,7 @@ def test_partial() -> None: driver_locations_100 = FeatureView( name="driver_locations_100", entities=["driver"], - ttl=Duration(seconds=86400 * 1), + ttl=timedelta(days=1), features=[ Feature(name="lat", dtype=ValueType.FLOAT), Feature(name="lon", dtype=ValueType.STRING), From 00b4756767605f276c25f0b388a161befab70243 Mon Sep 17 00:00:00 2001 From: Felix Wang Date: Mon, 21 Mar 2022 13:18:24 -0700 Subject: [PATCH 3/5] Change examples to stop using Duration Signed-off-by: Felix Wang --- sdk/python/feast/templates/local/example.py | 4 ++-- sdk/python/feast/templates/spark/example.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sdk/python/feast/templates/local/example.py b/sdk/python/feast/templates/local/example.py index 9ff4fe3055..6ab549b8c5 100644 --- a/sdk/python/feast/templates/local/example.py +++ b/sdk/python/feast/templates/local/example.py @@ -1,6 +1,6 @@ # This is an example feature definition file -from google.protobuf.duration_pb2 import Duration +from datetime import timedelta from feast import Entity, Feature, FeatureView, FileSource, ValueType @@ -23,7 +23,7 @@ driver_hourly_stats_view = FeatureView( name="driver_hourly_stats", entities=["driver_id"], - ttl=Duration(seconds=86400 * 1), + ttl=timedelta(days=1), features=[ Feature(name="conv_rate", dtype=ValueType.FLOAT), Feature(name="acc_rate", dtype=ValueType.FLOAT), diff --git a/sdk/python/feast/templates/spark/example.py b/sdk/python/feast/templates/spark/example.py index 2b738c4337..fc9dd39222 100644 --- a/sdk/python/feast/templates/spark/example.py +++ b/sdk/python/feast/templates/spark/example.py @@ -4,7 +4,7 @@ from pathlib import Path -from google.protobuf.duration_pb2 import Duration +from datetime import timedelta from feast import Entity, Feature, FeatureView, ValueType from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import ( @@ -41,7 +41,7 @@ driver_hourly_stats_view = FeatureView( name="driver_hourly_stats", entities=["driver_id"], - ttl=Duration(seconds=86400 * 7), # one week + ttl=timedelta(days=7), features=[ Feature(name="conv_rate", dtype=ValueType.FLOAT), Feature(name="acc_rate", dtype=ValueType.FLOAT), @@ -54,7 +54,7 @@ customer_daily_profile_view = FeatureView( name="customer_daily_profile", entities=["customer_id"], - ttl=Duration(seconds=86400 * 7), # one week + ttl=timedelta(days=7), features=[ Feature(name="current_balance", dtype=ValueType.FLOAT), Feature(name="avg_passenger_count", dtype=ValueType.FLOAT), From a15ed5de46715da85d727d6ddfe975926b9ffa6c Mon Sep 17 00:00:00 2001 From: Felix Wang Date: Mon, 21 Mar 2022 13:20:37 -0700 Subject: [PATCH 4/5] Delete unused example feature repo Signed-off-by: Felix Wang --- ...ple_feature_repo_with_missing_bq_source.py | 20 ------------------- 1 file changed, 20 deletions(-) delete mode 100644 sdk/python/tests/example_repos/example_feature_repo_with_missing_bq_source.py diff --git a/sdk/python/tests/example_repos/example_feature_repo_with_missing_bq_source.py b/sdk/python/tests/example_repos/example_feature_repo_with_missing_bq_source.py deleted file mode 100644 index 46efe5b275..0000000000 --- a/sdk/python/tests/example_repos/example_feature_repo_with_missing_bq_source.py +++ /dev/null @@ -1,20 +0,0 @@ -from datetime import timedelta - -from feast import BigQuerySource, Entity, Feature, FeatureView, ValueType - -nonexistent_source = BigQuerySource( - table_ref="project.dataset.nonexistent_table", event_timestamp_column="" -) - -driver = Entity(name="driver", value_type=ValueType.INT64, description="driver id",) - -nonexistent_features = FeatureView( - name="driver_locations", - entities=["driver"], - ttl=timedelta(days=1), - features=[ - Feature(name="lat", dtype=ValueType.FLOAT), - Feature(name="lon", dtype=ValueType.STRING), - ], - batch_source=nonexistent_source, -) From bc89e25f65c361429aa8cafb0876bf287a53c1a6 Mon Sep 17 00:00:00 2001 From: Felix Wang Date: Mon, 21 Mar 2022 13:21:07 -0700 Subject: [PATCH 5/5] Format Signed-off-by: Felix Wang --- sdk/python/feast/templates/spark/example.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sdk/python/feast/templates/spark/example.py b/sdk/python/feast/templates/spark/example.py index fc9dd39222..b1741fd688 100644 --- a/sdk/python/feast/templates/spark/example.py +++ b/sdk/python/feast/templates/spark/example.py @@ -2,9 +2,8 @@ # This is an example feature definition file # # # # # # # # # # # # # # # # # # # # # # # # # -from pathlib import Path - from datetime import timedelta +from pathlib import Path from feast import Entity, Feature, FeatureView, ValueType from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (