Skip to content

Commit

Permalink
Add support for removal of auto extractor (#530)
Browse files Browse the repository at this point in the history
  • Loading branch information
nonibansal committed Aug 14, 2024
1 parent 5937a42 commit 7e7eb6b
Show file tree
Hide file tree
Showing 7 changed files with 255 additions and 168 deletions.
3 changes: 3 additions & 0 deletions fennel/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## [1.5.4] - 2024-08-14
- Add support for removal of auto extractors

## [1.5.1] - 2024-08-05
- Support chained lookup extractors

Expand Down
63 changes: 62 additions & 1 deletion fennel/client_tests/test_featureset.py
Original file line number Diff line number Diff line change
Expand Up @@ -872,7 +872,11 @@ class ImageFeatureWithDefault:
{
"image_id": [1, 2, 3],
"embedding": [[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]],
"ts": [datetime.now(), datetime.now(), datetime.now()],
"ts": [
datetime.now(timezone.utc),
datetime.now(timezone.utc),
datetime.now(timezone.utc),
],
}
),
)
Expand Down Expand Up @@ -1056,3 +1060,60 @@ def test_chained_lookups(client):
"Rugby",
]
assert feature_df["UserInfo3.num_players"].tolist() == [9, 11, 6, pd.NA]


@pytest.mark.integration
@mock
def test_auto_extractor_removal(client):
    """Check that features backed by auto-generated extractors can be removed.

    The test commits a keyed, indexed dataset together with a featureset
    whose ``name`` and ``age`` features are declared via ``F(<dataset field>)``.
    It then marks those features ``deleted=True`` in two follow-up commits:
    first through an incremental commit, then through a regular commit.

    Each commit's own response is asserted on, so a failure in either
    removal path is reported instead of re-checking the first response.
    """

    @source(webhook.endpoint("IndexDataset"), disorder="14d", cdc="upsert")
    @dataset(index=True)
    class IndexDataset:
        user_id: int = field(key=True)
        name: str
        age: int
        timestamp: datetime

    @featureset
    class IndexFeatures:
        user_id: int
        name: Optional[str] = F(IndexDataset.name)
        age: Optional[int] = F(IndexDataset.age)

    response = client.commit(
        datasets=[IndexDataset],
        featuresets=[IndexFeatures],
        message="first_commit",
    )
    assert response.status_code == requests.codes.OK, response.json()

    # Try removing one auto extractor from incremental mode
    def remove_age():
        # Redefined inside a helper so the class name `IndexFeatures` can be
        # reused without clashing with the definition above.
        @featureset
        class IndexFeatures:
            user_id: int
            name: Optional[str] = F(IndexDataset.name)
            age: Optional[int] = F(IndexDataset.age).meta(deleted=True)

        return IndexFeatures

    # Capture this commit's response; otherwise the assert below would only
    # re-validate the response of the first commit.
    response = client.commit(
        featuresets=[remove_age()], message="second_commit", incremental=True
    )
    assert response.status_code == requests.codes.OK, response.json()

    # Try removing the remaining auto extractor from normal commit
    def remove_name():
        @featureset
        class IndexFeatures:
            user_id: int
            name: Optional[str] = F(IndexDataset.name).meta(deleted=True)
            age: Optional[int] = F(IndexDataset.age).meta(deleted=True)

        return IndexFeatures

    # Same as above: assert on the response of *this* commit.
    response = client.commit(
        datasets=[IndexDataset],
        featuresets=[remove_name()],
        message="third_commit",
    )
    assert response.status_code == requests.codes.OK, response.json()
11 changes: 11 additions & 0 deletions fennel/featuresets/featureset.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
get_meta_attr,
set_meta_attr,
)
from fennel.lib.metadata.metadata import get_meta
from fennel.lib.params import (
FENNEL_INPUTS,
FENNEL_OUTPUTS,
Expand Down Expand Up @@ -586,6 +587,11 @@ def _get_generated_extractors(
extractor.outputs = [feature]
extractor.inputs = [ref]
extractor.featureset = self._name
feature_meta = get_meta(feature)
if feature_meta:
extractor = cast(
Extractor, meta(**feature_meta.dict())(extractor)
)
output.append(extractor)
continue

Expand Down Expand Up @@ -620,6 +626,11 @@ def _get_generated_extractors(
extractor.set_inputs_from_featureset(self, feature)
extractor.featureset = self._name
extractor.outputs = [feature]
feature_meta = get_meta(feature)
if feature_meta:
extractor = cast(
Extractor, meta(**feature_meta.dict())(extractor)
)
# If extractor already exists, throw an error
if extractor.name in [e.name for e in output]:
raise ValueError(
Expand Down
24 changes: 17 additions & 7 deletions fennel/featuresets/test_derived_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def test_extractor(actual, expected):
"datasets": [],
"inputs": [{"feature": {"feature_set_name": "User", "name": "id"}}],
"features": ["user_id"],
"metadata": {},
"metadata": {"description": "alias feature"},
"version": 0,
"pycode": None,
"feature_set_name": "UserInfo",
Expand All @@ -243,7 +243,7 @@ def test_extractor(actual, expected):
{"feature": {"feature_set_name": "UserInfo", "name": "user_id"}}
],
"features": ["gender"],
"metadata": {},
"metadata": {"description": "lookup derived feature"},
"version": 0,
"pycode": None,
"feature_set_name": "UserInfo",
Expand All @@ -260,7 +260,10 @@ def test_extractor(actual, expected):
{"feature": {"feature_set_name": "UserInfo", "name": "user_id"}}
],
"features": ["age_years"],
"metadata": {},
"metadata": {
"owner": "zaki@fennel.ai",
"description": "lookup with meta",
},
"version": 0,
"pycode": None,
"feature_set_name": "UserInfo",
Expand All @@ -277,7 +280,10 @@ def test_extractor(actual, expected):
{"feature": {"feature_set_name": "UserInfo", "name": "user_id"}}
],
"features": ["dob"],
"metadata": {},
"metadata": {
"deprecated": True,
"description": "deprecated feature",
},
"version": 0,
"pycode": None,
"feature_set_name": "UserInfo",
Expand All @@ -294,7 +300,7 @@ def test_extractor(actual, expected):
{"feature": {"feature_set_name": "UserInfo", "name": "user_id"}}
],
"features": ["optional_nickname"],
"metadata": {},
"metadata": {"description": "optional lookup derived feature"},
"version": 0,
"pycode": None,
"feature_set_name": "UserInfo",
Expand Down Expand Up @@ -335,7 +341,9 @@ def test_extractor(actual, expected):
}
],
"features": ["age_group"],
"metadata": {},
"metadata": {
"description": "alias a feature that has an explicit extractor"
},
"version": 0,
"pycode": None,
"feature_set_name": "AgeInfo",
Expand All @@ -354,7 +362,9 @@ def test_extractor(actual, expected):
}
],
"features": ["age"],
"metadata": {},
"metadata": {
"description": "alias a feature that has a derived extractor"
},
"version": 0,
"pycode": None,
"feature_set_name": "AgeInfo",
Expand Down
7 changes: 6 additions & 1 deletion fennel/internal_lib/to_proto/to_proto.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,14 +623,19 @@ def _extractor_to_proto(
extractor.derived_extractor_info
)

if extractor.extractor_type == ExtractorType.PY_FUNC:
metadata = get_metadata_proto(extractor.func)
else:
metadata = get_metadata_proto(extractor)

proto_extractor = fs_proto.Extractor(
name=extractor.name,
datasets=[
dataset._name for dataset in extractor.get_dataset_dependencies()
],
inputs=inputs,
features=[feature.name for feature in extractor.outputs],
metadata=get_metadata_proto(extractor.func),
metadata=metadata,
version=extractor.version,
pycode=to_extractor_pycode(extractor, fs, fs_obj_map),
feature_set_name=extractor.featureset,
Expand Down
Loading

0 comments on commit 7e7eb6b

Please sign in to comment.