Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ingestion): extend feast plugin to ingest tags and owners #11784

Merged
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
ef24dc4
feat(ingestion): extend feast plugin to ingest tags for features
margaridafernandes-trip Nov 4, 2024
0494431
revert(ingestion): change in get_workunits_internal
margaridafernandes-trip Nov 4, 2024
429f663
feat(ingestion): add tags, owners metadata ingestion
margaridafernandes-trip Nov 7, 2024
94fc26a
feat(ingestion): add updated mces golden json
margaridafernandes-trip Nov 7, 2024
c14f94e
Merge branch 'master' into extend-feast-plugin
margaridafernandes-trip Nov 7, 2024
f471598
Merge branch 'master' into extend-feast-plugin
margaridafernandes-trip Nov 7, 2024
33535bb
fix(ingestion): static tests
margaridafernandes-trip Nov 7, 2024
cfdccec
Merge branch 'master' into extend-feast-plugin
margaridafernandes-trip Nov 7, 2024
6a2cc71
fix(ingestion): update golden file
margaridafernandes-trip Nov 7, 2024
4e8916a
feat(ingestion): solve discussions
margaridafernandes-trip Nov 7, 2024
7620c98
feat(ingestion): add reusable method for tags and owners
margaridafernandes-trip Nov 8, 2024
adb4809
Merge branch 'master' into extend-feast-plugin
margaridafernandes-trip Nov 8, 2024
f21d1a0
fix(ingestion): remove import
margaridafernandes-trip Nov 8, 2024
c066e6a
feat(ingestion): fmt
margaridafernandes-trip Nov 8, 2024
4598f40
feat(ingestion): add MLOPs and ML owners to owner_mapping
margaridafernandes-trip Nov 18, 2024
1b85fb5
feat(ingestion): add owner mappings to FeastRepositorySourceConfig
margaridafernandes-trip Nov 19, 2024
eae19eb
Merge branch 'master' into extend-feast-plugin
margaridafernandes-trip Nov 19, 2024
0ee0592
revert(ingestion): remove added spaces
margaridafernandes-trip Nov 19, 2024
701b602
revert(ingestion): remove added spaces
margaridafernandes-trip Nov 19, 2024
6dd35fc
feat(ingestion): fmt
margaridafernandes-trip Nov 19, 2024
b2f4d3b
feat(ingestion): update integration tests
margaridafernandes-trip Nov 25, 2024
d73a4d7
feat(ingestion): solve discussions
margaridafernandes-trip Nov 26, 2024
22afe01
feat(ingestion): solve discussions
margaridafernandes-trip Nov 26, 2024
0693000
feat(ingestion): fmt
margaridafernandes-trip Nov 26, 2024
a851ad8
feat(ingestion): fmt
margaridafernandes-trip Nov 26, 2024
fcfb79d
feat(ingestion): add flags enable_owner_extraction and enable_tag_extraction
margaridafernandes-trip Nov 27, 2024
238eba0
Merge branch 'master' into extend-feast-plugin
jjoyce0510 Nov 27, 2024
2da5e2a
fix(ingestion): static tests
margaridafernandes-trip Nov 27, 2024
63d67d9
feat(ingestion): add registry.db
margaridafernandes-trip Nov 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions metadata-ingestion/scripts/datahub_preflight.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash -e
#!/bin/bash -e

margaridafernandes-trip marked this conversation as resolved.
Show resolved Hide resolved
#From https://stackoverflow.com/questions/4023830/how-to-compare-two-strings-in-dot-separated-version-format-in-bash
verlte() {
Expand Down Expand Up @@ -45,7 +45,7 @@ arm64_darwin_preflight() {
pip3 install --no-use-pep517 scipy
fi

brew_install "openssl@1.1"
brew_install "openssl@3.0.14"
brew install "postgresql@14"
margaridafernandes-trip marked this conversation as resolved.
Show resolved Hide resolved

# postgresql installs libs in a strange way
Expand Down
64 changes: 58 additions & 6 deletions metadata-ingestion/src/datahub/ingestion/source/feast.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@
)
from datahub.ingestion.api.source import Source, SourceReport
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.metadata._schema_classes import (
OwnerClass,
OwnershipClass,
OwnershipTypeClass,
)
from datahub.metadata.com.linkedin.pegasus2avro.common import MLFeatureDataType
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
MLFeatureSnapshot,
Expand All @@ -42,10 +47,12 @@
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
from datahub.metadata.schema_classes import (
BrowsePathsClass,
GlobalTagsClass,
MLFeaturePropertiesClass,
MLFeatureTablePropertiesClass,
MLPrimaryKeyPropertiesClass,
StatusClass,
TagAssociationClass,
)

# FIXME: ValueType module cannot be used as a type
Expand Down Expand Up @@ -216,9 +223,26 @@ def _get_entity_workunit(

feature_view_name = f"{self.feature_store.project}.{feature_view.name}"

aspects = [StatusClass(removed=False)]

if entity.tags.get("name"):
tag: str = entity.tags.get("name")
margaridafernandes-trip marked this conversation as resolved.
Show resolved Hide resolved
tag_association = TagAssociationClass(tag=builder.make_tag_urn(tag))
global_tags_aspect = GlobalTagsClass(tags=[tag_association])
aspects.append(global_tags_aspect)

if entity.owner:
owner = entity.owner
owner_association = OwnerClass(
owner=builder.make_owner_urn(owner, owner_type=builder.OwnerType.USER),
type=OwnershipTypeClass.TECHNICAL_OWNER,
)
owners_aspect = OwnershipClass(owners=[owner_association])
aspects.append(owners_aspect)

entity_snapshot = MLPrimaryKeySnapshot(
urn=builder.make_ml_primary_key_urn(feature_view_name, entity.name),
aspects=[StatusClass(removed=False)],
aspects=aspects,
)

entity_snapshot.aspects.append(
Expand All @@ -243,10 +267,20 @@ def _get_feature_workunit(
Generate an MLFeature work unit for a Feast feature.
"""
feature_view_name = f"{self.feature_store.project}.{feature_view.name}"
global_tags_aspect = None

if field.tags.get("name"):
tag_name = field.tags.get("name")
tag_association = TagAssociationClass(tag=builder.make_tag_urn(tag_name))
global_tags_aspect = GlobalTagsClass(tags=[tag_association])

aspects = [StatusClass(removed=False)]
if global_tags_aspect is not None:
aspects.append(global_tags_aspect)

feature_snapshot = MLFeatureSnapshot(
urn=builder.make_ml_feature_urn(feature_view_name, field.name),
aspects=[StatusClass(removed=False)],
aspects=aspects,
)

feature_sources = []
Expand Down Expand Up @@ -296,12 +330,29 @@ def _get_feature_view_workunit(self, feature_view: FeatureView) -> MetadataWorkU

feature_view_name = f"{self.feature_store.project}.{feature_view.name}"

aspects = [
BrowsePathsClass(paths=[f"/feast/{self.feature_store.project}"]),
StatusClass(removed=False),
]

if feature_view.tags.get("name"):
tag = feature_view.tags.get("name")
tag_association = TagAssociationClass(tag=builder.make_tag_urn(tag))
global_tags_aspect = GlobalTagsClass(tags=[tag_association])
margaridafernandes-trip marked this conversation as resolved.
Show resolved Hide resolved
aspects.append(global_tags_aspect)

if feature_view.owner:
margaridafernandes-trip marked this conversation as resolved.
Show resolved Hide resolved
owner = feature_view.owner
owner_association = OwnerClass(
owner=builder.make_owner_urn(owner, owner_type=builder.OwnerType.USER),
type=OwnershipTypeClass.TECHNICAL_OWNER,
)
owners_aspect = OwnershipClass(owners=[owner_association])
aspects.append(owners_aspect)

feature_view_snapshot = MLFeatureTableSnapshot(
urn=builder.make_ml_feature_table_urn("feast", feature_view_name),
aspects=[
BrowsePathsClass(paths=[f"/feast/{self.feature_store.project}"]),
StatusClass(removed=False),
],
aspects=aspects,
)

feature_view_snapshot.aspects.append(
Expand Down Expand Up @@ -366,6 +417,7 @@ def create(cls, config_dict, ctx):
return cls(config, ctx)

def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:

for feature_view in self.feature_store.list_feature_views():
for entity_name in feature_view.entities:
entity = self.feature_store.get_entity(entity_name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
},
{
"com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties": {
"customProperties": {},
margaridafernandes-trip marked this conversation as resolved.
Show resolved Hide resolved
"description": "Driver ID",
"dataType": "ORDINAL",
"sources": [
Expand All @@ -23,7 +24,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "feast-repository-test"
"runId": "feast-repository-test",
"lastRunId": "no-run-id-provided"
}
},
{
Expand All @@ -38,6 +40,7 @@
},
{
"com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": {
"customProperties": {},
"description": "Conv rate",
"dataType": "CONTINUOUS",
"sources": [
Expand All @@ -50,7 +53,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "feast-repository-test"
"runId": "feast-repository-test",
"lastRunId": "no-run-id-provided"
}
},
{
Expand All @@ -65,6 +69,7 @@
},
{
"com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": {
"customProperties": {},
"description": "Acc rate",
"dataType": "CONTINUOUS",
"sources": [
Expand All @@ -77,7 +82,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "feast-repository-test"
"runId": "feast-repository-test",
"lastRunId": "no-run-id-provided"
}
},
{
Expand All @@ -92,6 +98,7 @@
},
{
"com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": {
"customProperties": {},
"description": "Avg daily trips",
"dataType": "ORDINAL",
"sources": [
Expand All @@ -104,7 +111,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "feast-repository-test"
"runId": "feast-repository-test",
"lastRunId": "no-run-id-provided"
}
},
{
Expand All @@ -119,6 +127,7 @@
},
{
"com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": {
"customProperties": {},
"description": "String feature",
"dataType": "TEXT",
"sources": [
Expand All @@ -131,7 +140,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "feast-repository-test"
"runId": "feast-repository-test",
"lastRunId": "no-run-id-provided"
}
},
{
Expand Down Expand Up @@ -170,7 +180,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "feast-repository-test"
"runId": "feast-repository-test",
"lastRunId": "no-run-id-provided"
}
},
{
Expand All @@ -189,7 +200,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "feast-repository-test"
"runId": "feast-repository-test",
"lastRunId": "no-run-id-provided"
}
},
{
Expand All @@ -204,6 +216,7 @@
},
{
"com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": {
"customProperties": {},
"dataType": "CONTINUOUS",
"sources": [
"urn:li:dataset:(urn:li:dataPlatform:request,vals_to_add,PROD)",
Expand All @@ -216,7 +229,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "feast-repository-test"
"runId": "feast-repository-test",
"lastRunId": "no-run-id-provided"
}
},
{
Expand All @@ -231,6 +245,7 @@
},
{
"com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": {
"customProperties": {},
"dataType": "CONTINUOUS",
"sources": [
"urn:li:dataset:(urn:li:dataPlatform:request,vals_to_add,PROD)",
Expand All @@ -243,7 +258,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "feast-repository-test"
"runId": "feast-repository-test",
"lastRunId": "no-run-id-provided"
}
},
{
Expand Down Expand Up @@ -278,7 +294,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "feast-repository-test"
"runId": "feast-repository-test",
"lastRunId": "no-run-id-provided"
}
},
{
Expand All @@ -297,7 +314,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "feast-repository-test"
"runId": "feast-repository-test",
"lastRunId": "no-run-id-provided"
}
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
join_keys=["driver_id"],
value_type=ValueType.INT64,
description="Driver ID",
owner="Datahub",
tags={"name": "deprecated"},
)

driver_hourly_stats_view = FeatureView(
Expand All @@ -29,7 +31,7 @@
Field(
name="conv_rate",
dtype=feast.types.Float64,
tags=dict(description="Conv rate"),
tags={"name": "needs_documentation", "description": "Conv rate"},
),
Field(
name="acc_rate",
Expand All @@ -49,7 +51,8 @@
],
online=True,
source=driver_hourly_stats_source,
tags={},
tags={"name": "deprecated"},
owner="Datahub",
)

input_request = RequestSource(
Expand Down
Loading