-
Notifications
You must be signed in to change notification settings - Fork 2.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(SDK) Add StructuredPropertyPatchBuilder in python sdk and provide sample CRUD files (#10824)
- Loading branch information
1 parent
a7f4b71
commit b651d81
Showing
10 changed files
with
440 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
86 changes: 86 additions & 0 deletions
86
...tadata/aspect/patch/template/structuredproperty/StructuredPropertyDefinitionTemplate.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
package com.linkedin.metadata.aspect.patch.template.structuredproperty; | ||
|
||
import com.fasterxml.jackson.databind.JsonNode; | ||
import com.fasterxml.jackson.databind.node.ArrayNode; | ||
import com.linkedin.common.UrnArray; | ||
import com.linkedin.common.urn.UrnUtils; | ||
import com.linkedin.data.template.RecordTemplate; | ||
import com.linkedin.metadata.aspect.patch.template.CompoundKeyTemplate; | ||
import com.linkedin.structured.StructuredPropertyDefinition; | ||
import java.util.Collections; | ||
import javax.annotation.Nonnull; | ||
|
||
public class StructuredPropertyDefinitionTemplate | ||
extends CompoundKeyTemplate<StructuredPropertyDefinition> { | ||
|
||
private static final String ENTITY_TYPES_FIELD_NAME = "entityTypes"; | ||
private static final String ALLOWED_VALUES_FIELD_NAME = "allowedValues"; | ||
private static final String VALUE_FIELD_NAME = "value"; | ||
private static final String UNIT_SEPARATOR_DELIMITER = "␟"; | ||
|
||
@Override | ||
public StructuredPropertyDefinition getSubtype(RecordTemplate recordTemplate) | ||
throws ClassCastException { | ||
if (recordTemplate instanceof StructuredPropertyDefinition) { | ||
return (StructuredPropertyDefinition) recordTemplate; | ||
} | ||
throw new ClassCastException("Unable to cast RecordTemplate to StructuredPropertyDefinition"); | ||
} | ||
|
||
@Override | ||
public Class<StructuredPropertyDefinition> getTemplateType() { | ||
return StructuredPropertyDefinition.class; | ||
} | ||
|
||
@Nonnull | ||
@Override | ||
public StructuredPropertyDefinition getDefault() { | ||
StructuredPropertyDefinition definition = new StructuredPropertyDefinition(); | ||
definition.setQualifiedName(""); | ||
definition.setValueType(UrnUtils.getUrn("urn:li:dataType:datahub.string")); | ||
definition.setEntityTypes(new UrnArray()); | ||
|
||
return definition; | ||
} | ||
|
||
@Nonnull | ||
@Override | ||
public JsonNode transformFields(JsonNode baseNode) { | ||
JsonNode transformedNode = | ||
arrayFieldToMap(baseNode, ENTITY_TYPES_FIELD_NAME, Collections.emptyList()); | ||
|
||
if (transformedNode.get(ALLOWED_VALUES_FIELD_NAME) == null) { | ||
return transformedNode; | ||
} | ||
|
||
// allowedValues has a nested key - value.string or value.number depending on type. Mapping | ||
// needs this nested key | ||
JsonNode allowedValues = transformedNode.get(ALLOWED_VALUES_FIELD_NAME); | ||
if (((ArrayNode) allowedValues).size() > 0) { | ||
JsonNode allowedValue = ((ArrayNode) allowedValues).get(0); | ||
JsonNode value = allowedValue.get(VALUE_FIELD_NAME); | ||
String secondaryKeyName = value.fieldNames().next(); | ||
return arrayFieldToMap( | ||
transformedNode, | ||
ALLOWED_VALUES_FIELD_NAME, | ||
Collections.singletonList( | ||
VALUE_FIELD_NAME + UNIT_SEPARATOR_DELIMITER + secondaryKeyName)); | ||
} | ||
|
||
return arrayFieldToMap( | ||
transformedNode, ALLOWED_VALUES_FIELD_NAME, Collections.singletonList(VALUE_FIELD_NAME)); | ||
} | ||
|
||
@Nonnull | ||
@Override | ||
public JsonNode rebaseFields(JsonNode patched) { | ||
JsonNode patchedNode = | ||
transformedMapToArray(patched, ENTITY_TYPES_FIELD_NAME, Collections.emptyList()); | ||
|
||
if (patchedNode.get(ALLOWED_VALUES_FIELD_NAME) == null) { | ||
return patchedNode; | ||
} | ||
return transformedMapToArray( | ||
patchedNode, ALLOWED_VALUES_FIELD_NAME, Collections.singletonList(VALUE_FIELD_NAME)); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
24 changes: 24 additions & 0 deletions
24
metadata-ingestion/examples/library/dataset_add_structured_properties.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import logging | ||
|
||
from datahub.emitter.mce_builder import make_dataset_urn | ||
from datahub.emitter.rest_emitter import DataHubRestEmitter | ||
from datahub.specific.dataset import DatasetPatchBuilder | ||
|
||
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Create rest emitter
rest_emitter = DataHubRestEmitter(gms_server="http://localhost:8080")

dataset_urn = make_dataset_urn(platform="hive", name="fct_users_created", env="PROD")

# Structured property this example adds; kept in one place so the log line below
# always names the property that was actually patched.
property_name = "io.acryl.dataManagement.replicationSLA"

# build() returns an iterable of patch proposals; emit each one.
for patch_mcp in (
    DatasetPatchBuilder(dataset_urn)
    .add_structured_property(property_name, 12)
    .build()
):
    rest_emitter.emit(patch_mcp)


log.info(f"Added structured property {property_name} to dataset {dataset_urn}")
24 changes: 24 additions & 0 deletions
24
metadata-ingestion/examples/library/dataset_remove_structured_properties.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import logging | ||
|
||
from datahub.emitter.mce_builder import make_dataset_urn | ||
from datahub.emitter.rest_emitter import DataHubRestEmitter | ||
from datahub.specific.dataset import DatasetPatchBuilder | ||
|
||
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Create rest emitter
rest_emitter = DataHubRestEmitter(gms_server="http://localhost:8080")

dataset_urn = make_dataset_urn(platform="hive", name="fct_users_created", env="PROD")

# Structured property this example removes; kept in one place so the log line
# below always names the property that was actually patched.
property_name = "io.acryl.dataManagement.replicationSLA"

# build() returns an iterable of patch proposals; emit each one.
for patch_mcp in (
    DatasetPatchBuilder(dataset_urn)
    .remove_structured_property(property_name)
    .build()
):
    rest_emitter.emit(patch_mcp)


log.info(f"Removed structured property {property_name} from dataset {dataset_urn}")
24 changes: 24 additions & 0 deletions
24
metadata-ingestion/examples/library/dataset_update_structured_properties.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import logging | ||
|
||
from datahub.emitter.mce_builder import make_dataset_urn | ||
from datahub.emitter.rest_emitter import DataHubRestEmitter | ||
from datahub.specific.dataset import DatasetPatchBuilder | ||
|
||
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Create rest emitter
rest_emitter = DataHubRestEmitter(gms_server="http://localhost:8080")

dataset_urn = make_dataset_urn(platform="hive", name="fct_users_created", env="PROD")

# Structured property this example sets; kept in one place so the log line below
# always names the property that was actually patched.
property_name = "io.acryl.dataManagement.replicationSLA"

# build() returns an iterable of patch proposals; emit each one.
for patch_mcp in (
    DatasetPatchBuilder(dataset_urn)
    .set_structured_property(property_name, 120)
    .build()
):
    rest_emitter.emit(patch_mcp)


log.info(f"Set structured property {property_name} on dataset {dataset_urn}")
98 changes: 98 additions & 0 deletions
98
metadata-ingestion/examples/structured_properties/create_structured_property.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
import logging | ||
|
||
from datahub.emitter.mcp import MetadataChangeProposalWrapper | ||
from datahub.emitter.rest_emitter import DatahubRestEmitter | ||
|
||
# Imports for metadata model classes | ||
from datahub.metadata.schema_classes import ( | ||
PropertyValueClass, | ||
StructuredPropertyDefinitionClass, | ||
) | ||
from datahub.metadata.urns import StructuredPropertyUrn | ||
|
||
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Create rest emitter
rest_emitter = DatahubRestEmitter(gms_server="http://localhost:8080")

# first, let's make an open ended structured property that allows one text value
text_property_urn = StructuredPropertyUrn("openTextProperty")
text_property_definition = StructuredPropertyDefinitionClass(
    qualifiedName="io.acryl.openTextProperty",
    displayName="Open Text Property",
    valueType="urn:li:dataType:datahub.string",
    cardinality="SINGLE",
    entityTypes=[
        "urn:li:entityType:datahub.dataset",
        "urn:li:entityType:datahub.container",
    ],
    description="This structured property allows a single open ended response as a value",
    immutable=False,
)

event_prop_1: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
    entityUrn=str(text_property_urn),
    aspect=text_property_definition,
)
rest_emitter.emit(event_prop_1)

# next, let's make a property that allows for multiple datahub entity urns as values
# This example property could be used to reference other users or groups in datahub
urn_property_urn = StructuredPropertyUrn("dataSteward")
urn_property_definition = StructuredPropertyDefinitionClass(
    qualifiedName="io.acryl.dataManagement.dataSteward",
    displayName="Data Steward",
    valueType="urn:li:dataType:datahub.urn",
    cardinality="MULTIPLE",
    entityTypes=["urn:li:entityType:datahub.dataset"],
    description="The data stewards of this asset are in charge of ensuring data cleanliness and governance",
    immutable=True,
    typeQualifier={
        "allowedTypes": [
            "urn:li:entityType:datahub.corpuser",
            "urn:li:entityType:datahub.corpGroup",
        ]
    },  # this line ensures only user or group urns can be assigned as values
)

event_prop_2: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
    entityUrn=str(urn_property_urn),
    aspect=urn_property_definition,
)
rest_emitter.emit(event_prop_2)

# finally, let's make a single select number property with a few allowed options
number_property_urn = StructuredPropertyUrn("replicationSLA")
number_property_definition = StructuredPropertyDefinitionClass(
    qualifiedName="io.acryl.dataManagement.replicationSLA",
    displayName="Retention Time",
    valueType="urn:li:dataType:datahub.number",
    cardinality="SINGLE",
    entityTypes=[
        "urn:li:entityType:datahub.dataset",
        "urn:li:entityType:datahub.dataFlow",
    ],
    description="SLA for how long data can be delayed before replicating to the destination cluster",
    immutable=False,
    allowedValues=[
        PropertyValueClass(
            value=30,
            description="30 days, usually reserved for datasets that are ephemeral and contain pii",
        ),
        PropertyValueClass(
            value=90,
            description="Use this for datasets that drive monthly reporting but contain pii",
        ),
        PropertyValueClass(
            value=365,
            description="Use this for non-sensitive data that can be retained for longer",
        ),
    ],
)

event_prop_3: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
    entityUrn=str(number_property_urn),
    aspect=number_property_definition,
)
rest_emitter.emit(event_prop_3)
43 changes: 43 additions & 0 deletions
43
metadata-ingestion/examples/structured_properties/update_structured_property.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import logging | ||
from typing import Union | ||
|
||
from datahub.configuration.kafka import KafkaProducerConnectionConfig | ||
from datahub.emitter.kafka_emitter import DatahubKafkaEmitter, KafkaEmitterConfig | ||
from datahub.emitter.rest_emitter import DataHubRestEmitter | ||
from datahub.metadata.urns import StructuredPropertyUrn | ||
from datahub.specific.structured_property import StructuredPropertyPatchBuilder | ||
|
||
# Configure logging at INFO level and expose a module-level logger.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)
|
||
|
||
# Get an emitter, either REST or Kafka, this example shows you both
def get_emitter(
    use_rest_emitter: bool = True,
) -> Union[DataHubRestEmitter, DatahubKafkaEmitter]:
    """Build the emitter used by this example.

    Args:
        use_rest_emitter: When True (the default, matching the previous
            hard-coded behavior) a REST emitter pointed at
            http://localhost:8080 is returned. When False a Kafka emitter is
            returned instead, configured with bootstrap server localhost:9092
            and schema registry http://localhost:8081.

    Returns:
        A DataHubRestEmitter or a DatahubKafkaEmitter, depending on the flag.
    """
    if use_rest_emitter:
        gms_endpoint = "http://localhost:8080"
        return DataHubRestEmitter(gms_server=gms_endpoint)

    kafka_server = "localhost:9092"
    schema_registry_url = "http://localhost:8081"
    return DatahubKafkaEmitter(
        config=KafkaEmitterConfig(
            connection=KafkaProducerConnectionConfig(
                bootstrap=kafka_server, schema_registry_url=schema_registry_url
            )
        )
    )
|
||
|
||
# Replace with the ID of the structured property you want to patch.
property_urn = StructuredPropertyUrn("dataSteward")

with get_emitter() as emitter:
    # Describe the patch first, then emit every proposal that build() produces.
    patch_builder = (
        StructuredPropertyPatchBuilder(str(property_urn))
        .set_display_name("test display name")
        .set_cardinality("MULTIPLE")
        .add_entity_type("urn:li:entityType:datahub.dataJob")
    )
    for patch_mcp in patch_builder.build():
        emitter.emit(patch_mcp)
Oops, something went wrong.