From f0c81d33da47661e90522855579ffdada3fbe31e Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Thu, 16 May 2024 18:40:01 -0400 Subject: [PATCH 1/2] feat(propagation): Add models for metadata attribution (#2959) --- .../com/linkedin/common/Documentation.pdl | 16 ++++++++++ .../common/DocumentationAssociation.pdl | 30 +++++++++++++++++++ .../common/GlossaryTermAssociation.pdl | 19 ++++++++++++ .../linkedin/common/MetadataAttribution.pdl | 29 ++++++++++++++++++ .../com/linkedin/common/TagAssociation.pdl | 19 ++++++++++++ .../schema/EditableSchemaFieldInfo.pdl | 28 +++++++++++++++-- .../com/linkedin/schema/SchemaField.pdl | 28 +++++++++++++++-- .../src/main/resources/entity-registry.yml | 3 +- 8 files changed, 167 insertions(+), 5 deletions(-) create mode 100644 metadata-models/src/main/pegasus/com/linkedin/common/Documentation.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/common/DocumentationAssociation.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/common/MetadataAttribution.pdl diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/Documentation.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/Documentation.pdl new file mode 100644 index 0000000000000..d40865c019677 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/common/Documentation.pdl @@ -0,0 +1,16 @@ +namespace com.linkedin.common + +/** + * Aspect used for applying documentation to assets + * NOTE: This is an experimental aspect. Please use cautiously. + */ +@Aspect = { + "name": "documentation" +} +record Documentation { + + /** + * Documentations associated with this asset. 
We could be receiving docs from different sources + */ + documentations: array[DocumentationAssociation] +} diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/DocumentationAssociation.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/DocumentationAssociation.pdl new file mode 100644 index 0000000000000..19404346797bb --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/common/DocumentationAssociation.pdl @@ -0,0 +1,30 @@ +namespace com.linkedin.common + +/** + * Properties of applied documentation including the attribution of the doc + */ +record DocumentationAssociation { + /** + * Description of this asset + */ + documentation: string + + /** + * Information about who, why, and how this metadata was applied + */ + @Searchable = { + "/time": { + "fieldName": "documentationAttributionDates", + "fieldType": "DATETIME" + }, + "/actor": { + "fieldName": "documentationAttributionActors", + "fieldType": "URN" + }, + "/source": { + "fieldName": "documentationAttributionSources", + "fieldType": "URN" + }, + } + attribution: optional MetadataAttribution +} diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/GlossaryTermAssociation.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/GlossaryTermAssociation.pdl index 80dc07981816a..a5267bbc635e4 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/common/GlossaryTermAssociation.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/GlossaryTermAssociation.pdl @@ -30,4 +30,23 @@ record GlossaryTermAssociation { */ context: optional string + /** + * Information about who, why, and how this metadata was applied + */ + @Searchable = { + "/time": { + "fieldName": "termAttributionDates", + "fieldType": "DATETIME" + }, + "/actor": { + "fieldName": "termAttributionActors", + "fieldType": "URN" + }, + "/source": { + "fieldName": "termAttributionSources", + "fieldType": "URN" + }, + } + attribution: optional MetadataAttribution + } diff --git 
a/metadata-models/src/main/pegasus/com/linkedin/common/MetadataAttribution.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/MetadataAttribution.pdl new file mode 100644 index 0000000000000..4691ed6323ea1 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/common/MetadataAttribution.pdl @@ -0,0 +1,29 @@ +namespace com.linkedin.common + +/** + * Information about who, why, and how this metadata was applied + */ +record MetadataAttribution { + /** + * When this metadata was updated. + */ + time: Time + + /** + * The entity (e.g. a member URN) responsible for applying the associated metadata. This can + * either be a user (in case of UI edits) or the datahub system for automation. + */ + actor: Urn + + /** + * The DataHub source responsible for applying the associated metadata. This will only be filled out + * when a DataHub source is responsible. This includes the specific metadata test urn, the automation urn. + */ + source: optional Urn + + /** + * The details associated with why this metadata was applied. For example, this could include + * the actual regex rule, sql statement, ingestion pipeline ID, etc. 
+ */ + sourceDetail: map[string, string] = { } +} diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/TagAssociation.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/TagAssociation.pdl index 904371fb7b49e..8a58ca97de195 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/common/TagAssociation.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/TagAssociation.pdl @@ -14,4 +14,23 @@ record TagAssociation { * Additional context about the association */ context: optional string + + /** + * Information about who, why, and how this metadata was applied + */ + @Searchable = { + "/time": { + "fieldName": "tagAttributionDates", + "fieldType": "DATETIME" + }, + "/actor": { + "fieldName": "tagAttributionActors", + "fieldType": "URN" + }, + "/source": { + "fieldName": "tagAttributionSources", + "fieldType": "URN" + }, + } + attribution: optional MetadataAttribution } diff --git a/metadata-models/src/main/pegasus/com/linkedin/schema/EditableSchemaFieldInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/schema/EditableSchemaFieldInfo.pdl index 4e6e135ae05da..816277bd1e0c9 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/schema/EditableSchemaFieldInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/schema/EditableSchemaFieldInfo.pdl @@ -36,7 +36,19 @@ record EditableSchemaFieldInfo { "fieldName": "editedFieldTags", "fieldType": "URN", "boostScore": 0.5 - } + }, + "/tags/*/attribution/time": { + "fieldName": "editedFieldTagAttributionDates", + "fieldType": "DATETIME" + }, + "/tags/*/attribution/actor": { + "fieldName": "editedFieldTagAttributionActors", + "fieldType": "URN" + }, + "/tags/*/attribution/source": { + "fieldName": "editedFieldTagAttributionSources", + "fieldType": "URN" + }, } globalTags: optional GlobalTags @@ -54,7 +66,19 @@ record EditableSchemaFieldInfo { "fieldName": "editedFieldGlossaryTerms", "fieldType": "URN", "boostScore": 0.5 - } + }, + "/terms/*/attribution/time": { + "fieldName": 
"editedFieldTermAttributionDates", + "fieldType": "DATETIME" + }, + "/terms/*/attribution/actor": { + "fieldName": "editedFieldTermAttributionActors", + "fieldType": "URN" + }, + "/terms/*/attribution/source": { + "fieldName": "editedFieldTermAttributionSources", + "fieldType": "URN" + }, } glossaryTerms: optional GlossaryTerms } diff --git a/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl b/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl index 4874d1081bc26..afb0263057b6d 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl @@ -96,7 +96,19 @@ record SchemaField { "fieldName": "fieldTags", "fieldType": "URN", "boostScore": 0.5 - } + }, + "/tags/*/attribution/time": { + "fieldName": "fieldTagAttributionDates", + "fieldType": "DATETIME" + }, + "/tags/*/attribution/actor": { + "fieldName": "fieldTagAttributionActors", + "fieldType": "URN" + }, + "/tags/*/attribution/source": { + "fieldName": "fieldTagAttributionSources", + "fieldType": "URN" + }, } globalTags: optional GlobalTags @@ -114,7 +126,19 @@ record SchemaField { "fieldName": "fieldGlossaryTerms", "fieldType": "URN", "boostScore": 0.5 - } + }, + "/terms/*/attribution/time": { + "fieldName": "fieldTermAttributionDates", + "fieldType": "DATETIME" + }, + "/terms/*/attribution/actor": { + "fieldName": "fieldTermAttributionActors", + "fieldType": "URN" + }, + "/terms/*/attribution/source": { + "fieldName": "fieldTermAttributionSources", + "fieldType": "URN" + }, } glossaryTerms: optional GlossaryTerms diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 6006ca179d162..3af4af5e4767e 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -454,6 +454,7 @@ entities: - structuredProperties - forms - businessAttributes + - 
documentation - name: globalSettings doc: Global settings for an the platform category: internal @@ -670,4 +671,4 @@ plugins: spring: enabled: true packageScan: - - com.linkedin.gms.factory.plugins \ No newline at end of file + - com.linkedin.gms.factory.plugins From afda3fd49ec0cdeca753c5ede0ae969eb5576bd8 Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Mon, 29 Jul 2024 10:32:26 -0700 Subject: [PATCH 2/2] docs update --- .../com/linkedin/common/Documentation.pdl | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/Documentation.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/Documentation.pdl index d40865c019677..7ac3d2612077b 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/common/Documentation.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/Documentation.pdl @@ -1,8 +1,24 @@ namespace com.linkedin.common /** - * Aspect used for applying documentation to assets - * NOTE: This is an experimental aspect. Please use cautiously. + * Aspect used for storing all applicable documentations on assets. + * This aspect supports multiple documentations from different sources. + * There is an implicit assumption that there is only one documentation per + source. + * For example, if there are two documentations from the same source, the + latest one will overwrite the previous one. + * If there are two documentations from different sources, both will be + stored. + * Future evolution considerations: + * The first entity that uses this aspect is Schema Field. We will expand this + aspect to other entities eventually. + * The values of the documentation are not currently searchable. This will be + changed once this aspect develops an opinion on which documentation entry is + the authoritative one. + * Ensuring that there is only one documentation per source is a business + rule that is not enforced by the aspect yet. 
This will currently be enforced by the + application that uses this aspect. We will eventually enforce this rule in + the aspect using AspectMutators. */ @Aspect = { "name": "documentation"