diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py b/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py index db702df9ddc92..944b8a080cb57 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py @@ -130,8 +130,8 @@ class HiveMetastore(BasicSQLAlchemyConfig): ) enable_properties_merge: bool = Field( - default=False, - description="By default, the connector overwrites properties every time. Set this to True to enable merging of properties with what exists on the server.", + default=True, + description="By default, the connector enables merging of properties with what exists on the server. Set this to False to enable the default connector behavior of overwriting properties on each ingestion.", ) simplify_nested_field_paths: bool = Field( diff --git a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_1.json b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_1.json index 3b07d651d0dcf..2fad0643e5027 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_1.json +++ b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_1.json @@ -211,6 +211,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "map_test" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -272,23 +337,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-11-24" - }, - "name": "map_test", - "tags": [] - } } ] } @@ -358,6 +406,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "union_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -476,23 +589,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-11-24" - }, - "name": "union_test", - "tags": [] - } } ] } @@ -562,6 +658,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258695" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -672,23 +833,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "numRows": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-11-24" - }, - "name": "nested_struct_test", - "tags": [] - } } ] } @@ -758,6 +902,86 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "array_struct_test" + }, + { + "op": "add", + "path": "/description", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/another.comment", + "value": "This table has no partitions" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "32" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258689" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "33" + }, + { + "op": "add", + "path": "/customProperties/comment", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -852,26 +1076,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "another.comment": "This table has no partitions", - "numRows": "1", - "rawDataSize": "32", - "totalSize": "33", - "numFiles": "1", - "transient_lastDdlTime": "1700805674", - "comment": "This table has array of structs", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-11-24" - }, - "name": "array_struct_test", - "description": "This table has array of structs", - "tags": [] - } } ] } @@ -941,6 +1145,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "struct_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1029,23 +1298,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-11-24" - }, - "name": "struct_test", - "tags": [] - } } ] } @@ -1115,6 +1367,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1173,23 +1490,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-11-24" - }, - "name": "_test_table_underscore", - "tags": [] - } } ] } @@ -1259,6 +1559,51 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "pokes" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258672" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + }, + { + "op": "add", + "path": "/customProperties/partitioned_columns", + "value": "baz" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1290,31 +1635,31 @@ }, "fields": [ { - "fieldPath": "[version=2.0].[type=int].foo", + "fieldPath": "[version=2.0].[type=string].baz", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "int", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false, - "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + "isPartitioningKey": true, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { - "fieldPath": "[version=2.0].[type=string].baz", + "fieldPath": "[version=2.0].[type=int].foo", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int", "recursive": false, "isPartOfKey": false, - "isPartitioningKey": true, - "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" }, { "fieldPath": "[version=2.0].[type=string].bar", @@ -1331,19 +1676,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805669", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-11-24", - "partitioned_columns": "baz" - }, - "name": "pokes", - "tags": [] - } } ] } diff --git a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_2.json b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_2.json index 8dfed3de760cc..58e1e11c8dd76 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_2.json +++ b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_2.json @@ -211,6 +211,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1.map_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "map_test" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -272,23 +337,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-11-24" - }, - "name": "map_test", - "tags": [] - } } ] } @@ -358,6 +406,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1.union_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "union_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -476,23 +589,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-11-24" - }, - "name": "union_test", - "tags": [] - } } ] } @@ -562,6 +658,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1.nested_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258695" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -672,23 +833,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "numRows": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-11-24" - }, - "name": "nested_struct_test", - "tags": [] - } } ] } @@ -758,6 +902,86 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1.array_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "array_struct_test" + }, + { + "op": "add", + "path": "/description", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/another.comment", + "value": "This table has no partitions" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "32" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258689" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "33" + }, + { + "op": "add", + "path": "/customProperties/comment", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -852,26 +1076,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "another.comment": "This table has no partitions", - "numRows": "1", - "rawDataSize": "32", - "totalSize": "33", - "numFiles": "1", - "transient_lastDdlTime": "1700805674", - "comment": "This table has array of structs", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-11-24" - }, - "name": "array_struct_test", - "description": "This table has array of structs", - "tags": [] - } } ] } @@ -941,6 +1145,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1.struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "struct_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1029,23 +1298,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-11-24" - }, - "name": "struct_test", - "tags": [] - } } ] } @@ -1115,6 +1367,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1._test_table_underscore,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1173,23 +1490,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-11-24" - }, - "name": "_test_table_underscore", - "tags": [] - } } ] } @@ -1259,6 +1559,51 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1.pokes,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "pokes" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258672" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + }, + { + "op": "add", + "path": "/customProperties/partitioned_columns", + "value": "baz" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1290,31 +1635,31 @@ }, "fields": [ { - "fieldPath": "[version=2.0].[type=int].foo", + "fieldPath": "[version=2.0].[type=string].baz", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "int", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false, - "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + "isPartitioningKey": true, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { - "fieldPath": "[version=2.0].[type=string].baz", + "fieldPath": "[version=2.0].[type=int].foo", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int", "recursive": false, "isPartOfKey": false, - "isPartitioningKey": true, - "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" }, { "fieldPath": "[version=2.0].[type=string].bar", @@ -1331,19 +1676,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805669", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-11-24", - "partitioned_columns": "baz" - }, - "name": "pokes", - "tags": [] - } } ] } diff --git a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_3.json b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_3.json index b88149cd333e9..78db506868679 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_3.json +++ b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_3.json @@ -211,6 +211,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.map_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "map_test" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -272,23 +337,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-11-24" - }, - "name": "map_test", - "tags": [] - } } ] } @@ -358,6 +406,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.union_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "union_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -476,23 +589,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-11-24" - }, - "name": "union_test", - "tags": [] - } } ] } @@ -562,6 +658,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.nested_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258695" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -672,23 +833,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "numRows": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-11-24" - }, - "name": "nested_struct_test", - "tags": [] - } } ] } @@ -758,6 +902,86 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.array_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "array_struct_test" + }, + { + "op": "add", + "path": "/description", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/another.comment", + "value": "This table has no partitions" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "32" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258689" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "33" + }, + { + "op": "add", + "path": "/customProperties/comment", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -852,26 +1076,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "another.comment": "This table has no partitions", - "numRows": "1", - "rawDataSize": "32", - "totalSize": "33", - "numFiles": "1", - "transient_lastDdlTime": "1700805674", - "comment": "This table has array of structs", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-11-24" - }, - "name": "array_struct_test", - "description": "This table has array of structs", - "tags": [] - } } ] } @@ -941,6 +1145,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "struct_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1029,23 +1298,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-11-24" - }, - "name": "struct_test", - "tags": [] - } } ] } @@ -1115,6 +1367,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1._test_table_underscore,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1173,23 +1490,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-11-24" - }, - "name": "_test_table_underscore", - "tags": [] - } } ] } @@ -1259,6 +1559,51 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.pokes,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "pokes" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258672" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + }, + { + "op": "add", + "path": "/customProperties/partitioned_columns", + "value": "baz" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1290,31 +1635,31 @@ }, "fields": [ { - "fieldPath": "[version=2.0].[type=int].foo", + "fieldPath": "[version=2.0].[type=string].baz", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "int", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false, - "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + "isPartitioningKey": true, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { - "fieldPath": "[version=2.0].[type=string].baz", + "fieldPath": "[version=2.0].[type=int].foo", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int", "recursive": false, "isPartOfKey": false, - "isPartitioningKey": true, - "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" }, { "fieldPath": "[version=2.0].[type=string].bar", @@ -1331,19 +1676,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805669", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-11-24", - "partitioned_columns": "baz" - }, - "name": "pokes", - "tags": [] - } } ] } diff --git a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_4.json b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_4.json index aecb60f6347d3..193e1e23b9de4 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_4.json +++ b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_4.json @@ -211,6 +211,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.map_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "map_test" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -272,23 +337,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-11-24" - }, - "name": "map_test", - "tags": [] - } } ] } @@ -358,6 +406,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.union_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "union_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -476,23 +589,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-11-24" - }, - "name": "union_test", - "tags": [] - } } ] } @@ -562,6 +658,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.nested_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258695" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -672,23 +833,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "numRows": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-11-24" - }, - "name": "nested_struct_test", - "tags": [] - } } ] } @@ -758,6 +902,86 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.array_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "array_struct_test" + }, + { + "op": "add", + "path": "/description", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/another.comment", + "value": "This table has no partitions" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "32" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258689" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "33" + }, + { + "op": "add", + "path": "/customProperties/comment", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -852,26 +1076,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "another.comment": "This table has no partitions", - "numRows": "1", - "rawDataSize": "32", - "totalSize": "33", - "numFiles": "1", - "transient_lastDdlTime": "1700805674", - "comment": "This table has array of structs", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-11-24" - }, - "name": "array_struct_test", - "description": "This table has array of structs", - "tags": [] - } } ] } @@ -941,6 +1145,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "struct_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1029,23 +1298,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-11-24" - }, - "name": "struct_test", - "tags": [] - } } ] } @@ -1115,6 +1367,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1._test_table_underscore,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1173,23 +1490,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-11-24" - }, - "name": "_test_table_underscore", - "tags": [] - } } ] } @@ -1259,6 +1559,51 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.pokes,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "pokes" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258672" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + }, + { + "op": "add", + "path": "/customProperties/partitioned_columns", + "value": "baz" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1290,31 +1635,31 @@ }, "fields": [ { - "fieldPath": "[version=2.0].[type=int].foo", + "fieldPath": "[version=2.0].[type=string].baz", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "int", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false, - "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + "isPartitioningKey": true, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { - "fieldPath": "[version=2.0].[type=string].baz", + "fieldPath": "[version=2.0].[type=int].foo", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int", "recursive": false, "isPartOfKey": false, - "isPartitioningKey": true, - "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" }, { "fieldPath": "[version=2.0].[type=string].bar", @@ -1331,19 +1676,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805669", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-11-24", - "partitioned_columns": "baz" - }, - "name": "pokes", - "tags": [] - } } ] } diff --git a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_5.json b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_5.json index dff32615d1bdf..ce7ebdd299579 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_5.json +++ b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_5.json @@ -211,6 +211,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "map_test" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -272,23 +337,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-11-24" - }, - "name": "map_test", - "tags": [] - } } ] } @@ -358,6 +406,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "union_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -476,23 +589,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-11-24" - }, - "name": "union_test", - "tags": [] - } } ] } @@ -562,6 +658,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258695" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -672,23 +833,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "numRows": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-11-24" - }, - "name": "nested_struct_test", - "tags": [] - } } ] } @@ -758,6 +902,86 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "array_struct_test" + }, + { + "op": "add", + "path": "/description", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/another.comment", + "value": "This table has no partitions" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "32" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258689" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "33" + }, + { + "op": "add", + "path": "/customProperties/comment", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -852,26 +1076,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "another.comment": "This table has no partitions", - "numRows": "1", - "rawDataSize": "32", - "totalSize": "33", - "numFiles": "1", - "transient_lastDdlTime": "1700805674", - "comment": "This table has array of structs", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-11-24" - }, - "name": "array_struct_test", - "description": "This table has array of structs", - "tags": [] - } } ] } @@ -941,6 +1145,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "struct_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1029,23 +1298,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-11-24" - }, - "name": "struct_test", - "tags": [] - } } ] } @@ -1115,6 +1367,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1173,23 +1490,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-11-24" - }, - "name": "_test_table_underscore", - "tags": [] - } } ] } @@ -1259,6 +1559,51 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "pokes" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258672" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + }, + { + "op": "add", + "path": "/customProperties/partitioned_columns", + "value": "baz" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1290,31 +1635,31 @@ }, "fields": [ { - "fieldPath": "foo", + "fieldPath": "baz", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "int", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false, - "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + "isPartitioningKey": true, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { - "fieldPath": "baz", + "fieldPath": "foo", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int", "recursive": false, "isPartOfKey": false, - "isPartitioningKey": true, - "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" }, { "fieldPath": "bar", @@ -1331,19 +1676,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805669", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-11-24", - "partitioned_columns": "baz" - }, - "name": "pokes", - "tags": [] - } } ] }