From 6befd1c4546a62cffee9ff4b231b852f18ca07b6 Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Fri, 2 Aug 2024 15:05:10 +0530 Subject: [PATCH 1/3] bump the sqlglot version --- metadata-ingestion/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 20a43a94f6bda..f48181d2b57cd 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -99,7 +99,7 @@ sqlglot_lib = { # Using an Acryl fork of sqlglot. # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:main?expand=1 - "acryl-sqlglot[rs]==25.3.1.dev3", + "acryl-sqlglot[rs]==25.8.2.dev9", } classification_lib = { From 2aee5f745eeb5a23ba7a5f2d37064301e00d9feb Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Thu, 8 Aug 2024 20:56:20 +0530 Subject: [PATCH 2/3] update golden files --- .../goldens/test_bigquery_information_schema_query.json | 4 ---- .../tests/unit/sql_parsing/goldens/test_merge_from_union.json | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json index 4b9bbd06ecba6..417ab611c5df7 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json @@ -117,10 +117,6 @@ { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-staging-2.smoke_test_db_4.INFORMATION_SCHEMA.COLUMNS,PROD)", "column": "data_type" - }, - { - "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-staging-2.smoke_test_db_4.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS,PROD)", - "column": "field_path" } ] }, diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_merge_from_union.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_merge_from_union.json index 4ba44d9e54c9d..1a75dde4c634f 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_merge_from_union.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_merge_from_union.json @@ -1,7 +1,7 @@ { "query_type": "MERGE", "query_type_props": {}, - "query_fingerprint": "38a78af8cc48333df0e4de7d6af5b9507a87dd8a2f129ef97c9b06dce2ca7b9f", + "query_fingerprint": "8001b852498d94a7f0f532dcd8cfa05328981ba437df6314466c764cc408969c", "in_tables": [ "urn:li:dataset:(urn:li:dataPlatform:bigquery,demo-pipelines-stg.referrer.prep_from_ios,PROD)", "urn:li:dataset:(urn:li:dataPlatform:bigquery,demo-pipelines-stg.referrer.prep_from_web,PROD)" @@ -12,6 +12,6 @@ "column_lineage": null, "debug_info": { "confidence": 0.2, - "generalized_statement": "MERGE INTO `demo-pipelines-stg`.`referrer`.`base_union` AS DBT_INTERNAL_DEST USING (SELECT * FROM `demo-pipelines-stg`.`referrer`.`prep_from_ios` WHERE partition_time = ? UNION ALL SELECT * FROM `demo-pipelines-stg`.`referrer`.`prep_from_web` WHERE partition_time = ?) AS DBT_INTERNAL_SOURCE ON FALSE WHEN NOT MATCHED BY SOURCE AND timestamp_trunc(DBT_INTERNAL_DEST.partition_time, DAY) IN (TIMESTAMP(?)) THEN delete WHEN NOT MATCHED THEN INSERT (`platform`, `pageview_id`, `query`, `referrer`, `partition_time`) VALUES (`platform`, `pageview_id`, `query`, `referrer`, `partition_time`)" + "generalized_statement": "MERGE INTO `demo-pipelines-stg`.`referrer`.`base_union` AS DBT_INTERNAL_DEST USING (SELECT * FROM `demo-pipelines-stg`.`referrer`.`prep_from_ios` WHERE partition_time = ? UNION ALL SELECT * FROM `demo-pipelines-stg`.`referrer`.`prep_from_web` WHERE partition_time = ?) AS DBT_INTERNAL_SOURCE ON FALSE WHEN NOT MATCHED BY SOURCE AND timestamp_trunc(DBT_INTERNAL_DEST.partition_time, DAY) IN (timestamp(?)) THEN delete WHEN NOT MATCHED THEN INSERT (`platform`, `pageview_id`, `query`, `referrer`, `partition_time`) VALUES (`platform`, `pageview_id`, `query`, `referrer`, `partition_time`)" } } \ No newline at end of file From 1a0882568c2fad70981d0fbc94a5f41afb5efed2 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 8 Aug 2024 20:55:02 -0700 Subject: [PATCH 3/3] fix test --- .../goldens/test_bigquery_information_schema_query.json | 8 ++++++-- .../tests/unit/sql_parsing/test_sqlglot_lineage.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json index 417ab611c5df7..f5f573f3d5113 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json @@ -1,7 +1,7 @@ { "query_type": "SELECT", "query_type_props": {}, - "query_fingerprint": "772187d1c6ce8dbed2dd1ba79975b108d4e733015ffb7bcbf9b7146e64cf9914", + "query_fingerprint": "c721ce16410601b36e5f32bd9c5c28488500a93e617363739faebfe71496f163", "in_tables": [ "urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-staging-2.smoke_test_db_4.INFORMATION_SCHEMA.COLUMNS,PROD)", "urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-staging-2.smoke_test_db_4.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS,PROD)" @@ -117,6 +117,10 @@ { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-staging-2.smoke_test_db_4.INFORMATION_SCHEMA.COLUMNS,PROD)", "column": "data_type" + }, + { + "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-staging-2.smoke_test_db_4.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS,PROD)", + "column": "field_path" } ] }, @@ -174,6 +178,6 @@ ], "debug_info": { "confidence": 0.2, - "generalized_statement": "SELECT c.table_catalog AS table_catalog, c.table_schema AS table_schema, c.table_name AS table_name, c.column_name AS column_name, c.ordinal_position AS ordinal_position, cfp.field_path AS field_path, c.is_nullable AS is_nullable, CASE WHEN CONTAINS_SUBSTR(field_path, ?) THEN NULL ELSE c.data_type END AS data_type, description AS comment, c.is_hidden AS is_hidden, c.is_partitioning_column AS is_partitioning_column, c.clustering_ordinal_position AS clustering_ordinal_position FROM `acryl-staging-2`.`smoke_test_db_4`.INFORMATION_SCHEMA.COLUMNS AS c JOIN `acryl-staging-2`.`smoke_test_db_4`.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS AS cfp ON cfp.table_name = c.table_name AND cfp.column_name = c.column_name ORDER BY table_catalog, table_schema, table_name, ordinal_position ASC, data_type DESC" + "generalized_statement": "SELECT c.table_catalog AS table_catalog, c.table_schema AS table_schema, c.table_name AS table_name, c.column_name AS column_name, c.ordinal_position AS ordinal_position, cfp.field_path AS field_path, c.is_nullable AS is_nullable, CASE WHEN CONTAINS_SUBSTR(cfp.field_path, ?) THEN NULL ELSE c.data_type END AS data_type, description AS comment, c.is_hidden AS is_hidden, c.is_partitioning_column AS is_partitioning_column, c.clustering_ordinal_position AS clustering_ordinal_position FROM `acryl-staging-2`.`smoke_test_db_4`.INFORMATION_SCHEMA.COLUMNS AS c JOIN `acryl-staging-2`.`smoke_test_db_4`.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS AS cfp ON cfp.table_name = c.table_name AND cfp.column_name = c.column_name ORDER BY table_catalog, table_schema, table_name, ordinal_position ASC, data_type DESC" } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index e5b669329f16c..887d93170a0d6 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -1188,7 +1188,7 @@ def test_bigquery_information_schema_query() -> None: c.ordinal_position as ordinal_position, cfp.field_path as field_path, c.is_nullable as is_nullable, - CASE WHEN CONTAINS_SUBSTR(field_path, ".") THEN NULL ELSE c.data_type END as data_type, + CASE WHEN CONTAINS_SUBSTR(cfp.field_path, ".") THEN NULL ELSE c.data_type END as data_type, description as comment, c.is_hidden as is_hidden, c.is_partitioning_column as is_partitioning_column,