From 2925edc88b617b74169a6c47a96345772f990976 Mon Sep 17 00:00:00 2001 From: Ayush Shah Date: Wed, 27 Dec 2023 14:38:19 +0530 Subject: [PATCH 1/3] Add __TABLES__ support for fetching metadata, bigquery --- .../orm/functions/table_metric_construct.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/ingestion/src/metadata/profiler/orm/functions/table_metric_construct.py b/ingestion/src/metadata/profiler/orm/functions/table_metric_construct.py index 491df4134749..f12120e492bb 100644 --- a/ingestion/src/metadata/profiler/orm/functions/table_metric_construct.py +++ b/ingestion/src/metadata/profiler/orm/functions/table_metric_construct.py @@ -178,9 +178,21 @@ def bigquery_table_construct(runner: QueryRunner, **kwargs): except AttributeError: raise AttributeError(ERROR_MSG) - conn_config = kwargs.get("conn_config") - conn_config = cast(BigQueryConnection, conn_config) + conn_config = cast(BigQueryConnection, kwargs.get("conn_config")) + where_clause = [ + Column("table_id") == table_name, + ] + + columns = [ + Column("row_count").label("rowCount"), + Column("size_bytes").label("sizeInBytes"), + Column("creation_time").label("createDateTime"), + ] + + table_metadata_deprecated = _build_table("__TABLES__", schema_name) + query = _build_query(columns, table_metadata_deprecated, where_clause) + return runner._session.execute(query).first() table_storage = _build_table( "TABLE_STORAGE", f"region-{conn_config.usageLocation}.INFORMATION_SCHEMA" ) From 451a594d19dc8b4b7d71a34763b38cdb1d00f017 Mon Sep 17 00:00:00 2001 From: Ayush Shah Date: Wed, 27 Dec 2023 14:50:41 +0530 Subject: [PATCH 2/3] Modify code logic --- .../orm/functions/table_metric_construct.py | 45 ++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/ingestion/src/metadata/profiler/orm/functions/table_metric_construct.py b/ingestion/src/metadata/profiler/orm/functions/table_metric_construct.py index f12120e492bb..31d90aa29f78 100644 --- 
a/ingestion/src/metadata/profiler/orm/functions/table_metric_construct.py +++ b/ingestion/src/metadata/profiler/orm/functions/table_metric_construct.py @@ -178,41 +178,44 @@ def bigquery_table_construct(runner: QueryRunner, **kwargs): except AttributeError: raise AttributeError(ERROR_MSG) - conn_config = cast(BigQueryConnection, kwargs.get("conn_config")) - where_clause = [ Column("table_id") == table_name, ] + col_names, col_count = _get_col_names_and_count(runner.table) columns = [ Column("row_count").label("rowCount"), Column("size_bytes").label("sizeInBytes"), Column("creation_time").label("createDateTime"), - ] - - table_metadata_deprecated = _build_table("__TABLES__", schema_name) - query = _build_query(columns, table_metadata_deprecated, where_clause) - return runner._session.execute(query).first() - table_storage = _build_table( - "TABLE_STORAGE", f"region-{conn_config.usageLocation}.INFORMATION_SCHEMA" - ) - col_names, col_count = _get_col_names_and_count(runner.table) - columns = [ - Column("total_rows").label("rowCount"), - Column("total_logical_bytes").label("sizeInBytes"), - Column("creation_time").label("createDateTime"), col_names, col_count, ] - where_clause = [ - Column("table_schema") == schema_name, - Column("table_name") == table_name, - ] + table_metadata_deprecated = _build_table("__TABLES__", f"{schema_name}") + query = _build_query(columns, table_metadata_deprecated, where_clause) + result = runner._session.execute(query).first() + if not result: + conn_config = cast(BigQueryConnection, kwargs.get("conn_config")) + table_storage = _build_table( + "TABLE_STORAGE", f"region-{conn_config.usageLocation}.INFORMATION_SCHEMA" + ) - query = _build_query(columns, table_storage, where_clause) + columns = [ + Column("total_rows").label("rowCount"), + Column("total_logical_bytes").label("sizeInBytes"), + Column("creation_time").label("createDateTime"), + col_names, + col_count, + ] - return runner._session.execute(query).first() + where_clause = [ + 
Column("table_schema") == schema_name, + Column("table_name") == table_name, + ] + + query = _build_query(columns, table_storage, where_clause) + + return runner._session.execute(query).first() def clickhouse_table_construct(runner: QueryRunner, **kwargs): From 6a31f685ebfe72a8e2f50c364c81dba2a2de031c Mon Sep 17 00:00:00 2001 From: Ayush Shah Date: Wed, 27 Dec 2023 14:52:45 +0530 Subject: [PATCH 3/3] Add return to results --- .../orm/functions/table_metric_construct.py | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/ingestion/src/metadata/profiler/orm/functions/table_metric_construct.py b/ingestion/src/metadata/profiler/orm/functions/table_metric_construct.py index 31d90aa29f78..230ff097b42f 100644 --- a/ingestion/src/metadata/profiler/orm/functions/table_metric_construct.py +++ b/ingestion/src/metadata/profiler/orm/functions/table_metric_construct.py @@ -194,28 +194,29 @@ def bigquery_table_construct(runner: QueryRunner, **kwargs): table_metadata_deprecated = _build_table("__TABLES__", f"{schema_name}") query = _build_query(columns, table_metadata_deprecated, where_clause) result = runner._session.execute(query).first() - if not result: - conn_config = cast(BigQueryConnection, kwargs.get("conn_config")) - table_storage = _build_table( - "TABLE_STORAGE", f"region-{conn_config.usageLocation}.INFORMATION_SCHEMA" - ) + if result: + return result + conn_config = cast(BigQueryConnection, kwargs.get("conn_config")) + table_storage = _build_table( + "TABLE_STORAGE", f"region-{conn_config.usageLocation}.INFORMATION_SCHEMA" + ) - columns = [ - Column("total_rows").label("rowCount"), - Column("total_logical_bytes").label("sizeInBytes"), - Column("creation_time").label("createDateTime"), - col_names, - col_count, - ] + columns = [ + Column("total_rows").label("rowCount"), + Column("total_logical_bytes").label("sizeInBytes"), + Column("creation_time").label("createDateTime"), + col_names, + col_count, + ] - where_clause = [ - 
Column("table_schema") == schema_name, - Column("table_name") == table_name, - ] + where_clause = [ + Column("table_schema") == schema_name, + Column("table_name") == table_name, + ] - query = _build_query(columns, table_storage, where_clause) + query = _build_query(columns, table_storage, where_clause) - return runner._session.execute(query).first() + return runner._session.execute(query).first() def clickhouse_table_construct(runner: QueryRunner, **kwargs):