From 7ebe7c3df2ff234cd38fdf1b42764324b157ee64 Mon Sep 17 00:00:00 2001
From: Hassan Al-Joubori
Date: Thu, 21 Jul 2022 16:19:57 +0100
Subject: [PATCH] Use information_schema for catalog data to limit permissions
 needed

Build the catalog's `tables` CTE from INFORMATION_SCHEMA.TABLES and
INFORMATION_SCHEMA.VIEWS instead of __TABLES__.

* relation.py: drop the identifier from the include policy for the two
  new information schema views, as is already done for __TABLES__.
* catalog.sql: row_count and size_bytes are now reported as 0 because
  they are no longer read from __TABLES__. Rows with table_type = 'VIEW'
  are excluded from the INFORMATION_SCHEMA.TABLES branch: views are
  supplied by the INFORMATION_SCHEMA.VIEWS branch, and selecting them in
  both branches would list every view twice (once labelled 'table').
* tests: cover replace() for both new information schema views.
---
 dbt/adapters/bigquery/relation.py       |  2 +-
 dbt/include/bigquery/macros/catalog.sql | 52 +++++++++++++++++--------
 tests/unit/test_bigquery_adapter.py     | 19 +++++++++
 3 files changed, 56 insertions(+), 17 deletions(-)

diff --git a/dbt/adapters/bigquery/relation.py b/dbt/adapters/bigquery/relation.py
index 7224de8cf..ab47f0b82 100644
--- a/dbt/adapters/bigquery/relation.py
+++ b/dbt/adapters/bigquery/relation.py
@@ -65,7 +65,7 @@ def get_include_policy(cls, relation, information_schema_view):
             schema = False
 
         identifier = True
-        if information_schema_view == "__TABLES__":
+        if information_schema_view in ("INFORMATION_SCHEMA.TABLES", "INFORMATION_SCHEMA.VIEWS", "__TABLES__"):
             identifier = False
 
         # In the future, let's refactor so that location/region can also be a
diff --git a/dbt/include/bigquery/macros/catalog.sql b/dbt/include/bigquery/macros/catalog.sql
index 6822d88a6..b3fb4ab83 100644
--- a/dbt/include/bigquery/macros/catalog.sql
+++ b/dbt/include/bigquery/macros/catalog.sql
@@ -9,28 +9,48 @@
   {%- set query -%}
     with tables as (
         select
-            project_id as table_database,
-            dataset_id as table_schema,
-            table_id as original_table_name,
+            table_catalog as table_database,
+            table_schema as table_schema,
+            table_name as original_table_name,
 
-            concat(project_id, '.', dataset_id, '.', table_id) as relation_id,
+            CONCAT(table_catalog, '.', table_schema, '.', table_name) as relation_id,
 
-            row_count,
-            size_bytes as size_bytes,
-            case
-                when type = 1 then 'table'
-                when type = 2 then 'view'
-                else 'external'
-            end as table_type,
+            0 as row_count,
+            0 as size_bytes,
+            case when table_type = 'EXTERNAL' then 'external' ELSE 'table' end as table_type,
+
+            REGEXP_CONTAINS(table_name, '^.+[0-9]{8}$') and table_type = 'BASE TABLE' as is_date_shard,
+            REGEXP_EXTRACT(table_name, '^(.+)[0-9]{8}$') as shard_base_name,
+            REGEXP_EXTRACT(table_name, '^.+([0-9]{8})$') as shard_name
+
+        from {{ information_schema.replace(information_schema_view='INFORMATION_SCHEMA.TABLES') }}
+        where table_type != 'VIEW' and (
+          {%- for schema in schemas -%}
+            upper(table_schema) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%}
+          {%- endfor -%}
+        )
+
+        union all
+
+        select
+            table_catalog as table_database,
+            table_schema as table_schema,
+            table_name as original_table_name,
+
+            CONCAT(table_catalog, '.', table_schema, '.', table_name) as relation_id,
+
+            0 as row_count,
+            0 as size_bytes,
+            'view' as table_type,
 
-            REGEXP_CONTAINS(table_id, '^.+[0-9]{8}$') and coalesce(type, 0) = 1 as is_date_shard,
-            REGEXP_EXTRACT(table_id, '^(.+)[0-9]{8}$') as shard_base_name,
-            REGEXP_EXTRACT(table_id, '^.+([0-9]{8})$') as shard_name
+            false as is_date_shard,
+            REGEXP_EXTRACT(table_name, '^(.+)[0-9]{8}$') as shard_base_name,
+            REGEXP_EXTRACT(table_name, '^.+([0-9]{8})$') as shard_name
 
-        from {{ information_schema.replace(information_schema_view='__TABLES__') }}
+        from {{ information_schema.replace(information_schema_view='INFORMATION_SCHEMA.VIEWS') }}
         where (
           {%- for schema in schemas -%}
-            upper(dataset_id) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%}
+            upper(table_schema) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%}
           {%- endfor -%}
         )
     ),
diff --git a/tests/unit/test_bigquery_adapter.py b/tests/unit/test_bigquery_adapter.py
index 188c19b7c..2cfab5858 100644
--- a/tests/unit/test_bigquery_adapter.py
+++ b/tests/unit/test_bigquery_adapter.py
@@ -482,6 +482,25 @@ def test_replace(self):
         assert tables_schema.quote_policy.identifier is False
         assert tables_schema.quote_policy.database is False
 
+        tables_schema = info_schema.replace(information_schema_view='INFORMATION_SCHEMA.TABLES')
+        assert tables_schema.information_schema_view == 'INFORMATION_SCHEMA.TABLES'
+        assert tables_schema.include_policy.schema is True
+        assert tables_schema.include_policy.identifier is False
+        assert tables_schema.include_policy.database is True
+        assert tables_schema.quote_policy.schema is True
+        assert tables_schema.quote_policy.identifier is False
+        assert tables_schema.quote_policy.database is False
+
+
+        tables_schema = info_schema.replace(information_schema_view='INFORMATION_SCHEMA.VIEWS')
+        assert tables_schema.information_schema_view == 'INFORMATION_SCHEMA.VIEWS'
+        assert tables_schema.include_policy.schema is True
+        assert tables_schema.include_policy.identifier is False
+        assert tables_schema.include_policy.database is True
+        assert tables_schema.quote_policy.schema is True
+        assert tables_schema.quote_policy.identifier is False
+        assert tables_schema.quote_policy.database is False
+
         schemata_schema = info_schema.replace(information_schema_view='SCHEMATA')
         assert schemata_schema.information_schema_view == 'SCHEMATA'
         assert schemata_schema.include_policy.schema is False