Merge pull request #2324 from fishtown-analytics/feature/sf-describe-columns-in-table

Use "describe table" to get the columns in a relation on snowflake (#2260)
dbt-labs · Apr 16, 2020 · 59477a3 · 59477a3
2 parents 51278c0 + bc38750
commit 59477a3
Show file tree

Hide file tree

Showing 5 changed files with 144 additions and 24 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,7 @@
 - Added support for `db_groups` and `autocreate` flags in Redshift configurations. ([#1995](https://github.com/fishtown-analytics/dbt/issues/1995), [#2262](https://github.com/fishtown-analytics/dbt/pull/2262))
 - Users can supply paths as arguments to `--models` and `--select`, either explicitly by prefixing with `path:` or implicitly with no prefix. ([#454](https://github.com/fishtown-analytics/dbt/issues/454), [#2258](https://github.com/fishtown-analytics/dbt/pull/2258))
 - dbt now builds the relation cache for "dbt compile" and "dbt ls" as well as "dbt run" ([#1705](https://github.com/fishtown-analytics/dbt/issues/1705), [#2319](https://github.com/fishtown-analytics/dbt/pull/2319))
+- Snowflake now uses "describe table" to get the columns in a relation ([#2260](https://github.com/fishtown-analytics/dbt/issues/2260), [#2324](https://github.com/fishtown-analytics/dbt/pull/2324))
 
 ### Fixes
 - When a jinja value is undefined, give a helpful error instead of failing with cryptic "cannot pickle ParserMacroCapture" errors ([#2110](https://github.com/fishtown-analytics/dbt/issues/2110), [#2184](https://github.com/fishtown-analytics/dbt/pull/2184))

diff --git a/core/dbt/adapters/base/column.py b/core/dbt/adapters/base/column.py
@@ -1,6 +1,8 @@
 from dataclasses import dataclass
+import re
 
 from hologram import JsonSchemaMixin
+from dbt.exceptions import RuntimeException
 
 from typing import Dict, ClassVar, Any, Optional
 
@@ -74,7 +76,7 @@ def is_numeric(self) -> bool:
 
  def string_size(self) -> int:
  if not self.is_string():
- raise RuntimeError("Called string_size() on non-string field!")
+ raise RuntimeException("Called string_size() on non-string field!")
 
  if self.dtype == 'text' or self.char_size is None:
  # char_size should never be None. Handle it reasonably just in case
@@ -108,3 +110,46 @@ def numeric_type(cls, dtype: str, precision: Any, scale: Any) -> str:
 
  def __repr__(self) -> str:
  return "<Column {} ({})>".format(self.name, self.data_type)
+
+ @classmethod
+ def from_description(cls, name: str, raw_data_type: str) -> 'Column':
+ match = re.match(r'([^(]+)(\([^)]+\))?', raw_data_type)
+ if match is None:
+ raise RuntimeException(
+ f'Could not interpret data type "{raw_data_type}"'
+ )
+ data_type, size_info = match.groups()
+ char_size = None
+ numeric_precision = None
+ numeric_scale = None
+ if size_info is not None:
+ # strip out the parentheses
+ size_info = size_info[1:-1]
+ parts = size_info.split(',')
+ if len(parts) == 1:
+ try:
+ char_size = int(parts[0])
+ except ValueError:
+ raise RuntimeException(
+ f'Could not interpret data_type "{raw_data_type}": '
+ f'could not convert "{parts[0]}" to an integer'
+ )
+ elif len(parts) == 2:
+ try:
+ numeric_precision = int(parts[0])
+ except ValueError:
+ raise RuntimeException(
+ f'Could not interpret data_type "{raw_data_type}": '
+ f'could not convert "{parts[0]}" to an integer'
+ )
+ try:
+ numeric_scale = int(parts[1])
+ except ValueError:
+ raise RuntimeException(
+ f'Could not interpret data_type "{raw_data_type}": '
+ f'could not convert "{parts[1]}" to an integer'
+ )
+
+ return cls(
+ name, data_type, char_size, numeric_precision, numeric_scale
+ )
diff --git a/plugins/snowflake/dbt/adapters/snowflake/column.py b/plugins/snowflake/dbt/adapters/snowflake/column.py
@@ -1,6 +1,7 @@
 from dataclasses import dataclass
 
 from dbt.adapters.base.column import Column
+from dbt.exceptions import RuntimeException
 
 
 @dataclass
@@ -19,3 +20,12 @@ def is_float(self):
  return self.dtype.lower() in [
  'float', 'float4', 'float8', 'double', 'double precision', 'real',
  ]
+
+ def string_size(self) -> int:
+ if not self.is_string():
+ raise RuntimeException("Called string_size() on non-string field!")
+
+ if self.dtype == 'text' or self.char_size is None:
+ return 16777216
+ else:
+ return int(self.char_size)
diff --git a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql
@@ -52,31 +52,25 @@
 {% endmacro %}
 
 {% macro snowflake__get_columns_in_relation(relation) -%}
- {% call statement('get_columns_in_relation', fetch_result=True) %}
- select
- column_name,
- data_type,
- character_maximum_length,
- numeric_precision,
- numeric_scale
-
- from
- {{ relation.information_schema('columns') }}
-
- where table_name ilike '{{ relation.identifier }}'
- {% if relation.schema %}
- and table_schema ilike '{{ relation.schema }}'
- {% endif %}
- {% if relation.database %}
- and table_catalog ilike '{{ relation.database }}'
- {% endif %}
- order by ordinal_position
+ {%- set sql -%}
+ describe table {{ relation }}
+ {%- endset -%}
+ {%- set result = run_query(sql) -%}
 
- {% endcall %}
-
- {% set table = load_result('get_columns_in_relation').table %}
- {{ return(sql_convert_columns_in_relation(table)) }}
+ {% set maximum = 10000 %}
+ {% if (result | length) >= maximum %}
+ {% set msg %}
+ Too many columns in relation {{ relation }}! dbt can only get
+ information about relations with fewer than {{ maximum }} columns.
+ {% endset %}
+ {% do exceptions.raise_compiler_error(msg) %}
+ {% endif %}
 
+ {% set columns = [] %}
+ {% for row in result %}
+ {% do columns.append(api.Column.from_description(row['name'], row['type'])) %}
+ {% endfor %}
+ {% do return(columns) %}
 {% endmacro %}
 
 {% macro snowflake__list_schemas(database) -%}

diff --git a/test/unit/test_snowflake_adapter.py b/test/unit/test_snowflake_adapter.py
@@ -6,6 +6,7 @@
 import dbt.flags as flags
 
 from dbt.adapters.snowflake import SnowflakeAdapter
+from dbt.adapters.snowflake.column import SnowflakeColumn
 from dbt.adapters.base.query_headers import MacroQueryStringSetter
 from dbt.clients import agate_helper
 from dbt.logger import GLOBAL_LOGGER as logger # noqa
@@ -449,3 +450,72 @@ def test_convert_time_type(self):
  expected = ['time', 'time', 'time']
  for col_idx, expect in enumerate(expected):
  assert SnowflakeAdapter.convert_time_type(agate_table, col_idx) == expect
+
+
+class TestSnowflakeColumn(unittest.TestCase):
+ def test_text_from_description(self):
+ col = SnowflakeColumn.from_description('my_col', 'TEXT')
+ assert col.column == 'my_col'
+ assert col.dtype == 'TEXT'
+ assert col.char_size is None
+ assert col.numeric_precision is None
+ assert col.numeric_scale is None
+ assert col.is_float() is False
+ assert col.is_number() is False
+ assert col.is_numeric() is False
+ assert col.is_string() is True
+ assert col.is_integer() is False
+ assert col.string_size() == 16777216
+
+ col = SnowflakeColumn.from_description('my_col', 'VARCHAR')
+ assert col.column == 'my_col'
+ assert col.dtype == 'VARCHAR'
+ assert col.char_size is None
+ assert col.numeric_precision is None
+ assert col.numeric_scale is None
+ assert col.is_float() is False
+ assert col.is_number() is False
+ assert col.is_numeric() is False
+ assert col.is_string() is True
+ assert col.is_integer() is False
+ assert col.string_size() == 16777216
+
+ def test_sized_varchar_from_description(self):
+ col = SnowflakeColumn.from_description('my_col', 'VARCHAR(256)')
+ assert col.column == 'my_col'
+ assert col.dtype == 'VARCHAR'
+ assert col.char_size == 256
+ assert col.numeric_precision is None
+ assert col.numeric_scale is None
+ assert col.is_float() is False
+ assert col.is_number() is False
+ assert col.is_numeric() is False
+ assert col.is_string() is True
+ assert col.is_integer() is False
+ assert col.string_size() == 256
+
+ def test_sized_decimal_from_description(self):
+ col = SnowflakeColumn.from_description('my_col', 'DECIMAL(1, 0)')
+ assert col.column == 'my_col'
+ assert col.dtype == 'DECIMAL'
+ assert col.char_size is None
+ assert col.numeric_precision == 1
+ assert col.numeric_scale == 0
+ assert col.is_float() is False
+ assert col.is_number() is True
+ assert col.is_numeric() is True
+ assert col.is_string() is False
+ assert col.is_integer() is False
+
+ def test_float_from_description(self):
+ col = SnowflakeColumn.from_description('my_col', 'FLOAT8')
+ assert col.column == 'my_col'
+ assert col.dtype == 'FLOAT8'
+ assert col.char_size is None
+ assert col.numeric_precision is None
+ assert col.numeric_scale is None
+ assert col.is_float() is True
+ assert col.is_number() is True
+ assert col.is_numeric() is False
+ assert col.is_string() is False
+ assert col.is_integer() is False