Skip to content

Commit

Permalink
Merge pull request #2324 from fishtown-analytics/feature/sf-describe-…
Browse files Browse the repository at this point in the history
…columns-in-table

Use "describe table" to get the columns in a relation on snowflake (#2260)
  • Loading branch information
beckjake authored Apr 16, 2020
2 parents 51278c0 + bc38750 commit 59477a3
Show file tree
Hide file tree
Showing 5 changed files with 144 additions and 24 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- Added support for `db_groups` and `autocreate` flags in Redshift configurations. ([#1995](https://github.com/fishtown-analytics/dbt/issues/1995), [#2262](https://github.com/fishtown-analytics/dbt/pull/2262))
- Users can supply paths as arguments to `--models` and `--select`, either explicitily by prefixing with `path:` or implicitly with no prefix. ([#454](https://github.com/fishtown-analytics/dbt/issues/454), [#2258](https://github.com/fishtown-analytics/dbt/pull/2258))
- dbt now builds the relation cache for "dbt compile" and "dbt ls" as well as "dbt run" ([#1705](https://github.com/fishtown-analytics/dbt/issues/1705), [#2319](https://github.com/fishtown-analytics/dbt/pull/2319))
- Snowflake now uses "describe table" to get the columns in a relation ([#2260](https://github.com/fishtown-analytics/dbt/issues/2260), [#2324](https://github.com/fishtown-analytics/dbt/pull/2324))

### Fixes
- When a jinja value is undefined, give a helpful error instead of failing with cryptic "cannot pickle ParserMacroCapture" errors ([#2110](https://github.com/fishtown-analytics/dbt/issues/2110), [#2184](https://github.com/fishtown-analytics/dbt/pull/2184))
Expand Down
47 changes: 46 additions & 1 deletion core/dbt/adapters/base/column.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from dataclasses import dataclass
import re

from hologram import JsonSchemaMixin
from dbt.exceptions import RuntimeException

from typing import Dict, ClassVar, Any, Optional

Expand Down Expand Up @@ -74,7 +76,7 @@ def is_numeric(self) -> bool:

def string_size(self) -> int:
if not self.is_string():
raise RuntimeError("Called string_size() on non-string field!")
raise RuntimeException("Called string_size() on non-string field!")

if self.dtype == 'text' or self.char_size is None:
# char_size should never be None. Handle it reasonably just in case
Expand Down Expand Up @@ -108,3 +110,46 @@ def numeric_type(cls, dtype: str, precision: Any, scale: Any) -> str:

def __repr__(self) -> str:
return "<Column {} ({})>".format(self.name, self.data_type)

@classmethod
def from_description(cls, name: str, raw_data_type: str) -> 'Column':
match = re.match(r'([^(]+)(\([^)]+\))?', raw_data_type)
if match is None:
raise RuntimeException(
f'Could not interpret data type "{raw_data_type}"'
)
data_type, size_info = match.groups()
char_size = None
numeric_precision = None
numeric_scale = None
if size_info is not None:
# strip out the parentheses
size_info = size_info[1:-1]
parts = size_info.split(',')
if len(parts) == 1:
try:
char_size = int(parts[0])
except ValueError:
raise RuntimeException(
f'Could not interpret data_type "{raw_data_type}": '
f'could not convert "{parts[0]}" to an integer'
)
elif len(parts) == 2:
try:
numeric_precision = int(parts[0])
except ValueError:
raise RuntimeException(
f'Could not interpret data_type "{raw_data_type}": '
f'could not convert "{parts[0]}" to an integer'
)
try:
numeric_scale = int(parts[1])
except ValueError:
raise RuntimeException(
f'Could not interpret data_type "{raw_data_type}": '
f'could not convert "{parts[1]}" to an integer'
)

return cls(
name, data_type, char_size, numeric_precision, numeric_scale
)
10 changes: 10 additions & 0 deletions plugins/snowflake/dbt/adapters/snowflake/column.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from dataclasses import dataclass

from dbt.adapters.base.column import Column
from dbt.exceptions import RuntimeException


@dataclass
Expand All @@ -19,3 +20,12 @@ def is_float(self):
return self.dtype.lower() in [
'float', 'float4', 'float8', 'double', 'double precision', 'real',
]

def string_size(self) -> int:
if not self.is_string():
raise RuntimeException("Called string_size() on non-string field!")

if self.dtype == 'text' or self.char_size is None:
return 16777216
else:
return int(self.char_size)
40 changes: 17 additions & 23 deletions plugins/snowflake/dbt/include/snowflake/macros/adapters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -52,31 +52,25 @@
{% endmacro %}

{% macro snowflake__get_columns_in_relation(relation) -%}
{% call statement('get_columns_in_relation', fetch_result=True) %}
select
column_name,
data_type,
character_maximum_length,
numeric_precision,
numeric_scale

from
{{ relation.information_schema('columns') }}

where table_name ilike '{{ relation.identifier }}'
{% if relation.schema %}
and table_schema ilike '{{ relation.schema }}'
{% endif %}
{% if relation.database %}
and table_catalog ilike '{{ relation.database }}'
{% endif %}
order by ordinal_position
{%- set sql -%}
describe table {{ relation }}
{%- endset -%}
{%- set result = run_query(sql) -%}

{% endcall %}

{% set table = load_result('get_columns_in_relation').table %}
{{ return(sql_convert_columns_in_relation(table)) }}
{% set maximum = 10000 %}
{% if (result | length) >= maximum %}
{% set msg %}
Too many columns in relation {{ relation }}! dbt can only get
information about relations with fewer than {{ maximum }} columns.
{% endset %}
{% do exceptions.raise_compiler_error(msg) %}
{% endif %}

{% set columns = [] %}
{% for row in result %}
{% do columns.append(api.Column.from_description(row['name'], row['type'])) %}
{% endfor %}
{% do return(columns) %}
{% endmacro %}

{% macro snowflake__list_schemas(database) -%}
Expand Down
70 changes: 70 additions & 0 deletions test/unit/test_snowflake_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import dbt.flags as flags

from dbt.adapters.snowflake import SnowflakeAdapter
from dbt.adapters.snowflake.column import SnowflakeColumn
from dbt.adapters.base.query_headers import MacroQueryStringSetter
from dbt.clients import agate_helper
from dbt.logger import GLOBAL_LOGGER as logger # noqa
Expand Down Expand Up @@ -449,3 +450,72 @@ def test_convert_time_type(self):
expected = ['time', 'time', 'time']
for col_idx, expect in enumerate(expected):
assert SnowflakeAdapter.convert_time_type(agate_table, col_idx) == expect


class TestSnowflakeColumn(unittest.TestCase):
def test_text_from_description(self):
col = SnowflakeColumn.from_description('my_col', 'TEXT')
assert col.column == 'my_col'
assert col.dtype == 'TEXT'
assert col.char_size is None
assert col.numeric_precision is None
assert col.numeric_scale is None
assert col.is_float() is False
assert col.is_number() is False
assert col.is_numeric() is False
assert col.is_string() is True
assert col.is_integer() is False
assert col.string_size() == 16777216

col = SnowflakeColumn.from_description('my_col', 'VARCHAR')
assert col.column == 'my_col'
assert col.dtype == 'VARCHAR'
assert col.char_size is None
assert col.numeric_precision is None
assert col.numeric_scale is None
assert col.is_float() is False
assert col.is_number() is False
assert col.is_numeric() is False
assert col.is_string() is True
assert col.is_integer() is False
assert col.string_size() == 16777216

def test_sized_varchar_from_description(self):
col = SnowflakeColumn.from_description('my_col', 'VARCHAR(256)')
assert col.column == 'my_col'
assert col.dtype == 'VARCHAR'
assert col.char_size == 256
assert col.numeric_precision is None
assert col.numeric_scale is None
assert col.is_float() is False
assert col.is_number() is False
assert col.is_numeric() is False
assert col.is_string() is True
assert col.is_integer() is False
assert col.string_size() == 256

def test_sized_decimal_from_description(self):
col = SnowflakeColumn.from_description('my_col', 'DECIMAL(1, 0)')
assert col.column == 'my_col'
assert col.dtype == 'DECIMAL'
assert col.char_size is None
assert col.numeric_precision == 1
assert col.numeric_scale == 0
assert col.is_float() is False
assert col.is_number() is True
assert col.is_numeric() is True
assert col.is_string() is False
assert col.is_integer() is False

def test_float_from_description(self):
col = SnowflakeColumn.from_description('my_col', 'FLOAT8')
assert col.column == 'my_col'
assert col.dtype == 'FLOAT8'
assert col.char_size is None
assert col.numeric_precision is None
assert col.numeric_scale is None
assert col.is_float() is True
assert col.is_number() is True
assert col.is_numeric() is False
assert col.is_string() is False
assert col.is_integer() is False

0 comments on commit 59477a3

Please sign in to comment.