Skip to content

Commit

Permalink
Merge pull request #1420 from fishtown-analytics/fix/postgres-text-types
Browse files Browse the repository at this point in the history
Fix postgres text handling (#781)
  • Loading branch information
beckjake authored Apr 30, 2019
2 parents ad2f228 + c6d6dae commit acca6a7
Show file tree
Hide file tree
Showing 18 changed files with 353 additions and 17 deletions.
2 changes: 1 addition & 1 deletion core/dbt/adapters/base/relation.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ def string_size(self):

if self.dtype == 'text' or self.char_size is None:
# char_size should never be None. Handle it reasonably just in case
return 255
return 256
else:
return int(self.char_size)

Expand Down
1 change: 1 addition & 0 deletions plugins/postgres/dbt/adapters/postgres/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# these are mostly just exports, #noqa them so flake8 will be happy
from dbt.adapters.postgres.connections import PostgresConnectionManager # noqa
from dbt.adapters.postgres.connections import PostgresCredentials
from dbt.adapters.postgres.relation import PostgresColumn # noqa
from dbt.adapters.postgres.impl import PostgresAdapter

from dbt.adapters.base import AdapterPlugin
Expand Down
2 changes: 2 additions & 0 deletions plugins/postgres/dbt/adapters/postgres/impl.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from dbt.adapters.base.meta import available
from dbt.adapters.sql import SQLAdapter
from dbt.adapters.postgres import PostgresConnectionManager
from dbt.adapters.postgres import PostgresColumn
import dbt.compat
import dbt.exceptions

Expand All @@ -11,6 +12,7 @@

class PostgresAdapter(SQLAdapter):
ConnectionManager = PostgresConnectionManager
Column = PostgresColumn

@classmethod
def date_function(cls):
Expand Down
10 changes: 10 additions & 0 deletions plugins/postgres/dbt/adapters/postgres/relation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from dbt.adapters.base import Column


class PostgresColumn(Column):
    @property
    def data_type(self):
        """Type name used when rendering DDL for this column.

        Postgres has a first-class ``text`` type, so it is returned
        unchanged instead of being converted to a sized varchar by the
        base implementation.
        """
        if self.dtype.lower() != 'text':
            return super(PostgresColumn, self).data_type
        # 'text' passes straight through on postgres
        return self.dtype
1 change: 1 addition & 0 deletions plugins/redshift/dbt/adapters/redshift/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dbt.adapters.redshift.connections import RedshiftConnectionManager # noqa
from dbt.adapters.redshift.connections import RedshiftCredentials
from dbt.adapters.redshift.relation import RedshiftColumn # noqa
from dbt.adapters.redshift.impl import RedshiftAdapter


Expand Down
2 changes: 2 additions & 0 deletions plugins/redshift/dbt/adapters/redshift/impl.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from dbt.adapters.postgres import PostgresAdapter
from dbt.adapters.redshift import RedshiftConnectionManager
from dbt.adapters.redshift import RedshiftColumn
from dbt.logger import GLOBAL_LOGGER as logger # noqa


class RedshiftAdapter(PostgresAdapter):
ConnectionManager = RedshiftConnectionManager
Column = RedshiftColumn

AdapterSpecificConfigs = frozenset({"sort_type", "dist", "sort", "bind"})

Expand Down
5 changes: 5 additions & 0 deletions plugins/redshift/dbt/adapters/redshift/relation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from dbt.adapters.base import Column


class RedshiftColumn(Column):
    """Column implementation for Redshift.

    Deliberately subclasses the base ``Column`` rather than
    ``PostgresColumn``: redshift does not inherit the postgres
    text-type handling here.
    """
    pass
8 changes: 4 additions & 4 deletions test/integration/004_simple_archive_test/seed.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ create table {database}.{schema}.archive_expected (
updated_at TIMESTAMP WITHOUT TIME ZONE,
dbt_valid_from TIMESTAMP WITHOUT TIME ZONE,
dbt_valid_to TIMESTAMP WITHOUT TIME ZONE,
dbt_scd_id VARCHAR(255),
dbt_scd_id VARCHAR(256),
dbt_updated_at TIMESTAMP WITHOUT TIME ZONE
);

Expand Down Expand Up @@ -93,7 +93,7 @@ create table {database}.{schema}.archive_castillo_expected (
updated_at TIMESTAMP WITHOUT TIME ZONE,
dbt_valid_from TIMESTAMP WITHOUT TIME ZONE,
dbt_valid_to TIMESTAMP WITHOUT TIME ZONE,
dbt_scd_id VARCHAR(255),
dbt_scd_id VARCHAR(256),
dbt_updated_at TIMESTAMP WITHOUT TIME ZONE
);

Expand Down Expand Up @@ -139,7 +139,7 @@ create table {database}.{schema}.archive_alvarez_expected (
updated_at TIMESTAMP WITHOUT TIME ZONE,
dbt_valid_from TIMESTAMP WITHOUT TIME ZONE,
dbt_valid_to TIMESTAMP WITHOUT TIME ZONE,
dbt_scd_id VARCHAR(255),
dbt_scd_id VARCHAR(256),
dbt_updated_at TIMESTAMP WITHOUT TIME ZONE
);

Expand Down Expand Up @@ -185,7 +185,7 @@ create table {database}.{schema}.archive_kelly_expected (
updated_at TIMESTAMP WITHOUT TIME ZONE,
dbt_valid_from TIMESTAMP WITHOUT TIME ZONE,
dbt_valid_to TIMESTAMP WITHOUT TIME ZONE,
dbt_scd_id VARCHAR(255),
dbt_scd_id VARCHAR(256),
dbt_updated_at TIMESTAMP WITHOUT TIME ZONE
);

Expand Down
9 changes: 9 additions & 0 deletions test/integration/004_simple_archive_test/seed_longtext.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Seed for the long-text archive test: one short value and one 500-char
-- value, i.e. longer than the 256-char varchar fallback used when a
-- text column's size is unknown.
create table {database}.{schema}.super_long (
id INTEGER,
longstring TEXT,
updated_at TIMESTAMP WITHOUT TIME ZONE
);

-- two rows: a trivially short string and a repeat()-generated long one
insert into {database}.{schema}.super_long (id, longstring, updated_at) VALUES
(1, 'short', current_timestamp),
(2, repeat('a', 500), current_timestamp);
221 changes: 221 additions & 0 deletions test/integration/004_simple_archive_test/seed_pg.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
-- Source table the archive reads from.
create table {database}.{schema}.seed (
id INTEGER,
first_name VARCHAR(50),
last_name VARCHAR(50),
email VARCHAR(50),
gender VARCHAR(50),
ip_address VARCHAR(20),
updated_at TIMESTAMP WITHOUT TIME ZONE
);

-- Expected archive output: the seed columns plus dbt's archival
-- metadata columns. dbt_scd_id is declared TEXT (not VARCHAR(n))
-- to match postgres' native text handling.
create table {database}.{schema}.archive_expected (
id INTEGER,
first_name VARCHAR(50),
last_name VARCHAR(50),
email VARCHAR(50),
gender VARCHAR(50),
ip_address VARCHAR(20),

-- archival fields
updated_at TIMESTAMP WITHOUT TIME ZONE,
dbt_valid_from TIMESTAMP WITHOUT TIME ZONE,
dbt_valid_to TIMESTAMP WITHOUT TIME ZONE,
dbt_scd_id TEXT,
dbt_updated_at TIMESTAMP WITHOUT TIME ZONE
);


-- seed inserts: 20 fixture rows with spread-out updated_at timestamps
insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, ip_address, updated_at) values
(1, 'Judith', 'Kennedy', 'jkennedy0@phpbb.com', 'Female', '54.60.24.128', '2015-12-24 12:19:28'),
(2, 'Arthur', 'Kelly', 'akelly1@eepurl.com', 'Male', '62.56.24.215', '2015-10-28 16:22:15'),
(3, 'Rachel', 'Moreno', 'rmoreno2@msu.edu', 'Female', '31.222.249.23', '2016-04-05 02:05:30'),
(4, 'Ralph', 'Turner', 'rturner3@hp.com', 'Male', '157.83.76.114', '2016-08-08 00:06:51'),
(5, 'Laura', 'Gonzales', 'lgonzales4@howstuffworks.com', 'Female', '30.54.105.168', '2016-09-01 08:25:38'),
(6, 'Katherine', 'Lopez', 'klopez5@yahoo.co.jp', 'Female', '169.138.46.89', '2016-08-30 18:52:11'),
(7, 'Jeremy', 'Hamilton', 'jhamilton6@mozilla.org', 'Male', '231.189.13.133', '2016-07-17 02:09:46'),
(8, 'Heather', 'Rose', 'hrose7@goodreads.com', 'Female', '87.165.201.65', '2015-12-29 22:03:56'),
(9, 'Gregory', 'Kelly', 'gkelly8@trellian.com', 'Male', '154.209.99.7', '2016-03-24 21:18:16'),
(10, 'Rachel', 'Lopez', 'rlopez9@themeforest.net', 'Female', '237.165.82.71', '2016-08-20 15:44:49'),
(11, 'Donna', 'Welch', 'dwelcha@shutterfly.com', 'Female', '103.33.110.138', '2016-02-27 01:41:48'),
(12, 'Russell', 'Lawrence', 'rlawrenceb@qq.com', 'Male', '189.115.73.4', '2016-06-11 03:07:09'),
(13, 'Michelle', 'Montgomery', 'mmontgomeryc@scientificamerican.com', 'Female', '243.220.95.82', '2016-06-18 16:27:19'),
(14, 'Walter', 'Castillo', 'wcastillod@pagesperso-orange.fr', 'Male', '71.159.238.196', '2016-10-06 01:55:44'),
(15, 'Robin', 'Mills', 'rmillse@vkontakte.ru', 'Female', '172.190.5.50', '2016-10-31 11:41:21'),
(16, 'Raymond', 'Holmes', 'rholmesf@usgs.gov', 'Male', '148.153.166.95', '2016-10-03 08:16:38'),
(17, 'Gary', 'Bishop', 'gbishopg@plala.or.jp', 'Male', '161.108.182.13', '2016-08-29 19:35:20'),
(18, 'Anna', 'Riley', 'arileyh@nasa.gov', 'Female', '253.31.108.22', '2015-12-11 04:34:27'),
(19, 'Sarah', 'Knight', 'sknighti@foxnews.com', 'Female', '222.220.3.177', '2016-09-26 00:49:06'),
(20, 'Phyllis', 'Fox', 'pfoxj@creativecommons.org', 'Female', '163.191.232.95', '2016-08-21 10:35:19');


-- populate archive table
-- Every seed row becomes a "current" archive row: dbt_valid_to is null
-- and dbt_scd_id is the md5 surrogate key over (id, first_name, updated_at).
insert into {database}.{schema}.archive_expected (
id,
first_name,
last_name,
email,
gender,
ip_address,
updated_at,
dbt_valid_from,
dbt_valid_to,
dbt_updated_at,
dbt_scd_id
)

select
id,
first_name,
last_name,
email,
gender,
ip_address,
updated_at,
-- fields added by archival
updated_at as dbt_valid_from,
null::timestamp as dbt_valid_to,
updated_at as dbt_updated_at,
md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id
from {database}.{schema}.seed;



-- Expected archive output filtered to last_name = 'Castillo'.
-- Same shape as archive_expected; dbt_scd_id is TEXT.
create table {database}.{schema}.archive_castillo_expected (
id INTEGER,
first_name VARCHAR(50),
last_name VARCHAR(50),
email VARCHAR(50),
gender VARCHAR(50),
ip_address VARCHAR(20),

-- archival fields
updated_at TIMESTAMP WITHOUT TIME ZONE,
dbt_valid_from TIMESTAMP WITHOUT TIME ZONE,
dbt_valid_to TIMESTAMP WITHOUT TIME ZONE,
dbt_scd_id TEXT,
dbt_updated_at TIMESTAMP WITHOUT TIME ZONE
);

-- one entry (only seed id 14 has last_name 'Castillo')
insert into {database}.{schema}.archive_castillo_expected (
id,
first_name,
last_name,
email,
gender,
ip_address,
updated_at,
dbt_valid_from,
dbt_valid_to,
dbt_updated_at,
dbt_scd_id
)

select
id,
first_name,
last_name,
email,
gender,
ip_address,
updated_at,
-- fields added by archival
updated_at as dbt_valid_from,
null::timestamp as dbt_valid_to,
updated_at as dbt_updated_at,
md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id
from {database}.{schema}.seed where last_name = 'Castillo';

-- Expected archive output filtered to last_name = 'Alvarez'.
-- Exercises the empty-result case: no seed row matches.
create table {database}.{schema}.archive_alvarez_expected (
id INTEGER,
first_name VARCHAR(50),
last_name VARCHAR(50),
email VARCHAR(50),
gender VARCHAR(50),
ip_address VARCHAR(20),

-- archival fields
updated_at TIMESTAMP WITHOUT TIME ZONE,
dbt_valid_from TIMESTAMP WITHOUT TIME ZONE,
dbt_valid_to TIMESTAMP WITHOUT TIME ZONE,
dbt_scd_id TEXT,
dbt_updated_at TIMESTAMP WITHOUT TIME ZONE
);

-- 0 entries (no 'Alvarez' in the seed data; the select inserts nothing)
insert into {database}.{schema}.archive_alvarez_expected (
id,
first_name,
last_name,
email,
gender,
ip_address,
updated_at,
dbt_valid_from,
dbt_valid_to,
dbt_updated_at,
dbt_scd_id
)

select
id,
first_name,
last_name,
email,
gender,
ip_address,
updated_at,
-- fields added by archival
updated_at as dbt_valid_from,
null::timestamp as dbt_valid_to,
updated_at as dbt_updated_at,
md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id
from {database}.{schema}.seed where last_name = 'Alvarez';

-- Expected archive output filtered to last_name = 'Kelly'.
-- Same shape as archive_expected; dbt_scd_id is TEXT.
create table {database}.{schema}.archive_kelly_expected (
id INTEGER,
first_name VARCHAR(50),
last_name VARCHAR(50),
email VARCHAR(50),
gender VARCHAR(50),
ip_address VARCHAR(20),

-- archival fields
updated_at TIMESTAMP WITHOUT TIME ZONE,
dbt_valid_from TIMESTAMP WITHOUT TIME ZONE,
dbt_valid_to TIMESTAMP WITHOUT TIME ZONE,
dbt_scd_id TEXT,
dbt_updated_at TIMESTAMP WITHOUT TIME ZONE
);


-- 2 entries (seed ids 2 and 9 have last_name 'Kelly')
insert into {database}.{schema}.archive_kelly_expected (
id,
first_name,
last_name,
email,
gender,
ip_address,
updated_at,
dbt_valid_from,
dbt_valid_to,
dbt_updated_at,
dbt_scd_id
)

select
id,
first_name,
last_name,
email,
gender,
ip_address,
updated_at,
-- fields added by archival
updated_at as dbt_valid_from,
null::timestamp as dbt_valid_to,
updated_at as dbt_updated_at,
md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id
from {database}.{schema}.seed where last_name = 'Kelly';
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{# Archive of the super_long seed table (which contains a 500-char TEXT
   value) using the timestamp strategy keyed on updated_at. The target
   database is overridable via the target_database var. #}
{% archive archive_actual %}
{{
config(
target_database=var('target_database', database),
target_schema=schema,
unique_key='id',
strategy='timestamp',
updated_at='updated_at',
)
}}
select * from {{database}}.{{schema}}.super_long
{% endarchive %}
Loading

0 comments on commit acca6a7

Please sign in to comment.