From 30f03692eff862f1456b9c376c21fe8e57de7eaa Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 7 Aug 2018 07:10:36 -0600 Subject: [PATCH 1/2] Make the agate table type tester more restrictive on what counts as null/true/false --- dbt/clients/agate_helper.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/dbt/clients/agate_helper.py b/dbt/clients/agate_helper.py index 798dcfaba19..4a5d7caad0e 100644 --- a/dbt/clients/agate_helper.py +++ b/dbt/clients/agate_helper.py @@ -2,11 +2,14 @@ import agate DEFAULT_TYPE_TESTER = agate.TypeTester(types=[ - agate.data_types.Number(), - agate.data_types.Date(), - agate.data_types.DateTime(), - agate.data_types.Boolean(), - agate.data_types.Text() + agate.data_types.Boolean(true_values=('true',), + false_values=('false',), + null_values=('null',)), + agate.data_types.Number(null_values=('null',)), + agate.data_types.TimeDelta(null_values=('null',)), + agate.data_types.Date(null_values=('null',)), + agate.data_types.DateTime(null_values=('null',)), + agate.data_types.Text(null_values=('null',)) ]) From 7c286fc8b9fa31088872411b73a880013c05464c Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 7 Aug 2018 07:41:20 -0600 Subject: [PATCH 2/2] add a test --- CHANGELOG.md | 1 + test/unit/test_agate_helper.py | 55 ++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 test/unit/test_agate_helper.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 299c766bd61..e9a8366e3f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Add a 'generated_at' field to both the manifest and the catalog. 
([#887](https://github.com/fishtown-analytics/dbt/pull/877)) - Version 2 of schema.yml, which allows users to create table and column comments that end up in the manifest ([#880](https://github.com/fishtown-analytics/dbt/pull/880)) - Add `docs` blocks that users can put into `.md` files and `doc()` value for schema v2 description fields ([#888](https://github.com/fishtown-analytics/dbt/pull/888)) +- Type inference for interpreting CSV data is now less aggressive: only `true`/`false` are treated as booleans and only `null` as a null value (case-insensitive) ## dbt 0.10.2 (unreleased, codename: Betsy Ross) diff --git a/test/unit/test_agate_helper.py b/test/unit/test_agate_helper.py new file mode 100644 index 00000000000..ba8c4785cec --- /dev/null +++ b/test/unit/test_agate_helper.py @@ -0,0 +1,55 @@ +from __future__ import unicode_literals +import unittest + +from datetime import datetime +from decimal import Decimal +from isodate import tzinfo +import os +from shutil import rmtree +from tempfile import mkdtemp +from dbt.clients import agate_helper + +SAMPLE_CSV_DATA = """a,b,c,d,e,f,g +1,n,test,3.2,20180806T11:33:29.320Z,True,NULL +2,y,asdf,900,20180806T11:35:29.320Z,False,a string""" + + +EXPECTED = [ + [1, 'n', 'test', Decimal('3.2'), + datetime(2018, 8, 6, 11, 33, 29, 320000, tzinfo=tzinfo.Utc()), + True, None, + ], + [2, 'y', 'asdf', 900, + datetime(2018, 8, 6, 11, 35, 29, 320000, tzinfo=tzinfo.Utc()), + False, 'a string', + ], +] + +class TestAgateHelper(unittest.TestCase): + def setUp(self): + self.tempdir = mkdtemp() + + def tearDown(self): + rmtree(self.tempdir) + + def test_from_csv(self): + path = os.path.join(self.tempdir, 'input.csv') + with open(path, 'wb') as fp: + fp.write(SAMPLE_CSV_DATA.encode('utf-8')) + tbl = agate_helper.from_csv(path) + self.assertEqual(len(tbl), len(EXPECTED)) + for idx, row in enumerate(tbl): + self.assertEqual(list(row), EXPECTED[idx]) + + def test_from_data(self): + column_names = ['a', 'b', 'c', 'd', 'e', 'f', 'g'] + data = [ + {'a': '1', 'b': 'n', 'c': 'test', 'd': '3.2', + 'e': '20180806T11:33:29.320Z', 'f': 'True',
'g': 'NULL'}, + {'a': '2', 'b': 'y', 'c': 'asdf', 'd': '900', + 'e': '20180806T11:35:29.320Z', 'f': 'False', 'g': 'a string'} + ] + tbl = agate_helper.table_from_data(data, column_names) + self.assertEqual(len(tbl), len(EXPECTED)) + for idx, row in enumerate(tbl): + self.assertEqual(list(row), EXPECTED[idx])