From bf9d779f210f6e45798bbe1c60ee23f0d421ff90 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Sun, 5 Nov 2023 18:36:16 -0700 Subject: [PATCH] Fix s3 bucket bug --- CHANGELOG.md | 7 +++++++ dbt/adapters/clickhouse/impl.py | 5 ++--- tests/integration/adapter/test_s3.py | 29 ++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 38e6e945..761c7653 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +### Release [1.5.0], TBD +#### Improvement +- Compatible with dbt 1.5.x + +#### Bug Fix +- Fix s3 macro when bucket includes `https://` prefix. Closes https://github.com/ClickHouse/dbt-clickhouse/issues/192. + ### Release [1.4.9], 2023-10-27 #### Improvement - Lots of work on Distributed table materializations. Big thanks to [gfunc](https://github.com/gfunc) for the additional PR diff --git a/dbt/adapters/clickhouse/impl.py b/dbt/adapters/clickhouse/impl.py index 80497fe9..3c9a5b87 100644 --- a/dbt/adapters/clickhouse/impl.py +++ b/dbt/adapters/clickhouse/impl.py @@ -167,13 +167,12 @@ def s3source_clause( fmt = fmt or s3config.get('fmt') bucket = bucket or s3config.get('bucket', '') path = path or s3config.get('path', '') - url = bucket + url = bucket.replace('https://', '') if path: if bucket and path and not bucket.endswith('/') and not bucket.startswith('/'): path = f'/{path}' url = f'{url}{path}'.replace('//', '/') - if not url.startswith('http'): - url = f'https://{url}' + url = f'https://{url}' access = '' if aws_access_key_id and not aws_secret_access_key: raise DbtRuntimeError('S3 aws_access_key_id specified without aws_secret_access_key') diff --git a/tests/integration/adapter/test_s3.py b/tests/integration/adapter/test_s3.py index 8fb8727f..10f1289e 100644 --- a/tests/integration/adapter/test_s3.py +++ b/tests/integration/adapter/test_s3.py @@ -27,6 +27,10 @@ select * from {{ clickhouse_s3source('taxi_s3', path='/trips_4.gz') }} LIMIT 5000 """ +s3_taxis_full_source = """ +select * from {{ clickhouse_s3source('taxi_s3', path='/trips_5.gz') }} LIMIT 1000 +""" + s3_taxis_inc = """ {{ config( materialized='incremental', @@ -84,3 +88,28 @@ def test_s3_incremental(self, project): ) assert 5000 < result[0] < 10000 assert result[1] > 0 + + +class TestS3Bucket: + @pytest.fixture(scope="class") + def project_config_update(self): + return { + 'vars': { + 'taxi_s3': { + 'bucket': 'https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/', + 'fmt': 'TabSeparatedWithNames', + } + } + } + + @pytest.fixture(scope="class") + def models(self): + return { + "s3_taxis_source.sql": s3_taxis_full_source, + "schema.yml": schema_yaml, + } + + def test_read(self, project): + run_dbt(["run", "--select", "s3_taxis_source.sql"]) + result = project.run_sql("select count() as num_rows from s3_taxis_source", fetch="one") + assert result[0] == 1000