Skip to content

Commit

Permalink
Fix s3 bucket bug
Browse files Browse the repository at this point in the history
  • Loading branch information
genzgd committed Nov 6, 2023
1 parent e72b775 commit bf9d779
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 3 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
### Release [1.5.0], TBD
#### Improvement
- Compatible with dbt 1.5.x

#### Bug Fix
- Fix s3 macro when bucket includes `https://` prefix. Closes https://github.com/ClickHouse/dbt-clickhouse/issues/192.

### Release [1.4.9], 2023-10-27
#### Improvement
- Lots of work on Distributed table materializations. Big thanks to [gfunc](https://github.com/gfunc) for the additional PR
Expand Down
5 changes: 2 additions & 3 deletions dbt/adapters/clickhouse/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,13 +167,12 @@ def s3source_clause(
fmt = fmt or s3config.get('fmt')
bucket = bucket or s3config.get('bucket', '')
path = path or s3config.get('path', '')
url = bucket
url = bucket.replace('https://', '')
if path:
if bucket and path and not bucket.endswith('/') and not bucket.startswith('/'):
path = f'/{path}'
url = f'{url}{path}'.replace('//', '/')
if not url.startswith('http'):
url = f'https://{url}'
url = f'https://{url}'
access = ''
if aws_access_key_id and not aws_secret_access_key:
raise DbtRuntimeError('S3 aws_access_key_id specified without aws_secret_access_key')
Expand Down
29 changes: 29 additions & 0 deletions tests/integration/adapter/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
select * from {{ clickhouse_s3source('taxi_s3', path='/trips_4.gz') }} LIMIT 5000
"""

# Model SQL: selects at most 1000 rows from `/trips_5.gz`, resolved through the
# `clickhouse_s3source` macro using the configuration named `taxi_s3`.
s3_taxis_full_source = """
select * from {{ clickhouse_s3source('taxi_s3', path='/trips_5.gz') }} LIMIT 1000
"""

s3_taxis_inc = """
{{ config(
materialized='incremental',
Expand Down Expand Up @@ -84,3 +88,28 @@ def test_s3_incremental(self, project):
)
assert 5000 < result[0] < 10000
assert result[1] > 0


class TestS3Bucket:
    """Read from S3 when the configured bucket already carries an ``https://`` prefix."""

    @pytest.fixture(scope="class")
    def project_config_update(self):
        """Supply the ``taxi_s3`` project var with a bucket URL that includes the scheme."""
        taxi_s3 = {
            'bucket': 'https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/',
            'fmt': 'TabSeparatedWithNames',
        }
        return {'vars': {'taxi_s3': taxi_s3}}

    @pytest.fixture(scope="class")
    def models(self):
        """Register the S3-backed model and the schema file for this project."""
        project_models = {}
        project_models["s3_taxis_source.sql"] = s3_taxis_full_source
        project_models["schema.yml"] = schema_yaml
        return project_models

    def test_read(self, project):
        """Build the model and check it holds exactly the 1000 rows its LIMIT allows."""
        run_dbt(["run", "--select", "s3_taxis_source.sql"])
        count_sql = "select count() as num_rows from s3_taxis_source"
        row = project.run_sql(count_sql, fetch="one")
        assert row[0] == 1000

0 comments on commit bf9d779

Please sign in to comment.