🎉 New Destination: DuckDB (#17494)
This is the first version of the DuckDB destination. There are still potential edge cases to handle, but we're looking forward to your feedback.
sspaeti authored Feb 7, 2023
1 parent 538d5ca commit 2bbc4f6
Showing 51 changed files with 1,717 additions and 8 deletions.
5 changes: 5 additions & 0 deletions airbyte-config/init/src/main/resources/icons/duckdb.svg
(SVG icon file; contents not displayed)
@@ -443,3 +443,15 @@
  dockerImageTag: 0.1.0
  documentationUrl: https://docs.airbyte.com/integrations/destinations/weaviate
  releaseStage: alpha
- name: DuckDB
  destinationDefinitionId: 94bd199c-2ff0-4aa2-b98e-17f0acb72610
  dockerRepository: airbyte/destination-duckdb
  dockerImageTag: 0.1.0
  documentationUrl: https://docs.airbyte.io/integrations/destinations/duckdb
  icon: duckdb.svg
  normalizationConfig:
    normalizationRepository: airbyte/normalization-duckdb
    normalizationTag: 0.2.25
    normalizationIntegrationType: duckdb
  supportsDbt: true
  releaseStage: alpha
27 changes: 27 additions & 0 deletions airbyte-config/init/src/main/resources/seed/destination_specs.yaml
@@ -7347,3 +7347,30 @@
    supported_destination_sync_modes:
    - "append"
    - "overwrite"
- dockerImage: "airbyte/destination-duckdb:0.1.0"
  spec:
    documentationUrl: "https://docs.airbyte.io/integrations/destinations/duckdb"
    connectionSpecification:
      $schema: "http://json-schema.org/draft-07/schema#"
      title: "DuckDB Destination Spec"
      type: "object"
      required:
      - "destination_path"
      additionalProperties: false
      properties:
        destination_path:
          type: "string"
          description: "Path to the destination.duckdb file. The file will be placed\
            \ inside that local mount. For more information check out our <a href=\"\
            https://docs.airbyte.io/integrations/destinations/duckdb\">docs</a>"
          example: "/local/destination.duckdb"
        schema:
          type: "string"
          description: "database schema, default for duckdb is main"
          example: "main"
    supportsIncremental: true
    supportsNormalization: true
    supportsDBT: true
    supported_destination_sync_modes:
    - "overwrite"
    - "append"
7 changes: 7 additions & 0 deletions airbyte-integrations/bases/base-normalization/build.gradle
@@ -81,6 +81,10 @@ task airbyteDockerTiDB(type: Exec, dependsOn: checkSshScriptCopy) {
    configure buildAirbyteDocker('tidb')
    dependsOn assemble
}
task airbyteDockerDuckDB(type: Exec, dependsOn: checkSshScriptCopy) {
    configure buildAirbyteDocker('duckdb')
    dependsOn assemble
}

airbyteDocker.dependsOn(airbyteDockerMSSql)
airbyteDocker.dependsOn(airbyteDockerMySql)
@@ -89,6 +93,7 @@ airbyteDocker.dependsOn(airbyteDockerClickhouse)
airbyteDocker.dependsOn(airbyteDockerSnowflake)
airbyteDocker.dependsOn(airbyteDockerRedshift)
airbyteDocker.dependsOn(airbyteDockerTiDB)
airbyteDocker.dependsOn(airbyteDockerDuckDB)

task("customIntegrationTestPython", type: PythonTask, dependsOn: installTestReqs) {
module = "pytest"
@@ -104,6 +109,7 @@ task("customIntegrationTestPython", type: PythonTask, dependsOn: installTestReqs
dependsOn ':airbyte-integrations:connectors:destination-mssql:airbyteDocker'
dependsOn ':airbyte-integrations:connectors:destination-clickhouse:airbyteDocker'
dependsOn ':airbyte-integrations:connectors:destination-tidb:airbyteDocker'
dependsOn ':airbyte-integrations:connectors:destination-duckdb:airbyteDocker'
}

// not really sure what this task does differently from customIntegrationTestPython, but it seems to also run integration tests
@@ -118,6 +124,7 @@ project.tasks.findByName('_customIntegrationTestsCoverage').dependsOn ':airbyte-
project.tasks.findByName('_customIntegrationTestsCoverage').dependsOn ':airbyte-integrations:connectors:destination-mssql:airbyteDocker'
project.tasks.findByName('_customIntegrationTestsCoverage').dependsOn ':airbyte-integrations:connectors:destination-clickhouse:airbyteDocker'
project.tasks.findByName('_customIntegrationTestsCoverage').dependsOn ':airbyte-integrations:connectors:destination-tidb:airbyteDocker'
project.tasks.findByName('_customIntegrationTestsCoverage').dependsOn ':airbyte-integrations:connectors:destination-duckdb:airbyteDocker'

// DATs have some additional tests that exercise normalization code paths,
// so we want to run these in addition to the base-normalization integration tests.
@@ -0,0 +1,63 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build" # directory which will store compiled SQL files
log-path: "../logs" # directory which will store DBT logs
packages-install-path: "/dbt" # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: false
  identifier: true

# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        +on_schema_change: sync_all_columns
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

dispatch:
  - macro_namespace: dbt_utils
    search_order: ["airbyte_utils", "dbt_utils"]
@@ -0,0 +1,5 @@
# add dependencies. these will get pulled during the `dbt deps` process.

packages:
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    revision: 0.8.2
@@ -46,6 +46,10 @@
left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
{%- endmacro %}

{% macro duckdb__cross_join_unnest(stream_name, array_col) -%}
left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
{%- endmacro %}

{% macro redshift__cross_join_unnest(stream_name, array_col) -%}
left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
{%- endmacro %}
@@ -95,6 +99,10 @@
_airbyte_nested_data
{%- endmacro %}

{% macro duckdb__unnested_column_value(column_col) -%}
_airbyte_nested_data
{%- endmacro %}

{% macro oracle__unnested_column_value(column_col) -%}
{{ column_col }}
{%- endmacro %}
@@ -193,3 +201,7 @@ joined as (
{% macro tidb__unnest_cte(from_table, stream_name, column_col) -%}
{{ mysql__unnest_cte(from_table, stream_name, column_col) }}
{%- endmacro %}

{% macro duckdb__unnest_cte(from_table, stream_name, column_col) -%}
{{ mysql__unnest_cte(from_table, stream_name, column_col) }}
{%- endmacro %}
@@ -30,3 +30,7 @@
{% macro tidb__concat(fields) -%}
concat({{ fields|join(', ') }})
{%- endmacro %}

{% macro duckdb__concat(fields) -%}
concat({{ fields|join(', ') }})
{%- endmacro %}
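Note: DuckDB supports the same variadic concat() as MySQL/TiDB, so the macro body is identical. An illustrative check in DuckDB SQL:

    select concat('Duck', 'DB', 1);  -- 'DuckDB1'; non-text arguments are cast to VARCHAR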
@@ -44,6 +44,9 @@
json
{%- endmacro -%}

{%- macro duckdb__type_json() -%}
json
{%- endmacro -%}

{# string ------------------------------------------------- #}

@@ -72,6 +75,10 @@
char(1000)
{%- endmacro -%}

{%- macro duckdb__type_string() -%}
VARCHAR
{%- endmacro -%}

{# float ------------------------------------------------- #}
{% macro mysql__type_float() %}
float
@@ -89,6 +96,10 @@
float
{% endmacro %}

{% macro duckdb__type_float() %}
DOUBLE
{% endmacro %}

{# int ------------------------------------------------- #}
{% macro default__type_int() %}
int
@@ -110,6 +121,10 @@
signed
{% endmacro %}

{% macro duckdb__type_int() %}
INTEGER
{% endmacro %}

{# bigint ------------------------------------------------- #}
{% macro mysql__type_bigint() %}
signed
@@ -127,6 +142,10 @@
signed
{% endmacro %}

{% macro duckdb__type_bigint() %}
BIGINT
{% endmacro %}

{# numeric ------------------------------------------------- --#}
{% macro mysql__type_numeric() %}
float
@@ -140,6 +159,10 @@
float
{% endmacro %}

{% macro duckdb__type_numeric() %}
DOUBLE
{% endmacro %}

{# very_large_integer --------------------------------------- --#}
{#
Most databases don't have a true unbounded numeric datatype, so we use a really big numeric field.
@@ -170,6 +193,10 @@ so this macro needs to be called very_large_integer.
decimal(38, 0)
{% endmacro %}
{% macro duckdb__type_very_large_integer() %}
DECIMAL(38, 0)
{% endmacro %}
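Note: DECIMAL(38, 0) is the widest exact decimal DuckDB offers, which is why it stands in for an unbounded integer here. An illustrative check:

    select cast('99999999999999999999999999999999999999' as decimal(38, 0));  -- 38 digits, the maximum width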
{# timestamp ------------------------------------------------- --#}
{% macro mysql__type_timestamp() %}
time
@@ -189,6 +216,10 @@ so this macro needs to be called very_large_integer.
time
{% endmacro %}
{% macro duckdb__type_timestamp() %}
TIMESTAMP
{% endmacro %}
{# timestamp with time zone ------------------------------------------------- #}
{%- macro type_timestamp_with_timezone() -%}
@@ -229,6 +260,10 @@ so this macro needs to be called very_large_integer.
char(1000)
{%- endmacro -%}

{%- macro duckdb__type_timestamp_with_timezone() -%}
TIMESTAMPTZ
{%- endmacro -%}

{# timestamp without time zone ------------------------------------------------- #}

{%- macro type_timestamp_without_timezone() -%}
@@ -261,6 +296,10 @@ so this macro needs to be called very_large_integer.
datetime
{% endmacro %}

{% macro duckdb__type_timestamp_without_timezone() %}
TIMESTAMP
{% endmacro %}

{# time without time zone ------------------------------------------------- #}

{%- macro type_time_without_timezone() -%}
@@ -287,6 +326,9 @@ so this macro needs to be called very_large_integer.
time
{% endmacro %}

{% macro duckdb__type_time_without_timezone() %}
TIMESTAMP
{% endmacro %}

{# time with time zone ------------------------------------------------- #}

@@ -330,6 +372,9 @@ so this macro needs to be called very_large_integer.
char(1000)
{%- endmacro -%}

{%- macro duckdb__type_time_with_timezone() -%}
TIMESTAMPTZ
{%- endmacro -%}
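Note: the DuckDB mappings above are all plain built-in types; a quick sanity check of a few of them in DuckDB SQL (illustrative only):

    select cast('42' as integer),
           cast('3.14' as double),
           cast('2023-02-07 10:00:00+00' as timestamptz),
           '{"a": 1}'::json;  -- the JSON type comes from DuckDB's json extension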
{# date ------------------------------------------------- #}

{%- macro type_date() -%}
@@ -7,6 +7,7 @@
- MySQL: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://dev.mysql.com/doc/refman/8.0/en/json-search-functions.html
- ClickHouse: JSONExtractString(json_doc, 'path' [, 'path'] ...) -> https://clickhouse.com/docs/en/sql-reference/functions/json-functions/
- TiDB: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://docs.pingcap.com/tidb/stable/json-functions
- DuckDB: json_extract(json, 'path') note: If path is a LIST, the result will be a LIST of JSON -> https://duckdb.org/docs/extensions/json
#}
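Note: the DuckDB LIST behavior called out above is easy to verify directly, e.g.:

    select json_extract('{"a": {"b": 1}}', '$."a"."b"');      -- 1 (as JSON)
    select json_extract('{"a": 1, "b": 2}', ['$.a', '$.b']);  -- [1, 2]: a LIST of paths yields a LIST of JSON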

{# format_json_path -------------------------------------------------- #}
@@ -103,6 +104,11 @@
{{ "'$.\"" ~ json_path_list|join(".") ~ "\"'" }}
{%- endmacro %}

{% macro duckdb__format_json_path(json_path_list) -%}
{# -- '$."x"."y"."z"' #}
{{ "'$.\"" ~ json_path_list|join(".") ~ "\"'" }}
{%- endmacro %}

{# json_extract ------------------------------------------------- #}

{% macro json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
@@ -180,6 +186,14 @@
{% endif -%}
{%- endmacro %}

{% macro duckdb__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
{%- if from_table|string() == '' %}
json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{% else %}
json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(normalized_json_path) }})
{% endif -%}
{%- endmacro %}

{# json_extract_scalar ------------------------------------------------- #}

{% macro json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
@@ -234,6 +248,10 @@
)
{%- endmacro %}

{% macro duckdb__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
json_extract_string({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}
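Note: unlike json_extract, DuckDB's json_extract_string unwraps the value to VARCHAR, which is exactly what the scalar variant needs, e.g.:

    select json_extract('{"name": "duck"}', '$."name"');         -- '"duck"' (JSON, quotes preserved)
    select json_extract_string('{"name": "duck"}', '$."name"');  -- 'duck' (plain VARCHAR)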

{# json_extract_array ------------------------------------------------- #}

{% macro json_extract_array(json_column, json_path_list, normalized_json_path) -%}
@@ -284,6 +302,10 @@
json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{% macro duckdb__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{# json_extract_string_array ------------------------------------------------- #}

{% macro json_extract_string_array(json_column, json_path_list, normalized_json_path) -%}