Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle more than 100 fields to compute hashid #970

Merged
merged 2 commits into from
Nov 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion airbyte-integrations/bases/base-normalization/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ WORKDIR /airbyte

ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.version=0.1.0
LABEL io.airbyte.version=0.1.1
LABEL io.airbyte.name=airbyte/normalization
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{#
Override of the `concat` macro from dbt-utils:
https://github.com/fishtown-analytics/dbt-utils/blob/0.6.2/macros/cross_db_utils/concat.sql
We provide our own implementation because Postgres does not accept
more than 100 arguments in a single function call.
#}

{#
  Entry point for `concat`: resolves the implementation through
  adapter.dispatch('concat') and renders it with the given list of fields.
#}
{% macro concat(fields) -%}
{%- set concat_impl = adapter.dispatch('concat') -%}
{{ concat_impl(fields) }}
{%- endmacro %}

{#
  Default `concat` implementation: renders a call to the SQL concat() function
  with the fields passed as comma-separated arguments.
#}
{% macro default__concat(fields) -%}
{%- set argument_list = fields | join(', ') -%}
concat({{ argument_list }})
{%- endmacro %}

{#
  Renders the fields chained with the SQL `||` operator instead of a
  concat() function call.
#}
{% macro alternative_concat(fields) %}
{% set concat_operator = ' || ' -%}
{{ fields | join(concat_operator) }}
{% endmacro %}


{#
  Postgres implementation of `concat`: chains the fields with the `||` operator
  instead of calling concat(), because a Postgres function call cannot take more
  than 100 arguments.
  Delegates to the `alternative_concat` macro defined in this file (not the
  dbt_utils copy) so that this override stays self-contained.
#}
{% macro postgres__concat(fields) %}
{{ alternative_concat(fields) }}
{% endmacro %}


{#
  Redshift implementation of `concat`: chains the fields with the `||` operator.
  Delegates to the `alternative_concat` macro defined in this file (not the
  dbt_utils copy) so that this override stays self-contained.
#}
{% macro redshift__concat(fields) %}
{{ alternative_concat(fields) }}
{% endmacro %}


{#
  Snowflake implementation of `concat`: chains the fields with the `||` operator.
  Delegates to the `alternative_concat` macro defined in this file (not the
  dbt_utils copy) so that this override stays self-contained.
#}
{% macro snowflake__concat(fields) %}
{{ alternative_concat(fields) }}
{% endmacro %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{#
Override of the `surrogate_key` macro from dbt-utils:
https://github.com/fishtown-analytics/dbt-utils/blob/0.6.2/macros/sql/surrogate_key.sql
This copy calls our own `concat` macro instead of the dbt-utils one,
because Postgres does not accept more than 100 arguments in a single function call.
#}

{#
  Renders a hashed surrogate key built from a list of fields.

  field_list: the list of field expressions to hash. For backward compatibility
  a plain string followed by further positional strings is also accepted; that
  call style emits a warning.

  Each field is cast to the string type and coalesced to '', the pieces are
  separated by the literal '-', concatenated with the `concat` macro and passed
  to dbt_utils.hash.
#}
{%- macro surrogate_key(field_list) -%}

    {#- Legacy call style: several string arguments instead of a single list. -#}
    {%- set legacy_call = field_list is string or varargs|length >= 1 -%}

    {%- if legacy_call -%}

        {%- set deprecation_notice = '
Warning: the `surrogate_key` macro now takes a single list argument instead of \
multiple string arguments. Support for multiple string arguments will be \
deprecated in a future release of dbt-utils. The {}.{} model triggered this warning. \
'.format(model.package_name, model.name) -%}

        {%- do exceptions.warn(deprecation_notice) -%}

        {#- varargs does not contain the first argument, so put it in front. -#}
        {%- set field_list_xf = [field_list] + (varargs | list) -%}

    {%- else -%}

        {#- A list was passed: use it as is. -#}
        {%- set field_list_xf = field_list -%}

    {%- endif -%}

    {%- set fields = [] -%}

    {%- for field in field_list_xf -%}

        {#- Every field except the first one is preceded by the '-' separator. -#}
        {%- if not loop.first -%}
            {%- do fields.append("'-'") -%}
        {%- endif -%}

        {%- set casted = "cast(" ~ field ~ " as " ~ dbt_utils.type_string() ~ ")" -%}
        {%- do fields.append("coalesce(" ~ casted ~ ", '')") -%}

    {%- endfor -%}

    {{ dbt_utils.hash(concat(fields)) }}

{%- endmacro -%}
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,10 @@ def process_node(
node_properties = extract_node_properties(path=path, json_col=json_col, properties=properties)
node_columns = ",\n ".join([sql for sql in node_properties.values()])
hash_node_columns = ",\n ".join([f"adapter.quote_as_configured('{column}', 'identifier')" for column in node_properties.keys()])
hash_node_columns = jinja_call(f"dbt_utils.surrogate_key([\n {hash_node_columns}\n ])")
# Use our own surrogate_key macro instead of dbt_utils.surrogate_key to work around a bug with Postgres (#913).
# hash_node_columns = jinja_call(f"dbt_utils.surrogate_key([\n {hash_node_columns}\n ])")
# We should re-enable the dbt_utils version once our PR to dbt-utils is merged (dbt-labs/dbt-utils#296).
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add the link to the dbt PR?

Copy link
Contributor Author

@ChristopheDuong ChristopheDuong Nov 13, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The dbt-utils PR is here: dbt-labs/dbt-utils#296

hash_node_columns = jinja_call(f"surrogate_key([\n {hash_node_columns}\n ])")
hash_id = jinja_call(f"adapter.quote_as_configured('_{name}_hashid', 'identifier')")
foreign_hash_id = jinja_call(f"adapter.quote_as_configured('_{name}_foreign_hashid', 'identifier')")
emitted_col = "{},\n {} as {}".format(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public class DefaultNormalizationRunner implements NormalizationRunner {

private static final Logger LOGGER = LoggerFactory.getLogger(DefaultNormalizationRunner.class);

public static final String NORMALIZATION_IMAGE_NAME = "airbyte/normalization:0.1.0";
public static final String NORMALIZATION_IMAGE_NAME = "airbyte/normalization:0.1.1";

private final DestinationType destinationType;
private final ProcessBuilderFactory pbf;
Expand Down