Skip to content

Commit

Permalink
Upgrade dbt generate artifact types (#2598)
Browse files Browse the repository at this point in the history
* upgrade dbt-bigquery and re-generate artifact types (#2545)

* upgrade dbt-bigquery

* create gen_artifacts.sh and use it

* wip on horrific code

* continue subclassing/patching generated models

* start working on mypy compatibility

* remove empty test block

* continue working on mypy passing

* make mypy pass

* fixes post-upgrade

* address comments

* upgrade dbt to 1.6 preview, and fix data access model full refresh

* just determine partitions dynamically

* allow extra fields in generated artifact models

* make mypy pass
  • Loading branch information
atvaccaro committed May 17, 2023
1 parent bcf45a5 commit e4fb9a5
Show file tree
Hide file tree
Showing 17 changed files with 2,019 additions and 454 deletions.
1 change: 1 addition & 0 deletions warehouse/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ RUN poetry export -f requirements.txt --without-hashes --output requirements.txt

COPY ./dbt_project.yml /app/dbt_project.yml
COPY ./packages.yml /app/packages.yml
COPY ./profiles.yml /app/profiles.yml
RUN dbt deps

COPY . /app
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,6 @@ models:
For best results, join with reference to a specific date, and make sure to choose a specific
output grain (organizations, services, customer-facing vs. not); you will likely need to filter
or group to get the desired output.
tests:
columns:
- *key
- name: service_key
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
'data_type': 'date',
'granularity': 'day',
},
partitions=['current_date()'],
cluster_by='job_type',
)
}}
Expand All @@ -21,13 +20,15 @@ WITH latest AS (
FROM cal-itp-data-infra.audit.cloudaudit_googleapis_com_data_access_{{ yesterday.strftime('%Y%m%d') }}
),

everything AS (
{% set start_date = modules.datetime.date(year=2022, month=4, day=11) %}
{% set days = (modules.datetime.date.today() - start_date).days + 1 %}
everything AS ( -- noqa: ST03
-- without this limited lookback, we'd eventually exhaust query resources on full refreshes,
-- since we might end up unioning hundreds of daily tables;
-- data technically goes back to 2022-04-11, but we only read the most recent `days` days
{% set days = 90 %}

{% for add in range(days) %}
{% for day in range(days) %}

{% set current = start_date + modules.datetime.timedelta(days=add) %}
{% set current = modules.datetime.date.today() - modules.datetime.timedelta(days=day) %}

SELECT *
FROM cal-itp-data-infra.audit.cloudaudit_googleapis_com_data_access_{{ current.strftime('%Y%m%d') }}
Expand Down Expand Up @@ -79,9 +80,9 @@ stg_audit__cloudaudit_googleapis_com_data_access AS (
SECOND
) AS duration_in_seconds,
JSON_VALUE_ARRAY(job, '$.jobStats.queryStats.referencedTables') as referenced_tables,
CAST(JSON_VALUE(job, '$.jobStats.queryStats.totalBilledBytes') AS INT64) AS total_billed_bytes,
5.0 * CAST(JSON_VALUE(job, '$.jobStats.queryStats.totalBilledBytes') AS INT64) / POWER(2, 40) AS estimated_cost_usd, -- $5/TB
CAST(JSON_VALUE(job, '$.jobStats.totalSlotMs') AS INT64) / 1000 AS total_slots_seconds,
CAST(JSON_VALUE(job, '$.jobStats.queryStats.totalBilledBytes') AS int64) AS total_billed_bytes,
5.0 * CAST(JSON_VALUE(job, '$.jobStats.queryStats.totalBilledBytes') AS int64) / POWER(2, 40) AS estimated_cost_usd, -- $5/TB
CAST(JSON_VALUE(job, '$.jobStats.totalSlotMs') AS int64) / 1000 AS total_slots_seconds,

JSON_VALUE(metadata, '$.tableDataRead.jobName') as table_data_read_job_name,

Expand Down
3 changes: 3 additions & 0 deletions warehouse/mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[mypy]
plugins = pydantic.mypy, sqlmypy
disable_error_code = assignment
201 changes: 53 additions & 148 deletions warehouse/poetry.lock

Large diffs are not rendered by default.

7 changes: 2 additions & 5 deletions warehouse/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,11 @@ networkx = {version = "<3", extras = ["default"]}
# export CFLAGS="-I $(brew --prefix graphviz)/include"
# export LDFLAGS="-L $(brew --prefix graphviz)/lib"
pygraphviz = "^1.10"
dbt-bigquery = "^1.4.3"
palettable = "^3.3.0"
dbt-bigquery = "1.6.0b1"

[tool.poetry.group.dev.dependencies]
black = "^22.12.0"
mypy = "^0.991"
isort = "^5.11.4"
types-tqdm = "^4.64.7"
types-requests = "^2.28.11"
Expand All @@ -46,10 +45,8 @@ datamodel-code-generator = "^0.17.1"
sqlfluff = "^2.0.2"
sqlfluff-templater-dbt = "^2.0.2"
ipdb = "^0.13.13"
mypy = "^1.2.0"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.mypy]
plugins = "sqlmypy"
Loading

0 comments on commit e4fb9a5

Please sign in to comment.