Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
3 changes: 2 additions & 1 deletion dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ RUN mkdir -p /${MELTANO_PROJ_ROOT}/data/dev/ \
&& mkdir -p /${MELTANO_PROJ_ROOT}/data/prod/ \
&& /${MELTANO_PROJ_ROOT}/${DUCKDB_CLI_FOLDER}/duckdb /${MELTANO_PROJ_ROOT}/data/dev/data.duckdb "select * from pg_tables;" \
&& /${MELTANO_PROJ_ROOT}/${DUCKDB_CLI_FOLDER}/duckdb /${MELTANO_PROJ_ROOT}/data/test/data.duckdb "select * from pg_tables;" \
&& /${MELTANO_PROJ_ROOT}/${DUCKDB_CLI_FOLDER}/duckdb /${MELTANO_PROJ_ROOT}/data/prod/data.duckdb "select * from pg_tables;"
&& /${MELTANO_PROJ_ROOT}/${DUCKDB_CLI_FOLDER}/duckdb /${MELTANO_PROJ_ROOT}/data/prod/data.duckdb "select * from pg_tables;" \
&& meltano invoke dbt-duckdb:deps


###RUN chmod -R u+x /project/data/
Expand Down
27 changes: 12 additions & 15 deletions meltano.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,18 @@ plugins:
params:
date: $ENV_DATE_GIE
#date: '2023-02-15'
stream_maps:
stg_gie_storage:
key_hash: md5(config['hash_seed'] + (gasDayStart + code))
#__alias__: stg_gie_storage_vX
stg_gie_company:
key_hash: md5(config['hash_seed'] + (gasDayStart + code))
stg_gie_country:
key_hash: md5(config['hash_seed'] + (gasDayStart + code))
stg_gie_region:
key_hash: md5(config['hash_seed'] + (gasDayStart + code))
stream_map_config:
hash_seed: 01AWZh7A6DzGm6iJZZ2T
streams:
- name: stg_gie_storage
path: /api
Expand Down Expand Up @@ -132,15 +144,13 @@ plugins:
pip_url: target-duckdb~=0.4
config:
add_metadata_columns: true
#default_target_schema: gie_stage
default_target_schema: main
filepath: $DB_LOCATION
data_flattening_max_level: 10
- name: target-duckdb-usgs
inherit_from: target-duckdb
config:
add_metadata_columns: true
#default_target_schema: usgs_stage
default_target_schema: main
filepath: $DB_LOCATION
data_flattening_max_level: 10
Expand All @@ -149,7 +159,6 @@ plugins:
inherit_from: target-duckdb
config:
add_metadata_columns: true
#default_target_schema: gie
default_target_schema: main
filepath: $DB_LOCATION
data_flattening_max_level: 10
Expand All @@ -158,24 +167,18 @@ plugins:
- name: airflow
variant: apache
pip_url: apache-airflow==2.1.2 --constraint https://raw.githubusercontent.com/apache/airflow/constraints-2.1.2/constraints-${MELTANO__PYTHON_VERSION}.txt
# transformers:
# - name: dbt-duckdb
# variant: jwills
# pip_url: dbt-core~=1.2.0 dbt-duckdb~=1.2.0
files:
- name: files-airflow
variant: meltano
pip_url: git+https://github.com/meltano/files-airflow.git --constraint https://raw.githubusercontent.com/apache/airflow/constraints-2.1.2/constraints-${MELTANO__PYTHON_VERSION}.txt
utilities:
- name: superset
variant: apache
#pip_url: apache-superset==1.5.0 markupsafe==2.0.1 duckdb-engine==0.6.4
pip_url: apache-superset==2.0.0 flask==2.0.3 werkzeug==2.0.3 jinja2==3.0.1 wtforms==2.3.3
git+https://github.com/meltano/superset-ext.git@main cryptography==3.4.7 markupsafe==2.0.1
duckdb-engine==0.7.0
- name: dbt-duckdb
variant: jwills
#pip_url: dbt-core~=1.3.0 dbt-duckdb~=1.3.0 git+https://github.com/meltano/dbt-ext.git@main
pip_url: dbt-core~=1.4.0 dbt-duckdb~=1.4.0 git+https://github.com/meltano/dbt-ext.git@main
commands:
usgs:
Expand All @@ -188,12 +191,6 @@ jobs:
tasks:
- stg_usgs target-duckdb-usgs dbt-duckdb:usgs
schedules:
#- name: USGS-Earthquake
# interval: 35 */1 * * *
# extractor: stg_usgs
# loader: target-duckdb-usgs
# transform: skip
# start_date: 2023-01-01 15:40:21.295936
- name: USGS-Earthquake
interval: 35 */1 * * *
job: usgs-to-duckdb-rpt
Expand Down
2 changes: 2 additions & 0 deletions meltano_transform/models/gie_rpt/rpt_gie_storage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ SSO AS
gasdaystart::DATE gasdaystart,
split_part(url, '/', 2) as country,
split_part(url, '/', 3) as company_eic,
key_hash,
code as sso_eic,
name as sso_name,
status,
Expand Down Expand Up @@ -36,6 +37,7 @@ FROM
select
_sdc_batched_at,
_sdc_extracted_at,
key_hash,
sso.gasdaystart,
country,
SSO.company_eic,
Expand Down
12 changes: 12 additions & 0 deletions meltano_transform/models/gie_rpt/rpt_gie_storage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,25 @@ models:
+tags:
- gie

tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- sso_eic
- gasdaystart

columns:
- name: _sdc_batched_at
description: Timestamp when the data was captured in the database.

- name: _sdc_extracted_at
description: Timestamp when the data was retrieved from the REST API.

- name: key_hash
description: Hash of sso_eic and gasdaystart.
tests:
- unique
- not_null

- name: gasdaystart
description: Date of the observation. For example, the injection field refers to the injection on this date, and gasinstorage is the value as of the end of gasdaystart.
tests:
Expand Down
4 changes: 4 additions & 0 deletions meltano_transform/models/overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

![mimodast Logo](https://github.com/EJOOSTEROP/mimodast/blob/master/assets/hatchful/logo_transparent.png)

<a href="https://github.com/EJOOSTEROP/mimodast">
<img src="assets/hatchful/logo_transparent.png" alt="Logo" width="180" height="180">
</a>

# Mimodast dbt Project
Mimodast is a minimal modern data stack with working data pipelines in a single Docker container.

Expand Down
3 changes: 3 additions & 0 deletions meltano_transform/packages.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
packages:
- package: dbt-labs/dbt_utils
version: 1.0.0
3 changes: 3 additions & 0 deletions meltano_transform/tests/richter_max.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SELECT *
FROM {{ ref('rpt_usgs_events')}}
WHERE magnitude > 10
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT *
FROM {{ ref('rpt_gie_storage')}}
WHERE _sdc_batched_at < _sdc_extracted_at
WHERE gasinstorage - workinggasvolume > 1
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ version: 2

# NOTE: THIS DOES NOT WORK. UNCLEAR IF YOU CAN DOCUMENT A TEST AT THIS STAGE.
tests:
- name: stage_timing
- name: storage_max
description: Gas in storage should not exceed the working gas volume by more than 1.
Loading