From 390ab08ee94c0096178820d7f6c0ac9bc7636455 Mon Sep 17 00:00:00 2001 From: Ethan Date: Thu, 7 Mar 2024 21:44:26 +1100 Subject: [PATCH 1/3] fix generate_source example --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a39f8bc..90bd836 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ the schema to your source definition If you use the `dbt run-operation` approach it is possible to output directly to a file by piping the output to a new file and using the `--quiet` CLI flag: ``` -dbt --quiet run-operation generate_model_yaml --args '{"model_name": "stg_jaffle_shop__orders"}' > models/staging/jaffle_shop/stg_jaffle_shop__orders.yml +dbt --quiet run-operation generate_source --args '{"table_names": ["stg_jaffle_shop__orders"]}' > models/staging/jaffle_shop/stg_jaffle_shop__orders.yml ``` ### Usage: From a2e286c939ebddc9afa23231bce5fafe7582eb2c Mon Sep 17 00:00:00 2001 From: gwen windflower Date: Thu, 7 Mar 2024 05:16:16 -0600 Subject: [PATCH 2/3] Additional fixes to source output to file example in README --- README.md | 101 ++++++++++++++++++++++++++++++++++-------------------- justfile | 42 +++++++++++++++++++++++ 2 files changed, 106 insertions(+), 37 deletions(-) create mode 100644 justfile diff --git a/README.md b/README.md index 90bd836..79ba382 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ Macros that generate dbt code, and log it to the command line. # Contents + - [dbt-codegen](#dbt-codegen) - [Contents](#contents) - [Installation instructions](#installation-instructions) @@ -27,56 +28,65 @@ Macros that generate dbt code, and log it to the command line. - [Usage:](#usage-5) # Installation instructions + New to dbt packages? Read more about them [here](https://docs.getdbt.com/docs/building-a-dbt-project/package-management/). + 1. 
Include this package in your `packages.yml` file — check [here](https://hub.getdbt.com/dbt-labs/codegen/latest/) for the latest version number: + ```yml packages: - package: dbt-labs/codegen version: X.X.X ## update to latest version here ``` + 2. Run `dbt deps` to install the package. # Macros + ## generate_source ([source](macros/generate_source.sql)) + This macro generates lightweight YAML for a [Source](https://docs.getdbt.com/docs/using-sources), which you can then paste into a schema file. ### Arguments -* `schema_name` (required): The schema name that contains your source data -* `database_name` (optional, default=target.database): The database that your -source data is in. -* `table_names` (optional, default=none): A list of tables that you want to generate the source definitions for. -* `generate_columns` (optional, default=False): Whether you want to add the -column names to your source definition. -* `include_descriptions` (optional, default=False): Whether you want to add -description placeholders to your source definition. -* `include_data_types` (optional, default=True): Whether you want to add data -types to your source columns definitions. -* `table_pattern` (optional, default='%'): A table prefix / postfix that you -want to subselect from all available tables within a given schema. -* `exclude` (optional, default=''): A string you want to exclude from the selection criteria -* `name` (optional, default=schema_name): The name of your source -* `include_database` (optional, default=False): Whether you want to add -the database to your source definition -* `include_schema` (optional, default=False): Whether you want to add -the schema to your source definition + +- `schema_name` (required): The schema name that contains your source data +- `database_name` (optional, default=target.database): The database that your + source data is in. +- `table_names` (optional, default=none): A list of tables that you want to generate the source definitions for. 
+- `generate_columns` (optional, default=False): Whether you want to add the + column names to your source definition. +- `include_descriptions` (optional, default=False): Whether you want to add + description placeholders to your source definition. +- `include_data_types` (optional, default=True): Whether you want to add data + types to your source columns definitions. +- `table_pattern` (optional, default='%'): A table prefix / postfix that you + want to subselect from all available tables within a given schema. +- `exclude` (optional, default=''): A string you want to exclude from the selection criteria +- `name` (optional, default=schema_name): The name of your source +- `include_database` (optional, default=False): Whether you want to add + the database to your source definition +- `include_schema` (optional, default=False): Whether you want to add + the schema to your source definition ### Outputting to a file + If you use the `dbt run-operation` approach it is possible to output directly to a file by piping the output to a new file and using the `--quiet` CLI flag: ``` -dbt --quiet run-operation generate_source --args '{"table_names": ["stg_jaffle_shop__orders"]}' > models/staging/jaffle_shop/stg_jaffle_shop__orders.yml +dbt --quiet run-operation generate_source --args '{"schema_name": "jaffle_shop", "table_names": ["orders"]}' > models/staging/jaffle_shop/_sources.yml ``` ### Usage: + 1. Copy the macro into a statement tab in the dbt Cloud IDE, or into an analysis file, and compile your code ``` {{ codegen.generate_source('raw_jaffle_shop') }} ``` - or for multiple arguments - +or for multiple arguments + ``` {{ codegen.generate_source(schema_name= 'jaffle_shop', database_name= 'raw') }} ``` @@ -127,18 +137,20 @@ sources: 3. Paste the output in to a schema `.yml` file, and refactor as required. ## generate_base_model ([source](macros/generate_base_model.sql)) + This macro generates the SQL for a base model, which you can then paste into a model. 
### Arguments: -* `source_name` (required): The source you wish to generate base model SQL for. -* `table_name` (required): The source table you wish to generate base model SQL for. -* `leading_commas` (optional, default=False): Whether you want your commas to be leading (vs trailing). -* `case_sensitive_cols ` (optional, default=False): Whether your source table has case sensitive column names. If true, keeps the case of the column names from the source. -* `materialized` (optional, default=None): Set materialization style (e.g. table, view, incremental) inside of the model's `config` block. If not set, materialization style will be controlled by `dbt_project.yml` +- `source_name` (required): The source you wish to generate base model SQL for. +- `table_name` (required): The source table you wish to generate base model SQL for. +- `leading_commas` (optional, default=False): Whether you want your commas to be leading (vs trailing). +- `case_sensitive_cols` (optional, default=False): Whether your source table has case-sensitive column names. If true, keeps the case of the column names from the source. +- `materialized` (optional, default=None): Set materialization style (e.g. table, view, incremental) inside of the model's `config` block. If not set, materialization style will be controlled by `dbt_project.yml` ### Usage: + 1. Create a source for the table you wish to create a base model on top of. 2. Copy the macro into a statement tab in the dbt Cloud IDE, or into an analysis file, and compile your code @@ -184,29 +196,38 @@ select * from renamed 4. Paste the output in to a model, and refactor as required. ## create_base_models ([source](macros/create_base_models.sql)) + This macro generates a series of terminal commands (appended with the `&&` to allow for subsequent execution) that execute the [base_model_creation](#base_model_creation-source) bash script. 
This bash script will write the output of the [generate_base_model](#generate_base_model-source) macro into a new model file in your local dbt project. ->**Note**: This macro is not compatible with the dbt Cloud IDE. + +> **Note**: This macro is not compatible with the dbt Cloud IDE. ### Arguments: -* `source_name` (required): The source you wish to generate base model SQL for. -* `tables` (required): A list of all tables you want to generate the base models for. + +- `source_name` (required): The source you wish to generate base model SQL for. +- `tables` (required): A list of all tables you want to generate the base models for. ### Usage: + 1. Create a source for the table you wish to create a base model on top of. 2. Copy the macro into a statement tab into your local IDE, and run your code ```sql dbt run-operation codegen.create_base_models --args '{source_name: my-source, tables: ["this-table","that-table"]}' ``` + ## base_model_creation ([source](bash_scripts/base_model_creation.sh)) + This bash script when executed from your local IDE will create model files in your dbt project instance that contain the outputs of the [generate_base_model](macros/generate_base_model.sql) macro. ->**Note**: This macro is not compatible with the dbt Cloud IDE. + +> **Note**: This macro is not compatible with the dbt Cloud IDE. ### Arguments: -* `source_name` (required): The source you wish to generate base model SQL for. -* `tables` (required): A list of all tables you want to generate the base models for. + +- `source_name` (required): The source you wish to generate base model SQL for. +- `tables` (required): A list of all tables you want to generate the base models for. ### Usage: + 1. Create a source for the table you wish to create a base model on top of. 2. 
Copy the macro into a statement tab into your local IDE, and run your code @@ -215,15 +236,18 @@ source dbt_packages/codegen/bash_scripts/base_model_creation.sh "source_name" [" ``` ## generate_model_yaml ([source](macros/generate_model_yaml.sql)) + This macro generates the YAML for a list of model(s), which you can then paste into a schema.yml file. ### Arguments: -* `model_names` (required): The model(s) you wish to generate YAML for. -* `upstream_descriptions` (optional, default=False): Whether you want to include descriptions for identical column names from upstream models and sources. -* `include_data_types` (optional, default=True): Whether you want to add data types to your model column definitions. + +- `model_names` (required): The model(s) you wish to generate YAML for. +- `upstream_descriptions` (optional, default=False): Whether you want to include descriptions for identical column names from upstream models and sources. +- `include_data_types` (optional, default=True): Whether you want to add data types to your model column definitions. ### Usage: + 1. Create a model. 2. Copy the macro into a statement tab in the dbt Cloud IDE, or into an analysis file, and compile your code @@ -268,13 +292,16 @@ models: 4. Paste the output in to a schema.yml file, and refactor as required. ## generate_model_import_ctes ([source](macros/generate_model_import_ctes.sql)) + This macro generates the SQL for a given model with all references pulled up into import CTEs, which you can then paste back into the model. ### Arguments: -* `model_name` (required): The model you wish to generate SQL with import CTEs for. -* `leading_commas` (optional, default=False): Whether you want your commas to be leading (vs trailing). + +- `model_name` (required): The model you wish to generate SQL with import CTEs for. +- `leading_commas` (optional, default=False): Whether you want your commas to be leading (vs trailing). ### Usage: + 1. Create a model with your original SQL query 2. 
Copy the macro into a statement tab in the dbt Cloud IDE, or into an analysis file, and compile your code diff --git a/justfile b/justfile new file mode 100644 index 0000000..14f83ad --- /dev/null +++ b/justfile @@ -0,0 +1,42 @@ +# generate _*sources.yml for all tables in a schema. +dbt-generate-source database schema: + {{dbt}} run-operation codegen.generate_source --args '{"schema_name": "{{schema}}", "database_name": "{{database}}"}' + +# generate the model sql for a table defined in your sources yml to speed up renaming step. +dbt-generate-base-model source table: + @{{dbt}} run-operation codegen.generate_base_model --args '{"source_name": "{{source}}", "table_name": "{{table}}"}' > generated/stg_{{table}}.sql + @awk '/with source as \(/{p=1} p' generated/stg_{{table}}.sql > temp && mv temp generated/stg_{{table}}.sql + @echo "Model {{table}} generated in generated/stg_{{table}}.sql" + +# generate model yml with all columns from a model sql file. +generated_default := 'generated' +dbt-generate-model-yaml model_name generated_folder=generated_default: + @if [ ! -d "{{generated_folder}}" ]; then \ + mkdir -p {{generated_folder}}; \ + fi + @{{dbt}} run-operation codegen.generate_model_yaml --args '{"model_names": ["{{model_name}}"]}' > /tmp/{{model_name}}.tmpyml + @awk '/models:/{p=1} p' /tmp/{{model_name}}.tmpyml > /tmp/temp{{model_name}} && mv /tmp/temp{{model_name}} {{generated_folder}}/{{model_name}}.yml + @echo "Model {{model_name}} generated in {{generated_folder}}/{{model_name}}.yml" + +# generate model yml with all columns for all sql files without accompanying yml files. +# optionally accept a parameter for folder to search in +default_folder := 'models' +dbt-generate-missing-yaml folder=default_folder: + @for sql_file in $(find {{folder}} -type f -name '*.sql'); do \ + yml_file=${sql_file%.sql}.yml; \ + if [ ! 
-f $yml_file ]; then \ + model_name=${sql_file##*/}; \ + model_name=${model_name%.sql}; \ + folder_name=$(dirname ${sql_file}); \ + just dbt-generate-model-yaml $model_name $folder_name; \ + fi; \ + done + +# Clean up model references in your sql files by generating CTE aliases for all models referenced in a sql file. +dbt-generate-model-import-ctes model_name generated_folder=generated_default: + @if [ ! -d "{{generated_folder}}" ]; then \ + mkdir -p {{generated_folder}}; \ + fi + @{{dbt}} run-operation codegen.generate_model_import_ctes --args '{"model_name": "{{model_name}}"}'> /tmp/{{model_name}}.tmpsql + @awk '/with .* as \($/{p=1} p' /tmp/{{model_name}}.tmpsql | sed 's/^.*with/with/' > /tmp/temp{{model_name}} && mv /tmp/temp{{model_name}} {{generated_folder}}/{{model_name}}.sql + @echo "SQL {{model_name}} edited in {{generated_folder}}/{{model_name}}.sql" From 9acdd4bb4545ff0218a2e2281ec28e7ea5db7023 Mon Sep 17 00:00:00 2001 From: gwen windflower Date: Thu, 7 Mar 2024 05:17:33 -0600 Subject: [PATCH 3/3] Removes justfile that was from another branch --- justfile | 42 ------------------------------------------ 1 file changed, 42 deletions(-) delete mode 100644 justfile diff --git a/justfile b/justfile deleted file mode 100644 index 14f83ad..0000000 --- a/justfile +++ /dev/null @@ -1,42 +0,0 @@ -# generate _*sources.yml for all tables in a schema. -dbt-generate-source database schema: - {{dbt}} run-operation codegen.generate_source --args '{"schema_name": "{{schema}}", "database_name": "{{database}}"}' - -# generate the model sql for a table defined in your sources yml to speed up renaming step. 
-dbt-generate-base-model source table: - @{{dbt}} run-operation codegen.generate_base_model --args '{"source_name": "{{source}}", "table_name": "{{table}}"}' > generated/stg_{{table}}.sql - @awk '/with source as \(/{p=1} p' generated/stg_{{table}}.sql > temp && mv temp generated/stg_{{table}}.sql - @echo "Model {{table}} generated in generated/stg_{{table}}.sql" - -# generate model yml with all columns from a model sql file. -generated_default := 'generated' -dbt-generate-model-yaml model_name generated_folder=generated_default: - @if [ ! -d "{{generated_folder}}" ]; then \ - mkdir -p {{generated_folder}}; \ - fi - @{{dbt}} run-operation codegen.generate_model_yaml --args '{"model_names": ["{{model_name}}"]}' > /tmp/{{model_name}}.tmpyml - @awk '/models:/{p=1} p' /tmp/{{model_name}}.tmpyml > /tmp/temp{{model_name}} && mv /tmp/temp{{model_name}} {{generated_folder}}/{{model_name}}.yml - @echo "Model {{model_name}} generated in {{generated_folder}}/{{model_name}}.yml" - -# generate model yml with all columns for all sql files without accompanying yml files. -# optionally accept a parameter for folder to search in -default_folder := 'models' -dbt-generate-missing-yaml folder=default_folder: - @for sql_file in $(find {{folder}} -type f -name '*.sql'); do \ - yml_file=${sql_file%.sql}.yml; \ - if [ ! -f $yml_file ]; then \ - model_name=${sql_file##*/}; \ - model_name=${model_name%.sql}; \ - folder_name=$(dirname ${sql_file}); \ - just dbt-generate-model-yaml $model_name $folder_name; \ - fi; \ - done - -# Clean up model references in your sql files by generating CTE aliases for all models referenced in a sql file. -dbt-generate-model-import-ctes model_name generated_folder=generated_default: - @if [ ! 
-d "{{generated_folder}}" ]; then \ - mkdir -p {{generated_folder}}; \ - fi - @{{dbt}} run-operation codegen.generate_model_import_ctes --args '{"model_name": "{{model_name}}"}'> /tmp/{{model_name}}.tmpsql - @awk '/with .* as \($/{p=1} p' /tmp/{{model_name}}.tmpsql | sed 's/^.*with/with/' > /tmp/temp{{model_name}} && mv /tmp/temp{{model_name}} {{generated_folder}}/{{model_name}}.sql - @echo "SQL {{model_name}} edited in {{generated_folder}}/{{model_name}}.sql"