From 2b5702748fa3b8f643715c27bf281d0daeab9157 Mon Sep 17 00:00:00 2001 From: gwen windflower Date: Sat, 2 Mar 2024 08:29:52 -0600 Subject: [PATCH 1/8] Simplify the dev workflow and improve its README --- .circleci/config.yml | 9 +- .gitignore | 5 +- README.md | 103 ++++++++---- integration_tests/README.md | 155 ++++++++++-------- .../{ci/sample.profiles.yml => profiles.yml} | 0 5 files changed, 157 insertions(+), 115 deletions(-) rename integration_tests/{ci/sample.profiles.yml => profiles.yml} (100%) diff --git a/.circleci/config.yml b/.circleci/config.yml index 025fa6a..2585c60 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,4 +1,3 @@ - version: 2 jobs: @@ -27,15 +26,12 @@ jobs: python -m pip install --upgrade pip setuptools python -m pip install --pre dbt-core dbt-postgres dbt-redshift dbt-snowflake dbt-bigquery - mkdir -p ~/.dbt - cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml - - run: name: "Run Tests - Postgres" environment: POSTGRES_TEST_HOST: localhost POSTGRES_TEST_USER: root - POSTGRES_TEST_PASS: '' + POSTGRES_TEST_PASS: "" POSTGRES_TEST_PORT: 5432 POSTGRES_TEST_DBNAME: circle_test command: | @@ -74,7 +70,7 @@ jobs: - run: name: "Run Tests - BigQuery" environment: - BIGQUERY_SERVICE_KEY_PATH: "/home/circleci/bigquery-service-key.json" + BIGQUERY_SERVICE_KEY_PATH: "/home/circleci/bigquery-service-key.json" command: | . dbt_venv/bin/activate @@ -86,7 +82,6 @@ jobs: dbt --warn-error run --target bigquery dbt --warn-error test --target bigquery - - save_cache: key: deps1-{{ .Branch }} paths: diff --git a/.gitignore b/.gitignore index 526bb69..1e302bd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,8 @@ - target/ dbt_modules/ dbt_packages/ logs/ env/ -integration_tests/profiles.yml +.venv/ +.env/ +venv/ diff --git a/README.md b/README.md index a39f8bc..93daff5 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ Macros that generate dbt code, and log it to the command line. # Contents + - [dbt-codegen](#dbt-codegen) - [Contents](#contents) - [Installation instructions](#installation-instructions) @@ -27,41 +28,49 @@ Macros that generate dbt code, and log it to the command line. - [Usage:](#usage-5) # Installation instructions + New to dbt packages? Read more about them [here](https://docs.getdbt.com/docs/building-a-dbt-project/package-management/). + 1. Include this package in your `packages.yml` file — check [here](https://hub.getdbt.com/dbt-labs/codegen/latest/) for the latest version number: + ```yml packages: - package: dbt-labs/codegen version: X.X.X ## update to latest version here ``` + 2. Run `dbt deps` to install the package. # Macros + ## generate_source ([source](macros/generate_source.sql)) + This macro generates lightweight YAML for a [Source](https://docs.getdbt.com/docs/using-sources), which you can then paste into a schema file. ### Arguments -* `schema_name` (required): The schema name that contains your source data -* `database_name` (optional, default=target.database): The database that your -source data is in. -* `table_names` (optional, default=none): A list of tables that you want to generate the source definitions for. -* `generate_columns` (optional, default=False): Whether you want to add the -column names to your source definition. -* `include_descriptions` (optional, default=False): Whether you want to add -description placeholders to your source definition. -* `include_data_types` (optional, default=True): Whether you want to add data -types to your source columns definitions. 
-* `table_pattern` (optional, default='%'): A table prefix / postfix that you -want to subselect from all available tables within a given schema. -* `exclude` (optional, default=''): A string you want to exclude from the selection criteria -* `name` (optional, default=schema_name): The name of your source -* `include_database` (optional, default=False): Whether you want to add -the database to your source definition -* `include_schema` (optional, default=False): Whether you want to add -the schema to your source definition + +- `schema_name` (required): The schema name that contains your source data +- `database_name` (optional, default=target.database): The database that your + source data is in. +- `table_names` (optional, default=none): A list of tables that you want to generate the source definitions for. +- `generate_columns` (optional, default=False): Whether you want to add the + column names to your source definition. +- `include_descriptions` (optional, default=False): Whether you want to add + description placeholders to your source definition. +- `include_data_types` (optional, default=True): Whether you want to add data + types to your source columns definitions. +- `table_pattern` (optional, default='%'): A table prefix / postfix that you + want to subselect from all available tables within a given schema. +- `exclude` (optional, default=''): A string you want to exclude from the selection criteria +- `name` (optional, default=schema_name): The name of your source +- `include_database` (optional, default=False): Whether you want to add + the database to your source definition +- `include_schema` (optional, default=False): Whether you want to add + the schema to your source definition ### Outputting to a file + If you use the `dbt run-operation` approach it is possible to output directly to a file by piping the output to a new file and using the `--quiet` CLI flag: ``` @@ -69,14 +78,15 @@ dbt --quiet run-operation generate_model_yaml --args '{"model_name": "stg_jaffle ``` ### Usage: + 1. Copy the macro into a statement tab in the dbt Cloud IDE, or into an analysis file, and compile your code ``` {{ codegen.generate_source('raw_jaffle_shop') }} ``` - or for multiple arguments - +or for multiple arguments + ``` {{ codegen.generate_source(schema_name= 'jaffle_shop', database_name= 'raw') }} ``` @@ -127,18 +137,20 @@ sources: 3. Paste the output in to a schema `.yml` file, and refactor as required. ## generate_base_model ([source](macros/generate_base_model.sql)) + This macro generates the SQL for a base model, which you can then paste into a model. ### Arguments: -* `source_name` (required): The source you wish to generate base model SQL for. -* `table_name` (required): The source table you wish to generate base model SQL for. -* `leading_commas` (optional, default=False): Whether you want your commas to be leading (vs trailing). -* `case_sensitive_cols ` (optional, default=False): Whether your source table has case sensitive column names. If true, keeps the case of the column names from the source. -* `materialized` (optional, default=None): Set materialization style (e.g. table, view, incremental) inside of the model's `config` block. If not set, materialization style will be controlled by `dbt_project.yml` +- `source_name` (required): The source you wish to generate base model SQL for. +- `table_name` (required): The source table you wish to generate base model SQL for. +- `leading_commas` (optional, default=False): Whether you want your commas to be leading (vs trailing). 
+- `case_sensitive_cols ` (optional, default=False): Whether your source table has case sensitive column names. If true, keeps the case of the column names from the source. +- `materialized` (optional, default=None): Set materialization style (e.g. table, view, incremental) inside of the model's `config` block. If not set, materialization style will be controlled by `dbt_project.yml` ### Usage: + 1. Create a source for the table you wish to create a base model on top of. 2. Copy the macro into a statement tab in the dbt Cloud IDE, or into an analysis file, and compile your code @@ -184,29 +196,38 @@ select * from renamed 4. Paste the output in to a model, and refactor as required. ## create_base_models ([source](macros/create_base_models.sql)) + This macro generates a series of terminal commands (appended with the `&&` to allow for subsequent execution) that execute the [base_model_creation](#base_model_creation-source) bash script. This bash script will write the output of the [generate_base_model](#generate_base_model-source) macro into a new model file in your local dbt project. ->**Note**: This macro is not compatible with the dbt Cloud IDE. + +> **Note**: This macro is not compatible with the dbt Cloud IDE. ### Arguments: -* `source_name` (required): The source you wish to generate base model SQL for. -* `tables` (required): A list of all tables you want to generate the base models for. + +- `source_name` (required): The source you wish to generate base model SQL for. +- `tables` (required): A list of all tables you want to generate the base models for. ### Usage: + 1. Create a source for the table you wish to create a base model on top of. 2. Copy the macro into a statement tab into your local IDE, and run your code ```sql dbt run-operation codegen.create_base_models --args '{source_name: my-source, tables: ["this-table","that-table"]}' ``` + ## base_model_creation ([source](bash_scripts/base_model_creation.sh)) + This bash script when executed from your local IDE will create model files in your dbt project instance that contain the outputs of the [generate_base_model](macros/generate_base_model.sql) macro. ->**Note**: This macro is not compatible with the dbt Cloud IDE. + +> **Note**: This macro is not compatible with the dbt Cloud IDE. ### Arguments: -* `source_name` (required): The source you wish to generate base model SQL for. -* `tables` (required): A list of all tables you want to generate the base models for. + +- `source_name` (required): The source you wish to generate base model SQL for. +- `tables` (required): A list of all tables you want to generate the base models for. ### Usage: + 1. Create a source for the table you wish to create a base model on top of. 2. Copy the macro into a statement tab into your local IDE, and run your code @@ -215,15 +236,18 @@ source dbt_packages/codegen/bash_scripts/base_model_creation.sh "source_name" [" ``` ## generate_model_yaml ([source](macros/generate_model_yaml.sql)) + This macro generates the YAML for a list of model(s), which you can then paste into a schema.yml file. ### Arguments: -* `model_names` (required): The model(s) you wish to generate YAML for. -* `upstream_descriptions` (optional, default=False): Whether you want to include descriptions for identical column names from upstream models and sources. -* `include_data_types` (optional, default=True): Whether you want to add data types to your model column definitions. + +- `model_names` (required): The model(s) you wish to generate YAML for. 
+- `upstream_descriptions` (optional, default=False): Whether you want to include descriptions for identical column names from upstream models and sources. +- `include_data_types` (optional, default=True): Whether you want to add data types to your model column definitions. ### Usage: + 1. Create a model. 2. Copy the macro into a statement tab in the dbt Cloud IDE, or into an analysis file, and compile your code @@ -268,13 +292,16 @@ models: 4. Paste the output in to a schema.yml file, and refactor as required. ## generate_model_import_ctes ([source](macros/generate_model_import_ctes.sql)) + This macro generates the SQL for a given model with all references pulled up into import CTEs, which you can then paste back into the model. ### Arguments: -* `model_name` (required): The model you wish to generate SQL with import CTEs for. -* `leading_commas` (optional, default=False): Whether you want your commas to be leading (vs trailing). + +- `model_name` (required): The model you wish to generate SQL with import CTEs for. +- `leading_commas` (optional, default=False): Whether you want your commas to be leading (vs trailing). ### Usage: + 1. Create a model with your original SQL query 2. Copy the macro into a statement tab in the dbt Cloud IDE, or into an analysis file, and compile your code @@ -357,3 +384,7 @@ select * from final ``` 4. Replace the contents of the model's current SQL file with the compiled or logged code + +## Contributing + +To contirbute code to this package, please follow the steps outlined in the `integration_tests` directory's[README](https://github.com/dbt-labs/dbt-codegen/blob/main/integration_tests/README.md) file. diff --git a/integration_tests/README.md b/integration_tests/README.md index 25ae33c..242dc27 100644 --- a/integration_tests/README.md +++ b/integration_tests/README.md @@ -1,60 +1,74 @@ -### Overview -1. Prerequisites -1. Configure credentials -1. Setup Postgres (optional) -1. Setup virtual environment -1. Installation for development -1. Run the integration tests -1. Run tests -1. Creating a new integration test +## Table of Contents + +## Overview ### Prerequisites + - python3 -- Docker (optional) +- make (Optional, but highly recommended for better development experience) +- Docker (optional, but recommended for using Postgres as your target database easily) + +Packages in dbt are actually dbt projects themselves, you write SQL and Jinja, sometimes in macros, to add new functionality or models to another dbt project. As SQL and Jinja rely on input data, it's essential to have a functioning project to be able to test that the code works as expected. Constantly running the code, loading data, running bits and pieces, and hoping for the best is not a good development flow though, nor is it a reliable way to ensure that everything works. This is why our dbt packages have integration tests. These tests run all of the data loading, model building, and tests that are defined in the package inside testing environments, and check that the results are as expected. + +If you add or modify functionality in any codegen macros, there should be corresponding changes to the integration tests. This README will walk you through this process. Let's outline the basic steps first: + +1. Set up your environment (credentials, virtual environment, dependencies, test database(s)) +2. Write or modify an integration test (you should expect to fail as you haven't implemented the functionality yet!) +3. 
Implement the functionality in the new or modified macro, and run the tests to get them to pass. +4. Commit your changes and open a pull request. + +## Setup ### Configure credentials -Edit the env file for your TARGET in `integration_tests/.env/[TARGET].env`. -Load the environment variables: -```shell -set -a; source integration_tests/.env/[TARGET].env; set +a -``` +You'll need to set environment variables with the credentials to access your target database. If you're using the recommended local development path of Postgres in Docker, these values are already filled in as they are generic. For the cloud warehouses listed, you'll need real credentials. You probably want to ensure you're building into a testing schema as well to keep the output of this codegen separate from any production data. We run against all the warehouses listed in the CI (implmented via CircleCI) when you open a PR, so feel free to test against Postgres while developing, and we'll ensure the code works against all the other targets. -or more specific: -```shell -set -a; source integration_tests/.env/postgres.env; set +a -``` +You can set these env vars a couple ways: + +> [!WARNING] +> The files below are _not_ gitignored — never, ever put your credentials into them and commit them! If you do by accident, you'll need to rotate your credentials immediately! + +- Use the `.env/[TARGET].env` files in the `integration_tests` directory as a guide to set your own environment variables, you'll need one for every variable listed in the sample file. You run `export [VARIABLE_NAME]=[VALUE]` for each variable in the file. +- **Most robust**: If you anticipate developing for multiple sessions, set these environment variables in your shell profile (like `~/.bashrc` or `~/.zshrc`). This way, you won't have to set them every time you open a new terminal. + +### Setup Postgres or other database targets -#### Setup Postgres (optional) +As mentioned, you'll need a target database to run the integration tests and develop against. You can use a cloud warehouse, but the easiest and free way to work is to use Postgres locally. We include a `docker-compose.yml` file that will spin up a Postgres container for you to make this easy. -Docker and `docker-compose` are both used in testing. Specific instructions for your OS can be found [here](https://docs.docker.com/get-docker/). +Specific instructions on installing and getting started with Docker for your OS can be found [here](https://docs.docker.com/get-docker/). -Postgres offers the easiest way to test most `dbt-codegen` functionality today. Its tests are the fastest to run, and the easiest to set up. To run the Postgres integration tests, you'll have to do one extra step of setting up the test database: +To run the Postgres container, just run: ```shell make setup-db ``` -or, alternatively: + +Or, alternatively: + ```shell docker-compose up --detach postgres ``` -### Setup virtual environment +> [!NOTE] +> `make` is a venerable build tool that is included in most Unix-like operating systems. It's not strictly necessary to use `make` to develop on this project, but there are several `make` commands that wrap more complex commands and make development easier. If you don't have `make` installed or don't want to use it, you can just run the commands in the `Makefile` directly. All the examples will show both options. + +### Set up virtual environment + +We strongly recommend using virtual environments when developing code in `dbt-codegen`. 
We recommend creating this virtual environment in the root of the `dbt-codegen` repository. To create a new virtual environment, run: -We strongly recommend using virtual environments when developing code in `dbt-codegen`. We recommend creating this virtualenv -in the root of the `dbt-codegen` repository. To create a new virtualenv, run: ```shell -python3 -m venv env -source env/bin/activate +python3 -m venv .venv +source .venv/bin/activate ``` This will create and activate a new Python virtual environment. -### Installation for development +### Install dependencies First make sure that you set up your virtual environment as described above. Also ensure you have the latest version of pip and setuptools installed: + ``` -python -m pip install --upgrade pip setuptools +python3 -m pip install --upgrade pip setuptools ``` Next, install `dbt-core` (and its dependencies) with: @@ -62,20 +76,31 @@ Next, install `dbt-core` (and its dependencies) with: ```shell make dev target=[postgres|redshift|...] # or -pip install --pre dbt-core dbt-[postgres|redshift|...] +python3 -m pip install --pre dbt-core dbt-[postgres|redshift|...] ``` -or more specific: +Or more specific: ```shell make dev target=postgres # or -pip install --pre dbt-core dbt-postgres +python3 -m pip install --pre dbt-core dbt-postgres ``` +> [!NOTE] +> The `--pre` flag tells pip to install the latest pre-release version of whatever you pass to install. This ensures you're always using the latest version of dbt, so if your code interacts with dbt in a way that causes issues or test failures, we'll know about it ahead of a release. + +Make sure to reload your virtual environment after installing the dependencies: + +```shell +source .venv/bin/activate +``` + +## Write or modify an integration test + ### Run the integration tests -To run all the integration tests on your local machine like they will get run in the CI (using CircleCI): +To run all the integration tests on your local machine like they will get run in CI: ```shell make test target=[postgres|redshift|...] @@ -83,7 +108,7 @@ make test target=[postgres|redshift|...] ./run_test.sh [postgres|redshift|...] ``` -or more specific: +Or more specific: ```shell make test target=postgres @@ -91,51 +116,41 @@ make test target=postgres ./run_test.sh postgres ``` -Where possible, targets can run in docker containers (this works for Postgres or in the future Spark for example). For managed services like Snowflake, BigQuery and Redshift this is not possible, hence your own configuration for these services has to be provided in the appropriate env files in `integration_tests/.env/[TARGET].env` +Run all the tests _before_ you start developing to make sure everything is working as expected before you start making changes. Nothing is worse than spending a ton of time troubleshooting a failing test, only to realize it was failing before you touched anything. This will also ensure that you have the correct environment variables set up and that your database is running. ### Creating a new integration test -#### Set up profiles -Do one of the following: -1. Use the `profiles.yml` in the current working directory for dbt Core 1.3 and above - ```shell - cp integration_tests/ci/sample.profiles.yml integration_tests/profiles.yml - ``` -1. Use `DBT_PROFILES_DIR` - ```shell - cp integration_tests/ci/sample.profiles.yml integration_tests/profiles.yml - export DBT_PROFILES_DIR=$(cd integration_tests && pwd) - ``` -1. 
Use `~/.dbt/profiles.yml` - - Copy contents from `integration_tests/ci/sample.profiles.yml` into `~/.dbt/profiles.yml`. - #### Add your integration test -This directory contains an example dbt project which tests the macros in the `dbt-codegen` package. An integration test typically involves making: -1. a new seed file -2. a new model file -3. a generic test to assert anticipated behaviour. -For an example of integration tests, check out the tests for the `get_url_parameter` macro in the `dbt-utils` project: +Adding integration tests for new functionality typically involves making one or more of the following: -1. [Macro definition](https://github.com/dbt-labs/dbt-utils/blob/main/macros/web/get_url_parameter.sql) -2. [Seed file with fake data](https://github.com/dbt-labs/dbt-utils/blob/main/integration_tests/data/web/data_urls.csv) -3. [Model to test the macro](https://github.com/dbt-labs/dbt-utils/blob/main/integration_tests/models/web/test_urls.sql) -4. [A generic test to assert the macro works as expected](https://github.com/dbt-labs/dbt-utils/blob/main/integration_tests/models/web/schema.yml) +- a new seed file of fixture data +- a new model file to test against +- a new test to assert anticipated behaviour -Once you've added all of these files, you should be able to run: +Once you've added and/or edited the necessary files, assuming you are in the sub-project in the `integration_tests` folder, you should be able to run and test your new additions specifically by running: -Assuming you are in the `integration_tests` folder, ```shell dbt deps --target {your_target} -dbt seed --target {your_target} -dbt run --target {your_target} --model {your_model_name} -dbt test --target {your_target} --model {your_model_name} +dbt build --target {your_target} --select +{your_selection_criteria} ``` -Alternatively: -```shell -dbt deps --target {your_target} -dbt build --target {your_target} --select +{your_model_name} -``` +The `dbt build` command will handle seeding, running, and testing the selection in a single command. The `+` operator in the `--select` flag indicates we also want to build everything that this selection depends on. + +Or simply `make dev target={your_target}` and then `make test target={your_target}` if you're okay with running the entire project and all tests. + +Remember, typically you'll want to create a failing test _first_, then implement the functionality to make it pass. This is called "test-driven development" (TDD) and it's a great way to ensure that your code really does what you expect it to. For example, let's imagine you wrote a test expecting it to fail, but it passed before you even implemented your logic! That would mean the test is not actually testing what you want, and you'd need to re-evaluate your assumptions. That's something you want to catch early in the development process, and what TDD is all about. So, expect this run of tests after you add your new logic to fail. + +## Implement the functionality + +Okay finally, this is the fun part! You can now implement the functionality in the macro you're working on.The development flow should be something like: + +1. You've got a failing test, so you know what you need to implement. +1. Implement some logic in the macro you're working on. +1. Run the relevant tests to see if they pass. +1. Repeat until the tests pass. +1. Run the full test suite to ensure you didn't break anything else by accident. + +## Commit your changes and open a pull request -If the tests all pass, then you're good to go! 
All tests will be run automatically when you create a PR against this repo. +Once your tests are passing and you're happy with the code, you'll want to commit it and open a new PR on GitHub. Don't forget to run the full test suite against your target database before you open a PR to make sure you didn't accidentally break any existing functionality. When you open a PR, CircleCI will run the same test suite against all the database targets. If they're passing, we'll triage and review the code as soon as we can! Thank you for contributing to dbt-codegen! diff --git a/integration_tests/ci/sample.profiles.yml b/integration_tests/profiles.yml similarity index 100% rename from integration_tests/ci/sample.profiles.yml rename to integration_tests/profiles.yml From bc422d112e9668e1e74d95d73fffbd06b2f2d232 Mon Sep 17 00:00:00 2001 From: gwen windflower Date: Sat, 2 Mar 2024 08:49:48 -0600 Subject: [PATCH 2/8] Clean up formatting in READMEs --- README.md | 3 ++- integration_tests/README.md | 38 ++++++++++++++++++++++++++----------- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 93daff5..c7c68b5 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ Macros that generate dbt code, and log it to the command line. - [generate_model_import_ctes (source)](#generate_model_import_ctes-source) - [Arguments:](#arguments-5) - [Usage:](#usage-5) +- [Contributing](#contributing) # Installation instructions @@ -387,4 +388,4 @@ select * from final ## Contributing -To contirbute code to this package, please follow the steps outlined in the `integration_tests` directory's[README](https://github.com/dbt-labs/dbt-codegen/blob/main/integration_tests/README.md) file. +To contirbute code to this package, please follow the steps outlined in the `integration_tests` directory's [README](https://github.com/dbt-labs/dbt-codegen/blob/main/integration_tests/README.md) file. diff --git a/integration_tests/README.md b/integration_tests/README.md index 242dc27..9dc03fa 100644 --- a/integration_tests/README.md +++ b/integration_tests/README.md @@ -1,12 +1,29 @@ ## Table of Contents +1. [Overview](#overview) + 1. [Prerequisites](#prerequisites) + 2. [Introduction](#introduction) +2. [Setup](#setup) + 1. [Configure credentials](#configure-credentials) + 2. [Setup Postgres or other database targets](#setup-postgres-or-other-database-targets) + 3. [Set up virtual environment](#set-up-virtual-environment) + 4. [Install dependencies](#install-dependencies) +3. [Write or modify an integration test](#write-or-modify-an-integration-test) + 1. [Run the integration tests](#run-the-integration-tests) + 2. [Creating a new integration test](#creating-a-new-integration-test) + 1. [Add your integration test](#add-your-integration-test) + 2. [Implement the functionality](#implement-the-functionality) + 3. 
[Commit your changes and open a pull request](#commit-your-changes-and-open-a-pull-request) + ## Overview ### Prerequisites -- python3 -- make (Optional, but highly recommended for better development experience) -- Docker (optional, but recommended for using Postgres as your target database easily) +- [python3](https://www.python.org/) +- [make]() (Optional, but highly recommended for better development experience) +- [Docker](https://www.docker.com/) (optional, but recommended for using Postgres as your target database easily) + +### Introduction Packages in dbt are actually dbt projects themselves, you write SQL and Jinja, sometimes in macros, to add new functionality or models to another dbt project. As SQL and Jinja rely on input data, it's essential to have a functioning project to be able to test that the code works as expected. Constantly running the code, loading data, running bits and pieces, and hoping for the best is not a good development flow though, nor is it a reliable way to ensure that everything works. This is why our dbt packages have integration tests. These tests run all of the data loading, model building, and tests that are defined in the package inside testing environments, and check that the results are as expected. @@ -23,13 +40,13 @@ If you add or modify functionality in any codegen macros, there should be corres You'll need to set environment variables with the credentials to access your target database. If you're using the recommended local development path of Postgres in Docker, these values are already filled in as they are generic. For the cloud warehouses listed, you'll need real credentials. You probably want to ensure you're building into a testing schema as well to keep the output of this codegen separate from any production data. We run against all the warehouses listed in the CI (implmented via CircleCI) when you open a PR, so feel free to test against Postgres while developing, and we'll ensure the code works against all the other targets. -You can set these env vars a couple ways: +You can set these env vars in a couple ways: > [!WARNING] > The files below are _not_ gitignored — never, ever put your credentials into them and commit them! If you do by accident, you'll need to rotate your credentials immediately! - Use the `.env/[TARGET].env` files in the `integration_tests` directory as a guide to set your own environment variables, you'll need one for every variable listed in the sample file. You run `export [VARIABLE_NAME]=[VALUE]` for each variable in the file. -- **Most robust**: If you anticipate developing for multiple sessions, set these environment variables in your shell profile (like `~/.bashrc` or `~/.zshrc`). This way, you won't have to set them every time you open a new terminal. +- **More robust**: If you anticipate developing for multiple sessions, set these environment variables in your shell profile (like `~/.bashrc` or `~/.zshrc`). This way, you won't have to set them every time you open a new terminal. ### Setup Postgres or other database targets @@ -49,8 +66,7 @@ Or, alternatively: docker-compose up --detach postgres ``` -> [!NOTE] -> `make` is a venerable build tool that is included in most Unix-like operating systems. It's not strictly necessary to use `make` to develop on this project, but there are several `make` commands that wrap more complex commands and make development easier. If you don't have `make` installed or don't want to use it, you can just run the commands in the `Makefile` directly. 
All the examples will show both options. +> [!NOTE] > `make` is a venerable build tool that is included in most Unix-like operating systems. It's not strictly necessary to use `make` to develop on this project, but there are several `make` commands that wrap more complex commands and make development easier. If you don't have `make` installed or don't want to use it, you can just run the commands in the `Makefile` directly. All the examples will show both options. ### Set up virtual environment @@ -146,10 +162,10 @@ Remember, typically you'll want to create a failing test _first_, then implement Okay finally, this is the fun part! You can now implement the functionality in the macro you're working on.The development flow should be something like: 1. You've got a failing test, so you know what you need to implement. -1. Implement some logic in the macro you're working on. -1. Run the relevant tests to see if they pass. -1. Repeat until the tests pass. -1. Run the full test suite to ensure you didn't break anything else by accident. +2. Implement some logic in the macro you're working on. +3. Run the relevant tests to see if they pass. +4. Repeat until the tests pass. +5. Run the full test suite to ensure you didn't break anything else by accident. ## Commit your changes and open a pull request From f978143c6a52a90c8570133294331dd558af914b Mon Sep 17 00:00:00 2001 From: gwen windflower Date: Sat, 2 Mar 2024 08:56:57 -0600 Subject: [PATCH 3/8] Fix headings and TOC in dev README --- integration_tests/README.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/integration_tests/README.md b/integration_tests/README.md index 9dc03fa..36a2a27 100644 --- a/integration_tests/README.md +++ b/integration_tests/README.md @@ -11,9 +11,8 @@ 3. [Write or modify an integration test](#write-or-modify-an-integration-test) 1. [Run the integration tests](#run-the-integration-tests) 2. [Creating a new integration test](#creating-a-new-integration-test) - 1. [Add your integration test](#add-your-integration-test) - 2. [Implement the functionality](#implement-the-functionality) - 3. [Commit your changes and open a pull request](#commit-your-changes-and-open-a-pull-request) +4. [Implement the functionality](#implement-the-functionality) +5. [Commit your changes and open a pull request](#commit-your-changes-and-open-a-pull-request) ## Overview @@ -136,8 +135,6 @@ Run all the tests _before_ you start developing to make sure everything is worki ### Creating a new integration test -#### Add your integration test - Adding integration tests for new functionality typically involves making one or more of the following: - a new seed file of fixture data From d4b0bb6a085f7b4c552b5a53e86da45c84074cd8 Mon Sep 17 00:00:00 2001 From: gwen windflower Date: Sat, 2 Mar 2024 10:17:47 -0600 Subject: [PATCH 4/8] Update phrasing in dev README --- integration_tests/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/README.md b/integration_tests/README.md index 36a2a27..b5bd35b 100644 --- a/integration_tests/README.md +++ b/integration_tests/README.md @@ -29,7 +29,7 @@ Packages in dbt are actually dbt projects themselves, you write SQL and Jinja, s If you add or modify functionality in any codegen macros, there should be corresponding changes to the integration tests. This README will walk you through this process. Let's outline the basic steps first: 1. Set up your environment (credentials, virtual environment, dependencies, test database(s)) -2. 
Write or modify an integration test (you should expect to fail as you haven't implemented the functionality yet!) +2. Write or modify an integration test (you should expect this to fail as you haven't implemented the functionality yet!) 3. Implement the functionality in the new or modified macro, and run the tests to get them to pass. 4. Commit your changes and open a pull request. From d507392d00cf55dca77993abc2f60715695bf627 Mon Sep 17 00:00:00 2001 From: gwen windflower Date: Tue, 5 Mar 2024 14:31:42 -0600 Subject: [PATCH 5/8] Update env vars in README --- integration_tests/README.md | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/integration_tests/README.md b/integration_tests/README.md index b5bd35b..f09d268 100644 --- a/integration_tests/README.md +++ b/integration_tests/README.md @@ -41,12 +41,39 @@ You'll need to set environment variables with the credentials to access your tar You can set these env vars in a couple ways: -> [!WARNING] -> The files below are _not_ gitignored — never, ever put your credentials into them and commit them! If you do by accident, you'll need to rotate your credentials immediately! - -- Use the `.env/[TARGET].env` files in the `integration_tests` directory as a guide to set your own environment variables, you'll need one for every variable listed in the sample file. You run `export [VARIABLE_NAME]=[VALUE]` for each variable in the file. +- **Temporary**: Set these environment variables in your shell before running the tests. This is the easiest way to get started, but you'll have to set them every time you open a new terminal. - **More robust**: If you anticipate developing for multiple sessions, set these environment variables in your shell profile (like `~/.bashrc` or `~/.zshrc`). This way, you won't have to set them every time you open a new terminal. +The environment variables you'll need to set for each adapter are: + +```bash +# Postgres — these are the defaults for the Docker container so actually have values +export POSTGRES_TEST_HOST=localhost +export POSTGRES_TEST_USER=root +export POSTGRES_TEST_PASS='' +export POSTGRES_TEST_PORT=5432 +export POSTGRES_TEST_DBNAME=circle_test + +# BigQuery +export BIGQUERY_SERVICE_KEY_PATH= +export BIGQUERY_TEST_DATABASE= + +# Redshift +export REDSHIFT_TEST_HOST= +export REDSHIFT_TEST_USER= +export REDSHIFT_TEST_PASS= +export REDSHIFT_TEST_DBNAME= +export REDSHIFT_TEST_PORT= + +# Snowflake +export SNOWFLAKE_TEST_ACCOUNT= +export SNOWFLAKE_TEST_USER= +export SNOWFLAKE_TEST_PASSWORD= +export SNOWFLAKE_TEST_ROLE= +export SNOWFLAKE_TEST_DATABASE= +export SNOWFLAKE_TEST_WAREHOUSE= +``` + ### Setup Postgres or other database targets As mentioned, you'll need a target database to run the integration tests and develop against. You can use a cloud warehouse, but the easiest and free way to work is to use Postgres locally. We include a `docker-compose.yml` file that will spin up a Postgres container for you to make this easy. 
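The package's actual `docker-compose.yml` is not part of this diff, but as a rough sketch, a Postgres service wired to the default credentials above (root user, empty password, `circle_test` database on port 5432) might look something like this — the real file in the repository may differ:

```yaml
# Hypothetical sketch of a local-testing Postgres service; not the repo's actual file.
version: "3"
services:
  postgres:
    image: postgres # official Postgres image; any recent tag works for local testing
    environment:
      POSTGRES_USER: root # matches POSTGRES_TEST_USER
      POSTGRES_DB: circle_test # matches POSTGRES_TEST_DBNAME
      POSTGRES_HOST_AUTH_METHOD: trust # permits the empty POSTGRES_TEST_PASS
    ports:
      - "5432:5432" # matches POSTGRES_TEST_PORT
```

With a service like this running, dbt can connect using the Postgres environment variables exported earlier.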
From e0db1b1850f3a96bbe5a87cb601d3bf06c38658e Mon Sep 17 00:00:00 2001 From: gwen windflower Date: Tue, 5 Mar 2024 14:31:57 -0600 Subject: [PATCH 6/8] Delete .env dir and files --- integration_tests/.env/bigquery.env | 2 -- integration_tests/.env/postgres.env | 5 ----- integration_tests/.env/redshift.env | 5 ----- integration_tests/.env/snowflake.env | 6 ------ 4 files changed, 18 deletions(-) delete mode 100644 integration_tests/.env/bigquery.env delete mode 100644 integration_tests/.env/postgres.env delete mode 100644 integration_tests/.env/redshift.env delete mode 100644 integration_tests/.env/snowflake.env diff --git a/integration_tests/.env/bigquery.env b/integration_tests/.env/bigquery.env deleted file mode 100644 index 59fce01..0000000 --- a/integration_tests/.env/bigquery.env +++ /dev/null @@ -1,2 +0,0 @@ -BIGQUERY_SERVICE_KEY_PATH= -BIGQUERY_TEST_DATABASE= diff --git a/integration_tests/.env/postgres.env b/integration_tests/.env/postgres.env deleted file mode 100644 index c3f7dd8..0000000 --- a/integration_tests/.env/postgres.env +++ /dev/null @@ -1,5 +0,0 @@ -POSTGRES_TEST_HOST=localhost -POSTGRES_TEST_USER=root -POSTGRES_TEST_PASS='' -POSTGRES_TEST_PORT=5432 -POSTGRES_TEST_DBNAME=circle_test diff --git a/integration_tests/.env/redshift.env b/integration_tests/.env/redshift.env deleted file mode 100644 index 77378d5..0000000 --- a/integration_tests/.env/redshift.env +++ /dev/null @@ -1,5 +0,0 @@ -REDSHIFT_TEST_HOST= -REDSHIFT_TEST_USER= -REDSHIFT_TEST_PASS= -REDSHIFT_TEST_DBNAME= -REDSHIFT_TEST_PORT= \ No newline at end of file diff --git a/integration_tests/.env/snowflake.env b/integration_tests/.env/snowflake.env deleted file mode 100644 index 134cc8d..0000000 --- a/integration_tests/.env/snowflake.env +++ /dev/null @@ -1,6 +0,0 @@ -SNOWFLAKE_TEST_ACCOUNT= -SNOWFLAKE_TEST_USER= -SNOWFLAKE_TEST_PASSWORD= -SNOWFLAKE_TEST_ROLE= -SNOWFLAKE_TEST_DATABASE= -SNOWFLAKE_TEST_WAREHOUSE= \ No newline at end of file From e2cbdb026ce77f1a285b565aaec2ca7a25093d44 Mon Sep 17 00:00:00 2001 From: gwen windflower Date: Tue, 5 Mar 2024 19:34:39 -0600 Subject: [PATCH 7/8] Touch up dev README language --- integration_tests/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/integration_tests/README.md b/integration_tests/README.md index f09d268..fc73a10 100644 --- a/integration_tests/README.md +++ b/integration_tests/README.md @@ -42,7 +42,7 @@ You'll need to set environment variables with the credentials to access your tar You can set these env vars in a couple ways: - **Temporary**: Set these environment variables in your shell before running the tests. This is the easiest way to get started, but you'll have to set them every time you open a new terminal. -- **More robust**: If you anticipate developing for multiple sessions, set these environment variables in your shell profile (like `~/.bashrc` or `~/.zshrc`). This way, you won't have to set them every time you open a new terminal. +- **Reusable**: If you anticipate developing for multiple sessions, set these environment variables in your shell profile (like `~/.bashrc` or `~/.zshrc`). This way, you won't have to set them every time you open a new terminal. The environment variables you'll need to set for each adapter are: @@ -92,7 +92,8 @@ Or, alternatively: docker-compose up --detach postgres ``` -> [!NOTE] > `make` is a venerable build tool that is included in most Unix-like operating systems. 
It's not strictly necessary to use `make` to develop on this project, but there are several `make` commands that wrap more complex commands and make development easier. If you don't have `make` installed or don't want to use it, you can just run the commands in the `Makefile` directly. All the examples will show both options. +> [!NOTE] +> `make` is a venerable build tool that is included in most Unix-like operating systems. It's not strictly necessary to use `make` to develop on this project, but there are several `make` commands that wrap more complex commands and make development easier. If you don't have `make` installed or don't want to use it, you can just run the commands in the `Makefile` directly. All the examples will show both options. ### Set up virtual environment From 1f4771f5344fb8a08b8cbc2d1fac3cdd3ec78ee4 Mon Sep 17 00:00:00 2001 From: gwen windflower Date: Mon, 18 Mar 2024 17:17:17 -0500 Subject: [PATCH 8/8] Include footnotes on installing make and docker --- integration_tests/README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/integration_tests/README.md b/integration_tests/README.md index fc73a10..d089399 100644 --- a/integration_tests/README.md +++ b/integration_tests/README.md @@ -19,8 +19,8 @@ ### Prerequisites - [python3](https://www.python.org/) -- [make]() (Optional, but highly recommended for better development experience) -- [Docker](https://www.docker.com/) (optional, but recommended for using Postgres as your target database easily) +- [make]() (Optional, but recommended for better development experience)[^1] +- [Docker](https://www.docker.com/) (Optional, but recommended for using Postgres as your target database easily)[^2] ### Introduction @@ -78,8 +78,6 @@ export SNOWFLAKE_TEST_WAREHOUSE= As mentioned, you'll need a target database to run the integration tests and develop against. You can use a cloud warehouse, but the easiest and free way to work is to use Postgres locally. We include a `docker-compose.yml` file that will spin up a Postgres container for you to make this easy. -Specific instructions on installing and getting started with Docker for your OS can be found [here](https://docs.docker.com/get-docker/). - To run the Postgres container, just run: ```shell @@ -92,8 +90,7 @@ Or, alternatively: docker-compose up --detach postgres ``` -> [!NOTE] -> `make` is a venerable build tool that is included in most Unix-like operating systems. It's not strictly necessary to use `make` to develop on this project, but there are several `make` commands that wrap more complex commands and make development easier. If you don't have `make` installed or don't want to use it, you can just run the commands in the `Makefile` directly. All the examples will show both options. +> [!NOTE] > `make` is a venerable build tool that is included in most Unix-like operating systems. It's not strictly necessary to use `make` to develop on this project, but there are several `make` commands that wrap more complex commands and make development easier. If you don't have `make` installed or don't want to use it, you can just run the commands in the `Makefile` directly. All the examples will show both options. ### Set up virtual environment @@ -195,3 +192,6 @@ Okay finally, this is the fun part! You can now implement the functionality in t ## Commit your changes and open a pull request Once your tests are passing and you're happy with the code, you'll want to commit it and open a new PR on GitHub. 
Don't forget to run the full test suite against your target database before you open a PR to make sure you didn't accidentally break any existing functionality. When you open a PR, CircleCI will run the same test suite against all the database targets. If they're passing, we'll triage and review the code as soon as we can! Thank you for contributing to dbt-codegen!

[^1]: If you're on a Mac, `make` is probably best installed with the Xcode Command Line Tools, or you can install GNU `make` via Homebrew with `brew install make`. If you're on Windows, you can either use the Windows Subsystem for Linux (WSL) or install `make` with `scoop` or `chocolatey`. If you're on Linux, you probably already have `make` installed.
[^2]: Specific instructions on installing and getting started with Docker for your OS can be found [here](https://docs.docker.com/get-docker/).
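For reference, a typical pre-PR sequence using the `make` targets described above might look like the following sketch — the branch name and commit message are placeholders, and `gh pr create` assumes you have the GitHub CLI installed:

```shell
# run the full suite against your local Postgres target one last time
make test target=postgres

# commit your work on a feature branch and push it
git checkout -b feature/my-codegen-change
git add -A
git commit -m "Add integration test and macro logic for <your change>"
git push --set-upstream origin feature/my-codegen-change

# open the pull request (or do this from the GitHub web UI)
gh pr create --fill
```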