diff --git a/README.md b/README.md index fcb32a7..53eabd3 100644 --- a/README.md +++ b/README.md @@ -24,13 +24,13 @@ The following features are implemented or planned: - [x] dbt [Best Practices project structure](https://docs.getdbt.com/best-practices/how-we-structure/1-guide-overview) - [x] Basic initial dbt Project configuration -- [ ] Coverage of all non-password-based authentication methods for the below warehouses[^1] -- [ ] Warehouse-aware dbt profile configuration for the following options, check out the [dbt docs on warehouse-specific profile configs](https://docs.getdbt.com/docs/core/connect-data-platform/about-core-connections) for more details: +- [x] Coverage of all non-password-based authentication methods for the below warehouses[^1] +- [x] Warehouse-aware dbt profile configuration for the following options, check out the [dbt docs on warehouse-specific profile configs](https://docs.getdbt.com/docs/core/connect-data-platform/about-core-connections) for more details: - [x] Snowflake - using `authenticator: externalbrowser` with SSO - [x] BigQuery - using `method: oauth` via `gcloud` CLI - - [ ] Databricks - using `token: `, you will need to create a personal access token in Databricks, and fill it into that field in to your `~/.dbt/profiles.yml` manually once the project is created[^1] - - [ ] Redshift - using `method: IAM` via `awscli` - - [ ] Postgres - still haven't decided the best way to handle Postgres + - [x] Databricks - using `token: `, you will need to create a personal access token in Databricks, and fill it into that field in your `~/.dbt/profiles.yml` manually once the project is created[^1] + - [x] Redshift - using `method: IAM` via `awscli` + - [x] Postgres - using `user` and `password`, the `password` field is left blank in your `~/.dbt/profiles.yml` for you to fill in manually if needed - [x] DuckDB - local warehouse, no authentication required - [x] Linting and formatting of SQL with SQLFluff - [x] Configurable linting rules in the setup process diff --git a/copier.yml b/copier.yml index 74d0b6d..448ecef 
100644 --- a/copier.yml +++ b/copier.yml @@ -6,10 +6,11 @@ project_name: project_slug: type: str - default: "{{ project_name | lower | replace(' ', '_') | replace('-', '_') | replace('.', '') }}" + default: "{{ project_name | lower | replace(' ', '_') | replace('-', '_') | replace('.', '') | replace(\"'\", '')}}" help: The slugified name of your project. -# Warehouse profile connection configs +# Warehouse profile connection shared configs +# All configs that are shared by more than one warehouse profile_name: type: str default: "{{ project_slug }}" @@ -29,25 +30,28 @@ data_warehouse: - postgres - duckdb -username: - type: str - help: The username you log in to your database with. - when: "{{ data_warehouse in ['snowflake'] }}" - database: type: str help: | The name or id of the database (or project in BigQuery) *within* your data platform that you want to build into. + when: "{{ data_warehouse in [ 'snowflake', 'bigquery', 'redshift', 'postgres', 'duckdb' ] }}" schema: type: str help: | The name of the schema (or dataset in BigQuery) in the database you want to build into. -thread_count: +host: + type: str + help: | + The URL string of the database host — don't include 'https://'. + when: "{{ data_warehouse in [ 'databricks', 'redshift', 'postgres' ] }}" + +port: type: int - default: 8 - help: How many threads you want to dbt to run in parallel? + help: | + The port number on your database host to connect to. + when: "{{ data_warehouse in [ 'redshift', 'postgres' ] }}" # Snowflake-specific configs account_id: @@ -69,14 +73,51 @@ warehouse: help: The name of your default development compute warehouse. when: "{{ data_warehouse == 'snowflake' }}" +username: + type: str + help: The username you log in to your database with. This may be an email address or a plain string. + when: "{{ data_warehouse in [ 'snowflake', 'redshift', 'postgres' ] }}" + # BigQuery specific configs -# There are none for now! `oauth` with `gcloud` is great. +# None for now! 
# Databricks specific configs +using_unity_catalog: + type: bool + default: False + help: Are you using Databricks Unity Catalog? + when: "{{ data_warehouse == 'databricks' }}" + +catalog: + type: str + help: Optional catalog name if you're using Unity Catalog. + when: "{{ using_unity_catalog }}" + +http_path: + type: str + help: The http path to your SQL Warehouse or all-purpose cluster. + when: "{{ data_warehouse == 'databricks' }}" # Redshift specific configs +cluster_id: + type: str + help: The id of your warehouse cluster. + when: "{{ data_warehouse == 'redshift' }}" + +iam_profile: + type: str + default: default + help: The name of the profile to use for connection. + when: "{{ data_warehouse == 'redshift' }}" + +region: + type: str + default: "us-east-1" + help: The region to connect your IAM profile through. + when: "{{ data_warehouse == 'redshift' }}" # Postgres specific configs +# None for now! # DuckDB specific configs duckdb_file_path: @@ -87,6 +128,12 @@ duckdb_file_path: Strongly suggest the default, which will be automatically gitignored. when: "{{ data_warehouse == 'duckdb' }}" +# dbt development configs +thread_count: + type: int + default: 8 + help: How many threads do you want dbt to run in parallel? 
+ # Formatting configs tab_space_size: type: int diff --git a/profiles.yml.jinja b/profiles.yml.jinja index 38612bf..aeb2354 100644 --- a/profiles.yml.jinja +++ b/profiles.yml.jinja @@ -5,10 +5,10 @@ dev: type: {{ data_warehouse | lower }} account: {{ account_env_var | lower }} - user: {{ username | lower }} authenticator: externalbrowser - database: {{ database | lower}} + user: {{ username | lower }} warehouse: {{ warehouse | lower }} + database: {{ database | lower}} schema: {{ schema | lower }} threads: {{ thread_count}} {%- endif -%} @@ -18,9 +18,9 @@ outputs: dev: type: {{ data_warehouse }} + method: oauth project: {{ database | lower }} dataset: {{ schema | lower }} - method: oauth threads: {{ thread_count }} {%- endif -%} {% if data_warehouse == "redshift" %} @@ -28,7 +28,17 @@ target: dev outputs: dev: - # coming soon + type: {{ data_warehouse | lower }} + method: iam + iam_profile: {{ iam_profile | lower }} + region: {{ region | lower }} + cluster_id: {{ cluster_id | lower }} + host: {{ host | lower }} + port: {{ port | lower }} + user: {{ username | lower }} + database: {{ database | lower }} + schema: {{ schema | lower }} + threads: {{ thread_count }} {%- endif -%} {% if data_warehouse == "duckdb" %} {{ profile_name }}: @@ -46,12 +56,25 @@ target: dev outputs: dev: - # coming soon + type: {{ data_warehouse | lower }} + host: {{ host | lower }} + port: {{ port | lower }} + user: {{ username | lower }} + password: "" # Add a password here if needed + database: {{ database | lower }} + schema: {{ schema | lower }} + threads: {{ thread_count }} {%- endif -%} {% if data_warehouse == "databricks" %} {{ profile_name }}: target: dev outputs: dev: - # coming soon + type: {{ data_warehouse | lower }} + {% if using_unity_catalog -%}catalog: {{ catalog | lower }}{%- endif %} + host: {{ host | lower }} + http_path: {{ http_path | lower }} + token: # Your Personal Access Token here + schema: {{ schema | lower }} + threads: {{ thread_count }} {%- endif -%} diff --git 
a/template-integration-tests/test-expectations/bigquery_profile.yml b/template-integration-tests/test-expectations/bigquery_profile.yml index 90556f9..c877396 100644 --- a/template-integration-tests/test-expectations/bigquery_profile.yml +++ b/template-integration-tests/test-expectations/bigquery_profile.yml @@ -1,4 +1,3 @@ - legoalas_corp: target: dev outputs: diff --git a/template-integration-tests/test-expectations/databricks_profile.yml b/template-integration-tests/test-expectations/databricks_profile.yml new file mode 100644 index 0000000..8cbf96b --- /dev/null +++ b/template-integration-tests/test-expectations/databricks_profile.yml @@ -0,0 +1,11 @@ +faramir_landscaping: + target: dev + outputs: + dev: + type: databricks + catalog: tower_of_ecthelion + host: faramir-landscaping + http_path: ithilien.databricks.com + token: # Your Personal Access Token here + schema: bridal_gifts + threads: 8 diff --git a/template-integration-tests/test-expectations/duckdb_profile.yml b/template-integration-tests/test-expectations/duckdb_profile.yml index ef2d372..a153fd5 100644 --- a/template-integration-tests/test-expectations/duckdb_profile.yml +++ b/template-integration-tests/test-expectations/duckdb_profile.yml @@ -1,10 +1,9 @@ - -lothlorien_enterprises: +galadriels_mirrors_and_more: target: dev outputs: dev: type: duckdb path: ./lothlorien.db - database: mallorn - schema: flets + database: lothlorien + schema: mallorn_trees threads: 8 diff --git a/template-integration-tests/test-expectations/postgres_profile.yml b/template-integration-tests/test-expectations/postgres_profile.yml new file mode 100644 index 0000000..58ffed9 --- /dev/null +++ b/template-integration-tests/test-expectations/postgres_profile.yml @@ -0,0 +1,12 @@ +shieldmaiden_security: + target: dev + outputs: + dev: + type: postgres + host: localhost + port: 5432 + user: eowyn + password: "" # Add a password here if needed + database: rohan + schema: nazgul_threat_assessments + threads: 8 diff --git 
a/template-integration-tests/test-expectations/redshift_profile.yml b/template-integration-tests/test-expectations/redshift_profile.yml new file mode 100644 index 0000000..7e9f9a7 --- /dev/null +++ b/template-integration-tests/test-expectations/redshift_profile.yml @@ -0,0 +1,15 @@ +gimli_mining: + target: dev + outputs: + dev: + type: redshift + method: iam + iam_profile: default + region: us-east-1 + cluster_id: legolas-bff-4eva + host: gimli-mining.us-east-1.redshift.amazonaws.com + port: 5439 + user: gimli_son_of_gloin + database: ores + schema: mithril + threads: 8 diff --git a/template-integration-tests/test-expectations/snowflake_profile.yml b/template-integration-tests/test-expectations/snowflake_profile.yml index ad489d1..eeb2408 100644 --- a/template-integration-tests/test-expectations/snowflake_profile.yml +++ b/template-integration-tests/test-expectations/snowflake_profile.yml @@ -1,13 +1,12 @@ - aragorn_inc: target: dev outputs: dev: type: snowflake - account: - user: strider + account: authenticator: externalbrowser - database: gondor + user: strider warehouse: narsil + database: dunedain schema: rangers threads: 8 diff --git a/template-integration-tests/test_profile_output_is_correct.py b/template-integration-tests/test_profile_output_is_correct.py index 1c43fbd..49f9064 100644 --- a/template-integration-tests/test_profile_output_is_correct.py +++ b/template-integration-tests/test_profile_output_is_correct.py @@ -4,21 +4,22 @@ from typing import Dict import copier +import pytest import yaml from deepdiff import DeepDiff -PROJECT_ROOT = Path(__file__).parent.parent -TEST_EXPECT = PROJECT_ROOT / "template-integration-tests" / "test-expectations" -TEST_BUILD_DIR = PROJECT_ROOT / "template-integration-tests" / "test-build" +PROJECT_ROOT: Path = Path(__file__).parent.parent +TEST_EXPECT: Path = PROJECT_ROOT / "template-integration-tests" / "test-expectations" +TEST_BUILD_DIR: Path = PROJECT_ROOT / "template-integration-tests" / "test-build" 
-warehouse_answers: Dict[str, Dict[str, str]] = { +warehouse_answers: Dict[str, Dict[str, str | bool | int]] = { "duckdb": { - "project_name": "Lothlorien Enterprises", + "project_name": "Galadriel's Mirrors and More", "data_warehouse": "duckdb", "username": "galadriel", - "database": "mallorn", - "schema": "flets", + "database": "lothlorien", + "schema": "mallorn_trees", "duckdb_file_path": "./lothlorien.db", }, "snowflake": { @@ -28,7 +29,7 @@ "username": "Strider", "warehouse": "Narsil", "role": "King", - "database": "gondor", + "database": "dunedain", "schema": "rangers", }, "bigquery": { @@ -38,33 +39,69 @@ "database": "mirkwood", "schema": "archers", }, + "redshift": { + "project_name": "Gimli Mining", + "data_warehouse": "redshift", + "iam_profile": "default", + "region": "us-east-1", + "cluster_id": "legolas-bff-4eva", + "host": "gimli-mining.us-east-1.redshift.amazonaws.com", + "port": 5439, + "username": "gimli_son_of_gloin", + "database": "ores", + "schema": "mithril", + }, + "postgres": { + "project_name": "Shieldmaiden Security", + "data_warehouse": "postgres", + "host": "localhost", + "port": 5432, + "username": "eowyn", + "password": "", + "database": "rohan", + "schema": "nazgul_threat_assessments", + }, + "databricks": { + "project_name": "Faramir Landscaping", + "using_unity_catalog": True, + "catalog": "tower_of_ecthelion", + "data_warehouse": "databricks", + "host": "faramir-landscaping", + "http_path": "ithilien.databricks.com", + "database": "eowyn", + "schema": "bridal_gifts", + }, } +test_data = list(warehouse_answers.items())  # (warehouse, options) pairs fed to parametrize + -def _check_profiles(warehouse): - data = warehouse_answers[warehouse] +def check_profiles(warehouse: str, options: Dict) -> None: copier.run_copy( str(PROJECT_ROOT), str(TEST_BUILD_DIR / warehouse), - data=data, + data=options, defaults=True, unsafe=True, vcs_ref="HEAD", ) with open(TEST_EXPECT / f"{warehouse}_profile.yml", "r") as f: - expected_output = yaml.safe_load(f) + 
expected_output: Dict = yaml.safe_load(f) with open(TEST_BUILD_DIR / warehouse / "profiles.yml", "r") as f: - actual_output = yaml.safe_load(f) + actual_output: Dict = yaml.safe_load(f) - diff = DeepDiff(expected_output, actual_output) + diff: DeepDiff = DeepDiff(expected_output, actual_output) assert diff == {}, f"Differences: {diff}" -def test_profile_output_is_correct(): - if os.path.exists(TEST_BUILD_DIR): - shutil.rmtree(TEST_BUILD_DIR) +@pytest.mark.parametrize("warehouse, options", test_data) +def test_profile_output_is_correct(warehouse, options, request): + # Clean up the test build directory before the first case; test_data[0] is a (warehouse, options) tuple, so compare against its name + current_warehouse = request.node.callspec.params["warehouse"] + if current_warehouse == test_data[0][0]: + if os.path.exists(TEST_BUILD_DIR): + shutil.rmtree(TEST_BUILD_DIR) - for warehouse in warehouse_answers: - _check_profiles(warehouse) + check_profiles(warehouse, options)