From 143c1e7d3528adef401612807142c5e89e2dc96b Mon Sep 17 00:00:00 2001
From: Cor
Date: Thu, 11 Aug 2022 11:03:22 +0200
Subject: [PATCH] Add testing (#22)

* Add working script to run macro
* Add comment about adapters
* Try using a project instead of runtime config
* Remove spark credentials and Project
* Use connection from soda spark
* Add test requirements
* Add pytest ini
* Move everything into pytest fixtures
* Copy connection
* Remove pytest-dbt-core code
* Add pytest dbt core as test requirement
* Add workflow for testing
* Bump pytest dbt core version
* Add profile to dbt project
* Add profiles
* Add profiles dir when running pytest
* Remove redundant from future import annotations
* Bump pytest-dbt-core version
* Change version
* Add pyspark dependency
* Change pyspark dependency to dbt-spark session
* Change required by to dbt-spark
* Add test docstring
* Make test less strict
* Create and delete table with fixture
* Fix typo
* Add section about testing to the documentation
* Move test macros into tests/unit
* Run unit tests only in Github action
* Merge dev and test requirements
* Move conftest into functional
---
 .github/workflows/workflow.yml     | 29 +++++++++++++++++++++++
 README.md                          | 37 +++++++++++++++++++++++++++++-
 dbt_project.yml                    |  3 ++-
 dev-requirements.txt               |  4 +++-
 profiles.yml                       |  8 +++++++
 pytest.ini                         |  4 ++++
 tests/{ => functional}/conftest.py |  0
 tests/unit/test_macros.py          | 26 +++++++++++++++++++++
 8 files changed, 108 insertions(+), 3 deletions(-)
 create mode 100644 .github/workflows/workflow.yml
 create mode 100644 profiles.yml
 rename tests/{ => functional}/conftest.py (100%)
 create mode 100644 tests/unit/test_macros.py

diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml
new file mode 100644
index 0000000..d78c8e0
--- /dev/null
+++ b/.github/workflows/workflow.yml
@@ -0,0 +1,29 @@
+name: Test
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
+
+      - name: Install dependencies
+        shell: bash
+        run: |
+          sudo apt-get install libsasl2-dev
+          python -m pip install --upgrade pip
+          python -m pip install -r dev-requirements.txt
+
+      - name: Run unit tests
+        shell: bash
+        run: DBT_PROFILES_DIR=$PWD pytest tests/unit
diff --git a/README.md b/README.md
index a2da13a..9b4da3e 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,7 @@ dispatch:
 
 ### Note to maintainers of other packages
 
-The spark-utils package may be able to provide compatibility for your package, especially if your package leverages dbt-utils macros for cross-database compatibility. This package _does not_ need to be specified as a depedency of your package in `packages.yml`. Instead, you should encourage anyone using your package on Apache Spark / Databricks to:
+The spark-utils package may be able to provide compatibility for your package, especially if your package leverages dbt-utils macros for cross-database compatibility. This package _does not_ need to be specified as a dependency of your package in `packages.yml`. Instead, you should encourage anyone using your package on Apache Spark / Databricks to:
 
 - Install `spark_utils` alongside your package
 - Add a `dispatch` config in their root project, like the one above
@@ -56,6 +56,41 @@
 We welcome contributions to this repo! To contribute a new feature or a fix, please open a Pull Request with 1) your changes and 2) updated documentation for the `README.md` file.
 
+## Testing
+
+The macros are tested with [`pytest`](https://docs.pytest.org) and
+[`pytest-dbt-core`](https://pypi.org/project/pytest-dbt-core/). For example,
+the [`get_tables` macro is tested](./tests/unit/test_macros.py) by:
+
+1. Creating a test table (test setup):
+   ``` python
+   spark_session.sql(f"CREATE TABLE {table_name} (id int) USING parquet")
+   ```
+2. Calling the macro generator:
+   ``` python
+   tables = macro_generator()
+   ```
+3. Asserting the test condition:
+   ``` python
+   assert simple_table in tables
+   ```
+4. Deleting the test table (test cleanup):
+   ``` python
+   spark_session.sql(f"DROP TABLE IF EXISTS {table_name}")
+   ```
+
+A macro is fetched using the
+[`macro_generator`](https://pytest-dbt-core.readthedocs.io/en/latest/dbt_spark.html#usage)
+fixture, providing the macro name through
+[indirect parameterization](https://docs.pytest.org/en/7.1.x/example/parametrize.html?highlight=indirect#indirect-parametrization):
+
+``` python
+@pytest.mark.parametrize(
+    "macro_generator", ["macro.spark_utils.get_tables"], indirect=True
+)
+def test_create_table(macro_generator: MacroGenerator) -> None:
+```
+
 ----
 
 ### Getting started with dbt + Spark
diff --git a/dbt_project.yml b/dbt_project.yml
index cda6511..7ebf95f 100644
--- a/dbt_project.yml
+++ b/dbt_project.yml
@@ -1,5 +1,6 @@
 name: 'spark_utils'
+profile: 'sparkutils'
 version: '0.3.0'
 config-version: 2
 require-dbt-version: [">=1.2.0", "<2.0.0"]
-macro-paths: ["macros"]
\ No newline at end of file
+macro-paths: ["macros"]
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 866f0f3..fe851f4 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -2,4 +2,6 @@ pytest
 pyodbc==4.0.32
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
-git+https://github.com/dbt-labs/dbt-spark.git#egg=dbt-spark[ODBC]
\ No newline at end of file
+git+https://github.com/dbt-labs/dbt-spark.git#egg=dbt-spark[ODBC,session]
+pytest-spark~=0.6.0
+pytest-dbt-core~=0.1.0
diff --git a/profiles.yml b/profiles.yml
new file mode 100644
index 0000000..4f84718
--- /dev/null
+++ b/profiles.yml
@@ -0,0 +1,8 @@
+sparkutils:
+  target: test
+  outputs:
+    test:
+      type: spark
+      method: session
+      schema: test
+      host: NA # not used, but required by `dbt-spark`
diff --git a/pytest.ini b/pytest.ini
index c0ef765..058b349 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -6,3 +6,7 @@ env_files =
     test.env
 testpaths =
     tests/functional
+spark_options =
+    spark.app.name: spark-utils
+    spark.executor.instances: 1
+    spark.sql.catalogImplementation: in-memory
diff --git a/tests/conftest.py b/tests/functional/conftest.py
similarity index 100%
rename from tests/conftest.py
rename to tests/functional/conftest.py
diff --git a/tests/unit/test_macros.py b/tests/unit/test_macros.py
new file mode 100644
index 0000000..6382d30
--- /dev/null
+++ b/tests/unit/test_macros.py
@@ -0,0 +1,26 @@
+import uuid
+
+import pytest
+from dbt.clients.jinja import MacroGenerator
+from pyspark.sql import SparkSession
+
+
+@pytest.fixture
+def simple_table(spark_session: SparkSession) -> str:
+    """Create and delete a simple table used for testing."""
+    table_name = f"default.table_{uuid.uuid4()}".replace("-", "_")
+    spark_session.sql(f"CREATE TABLE {table_name} (id int) USING parquet")
+    yield table_name
+    spark_session.sql(f"DROP TABLE IF EXISTS {table_name}")
+
+
+@pytest.mark.parametrize(
+    "macro_generator", ["macro.spark_utils.get_tables"], indirect=True
+)
+def test_create_table(
+    macro_generator: MacroGenerator, simple_table: str
+) -> None:
+    """The `get_tables` macro should return the created table."""
+    tables = macro_generator()
+    assert simple_table in tables
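
To exercise this patch locally, the new unit tests can be run with the same commands as the workflow's "Install dependencies" and "Run unit tests" steps. A minimal sketch, assuming a Python 3.9 environment on Debian/Ubuntu (where `libsasl2-dev` is needed to build the ODBC/SASL dependencies) and the repository root as the working directory:

``` sh
# Mirrors the workflow's "Install dependencies" step.
sudo apt-get install libsasl2-dev
python -m pip install --upgrade pip
python -m pip install -r dev-requirements.txt

# profiles.yml lives in the repository root, so point dbt at it explicitly.
DBT_PROFILES_DIR=$PWD pytest tests/unit
```

The `DBT_PROFILES_DIR=$PWD` prefix matches the "Run unit tests" step above; without it, dbt would look for `profiles.yml` under `~/.dbt/` instead of the repository root.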