From 143c1e7d3528adef401612807142c5e89e2dc96b Mon Sep 17 00:00:00 2001
From: Cor
Date: Thu, 11 Aug 2022 11:03:22 +0200
Subject: [PATCH] Add testing (#22)

* Add working script to run macro
* Add comment about adapters
* Try using a project instead of runtime config
* Remove spark credentials and Project
* Use connection from soda spark
* Add test requirements
* Add pytest ini
* Move everything into pytest fixtures
* Copy connection
* Remove pytest-dbt-core code
* Add pytest dbt core as test requirement
* Add workflow for testing
* Bump pytest dbt core version
* Add profile to dbt project
* Add profiles
* Add profiles dir when running pytest
* Remove redundant from future import annotations
* Bump pytest-dbt-core version
* Change version
* Add pyspark dependency
* Change pyspark dependency to dbt-spark session
* Change required by to dbt-spark
* Add test docstring
* Make test less strict
* Create and delete table with fixture
* Fix typo
* Add section about testing to the documentation
* Move test macros into tests/unit
* Run unit tests only in Github action
* Merge dev and test requirements
* Move conftest into functional
---
 .github/workflows/workflow.yml     | 29 +++++++++++++++++++++++
 README.md                          | 37 +++++++++++++++++++++++++++++-
 dbt_project.yml                    |  3 ++-
 dev-requirements.txt               |  4 +++-
 profiles.yml                       |  8 +++++++
 pytest.ini                         |  4 ++++
 tests/{ => functional}/conftest.py |  0
 tests/unit/test_macros.py          | 26 +++++++++++++++++++++
 8 files changed, 108 insertions(+), 3 deletions(-)
 create mode 100644 .github/workflows/workflow.yml
 create mode 100644 profiles.yml
 rename tests/{ => functional}/conftest.py (100%)
 create mode 100644 tests/unit/test_macros.py

diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml
new file mode 100644
index 0000000..d78c8e0
--- /dev/null
+++ b/.github/workflows/workflow.yml
@@ -0,0 +1,29 @@
+name: Test
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
+
+      - name: Install dependencies
+        shell: bash
+        run: |
+          sudo apt-get install libsasl2-dev
+          python -m pip install --upgrade pip
+          python -m pip install -r dev-requirements.txt
+
+      - name: Run unit tests
+        shell: bash
+        run: DBT_PROFILES_DIR=$PWD pytest tests/unit
diff --git a/README.md b/README.md
index a2da13a..9b4da3e 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,7 @@ dispatch:
 
 ### Note to maintainers of other packages
 
-The spark-utils package may be able to provide compatibility for your package, especially if your package leverages dbt-utils macros for cross-database compatibility. This package _does not_ need to be specified as a depedency of your package in `packages.yml`. Instead, you should encourage anyone using your package on Apache Spark / Databricks to:
+The spark-utils package may be able to provide compatibility for your package, especially if your package leverages dbt-utils macros for cross-database compatibility. This package _does not_ need to be specified as a dependency of your package in `packages.yml`. Instead, you should encourage anyone using your package on Apache Spark / Databricks to:
 
 - Install `spark_utils` alongside your package
 - Add a `dispatch` config in their root project, like the one above
@@ -56,6 +56,41 @@
 We welcome contributions to this repo! To contribute a new feature or a fix, please open a Pull Request with 1) your changes and 2) updated documentation for the `README.md` file.
 
+## Testing
+
+The macros are tested with [`pytest`](https://docs.pytest.org) and
+[`pytest-dbt-core`](https://pypi.org/project/pytest-dbt-core/). For example,
+the [`get_tables` macro is tested](./tests/unit/test_macros.py) by:
+
+1. Creating a test table (test setup):
+   ``` python
+   spark_session.sql(f"CREATE TABLE {table_name} (id int) USING parquet")
+   ```
+2. Calling the macro generator:
+   ``` python
+   tables = macro_generator()
+   ```
+3. Asserting the test condition:
+   ``` python
+   assert simple_table in tables
+   ```
+4. Deleting the test table (test cleanup):
+   ``` python
+   spark_session.sql(f"DROP TABLE IF EXISTS {table_name}")
+   ```
+
+A macro is fetched using the
+[`macro_generator`](https://pytest-dbt-core.readthedocs.io/en/latest/dbt_spark.html#usage)
+fixture, providing the macro name through
+[indirect parameterization](https://docs.pytest.org/en/7.1.x/example/parametrize.html?highlight=indirect#indirect-parametrization):
+
+``` python
+@pytest.mark.parametrize(
+    "macro_generator", ["macro.spark_utils.get_tables"], indirect=True
+)
+def test_create_table(macro_generator: MacroGenerator) -> None:
+```
+
 ----
 
 ### Getting started with dbt + Spark
diff --git a/dbt_project.yml b/dbt_project.yml
index cda6511..7ebf95f 100644
--- a/dbt_project.yml
+++ b/dbt_project.yml
@@ -1,5 +1,6 @@
 name: 'spark_utils'
+profile: 'sparkutils'
 version: '0.3.0'
 config-version: 2
 require-dbt-version: [">=1.2.0", "<2.0.0"]
-macro-paths: ["macros"]
\ No newline at end of file
+macro-paths: ["macros"]
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 866f0f3..fe851f4 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -2,4 +2,6 @@ pytest
 pyodbc==4.0.32
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
-git+https://github.com/dbt-labs/dbt-spark.git#egg=dbt-spark[ODBC]
\ No newline at end of file
+git+https://github.com/dbt-labs/dbt-spark.git#egg=dbt-spark[ODBC,session]
+pytest-spark~=0.6.0
+pytest-dbt-core~=0.1.0
diff --git a/profiles.yml b/profiles.yml
new file mode 100644
index 0000000..4f84718
--- /dev/null
+++ b/profiles.yml
@@ -0,0 +1,8 @@
+sparkutils:
+  target: test
+  outputs:
+    test:
+      type: spark
+      method: session
+      schema: test
+      host: NA # not used, but required by `dbt-spark`
diff --git a/pytest.ini b/pytest.ini
index c0ef765..058b349 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -6,3 +6,7 @@ env_files =
     test.env
 testpaths =
     tests/functional
+spark_options =
+    spark.app.name: spark-utils
+    spark.executor.instances: 1
+    spark.sql.catalogImplementation: in-memory
diff --git a/tests/conftest.py b/tests/functional/conftest.py
similarity index 100%
rename from tests/conftest.py
rename to tests/functional/conftest.py
diff --git a/tests/unit/test_macros.py b/tests/unit/test_macros.py
new file mode 100644
index 0000000..6382d30
--- /dev/null
+++ b/tests/unit/test_macros.py
@@ -0,0 +1,26 @@
+import uuid
+
+import pytest
+from dbt.clients.jinja import MacroGenerator
+from pyspark.sql import SparkSession
+
+
+@pytest.fixture
+def simple_table(spark_session: SparkSession) -> str:
+    """Create and delete a simple table used for testing."""
+    table_name = f"default.table_{uuid.uuid4()}".replace("-", "_")
+    spark_session.sql(f"CREATE TABLE {table_name} (id int) USING parquet")
+    yield table_name
+    spark_session.sql(f"DROP TABLE IF EXISTS {table_name}")
+
+
+@pytest.mark.parametrize(
+    "macro_generator", ["macro.spark_utils.get_tables"], indirect=True
+)
+def test_create_table(
+    macro_generator: MacroGenerator, simple_table: str
+) -> None:
+    """The `get_tables` macro should return the created table."""
+    tables = macro_generator()
+    assert simple_table in tables
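
To exercise this patch locally, the new unit tests can be run with the same commands as the workflow's "Install dependencies" and "Run unit tests" steps. A minimal sketch, assuming a Python 3.9 environment on Debian/Ubuntu (where `libsasl2-dev` is needed to build the ODBC/SASL dependencies) and the repository root as the working directory:

``` sh
# Mirrors the workflow's "Install dependencies" step.
sudo apt-get install libsasl2-dev
python -m pip install --upgrade pip
python -m pip install -r dev-requirements.txt

# profiles.yml lives in the repository root, so point dbt at it explicitly.
DBT_PROFILES_DIR=$PWD pytest tests/unit
```

The `DBT_PROFILES_DIR=$PWD` prefix matches the "Run unit tests" step above; without it, dbt would look for `profiles.yml` under `~/.dbt/` instead of the repository root.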