From c6b517deda2c9f2df8597dc09ff98b5ee8fefc2d Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Tue, 28 Dec 2021 11:10:47 +0100 Subject: [PATCH 01/31] Add working script to run macro --- tests/test_macros.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 tests/test_macros.py diff --git a/tests/test_macros.py b/tests/test_macros.py new file mode 100644 index 0000000..abd93ec --- /dev/null +++ b/tests/test_macros.py @@ -0,0 +1,38 @@ +import dataclasses +import os + +import dbt.tracking +from dbt.adapters.factory import get_adapter, register_adapter +from dbt.clients.jinja import MacroGenerator +from dbt.config.runtime import RuntimeConfig +from dbt.context import providers +from dbt.parser.manifest import ManifestLoader +from dbt.tracking import User + + +dbt.tracking.active_user = User(os.getcwd()) + + +@dataclasses.dataclass(frozen=True) +class Args: + project_dir: str = os.getcwd() + + +args = Args() +config = RuntimeConfig.from_args(args) + +register_adapter(config) + +adapter = get_adapter(config) +adapter.acquire_connection() + +manifest = ManifestLoader.get_full_manifest(config) + +macro = manifest.macros["macro.spark_utils.get_tables"] + +context = providers.generate_runtime_macro_context( + macro, config, manifest, macro.package_name +) + +result = MacroGenerator(macro, context)() + From b3b0a0a7077a21bced0bcc355b7b0aa0984313b2 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Tue, 28 Dec 2021 15:11:00 +0100 Subject: [PATCH 02/31] Add comment about adapters --- tests/test_macros.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_macros.py b/tests/test_macros.py index abd93ec..fa483b1 100644 --- a/tests/test_macros.py +++ b/tests/test_macros.py @@ -19,6 +19,7 @@ class Args: args = Args() +# Sets the Spark plugin in dbt.adapters.factory.FACTORY config = RuntimeConfig.from_args(args) register_adapter(config) From 5bb97289eb358b5bb7c97a6149e6ba2841ee7c9a Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Tue, 28 Dec 2021 15:11:11 +0100 Subject: [PATCH 03/31] Try using a project instead of runtime config --- tests/test_macros.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_macros.py b/tests/test_macros.py index fa483b1..4ea48a4 100644 --- a/tests/test_macros.py +++ b/tests/test_macros.py @@ -5,7 +5,9 @@ from dbt.adapters.factory import get_adapter, register_adapter from dbt.clients.jinja import MacroGenerator from dbt.config.runtime import RuntimeConfig +from dbt.config.project import Project from dbt.context import providers +from dbt.adapters.spark import SparkCredentials from dbt.parser.manifest import ManifestLoader from dbt.tracking import User @@ -18,10 +20,20 @@ class Args: project_dir: str = os.getcwd() +credentials = SparkCredentials( + database="default", + schema="default", + host="localhost", + method="pyspark" +) + args = Args() # Sets the Spark plugin in dbt.adapters.factory.FACTORY config = RuntimeConfig.from_args(args) +config = Project.partial_load(os.getcwd()) +config.credentials = credentials + register_adapter(config) adapter = get_adapter(config) From 4e3123234fa919fa638eeed7a9bca476526cf1df Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Tue, 28 Dec 2021 15:28:18 +0100 Subject: [PATCH 04/31] Remove spark credentials and Project --- tests/test_macros.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/test_macros.py b/tests/test_macros.py index 4ea48a4..fa483b1 100644 --- a/tests/test_macros.py +++ b/tests/test_macros.py @@ -5,9 +5,7 @@ from 
dbt.adapters.factory import get_adapter, register_adapter from dbt.clients.jinja import MacroGenerator from dbt.config.runtime import RuntimeConfig -from dbt.config.project import Project from dbt.context import providers -from dbt.adapters.spark import SparkCredentials from dbt.parser.manifest import ManifestLoader from dbt.tracking import User @@ -20,20 +18,10 @@ class Args: project_dir: str = os.getcwd() -credentials = SparkCredentials( - database="default", - schema="default", - host="localhost", - method="pyspark" -) - args = Args() # Sets the Spark plugin in dbt.adapters.factory.FACTORY config = RuntimeConfig.from_args(args) -config = Project.partial_load(os.getcwd()) -config.credentials = credentials - register_adapter(config) adapter = get_adapter(config) From 37aa6e6eaee66d92c1aa98b27764dd30892ffaba Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Tue, 28 Dec 2021 16:01:24 +0100 Subject: [PATCH 05/31] Use connection from soda spark --- tests/test_macros.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tests/test_macros.py b/tests/test_macros.py index fa483b1..86cbfdd 100644 --- a/tests/test_macros.py +++ b/tests/test_macros.py @@ -6,13 +6,24 @@ from dbt.clients.jinja import MacroGenerator from dbt.config.runtime import RuntimeConfig from dbt.context import providers +from dbt.contracts.connection import ConnectionState +from dbt.adapters.spark.connections import SparkConnectionManager, PyodbcConnectionWrapper from dbt.parser.manifest import ManifestLoader from dbt.tracking import User - +from sodaspark.scan import Connection dbt.tracking.active_user = User(os.getcwd()) +class _SparkConnectionManager(SparkConnectionManager): + @classmethod + def open(cls, connection): + handle = PyodbcConnectionWrapper(Connection()) + connection.handle = handle + connection.state = ConnectionState.OPEN + return connection + + @dataclasses.dataclass(frozen=True) class Args: project_dir: str = os.getcwd() @@ -25,6 +36,9 @@ class Args: register_adapter(config) adapter = get_adapter(config) + +connection_manager = _SparkConnectionManager(adapter.config) +adapter.connections = connection_manager adapter.acquire_connection() manifest = ManifestLoader.get_full_manifest(config) @@ -36,4 +50,3 @@ class Args: ) result = MacroGenerator(macro, context)() - From 69ed207a963b4ec1448e4d55c6183d0edc57844d Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Tue, 28 Dec 2021 16:25:47 +0100 Subject: [PATCH 06/31] Add test requirements --- test-requirements.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 test-requirements.txt diff --git a/test-requirements.txt b/test-requirements.txt new file mode 100644 index 0000000..bb99be5 --- /dev/null +++ b/test-requirements.txt @@ -0,0 +1,2 @@ +pytest>=6.2.5 +pytest-spark>=0.6.0 From 236286d011d4205d072ddf3d2e07384c6fd42742 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 5 Aug 2022 17:42:41 +0200 Subject: [PATCH 07/31] Add pytest ini --- pytest.ini | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pytest.ini b/pytest.ini index c0ef765..058b349 100644 --- a/pytest.ini +++ b/pytest.ini @@ -6,3 +6,7 @@ env_files = test.env testpaths = tests/functional +spark_options = + spark.app.name: spark-utils + spark.executor.instances: 1 + spark.sql.catalogImplementation: in-memory From d72ebf24bfac5e1037ecaf53ba1db2765da011be Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Tue, 28 Dec 2021 16:45:32 +0100 Subject: [PATCH 08/31] Move everything into pytest fixtures --- tests/test_macros.py | 69 
++++++++++++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 16 deletions(-) diff --git a/tests/test_macros.py b/tests/test_macros.py index 86cbfdd..a958242 100644 --- a/tests/test_macros.py +++ b/tests/test_macros.py @@ -2,16 +2,24 @@ import os import dbt.tracking -from dbt.adapters.factory import get_adapter, register_adapter +import pytest +from _pytest.fixtures import SubRequest +from dbt.adapters.factory import get_adapter, register_adapter, AdapterContainer from dbt.clients.jinja import MacroGenerator from dbt.config.runtime import RuntimeConfig from dbt.context import providers from dbt.contracts.connection import ConnectionState -from dbt.adapters.spark.connections import SparkConnectionManager, PyodbcConnectionWrapper +from dbt.contracts.graph.manifest import Manifest +from dbt.adapters.spark.connections import ( + SparkConnectionManager, + PyodbcConnectionWrapper, +) from dbt.parser.manifest import ManifestLoader from dbt.tracking import User +from pyspark.sql import SparkSession from sodaspark.scan import Connection + dbt.tracking.active_user = User(os.getcwd()) @@ -29,24 +37,53 @@ class Args: project_dir: str = os.getcwd() -args = Args() -# Sets the Spark plugin in dbt.adapters.factory.FACTORY -config = RuntimeConfig.from_args(args) +@pytest.fixture +def config() -> RuntimeConfig: + # requires a profile in your project wich also exists in your profiles file + config = RuntimeConfig.from_args(Args()) + return config -register_adapter(config) -adapter = get_adapter(config) +@pytest.fixture +def adapter(config: RuntimeConfig) -> AdapterContainer: + register_adapter(config) + adapter = get_adapter(config) -connection_manager = _SparkConnectionManager(adapter.config) -adapter.connections = connection_manager -adapter.acquire_connection() + connection_manager = _SparkConnectionManager(adapter.config) + adapter.connections = connection_manager -manifest = ManifestLoader.get_full_manifest(config) + adapter.acquire_connection() -macro = manifest.macros["macro.spark_utils.get_tables"] + return adapter -context = providers.generate_runtime_macro_context( - macro, config, manifest, macro.package_name -) -result = MacroGenerator(macro, context)() +@pytest.fixture +def manifest( + adapter: AdapterContainer, +) -> Manifest: + manifest = ManifestLoader.get_full_manifest(adapter.config) + return manifest + + +@pytest.fixture +def macro_generator( + request: SubRequest, config: RuntimeConfig, manifest: Manifest +) -> MacroGenerator: + macro = manifest.macros[request.param] + context = providers.generate_runtime_macro_context( + macro, config, manifest, macro.package_name + ) + macro_generator = MacroGenerator(macro, context) + return macro_generator + + +@pytest.mark.parametrize( + "macro_generator", ["macro.spark_utils.get_tables"], indirect=True +) +def test_create_table( + spark_session: SparkSession, macro_generator: MacroGenerator +) -> None: + expected_table = "default.example" + spark_session.sql(f"CREATE TABLE {expected_table} (id int) USING parquet") + tables = macro_generator() + assert tables == [expected_table] From 18170a181652fcbb871cb6aa4c7678551fa5ad22 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 29 Dec 2021 13:18:05 +0100 Subject: [PATCH 09/31] Copy connection --- tests/test_macros.py | 163 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 161 insertions(+), 2 deletions(-) diff --git a/tests/test_macros.py b/tests/test_macros.py index a958242..41a5a2b 100644 --- a/tests/test_macros.py +++ b/tests/test_macros.py @@ -1,5 +1,9 @@ +from 
__future__ import annotations + import dataclasses import os +from types import TracebackType +from typing import Any import dbt.tracking import pytest @@ -16,13 +20,168 @@ ) from dbt.parser.manifest import ManifestLoader from dbt.tracking import User -from pyspark.sql import SparkSession -from sodaspark.scan import Connection +from pyspark.sql import DataFrame, Row, SparkSession dbt.tracking.active_user = User(os.getcwd()) +class Cursor: + """ + Mock a pyodbc cursor. + + Source + ------ + https://github.com/mkleehammer/pyodbc/wiki/Cursor + """ + + def __init__(self) -> None: + self._df: DataFrame | None = None + self._rows: list[Row] | None = None + + def __enter__(self) -> Cursor: + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_val: Exception | None, + exc_tb: TracebackType | None, + ) -> bool: + self.close() + return True + + @property + def description( + self, + ) -> list[tuple[str, str, None, None, None, None, bool]]: + """ + Get the description. + + Returns + ------- + out : list[tuple[str, str, None, None, None, None, bool]] + The description. + + Source + ------ + https://github.com/mkleehammer/pyodbc/wiki/Cursor#description + """ + if self._df is None: + description = list() + else: + description = [ + ( + field.name, + field.dataType.simpleString(), + None, + None, + None, + None, + field.nullable, + ) + for field in self._df.schema.fields + ] + return description + + def close(self) -> None: + """ + Close the connection. + + Source + ------ + https://github.com/mkleehammer/pyodbc/wiki/Cursor#close + """ + self._df = None + self._rows = None + + def execute(self, sql: str, *parameters: Any) -> None: + """ + Execute a sql statement. + + Parameters + ---------- + sql : str + Execute a sql statement. + *parameters : Any + The parameters. + + Raises + ------ + NotImplementedError + If there are parameters given. We do not format sql statements. + + Source + ------ + https://github.com/mkleehammer/pyodbc/wiki/Cursor#executesql-parameters + """ + if len(parameters) > 0: + raise NotImplementedError("Formatting sql statement is not implemented.") + spark_session = SparkSession.builder.getOrCreate() + self._df = spark_session.sql(sql) + + def fetchall(self) -> list[Row] | None: + """ + Fetch all data. + + Returns + ------- + out : list[Row] | None + The rows. + + Source + ------ + https://github.com/mkleehammer/pyodbc/wiki/Cursor#fetchall + """ + if self._rows is None and self._df is not None: + self._rows = self._df.collect() + return self._rows + + def fetchone(self) -> Row | None: + """ + Fetch the first output. + + Returns + ------- + out : Row | None + The first row. + + Source + ------ + https://github.com/mkleehammer/pyodbc/wiki/Cursor#fetchone + """ + if self._rows is None and self._df is not None: + self._rows = self._df.collect() + + if self._rows is not None and len(self._rows) > 0: + row = self._rows.pop(0) + else: + row = None + + return row + + +class Connection: + """ + Mock a pyodbc connection. + + Source + ------ + https://github.com/mkleehammer/pyodbc/wiki/Connection + """ + + def cursor(self) -> Cursor: + """ + Get a cursor. + + Returns + ------- + out : Cursor + The cursor. 
+ """ + return Cursor() + + class _SparkConnectionManager(SparkConnectionManager): @classmethod def open(cls, connection): From 25e5806383edf7e6f9898314fbf5ed683224a2fd Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 28 Jan 2022 13:45:56 +0100 Subject: [PATCH 10/31] Remove pytest-dbt-core code --- tests/test_macros.py | 233 +------------------------------------------ 1 file changed, 1 insertion(+), 232 deletions(-) diff --git a/tests/test_macros.py b/tests/test_macros.py index 41a5a2b..48f5866 100644 --- a/tests/test_macros.py +++ b/tests/test_macros.py @@ -1,239 +1,8 @@ from __future__ import annotations -import dataclasses -import os -from types import TracebackType -from typing import Any - -import dbt.tracking import pytest -from _pytest.fixtures import SubRequest -from dbt.adapters.factory import get_adapter, register_adapter, AdapterContainer from dbt.clients.jinja import MacroGenerator -from dbt.config.runtime import RuntimeConfig -from dbt.context import providers -from dbt.contracts.connection import ConnectionState -from dbt.contracts.graph.manifest import Manifest -from dbt.adapters.spark.connections import ( - SparkConnectionManager, - PyodbcConnectionWrapper, -) -from dbt.parser.manifest import ManifestLoader -from dbt.tracking import User -from pyspark.sql import DataFrame, Row, SparkSession - - -dbt.tracking.active_user = User(os.getcwd()) - - -class Cursor: - """ - Mock a pyodbc cursor. - - Source - ------ - https://github.com/mkleehammer/pyodbc/wiki/Cursor - """ - - def __init__(self) -> None: - self._df: DataFrame | None = None - self._rows: list[Row] | None = None - - def __enter__(self) -> Cursor: - return self - - def __exit__( - self, - exc_type: type[BaseException] | None, - exc_val: Exception | None, - exc_tb: TracebackType | None, - ) -> bool: - self.close() - return True - - @property - def description( - self, - ) -> list[tuple[str, str, None, None, None, None, bool]]: - """ - Get the description. - - Returns - ------- - out : list[tuple[str, str, None, None, None, None, bool]] - The description. - - Source - ------ - https://github.com/mkleehammer/pyodbc/wiki/Cursor#description - """ - if self._df is None: - description = list() - else: - description = [ - ( - field.name, - field.dataType.simpleString(), - None, - None, - None, - None, - field.nullable, - ) - for field in self._df.schema.fields - ] - return description - - def close(self) -> None: - """ - Close the connection. - - Source - ------ - https://github.com/mkleehammer/pyodbc/wiki/Cursor#close - """ - self._df = None - self._rows = None - - def execute(self, sql: str, *parameters: Any) -> None: - """ - Execute a sql statement. - - Parameters - ---------- - sql : str - Execute a sql statement. - *parameters : Any - The parameters. - - Raises - ------ - NotImplementedError - If there are parameters given. We do not format sql statements. - - Source - ------ - https://github.com/mkleehammer/pyodbc/wiki/Cursor#executesql-parameters - """ - if len(parameters) > 0: - raise NotImplementedError("Formatting sql statement is not implemented.") - spark_session = SparkSession.builder.getOrCreate() - self._df = spark_session.sql(sql) - - def fetchall(self) -> list[Row] | None: - """ - Fetch all data. - - Returns - ------- - out : list[Row] | None - The rows. 
- - Source - ------ - https://github.com/mkleehammer/pyodbc/wiki/Cursor#fetchall - """ - if self._rows is None and self._df is not None: - self._rows = self._df.collect() - return self._rows - - def fetchone(self) -> Row | None: - """ - Fetch the first output. - - Returns - ------- - out : Row | None - The first row. - - Source - ------ - https://github.com/mkleehammer/pyodbc/wiki/Cursor#fetchone - """ - if self._rows is None and self._df is not None: - self._rows = self._df.collect() - - if self._rows is not None and len(self._rows) > 0: - row = self._rows.pop(0) - else: - row = None - - return row - - -class Connection: - """ - Mock a pyodbc connection. - - Source - ------ - https://github.com/mkleehammer/pyodbc/wiki/Connection - """ - - def cursor(self) -> Cursor: - """ - Get a cursor. - - Returns - ------- - out : Cursor - The cursor. - """ - return Cursor() - - -class _SparkConnectionManager(SparkConnectionManager): - @classmethod - def open(cls, connection): - handle = PyodbcConnectionWrapper(Connection()) - connection.handle = handle - connection.state = ConnectionState.OPEN - return connection - - -@dataclasses.dataclass(frozen=True) -class Args: - project_dir: str = os.getcwd() - - -@pytest.fixture -def config() -> RuntimeConfig: - # requires a profile in your project wich also exists in your profiles file - config = RuntimeConfig.from_args(Args()) - return config - - -@pytest.fixture -def adapter(config: RuntimeConfig) -> AdapterContainer: - register_adapter(config) - adapter = get_adapter(config) - - connection_manager = _SparkConnectionManager(adapter.config) - adapter.connections = connection_manager - - adapter.acquire_connection() - - return adapter - - -@pytest.fixture -def manifest( - adapter: AdapterContainer, -) -> Manifest: - manifest = ManifestLoader.get_full_manifest(adapter.config) - return manifest - - -@pytest.fixture -def macro_generator( - request: SubRequest, config: RuntimeConfig, manifest: Manifest -) -> MacroGenerator: - macro = manifest.macros[request.param] - context = providers.generate_runtime_macro_context( - macro, config, manifest, macro.package_name - ) - macro_generator = MacroGenerator(macro, context) - return macro_generator +from pyspark.sql import SparkSession @pytest.mark.parametrize( From 409d8279012231e377a5f92ed3b7eb44c0ddebde Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 28 Jan 2022 13:46:16 +0100 Subject: [PATCH 11/31] Add pytest dbt core as test requirement --- test-requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test-requirements.txt b/test-requirements.txt index bb99be5..b7d78c0 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,2 +1,3 @@ pytest>=6.2.5 pytest-spark>=0.6.0 +pytest-dbt-core==0.1.0.dev2 From 56c848a91b51419f66248c7831598f0be9b4b898 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 28 Jan 2022 13:48:58 +0100 Subject: [PATCH 12/31] Add workflow for testing --- .github/workflows/workflow.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/workflows/workflow.yml diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml new file mode 100644 index 0000000..d177059 --- /dev/null +++ b/.github/workflows/workflow.yml @@ -0,0 +1,29 @@ +name: Test + +on: + pull_request: + push: + branches: + - main + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Install dependencies + shell: bash + 
run: | + sudo apt-get install libsasl2-dev + python -m pip install --upgrade pip + python -m pip install -r test-requirements.txt + + - name: Run pytest + shell: bash + run: pytest tests/ From 1560e5e65e8c412b26c24b2d3b1a84ae7492734c Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 27 May 2022 16:52:24 +0200 Subject: [PATCH 13/31] Bump pytest dbt core version --- test-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test-requirements.txt b/test-requirements.txt index b7d78c0..634dff1 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,3 +1,3 @@ pytest>=6.2.5 pytest-spark>=0.6.0 -pytest-dbt-core==0.1.0.dev2 +pytest-dbt-core==0.1.0rc1 From 3014264bab49d817b0d1126eecf5bbef0417993d Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 27 May 2022 17:08:23 +0200 Subject: [PATCH 14/31] Add profile to dbt project --- dbt_project.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbt_project.yml b/dbt_project.yml index cda6511..7ebf95f 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,5 +1,6 @@ name: 'spark_utils' +profile: 'sparkutils' version: '0.3.0' config-version: 2 require-dbt-version: [">=1.2.0", "<2.0.0"] -macro-paths: ["macros"] \ No newline at end of file +macro-paths: ["macros"] From 153708bce2c95458b6a43dede54b9b9a522b2bfa Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 27 May 2022 17:13:24 +0200 Subject: [PATCH 15/31] Add profiles --- profiles.yml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 profiles.yml diff --git a/profiles.yml b/profiles.yml new file mode 100644 index 0000000..ea66485 --- /dev/null +++ b/profiles.yml @@ -0,0 +1,8 @@ +sparkutils: + target: test + outputs: + test: + type: spark + method: session + schema: test + host: NA # not used, but required by `dbt-core` From f9b0db718e79e5043234aa4edfa939fa4010368a Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 27 May 2022 17:54:55 +0200 Subject: [PATCH 16/31] Add profiles dir when running pytest --- .github/workflows/workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index d177059..3b7a49a 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -26,4 +26,4 @@ jobs: - name: Run pytest shell: bash - run: pytest tests/ + run: DBT_PROFILES_DIR=$PWD pytest tests/ From 52307bb9b2454a338a8252fdcea6a60d03d3c010 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 27 May 2022 18:14:46 +0200 Subject: [PATCH 17/31] Remove redundant from future import annotations --- tests/test_macros.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_macros.py b/tests/test_macros.py index 48f5866..3bfc434 100644 --- a/tests/test_macros.py +++ b/tests/test_macros.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import pytest from dbt.clients.jinja import MacroGenerator from pyspark.sql import SparkSession From cb447a22741744593cfc1864dd6982b8ede777bb Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 22 Jul 2022 09:40:09 +0200 Subject: [PATCH 18/31] Bump pytest-dbt-core version --- test-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test-requirements.txt b/test-requirements.txt index 634dff1..b9679f1 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,3 +1,3 @@ pytest>=6.2.5 pytest-spark>=0.6.0 -pytest-dbt-core==0.1.0rc1 +pytest-dbt-core==0.1.0 From 8b7eb8f14073fe3020a14a741c03b65bb78bd260 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: 
Fri, 22 Jul 2022 09:44:52 +0200 Subject: [PATCH 19/31] Change version --- test-requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test-requirements.txt b/test-requirements.txt index b9679f1..c1201b0 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,3 +1,3 @@ -pytest>=6.2.5 -pytest-spark>=0.6.0 -pytest-dbt-core==0.1.0 +pytest~=6.2.5 +pytest-spark~=0.6.0 +pytest-dbt-core~=0.1.0 From 6dfd9f7e3951ab73d4fed7afd705f4b1961b6fb6 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 22 Jul 2022 09:53:08 +0200 Subject: [PATCH 20/31] Add pyspark dependency --- test-requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test-requirements.txt b/test-requirements.txt index c1201b0..f30df56 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,3 +1,4 @@ pytest~=6.2.5 pytest-spark~=0.6.0 pytest-dbt-core~=0.1.0 +pyspark~=3.3 From 91b6bb14055299cdde8c356a951de4b394d43489 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 22 Jul 2022 09:57:50 +0200 Subject: [PATCH 21/31] Change pyspark dependency to dbt-spark session --- test-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test-requirements.txt b/test-requirements.txt index f30df56..0057422 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,4 +1,4 @@ pytest~=6.2.5 pytest-spark~=0.6.0 pytest-dbt-core~=0.1.0 -pyspark~=3.3 +dbt-spark[session]~=1.1.0 From 30112b08afe34cfeb113159cabe56af69d32f00d Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Sat, 23 Jul 2022 07:54:52 +0200 Subject: [PATCH 22/31] Change required by to dbt-spark --- profiles.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiles.yml b/profiles.yml index ea66485..4f84718 100644 --- a/profiles.yml +++ b/profiles.yml @@ -5,4 +5,4 @@ sparkutils: type: spark method: session schema: test - host: NA # not used, but required by `dbt-core` + host: NA # not used, but required by `dbt-spark` From 59f2139f0ff9b64a7e1b05d7143913d9a18dac7e Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 5 Aug 2022 17:19:00 +0200 Subject: [PATCH 23/31] Add test docstring --- tests/test_macros.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_macros.py b/tests/test_macros.py index 3bfc434..d99c5f8 100644 --- a/tests/test_macros.py +++ b/tests/test_macros.py @@ -9,6 +9,7 @@ def test_create_table( spark_session: SparkSession, macro_generator: MacroGenerator ) -> None: + """The `get_tables` macro should return the created table.""" expected_table = "default.example" spark_session.sql(f"CREATE TABLE {expected_table} (id int) USING parquet") tables = macro_generator() From 74482a727f530f02eef715ada3d31cdbe165f117 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 5 Aug 2022 17:19:55 +0200 Subject: [PATCH 24/31] Make test less strict --- tests/test_macros.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_macros.py b/tests/test_macros.py index d99c5f8..59ab675 100644 --- a/tests/test_macros.py +++ b/tests/test_macros.py @@ -13,4 +13,5 @@ def test_create_table( expected_table = "default.example" spark_session.sql(f"CREATE TABLE {expected_table} (id int) USING parquet") tables = macro_generator() - assert tables == [expected_table] + assert expected_table in tables + From df2934694cf7e9051c5413eadac95135538477cb Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 5 Aug 2022 17:26:14 +0200 Subject: [PATCH 25/31] Create and delete table with fixture --- tests/test_macros.py | 17 +++++++++++++---- 1 file changed, 13 
insertions(+), 4 deletions(-) diff --git a/tests/test_macros.py b/tests/test_macros.py index 59ab675..6382d30 100644 --- a/tests/test_macros.py +++ b/tests/test_macros.py @@ -1,17 +1,26 @@ +import uuid + import pytest from dbt.clients.jinja import MacroGenerator from pyspark.sql import SparkSession +@pytest.fixture +def simple_table(spark_session: SparkSession) -> str: + """Create and delete a simple table used for testing.""" + table_name = f"default.table_{uuid.uuid4()}".replace("-", "_") + spark_session.sql(f"CREATE TABLE {table_name} (id int) USING parquet") + yield table_name + spark_session.sql(f"DROP TABLE IF EXISTS {table_name}") + + @pytest.mark.parametrize( "macro_generator", ["macro.spark_utils.get_tables"], indirect=True ) def test_create_table( - spark_session: SparkSession, macro_generator: MacroGenerator + macro_generator: MacroGenerator, simple_table: str ) -> None: """The `get_tables` macro should return the created table.""" - expected_table = "default.example" - spark_session.sql(f"CREATE TABLE {expected_table} (id int) USING parquet") tables = macro_generator() - assert expected_table in tables + assert simple_table in tables From ffe50cb00264bfc76da3d5fbb099fe51bc0a32b1 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 5 Aug 2022 17:26:47 +0200 Subject: [PATCH 26/31] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a2da13a..5212e5f 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ dispatch: ### Note to maintainers of other packages -The spark-utils package may be able to provide compatibility for your package, especially if your package leverages dbt-utils macros for cross-database compatibility. This package _does not_ need to be specified as a depedency of your package in `packages.yml`. Instead, you should encourage anyone using your package on Apache Spark / Databricks to: +The spark-utils package may be able to provide compatibility for your package, especially if your package leverages dbt-utils macros for cross-database compatibility. This package _does not_ need to be specified as a dependency of your package in `packages.yml`. Instead, you should encourage anyone using your package on Apache Spark / Databricks to: - Install `spark_utils` alongside your package - Add a `dispatch` config in their root project, like the one above From 8b13fdaeb7573f22c312ec6f258d7116d48a61c8 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 5 Aug 2022 17:38:44 +0200 Subject: [PATCH 27/31] Add section about testing to the documentation --- README.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/README.md b/README.md index 5212e5f..9b4da3e 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,41 @@ We welcome contributions to this repo! To contribute a new feature or a fix, please open a Pull Request with 1) your changes and 2) updated documentation for the `README.md` file. +## Testing + +The macros are tested with [`pytest`](https://docs.pytest.org) and +[`pytest-dbt-core`](https://pypi.org/project/pytest-dbt-core/). For example, +the [`create_tables` macro is tested](./tests/test_macros.py) by: + +1. Create a test table (test setup): + ``` python + spark_session.sql(f"CREATE TABLE {table_name} (id int) USING parquet") + ``` +2. Call the macro generator: + ``` python + tables = macro_generator() + ``` +3. Assert test condition: + ``` python + assert simple_table in tables + ``` +4. 
Delete the test table (test cleanup): + ``` python + spark_session.sql(f"DROP TABLE IF EXISTS {table_name}") + ``` + +A macro is fetched using the +[`macro_generator`](https://pytest-dbt-core.readthedocs.io/en/latest/dbt_spark.html#usage) +fixture and providing the macro name trough +[indirect parameterization](https://docs.pytest.org/en/7.1.x/example/parametrize.html?highlight=indirect#indirect-parametrization): + +``` python +@pytest.mark.parametrize( + "macro_generator", ["macro.spark_utils.get_tables"], indirect=True +) +def test_create_table(macro_generator: MacroGenerator) -> None: +``` + ---- ### Getting started with dbt + Spark From 29e88d646d3029b6ae10e9f37cad5f52592ad203 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Thu, 11 Aug 2022 07:57:33 +0200 Subject: [PATCH 28/31] Move test macros into tests/unit --- tests/{ => unit}/test_macros.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{ => unit}/test_macros.py (100%) diff --git a/tests/test_macros.py b/tests/unit/test_macros.py similarity index 100% rename from tests/test_macros.py rename to tests/unit/test_macros.py From ee25a3ef54a84bf568300443d2cb8bdb151be480 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Thu, 11 Aug 2022 07:57:53 +0200 Subject: [PATCH 29/31] Run unit tests only in Github action --- .github/workflows/workflow.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 3b7a49a..36bd981 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -24,6 +24,6 @@ jobs: python -m pip install --upgrade pip python -m pip install -r test-requirements.txt - - name: Run pytest + - name: Run unit tests shell: bash - run: DBT_PROFILES_DIR=$PWD pytest tests/ + run: DBT_PROFILES_DIR=$PWD pytest tests/unit From bbc792340c2b5084d8512bf110f645ead47da7bc Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Thu, 11 Aug 2022 08:03:25 +0200 Subject: [PATCH 30/31] Merge dev and test requirements --- .github/workflows/workflow.yml | 2 +- dev-requirements.txt | 4 +++- test-requirements.txt | 4 ---- 3 files changed, 4 insertions(+), 6 deletions(-) delete mode 100644 test-requirements.txt diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 36bd981..d78c8e0 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -22,7 +22,7 @@ jobs: run: | sudo apt-get install libsasl2-dev python -m pip install --upgrade pip - python -m pip install -r test-requirements.txt + python -m pip install -r dev-requirements.txt - name: Run unit tests shell: bash diff --git a/dev-requirements.txt b/dev-requirements.txt index 866f0f3..fe851f4 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -2,4 +2,6 @@ pytest pyodbc==4.0.32 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter -git+https://github.com/dbt-labs/dbt-spark.git#egg=dbt-spark[ODBC] \ No newline at end of file +git+https://github.com/dbt-labs/dbt-spark.git#egg=dbt-spark[ODBC,session] +pytest-spark~=0.6.0 +pytest-dbt-core~=0.1.0 diff --git a/test-requirements.txt b/test-requirements.txt deleted file mode 100644 index 0057422..0000000 --- a/test-requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -pytest~=6.2.5 -pytest-spark~=0.6.0 -pytest-dbt-core~=0.1.0 -dbt-spark[session]~=1.1.0 From d380607d26a0af1abe279a54900c0711e511ade1 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Thu, 11 Aug 2022 
08:09:46 +0200 Subject: [PATCH 31/31] Move conftest into functional --- tests/{ => functional}/conftest.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{ => functional}/conftest.py (100%) diff --git a/tests/conftest.py b/tests/functional/conftest.py similarity index 100% rename from tests/conftest.py rename to tests/functional/conftest.py
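
The CI job added in this series can also be reproduced locally. This is a minimal sketch, assuming a Python 3.9 environment and the repository root as the working directory (so the `profiles.yml` added in this series is found); it simply mirrors the steps in `.github/workflows/workflow.yml`:

```bash
# System package installed by the CI workflow before the Python dependencies
sudo apt-get install libsasl2-dev

# Install the test/dev dependencies used by this series
python -m pip install --upgrade pip
python -m pip install -r dev-requirements.txt

# Point dbt at the profiles.yml in the repository root and run the unit tests
DBT_PROFILES_DIR=$PWD pytest tests/unit
```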