fix spark cluster start mechanism and add extra dev requirements (#986)

dbt-labs · Feb 20, 2024 · 613fa58 · 613fa58
1 parent 5d90ff9
commit 613fa58
Show file tree

Hide file tree

Showing 3 changed files with 36 additions and 13 deletions.
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
@@ -112,15 +112,27 @@ async def test_spark(test_args):
             .with_exec(["./scripts/install_os_reqs.sh"])
             # install dbt-spark + python deps
             .with_directory("/src", req_files)
-            .with_directory("src/dbt", dbt_spark_dir)
-            .with_directory("src/tests", test_dir)
-            .with_workdir("/src")
             .with_exec(["pip", "install", "-U", "pip"])
+            .with_workdir("/src")
             .with_exec(["pip", "install", "-r", "requirements.txt"])
             .with_exec(["pip", "install", "-r", "dev-requirements.txt"])
+        )
+
+        # install local dbt-spark changes
+        tst_container = (
+            tst_container.with_workdir("/")
+            .with_directory("src/dbt", dbt_spark_dir)
+            .with_workdir("/src")
             .with_exec(["pip", "install", "-e", "."])
         )
 
+        # install local test changes
+        tst_container = (
+            tst_container.with_workdir("/")
+            .with_directory("src/tests", test_dir)
+            .with_workdir("/src")
+        )
+
         if test_profile == "apache_spark":
             spark_ctr, spark_host = get_spark_container(client)
             tst_container = tst_container.with_service_binding(alias=spark_host, service=spark_ctr)

diff --git a/dev-requirements.txt b/dev-requirements.txt
@@ -1,5 +1,8 @@
 # install latest changes in dbt-core
 # TODO: how to automate switching from develop to version branches?
+git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-common.git
+git+https://github.com/dbt-labs/dbt-adapters.git
 git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter
 
 # if version 1.x or greater -> pin to major version

diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py
@@ -1,19 +1,27 @@
-from multiprocessing import Lock
-
+import time
 import pytest
 
-_db_start_lock = Lock()
-_DB_CLUSTER_STARTED = False
+
+def _wait_for_databricks_cluster(project):
+    """
+    It takes roughly 3min for the cluster to start; to be safe we'll retry every 10s for up to 60 attempts (~10min)
+    """
+    for _ in range(60):
+        try:
+            project.run_sql("SELECT 1", fetch=True)
+            return
+        except Exception:
+            time.sleep(10)
+
+    raise Exception("Databricks cluster did not start in time")
 
 
 # Running this should prevent tests from needing to be retried because the Databricks cluster isn't available
 @pytest.fixture(scope="class", autouse=True)
 def start_databricks_cluster(project, request):
-    global _DB_CLUSTER_STARTED
     profile_type = request.config.getoption("--profile")
-    with _db_start_lock:
-        if "databricks" in profile_type and not _DB_CLUSTER_STARTED:
-            print("Starting Databricks cluster")
-            project.run_sql("SELECT 1")
 
-            _DB_CLUSTER_STARTED = True
+    if "databricks" in profile_type:
+        _wait_for_databricks_cluster(project)
+
+    yield 1