ray-project · jjyao · Jan 8, 2024 · Jan 4, 2024 · Jan 4, 2024 · Jan 4, 2024
diff --git a/python/ray/tests/spark/test_databricks_hook.py b/python/ray/tests/spark/test_databricks_hook.py
@@ -23,6 +23,9 @@ def __init__(self):
     def getIdleTimeMillisSinceLastNotebookExecution(self):
         return (time.time() - self.created_time) * 1000
 
+    def registerBackgroundSparkJobGroup(self, job_group_id):
+        self.registered_job_groups.append(job_group_id)
+
 
 class TestDatabricksHook:
     @classmethod
@@ -62,6 +65,7 @@ def test_hook(self, monkeypatch):
                 head_node_options={"include_dashboard": False},
             )
             cluster = ray.util.spark.cluster_init._active_ray_cluster
+            assert db_api_entry.registered_job_groups == [cluster.spark_job_group_id]
             assert not cluster.is_shutdown
             time.sleep(35)
             assert cluster.is_shutdown

diff --git a/python/ray/util/spark/cluster_init.py b/python/ray/util/spark/cluster_init.py
@@ -1619,11 +1619,11 @@ def ray_cluster_job_mapper(_):
         )
         job_rdd = job_rdd.withResources(resource_profile)
 
-    job_rdd.mapPartitions(ray_cluster_job_mapper).collect()
-
     hook_entry = _create_hook_entry(is_global=(ray_temp_dir is None))
     hook_entry.on_spark_job_created(spark_job_group_id)
 
+    job_rdd.mapPartitions(ray_cluster_job_mapper).collect()
+
 
 @PublicAPI(stability="alpha")
 def shutdown_ray_cluster() -> None:

diff --git a/python/ray/util/spark/databricks_hook.py b/python/ray/util/spark/databricks_hook.py
@@ -166,7 +166,7 @@ def auto_shutdown_watcher():
 
     def on_spark_job_created(self, job_group_id):
         db_api_entry = get_db_entry_point()
-        db_api_entry.registerBackgroundSparkJobGroup("job_group_id")
+        db_api_entry.registerBackgroundSparkJobGroup(job_group_id)
 
     def custom_environment_variables(self):
         """Hardcode `GLOO_SOCKET_IFNAME` to `eth0` for Databricks runtime.