Upgrade to Spark 3.1.1 with testing #349

Merged (24 commits) on Jun 28, 2022
Changes from 19 commits
9 changes: 7 additions & 2 deletions .circleci/config.yml
@@ -33,7 +33,7 @@ jobs:
DBT_INVOCATION_ENV: circle
docker:
- image: fishtownanalytics/test-container:10
-   - image: godatadriven/spark:2
+   - image: godatadriven/spark:3.0
Contributor:

If we move to the apache image in docker-compose, I would suggest doing that here as well 👍🏻

@nssalian (Contributor, Author) on May 13, 2022:

Yes, that's the part that's being tested at the moment, since the tests are failing in Thrift. I was trying to eliminate possible reasons for failure. I'm trying with 3.0 right now.

Reply:

You might consider trying with 3.1 or 3.2. There were absolutely some bugs in Spark 3.0.0, but I'm admittedly somewhat distrustful of the whole of Spark 3.0.x.

Just throwing that out there in case you're still stuck.

environment:
WAIT_FOR: localhost:5432
command: >
@@ -44,9 +44,11 @@ jobs:
--conf spark.hadoop.javax.jdo.option.ConnectionPassword=dbt
--conf spark.hadoop.javax.jdo.option.ConnectionDriverName=org.postgresql.Driver
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer
-   --conf spark.jars.packages=org.apache.hudi:hudi-spark-bundle_2.11:0.9.0
+   --conf spark.jars.packages=org.apache.hudi:hudi-spark3-bundle_2.12:0.9.0
--conf spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension
--conf spark.driver.userClassPathFirst=true
+   --conf spark.driver.memory=2g
+   --conf spark.executor.memory=2g
--conf spark.hadoop.datanucleus.autoCreateTables=true
--conf spark.hadoop.datanucleus.schema.autoCreateTables=true
--conf spark.hadoop.datanucleus.fixedDatastore=false
@@ -55,6 +57,9 @@
--hiveconf hoodie.datasource.hive_sync.mode=hms
--hiveconf datanucleus.schema.autoCreateAll=true
--hiveconf hive.metastore.schema.verification=false
+   --hiveconf hive.metastore.sasl.enabled=true
+   --hiveconf hive.server2.thrift.port=10000
+   --hiveconf hive.server2.thrift.bind.host=localhost

- image: postgres:9.6.17-alpine
environment:
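Side note, not part of the diff: the Thrift server this job starts listens on localhost:10000, which is the same endpoint dbt-spark's `thrift` connection method talks to. A minimal smoke-test sketch, assuming PyHive is installed (dbt-spark's thrift method builds on it), the containers above are running, and a `dbt` username is acceptable:

```python
# Hypothetical smoke test, not part of this PR: confirm the Spark Thrift server
# started by this CI job is reachable on localhost:10000 before the test suite runs.
from pyhive import hive  # dbt-spark's `thrift` method builds on PyHive

conn = hive.Connection(host="localhost", port=10000, username="dbt")  # username is an assumption
cursor = conn.cursor()
cursor.execute("SELECT 1")   # trivial query to prove the session works end to end
print(cursor.fetchall())     # expected: [(1,)]
cursor.close()
conn.close()
```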
2 changes: 1 addition & 1 deletion README.md
@@ -26,7 +26,7 @@ more information, consult [the docs](https://docs.getdbt.com/docs/profile-spark)

## Running locally
A `docker-compose` environment starts a Spark Thrift server and a Postgres database as a Hive Metastore backend.
-   Note that this is spark 2 not spark 3 so some functionalities might not be available.
+   Note: Spark has moved to Spark 3 (formerly on Spark 2).
Comment:

You mean dbt / dbt-spark moved to Spark 3, right?

Contributor (Author):

I thought about this statement and I'm a bit confused. If dbt-spark moved to Spark 3 but the testing module is still on Spark 2, this line in the docs doesn't add up. Since this is a draft PR, I'll finish up the testing and clean this up in either this PR or a separate one. But @rvacaru, do you know the context behind why the compose file has spark:3 but the testing didn't move over?

Reply:

When I read it initially, the impression I got was more that the compose file had been moved to Spark 3. So maybe the same warning applies, given the library has been / is being updated for Spark 3 functionality?


The following command would start two docker containers
```
…
```
4 changes: 2 additions & 2 deletions docker-compose.yml
@@ -1,8 +1,8 @@
version: "3.7"
services:

-   dbt-spark2-thrift:
-     image: godatadriven/spark:3.0
+   dbt-spark3-thrift:
+     image: apache/spark:v3.1.3
ports:
- "10000:10000"
- "4040:4040"
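One practical note, not from the PR: after `docker-compose up`, the renamed `dbt-spark3-thrift` service can take a while before port 10000 actually accepts connections. A small, hypothetical wait helper for local runs:

```python
# Hypothetical helper, not part of this PR: block until the dbt-spark3-thrift
# service accepts TCP connections on port 10000 before running tests against it.
import socket
import time


def wait_for_port(host: str, port: int, timeout: float = 120.0) -> None:
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection((host, port), timeout=5):
                return  # the port is open; the server is at least accepting connections
        except OSError:
            time.sleep(2)  # not up yet, retry shortly
    raise TimeoutError(f"{host}:{port} not reachable after {timeout} seconds")


wait_for_port("localhost", 10000)
```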
2 changes: 2 additions & 0 deletions docker/spark-defaults.conf
@@ -1,3 +1,5 @@
+   spark.driver.memory 2g
+   spark.executor.memory 2g
spark.hadoop.datanucleus.autoCreateTables true
spark.hadoop.datanucleus.schema.autoCreateTables true
spark.hadoop.datanucleus.fixedDatastore false
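Purely as an illustration, not part of the change: assuming a local pyspark installation that reads this `docker/spark-defaults.conf` (for example via `SPARK_CONF_DIR`), the new memory settings can be checked on a live session like so:

```python
# Illustrative check, not part of this PR: verify the bumped memory settings
# from spark-defaults.conf are visible on a SparkSession.
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("defaults-check").getOrCreate()
conf = spark.sparkContext.getConf()
print(conf.get("spark.driver.memory", "not set"))    # expected: 2g
print(conf.get("spark.executor.memory", "not set"))  # expected: 2g
spark.stop()
```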
5 changes: 3 additions & 2 deletions tests/functional/adapter/test_basic.py
@@ -64,7 +64,7 @@ def project_config_update(self):
}


-   #hese tests were not enabled in the dbtspec files, so skipping here.
+   # These tests were not enabled in the dbtspec files, so skipping here.
# Error encountered was: Error running query: java.lang.ClassNotFoundException: delta.DefaultSource
@pytest.mark.skip_profile('apache_spark', 'spark_session')
class TestSnapshotTimestampSpark(BaseSnapshotTimestamp):
@@ -79,5 +79,6 @@ def project_config_update(self):
}
}


class TestBaseAdapterMethod(BaseAdapterMethod):
    pass
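For anyone extending this file later, the pattern above (a dbt-tests-adapter base class, an optional `project_config_update` override, and the `skip_profile` marker) generalizes. A hypothetical sketch, not part of this PR, with an assumed incremental base case and file format:

```python
# Hypothetical example, not part of this PR: reuse the dbt-tests-adapter pattern
# shown above for another base test case, skipping profiles that cannot run it.
import pytest
from dbt.tests.adapter.basic.test_incremental import BaseIncremental


@pytest.mark.skip_profile("spark_session")  # assumption: skip the in-process session profile
class TestIncrementalSpark(BaseIncremental):
    @pytest.fixture(scope="class")
    def project_config_update(self):
        # assumed config: run the incremental models with an explicit file format
        return {"models": {"+file_format": "parquet"}}
```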