diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index c1276e29755f0..3f28ff03a6e96 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -101,6 +101,7 @@ jobs:
\"java-11-17\": \"true\",
\"lint\" : \"true\",
\"k8s-integration-tests\" : \"true\",
+ \"connect-jvm-e2e-tests\" : \"true\",
}"
echo $precondition # For debugging
# Remove `\n` to avoid "Invalid format" error
@@ -689,6 +690,71 @@ jobs:
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=${JAVA_VERSION/-ea} install
rm -rf ~/.m2/repository/org/apache/spark
+ connect-jvm-e2e-tests:
+ needs: precondition
+ if: fromJson(needs.precondition.outputs.required).connect-jvm-e2e-tests == 'true' &&
+ (inputs.branch != 'branch-3.2' && inputs.branch != 'branch-3.3')
+ name: Connect JVM Client E2E Tests with Maven
+ strategy:
+ fail-fast: false
+ matrix:
+ java:
+ - ${{ inputs.java }}
+ runs-on: ubuntu-22.04
+ steps:
+ - name: Checkout Spark repository
+ uses: actions/checkout@v3
+ with:
+ fetch-depth: 0
+ repository: apache/spark
+ ref: ${{ inputs.branch }}
+ - name: Sync the current branch with the latest in Apache Spark
+ if: github.repository != 'apache/spark'
+ run: |
+ git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
+ git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
+ git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
+ - name: Cache Scala, SBT and Maven
+ uses: actions/cache@v3
+ with:
+ path: |
+ build/apache-maven-*
+ build/scala-*
+ build/*.jar
+ ~/.sbt
+ key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
+ restore-keys: |
+ build-
+ - name: Cache Maven local repository
+ uses: actions/cache@v3
+ with:
+ path: ~/.m2/repository
+ key: java${{ matrix.java }}-maven-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ java${{ matrix.java }}-maven-
+ - name: Install Java ${{ matrix.java }}
+ uses: actions/setup-java@v3
+ with:
+ distribution: temurin
+ java-version: ${{ matrix.java }}
+ - name: Build and Test with Maven
+ run: |
+ export SCALA_VERSION=2.12
+ if [[ "SCALA_PROFILE" == "scala2.13" ]] ; then
+ export SCALA_VERSION=2.13;
+ ./dev/change-scala-version.sh 2.13
+ fi
+ export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
+ export MAVEN_CLI_OPTS="--no-transfer-progress"
+ export JAVA_VERSION=${{ matrix.java }}
+ # It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414.
+ ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Djava.version=${JAVA_VERSION/-ea} install -pl repl -am -Pscala-$SCALA_VERSION
+ ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Djava.version=${JAVA_VERSION/-ea} install -pl connector/connect/common -Pscala-$SCALA_VERSION
+ ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Djava.version=${JAVA_VERSION/-ea} install -pl connector/connect/server -Pscala-$SCALA_VERSION
+ ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Djava.version=${JAVA_VERSION/-ea} install -pl connector/connect/client/jvm -Pscala-$SCALA_VERSION
+ ./build/mvn $MAVEN_CLI_OPTS -Djava.version=${JAVA_VERSION/-ea} clean test -pl connector/connect/client/jvm-e2e-tests -Pscala-$SCALA_VERSION
+ rm -rf ~/.m2/repository/org/apache/spark
+
scala-213:
needs: precondition
if: fromJson(needs.precondition.outputs.required).scala-213 == 'true'
diff --git a/connector/connect/client/jvm-e2e-tests/pom.xml b/connector/connect/client/jvm-e2e-tests/pom.xml
new file mode 100644
index 0000000000000..225e8570f5bb9
--- /dev/null
+++ b/connector/connect/client/jvm-e2e-tests/pom.xml
@@ -0,0 +1,69 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.spark</groupId>
+    <artifactId>spark-parent_2.12</artifactId>
+    <version>3.5.0-SNAPSHOT</version>
+    <relativePath>../../../../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>spark-connect-client-jvm-e2e-tests_2.12</artifactId>
+  <packaging>jar</packaging>
+  <name>Spark Project Connect Client E2E Tests</name>
+  <url>https://spark.apache.org/</url>
+  <properties>
+    <sbt.project.name>connect-client-jvm-e2e-tests</sbt.project.name>
+    <guava.version>31.0.1-jre</guava.version>
+    <mima.version>1.1.0</mima.version>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-connect-client-jvm_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-connect-common_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.scala-lang</groupId>
+      <artifactId>scala-compiler</artifactId>
+      <version>${scala.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <!-- Use mima to perform the compatibility check -->
+    <dependency>
+      <groupId>com.typesafe</groupId>
+      <artifactId>mima-core_${scala.binary.version}</artifactId>
+      <version>${mima.version}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  <build>
+    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+  </build>
+</project>
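
The `mima-core` test dependency lands in this module because `CompatibilitySuite` (relocated below) drives MiMa programmatically to compare the client jar against the `sql` API surface. A minimal sketch of that style of programmatic check, assuming mima-core's `MiMaLib` API (a classpath-taking constructor and a `collectProblems` method; exact signatures vary between MiMa releases) and hypothetical jar paths:

```scala
import java.io.File

import com.typesafe.tools.mima.lib.MiMaLib

object MimaCheckSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical jar locations; the real suite resolves them from the build output.
    val sqlJar = new File("sql/core/target/spark-sql.jar")
    val clientJar = new File("connector/connect/client/jvm/target/spark-connect-client-jvm.jar")

    // MiMaLib gets the classpath needed to load both artifacts, then reports
    // binary-compatibility problems found when moving from sqlJar to clientJar.
    val mima = new MiMaLib(Seq(sqlJar, clientJar))
    val problems = mima.collectProblems(sqlJar, clientJar, List.empty)
    problems.foreach(p => println(p.description("client")))
  }
}
```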
diff --git a/connector/connect/client/jvm-e2e-tests/src/test/resources/log4j2.properties b/connector/connect/client/jvm-e2e-tests/src/test/resources/log4j2.properties
new file mode 100644
index 0000000000000..ab02104c69697
--- /dev/null
+++ b/connector/connect/client/jvm-e2e-tests/src/test/resources/log4j2.properties
@@ -0,0 +1,39 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Set everything to be logged to the file target/unit-tests.log
+rootLogger.level = info
+rootLogger.appenderRef.file.ref = ${sys:test.appender:-File}
+
+appender.file.type = File
+appender.file.name = File
+appender.file.fileName = target/unit-tests.log
+appender.file.layout.type = PatternLayout
+appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex
+
+# Tests that launch java subprocesses can set the "test.appender" system property to
+# "console" to avoid having the child process's logs overwrite the unit test's
+# log file.
+appender.console.type = Console
+appender.console.name = console
+appender.console.target = SYSTEM_ERR
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %t: %m%n%ex
+
+# Ignore messages below warning level from Jetty, because it's a bit verbose
+logger.jetty.name = org.sparkproject.jetty
+logger.jetty.level = warn
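
The `test.appender` switch documented above matters for this module because the e2e suites fork a Spark Connect server as a child JVM. A minimal sketch of launching a child process with the console appender, so its log4j2 output goes to stderr instead of clobbering `target/unit-tests.log` (the classpath and main class are hypothetical):

```scala
import scala.sys.process._

object ForkWithConsoleLogs {
  def main(args: Array[String]): Unit = {
    // -Dtest.appender=console is resolved by ${sys:test.appender:-File} in the
    // child's log4j2.properties, routing its logs to the console appender.
    val cmd = Seq(
      "java",
      "-Dtest.appender=console",
      "-cp", "target/test-classes", // hypothetical classpath
      "com.example.ChildMain"       // hypothetical main class
    )
    val exitCode = Process(cmd).!
    println(s"child exited with code $exitCode")
  }
}
```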
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala b/connector/connect/client/jvm-e2e-tests/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
similarity index 98%
rename from connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
rename to connector/connect/client/jvm-e2e-tests/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
index db2b8b269876f..5c3615775f0b2 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
+++ b/connector/connect/client/jvm-e2e-tests/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
@@ -49,7 +49,7 @@ class ClientE2ETestSuite extends RemoteSparkSession {
assert(array(2).getLong(0) == 2)
}
- test("simple udf test") {
+ ignore("simple udf test") {
def dummyUdf(x: Int): Int = x + 5
val myUdf = udf(dummyUdf _)
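
`ignore` is ScalaTest's drop-in replacement for `test`: the body still compiles, but the case is reported as ignored rather than run, which keeps the UDF scenario visible until client-defined lambdas can be shipped to the server from this standalone module. Based on the visible context, the disabled case looks roughly like this (the collect-and-assert tail is a hedged reconstruction, not the original body):

```scala
// Inside ClientE2ETestSuite; `spark` comes from RemoteSparkSession and
// `udf`/`col` from org.apache.spark.sql.functions.
ignore("simple udf test") {
  def dummyUdf(x: Int): Int = x + 5
  val myUdf = udf(dummyUdf _)

  // Hedged reconstruction: apply the UDF server-side and check the +5 shift.
  val result = spark.range(5).select(myUdf(col("id"))).collect()
  result.zipWithIndex.foreach { case (row, i) =>
    assert(row.getInt(0) == i + 5)
  }
}
```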
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CompatibilitySuite.scala b/connector/connect/client/jvm-e2e-tests/src/test/scala/org/apache/spark/sql/connect/CompatibilitySuite.scala
similarity index 100%
rename from connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CompatibilitySuite.scala
rename to connector/connect/client/jvm-e2e-tests/src/test/scala/org/apache/spark/sql/connect/CompatibilitySuite.scala
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/IntegrationTestUtils.scala b/connector/connect/client/jvm-e2e-tests/src/test/scala/org/apache/spark/sql/connect/client/util/IntegrationTestUtils.scala
similarity index 100%
rename from connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/IntegrationTestUtils.scala
rename to connector/connect/client/jvm-e2e-tests/src/test/scala/org/apache/spark/sql/connect/client/util/IntegrationTestUtils.scala
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala b/connector/connect/client/jvm-e2e-tests/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala
similarity index 100%
rename from connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala
rename to connector/connect/client/jvm-e2e-tests/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala
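
The moved helpers keep their original package, `org.apache.spark.sql.connect.client.util`, so suites in the new module extend them unchanged; `RemoteSparkSession` boots a local Spark Connect server for the suite and exposes a `spark` handle bound to it. A minimal sketch of adding another e2e suite on top of it, assuming (as with the relocated suites) that it mixes in a ScalaTest funsuite base; the suite name and query are hypothetical:

```scala
package org.apache.spark.sql

import org.apache.spark.sql.connect.client.util.RemoteSparkSession

class SelectOneE2ESuite extends RemoteSparkSession {
  test("round-trips a trivial query") {
    // Runs against the Spark Connect server started by RemoteSparkSession.
    val rows = spark.sql("select 1 as one").collect()
    assert(rows.head.getInt(0) == 1)
  }
}
```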
diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml
index 3178b5a1ca3ba..e1f5cf0b41f12 100644
--- a/connector/connect/client/jvm/pom.xml
+++ b/connector/connect/client/jvm/pom.xml
@@ -33,7 +33,6 @@
     <sbt.project.name>connect-client-jvm</sbt.project.name>
     <guava.version>31.0.1-jre</guava.version>
-    <mima.version>1.1.0</mima.version>
   </properties>
@@ -93,13 +92,6 @@
       <artifactId>mockito-core</artifactId>
       <scope>test</scope>
     </dependency>
-    <!-- Use mima to perform the compatibility check -->
-    <dependency>
-      <groupId>com.typesafe</groupId>
-      <artifactId>mima-core_${scala.binary.version}</artifactId>
-      <version>${mima.version}</version>
-      <scope>test</scope>
-    </dependency>
   </dependencies>
   <build>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 9d96e5ad6333c..ae97ec74c1a5b 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -280,6 +280,7 @@ def __hash__(self):
sbt_test_goals=[
"connect/test",
"connect-client-jvm/test",
+ "connect-client-jvm-e2e-tests/test",
],
)
diff --git a/pom.xml b/pom.xml
index 2dc7c72dfe8b5..7050582e34c23 100644
--- a/pom.xml
+++ b/pom.xml
@@ -103,6 +103,7 @@
    <module>connector/connect/server</module>
    <module>connector/connect/common</module>
    <module>connector/connect/client/jvm</module>
+    <module>connector/connect/client/jvm-e2e-tests</module>
    <module>connector/protobuf</module>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 4eb17e88d4d83..5067b38c0cef1 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -55,13 +55,14 @@ object BuildCommons {
val connectCommon = ProjectRef(buildLocation, "connect-common")
val connect = ProjectRef(buildLocation, "connect")
val connectClient = ProjectRef(buildLocation, "connect-client-jvm")
+ val connectClientE2ETests = ProjectRef(buildLocation, "connect-client-jvm-e2e-tests")
val allProjects@Seq(
core, graphx, mllib, mllibLocal, repl, networkCommon, networkShuffle, launcher, unsafe, tags, sketch, kvstore, _*
) = Seq(
"core", "graphx", "mllib", "mllib-local", "repl", "network-common", "network-shuffle", "launcher", "unsafe",
"tags", "sketch", "kvstore"
- ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects ++ Seq(connectCommon, connect, connectClient)
+ ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects ++ Seq(connectCommon, connect, connectClient, connectClientE2ETests)
val optionallyEnabledProjects@Seq(kubernetes, mesos, yarn,
sparkGangliaLgpl, streamingKinesisAsl,
@@ -403,7 +404,8 @@ object SparkBuild extends PomBuild {
val mimaProjects = allProjects.filterNot { x =>
Seq(
spark, hive, hiveThriftServer, repl, networkCommon, networkShuffle, networkYarn,
- unsafe, tags, tokenProviderKafka010, sqlKafka010, connectCommon, connect, connectClient, protobuf
+ unsafe, tags, tokenProviderKafka010, sqlKafka010, connectCommon, connect, connectClient,
+ connectClientE2ETests, protobuf
).contains(x)
}
@@ -447,6 +449,7 @@ object SparkBuild extends PomBuild {
enable(SparkConnectCommon.settings)(connectCommon)
enable(SparkConnect.settings)(connect)
enable(SparkConnectClient.settings)(connectClient)
+ enable(SparkConnectClientE2ETests.settings)(connectClientE2ETests)
/* Protobuf settings */
enable(SparkProtobuf.settings)(protobuf)
@@ -851,9 +854,6 @@ object SparkConnectClient {
)
},
- // Make sure the connect server assembly jar is available for testing.
- test := ((Test / test) dependsOn (LocalProject("connect") / assembly)).value,
-
(assembly / test) := { },
(assembly / logLevel) := Level.Info,
@@ -895,6 +895,32 @@ object SparkConnectClient {
}
}
+object SparkConnectClientE2ETests {
+ val buildTestDeps = TaskKey[Unit]("buildTestDeps", "Build needed dependencies for test.")
+ lazy val settings = Seq(
+ buildTestDeps := {
+ (LocalProject("sql") / Compile / Keys.`package`).value
+ (LocalProject("connect") / assembly).value
+ (LocalProject("connect-client-jvm") / assembly).value
+ },
+    // Make sure the SQL package jar and the server/client assembly jars are
+    // available for testing; buildTestDeps already covers the connect server assembly.
+    test := ((Test / test) dependsOn buildTestDeps).value,
+ libraryDependencies ++= {
+ val guavaVersion =
+ SbtPomKeys.effectivePom.value.getProperties.get("guava.version").asInstanceOf[String]
+ Seq("com.google.guava" % "guava" % guavaVersion)
+ },
+ dependencyOverrides ++= {
+ val guavaVersion =
+ SbtPomKeys.effectivePom.value.getProperties.get("guava.version").asInstanceOf[String]
+ Seq(
+ "com.google.guava" % "guava" % guavaVersion
+ )
+ }
+ )
+}
+
object SparkProtobuf {
import BuildCommons.protoVersion
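
For readers less familiar with the sbt idiom used in `SparkConnectClientE2ETests`: calling `.value` on another task inside a task body, or composing tasks with `dependsOn`, makes sbt run those tasks first, which is how `buildTestDeps` guarantees the SQL package and both assemblies exist before any e2e test starts. A standalone sketch of the same wiring in a small `build.sbt` (project and task names are hypothetical):

```scala
// build.sbt (sketch): run a prep task before `test`.
val buildTestDeps = taskKey[Unit]("Build needed dependencies for test.")

lazy val server = project in file("server") // hypothetical upstream module

lazy val e2e = (project in file("e2e"))
  .settings(
    buildTestDeps := {
      // Calling .value on another task declares a dependency; sbt runs it first.
      (server / Compile / packageBin).value
    },
    // Rewire `test` so the previous `test` runs only after buildTestDeps.
    Test / test := ((Test / test) dependsOn buildTestDeps).value
  )
```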