diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index c1276e29755f0..3f28ff03a6e96 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -101,6 +101,7 @@ jobs:
             \"java-11-17\": \"true\",
             \"lint\" : \"true\",
             \"k8s-integration-tests\" : \"true\",
+            \"connect-jvm-e2e-tests\" : \"true\",
           }"
           echo $precondition # For debugging
           # Remove `\n` to avoid "Invalid format" error
@@ -689,6 +690,71 @@ jobs:
           ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=${JAVA_VERSION/-ea} install
           rm -rf ~/.m2/repository/org/apache/spark
 
+  connect-jvm-e2e-tests:
+    needs: precondition
+    if: fromJson(needs.precondition.outputs.required).connect-jvm-e2e-tests == 'true' &&
+      (inputs.branch != 'branch-3.2' && inputs.branch != 'branch-3.3')
+    name: Connect JVM Client E2E Tests with Maven
+    strategy:
+      fail-fast: false
+      matrix:
+        java:
+          - ${{ inputs.java }}
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Checkout Spark repository
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          repository: apache/spark
+          ref: ${{ inputs.branch }}
+      - name: Sync the current branch with the latest in Apache Spark
+        if: github.repository != 'apache/spark'
+        run: |
+          git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
+          git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
+          git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
+      - name: Cache Scala, SBT and Maven
+        uses: actions/cache@v3
+        with:
+          path: |
+            build/apache-maven-*
+            build/scala-*
+            build/*.jar
+            ~/.sbt
+          key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
+          restore-keys: |
+            build-
+      - name: Cache Maven local repository
+        uses: actions/cache@v3
+        with:
+          path: ~/.m2/repository
+          key: java${{ matrix.java }}-maven-${{ hashFiles('**/pom.xml') }}
+          restore-keys: |
+            java${{ matrix.java }}-maven-
+      - name: Install Java ${{ matrix.java }}
+        uses: actions/setup-java@v3
+        with:
+          distribution: temurin
+          java-version: ${{ matrix.java }}
+      - name: Build and Test with Maven
+        run: |
+          export SCALA_VERSION=2.12
+          if [[ "$SCALA_PROFILE" == "scala2.13" ]]; then
+            export SCALA_VERSION=2.13
+            ./dev/change-scala-version.sh 2.13
+          fi
+          export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
+          export MAVEN_CLI_OPTS="--no-transfer-progress"
+          export JAVA_VERSION=${{ matrix.java }}
+          # It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414.
+          ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Djava.version=${JAVA_VERSION/-ea} install -pl repl -am -Pscala-$SCALA_VERSION
+          ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Djava.version=${JAVA_VERSION/-ea} install -pl connector/connect/common -Pscala-$SCALA_VERSION
+          ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Djava.version=${JAVA_VERSION/-ea} install -pl connector/connect/server -Pscala-$SCALA_VERSION
+          ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Djava.version=${JAVA_VERSION/-ea} install -pl connector/connect/client/jvm -Pscala-$SCALA_VERSION
+          ./build/mvn $MAVEN_CLI_OPTS -Djava.version=${JAVA_VERSION/-ea} clean test -pl connector/connect/client/jvm-e2e-tests -Pscala-$SCALA_VERSION
+          rm -rf ~/.m2/repository/org/apache/spark
+
   scala-213:
     needs: precondition
     if: fromJson(needs.precondition.outputs.required).scala-213 == 'true'
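Note on the "Build and Test with Maven" step above: the upstream modules are `install`ed into the local Maven repository first so the new e2e module can resolve them as ordinary dependencies. A rough local equivalent, sketched under the assumption of a fresh checkout with the default Scala 2.12 profile (module list taken from the CI step; adding `-am` to every module simply rebuilds its upstream dependencies as well):

```sh
# Install the modules the e2e tests depend on, skipping their unit tests,
# then run only the e2e module. Mirrors the CI commands above.
export MAVEN_CLI_OPTS="--no-transfer-progress"
for m in repl connector/connect/common connector/connect/server connector/connect/client/jvm; do
  ./build/mvn $MAVEN_CLI_OPTS -DskipTests install -pl "$m" -am
done
./build/mvn $MAVEN_CLI_OPTS clean test -pl connector/connect/client/jvm-e2e-tests
```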
diff --git a/connector/connect/client/jvm-e2e-tests/pom.xml b/connector/connect/client/jvm-e2e-tests/pom.xml
new file mode 100644
index 0000000000000..225e8570f5bb9
--- /dev/null
+++ b/connector/connect/client/jvm-e2e-tests/pom.xml
@@ -0,0 +1,69 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.spark</groupId>
+    <artifactId>spark-parent_2.12</artifactId>
+    <version>3.5.0-SNAPSHOT</version>
+    <relativePath>../../../../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>spark-connect-client-jvm-e2e-tests_2.12</artifactId>
+  <packaging>jar</packaging>
+  <name>Spark Project Connect Client E2E Tests</name>
+  <url>https://spark.apache.org/</url>
+  <properties>
+    <sbt.project.name>connect-client-jvm-e2e-tests</sbt.project.name>
+    <guava.version>31.0.1-jre</guava.version>
+    <mima.version>1.1.0</mima.version>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-connect-client-jvm_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-connect-common_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.scala-lang</groupId>
+      <artifactId>scala-compiler</artifactId>
+      <version>${scala.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <!-- Use mima to perform the compatibility check -->
+    <dependency>
+      <groupId>com.typesafe</groupId>
+      <artifactId>mima-core_${scala.binary.version}</artifactId>
+      <version>${mima.version}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  <build>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+  </build>
+</project>
diff --git a/connector/connect/client/jvm-e2e-tests/src/test/resources/log4j2.properties b/connector/connect/client/jvm-e2e-tests/src/test/resources/log4j2.properties
new file mode 100644
index 0000000000000..ab02104c69697
--- /dev/null
+++ b/connector/connect/client/jvm-e2e-tests/src/test/resources/log4j2.properties
@@ -0,0 +1,39 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Set everything to be logged to the file target/unit-tests.log
+rootLogger.level = info
+rootLogger.appenderRef.file.ref = ${sys:test.appender:-File}
+
+appender.file.type = File
+appender.file.name = File
+appender.file.fileName = target/unit-tests.log
+appender.file.layout.type = PatternLayout
+appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex
+
+# Tests that launch java subprocesses can set the "test.appender" system property to
+# "console" to avoid having the child process's logs overwrite the unit test's
+# log file.
+appender.console.type = Console
+appender.console.name = console
+appender.console.target = SYSTEM_ERR
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %t: %m%n%ex
+
+# Ignore messages below warning level from Jetty, because it's a bit verbose
+logger.jetty.name = org.sparkproject.jetty
+logger.jetty.level = warn
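The `${sys:test.appender:-File}` lookup above makes the root logger default to the file appender while allowing a launcher to redirect it at run time. A hedged example of overriding it for a single Maven run (this assumes the forked test JVM inherits the `test.appender` system property; if the fork strips it, the property would have to be passed through the test plugin's argLine instead):

```sh
# Send test logging to stderr instead of target/unit-tests.log, useful when
# the suite itself spawns JVMs that would otherwise clobber the log file.
./build/mvn -Dtest.appender=console test -pl connector/connect/client/jvm-e2e-tests
```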
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala b/connector/connect/client/jvm-e2e-tests/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
similarity index 98%
rename from connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
rename to connector/connect/client/jvm-e2e-tests/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
index db2b8b269876f..5c3615775f0b2 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
+++ b/connector/connect/client/jvm-e2e-tests/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
@@ -49,7 +49,7 @@ class ClientE2ETestSuite extends RemoteSparkSession {
     assert(array(2).getLong(0) == 2)
   }
 
-  test("simple udf test") {
+  ignore("simple udf test") {
 
     def dummyUdf(x: Int): Int = x + 5
     val myUdf = udf(dummyUdf _)
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CompatibilitySuite.scala b/connector/connect/client/jvm-e2e-tests/src/test/scala/org/apache/spark/sql/connect/CompatibilitySuite.scala
similarity index 100%
rename from connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CompatibilitySuite.scala
rename to connector/connect/client/jvm-e2e-tests/src/test/scala/org/apache/spark/sql/connect/CompatibilitySuite.scala
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/IntegrationTestUtils.scala b/connector/connect/client/jvm-e2e-tests/src/test/scala/org/apache/spark/sql/connect/client/util/IntegrationTestUtils.scala
similarity index 100%
rename from connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/IntegrationTestUtils.scala
rename to connector/connect/client/jvm-e2e-tests/src/test/scala/org/apache/spark/sql/connect/client/util/IntegrationTestUtils.scala
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala b/connector/connect/client/jvm-e2e-tests/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala
similarity index 100%
rename from connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala
rename to connector/connect/client/jvm-e2e-tests/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala
diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml
index 3178b5a1ca3ba..e1f5cf0b41f12 100644
--- a/connector/connect/client/jvm/pom.xml
+++ b/connector/connect/client/jvm/pom.xml
@@ -33,7 +33,6 @@
     <sbt.project.name>connect-client-jvm</sbt.project.name>
     <guava.version>31.0.1-jre</guava.version>
-    <mima.version>1.1.0</mima.version>
   </properties>
@@ -93,13 +92,6 @@
       <artifactId>mockito-core</artifactId>
       <scope>test</scope>
     </dependency>
-    <!-- Use mima to perform the compatibility check -->
-    <dependency>
-      <groupId>com.typesafe</groupId>
-      <artifactId>mima-core_${scala.binary.version}</artifactId>
-      <version>${mima.version}</version>
-      <scope>test</scope>
-    </dependency>
   </dependencies>
   <build>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 9d96e5ad6333c..ae97ec74c1a5b 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -280,6 +280,7 @@ def __hash__(self):
     sbt_test_goals=[
         "connect/test",
         "connect-client-jvm/test",
+        "connect-client-jvm-e2e-tests/test",
     ],
 )
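Registering the goal in modules.py lets dev/run-tests pick the module up, and the same goal can be invoked by hand. A minimal sketch (goal name as declared in sbt_test_goals above):

```sh
# Run only the relocated e2e suites; the buildTestDeps task added in
# SparkBuild.scala below first builds the sql package and the connect
# server/client assemblies that these suites launch.
./build/sbt "connect-client-jvm-e2e-tests/test"
```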
"connect-client-jvm-e2e-tests/test", ], ) diff --git a/pom.xml b/pom.xml index 2dc7c72dfe8b5..7050582e34c23 100644 --- a/pom.xml +++ b/pom.xml @@ -103,6 +103,7 @@ connector/connect/server connector/connect/common connector/connect/client/jvm + connector/connect/client/jvm-e2e-tests connector/protobuf diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 4eb17e88d4d83..5067b38c0cef1 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -55,13 +55,14 @@ object BuildCommons { val connectCommon = ProjectRef(buildLocation, "connect-common") val connect = ProjectRef(buildLocation, "connect") val connectClient = ProjectRef(buildLocation, "connect-client-jvm") + val connectClientE2ETests = ProjectRef(buildLocation, "connect-client-jvm-e2e-tests") val allProjects@Seq( core, graphx, mllib, mllibLocal, repl, networkCommon, networkShuffle, launcher, unsafe, tags, sketch, kvstore, _* ) = Seq( "core", "graphx", "mllib", "mllib-local", "repl", "network-common", "network-shuffle", "launcher", "unsafe", "tags", "sketch", "kvstore" - ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects ++ Seq(connectCommon, connect, connectClient) + ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects ++ Seq(connectCommon, connect, connectClient, connectClientE2ETests) val optionallyEnabledProjects@Seq(kubernetes, mesos, yarn, sparkGangliaLgpl, streamingKinesisAsl, @@ -403,7 +404,8 @@ object SparkBuild extends PomBuild { val mimaProjects = allProjects.filterNot { x => Seq( spark, hive, hiveThriftServer, repl, networkCommon, networkShuffle, networkYarn, - unsafe, tags, tokenProviderKafka010, sqlKafka010, connectCommon, connect, connectClient, protobuf + unsafe, tags, tokenProviderKafka010, sqlKafka010, connectCommon, connect, connectClient, + connectClientE2ETests, protobuf ).contains(x) } @@ -447,6 +449,7 @@ object SparkBuild extends PomBuild { enable(SparkConnectCommon.settings)(connectCommon) enable(SparkConnect.settings)(connect) enable(SparkConnectClient.settings)(connectClient) + enable(SparkConnectClientE2ETests.settings)(connectClientE2ETests) /* Protobuf settings */ enable(SparkProtobuf.settings)(protobuf) @@ -851,9 +854,6 @@ object SparkConnectClient { ) }, - // Make sure the connect server assembly jar is available for testing. - test := ((Test / test) dependsOn (LocalProject("connect") / assembly)).value, - (assembly / test) := { }, (assembly / logLevel) := Level.Info, @@ -895,6 +895,32 @@ object SparkConnectClient { } } +object SparkConnectClientE2ETests { + val buildTestDeps = TaskKey[Unit]("buildTestDeps", "Build needed dependencies for test.") + lazy val settings = Seq( + buildTestDeps := { + (LocalProject("sql") / Compile / Keys.`package`).value + (LocalProject("connect") / assembly).value + (LocalProject("connect-client-jvm") / assembly).value + }, + test := ((Test / test) dependsOn (buildTestDeps)).value, + // Make sure the connect server assembly jar is available for testing. + test := ((Test / test) dependsOn (LocalProject("connect") / assembly)).value, + libraryDependencies ++= { + val guavaVersion = + SbtPomKeys.effectivePom.value.getProperties.get("guava.version").asInstanceOf[String] + Seq("com.google.guava" % "guava" % guavaVersion) + }, + dependencyOverrides ++= { + val guavaVersion = + SbtPomKeys.effectivePom.value.getProperties.get("guava.version").asInstanceOf[String] + Seq( + "com.google.guava" % "guava" % guavaVersion + ) + } + ) +} + object SparkProtobuf { import BuildCommons.protoVersion