[SPARK-42599][CONNECT][INFRA] Introduce dev/connect-jvm-client-mima-check instead of CompatibilitySuite
#40213
Changes from all commits
@@ -16,49 +16,83 @@
 */
package org.apache.spark.sql.connect.client

import java.io.File
import java.io.{File, Writer}
import java.net.URLClassLoader
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths}
import java.util.regex.Pattern

import com.typesafe.tools.mima.core._
import scala.reflect.runtime.universe.runtimeMirror

import com.typesafe.tools.mima.core.{Problem, ProblemFilter, ProblemFilters}
import com.typesafe.tools.mima.lib.MiMaLib

import org.apache.spark.sql.connect.client.util.ConnectFunSuite
import org.apache.spark.sql.connect.client.util.IntegrationTestUtils._
import org.apache.spark.util.ChildFirstURLClassLoader
/**
 * This test checks the binary compatibility of the connect client API against the Spark SQL API
 * using MiMa. We did not write this check using an SBT build rule as the rule cannot provide the
 * same level of freedom as a test. With a test we can:
 * A tool for checking the binary compatibility of the connect client API against the Spark SQL
 * API using MiMa. We did not write this check using an SBT build rule as the rule cannot provide
 * the same level of freedom as a test. With a test we can:
 * 1. Specify any two jars to run the compatibility check.
 * 1. Easily make the test automatically pick up all new methods added while the client is being
 *    built.
 *
 * The test requires the following artifacts to be built before running:
 * {{{
 *   spark-sql
 *   spark-connect-client-jvm
 * }}}
 * To build the above artifacts, use e.g. `build/sbt package` or `build/mvn clean install
 * -DskipTests`.
 *
 * When debugging this test, if the client API has changed, the client jar needs to be rebuilt
 * before running the test. An example workflow with SBT for this test:
 * 1. The compatibility test has reported an unexpected client API change.
 * 1. Fix the wrong client API.
 * 1. Build the client jar: `build/sbt package`
 * 1. Run the test again: `build/sbt "testOnly
 *    org.apache.spark.sql.connect.client.CompatibilitySuite"`
 * We can run this check by executing `dev/connect-jvm-client-mima-check`.
 */
class CompatibilitySuite extends ConnectFunSuite {
// scalastyle:off println
object CheckConnectJvmClientCompatibility {

  private lazy val clientJar: File =
    findJar(
      "connector/connect/client/jvm",
      "spark-connect-client-jvm-assembly",
      "spark-connect-client-jvm")

  private lazy val sparkHome: String = {
    if (!sys.env.contains("SPARK_HOME")) {
      throw new IllegalArgumentException("SPARK_HOME is not set.")
    }
    sys.env("SPARK_HOME")
  }

  private lazy val sqlJar: File = findJar("sql/core", "spark-sql", "spark-sql")

  def main(args: Array[String]): Unit = {
    var resultWriter: Writer = null
    try {
      resultWriter = Files.newBufferedWriter(
        Paths.get(s"$sparkHome/.connect-mima-check-result"),
        StandardCharsets.UTF_8)
      val clientJar: File =
        findJar(
          "connector/connect/client/jvm",
          "spark-connect-client-jvm-assembly",
          "spark-connect-client-jvm")
      val sqlJar: File = findJar("sql/core", "spark-sql", "spark-sql")
      val problems = checkMiMaCompatibility(clientJar, sqlJar)
      if (problems.nonEmpty) {
        resultWriter.write(s"ERROR: Comparing client jar: $clientJar and sql jar: $sqlJar \n")
        resultWriter.write(s"problems: \n")
        resultWriter.write(s"${problems.map(p => p.description("client")).mkString("\n")}")
        resultWriter.write("\n")
        resultWriter.write(
          "Exceptions to binary compatibility can be added in " +
            "'CheckConnectJvmClientCompatibility#checkMiMaCompatibility'\n")
      }
      val incompatibleApis = checkDatasetApiCompatibility(clientJar, sqlJar)
      if (incompatibleApis.nonEmpty) {
        resultWriter.write(
          "ERROR: Dataset APIs that exist only in the connect client " +
            "module and do not belong to the sql module include: \n")
        resultWriter.write(incompatibleApis.mkString("\n"))
        resultWriter.write("\n")
        resultWriter.write(
          "Exceptions can be added to exceptionMethods in " +
            "'CheckConnectJvmClientCompatibility#checkDatasetApiCompatibility'\n")
      }
    } catch {
      case e: Throwable =>
        println(e.getMessage)
        resultWriter.write(s"ERROR: ${e.getMessage}")
    } finally {
      if (resultWriter != null) {
        resultWriter.close()
      }
    }
  }

  /**
   * MiMa takes an old jar (sql jar) and a new jar (client jar) as inputs and then reports all
@@ -67,7 +101,7 @@ class CompatibilitySuite extends ConnectFunSuite {
   * need to be checked. Then exclude rules are applied to filter out all unsupported methods in
   * the client classes.
   */
  test("compatibility MiMa tests") {
  private def checkMiMaCompatibility(clientJar: File, sqlJar: File): List[Problem] = {
    val mima = new MiMaLib(Seq(clientJar, sqlJar))
    val allProblems = mima.collectProblems(sqlJar, clientJar, List.empty)
    val includedRules = Seq(

@@ -184,30 +218,36 @@ class CompatibilitySuite extends ConnectFunSuite {
      .filter { p =>
        excludeRules.forall(rule => rule(p))
      }

    if (problems.nonEmpty) {
      fail(
        s"\nComparing client jar: $clientJar\nand sql jar: $sqlJar\n" +
          problems.map(p => p.description("client")).mkString("\n"))
    }
    problems
  }

  test("compatibility API tests: Dataset") {
  val clientClassLoader: URLClassLoader = new URLClassLoader(Seq(clientJar.toURI.toURL).toArray)
  val sqlClassLoader: URLClassLoader = new URLClassLoader(Seq(sqlJar.toURI.toURL).toArray)
  private def checkDatasetApiCompatibility(clientJar: File, sqlJar: File): Seq[String] = {
Contributor: @zhenlineo is this only supposed to check Dataset? What about the other classes?

Contributor: This test was initially added because we did not cover the Dataset in the MiMa check. If we find such a test useful and want to expand it to more classes, we can run the MiMa check the other way around to ensure that our new code does not introduce more unwanted methods.
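As a rough sketch of the "other way around" idea mentioned in the comment above, assuming the same MiMaLib API the diff already uses (the object and method names below are hypothetical and not part of this PR), swapping the jar arguments makes MiMa report members that exist in the client jar but have no counterpart in the sql jar:

```scala
import java.io.File

import com.typesafe.tools.mima.lib.MiMaLib

object ReverseMimaCheckSketch {
  // Treat the client jar as the "old" artifact and the sql jar as the "new" one,
  // so MiMa reports client members that are missing from the sql module.
  def clientOnlyProblems(clientJar: File, sqlJar: File): List[String] = {
    val mima = new MiMaLib(Seq(clientJar, sqlJar))
    mima.collectProblems(clientJar, sqlJar, List.empty).map(_.description("sql"))
  }
}
```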
    val clientClass = clientClassLoader.loadClass("org.apache.spark.sql.Dataset")
    val sqlClass = sqlClassLoader.loadClass("org.apache.spark.sql.Dataset")
    def methods(jar: File, className: String): Seq[String] = {
      val classLoader: URLClassLoader =
        new ChildFirstURLClassLoader(Seq(jar.toURI.toURL).toArray, this.getClass.getClassLoader)
      val mirror = runtimeMirror(classLoader)
      // scalastyle:off classforname
      val classSymbol =
        mirror.classSymbol(Class.forName(className, false, classLoader))
      // scalastyle:on classforname
      classSymbol.typeSignature.members
        .filter(_.isMethod)
        .map(_.asMethod)
        .filter(m => m.isPublic)
        .map(_.fullName)
        .toSeq
    }

    val newMethods = clientClass.getMethods
    val oldMethods = sqlClass.getMethods
    val className = "org.apache.spark.sql.Dataset"
    val clientMethods = methods(clientJar, className)
    val sqlMethods = methods(sqlJar, className)
    // Exclude some public methods that must be added through `exceptionMethods`
    val exceptionMethods =
Contributor (Author): @hvanhovell

Contributor (Author): If I can refine this check, I will give a follow-up later.

Contributor: Looks good.
      Seq("org.apache.spark.sql.Dataset.collectResult", "org.apache.spark.sql.Dataset.plan")

    // For now we simply check that the new methods are a subset of the old methods.
    newMethods
      .map(m => m.toString)
      .foreach(method => {
        assert(oldMethods.map(m => m.toString).contains(method))
      })
    // Find new public functions that are not in the sql module `Dataset`.
    clientMethods.diff(sqlMethods).diff(exceptionMethods)
  }

  private case class IncludeByName(name: String) extends ProblemFilter {
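The diff elides the actual rule lists between the hunks, so here is a minimal sketch of how the include/exclude filtering in `checkMiMaCompatibility` composes, assuming MiMa's `ProblemFilters` API and the `IncludeByName` filter shown above; the concrete rule strings below are illustrative placeholders, not the rules this PR adds:

```scala
import java.util.regex.Pattern

import com.typesafe.tools.mima.core.{Problem, ProblemFilter, ProblemFilters}

object MimaRuleSketch {
  // Same shape as the IncludeByName filter in the diff: keep only problems whose
  // affected member name matches a glob-style pattern.
  private case class IncludeByName(name: String) extends ProblemFilter {
    private val pattern =
      Pattern.compile(name.split("\\*", -1).map(Pattern.quote(_)).mkString(".*"))
    override def apply(problem: Problem): Boolean =
      pattern.matcher(problem.matchName.getOrElse("")).matches()
  }

  // Illustrative rules only: restrict the check to Dataset members and tolerate a
  // hypothetical client-only method.
  private val includedRules: Seq[ProblemFilter] =
    Seq(IncludeByName("org.apache.spark.sql.Dataset.*"))
  private val excludeRules: Seq[ProblemFilter] =
    Seq(ProblemFilters.exclude[Problem]("org.apache.spark.sql.Dataset.someClientOnlyHelper"))

  // Keep a problem only if some include rule matches it and no exclude rule filters it
  // out, mirroring the .filter chains in the diff above.
  def applyRules(allProblems: List[Problem]): List[Problem] =
    allProblems
      .filter(p => includedRules.exists(rule => rule(p)))
      .filter(p => excludeRules.forall(rule => rule(p)))
}
```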
dev/connect-jvm-client-mima-check (new file)
@@ -0,0 +1,72 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -o pipefail
set -e

# Go to the Spark project root directory
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
cd "$FWDIR"
export SPARK_HOME=$FWDIR
echo $SPARK_HOME

if [[ -x "$JAVA_HOME/bin/java" ]]; then
  JAVA_CMD="$JAVA_HOME/bin/java"
else
  JAVA_CMD=java
fi

rm -f .connect-mima-check-result

echo "Build sql module, connect-client-jvm module and connect-client-jvm test jar..."
build/sbt "sql/package;connect-client-jvm/assembly;connect-client-jvm/Test/package"

CONNECT_TEST_CLASSPATH="$(build/sbt -DcopyDependencies=false "export connect-client-jvm/Test/fullClasspath" | grep jar | tail -n1)"
CONNECT_CLASSPATH="$(build/sbt -DcopyDependencies=false "export connect-client-jvm/fullClasspath" | grep jar | tail -n1)"
SQL_CLASSPATH="$(build/sbt -DcopyDependencies=false "export sql/fullClasspath" | grep jar | tail -n1)"
Contributor: There will be duplication of entries here. Is that an issue?

Contributor (Author): Yes, there will be duplicate entries, but that is not a problem for the time being.
|
|
||
|
|
||
| echo "Do connect-client-jvm module mima check ..." | ||
|
|
||
| $JAVA_CMD \ | ||
| -Xmx2g \ | ||
| -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.util.jar=ALL-UNNAMED \ | ||
| -cp "$CONNECT_CLASSPATH:$CONNECT_TEST_CLASSPATH:$SQL_CLASSPATH" \ | ||
| org.apache.spark.sql.connect.client.CheckConnectJvmClientCompatibility | ||
|
|
||
| echo "finish connect-client-jvm module mima check ..." | ||
|
|
||
| RESULT_SIZE=$(wc -l .connect-mima-check-result | awk '{print $1}') | ||
|
|
||
| # The the file has no content if check passed. | ||
| if [[ $RESULT_SIZE -eq "0" ]]; then | ||
| ERRORS="" | ||
| else | ||
| ERRORS=$(grep ERROR .connect-mima-check-result | tail -n1) | ||
| fi | ||
|
|
||
| if test ! -z "$ERRORS"; then | ||
| cat .connect-mima-check-result | ||
| echo -e "connect-client-jvm module mima check failed." | ||
| rm .connect-mima-check-result | ||
| exit 1 | ||
| else | ||
| rm .connect-mima-check-result | ||
| echo -e "sql and connect-client-jvm module mima check passed." | ||
| fi | ||
Note: When CheckConnectJvmClientCompatibility is in the connect/jvm module, this check will fail due to #39712 (comment).

Hi, I would recommend using this PR. For these failing tests, let's just delete them; I was planning to delete them after this PR gets merged. The test case is already covered by the MiMa test. Thanks so much. This change looks awesome.