diff --git a/core/src/main/scala/org/apache/spark/util/VersionUtils.scala b/core/src/main/scala/org/apache/spark/util/VersionUtils.scala index c0f8866dd58d..e97d1c939370 100644 --- a/core/src/main/scala/org/apache/spark/util/VersionUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/VersionUtils.scala @@ -24,6 +24,7 @@ private[spark] object VersionUtils { private val majorMinorRegex = """^(\d+)\.(\d+)(\..*)?$""".r private val shortVersionRegex = """^(\d+\.\d+\.\d+)(.*)?$""".r + private val majorMinorPatchRegex = """^(\d+)(?:\.(\d+)(?:\.(\d+)(?:[.-].*)?)?)?$""".r /** * Given a Spark version string, return the major version number. @@ -63,4 +64,36 @@ private[spark] object VersionUtils { s" version string, but it could not find the major and minor version numbers.") } } + + /** + * Extracts the major, minor and patch parts from the input `version`. Note that if minor or patch + * version is missing from the input, this will return 0 for these parts. Returns `None` if the + * input is not of a valid format. + * + * Examples of valid version: + * - 1 (extracts to (1, 0, 0)) + * - 2.4 (extracts to (2, 4, 0)) + * - 3.2.2 (extracts to (3, 2, 2)) + * - 3.2.2.4 (extracts to 3, 2, 2)) + * - 3.3.1-SNAPSHOT (extracts to (3, 3, 1)) + * - 3.2.2.4SNAPSHOT (extracts to (3, 2, 2), only the first 3 components) + * + * Examples of invalid version: + * - ABC + * - 1X + * - 2.4XYZ + * - 2.4-SNAPSHOT + * - 3.4.5ABC + * + * @return A non-empty option containing a 3-value tuple (major, minor, patch) iff the + * input is a valid version. `None` otherwise. + */ + def majorMinorPatchVersion(version: String): Option[(Int, Int, Int)] = { + majorMinorPatchRegex.findFirstMatchIn(version).map { m => + val major = m.group(1).toInt + val minor = Option(m.group(2)).map(_.toInt).getOrElse(0) + val patch = Option(m.group(3)).map(_.toInt).getOrElse(0) + (major, minor, patch) + } + } } diff --git a/core/src/test/scala/org/apache/spark/util/VersionUtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/VersionUtilsSuite.scala index 56623ebea165..ff68dd150973 100644 --- a/core/src/test/scala/org/apache/spark/util/VersionUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/VersionUtilsSuite.scala @@ -98,4 +98,18 @@ class VersionUtilsSuite extends SparkFunSuite { } } } + + test("SPARK-33212: retrieve major/minor/patch version parts") { + assert(VersionUtils.majorMinorPatchVersion("3.2.2").contains((3, 2, 2))) + assert(VersionUtils.majorMinorPatchVersion("3.2.2.4").contains((3, 2, 2))) + assert(VersionUtils.majorMinorPatchVersion("3.2.2-SNAPSHOT").contains((3, 2, 2))) + assert(VersionUtils.majorMinorPatchVersion("3.2.2.4XXX").contains((3, 2, 2))) + assert(VersionUtils.majorMinorPatchVersion("3.2").contains((3, 2, 0))) + assert(VersionUtils.majorMinorPatchVersion("3").contains((3, 0, 0))) + + // illegal cases + Seq("ABC", "3X", "3.2-SNAPSHOT", "3.2ABC", "3-ABC", "3.2.4XYZ").foreach { version => + assert(VersionUtils.majorMinorPatchVersion(version).isEmpty, s"version $version") + } + } } diff --git a/pom.xml b/pom.xml index fe13056f0932..d4be0f7c8217 100644 --- a/pom.xml +++ b/pom.xml @@ -2448,6 +2448,17 @@ + + enforce-no-duplicate-dependencies + + enforce + + + + + + + diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 58ca476e6ae8..e520a0a115ee 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.util.quietly import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.internal.NonClosableMutableURLClassLoader import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.util.{MutableURLClassLoader, Utils} +import org.apache.spark.util.{MutableURLClassLoader, Utils, VersionUtils} /** Factory for `IsolatedClientLoader` with specific versions of hive. */ private[hive] object IsolatedClientLoader extends Logging { @@ -107,12 +107,19 @@ private[hive] object IsolatedClientLoader extends Logging { s"Please set ${HiveUtils.HIVE_METASTORE_VERSION.key} with a valid version.") } + def supportsHadoopShadedClient(hadoopVersion: String): Boolean = { + VersionUtils.majorMinorPatchVersion(hadoopVersion).exists { + case (3, 2, v) if v >= 2 => true + case _ => false + } + } + private def downloadVersion( version: HiveVersion, hadoopVersion: String, ivyPath: Option[String], remoteRepos: String): Seq[URL] = { - val hadoopJarNames = if (hadoopVersion.startsWith("3")) { + val hadoopJarNames = if (supportsHadoopShadedClient(hadoopVersion)) { Seq(s"org.apache.hadoop:hadoop-client-api:$hadoopVersion", s"org.apache.hadoop:hadoop-client-runtime:$hadoopVersion") } else { @@ -123,14 +130,6 @@ private[hive] object IsolatedClientLoader extends Logging { .map(a => s"org.apache.hive:$a:${version.fullVersion}") ++ Seq("com.google.guava:guava:14.0.1") ++ hadoopJarNames - val extraExclusions = if (hadoopVersion.startsWith("3")) { - // this introduced from lower version of Hive could conflict with jars in Hadoop 3.2+, so - // exclude here in favor of the ones in Hadoop 3.2+ - Seq("org.apache.hadoop:hadoop-auth") - } else { - Seq.empty - } - val classpaths = quietly { SparkSubmitUtils.resolveMavenCoordinates( hiveArtifacts.mkString(","), @@ -138,7 +137,7 @@ private[hive] object IsolatedClientLoader extends Logging { Some(remoteRepos), ivyPath), transitive = true, - exclusions = version.exclusions ++ extraExclusions) + exclusions = version.exclusions) } val allFiles = classpaths.map(new File(_)).toSet diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala index 8d55356da28e..5701a2a8d9c8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala @@ -21,6 +21,7 @@ import java.io.File import java.net.URLClassLoader import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.util.VersionInfo import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils} @@ -68,4 +69,19 @@ class HadoopVersionInfoSuite extends SparkFunSuite { Utils.deleteRecursively(ivyPath) } } + + test("SPARK-32212: test supportHadoopShadedClient()") { + Seq("3.2.2", "3.2.3", "3.2.2.1", "3.2.2-XYZ", "3.2.2.4-SNAPSHOT").foreach { version => + assert(IsolatedClientLoader.supportsHadoopShadedClient(version), s"version $version") + } + + // negative cases + Seq("3.1.3", "3.2", "3.2.1", "4").foreach { version => + assert(!IsolatedClientLoader.supportsHadoopShadedClient(version), s"version $version") + } + } + + test("SPARK-32212: built-in Hadoop version should support shaded client") { + assert(IsolatedClientLoader.supportsHadoopShadedClient(VersionInfo.getVersion)) + } }