diff --git a/core/src/main/scala/org/apache/spark/util/VersionUtils.scala b/core/src/main/scala/org/apache/spark/util/VersionUtils.scala
index c0f8866dd58d..e97d1c939370 100644
--- a/core/src/main/scala/org/apache/spark/util/VersionUtils.scala
+++ b/core/src/main/scala/org/apache/spark/util/VersionUtils.scala
@@ -24,6 +24,7 @@ private[spark] object VersionUtils {
private val majorMinorRegex = """^(\d+)\.(\d+)(\..*)?$""".r
private val shortVersionRegex = """^(\d+\.\d+\.\d+)(.*)?$""".r
+ private val majorMinorPatchRegex = """^(\d+)(?:\.(\d+)(?:\.(\d+)(?:[.-].*)?)?)?$""".r
/**
* Given a Spark version string, return the major version number.
@@ -63,4 +64,36 @@ private[spark] object VersionUtils {
s" version string, but it could not find the major and minor version numbers.")
}
}
+
+  /**
+   * Extracts the major, minor and patch parts from the input `version`. Note that if minor or patch
+   * version is missing from the input, this will return 0 for these parts. Returns `None` if the
+   * input is not of a valid format, or if any part is too large to be represented as an `Int`.
+   *
+   * Examples of valid version:
+   *  - 1 (extracts to (1, 0, 0))
+   *  - 2.4 (extracts to (2, 4, 0))
+   *  - 3.2.2 (extracts to (3, 2, 2))
+   *  - 3.2.2.4 (extracts to (3, 2, 2))
+   *  - 3.3.1-SNAPSHOT (extracts to (3, 3, 1))
+   *  - 3.2.2.4SNAPSHOT (extracts to (3, 2, 2), only the first 3 components)
+   *
+   * Examples of invalid version:
+   *  - ABC
+   *  - 1X
+   *  - 2.4XYZ
+   *  - 2.4-SNAPSHOT
+   *  - 3.4.5ABC
+   *
+   * @param version the version string to parse
+   * @return `Some((major, minor, patch))` iff the input is a valid version, `None` otherwise.
+   */
+  def majorMinorPatchVersion(version: String): Option[(Int, Int, Int)] = {
+    majorMinorPatchRegex.findFirstMatchIn(version).flatMap { m =>
+      // Optional groups that did not participate in the match are null; default them to "0".
+      val parts = (1 to 3).map(i => Option(m.group(i)).getOrElse("0"))
+      // `\d+` can exceed the Int range; report that as an invalid version instead of throwing.
+      scala.util.Try(parts.map(_.toInt)).toOption.map(p => (p(0), p(1), p(2)))
+    }
+  }
}
diff --git a/core/src/test/scala/org/apache/spark/util/VersionUtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/VersionUtilsSuite.scala
index 56623ebea165..ff68dd150973 100644
--- a/core/src/test/scala/org/apache/spark/util/VersionUtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/VersionUtilsSuite.scala
@@ -98,4 +98,18 @@ class VersionUtilsSuite extends SparkFunSuite {
}
}
}
+
+  test("SPARK-33212: retrieve major/minor/patch version parts") {
+    // each accepted input is paired with the (major, minor, patch) triple it must produce
+    val accepted = Seq(
+      ("3.2.2", (3, 2, 2)), ("3.2.2.4", (3, 2, 2)), ("3.2.2-SNAPSHOT", (3, 2, 2)),
+      ("3.2.2.4XXX", (3, 2, 2)), ("3.2", (3, 2, 0)), ("3", (3, 0, 0)))
+    accepted.foreach { case (input, expected) =>
+      assert(VersionUtils.majorMinorPatchVersion(input).contains(expected))
+    }
+
+    // illegal cases
+    val rejected = Seq("ABC", "3X", "3.2-SNAPSHOT", "3.2ABC", "3-ABC", "3.2.4XYZ")
+    rejected.foreach(v => assert(VersionUtils.majorMinorPatchVersion(v).isEmpty, s"version $v"))
+  }
}
diff --git a/pom.xml b/pom.xml
index fe13056f0932..d4be0f7c8217 100644
--- a/pom.xml
+++ b/pom.xml
@@ -2448,6 +2448,17 @@
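+          <execution>
+            <id>enforce-no-duplicate-dependencies</id>
+            <goals>
+              <goal>enforce</goal>
+            </goals>
+            <configuration>
+              <rules>
+                <banDuplicatePomDependencyVersions/>
+              </rules>
+            </configuration>
+          </execution>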
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index 58ca476e6ae8..e520a0a115ee 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.util.quietly
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.internal.NonClosableMutableURLClassLoader
import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.util.{MutableURLClassLoader, Utils}
+import org.apache.spark.util.{MutableURLClassLoader, Utils, VersionUtils}
/** Factory for `IsolatedClientLoader` with specific versions of hive. */
private[hive] object IsolatedClientLoader extends Logging {
@@ -107,12 +107,19 @@ private[hive] object IsolatedClientLoader extends Logging {
s"Please set ${HiveUtils.HIVE_METASTORE_VERSION.key} with a valid version.")
}
+  /** True only for Hadoop 3.2.x at patch level 2 or higher; unparseable versions yield false. */
+  def supportsHadoopShadedClient(hadoopVersion: String): Boolean = {
+    val parsed = VersionUtils.majorMinorPatchVersion(hadoopVersion)
+    // `parsed` is None for an invalid version string, in which case `exists` is false
+    parsed.exists { case (major, minor, patch) => major == 3 && minor == 2 && patch >= 2 }
+  }
+
private def downloadVersion(
version: HiveVersion,
hadoopVersion: String,
ivyPath: Option[String],
remoteRepos: String): Seq[URL] = {
- val hadoopJarNames = if (hadoopVersion.startsWith("3")) {
+ val hadoopJarNames = if (supportsHadoopShadedClient(hadoopVersion)) {
Seq(s"org.apache.hadoop:hadoop-client-api:$hadoopVersion",
s"org.apache.hadoop:hadoop-client-runtime:$hadoopVersion")
} else {
@@ -123,14 +130,6 @@ private[hive] object IsolatedClientLoader extends Logging {
.map(a => s"org.apache.hive:$a:${version.fullVersion}") ++
Seq("com.google.guava:guava:14.0.1") ++ hadoopJarNames
- val extraExclusions = if (hadoopVersion.startsWith("3")) {
- // this introduced from lower version of Hive could conflict with jars in Hadoop 3.2+, so
- // exclude here in favor of the ones in Hadoop 3.2+
- Seq("org.apache.hadoop:hadoop-auth")
- } else {
- Seq.empty
- }
-
val classpaths = quietly {
SparkSubmitUtils.resolveMavenCoordinates(
hiveArtifacts.mkString(","),
@@ -138,7 +137,7 @@ private[hive] object IsolatedClientLoader extends Logging {
Some(remoteRepos),
ivyPath),
transitive = true,
- exclusions = version.exclusions ++ extraExclusions)
+ exclusions = version.exclusions)
}
val allFiles = classpaths.map(new File(_)).toSet
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala
index 8d55356da28e..5701a2a8d9c8 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala
@@ -21,6 +21,7 @@ import java.io.File
import java.net.URLClassLoader
import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.util.VersionInfo
import org.apache.spark.{SparkConf, SparkFunSuite}
import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils}
@@ -68,4 +69,19 @@ class HadoopVersionInfoSuite extends SparkFunSuite {
Utils.deleteRecursively(ivyPath)
}
}
+
+  test("SPARK-33212: test supportsHadoopShadedClient()") {
+    Seq("3.2.2", "3.2.3", "3.2.2.1", "3.2.2-XYZ", "3.2.2.4-SNAPSHOT").foreach { version =>
+      assert(IsolatedClientLoader.supportsHadoopShadedClient(version), s"version $version")
+    }
+
+    // negative cases: other release lines, and 3.2.x below patch level 2
+    Seq("3.1.3", "3.2", "3.2.1", "4").foreach { version =>
+      assert(!IsolatedClientLoader.supportsHadoopShadedClient(version), s"version $version")
+    }
+  }
+
+  test("SPARK-33212: built-in Hadoop version should support shaded client") {
+    assert(IsolatedClientLoader.supportsHadoopShadedClient(VersionInfo.getVersion))
+  }
}