From b62b58dba60392fe88cd0bc91ce1188c21fcdb16 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 15 Jan 2021 15:45:43 -0800 Subject: [PATCH 1/5] SPARK-33212 follow-up --- pom.xml | 11 +++++++++++ .../sql/hive/client/IsolatedClientLoader.scala | 17 +++++++---------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/pom.xml b/pom.xml index fe13056f0932..d4be0f7c8217 100644 --- a/pom.xml +++ b/pom.xml @@ -2448,6 +2448,17 @@ + + enforce-no-duplicate-dependencies + + enforce + + + + + + + diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 58ca476e6ae8..6af56b41d69b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -107,12 +107,17 @@ private[hive] object IsolatedClientLoader extends Logging { s"Please set ${HiveUtils.HIVE_METASTORE_VERSION.key} with a valid version.") } + def supportHadoopShadedClient(hadoopVersion: String): Boolean = hadoopVersion match { + case "3.2.2" => true + case _ => false + } + private def downloadVersion( version: HiveVersion, hadoopVersion: String, ivyPath: Option[String], remoteRepos: String): Seq[URL] = { - val hadoopJarNames = if (hadoopVersion.startsWith("3")) { + val hadoopJarNames = if (supportHadoopShadedClient(hadoopVersion)) { Seq(s"org.apache.hadoop:hadoop-client-api:$hadoopVersion", s"org.apache.hadoop:hadoop-client-runtime:$hadoopVersion") } else { @@ -123,14 +128,6 @@ private[hive] object IsolatedClientLoader extends Logging { .map(a => s"org.apache.hive:$a:${version.fullVersion}") ++ Seq("com.google.guava:guava:14.0.1") ++ hadoopJarNames - val extraExclusions = if (hadoopVersion.startsWith("3")) { - // this introduced from lower version of Hive could conflict with jars in Hadoop 3.2+, so - // exclude here in favor of the ones in Hadoop 3.2+ - Seq("org.apache.hadoop:hadoop-auth") - } else { - Seq.empty - } - val classpaths = quietly { SparkSubmitUtils.resolveMavenCoordinates( hiveArtifacts.mkString(","), @@ -138,7 +135,7 @@ private[hive] object IsolatedClientLoader extends Logging { Some(remoteRepos), ivyPath), transitive = true, - exclusions = version.exclusions ++ extraExclusions) + exclusions = version.exclusions) } val allFiles = classpaths.map(new File(_)).toSet From 6657c874d48c16513b8cef4a0b8ee9df6b2c531d Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 19 Jan 2021 14:25:17 -0800 Subject: [PATCH 2/5] add version parsing --- .../hive/client/IsolatedClientLoader.scala | 76 ++++++++++++++----- .../hive/client/HadoopVersionInfoSuite.scala | 31 ++++++++ 2 files changed, 89 insertions(+), 18 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 6af56b41d69b..9900968992c6 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -21,6 +21,7 @@ import java.io.File import java.lang.reflect.InvocationTargetException import java.net.{URL, URLClassLoader} import java.util +import java.util.regex.Pattern import scala.util.Try @@ -88,28 +89,63 @@ private[hive] object IsolatedClientLoader extends Logging { barrierPrefixes = barrierPrefixes) } - def hiveVersion(version: String): HiveVersion = version match { - case "12" | "0.12" | "0.12.0" => hive.v12 - case "13" | "0.13" | "0.13.0" | "0.13.1" => hive.v13 - case "14" | "0.14" | "0.14.0" => hive.v14 - case "1.0" | "1.0.0" | "1.0.1" => hive.v1_0 - case "1.1" | "1.1.0" | "1.1.1" => hive.v1_1 - case "1.2" | "1.2.0" | "1.2.1" | "1.2.2" => hive.v1_2 - case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0 - case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1 - case "2.2" | "2.2.0" => hive.v2_2 - case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" | "2.3.6" | "2.3.7" | - "2.3.8" => hive.v2_3 - case "3.0" | "3.0.0" => hive.v3_0 - case "3.1" | "3.1.0" | "3.1.1" | "3.1.2" => hive.v3_1 - case version => + def hiveVersion(version: String): HiveVersion = { + getVersionParts(version).flatMap { + case (12, _, _) | (0, 12, _) => Some(hive.v12) + case (13, _, _) | (0, 13, _) => Some(hive.v13) + case (14, _, _) | (0, 14, _) => Some(hive.v14) + case (1, 0, _) => Some(hive.v1_0) + case (1, 1, _) => Some(hive.v1_1) + case (1, 2, _) => Some(hive.v1_2) + case (2, 0, _) => Some(hive.v2_0) + case (2, 1, _) => Some(hive.v2_1) + case (2, 2, _) => Some(hive.v2_2) + case (2, 3, _) => Some(hive.v2_3) + case (3, 0, _) => Some(hive.v3_0) + case (3, 1, _) => Some(hive.v3_1) + case _ => None + }.getOrElse { throw new UnsupportedOperationException(s"Unsupported Hive Metastore version ($version). " + s"Please set ${HiveUtils.HIVE_METASTORE_VERSION.key} with a valid version.") + } } - def supportHadoopShadedClient(hadoopVersion: String): Boolean = hadoopVersion match { - case "3.2.2" => true - case _ => false + def supportHadoopShadedClient(hadoopVersion: String): Boolean = { + getVersionParts(hadoopVersion).exists { + case (3, 2, v) if v >= 2 => true + case _ => false + } + } + + /** + * Retrieves the major, minor and patch parts from the input `version`. Returns `None` if the + * input is not of a valid format. + * + * Examples of valid version: + * - 1 + * - 2.4 + * - 3.2.2 + * - 3.2.2.4 + * - 3.3.1-SNAPSHOT + * - 3.2.2.4SNAPSHOT (we only retrieve the first 3 components) + * + * Examples of invalid version: + * - ABC + * - 1X + * - 2.4XYZ + * - 2.4-SNAPSHOT + * - 3.4.5ABC + */ + def getVersionParts(version: String): Option[(Int, Int, Int)] = { + val matcher = VERSION_PATTERN.matcher(version) + if (matcher.matches() && matcher.groupCount() == 3) { + val major = matcher.group(1).toInt + val minor = if (matcher.group(2) == null) 0 else matcher.group(2).toInt + val patch = if (matcher.group(3) == null) 0 else matcher.group(3).toInt + Some((major, minor, patch)) + } else { + None + } } private def downloadVersion( @@ -146,6 +182,10 @@ private[hive] object IsolatedClientLoader extends Logging { tempDir.listFiles().map(_.toURI.toURL) } + // A regex pattern to match Maven version numbers + private lazy val VERSION_PATTERN = + Pattern.compile("(\\d+)(?:\\.(\\d+)(?:\\.(\\d+)(?:[.-].*)?)?)?") + // A map from a given pair of HiveVersion and Hadoop version to jar files. // It is only used by forVersion. private val resolvedVersions = diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala index 8d55356da28e..bd3726931c82 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala @@ -68,4 +68,35 @@ class HadoopVersionInfoSuite extends SparkFunSuite { Utils.deleteRecursively(ivyPath) } } + + test("SPARK-33212: test getVersionParts()") { + assert(IsolatedClientLoader.getVersionParts("3.2.2").contains((3, 2, 2))) + assert(IsolatedClientLoader.getVersionParts("3.2.2.4").contains((3, 2, 2))) + assert(IsolatedClientLoader.getVersionParts("3.2.2-SNAPSHOT").contains((3, 2, 2))) + assert(IsolatedClientLoader.getVersionParts("3.2.2.4XXX").contains((3, 2, 2))) + assert(IsolatedClientLoader.getVersionParts("3.2").contains((3, 2, 0))) + assert(IsolatedClientLoader.getVersionParts("3").contains((3, 0, 0))) + + // illegal cases + assert(IsolatedClientLoader.getVersionParts("ABC").isEmpty) + assert(IsolatedClientLoader.getVersionParts("3X").isEmpty) + assert(IsolatedClientLoader.getVersionParts("3.2-SNAPSHOT").isEmpty) + assert(IsolatedClientLoader.getVersionParts("3.2ABC").isEmpty) + assert(IsolatedClientLoader.getVersionParts("3-ABC").isEmpty) + assert(IsolatedClientLoader.getVersionParts("3.2.4XYZ").isEmpty) + } + + test("SPARK-32212: test supportHadoopShadedClient()") { + assert(IsolatedClientLoader.supportHadoopShadedClient("3.2.2")) + assert(IsolatedClientLoader.supportHadoopShadedClient("3.2.3")) + assert(IsolatedClientLoader.supportHadoopShadedClient("3.2.2.1")) + assert(IsolatedClientLoader.supportHadoopShadedClient("3.2.2-XYZ")) + assert(IsolatedClientLoader.supportHadoopShadedClient("3.2.2.4-SNAPSHOT")) + + // negative cases + assert(!IsolatedClientLoader.supportHadoopShadedClient("3.1.3")) + assert(!IsolatedClientLoader.supportHadoopShadedClient("3.2")) + assert(!IsolatedClientLoader.supportHadoopShadedClient("3.2.1")) + assert(!IsolatedClientLoader.supportHadoopShadedClient("4")) + } } From 9b8a911dcf1aa95ca39ca7e64648c39573f5451b Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sat, 23 Jan 2021 15:17:10 -0800 Subject: [PATCH 3/5] address comments - move version parsing to `VersionUtils` - add compatibility test - use `Option` for null --- .../org/apache/spark/util/VersionUtils.scala | 34 +++++++++++++++ .../apache/spark/util/VersionUtilsSuite.scala | 17 ++++++++ .../hive/client/IsolatedClientLoader.scala | 42 ++----------------- .../hive/client/HadoopVersionInfoSuite.scala | 22 +++------- 4 files changed, 59 insertions(+), 56 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/VersionUtils.scala b/core/src/main/scala/org/apache/spark/util/VersionUtils.scala index c0f8866dd58d..5bf2e22ae1b1 100644 --- a/core/src/main/scala/org/apache/spark/util/VersionUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/VersionUtils.scala @@ -24,6 +24,7 @@ private[spark] object VersionUtils { private val majorMinorRegex = """^(\d+)\.(\d+)(\..*)?$""".r private val shortVersionRegex = """^(\d+\.\d+\.\d+)(.*)?$""".r + private val majorMinorPatchRegex = """^(\d+)(?:\.(\d+)(?:\.(\d+)(?:[.-].*)?)?)?$""".r /** * Given a Spark version string, return the major version number. @@ -63,4 +64,37 @@ private[spark] object VersionUtils { s" version string, but it could not find the major and minor version numbers.") } } + + /** + * Retrieves the major, minor and patch parts from the input `version`. Returns `None` if the + * input is not of a valid format. + * + * Examples of valid version: + * - 1 + * - 2.4 + * - 3.2.2 + * - 3.2.2.4 + * - 3.3.1-SNAPSHOT + * - 3.2.2.4SNAPSHOT (we only retrieve the first 3 components) + * + * Examples of invalid version: + * - ABC + * - 1X + * - 2.4XYZ + * - 2.4-SNAPSHOT + * - 3.4.5ABC + * + * @return A non-empty option containing a 3-value tuple (major, minor, patch) iff the + * input is a valid version. `None` otherwise. + */ + def majorMinorPatchVersion(version: String): Option[(Int, Int, Int)] = { + majorMinorPatchRegex.findFirstMatchIn(version) match { + case Some(m) => + val major = m.group(1).toInt + val minor = Option(m.group(2)).map(_.toInt).getOrElse(0) + val patch = Option(m.group(3)).map(_.toInt).getOrElse(0) + Some((major, minor, patch)) + case None => None + } + } } diff --git a/core/src/test/scala/org/apache/spark/util/VersionUtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/VersionUtilsSuite.scala index 56623ebea165..ffa7876fd979 100644 --- a/core/src/test/scala/org/apache/spark/util/VersionUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/VersionUtilsSuite.scala @@ -98,4 +98,21 @@ class VersionUtilsSuite extends SparkFunSuite { } } } + + test("SPARK-33212: retrieve major/minor/patch version parts") { + assert(VersionUtils.majorMinorPatchVersion("3.2.2").contains((3, 2, 2))) + assert(VersionUtils.majorMinorPatchVersion("3.2.2.4").contains((3, 2, 2))) + assert(VersionUtils.majorMinorPatchVersion("3.2.2-SNAPSHOT").contains((3, 2, 2))) + assert(VersionUtils.majorMinorPatchVersion("3.2.2.4XXX").contains((3, 2, 2))) + assert(VersionUtils.majorMinorPatchVersion("3.2").contains((3, 2, 0))) + assert(VersionUtils.majorMinorPatchVersion("3").contains((3, 0, 0))) + + // illegal cases + assert(VersionUtils.majorMinorPatchVersion("ABC").isEmpty) + assert(VersionUtils.majorMinorPatchVersion("3X").isEmpty) + assert(VersionUtils.majorMinorPatchVersion("3.2-SNAPSHOT").isEmpty) + assert(VersionUtils.majorMinorPatchVersion("3.2ABC").isEmpty) + assert(VersionUtils.majorMinorPatchVersion("3-ABC").isEmpty) + assert(VersionUtils.majorMinorPatchVersion("3.2.4XYZ").isEmpty) + } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 9900968992c6..fef9e085939c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -21,7 +21,6 @@ import java.io.File import java.lang.reflect.InvocationTargetException import java.net.{URL, URLClassLoader} import java.util -import java.util.regex.Pattern import scala.util.Try @@ -38,7 +37,7 @@ import org.apache.spark.sql.catalyst.util.quietly import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.internal.NonClosableMutableURLClassLoader import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.util.{MutableURLClassLoader, Utils} +import org.apache.spark.util.{MutableURLClassLoader, Utils, VersionUtils} /** Factory for `IsolatedClientLoader` with specific versions of hive. */ private[hive] object IsolatedClientLoader extends Logging { @@ -90,7 +89,7 @@ private[hive] object IsolatedClientLoader extends Logging { } def hiveVersion(version: String): HiveVersion = { - getVersionParts(version).flatMap { + VersionUtils.majorMinorPatchVersion(version).flatMap { case (12, _, _) | (0, 12, _) => Some(hive.v12) case (13, _, _) | (0, 13, _) => Some(hive.v13) case (14, _, _) | (0, 14, _) => Some(hive.v14) @@ -111,43 +110,12 @@ private[hive] object IsolatedClientLoader extends Logging { } def supportHadoopShadedClient(hadoopVersion: String): Boolean = { - getVersionParts(hadoopVersion).exists { + VersionUtils.majorMinorPatchVersion(hadoopVersion).exists { case (3, 2, v) if v >= 2 => true case _ => false } } - /** - * Retrieves the major, minor and patch parts from the input `version`. Returns `None` if the - * input is not of a valid format. - * - * Examples of valid version: - * - 1 - * - 2.4 - * - 3.2.2 - * - 3.2.2.4 - * - 3.3.1-SNAPSHOT - * - 3.2.2.4SNAPSHOT (we only retrieve the first 3 components) - * - * Examples of invalid version: - * - ABC - * - 1X - * - 2.4XYZ - * - 2.4-SNAPSHOT - * - 3.4.5ABC - */ - def getVersionParts(version: String): Option[(Int, Int, Int)] = { - val matcher = VERSION_PATTERN.matcher(version) - if (matcher.matches() && matcher.groupCount() == 3) { - val major = matcher.group(1).toInt - val minor = if (matcher.group(2) == null) 0 else matcher.group(2).toInt - val patch = if (matcher.group(3) == null) 0 else matcher.group(3).toInt - Some((major, minor, patch)) - } else { - None - } - } - private def downloadVersion( version: HiveVersion, hadoopVersion: String, @@ -182,10 +150,6 @@ private[hive] object IsolatedClientLoader extends Logging { tempDir.listFiles().map(_.toURI.toURL) } - // A regex pattern to match Maven version numbers - private lazy val VERSION_PATTERN = - Pattern.compile("(\\d+)(?:\\.(\\d+)(?:\\.(\\d+)(?:[.-].*)?)?)?") - // A map from a given pair of HiveVersion and Hadoop version to jar files. // It is only used by forVersion. private val resolvedVersions = diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala index bd3726931c82..8ede73022c6e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala @@ -21,6 +21,7 @@ import java.io.File import java.net.URLClassLoader import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.util.VersionInfo import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils} @@ -69,23 +70,6 @@ class HadoopVersionInfoSuite extends SparkFunSuite { } } - test("SPARK-33212: test getVersionParts()") { - assert(IsolatedClientLoader.getVersionParts("3.2.2").contains((3, 2, 2))) - assert(IsolatedClientLoader.getVersionParts("3.2.2.4").contains((3, 2, 2))) - assert(IsolatedClientLoader.getVersionParts("3.2.2-SNAPSHOT").contains((3, 2, 2))) - assert(IsolatedClientLoader.getVersionParts("3.2.2.4XXX").contains((3, 2, 2))) - assert(IsolatedClientLoader.getVersionParts("3.2").contains((3, 2, 0))) - assert(IsolatedClientLoader.getVersionParts("3").contains((3, 0, 0))) - - // illegal cases - assert(IsolatedClientLoader.getVersionParts("ABC").isEmpty) - assert(IsolatedClientLoader.getVersionParts("3X").isEmpty) - assert(IsolatedClientLoader.getVersionParts("3.2-SNAPSHOT").isEmpty) - assert(IsolatedClientLoader.getVersionParts("3.2ABC").isEmpty) - assert(IsolatedClientLoader.getVersionParts("3-ABC").isEmpty) - assert(IsolatedClientLoader.getVersionParts("3.2.4XYZ").isEmpty) - } - test("SPARK-32212: test supportHadoopShadedClient()") { assert(IsolatedClientLoader.supportHadoopShadedClient("3.2.2")) assert(IsolatedClientLoader.supportHadoopShadedClient("3.2.3")) @@ -99,4 +83,8 @@ class HadoopVersionInfoSuite extends SparkFunSuite { assert(!IsolatedClientLoader.supportHadoopShadedClient("3.2.1")) assert(!IsolatedClientLoader.supportHadoopShadedClient("4")) } + + test("SPARK-32212: built-in Hadoop version should support shaded client") { + assert(IsolatedClientLoader.supportHadoopShadedClient(VersionInfo.getVersion)) + } } From 396130b5c6fe9a671e1a0ca90b533f5a4d6724de Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 26 Jan 2021 02:42:16 -0800 Subject: [PATCH 4/5] address comments & remove hive changes --- .../org/apache/spark/util/VersionUtils.scala | 27 +++++++------- .../apache/spark/util/VersionUtilsSuite.scala | 9 ++--- .../hive/client/IsolatedClientLoader.scala | 36 +++++++++---------- .../hive/client/HadoopVersionInfoSuite.scala | 17 ++++----- 4 files changed, 40 insertions(+), 49 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/VersionUtils.scala b/core/src/main/scala/org/apache/spark/util/VersionUtils.scala index 5bf2e22ae1b1..119a4f1e43ec 100644 --- a/core/src/main/scala/org/apache/spark/util/VersionUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/VersionUtils.scala @@ -66,16 +66,17 @@ private[spark] object VersionUtils { } /** - * Retrieves the major, minor and patch parts from the input `version`. Returns `None` if the + * Extracts the major, minor and patch parts from the input `version`. Note that if minor or patch + * version is missing from the input, this will return 0 for these parts. Returns `None` if the * input is not of a valid format. * * Examples of valid version: - * - 1 - * - 2.4 - * - 3.2.2 - * - 3.2.2.4 - * - 3.3.1-SNAPSHOT - * - 3.2.2.4SNAPSHOT (we only retrieve the first 3 components) + * - 1 (extracts to (1, 0, 0)) + * - 2.4 (extracts to (2, 4, 0)) + * - 3.2.2 (extracts to (3, 2, 2)) + * - 3.2.2.4 (extracts to 3, 2, 2)) + * - 3.3.1-SNAPSHOT (extracts to (3, 2, 2)) + * - 3.2.2.4SNAPSHOT (extracts to (3, 2, 2), only the first 3 components) * * Examples of invalid version: * - ABC @@ -88,13 +89,11 @@ private[spark] object VersionUtils { * input is a valid version. `None` otherwise. */ def majorMinorPatchVersion(version: String): Option[(Int, Int, Int)] = { - majorMinorPatchRegex.findFirstMatchIn(version) match { - case Some(m) => - val major = m.group(1).toInt - val minor = Option(m.group(2)).map(_.toInt).getOrElse(0) - val patch = Option(m.group(3)).map(_.toInt).getOrElse(0) - Some((major, minor, patch)) - case None => None + majorMinorPatchRegex.findFirstMatchIn(version).map { m => + val major = m.group(1).toInt + val minor = Option(m.group(2)).map(_.toInt).getOrElse(0) + val patch = Option(m.group(3)).map(_.toInt).getOrElse(0) + (major, minor, patch) } } } diff --git a/core/src/test/scala/org/apache/spark/util/VersionUtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/VersionUtilsSuite.scala index ffa7876fd979..ff68dd150973 100644 --- a/core/src/test/scala/org/apache/spark/util/VersionUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/VersionUtilsSuite.scala @@ -108,11 +108,8 @@ class VersionUtilsSuite extends SparkFunSuite { assert(VersionUtils.majorMinorPatchVersion("3").contains((3, 0, 0))) // illegal cases - assert(VersionUtils.majorMinorPatchVersion("ABC").isEmpty) - assert(VersionUtils.majorMinorPatchVersion("3X").isEmpty) - assert(VersionUtils.majorMinorPatchVersion("3.2-SNAPSHOT").isEmpty) - assert(VersionUtils.majorMinorPatchVersion("3.2ABC").isEmpty) - assert(VersionUtils.majorMinorPatchVersion("3-ABC").isEmpty) - assert(VersionUtils.majorMinorPatchVersion("3.2.4XYZ").isEmpty) + Seq("ABC", "3X", "3.2-SNAPSHOT", "3.2ABC", "3-ABC", "3.2.4XYZ").foreach { version => + assert(VersionUtils.majorMinorPatchVersion(version).isEmpty, s"version $version") + } } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index fef9e085939c..e520a0a115ee 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -88,28 +88,26 @@ private[hive] object IsolatedClientLoader extends Logging { barrierPrefixes = barrierPrefixes) } - def hiveVersion(version: String): HiveVersion = { - VersionUtils.majorMinorPatchVersion(version).flatMap { - case (12, _, _) | (0, 12, _) => Some(hive.v12) - case (13, _, _) | (0, 13, _) => Some(hive.v13) - case (14, _, _) | (0, 14, _) => Some(hive.v14) - case (1, 0, _) => Some(hive.v1_0) - case (1, 1, _) => Some(hive.v1_1) - case (1, 2, _) => Some(hive.v1_2) - case (2, 0, _) => Some(hive.v2_0) - case (2, 1, _) => Some(hive.v2_1) - case (2, 2, _) => Some(hive.v2_2) - case (2, 3, _) => Some(hive.v2_3) - case (3, 0, _) => Some(hive.v3_0) - case (3, 1, _) => Some(hive.v3_1) - case _ => None - }.getOrElse { + def hiveVersion(version: String): HiveVersion = version match { + case "12" | "0.12" | "0.12.0" => hive.v12 + case "13" | "0.13" | "0.13.0" | "0.13.1" => hive.v13 + case "14" | "0.14" | "0.14.0" => hive.v14 + case "1.0" | "1.0.0" | "1.0.1" => hive.v1_0 + case "1.1" | "1.1.0" | "1.1.1" => hive.v1_1 + case "1.2" | "1.2.0" | "1.2.1" | "1.2.2" => hive.v1_2 + case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0 + case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1 + case "2.2" | "2.2.0" => hive.v2_2 + case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" | "2.3.6" | "2.3.7" | + "2.3.8" => hive.v2_3 + case "3.0" | "3.0.0" => hive.v3_0 + case "3.1" | "3.1.0" | "3.1.1" | "3.1.2" => hive.v3_1 + case version => throw new UnsupportedOperationException(s"Unsupported Hive Metastore version ($version). " + s"Please set ${HiveUtils.HIVE_METASTORE_VERSION.key} with a valid version.") - } } - def supportHadoopShadedClient(hadoopVersion: String): Boolean = { + def supportsHadoopShadedClient(hadoopVersion: String): Boolean = { VersionUtils.majorMinorPatchVersion(hadoopVersion).exists { case (3, 2, v) if v >= 2 => true case _ => false @@ -121,7 +119,7 @@ private[hive] object IsolatedClientLoader extends Logging { hadoopVersion: String, ivyPath: Option[String], remoteRepos: String): Seq[URL] = { - val hadoopJarNames = if (supportHadoopShadedClient(hadoopVersion)) { + val hadoopJarNames = if (supportsHadoopShadedClient(hadoopVersion)) { Seq(s"org.apache.hadoop:hadoop-client-api:$hadoopVersion", s"org.apache.hadoop:hadoop-client-runtime:$hadoopVersion") } else { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala index 8ede73022c6e..5701a2a8d9c8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala @@ -71,20 +71,17 @@ class HadoopVersionInfoSuite extends SparkFunSuite { } test("SPARK-32212: test supportHadoopShadedClient()") { - assert(IsolatedClientLoader.supportHadoopShadedClient("3.2.2")) - assert(IsolatedClientLoader.supportHadoopShadedClient("3.2.3")) - assert(IsolatedClientLoader.supportHadoopShadedClient("3.2.2.1")) - assert(IsolatedClientLoader.supportHadoopShadedClient("3.2.2-XYZ")) - assert(IsolatedClientLoader.supportHadoopShadedClient("3.2.2.4-SNAPSHOT")) + Seq("3.2.2", "3.2.3", "3.2.2.1", "3.2.2-XYZ", "3.2.2.4-SNAPSHOT").foreach { version => + assert(IsolatedClientLoader.supportsHadoopShadedClient(version), s"version $version") + } // negative cases - assert(!IsolatedClientLoader.supportHadoopShadedClient("3.1.3")) - assert(!IsolatedClientLoader.supportHadoopShadedClient("3.2")) - assert(!IsolatedClientLoader.supportHadoopShadedClient("3.2.1")) - assert(!IsolatedClientLoader.supportHadoopShadedClient("4")) + Seq("3.1.3", "3.2", "3.2.1", "4").foreach { version => + assert(!IsolatedClientLoader.supportsHadoopShadedClient(version), s"version $version") + } } test("SPARK-32212: built-in Hadoop version should support shaded client") { - assert(IsolatedClientLoader.supportHadoopShadedClient(VersionInfo.getVersion)) + assert(IsolatedClientLoader.supportsHadoopShadedClient(VersionInfo.getVersion)) } } From 43367c51cc85c2893d663bc248ee95d4d15cdbde Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 26 Jan 2021 13:30:15 -0800 Subject: [PATCH 5/5] fix comment --- core/src/main/scala/org/apache/spark/util/VersionUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/util/VersionUtils.scala b/core/src/main/scala/org/apache/spark/util/VersionUtils.scala index 119a4f1e43ec..e97d1c939370 100644 --- a/core/src/main/scala/org/apache/spark/util/VersionUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/VersionUtils.scala @@ -75,7 +75,7 @@ private[spark] object VersionUtils { * - 2.4 (extracts to (2, 4, 0)) * - 3.2.2 (extracts to (3, 2, 2)) * - 3.2.2.4 (extracts to 3, 2, 2)) - * - 3.3.1-SNAPSHOT (extracts to (3, 2, 2)) + * - 3.3.1-SNAPSHOT (extracts to (3, 3, 1)) * - 3.2.2.4SNAPSHOT (extracts to (3, 2, 2), only the first 3 components) * * Examples of invalid version: