diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index d7b205c2fa0df..2972bc36e8261 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -1746,6 +1746,13 @@ The following options can be used to configure the version of Hive that is used
+
+ spark.sql.hive.metastore.mavenRepo |
+ http://www.datanucleus.org/downloads/maven2 |
+
+ Maven repository to download Hive Metastore jars when spark.sql.hive.metastore.jars is set to maven.
+ |
+
spark.sql.hive.metastore.sharedPrefixes |
com.mysql.jdbc, org.postgresql, com.microsoft.sqlserver, oracle.jdbc |
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index e83941c2ecf66..fb1eba3531db6 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -168,6 +168,13 @@ class HiveContext private[hive](
*/
protected[hive] def hiveMetastoreJars: String = getConf(HIVE_METASTORE_JARS)
+ /**
+ * The Maven repository from which the jars used to instantiate the HiveMetastoreClient are
+ * downloaded. This setting only takes effect when HIVE_METASTORE_JARS is set to
+ * 'maven'.
+ */
+ protected[hive] def hiveMetastoreMavenRepo: String = getConf(HIVE_METASTORE_MAVEN_REPO)
+
/**
* A comma separated list of class prefixes that should be loaded using the classloader that
* is shared between Spark SQL and a specific version of Hive. An example of classes that should
@@ -292,6 +299,7 @@ class HiveContext private[hive](
hiveMetastoreVersion = hiveMetastoreVersion,
hadoopVersion = VersionInfo.getVersion,
config = allConfig,
+ mavenRepo = Some(hiveMetastoreMavenRepo),
barrierPrefixes = hiveMetastoreBarrierPrefixes,
sharedPrefixes = hiveMetastoreSharedPrefixes)
} else {
@@ -685,6 +693,12 @@ private[hive] object HiveContext {
| Use Hive jars of specified version downloaded from Maven repositories.
| 3. A classpath in the standard format for both Hive and Hadoop.
""".stripMargin)
+
+ val HIVE_METASTORE_MAVEN_REPO = stringConf("spark.sql.hive.metastore.mavenRepo",
+ defaultValue = Some("http://www.datanucleus.org/downloads/maven2"),
+ doc = "Maven repositories where Hive metastore jars which are used to instantiate the " +
+ "HiveMetastoreClient are downloaded.")
+
val CONVERT_METASTORE_PARQUET = booleanConf("spark.sql.hive.convertMetastoreParquet",
defaultValue = Some(true),
doc = "When set to false, Spark SQL will use the Hive SerDe for parquet tables instead of " +
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index 010051d255fdc..b5081d173fcfc 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -42,6 +42,7 @@ private[hive] object IsolatedClientLoader extends Logging {
hiveMetastoreVersion: String,
hadoopVersion: String,
config: Map[String, String] = Map.empty,
+ mavenRepo: Option[String],
ivyPath: Option[String] = None,
sharedPrefixes: Seq[String] = Seq.empty,
barrierPrefixes: Seq[String] = Seq.empty): IsolatedClientLoader = synchronized {
@@ -54,7 +55,7 @@ private[hive] object IsolatedClientLoader extends Logging {
} else {
val (downloadedFiles, actualHadoopVersion) =
try {
- (downloadVersion(resolvedVersion, hadoopVersion, ivyPath), hadoopVersion)
+ (downloadVersion(resolvedVersion, hadoopVersion, mavenRepo, ivyPath), hadoopVersion)
} catch {
case e: RuntimeException if e.getMessage.contains("hadoop") =>
// If the error message contains hadoop, it is probably because the hadoop
@@ -68,7 +69,7 @@ private[hive] object IsolatedClientLoader extends Logging {
"It is recommended to set jars used by Hive metastore client through " +
"spark.sql.hive.metastore.jars in the production environment.")
sharesHadoopClasses = false
- (downloadVersion(resolvedVersion, "2.4.0", ivyPath), "2.4.0")
+ (downloadVersion(resolvedVersion, "2.4.0", mavenRepo, ivyPath), "2.4.0")
}
resolvedVersions.put((resolvedVersion, actualHadoopVersion), downloadedFiles)
resolvedVersions((resolvedVersion, actualHadoopVersion))
@@ -95,6 +96,7 @@ private[hive] object IsolatedClientLoader extends Logging {
private def downloadVersion(
version: HiveVersion,
hadoopVersion: String,
+ mavenRepo: Option[String],
ivyPath: Option[String]): Seq[URL] = {
val hiveArtifacts = version.extraDeps ++
Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde")
@@ -105,7 +107,7 @@ private[hive] object IsolatedClientLoader extends Logging {
val classpath = quietly {
SparkSubmitUtils.resolveMavenCoordinates(
hiveArtifacts.mkString(","),
- Some("http://www.datanucleus.org/downloads/maven2"),
+ mavenRepo,
ivyPath,
exclusions = version.exclusions)
}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
index 502b240f3650f..2d53cebfd85ea 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
@@ -45,6 +45,8 @@ class VersionsSuite extends SparkFunSuite with Logging {
Some(new File(sys.props("java.io.tmpdir"), "hive-ivy-cache").getAbsolutePath))
}
+ private val mavenRepo: Option[String] = HiveContext.HIVE_METASTORE_MAVEN_REPO.defaultValue
+
private def buildConf() = {
lazy val warehousePath = Utils.createTempDir()
lazy val metastorePath = Utils.createTempDir()
@@ -59,6 +61,7 @@ class VersionsSuite extends SparkFunSuite with Logging {
hiveMetastoreVersion = HiveContext.hiveExecutionVersion,
hadoopVersion = VersionInfo.getVersion,
config = buildConf(),
+ mavenRepo = mavenRepo,
ivyPath = ivyPath).createClient()
val db = new HiveDatabase("default", "")
badClient.createDatabase(db)
@@ -93,6 +96,7 @@ class VersionsSuite extends SparkFunSuite with Logging {
hiveMetastoreVersion = "13",
hadoopVersion = VersionInfo.getVersion,
config = buildConf(),
+ mavenRepo = mavenRepo,
ivyPath = ivyPath).createClient()
}
}
@@ -112,6 +116,7 @@ class VersionsSuite extends SparkFunSuite with Logging {
hiveMetastoreVersion = version,
hadoopVersion = VersionInfo.getVersion,
config = buildConf(),
+ mavenRepo = mavenRepo,
ivyPath = ivyPath).createClient()
}