diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index d7b205c2fa0df..2972bc36e8261 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -1746,6 +1746,13 @@ The following options can be used to configure the version of Hive that is used + + spark.sql.hive.metastore.mavenRepo + http://www.datanucleus.org/downloads/maven2 + + Maven repository to download Hive Metastore jars when spark.sql.hive.metastore.jars is set to maven. + + spark.sql.hive.metastore.sharedPrefixes com.mysql.jdbc,
org.postgresql,
com.microsoft.sqlserver,
oracle.jdbc
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index e83941c2ecf66..fb1eba3531db6 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -168,6 +168,13 @@ class HiveContext private[hive]( */ protected[hive] def hiveMetastoreJars: String = getConf(HIVE_METASTORE_JARS) + /** + * The Maven repository from which the jars used to instantiate the HiveMetastoreClient + * are downloaded. This setting takes effect when HIVE_METASTORE_JARS is set to + * 'maven'. + */ + protected[hive] def hiveMetastoreMavenRepo: String = getConf(HIVE_METASTORE_MAVEN_REPO) + /** * A comma separated list of class prefixes that should be loaded using the classloader that * is shared between Spark SQL and a specific version of Hive. An example of classes that should @@ -292,6 +299,7 @@ class HiveContext private[hive]( hiveMetastoreVersion = hiveMetastoreVersion, hadoopVersion = VersionInfo.getVersion, config = allConfig, + mavenRepo = Some(hiveMetastoreMavenRepo), barrierPrefixes = hiveMetastoreBarrierPrefixes, sharedPrefixes = hiveMetastoreSharedPrefixes) } else { @@ -685,6 +693,12 @@ private[hive] object HiveContext { | Use Hive jars of specified version downloaded from Maven repositories. | 3. A classpath in the standard format for both Hive and Hadoop. 
""".stripMargin) + + val HIVE_METASTORE_MAVEN_REPO = stringConf("spark.sql.hive.metastore.mavenRepo", + defaultValue = Some("http://www.datanucleus.org/downloads/maven2"), + doc = "Maven repositories where Hive metastore jars which are used to instantiate the " + + "HiveMetastoreClient are downloaded.") + val CONVERT_METASTORE_PARQUET = booleanConf("spark.sql.hive.convertMetastoreParquet", defaultValue = Some(true), doc = "When set to false, Spark SQL will use the Hive SerDe for parquet tables instead of " + diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 010051d255fdc..b5081d173fcfc 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -42,6 +42,7 @@ private[hive] object IsolatedClientLoader extends Logging { hiveMetastoreVersion: String, hadoopVersion: String, config: Map[String, String] = Map.empty, + mavenRepo: Option[String], ivyPath: Option[String] = None, sharedPrefixes: Seq[String] = Seq.empty, barrierPrefixes: Seq[String] = Seq.empty): IsolatedClientLoader = synchronized { @@ -54,7 +55,7 @@ private[hive] object IsolatedClientLoader extends Logging { } else { val (downloadedFiles, actualHadoopVersion) = try { - (downloadVersion(resolvedVersion, hadoopVersion, ivyPath), hadoopVersion) + (downloadVersion(resolvedVersion, hadoopVersion, mavenRepo, ivyPath), hadoopVersion) } catch { case e: RuntimeException if e.getMessage.contains("hadoop") => // If the error message contains hadoop, it is probably because the hadoop @@ -68,7 +69,7 @@ private[hive] object IsolatedClientLoader extends Logging { "It is recommended to set jars used by Hive metastore client through " + "spark.sql.hive.metastore.jars in the production environment.") sharesHadoopClasses = false - 
(downloadVersion(resolvedVersion, "2.4.0", ivyPath), "2.4.0") + (downloadVersion(resolvedVersion, "2.4.0", mavenRepo, ivyPath), "2.4.0") } resolvedVersions.put((resolvedVersion, actualHadoopVersion), downloadedFiles) resolvedVersions((resolvedVersion, actualHadoopVersion)) @@ -95,6 +96,7 @@ private[hive] object IsolatedClientLoader extends Logging { private def downloadVersion( version: HiveVersion, hadoopVersion: String, + mavenRepo: Option[String], ivyPath: Option[String]): Seq[URL] = { val hiveArtifacts = version.extraDeps ++ Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde") @@ -105,7 +107,7 @@ private[hive] object IsolatedClientLoader extends Logging { val classpath = quietly { SparkSubmitUtils.resolveMavenCoordinates( hiveArtifacts.mkString(","), - Some("http://www.datanucleus.org/downloads/maven2"), + mavenRepo, ivyPath, exclusions = version.exclusions) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala index 502b240f3650f..2d53cebfd85ea 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala @@ -45,6 +45,8 @@ class VersionsSuite extends SparkFunSuite with Logging { Some(new File(sys.props("java.io.tmpdir"), "hive-ivy-cache").getAbsolutePath)) } + private val mavenRepo = HiveContext.HIVE_METASTORE_MAVEN_REPO.defaultValue + private def buildConf() = { lazy val warehousePath = Utils.createTempDir() lazy val metastorePath = Utils.createTempDir() @@ -59,6 +61,7 @@ class VersionsSuite extends SparkFunSuite with Logging { hiveMetastoreVersion = HiveContext.hiveExecutionVersion, hadoopVersion = VersionInfo.getVersion, config = buildConf(), + mavenRepo = mavenRepo, ivyPath = ivyPath).createClient() val db = new HiveDatabase("default", "") badClient.createDatabase(db) @@ -93,6 +96,7 @@ class VersionsSuite extends 
SparkFunSuite with Logging { hiveMetastoreVersion = "13", hadoopVersion = VersionInfo.getVersion, config = buildConf(), + mavenRepo = mavenRepo, ivyPath = ivyPath).createClient() } } @@ -112,6 +116,7 @@ class VersionsSuite extends SparkFunSuite with Logging { hiveMetastoreVersion = version, hadoopVersion = VersionInfo.getVersion, config = buildConf(), + mavenRepo = mavenRepo, ivyPath = ivyPath).createClient() }