diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 0899fd266268..9efdf192ff35 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -83,21 +83,21 @@ hadoop-yarn-client/2.7.4//hadoop-yarn-client-2.7.4.jar hadoop-yarn-common/2.7.4//hadoop-yarn-common-2.7.4.jar hadoop-yarn-server-common/2.7.4//hadoop-yarn-server-common-2.7.4.jar hadoop-yarn-server-web-proxy/2.7.4//hadoop-yarn-server-web-proxy-2.7.4.jar -hive-beeline/2.3.8//hive-beeline-2.3.8.jar -hive-cli/2.3.8//hive-cli-2.3.8.jar -hive-common/2.3.8//hive-common-2.3.8.jar -hive-exec/2.3.8/core/hive-exec-2.3.8-core.jar -hive-jdbc/2.3.8//hive-jdbc-2.3.8.jar -hive-llap-common/2.3.8//hive-llap-common-2.3.8.jar -hive-metastore/2.3.8//hive-metastore-2.3.8.jar -hive-serde/2.3.8//hive-serde-2.3.8.jar +hive-beeline/2.3.9//hive-beeline-2.3.9.jar +hive-cli/2.3.9//hive-cli-2.3.9.jar +hive-common/2.3.9//hive-common-2.3.9.jar +hive-exec/2.3.9/core/hive-exec-2.3.9-core.jar +hive-jdbc/2.3.9//hive-jdbc-2.3.9.jar +hive-llap-common/2.3.9//hive-llap-common-2.3.9.jar +hive-metastore/2.3.9//hive-metastore-2.3.9.jar +hive-serde/2.3.9//hive-serde-2.3.9.jar hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar -hive-shims-0.23/2.3.8//hive-shims-0.23-2.3.8.jar -hive-shims-common/2.3.8//hive-shims-common-2.3.8.jar -hive-shims-scheduler/2.3.8//hive-shims-scheduler-2.3.8.jar -hive-shims/2.3.8//hive-shims-2.3.8.jar +hive-shims-0.23/2.3.9//hive-shims-0.23-2.3.9.jar +hive-shims-common/2.3.9//hive-shims-common-2.3.9.jar +hive-shims-scheduler/2.3.9//hive-shims-scheduler-2.3.9.jar +hive-shims/2.3.9//hive-shims-2.3.9.jar hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar -hive-vector-code-gen/2.3.8//hive-vector-code-gen-2.3.8.jar +hive-vector-code-gen/2.3.9//hive-vector-code-gen-2.3.9.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar hk2-utils/2.6.1//hk2-utils-2.6.1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 70bcad453604..2f3da18f0569 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -61,21 +61,21 @@ guava/14.0.1//guava-14.0.1.jar hadoop-client-api/3.2.2//hadoop-client-api-3.2.2.jar hadoop-client-runtime/3.2.2//hadoop-client-runtime-3.2.2.jar hadoop-yarn-server-web-proxy/3.2.2//hadoop-yarn-server-web-proxy-3.2.2.jar -hive-beeline/2.3.8//hive-beeline-2.3.8.jar -hive-cli/2.3.8//hive-cli-2.3.8.jar -hive-common/2.3.8//hive-common-2.3.8.jar -hive-exec/2.3.8/core/hive-exec-2.3.8-core.jar -hive-jdbc/2.3.8//hive-jdbc-2.3.8.jar -hive-llap-common/2.3.8//hive-llap-common-2.3.8.jar -hive-metastore/2.3.8//hive-metastore-2.3.8.jar -hive-serde/2.3.8//hive-serde-2.3.8.jar +hive-beeline/2.3.9//hive-beeline-2.3.9.jar +hive-cli/2.3.9//hive-cli-2.3.9.jar +hive-common/2.3.9//hive-common-2.3.9.jar +hive-exec/2.3.9/core/hive-exec-2.3.9-core.jar +hive-jdbc/2.3.9//hive-jdbc-2.3.9.jar +hive-llap-common/2.3.9//hive-llap-common-2.3.9.jar +hive-metastore/2.3.9//hive-metastore-2.3.9.jar +hive-serde/2.3.9//hive-serde-2.3.9.jar hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar -hive-shims-0.23/2.3.8//hive-shims-0.23-2.3.8.jar -hive-shims-common/2.3.8//hive-shims-common-2.3.8.jar -hive-shims-scheduler/2.3.8//hive-shims-scheduler-2.3.8.jar -hive-shims/2.3.8//hive-shims-2.3.8.jar +hive-shims-0.23/2.3.9//hive-shims-0.23-2.3.9.jar +hive-shims-common/2.3.9//hive-shims-common-2.3.9.jar +hive-shims-scheduler/2.3.9//hive-shims-scheduler-2.3.9.jar +hive-shims/2.3.9//hive-shims-2.3.9.jar hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar -hive-vector-code-gen/2.3.8//hive-vector-code-gen-2.3.8.jar +hive-vector-code-gen/2.3.9//hive-vector-code-gen-2.3.9.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar hk2-utils/2.6.1//hk2-utils-2.6.1.jar diff --git a/docs/building-spark.md b/docs/building-spark.md index 8e1c84a37b43..1a0dee6a913b 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -83,9 +83,9 @@ Example: To enable Hive integration for Spark SQL along with its JDBC server and CLI, add the `-Phive` and `-Phive-thriftserver` profiles to your existing build options. -By default Spark will build with Hive 2.3.8. +By default Spark will build with Hive 2.3.9. - # With Hive 2.3.8 support + # With Hive 2.3.9 support ./build/mvn -Pyarn -Phive -Phive-thriftserver -DskipTests clean package ## Packaging without Hadoop Dependencies for YARN diff --git a/docs/sql-data-sources-hive-tables.md b/docs/sql-data-sources-hive-tables.md index 14dd75070994..6f480d0808d7 100644 --- a/docs/sql-data-sources-hive-tables.md +++ b/docs/sql-data-sources-hive-tables.md @@ -127,10 +127,10 @@ The following options can be used to configure the version of Hive that is used Property NameDefaultMeaningSince Version spark.sql.hive.metastore.version - 2.3.8 + 2.3.9 Version of the Hive metastore. Available - options are 0.12.0 through 2.3.8 and 3.0.0 through 3.1.2. + options are 0.12.0 through 2.3.9 and 3.0.0 through 3.1.2. 1.4.0 @@ -142,9 +142,9 @@ The following options can be used to configure the version of Hive that is used property can be one of four options:
  1. builtin
  2. - Use Hive 2.3.8, which is bundled with the Spark assembly when -Phive is + Use Hive 2.3.9, which is bundled with the Spark assembly when -Phive is enabled. When this option is chosen, spark.sql.hive.metastore.version must be - either 2.3.8 or not defined. + either 2.3.9 or not defined.
  3. maven
  4. Use Hive jars of specified version downloaded from Maven repositories. This configuration is not generally recommended for production deployments. diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index a5b2d8be7aaa..fd36e19a4963 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -921,7 +921,7 @@ Python UDF registration is unchanged. Spark SQL is designed to be compatible with the Hive Metastore, SerDes and UDFs. Currently, Hive SerDes and UDFs are based on built-in Hive, and Spark SQL can be connected to different versions of Hive Metastore -(from 0.12.0 to 2.3.8 and 3.0.0 to 3.1.2. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)). +(from 0.12.0 to 2.3.9 and 3.0.0 to 3.1.2. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)). #### Deploying in Existing Hive Warehouses {:.no_toc} diff --git a/pom.xml b/pom.xml index 930030ef30b5..3ca661aee727 100644 --- a/pom.xml +++ b/pom.xml @@ -128,8 +128,8 @@ org.apache.hive core - 2.3.8 - 2.3.8 + 2.3.9 + 2.3.9 2.3 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index ee0c7c370922..76c9f790d988 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3642,7 +3642,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark test("SPARK-33084: Add jar support Ivy URI in SQL") { val sc = spark.sparkContext - val hiveVersion = "2.3.8" + val hiveVersion = "2.3.9" // transitive=false, only download specified jar sql(s"ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:$hiveVersion?transitive=false") assert(sc.listJars() diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 8c2f02cfa118..3b010d94939f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -71,7 +71,7 @@ private[spark] object HiveUtils extends Logging { val HIVE_METASTORE_VERSION = buildStaticConf("spark.sql.hive.metastore.version") .doc("Version of the Hive metastore. Available options are " + - "0.12.0 through 2.3.8 and " + + "0.12.0 through 2.3.9 and " + "3.0.0 through 3.1.2.") .version("1.4.0") .stringConf diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala index bb78944fb161..d8203648b736 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala @@ -102,7 +102,7 @@ package object client { // Since HIVE-23980, calcite-core included in Hive package jar. // For spark, only VersionsSuite currently creates a hive materialized view for testing. - case object v2_3 extends HiveVersion("2.3.8", + case object v2_3 extends HiveVersion("2.3.9", exclusions = Seq("org.apache.calcite:calcite-core", "org.apache.calcite:calcite-druid", "org.apache.calcite.avatica:avatica", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index c9712968f9bd..250c46063d5c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -203,7 +203,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { tryDownloadSpark(version, sparkTestingDir.getCanonicalPath) } - // Extract major.minor for testing Spark 3.1.x and 3.0.x with metastore 2.3.8 and Java 11. + // Extract major.minor for testing Spark 3.1.x and 3.0.x with metastore 2.3.9 and Java 11. val hiveMetastoreVersion = """^\d+\.\d+""".r.findFirstIn(hiveVersion).get val args = Seq( "--name", "prepare testing tables", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala index aae49f70ca93..efec011b7157 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala @@ -185,4 +185,35 @@ class HiveSerDeReadWriteSuite extends QueryTest with SQLTestUtils with TestHiveS checkComplexTypes(fileFormat) } } + + test("SPARK-34512: Disable validate default values when parsing Avro schemas") { + withTable("t1") { + hiveClient.runSqlHive( + """ + |CREATE TABLE t1 + | ROW FORMAT SERDE + | 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + | STORED AS INPUTFORMAT + | 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + | OUTPUTFORMAT + | 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' + | TBLPROPERTIES ( + | 'avro.schema.literal'='{ + | "namespace": "org.apache.spark.sql.hive.test", + | "name": "schema_with_default_value", + | "type": "record", + | "fields": [ + | { + | "name": "ARRAY_WITH_DEFAULT", + | "type": {"type": "array", "items": "string"}, + | "default": null + | } + | ] + | }') + |""".stripMargin) + + hiveClient.runSqlHive("INSERT INTO t1 SELECT array('SPARK-34512', 'HIVE-24797')") + checkAnswer(spark.table("t1"), Seq(Row(Array("SPARK-34512", "HIVE-24797")))) + } + } }