diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 0899fd266268..9efdf192ff35 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -83,21 +83,21 @@ hadoop-yarn-client/2.7.4//hadoop-yarn-client-2.7.4.jar hadoop-yarn-common/2.7.4//hadoop-yarn-common-2.7.4.jar hadoop-yarn-server-common/2.7.4//hadoop-yarn-server-common-2.7.4.jar hadoop-yarn-server-web-proxy/2.7.4//hadoop-yarn-server-web-proxy-2.7.4.jar -hive-beeline/2.3.8//hive-beeline-2.3.8.jar -hive-cli/2.3.8//hive-cli-2.3.8.jar -hive-common/2.3.8//hive-common-2.3.8.jar -hive-exec/2.3.8/core/hive-exec-2.3.8-core.jar -hive-jdbc/2.3.8//hive-jdbc-2.3.8.jar -hive-llap-common/2.3.8//hive-llap-common-2.3.8.jar -hive-metastore/2.3.8//hive-metastore-2.3.8.jar -hive-serde/2.3.8//hive-serde-2.3.8.jar +hive-beeline/2.3.9//hive-beeline-2.3.9.jar +hive-cli/2.3.9//hive-cli-2.3.9.jar +hive-common/2.3.9//hive-common-2.3.9.jar +hive-exec/2.3.9/core/hive-exec-2.3.9-core.jar +hive-jdbc/2.3.9//hive-jdbc-2.3.9.jar +hive-llap-common/2.3.9//hive-llap-common-2.3.9.jar +hive-metastore/2.3.9//hive-metastore-2.3.9.jar +hive-serde/2.3.9//hive-serde-2.3.9.jar hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar -hive-shims-0.23/2.3.8//hive-shims-0.23-2.3.8.jar -hive-shims-common/2.3.8//hive-shims-common-2.3.8.jar -hive-shims-scheduler/2.3.8//hive-shims-scheduler-2.3.8.jar -hive-shims/2.3.8//hive-shims-2.3.8.jar +hive-shims-0.23/2.3.9//hive-shims-0.23-2.3.9.jar +hive-shims-common/2.3.9//hive-shims-common-2.3.9.jar +hive-shims-scheduler/2.3.9//hive-shims-scheduler-2.3.9.jar +hive-shims/2.3.9//hive-shims-2.3.9.jar hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar -hive-vector-code-gen/2.3.8//hive-vector-code-gen-2.3.8.jar +hive-vector-code-gen/2.3.9//hive-vector-code-gen-2.3.9.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar hk2-utils/2.6.1//hk2-utils-2.6.1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 
b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 70bcad453604..2f3da18f0569 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -61,21 +61,21 @@ guava/14.0.1//guava-14.0.1.jar hadoop-client-api/3.2.2//hadoop-client-api-3.2.2.jar hadoop-client-runtime/3.2.2//hadoop-client-runtime-3.2.2.jar hadoop-yarn-server-web-proxy/3.2.2//hadoop-yarn-server-web-proxy-3.2.2.jar -hive-beeline/2.3.8//hive-beeline-2.3.8.jar -hive-cli/2.3.8//hive-cli-2.3.8.jar -hive-common/2.3.8//hive-common-2.3.8.jar -hive-exec/2.3.8/core/hive-exec-2.3.8-core.jar -hive-jdbc/2.3.8//hive-jdbc-2.3.8.jar -hive-llap-common/2.3.8//hive-llap-common-2.3.8.jar -hive-metastore/2.3.8//hive-metastore-2.3.8.jar -hive-serde/2.3.8//hive-serde-2.3.8.jar +hive-beeline/2.3.9//hive-beeline-2.3.9.jar +hive-cli/2.3.9//hive-cli-2.3.9.jar +hive-common/2.3.9//hive-common-2.3.9.jar +hive-exec/2.3.9/core/hive-exec-2.3.9-core.jar +hive-jdbc/2.3.9//hive-jdbc-2.3.9.jar +hive-llap-common/2.3.9//hive-llap-common-2.3.9.jar +hive-metastore/2.3.9//hive-metastore-2.3.9.jar +hive-serde/2.3.9//hive-serde-2.3.9.jar hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar -hive-shims-0.23/2.3.8//hive-shims-0.23-2.3.8.jar -hive-shims-common/2.3.8//hive-shims-common-2.3.8.jar -hive-shims-scheduler/2.3.8//hive-shims-scheduler-2.3.8.jar -hive-shims/2.3.8//hive-shims-2.3.8.jar +hive-shims-0.23/2.3.9//hive-shims-0.23-2.3.9.jar +hive-shims-common/2.3.9//hive-shims-common-2.3.9.jar +hive-shims-scheduler/2.3.9//hive-shims-scheduler-2.3.9.jar +hive-shims/2.3.9//hive-shims-2.3.9.jar hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar -hive-vector-code-gen/2.3.8//hive-vector-code-gen-2.3.8.jar +hive-vector-code-gen/2.3.9//hive-vector-code-gen-2.3.9.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar hk2-utils/2.6.1//hk2-utils-2.6.1.jar diff --git a/docs/building-spark.md b/docs/building-spark.md index 8e1c84a37b43..1a0dee6a913b 100644 --- a/docs/building-spark.md +++ 
b/docs/building-spark.md @@ -83,9 +83,9 @@ Example: To enable Hive integration for Spark SQL along with its JDBC server and CLI, add the `-Phive` and `-Phive-thriftserver` profiles to your existing build options. -By default Spark will build with Hive 2.3.8. +By default Spark will build with Hive 2.3.9. - # With Hive 2.3.8 support + # With Hive 2.3.9 support ./build/mvn -Pyarn -Phive -Phive-thriftserver -DskipTests clean package ## Packaging without Hadoop Dependencies for YARN diff --git a/docs/sql-data-sources-hive-tables.md b/docs/sql-data-sources-hive-tables.md index 14dd75070994..6f480d0808d7 100644 --- a/docs/sql-data-sources-hive-tables.md +++ b/docs/sql-data-sources-hive-tables.md @@ -127,10 +127,10 @@ The following options can be used to configure the version of Hive that is used
spark.sql.hive.metastore.version
- 2.3.8
+ 2.3.9
Version of the Hive metastore. Available
- options are 0.12.0 through 2.3.8 and 3.0.0 through 3.1.2.
+ options are 0.12.0 through 2.3.9 and 3.0.0 through 3.1.2.
builtin
- Use Hive 2.3.8, which is bundled with the Spark assembly when -Phive is
+ Use Hive 2.3.9, which is bundled with the Spark assembly when -Phive is
enabled. When this option is chosen, spark.sql.hive.metastore.version must be
- either 2.3.8 or not defined.
+ either 2.3.9 or not defined.
maven
- "0.12.0 through 2.3.8 and " +
+ "0.12.0 through 2.3.9 and " +
"3.0.0 through 3.1.2.")
.version("1.4.0")
.stringConf
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
index bb78944fb161..d8203648b736 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
@@ -102,7 +102,7 @@ package object client {
// Since HIVE-23980, calcite-core included in Hive package jar.
// For spark, only VersionsSuite currently creates a hive materialized view for testing.
- case object v2_3 extends HiveVersion("2.3.8",
+ case object v2_3 extends HiveVersion("2.3.9",
exclusions = Seq("org.apache.calcite:calcite-core",
"org.apache.calcite:calcite-druid",
"org.apache.calcite.avatica:avatica",
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
index c9712968f9bd..250c46063d5c 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
@@ -203,7 +203,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils {
tryDownloadSpark(version, sparkTestingDir.getCanonicalPath)
}
- // Extract major.minor for testing Spark 3.1.x and 3.0.x with metastore 2.3.8 and Java 11.
+ // Extract major.minor for testing Spark 3.1.x and 3.0.x with metastore 2.3.9 and Java 11.
val hiveMetastoreVersion = """^\d+\.\d+""".r.findFirstIn(hiveVersion).get
val args = Seq(
"--name", "prepare testing tables",
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala
index aae49f70ca93..efec011b7157 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala
@@ -185,4 +185,35 @@ class HiveSerDeReadWriteSuite extends QueryTest with SQLTestUtils with TestHiveS
checkComplexTypes(fileFormat)
}
}
+
+ test("SPARK-34512: Disable validate default values when parsing Avro schemas") {
+ withTable("t1") {
+ hiveClient.runSqlHive(
+ """
+ |CREATE TABLE t1
+ | ROW FORMAT SERDE
+ | 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+ | STORED AS INPUTFORMAT
+ | 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+ | OUTPUTFORMAT
+ | 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+ | TBLPROPERTIES (
+ | 'avro.schema.literal'='{
+ | "namespace": "org.apache.spark.sql.hive.test",
+ | "name": "schema_with_default_value",
+ | "type": "record",
+ | "fields": [
+ | {
+ | "name": "ARRAY_WITH_DEFAULT",
+ | "type": {"type": "array", "items": "string"},
+ | "default": null
+ | }
+ | ]
+ | }')
+ |""".stripMargin)
+ // ARRAY_WITH_DEFAULT has a null default: insert via hiveClient, read back via Spark.
+ hiveClient.runSqlHive("INSERT INTO t1 SELECT array('SPARK-34512', 'HIVE-24797')")
+ checkAnswer(spark.table("t1"), Seq(Row(Array("SPARK-34512", "HIVE-24797"))))
+ }
+ }
}