diff --git a/core/pom.xml b/core/pom.xml index 09fa153c8f20..597b17316dc0 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -35,10 +35,6 @@ - - com.thoughtworks.paranamer - paranamer - org.apache.avro avro @@ -46,7 +42,6 @@ org.apache.avro avro-mapred - ${avro.mapred.classifier} com.google.guava diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 8d8ef2e97209..ed2aff4e96d1 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -22,9 +22,9 @@ arrow-memory-netty/2.0.0//arrow-memory-netty-2.0.0.jar arrow-vector/2.0.0//arrow-vector-2.0.0.jar audience-annotations/0.5.0//audience-annotations-0.5.0.jar automaton/1.11-8//automaton-1.11-8.jar -avro-ipc/1.8.2//avro-ipc-1.8.2.jar -avro-mapred/1.8.2/hadoop2/avro-mapred-1.8.2-hadoop2.jar -avro/1.8.2//avro-1.8.2.jar +avro-ipc/1.10.1//avro-ipc-1.10.1.jar +avro-mapred/1.10.1//avro-mapred-1.10.1.jar +avro/1.10.1//avro-1.10.1.jar bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar breeze-macros_2.12/1.0//breeze-macros_2.12-1.0.jar breeze_2.12/1.0//breeze_2.12-1.0.jar @@ -81,21 +81,21 @@ hadoop-yarn-client/2.7.4//hadoop-yarn-client-2.7.4.jar hadoop-yarn-common/2.7.4//hadoop-yarn-common-2.7.4.jar hadoop-yarn-server-common/2.7.4//hadoop-yarn-server-common-2.7.4.jar hadoop-yarn-server-web-proxy/2.7.4//hadoop-yarn-server-web-proxy-2.7.4.jar -hive-beeline/2.3.7//hive-beeline-2.3.7.jar -hive-cli/2.3.7//hive-cli-2.3.7.jar -hive-common/2.3.7//hive-common-2.3.7.jar -hive-exec/2.3.7/core/hive-exec-2.3.7-core.jar -hive-jdbc/2.3.7//hive-jdbc-2.3.7.jar -hive-llap-common/2.3.7//hive-llap-common-2.3.7.jar -hive-metastore/2.3.7//hive-metastore-2.3.7.jar -hive-serde/2.3.7//hive-serde-2.3.7.jar +hive-beeline/2.3.8//hive-beeline-2.3.8.jar +hive-cli/2.3.8//hive-cli-2.3.8.jar +hive-common/2.3.8//hive-common-2.3.8.jar +hive-exec/2.3.8/core/hive-exec-2.3.8-core.jar +hive-jdbc/2.3.8//hive-jdbc-2.3.8.jar +hive-llap-common/2.3.8//hive-llap-common-2.3.8.jar +hive-metastore/2.3.8//hive-metastore-2.3.8.jar +hive-serde/2.3.8//hive-serde-2.3.8.jar hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar -hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar -hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar -hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar -hive-shims/2.3.7//hive-shims-2.3.7.jar +hive-shims-0.23/2.3.8//hive-shims-0.23-2.3.8.jar +hive-shims-common/2.3.8//hive-shims-common-2.3.8.jar +hive-shims-scheduler/2.3.8//hive-shims-scheduler-2.3.8.jar +hive-shims/2.3.8//hive-shims-2.3.8.jar hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar -hive-vector-code-gen/2.3.7//hive-vector-code-gen-2.3.7.jar +hive-vector-code-gen/2.3.8//hive-vector-code-gen-2.3.8.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar hk2-utils/2.6.1//hk2-utils-2.6.1.jar @@ -125,6 +125,7 @@ jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar janino/3.0.16//janino-3.0.16.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar +javax.annotation-api/1.3.2//javax.annotation-api-1.3.2.jar javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javolution/5.5.1//javolution-5.5.1.jar @@ -202,12 +203,12 @@ orc-shims/1.6.6//orc-shims-1.6.6.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar -parquet-column/1.10.1//parquet-column-1.10.1.jar -parquet-common/1.10.1//parquet-common-1.10.1.jar -parquet-encoding/1.10.1//parquet-encoding-1.10.1.jar 
-parquet-format/2.4.0//parquet-format-2.4.0.jar -parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar -parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar +parquet-column/1.11.1//parquet-column-1.11.1.jar +parquet-common/1.11.1//parquet-common-1.11.1.jar +parquet-encoding/1.11.1//parquet-encoding-1.11.1.jar +parquet-format-structures/1.11.1//parquet-format-structures-1.11.1.jar +parquet-hadoop/1.11.1//parquet-hadoop-1.11.1.jar +parquet-jackson/1.11.1//parquet-jackson-1.11.1.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar py4j/0.10.9.1//py4j-0.10.9.1.jar pyrolite/4.30//pyrolite-4.30.jar @@ -234,12 +235,13 @@ super-csv/2.2.0//super-csv-2.2.0.jar threeten-extra/1.5.0//threeten-extra-1.5.0.jar transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.0//univocity-parsers-2.9.0.jar +velocity-engine-core/2.2//velocity-engine-core-2.2.jar velocity/1.5//velocity-1.5.jar xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar xercesImpl/2.12.0//xercesImpl-2.12.0.jar xml-apis/1.4.01//xml-apis-1.4.01.jar xmlenc/0.52//xmlenc-0.52.jar -xz/1.5//xz-1.5.jar +xz/1.8//xz-1.8.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper/3.4.14//zookeeper-3.4.14.jar zstd-jni/1.4.8-1//zstd-jni-1.4.8-1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index bf56fc18c044..9ca2cc9a4b1a 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -19,9 +19,9 @@ arrow-memory-netty/2.0.0//arrow-memory-netty-2.0.0.jar arrow-vector/2.0.0//arrow-vector-2.0.0.jar audience-annotations/0.5.0//audience-annotations-0.5.0.jar automaton/1.11-8//automaton-1.11-8.jar -avro-ipc/1.8.2//avro-ipc-1.8.2.jar -avro-mapred/1.8.2/hadoop2/avro-mapred-1.8.2-hadoop2.jar -avro/1.8.2//avro-1.8.2.jar +avro-ipc/1.10.1//avro-ipc-1.10.1.jar +avro-mapred/1.10.1//avro-mapred-1.10.1.jar +avro/1.10.1//avro-1.10.1.jar bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar breeze-macros_2.12/1.0//breeze-macros_2.12-1.0.jar breeze_2.12/1.0//breeze_2.12-1.0.jar @@ -80,21 +80,21 @@ hadoop-yarn-common/3.2.0//hadoop-yarn-common-3.2.0.jar hadoop-yarn-registry/3.2.0//hadoop-yarn-registry-3.2.0.jar hadoop-yarn-server-common/3.2.0//hadoop-yarn-server-common-3.2.0.jar hadoop-yarn-server-web-proxy/3.2.0//hadoop-yarn-server-web-proxy-3.2.0.jar -hive-beeline/2.3.7//hive-beeline-2.3.7.jar -hive-cli/2.3.7//hive-cli-2.3.7.jar -hive-common/2.3.7//hive-common-2.3.7.jar -hive-exec/2.3.7/core/hive-exec-2.3.7-core.jar -hive-jdbc/2.3.7//hive-jdbc-2.3.7.jar -hive-llap-common/2.3.7//hive-llap-common-2.3.7.jar -hive-metastore/2.3.7//hive-metastore-2.3.7.jar -hive-serde/2.3.7//hive-serde-2.3.7.jar +hive-beeline/2.3.8//hive-beeline-2.3.8.jar +hive-cli/2.3.8//hive-cli-2.3.8.jar +hive-common/2.3.8//hive-common-2.3.8.jar +hive-exec/2.3.8/core/hive-exec-2.3.8-core.jar +hive-jdbc/2.3.8//hive-jdbc-2.3.8.jar +hive-llap-common/2.3.8//hive-llap-common-2.3.8.jar +hive-metastore/2.3.8//hive-metastore-2.3.8.jar +hive-serde/2.3.8//hive-serde-2.3.8.jar hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar -hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar -hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar -hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar -hive-shims/2.3.7//hive-shims-2.3.7.jar +hive-shims-0.23/2.3.8//hive-shims-0.23-2.3.8.jar +hive-shims-common/2.3.8//hive-shims-common-2.3.8.jar +hive-shims-scheduler/2.3.8//hive-shims-scheduler-2.3.8.jar +hive-shims/2.3.8//hive-shims-2.3.8.jar hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar -hive-vector-code-gen/2.3.7//hive-vector-code-gen-2.3.7.jar 
+hive-vector-code-gen/2.3.8//hive-vector-code-gen-2.3.8.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar hk2-utils/2.6.1//hk2-utils-2.6.1.jar @@ -124,6 +124,7 @@ jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar janino/3.0.16//janino-3.0.16.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar +javax.annotation-api/1.3.2//javax.annotation-api-1.3.2.jar javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar @@ -217,12 +218,12 @@ orc-shims/1.6.6//orc-shims-1.6.6.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar -parquet-column/1.10.1//parquet-column-1.10.1.jar -parquet-common/1.10.1//parquet-common-1.10.1.jar -parquet-encoding/1.10.1//parquet-encoding-1.10.1.jar -parquet-format/2.4.0//parquet-format-2.4.0.jar -parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar -parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar +parquet-column/1.11.1//parquet-column-1.11.1.jar +parquet-common/1.11.1//parquet-common-1.11.1.jar +parquet-encoding/1.11.1//parquet-encoding-1.11.1.jar +parquet-format-structures/1.11.1//parquet-format-structures-1.11.1.jar +parquet-hadoop/1.11.1//parquet-hadoop-1.11.1.jar +parquet-jackson/1.11.1//parquet-jackson-1.11.1.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar py4j/0.10.9.1//py4j-0.10.9.1.jar pyrolite/4.30//pyrolite-4.30.jar @@ -251,10 +252,11 @@ threeten-extra/1.5.0//threeten-extra-1.5.0.jar token-provider/1.0.1//token-provider-1.0.1.jar transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.0//univocity-parsers-2.9.0.jar +velocity-engine-core/2.2//velocity-engine-core-2.2.jar velocity/1.5//velocity-1.5.jar woodstox-core/5.0.3//woodstox-core-5.0.3.jar xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar -xz/1.5//xz-1.5.jar +xz/1.8//xz-1.8.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper/3.4.14//zookeeper-3.4.14.jar zstd-jni/1.4.8-1//zstd-jni-1.4.8-1.jar diff --git a/docs/building-spark.md b/docs/building-spark.md index 5106f2abd418..f9599b642d30 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -83,9 +83,9 @@ Example: To enable Hive integration for Spark SQL along with its JDBC server and CLI, add the `-Phive` and `-Phive-thriftserver` profiles to your existing build options. -By default Spark will build with Hive 2.3.7. +By default Spark will build with Hive 2.3.8. - # With Hive 2.3.7 support + # With Hive 2.3.8 support ./build/mvn -Pyarn -Phive -Phive-thriftserver -DskipTests clean package ## Packaging without Hadoop Dependencies for YARN diff --git a/docs/sql-data-sources-avro.md b/docs/sql-data-sources-avro.md index 9ecc6eb91da5..f69327a8c390 100644 --- a/docs/sql-data-sources-avro.md +++ b/docs/sql-data-sources-avro.md @@ -339,7 +339,7 @@ applications. Read the [Advanced Dependency Management](https://spark.apache Submission Guide for more details. ## Supported types for Avro -> Spark SQL conversion -Currently Spark supports reading all [primitive types](https://avro.apache.org/docs/1.8.2/spec.html#schema_primitive) and [complex types](https://avro.apache.org/docs/1.8.2/spec.html#schema_complex) under records of Avro. +Currently Spark supports reading all [primitive types](https://avro.apache.org/docs/1.10.1/spec.html#schema_primitive) and [complex types](https://avro.apache.org/docs/1.8.2/spec.html#schema_complex) under records of Avro. 
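To see the documented type mapping in practice, here is a minimal Scala sketch that reads an Avro file with the built-in `avro` data source. It assumes the `spark-avro` module is on the classpath and uses a hypothetical `/tmp/users.avro` input path.

```scala
import org.apache.spark.sql.SparkSession

object AvroReadSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("avro-read-sketch")
      .master("local[*]")
      .getOrCreate()

    // Primitive and complex Avro types under the top-level record are mapped
    // to a Spark SQL schema automatically on read.
    val users = spark.read.format("avro").load("/tmp/users.avro") // hypothetical input
    users.printSchema()
    users.show(truncate = false)

    spark.stop()
  }
}
```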
@@ -403,7 +403,7 @@ In addition to the types listed above, it supports reading `union` types. The fo 3. `union(something, null)`, where something is any supported Avro type. This will be mapped to the same Spark SQL type as that of something, with nullable set to true. All other union types are considered complex. They will be mapped to StructType where field names are member0, member1, etc., in accordance with members of the union. This is consistent with the behavior when converting between Avro and Parquet. -It also supports reading the following Avro [logical types](https://avro.apache.org/docs/1.8.2/spec.html#Logical+Types): +It also supports reading the following Avro [logical types](https://avro.apache.org/docs/1.10.1/spec.html#Logical+Types):
Avro type | Spark SQL type
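Logical types travel through the same data source. A small sketch, assuming a local build with the `spark-avro` module and a hypothetical `/tmp/events_avro` output directory: a `TimestampType` column is stored using an Avro timestamp logical type and should come back as a timestamp when read.

```scala
import java.sql.Timestamp
import org.apache.spark.sql.SparkSession

object AvroLogicalTypeSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("avro-logical-type-sketch")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    val events = Seq(
      (1L, Timestamp.valueOf("2021-01-01 00:00:00")),
      (2L, Timestamp.valueOf("2021-01-02 12:34:56"))
    ).toDF("id", "ts")

    val out = "/tmp/events_avro" // hypothetical output directory
    events.write.mode("overwrite").format("avro").save(out)

    // The timestamp column round-trips via an Avro logical type and is read
    // back as TimestampType.
    spark.read.format("avro").load(out).printSchema()

    spark.stop()
  }
}
```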
diff --git a/docs/sql-data-sources-hive-tables.md b/docs/sql-data-sources-hive-tables.md
index 376c2042d422..723236e866dd 100644
--- a/docs/sql-data-sources-hive-tables.md
+++ b/docs/sql-data-sources-hive-tables.md
@@ -127,10 +127,10 @@ The following options can be used to configure the version of Hive that is used
   Property Name | Default | Meaning | Since Version
   spark.sql.hive.metastore.version
-  2.3.7
+  2.3.8
   Version of the Hive metastore. Available
-  options are 0.12.0 through 2.3.7 and 3.0.0 through 3.1.2.
+  options are 0.12.0 through 2.3.8 and 3.0.0 through 3.1.2.
   1.4.0
@@ -142,9 +142,9 @@ property can be one of four options:
  1. builtin
  2. - Use Hive 2.3.7, which is bundled with the Spark assembly when -Phive is + Use Hive 2.3.8, which is bundled with the Spark assembly when -Phive is enabled. When this option is chosen, spark.sql.hive.metastore.version must be - either 2.3.7 or not defined. + either 2.3.8 or not defined.
  3. maven
  4. Use Hive jars of specified version downloaded from Maven repositories. This configuration is not generally recommended for production deployments. diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 8cf1a9c6f701..d8cbbbb748c3 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -854,7 +854,7 @@ Python UDF registration is unchanged. Spark SQL is designed to be compatible with the Hive Metastore, SerDes and UDFs. Currently, Hive SerDes and UDFs are based on built-in Hive, and Spark SQL can be connected to different versions of Hive Metastore -(from 0.12.0 to 2.3.7 and 3.0.0 to 3.1.2. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)). +(from 0.12.0 to 2.3.8 and 3.0.0 to 3.1.2. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)). #### Deploying in Existing Hive Warehouses {:.no_toc} diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala index 8972b0553c52..a2228f2564dd 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala @@ -51,14 +51,14 @@ private[sql] class AvroOptions( /** * Top level record name in write result, which is required in Avro spec. - * See https://avro.apache.org/docs/1.8.2/spec.html#schema_record . + * See https://avro.apache.org/docs/1.10.1/spec.html#schema_record . * Default value is "topLevelRecord" */ val recordName: String = parameters.getOrElse("recordName", "topLevelRecord") /** * Record namespace in write result. Default value is "". - * See Avro spec for details: https://avro.apache.org/docs/1.8.2/spec.html#schema_record . + * See Avro spec for details: https://avro.apache.org/docs/1.10.1/spec.html#schema_record . 
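 * Example of how these write options can be set (editor's sketch; given some
 * DataFrame df, the option values below are hypothetical):
 * {{{
 *   df.write.format("avro")
 *     .option("recordName", "User")
 *     .option("recordNamespace", "com.example.avro")
 *     .save("/tmp/users_avro")
 * }}}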
*/ val recordNamespace: String = parameters.getOrElse("recordNamespace", "") diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index d3bfb716f515..6f6e8c7c4cc6 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -1015,7 +1015,7 @@ abstract class AvroSuite .save(s"$tempDir/${UUID.randomUUID()}") }.getCause.getMessage assert(message.contains("Caused by: java.lang.NullPointerException: " + - "in test_schema in string null of string in field Name")) + "null of string in string in field Name of test_schema in test_schema")) } } diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 2359e99f657f..814244fb3939 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -77,7 +77,6 @@ org.apache.avro avro-mapred - ${avro.mapred.classifier} provided diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 22259b08141d..958bce75aa21 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -94,15 +94,9 @@ hadoop-client provided - - org.apache.avro - avro-ipc - provided - org.apache.avro avro-mapred - ${avro.mapred.classifier} provided diff --git a/pom.xml b/pom.xml index f921e35a76b4..fa06959b25c5 100644 --- a/pom.xml +++ b/pom.xml @@ -128,15 +128,15 @@ org.apache.hive core - 2.3.7 - 2.3.7 + 2.3.8 + 2.3.8 2.3 2.6.0 10.14.2.0 - 1.10.1 + 1.11.1 1.6.6 9.4.34.v20201102 4.0.3 @@ -148,8 +148,7 @@ the link to metrics.dropwizard.io in docs/monitoring.md. --> 4.1.1 - 1.8.2 - hadoop2 + 1.10.1 1.12.0 1.11.655 @@ -194,10 +193,6 @@ 1.1 3.141.59 2.40.0 - - 2.8 1.8 1.1.0 - - org.apache.avro - avro-ipc - tests - ${avro.version} - test - - - org.apache.avro - avro-mapred - ${avro.version} - ${avro.mapred.classifier} - ${hive.deps.scope} - io.netty netty @@ -1845,6 +1840,22 @@ org.apache.logging.log4j * + + net.hydromatic + eigenbase-properties + + + org.codehaus.janino + commons-compiler + + + org.codehaus.janino + janino + + + org.pentaho + pentaho-aggdesigner-algorithm + @@ -2327,12 +2338,6 @@ - - com.thoughtworks.paranamer - paranamer - ${paranamer.version} - runtime - org.apache.arrow arrow-vector diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 668701be0ae9..820e7f26201f 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -271,6 +271,9 @@ object SparkBuild extends PomBuild { DefaultMavenRepository, Resolver.mavenLocal, Resolver.file("ivyLocal", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns) + ) ++ Seq( + "hive-staged-releases-mirror" at "https://repository.apache.org/content/repositories/staging/", + Resolver.file("local", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns) ), externalResolvers := resolvers.value, otherResolvers := SbtPomKeys.mvnLocalRepository(dotM2 => Seq(Resolver.file("dotM2", dotM2))).value, @@ -655,7 +658,7 @@ object DependencyOverrides { dependencyOverrides += "com.google.guava" % "guava" % guavaVersion, dependencyOverrides += "xerces" % "xercesImpl" % "2.12.0", dependencyOverrides += "jline" % "jline" % "2.14.6", - dependencyOverrides += "org.apache.avro" % "avro" % "1.8.2") + dependencyOverrides += "org.apache.avro" % "avro" % "1.10.1") } /** diff --git a/project/plugins.sbt b/project/plugins.sbt index 
c33a96772d5a..0a73b26c8f69 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -34,7 +34,7 @@ addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.8.0") addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.4.3") addSbtPlugin("com.cavorite" % "sbt-avro" % "2.1.1") -libraryDependencies += "org.apache.avro" % "avro-compiler" % "1.8.2" +libraryDependencies += "org.apache.avro" % "avro-compiler" % "1.10.1" addSbtPlugin("io.spray" % "sbt-revolver" % "0.9.1") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 50cc47d0f80f..89829ca90613 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2740,7 +2740,8 @@ object SQLConf { .stringConf .createWithDefault( sys.env.getOrElse("DEFAULT_ARTIFACT_REPOSITORY", - "https://maven-central.storage-download.googleapis.com/maven2/")) + "https://maven-central.storage-download.googleapis.com/maven2/," + + "https://repository.apache.org/content/repositories/staging/")) val LEGACY_FROM_DAYTIME_STRING = buildConf("spark.sql.legacy.fromDayTimeString.enabled") diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java index be68880e49a8..f931f60430f6 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java @@ -153,10 +153,7 @@ public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptCont this.sparkSchema = StructType$.MODULE$.fromString(sparkRequestedSchemaString); this.reader = new ParquetFileReader( configuration, footer.getFileMetaData(), file, blocks, requestedSchema.getColumns()); - // use the blocks from the reader in case some do not match filters and will not be read - for (BlockMetaData block : reader.getRowGroups()) { - this.totalRowCount += block.getRowCount(); - } + this.totalRowCount += reader.getFilteredRecordCount(); // For test purpose. 
// If the last external accumulator is `NumRowGroupsAccumulator`, the row group number to read @@ -232,10 +229,7 @@ protected void initialize(String path, List columns) throws IOException this.sparkSchema = new ParquetToSparkSchemaConverter(config).convert(requestedSchema); this.reader = new ParquetFileReader( config, footer.getFileMetaData(), file, blocks, requestedSchema.getColumns()); - // use the blocks from the reader in case some do not match filters and will not be read - for (BlockMetaData block : reader.getRowGroups()) { - this.totalRowCount += block.getRowCount(); - } + this.totalRowCount += reader.getFilteredRecordCount(); } @Override diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java index 9d38a74a2956..1b159534c8a4 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java @@ -320,7 +320,7 @@ private void initializeInternal() throws IOException, UnsupportedOperationExcept private void checkEndOfRowGroup() throws IOException { if (rowsReturned != totalCountLoadedSoFar) return; - PageReadStore pages = reader.readNextRowGroup(); + PageReadStore pages = reader.readNextFilteredRowGroup(); if (pages == null) { throw new IOException("expecting more rows but reached last block. Read " + rowsReturned + " out of " + totalRowCount); diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala index e97c6cd29709..fcc08ee16e80 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala @@ -251,7 +251,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { """ |message root { | optional group _1 (MAP) { - | repeated group map (MAP_KEY_VALUE) { + | repeated group key_value (MAP_KEY_VALUE) { | required int32 key; | optional binary value (UTF8); | } @@ -267,7 +267,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { """ |message root { | optional group _1 (MAP) { - | repeated group map (MAP_KEY_VALUE) { + | repeated group key_value (MAP_KEY_VALUE) { | required group key { | optional binary _1 (UTF8); | optional binary _2 (UTF8); @@ -300,7 +300,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { """ |message root { | optional group _1 (MAP_KEY_VALUE) { - | repeated group map { + | repeated group key_value { | required int32 key; | optional group value { | optional binary _1 (UTF8); @@ -740,7 +740,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { nullable = true))), """message root { | optional group f1 (MAP_KEY_VALUE) { - | repeated group map { + | repeated group key_value { | required int32 num; | required binary str (UTF8); | } @@ -759,7 +759,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { nullable = true))), """message root { | optional group f1 (MAP) { - | repeated group map (MAP_KEY_VALUE) { + | repeated group key_value (MAP_KEY_VALUE) { | required int32 key; | required binary value (UTF8); | } @@ -797,7 +797,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { 
nullable = true))), """message root { | optional group f1 (MAP_KEY_VALUE) { - | repeated group map { + | repeated group key_value { | required int32 num; | optional binary str (UTF8); | } @@ -816,7 +816,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { nullable = true))), """message root { | optional group f1 (MAP) { - | repeated group map (MAP_KEY_VALUE) { + | repeated group key_value (MAP_KEY_VALUE) { | required int32 key; | optional binary value (UTF8); | } @@ -857,7 +857,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { nullable = true))), """message root { | optional group f1 (MAP) { - | repeated group map (MAP_KEY_VALUE) { + | repeated group key_value (MAP_KEY_VALUE) { | required int32 key; | required binary value (UTF8); | } @@ -893,7 +893,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { nullable = true))), """message root { | optional group f1 (MAP) { - | repeated group map (MAP_KEY_VALUE) { + | repeated group key_value (MAP_KEY_VALUE) { | required int32 key; | optional binary value (UTF8); | } @@ -1447,7 +1447,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { parquetSchema = """message root { | required group f0 (MAP) { - | repeated group map (MAP_KEY_VALUE) { + | repeated group key_value (MAP_KEY_VALUE) { | required int32 key; | required group value { | required int32 value_f0; @@ -1472,7 +1472,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { expectedSchema = """message root { | required group f0 (MAP) { - | repeated group map (MAP_KEY_VALUE) { + | repeated group key_value (MAP_KEY_VALUE) { | required int32 key; | required group value { | required int64 value_f1; diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index 440fe997ae13..25a0796bba43 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -214,7 +214,7 @@ class StreamSuite extends StreamTest { .start(outputDir.getAbsolutePath) try { query.processAllAvailable() - val outputDf = spark.read.parquet(outputDir.getAbsolutePath).as[Long] + val outputDf = spark.read.parquet(outputDir.getAbsolutePath).sort('a).as[Long] checkDataset[Long](outputDf, (0L to 10L).toArray: _*) } finally { query.stop() diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index bd0db743b8d4..d9003aa404a3 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -546,7 +546,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftServer2Test { conf += resultSet.getString(1) -> resultSet.getString(2) } - assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.7")) + assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.8")) } } @@ -559,7 +559,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftServer2Test { conf += resultSet.getString(1) -> resultSet.getString(2) } - assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.7")) + assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.8")) } } diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 27d2756c741e..2074d9256672 100644 --- a/sql/hive/pom.xml +++ 
b/sql/hive/pom.xml @@ -122,12 +122,9 @@ org.apache.avro avro - org.apache.avro avro-mapred - ${avro.mapred.classifier} commons-httpclient diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 46a8e9660a20..840a48c38d0d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -60,7 +60,7 @@ private[spark] object HiveUtils extends Logging { val HIVE_METASTORE_VERSION = buildStaticConf("spark.sql.hive.metastore.version") .doc("Version of the Hive metastore. Available options are " + - "0.12.0 through 2.3.7 and " + + "0.12.0 through 2.3.8 and " + "3.0.0 through 3.1.2.") .version("1.4.0") .stringConf diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 02bf86533c89..2bd083121c62 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -98,8 +98,8 @@ private[hive] object IsolatedClientLoader extends Logging { case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0 case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1 case "2.2" | "2.2.0" => hive.v2_2 - case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" | "2.3.6" | "2.3.7" => - hive.v2_3 + case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" | "2.3.6" | "2.3.7" | + "2.3.8" => hive.v2_3 case "3.0" | "3.0.0" => hive.v3_0 case "3.1" | "3.1.0" | "3.1.1" | "3.1.2" => hive.v3_1 case version => diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala index 27ba3eca8194..bb78944fb161 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala @@ -100,11 +100,13 @@ package object client { "org.apache.curator:*", "org.pentaho:pentaho-aggdesigner-algorithm")) - // Since HIVE-14496, Hive materialized view need calcite-core. + // Since HIVE-23980, calcite-core included in Hive package jar. // For spark, only VersionsSuite currently creates a hive materialized view for testing. 
- case object v2_3 extends HiveVersion("2.3.7", - exclusions = Seq("org.apache.calcite:calcite-druid", + case object v2_3 extends HiveVersion("2.3.8", + exclusions = Seq("org.apache.calcite:calcite-core", + "org.apache.calcite:calcite-druid", "org.apache.calcite.avatica:avatica", + "com.fasterxml.jackson.core:*", "org.apache.curator:*", "org.pentaho:pentaho-aggdesigner-algorithm")) @@ -114,7 +116,6 @@ package object client { extraDeps = Seq("org.apache.logging.log4j:log4j-api:2.10.0", "org.apache.derby:derby:10.14.1.0"), exclusions = Seq("org.apache.calcite:calcite-druid", - "org.apache.calcite.avatica:avatica", "org.apache.curator:*", "org.pentaho:pentaho-aggdesigner-algorithm")) @@ -124,7 +125,6 @@ package object client { extraDeps = Seq("org.apache.logging.log4j:log4j-api:2.10.0", "org.apache.derby:derby:10.14.1.0"), exclusions = Seq("org.apache.calcite:calcite-druid", - "org.apache.calcite.avatica:avatica", "org.apache.curator:*", "org.pentaho:pentaho-aggdesigner-algorithm")) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 37287fc39464..a78385ff384a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -60,7 +60,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { .map(new File(_)).getOrElse(Utils.createTempDir(namePrefix = "test-spark")) private val unusedJar = TestUtils.createJarWithClasses(Seq.empty) val hiveVersion = if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) { - "2.3.7" + "2.3.8" } else { "1.2.1" } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 5357f4b63d79..c91ee92350fc 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -1528,7 +1528,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto Seq(tbl, ext_tbl).foreach { tblName => sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')") - val expectedSize = 601 + val expectedSize = 651 // analyze table sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN") var tableStats = getTableStats(tblName) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index cbba9be32b77..f20cf3cdbf82 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -666,7 +666,7 @@ private[sql] class TestHiveSessionStateBuilder( } private[hive] object HiveTestJars { - private val repository = SQLConf.ADDITIONAL_REMOTE_REPOSITORIES.defaultValueString.split(",")(0) + private val repository = SQLConf.ADDITIONAL_REMOTE_REPOSITORIES.defaultValueString.split(",").last private val hiveTestJarsDir = Utils.createTempDir() def getHiveContribJar(version: String = HiveUtils.builtinHiveVersion): File =
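For users picking up the bundled Hive 2.3.8 client, a minimal sketch of the relevant session configuration. It assumes a Spark build with `-Phive`; `spark.sql.hive.metastore.version` and `spark.sql.hive.metastore.jars` are static confs and must be set before the session is created.

```scala
import org.apache.spark.sql.SparkSession

object BuiltinHiveMetastoreSketch {
  def main(args: Array[String]): Unit = {
    // "builtin" selects the Hive client bundled with the Spark assembly; with
    // this upgrade the version must be 2.3.8 (or left unset).
    val spark = SparkSession.builder()
      .appName("hive-2.3.8-sketch")
      .master("local[*]")
      .config("spark.sql.hive.metastore.version", "2.3.8")
      .config("spark.sql.hive.metastore.jars", "builtin")
      .enableHiveSupport()
      .getOrCreate()

    spark.sql("SHOW DATABASES").show()

    spark.stop()
  }
}
```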
Avro logical type | Avro type | Spark SQL type
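The SpecificParquetRecordReaderBase and VectorizedParquetRecordReader changes earlier in this diff switch to Parquet's filtered row-group APIs. From the user's side the behavior is driven by ordinary filter pushdown; a minimal sketch, assuming a local build and a hypothetical `/tmp/parquet_filter_demo` path:

```scala
import org.apache.spark.sql.SparkSession

object ParquetFilteredReadSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("parquet-filtered-read-sketch")
      .master("local[*]")
      .config("spark.sql.parquet.filterPushdown", "true") // default value, shown for clarity
      .getOrCreate()
    import spark.implicits._

    val path = "/tmp/parquet_filter_demo" // hypothetical path
    spark.range(0, 1000000).write.mode("overwrite").parquet(path)

    // The pushed-down predicate lets the Parquet reader skip row groups whose
    // statistics rule out any matches, so fewer rows are materialized.
    val matched = spark.read.parquet(path).filter($"id" > 999990L)
    println(matched.count())

    spark.stop()
  }
}
```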