diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 7aff79ea91d7..5036e05b52fa 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -65,7 +65,13 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} + + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} org.slf4j diff --git a/core/pom.xml b/core/pom.xml index 09fa153c8f20..4b3e04084d56 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -66,7 +66,13 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} + + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} org.apache.spark @@ -177,6 +183,14 @@ org.apache.commons commons-text + + commons-io + commons-io + + + commons-collections + commons-collections + com.google.code.findbugs jsr305 diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 8d8ef2e97209..caede04f415b 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -128,7 +128,7 @@ javassist/3.25.0-GA//javassist-3.25.0-GA.jar javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javolution/5.5.1//javolution-5.5.1.jar -jaxb-api/2.2.2//jaxb-api-2.2.2.jar +jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar jdo-api/3.0.1//jdo-api-3.0.1.jar @@ -227,7 +227,6 @@ spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar -stax-api/1.0-2//stax-api-1.0-2.jar stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index bf56fc18c044..344d8e5b4bd6 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -3,7 +3,6 @@ JLargeArrays/1.5//JLargeArrays-1.5.jar JTransforms/3.1//JTransforms-3.1.jar RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar ST4/4.0.4//ST4-4.0.4.jar -accessors-smart/1.2//accessors-smart-1.2.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.16//aircompressor-0.16.jar algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar @@ -11,7 +10,6 @@ annotations/17.0.0//annotations-17.0.0.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.8-1//antlr4-runtime-4.8-1.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar -aopalliance/1.0//aopalliance-1.0.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar arrow-format/2.0.0//arrow-format-2.0.0.jar arrow-memory-core/2.0.0//arrow-memory-core-2.0.0.jar @@ -28,15 +26,12 @@ breeze_2.12/1.0//breeze_2.12-1.0.jar cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar chill-java/0.9.5//chill-java-0.9.5.jar chill_2.12/0.9.5//chill_2.12-0.9.5.jar -commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar commons-cli/1.2//commons-cli-1.2.jar commons-codec/1.15//commons-codec-1.15.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-compiler/3.0.16//commons-compiler-3.0.16.jar commons-compress/1.20//commons-compress-1.20.jar -commons-configuration2/2.1.1//commons-configuration2-2.1.1.jar commons-crypto/1.1.0//commons-crypto-1.1.0.jar -commons-daemon/1.0.13//commons-daemon-1.0.13.jar commons-dbcp/1.4//commons-dbcp-1.4.jar commons-httpclient/3.1//commons-httpclient-3.1.jar commons-io/2.5//commons-io-2.5.jar @@ -56,30 +51,13 @@ datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar derby/10.14.2.0//derby-10.14.2.0.jar -dnsjava/2.1.7//dnsjava-2.1.7.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar -ehcache/3.3.1//ehcache-3.3.1.jar flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar generex/1.0.2//generex-1.0.2.jar -geronimo-jcache_1.0_spec/1.0-alpha-1//geronimo-jcache_1.0_spec-1.0-alpha-1.jar gson/2.2.4//gson-2.2.4.jar guava/14.0.1//guava-14.0.1.jar -guice-servlet/4.0//guice-servlet-4.0.jar -guice/4.0//guice-4.0.jar -hadoop-annotations/3.2.0//hadoop-annotations-3.2.0.jar -hadoop-auth/3.2.0//hadoop-auth-3.2.0.jar -hadoop-client/3.2.0//hadoop-client-3.2.0.jar -hadoop-common/3.2.0//hadoop-common-3.2.0.jar -hadoop-hdfs-client/3.2.0//hadoop-hdfs-client-3.2.0.jar -hadoop-mapreduce-client-common/3.2.0//hadoop-mapreduce-client-common-3.2.0.jar -hadoop-mapreduce-client-core/3.2.0//hadoop-mapreduce-client-core-3.2.0.jar -hadoop-mapreduce-client-jobclient/3.2.0//hadoop-mapreduce-client-jobclient-3.2.0.jar -hadoop-yarn-api/3.2.0//hadoop-yarn-api-3.2.0.jar -hadoop-yarn-client/3.2.0//hadoop-yarn-client-3.2.0.jar -hadoop-yarn-common/3.2.0//hadoop-yarn-common-3.2.0.jar -hadoop-yarn-registry/3.2.0//hadoop-yarn-registry-3.2.0.jar -hadoop-yarn-server-common/3.2.0//hadoop-yarn-server-common-3.2.0.jar -hadoop-yarn-server-web-proxy/3.2.0//hadoop-yarn-server-web-proxy-3.2.0.jar +hadoop-client-api/3.2.2//hadoop-client-api-3.2.2.jar +hadoop-client-runtime/3.2.2//hadoop-client-runtime-3.2.2.jar hive-beeline/2.3.7//hive-beeline-2.3.7.jar hive-cli/2.3.7//hive-cli-2.3.7.jar hive-common/2.3.7//hive-common-2.3.7.jar @@ -109,8 +87,6 @@ jackson-core/2.11.4//jackson-core-2.11.4.jar jackson-databind/2.11.4//jackson-databind-2.11.4.jar jackson-dataformat-yaml/2.11.4//jackson-dataformat-yaml-2.11.4.jar jackson-datatype-jsr310/2.11.2//jackson-datatype-jsr310-2.11.2.jar -jackson-jaxrs-base/2.9.5//jackson-jaxrs-base-2.9.5.jar -jackson-jaxrs-json-provider/2.9.5//jackson-jaxrs-json-provider-2.9.5.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar jackson-module-jaxb-annotations/2.11.4//jackson-module-jaxb-annotations-2.11.4.jar jackson-module-paranamer/2.11.4//jackson-module-paranamer-2.11.4.jar @@ -124,13 +100,10 @@ jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar janino/3.0.16//janino-3.0.16.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar -javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar -javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar javolution/5.5.1//javolution-5.5.1.jar jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar -jcip-annotations/1.0-1//jcip-annotations-1.0-1.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar jdo-api/3.0.1//jdo-api-3.0.1.jar jersey-client/2.30//jersey-client-2.30.jar @@ -144,30 +117,14 @@ jline/2.14.6//jline-2.14.6.jar joda-time/2.10.5//joda-time-2.10.5.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar -json-smart/2.3//json-smart-2.3.jar json/1.8//json-1.8.jar json4s-ast_2.12/3.7.0-M5//json4s-ast_2.12-3.7.0-M5.jar json4s-core_2.12/3.7.0-M5//json4s-core_2.12-3.7.0-M5.jar json4s-jackson_2.12/3.7.0-M5//json4s-jackson_2.12-3.7.0-M5.jar json4s-scalap_2.12/3.7.0-M5//json4s-scalap_2.12-3.7.0-M5.jar -jsp-api/2.1//jsp-api-2.1.jar jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar -kerb-admin/1.0.1//kerb-admin-1.0.1.jar -kerb-client/1.0.1//kerb-client-1.0.1.jar -kerb-common/1.0.1//kerb-common-1.0.1.jar -kerb-core/1.0.1//kerb-core-1.0.1.jar -kerb-crypto/1.0.1//kerb-crypto-1.0.1.jar -kerb-identity/1.0.1//kerb-identity-1.0.1.jar -kerb-server/1.0.1//kerb-server-1.0.1.jar -kerb-simplekdc/1.0.1//kerb-simplekdc-1.0.1.jar -kerb-util/1.0.1//kerb-util-1.0.1.jar -kerby-asn1/1.0.1//kerby-asn1-1.0.1.jar -kerby-config/1.0.1//kerby-config-1.0.1.jar -kerby-pkix/1.0.1//kerby-pkix-1.0.1.jar -kerby-util/1.0.1//kerby-util-1.0.1.jar -kerby-xdr/1.0.1//kerby-xdr-1.0.1.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar kubernetes-client/4.12.0//kubernetes-client-4.12.0.jar kubernetes-model-admissionregistration/4.12.0//kubernetes-model-admissionregistration-4.12.0.jar @@ -205,9 +162,7 @@ metrics-json/4.1.1//metrics-json-4.1.1.jar metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar minlog/1.3.0//minlog-1.3.0.jar netty-all/4.1.51.Final//netty-all-4.1.51.Final.jar -nimbus-jose-jwt/4.41.1//nimbus-jose-jwt-4.41.1.jar objenesis/2.6//objenesis-2.6.jar -okhttp/2.7.5//okhttp-2.7.5.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar @@ -226,7 +181,6 @@ parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar py4j/0.10.9.1//py4j-0.10.9.1.jar pyrolite/4.30//pyrolite-4.30.jar -re2j/1.1//re2j-1.1.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar scala-compiler/2.12.10//scala-compiler-2.12.10.jar scala-library/2.12.10//scala-library-2.12.10.jar @@ -244,15 +198,12 @@ spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar stax-api/1.0.1//stax-api-1.0.1.jar -stax2-api/3.1.4//stax2-api-3.1.4.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar threeten-extra/1.5.0//threeten-extra-1.5.0.jar -token-provider/1.0.1//token-provider-1.0.1.jar transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.0//univocity-parsers-2.9.0.jar velocity/1.5//velocity-1.5.jar -woodstox-core/5.0.3//woodstox-core-5.0.3.jar xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar xz/1.5//xz-1.5.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 2359e99f657f..121bc56c0ac4 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -71,9 +71,15 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} provided + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} + org.apache.avro avro-mapred diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 843f16067463..1833b35096b1 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -79,6 +79,10 @@ kafka-clients ${kafka.version} + + com.google.code.findbugs + jsr305 + org.apache.commons commons-pool2 diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index dbe2ab92a28e..4ee09fa4b51b 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -58,6 +58,11 @@ mockito-core test + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.deps.scope} + org.apache.spark spark-tags_${scala.binary.version} diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 22259b08141d..9a98d7cca5ca 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -91,9 +91,15 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} provided + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} + org.apache.avro avro-ipc diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 03910ba09199..c0997e5e0a1c 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -58,10 +58,15 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} ${hadoop.version} provided + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} + org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} + test + + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} test diff --git a/pom.xml b/pom.xml index f921e35a76b4..26b5186b53f1 100644 --- a/pom.xml +++ b/pom.xml @@ -120,7 +120,7 @@ spark 1.7.30 1.2.17 - 3.2.0 + 3.2.2 2.5.0 ${hadoop.version} 3.4.14 @@ -245,6 +245,15 @@ compile test + + hadoop-client-api + hadoop-client-runtime + hadoop-client-minicluster + + + org.apache.hadoop + hadoop-client-api + ${hadoop.version} + ${hadoop.deps.scope} + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.version} + ${hadoop.deps.scope} + + + org.apache.hadoop + hadoop-client-minicluster + ${yarn.version} + test + + org.apache.hadoop hadoop-client @@ -1656,6 +1690,14 @@ org.apache.ant ant + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-auth + org.apache.zookeeper zookeeper @@ -2420,17 +2462,6 @@ - - enforce-no-duplicate-dependencies - - enforce - - - - - - - @@ -2901,6 +2932,7 @@ maven-shade-plugin false + false org.spark-project.spark:unused @@ -3162,6 +3194,9 @@ 2.7.4 2.7.1 2.4 + hadoop-client + hadoop-client + hadoop-client diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 44df4e1da533..3fff9408ccbb 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -63,10 +63,6 @@ com.fasterxml.jackson.core * - - com.fasterxml.jackson.module - jackson-module-jaxb-annotations - com.fasterxml.jackson.dataformat jackson-dataformat-yaml @@ -85,11 +81,6 @@ jackson-dataformat-yaml ${fasterxml.jackson.version} - - com.fasterxml.jackson.module - jackson-module-jaxb-annotations - ${fasterxml.jackson.version} - diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index c0ce1c8e151e..a6629539f59b 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -40,6 +40,42 @@ true + + hadoop-2.7 + + + org.apache.hadoop + hadoop-yarn-api + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hadoop + hadoop-yarn-server-web-proxy + + + org.apache.hadoop + hadoop-yarn-client + + + org.apache.hadoop + hadoop-yarn-server-tests + tests + test + + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + test + + + @@ -69,23 +105,20 @@ org.apache.hadoop - hadoop-yarn-api - - - org.apache.hadoop - hadoop-yarn-common + ${hadoop-client-api.artifact} + ${hadoop.version} org.apache.hadoop - hadoop-yarn-server-web-proxy + ${hadoop-client-runtime.artifact} + ${hadoop.version} + ${hadoop.deps.scope} org.apache.hadoop - hadoop-yarn-client - - - org.apache.hadoop - hadoop-client + ${hadoop-client-minicluster.artifact} + ${hadoop.version} + test @@ -135,18 +168,6 @@ test - - org.apache.hadoop - hadoop-yarn-server-tests - tests - test - - - org.apache.hadoop - hadoop-yarn-server-resourcemanager - test - - org.mockito mockito-core diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index ab6950716ad9..eb927a3c296c 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.yarn import java.io.{File, IOException} import java.lang.reflect.{InvocationTargetException, Modifier} -import java.net.{URI, URL} +import java.net.{URI, URL, URLEncoder} import java.security.PrivilegedExceptionAction import java.util.concurrent.{TimeoutException, TimeUnit} @@ -36,7 +36,6 @@ import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException -import org.apache.hadoop.yarn.server.webproxy.ProxyUriUtils import org.apache.hadoop.yarn.util.{ConverterUtils, Records} import org.apache.spark._ @@ -308,7 +307,8 @@ private[spark] class ApplicationMaster( // The client-mode AM doesn't listen for incoming connections, so report an invalid port. registerAM(Utils.localHostName, -1, sparkConf, sparkConf.getOption("spark.driver.appUIAddress"), appAttemptId) - addAmIpFilter(Some(driverRef), ProxyUriUtils.getPath(appAttemptId.getApplicationId)) + val encodedAppId = URLEncoder.encode(appAttemptId.getApplicationId.toString, "UTF-8") + addAmIpFilter(Some(driverRef), s"/proxy/$encodedAppId") createAllocator(driverRef, sparkConf, clientRpcEnv, appAttemptId, cachedResourcesConf) reporterThread.join() } catch { diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala index 20f5339c46fe..a813b9913f23 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala @@ -80,6 +80,16 @@ abstract class BaseYarnClusterSuite yarnConf.set("yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage", "100.0") + // capacity-scheduler.xml is missing in hadoop-client-minicluster so this is a workaround + yarnConf.set("yarn.scheduler.capacity.root.queues", "default") + yarnConf.setInt("yarn.scheduler.capacity.root.default.capacity", 100) + yarnConf.setFloat("yarn.scheduler.capacity.root.default.user-limit-factor", 1) + yarnConf.setInt("yarn.scheduler.capacity.root.default.maximum-capacity", 100) + yarnConf.set("yarn.scheduler.capacity.root.default.state", "RUNNING") + yarnConf.set("yarn.scheduler.capacity.root.default.acl_submit_applications", "*") + yarnConf.set("yarn.scheduler.capacity.root.default.acl_administer_queue", "*") + yarnConf.setInt("yarn.scheduler.capacity.node-locality-delay", -1) + yarnCluster = new MiniYARNCluster(getClass().getName(), 1, 1, 1) yarnCluster.init(yarnConf) yarnCluster.start() diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 0553438a1ad4..583738bc561a 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -104,6 +104,10 @@ org.antlr antlr4-runtime + + javax.xml.bind + jaxb-api + commons-codec commons-codec diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 27d2756c741e..74b1f9d987c4 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -162,6 +162,11 @@ org.datanucleus datanucleus-core + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.deps.scope} + org.apache.thrift libthrift diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 02bf86533c89..4e5e58dc084f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -112,11 +112,24 @@ private[hive] object IsolatedClientLoader extends Logging { hadoopVersion: String, ivyPath: Option[String], remoteRepos: String): Seq[URL] = { + val hadoopJarNames = if (hadoopVersion.startsWith("3")) { + Seq(s"org.apache.hadoop:hadoop-client-api:$hadoopVersion", + s"org.apache.hadoop:hadoop-client-runtime:$hadoopVersion") + } else { + Seq(s"org.apache.hadoop:hadoop-client:$hadoopVersion") + } val hiveArtifacts = version.extraDeps ++ Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde") .map(a => s"org.apache.hive:$a:${version.fullVersion}") ++ - Seq("com.google.guava:guava:14.0.1", - s"org.apache.hadoop:hadoop-client:$hadoopVersion") + Seq("com.google.guava:guava:14.0.1") ++ hadoopJarNames + + val extraExclusions = if (hadoopVersion.startsWith("3")) { + // this introduced from lower version of Hive could conflict with jars in Hadoop 3.2+, so + // exclude here in favor of the ones in Hadoop 3.2+ + Seq("org.apache.hadoop:hadoop-auth") + } else { + Seq.empty + } val classpaths = quietly { SparkSubmitUtils.resolveMavenCoordinates( @@ -125,7 +138,7 @@ private[hive] object IsolatedClientLoader extends Logging { Some(remoteRepos), ivyPath), transitive = true, - exclusions = version.exclusions) + exclusions = version.exclusions ++ extraExclusions) } val allFiles = classpaths.map(new File(_)).toSet