From f85dbff97618066d60f37736808c8c24aa0a98e5 Mon Sep 17 00:00:00 2001
From: Sahil Takiar
Date: Wed, 29 Jul 2020 17:54:11 -0700
Subject: [PATCH] IMPALA-10030: Remove unnecessary jar dependencies

Remove the dependency on hadoop-hdfs. This jar file contains the core code
for implementing HDFS, and thus pulls in a number of unnecessary transitive
dependencies. Impala currently only requires this jar for some configuration
key names. Most of these configuration key names have been moved to the
appropriate HDFS client jars, and some others are deprecated altogether.
Removing this jar required making a few code changes to reference the
configuration keys from their new locations.

Removes all transitive Kafka dependencies from the Apache Ranger dependency.
Previously, Impala only excluded Kafka jars with the binary version
kafka_2.11; however, it seems Ranger recently upgraded the dependency version
to kafka_2.12. Now all Kafka dependencies are excluded, regardless of
artifact name.

Removes all transitive dependencies from the Apache Ozone dependency. Impala
has a dependency on the Ozone client shaded jar, which already includes all
required transitive dependencies. For some reason, Ozone still pulls in some
transitive dependencies even though they are not needed.

Made some other minor cleanups / improvements in the fe/pom.xml file.

This saves about 70 MB of space in the Docker images.

Testing:
* Ran exhaustive tests
* Ran on-prem cluster E2E tests

Change-Id: Iadbb6142466f73f067dd7cf9d401ff81145c74cc
Reviewed-on: http://gerrit.cloudera.org:8080/16311
Reviewed-by: Impala Public Jenkins
Tested-by: Impala Public Jenkins
---
 fe/pom.xml                                    | 98 +++++--------------
 .../apache/impala/service/JniFrontend.java   | 21 ++--
 .../impala/util/FsPermissionChecker.java     |  8 +-
 .../apache/impala/util/HdfsCachingUtil.java  |  7 +-
 .../impala/service/JniFrontendTest.java      | 19 ++--
 5 files changed, 51 insertions(+), 102 deletions(-)

diff --git a/fe/pom.xml b/fe/pom.xml
index 061a7398ca..6ea5c8aca4 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -35,14 +35,6 @@ under the License.
 Apache Impala Query Engine Frontend - - - net.minidev - json-smart - 2.3 - - org.apache.impala query-event-hook-api
@@ -54,46 +46,18 @@ under the License.
 impala-data-source-api ${impala.extdatasrc.api.version} - - org.apache.hadoop - hadoop-hdfs - ${hadoop.version} - - - org.eclipse.jetty - * - - - - org.fusesource.leveldbjni - * - - - - io.netty - * - - - com.sun.jersey - jersey-server - - - + org.apache.hadoop hadoop-hdfs-client ${hadoop.version} + org.apache.hadoop hadoop-common ${hadoop.version} - - - net.minidev - json-smart - org.eclipse.jetty *
@@ -113,17 +77,11 @@ under the License.
 + org.apache.hadoop hadoop-auth ${hadoop.version} - - - - net.minidev - json-smart - -
@@ -162,13 +120,6 @@ under the License.
 org.apache.hadoop hadoop-azure-datalake ${hadoop.version} - - - - net.minidev - json-smart - -
@@ -218,7 +169,7 @@ under the License.
 org.apache.kafka - kafka_2.11 + * org.apache.shiro
@@ -230,12 +181,14 @@ under the License.
 - + + javax.mail mail 1.4 + javax.ws.rs javax.ws.rs-api
@@ -290,26 +243,12 @@ under the License.
 org.apache.hbase hbase-client ${hbase.version} - - - - net.minidev - json-smart - - org.apache.hbase hbase-common ${hbase.version} - - - - net.minidev - json-smart - -
@@ -382,6 +321,7 @@ under the License.
 slf4j-api ${slf4j.version} + org.slf4j slf4j-log4j12
@@ -401,9 +341,9 @@ under the License.
 - com.google.errorprone - error_prone_annotations - 2.3.1 + com.google.errorprone + error_prone_annotations + 2.3.1
@@ -424,6 +364,7 @@ under the License.
 json-simple 1.1.1 + org.glassfish javax.json
@@ -979,11 +920,6 @@ under the License.
 org.apache.logging.log4j log4j-1.2-api - - - net.minidev - json-smart - org.apache.hive hive-serde
@@ -1035,6 +971,16 @@ under the License.
 org.apache.hadoop hadoop-ozone-filesystem-hadoop3 ${ozone.version} + + + + * + * +

diff --git a/fe/src/main/java/org/apache/impala/service/JniFrontend.java b/fe/src/main/java/org/apache/impala/service/JniFrontend.java
index 3642429f5d..324105d862 100644
--- a/fe/src/main/java/org/apache/impala/service/JniFrontend.java
+++ b/fe/src/main/java/org/apache/impala/service/JniFrontend.java
@@ -30,7 +30,7 @@
 import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;
 import org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem;
 import org.apache.hadoop.fs.s3a.S3AFileSystem;
-import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.security.Groups;
 import org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback;
@@ -753,8 +753,8 @@ private String checkLogFilePermission() {
    */
   @VisibleForTesting
   protected static String checkShortCircuitRead(Configuration conf) {
-    if (!conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
-        DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT)) {
+    if (!conf.getBoolean(HdfsClientConfigKeys.Read.ShortCircuit.KEY,
+        HdfsClientConfigKeys.Read.ShortCircuit.DEFAULT)) {
       LOG.info("Short-circuit reads are not enabled.");
       return "";
     }
@@ -765,11 +765,12 @@ protected static String checkShortCircuitRead(Configuration conf) {
     StringBuilder errorCause = new StringBuilder();
 
     // dfs.domain.socket.path must be set properly
-    String domainSocketPath = conf.getTrimmed(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
-        DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_DEFAULT);
+    String domainSocketPath =
+        conf.getTrimmed(HdfsClientConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
+            HdfsClientConfigKeys.DFS_DOMAIN_SOCKET_PATH_DEFAULT);
     if (domainSocketPath.isEmpty()) {
       errorCause.append(prefix);
-      errorCause.append(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY);
+      errorCause.append(HdfsClientConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY);
       errorCause.append(" is not configured.\n");
     } else {
       // The socket path parent directory must be readable and executable.
@@ -781,16 +782,16 @@ protected static String checkShortCircuitRead(Configuration conf) {
       } else if (socketDir == null || !socketDir.canRead() || !socketDir.canExecute()) {
         errorCause.append(prefix);
         errorCause.append("Impala cannot read or execute the parent directory of ");
-        errorCause.append(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY);
+        errorCause.append(HdfsClientConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY);
         errorCause.append("\n");
       }
     }
 
     // dfs.client.use.legacy.blockreader.local must be set to false
-    if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL,
-        DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT)) {
+    if (conf.getBoolean(HdfsClientConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL,
+        HdfsClientConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT)) {
       errorCause.append(prefix);
-      errorCause.append(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL);
+      errorCause.append(HdfsClientConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL);
       errorCause.append(" should not be enabled.\n");
     }
 
diff --git a/fe/src/main/java/org/apache/impala/util/FsPermissionChecker.java b/fe/src/main/java/org/apache/impala/util/FsPermissionChecker.java
index db5c555763..4970043414 100644
--- a/fe/src/main/java/org/apache/impala/util/FsPermissionChecker.java
+++ b/fe/src/main/java/org/apache/impala/util/FsPermissionChecker.java
@@ -39,8 +39,7 @@
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.hdfs.protocol.AclException;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY;
+import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DeprecatedKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
@@ -72,8 +71,9 @@ public class FsPermissionChecker {
   private FsPermissionChecker() throws IOException {
     UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
     groups_.addAll(Arrays.asList(ugi.getGroupNames()));
-    supergroup_ = CONF.get(DFS_PERMISSIONS_SUPERUSERGROUP_KEY,
-        DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT);
+    // The default value is taken from the String DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT
+    // in DFSConfigKeys.java from the hadoop-hdfs jar.
+    supergroup_ = CONF.get(DFS_PERMISSIONS_SUPERUSERGROUP_KEY, "supergroup");
     user_ = ugi.getShortUserName();
   }
 
diff --git a/fe/src/main/java/org/apache/impala/util/HdfsCachingUtil.java b/fe/src/main/java/org/apache/impala/util/HdfsCachingUtil.java
index 1a0c1d9da8..b22c2f8361 100644
--- a/fe/src/main/java/org/apache/impala/util/HdfsCachingUtil.java
+++ b/fe/src/main/java/org/apache/impala/util/HdfsCachingUtil.java
@@ -22,7 +22,6 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.RemoteIterator;
-import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
@@ -265,9 +264,11 @@ public static void waitForDirective(long directiveId)
     // The refresh interval is how often HDFS will update cache directive stats. We use
     // this value to determine how frequently we should poll for changes.
+    // The key dfs.namenode.path.based.cache.refresh.interval.ms is copied from the string
+    // DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS in DFSConfigKeys.java from the
+    // hadoop-hdfs jar.
     long hdfsRefreshIntervalMs = getDfs().getConf().getLong(
-        DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS,
-        DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT);
+        "dfs.namenode.path.based.cache.refresh.interval.ms", 30000L);
     Preconditions.checkState(hdfsRefreshIntervalMs > 0);
 
     // Loop until either MAX_UNCHANGED_CACHING_REFRESH_INTERVALS have passed with no
diff --git a/fe/src/test/java/org/apache/impala/service/JniFrontendTest.java b/fe/src/test/java/org/apache/impala/service/JniFrontendTest.java
index 26752d986a..771b1c7850 100644
--- a/fe/src/test/java/org/apache/impala/service/JniFrontendTest.java
+++ b/fe/src/test/java/org/apache/impala/service/JniFrontendTest.java
@@ -27,7 +27,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
-import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
 import org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback;
 import org.apache.hadoop.security.JniBasedUnixGroupsNetgroupMappingWithFallback;
 import org.apache.hadoop.security.ShellBasedUnixGroupsMapping;
@@ -96,13 +96,14 @@ public void testCheckShortCircuitConfigs() {
     socketDir.getParentFile().setExecutable(false);
 
     Configuration conf = mock(Configuration.class);
-    when(conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
-        DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT)).thenReturn(true);
-    when(conf.getTrimmed(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
-        DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_DEFAULT))
+    when(conf.getBoolean(HdfsClientConfigKeys.Read.ShortCircuit.KEY,
+        HdfsClientConfigKeys.Read.ShortCircuit.DEFAULT)).thenReturn(true);
+    when(conf.getTrimmed(HdfsClientConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
+        HdfsClientConfigKeys.DFS_DOMAIN_SOCKET_PATH_DEFAULT))
         .thenReturn(socketDir.getAbsolutePath());
-    when(conf.getBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL,
-        DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT)).thenReturn(false);
+    when(conf.getBoolean(HdfsClientConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL,
+        HdfsClientConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT))
+        .thenReturn(false);
 
     BackendConfig.INSTANCE = mock(BackendConfig.class);
     when(BackendConfig.INSTANCE.isDedicatedCoordinator()).thenReturn(true);
@@ -113,7 +114,7 @@ public void testCheckShortCircuitConfigs() {
     actualErrorMessage = JniFrontend.checkShortCircuitRead(conf);
     assertEquals("Invalid short-circuit reads configuration:\n"
         + " - Impala cannot read or execute the parent directory of "
-        + DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY + "\n",
+        + HdfsClientConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY + "\n",
         actualErrorMessage);
 
     if (socketDir != null) {
@@ -122,4 +123,4 @@ public void testCheckShortCircuitConfigs() {
       socketDir.getParentFile().delete();
     }
   }
-}
\ No newline at end of file
+}
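
Note on the Ranger and Ozone hunks above: they switch to wildcard Maven exclusions. The
fragment below is a minimal sketch of that pattern, not a verbatim copy of fe/pom.xml;
the element structure is the standard Maven exclusion shape, the coordinates are the
ones named in the hunks, and the enclosing Ranger dependency coordinates are omitted
because they are outside the quoted context.

    <!-- Inside the existing Apache Ranger dependency: exclude every Kafka artifact,
         regardless of artifact name, instead of pinning kafka_2.11. -->
    <exclusions>
      <exclusion>
        <groupId>org.apache.kafka</groupId>
        <artifactId>*</artifactId>
      </exclusion>
    </exclusions>

    <!-- Ozone: the shaded client jar already bundles its transitive dependencies,
         so everything it would otherwise pull in is excluded. -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-ozone-filesystem-hadoop3</artifactId>
      <version>${ozone.version}</version>
      <exclusions>
        <exclusion>
          <groupId>*</groupId>
          <artifactId>*</artifactId>
        </exclusion>
      </exclusions>
    </dependency>

Excluding at the groupId level keeps the Kafka exclusion valid even if Ranger bumps the
Scala binary version again (as it did from kafka_2.11 to kafka_2.12); wildcard exclusions
require a reasonably recent Maven (3.2.1 or later).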