From 630c73fda479d587779b35053c452b3219e579a6 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Tue, 11 May 2021 13:23:14 -0500 Subject: [PATCH] HBASE-25867 Extra doc around ITBLL (#3242) * HBASE-25867 Extra doc around ITBLL Minor edits to a few log messages. Explain how the '-c' option works when passed to ChaosMonkeyRunner. Some added notes on ITBLL. Fix whacky 'R' and 'Not r' thing in Master (shows when you run ITBLL). In HRS, report hostname and port when it checks in (was debugging issue where Master and HRS had different notions of its hostname). Spare a dirty FNFException on startup if base dir not yet in place. * Address Review by Sean Signed-off-by: Sean Busbey --- .../hadoop/hbase/DistributedHBaseCluster.java | 4 +-- .../hbase/IntegrationTestingUtility.java | 6 ++-- .../actions/RestartRsHoldingMetaAction.java | 2 +- .../hbase/chaos/util/ChaosMonkeyRunner.java | 32 +++++++++++++------ .../test/IntegrationTestBigLinkedList.java | 29 ++++++++--------- .../apache/hadoop/hbase/master/HMaster.java | 10 +++--- .../hbase/regionserver/HRegionServer.java | 6 ++-- .../org/apache/hadoop/hbase/util/FSUtils.java | 9 +++--- 8 files changed, 57 insertions(+), 41 deletions(-) diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java index 796bc1f27e00..712a6490ab62 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java @@ -243,7 +243,7 @@ private void waitForServiceToStop(ServiceType service, ServerName serverName, lo } Threads.sleep(100); } - throw new IOException("did timeout waiting for service to stop:" + serverName); + throw new IOException("Timed-out waiting for service to stop: " + serverName); } private void waitForServiceToStart(ServiceType service, ServerName serverName, long timeout) @@ -257,7 +257,7 @@ private void 
waitForServiceToStart(ServiceType service, ServerName serverName, l } Threads.sleep(100); } - throw new IOException("did timeout waiting for service to start:" + serverName); + throw new IOException("Timed-out waiting for service to start: " + serverName); } @Override diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestingUtility.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestingUtility.java index d617523e2892..43749d11b3f0 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestingUtility.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestingUtility.java @@ -58,8 +58,9 @@ public IntegrationTestingUtility(Configuration conf) { */ public static final String IS_DISTRIBUTED_CLUSTER = "hbase.test.cluster.distributed"; - /** Config for pluggable hbase cluster manager */ - private static final String HBASE_CLUSTER_MANAGER_CLASS = "hbase.it.clustermanager.class"; + /** Config for pluggable hbase cluster manager. Pass fully-qualified class name as property + * value. 
Drop the '.class' suffix.*/ + public static final String HBASE_CLUSTER_MANAGER_CLASS = "hbase.it.clustermanager.class"; private static final Class DEFAULT_HBASE_CLUSTER_MANAGER_CLASS = HBaseClusterManager.class; @@ -153,5 +154,4 @@ public void createDistributedHBaseCluster() throws IOException { setHBaseCluster(new DistributedHBaseCluster(conf, clusterManager)); getAdmin(); } - } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingMetaAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingMetaAction.java index 8df41da36cfc..51d5bf92fa32 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingMetaAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingMetaAction.java @@ -40,7 +40,7 @@ public RestartRsHoldingMetaAction(long sleepTime) { @Override public void perform() throws Exception { - getLogger().info("Performing action: Restart region server holding META"); + getLogger().info("Performing action: Restart regionserver holding META"); ServerName server = cluster.getServerHoldingMeta(); if (server == null) { getLogger().warn("No server is holding hbase:meta right now."); diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/util/ChaosMonkeyRunner.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/util/ChaosMonkeyRunner.java index 24ade5d09cad..b64ca68933f6 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/util/ChaosMonkeyRunner.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/util/ChaosMonkeyRunner.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -38,12 +38,10 @@ public class ChaosMonkeyRunner extends AbstractHBaseTool { private static final Logger LOG = LoggerFactory.getLogger(ChaosMonkeyRunner.class); - public static final String MONKEY_LONG_OPT = "monkey"; public static final String CHAOS_MONKEY_PROPS = "monkeyProps"; public static final String TABLE_NAME_OPT = "tableName"; public static final String FAMILY_NAME_OPT = "familyName"; - protected IntegrationTestingUtility util; protected ChaosMonkey monkey; protected String monkeyToUse; @@ -54,6 +52,9 @@ public class ChaosMonkeyRunner extends AbstractHBaseTool { @Override public void addOptions() { + // The -c option is processed down in the main, not along w/ other options. Added here so shows + // in the usage output. + addOptWithArg("c", "Name of extra configurations file to find on CLASSPATH"); addOptWithArg("m", MONKEY_LONG_OPT, "Which chaos monkey to run"); addOptWithArg(CHAOS_MONKEY_PROPS, "The properties file for specifying chaos " + "monkey properties."); @@ -168,19 +169,33 @@ protected Set getColumnFamilies() { } /* - * If caller wants to add config parameters contained in a file, the path of conf file - * can be passed as the first two arguments like this: - * -c + * If caller wants to add config parameters from a file, the path to the conf file + * can be passed like this: -c . The file is presumed to have the Configuration + * file xml format and is added as a new Resource to the current Configuration. + * Use this mechanism to set Configuration such as what ClusterManager to use, etc. + * Here is an example file you might references that sets an alternate ClusterManager: + * {code} + * + * + * + * hbase.it.clustermanager.class + * org.apache.hadoop.hbase.MyCustomClusterManager + * + * + * {code} + * NOTE: The code searches for the file name passed on the CLASSPATH! Passing the path to a file + * will not work! 
Add the file to the CLASSPATH and then pass the filename as the '-c' arg. */ public static void main(String[] args) throws Exception { Configuration conf = HBaseConfiguration.create(); - String[] actualArgs = args; + String [] actualArgs = args; if (args.length > 0 && "-c".equals(args[0])) { int argCount = args.length - 2; if (argCount < 0) { throw new IllegalArgumentException("Missing path for -c parameter"); } - // load the resource specified by the second parameter + // Load the resource specified by the second parameter. We load from the classpath, not + // from filesystem path. conf.addResource(args[1]); actualArgs = new String[argCount]; System.arraycopy(args, 2, actualArgs, 0, argCount); @@ -189,5 +204,4 @@ public static void main(String[] args) throws Exception { int ret = ToolRunner.run(conf, new ChaosMonkeyRunner(), actualArgs); System.exit(ret); } - } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedList.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedList.java index 27d221313d48..08538c93e699 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedList.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedList.java @@ -152,11 +152,11 @@ *

*

*

    - *
  1. Write out 1 million nodes
  2. + *
  3. Write out 1 million nodes (1M is the configurable 'width' mentioned below)
  4. *
  5. Flush the client
  6. *
  7. Write out 1 million that reference previous million
  8. *
  9. If this is the 25th set of 1 million nodes, then update 1st set of - * million to point to last
  10. + * million to point to last (25 is configurable; it's the 'wrap multiplier' referred to below) *
  11. goto 1
  12. *
*

@@ -224,11 +224,8 @@ @Category(IntegrationTests.class) public class IntegrationTestBigLinkedList extends IntegrationTestBase { protected static final byte[] NO_KEY = new byte[1]; - protected static String TABLE_NAME_KEY = "IntegrationTestBigLinkedList.table"; - protected static String DEFAULT_TABLE_NAME = "IntegrationTestBigLinkedList"; - protected static byte[] FAMILY_NAME = Bytes.toBytes("meta"); private static byte[] BIG_FAMILY_NAME = Bytes.toBytes("big"); private static byte[] TINY_FAMILY_NAME = Bytes.toBytes("tiny"); @@ -263,6 +260,10 @@ public class IntegrationTestBigLinkedList extends IntegrationTestBase { private static final int MISSING_ROWS_TO_LOG = 10; // YARN complains when too many counters private static final int WIDTH_DEFAULT = 1000000; + + /** + * The 'wrap multipler' default. + */ private static final int WRAP_DEFAULT = 25; private static final int ROWKEY_LENGTH = 16; @@ -282,7 +283,6 @@ static class CINode { * A Map only job that generates random linked list and stores them. */ static class Generator extends Configured implements Tool { - private static final Logger LOG = LoggerFactory.getLogger(Generator.class); /** @@ -307,16 +307,18 @@ static class Generator extends Configured implements Tool { */ public static final String BIG_FAMILY_VALUE_SIZE_KEY = "generator.big.family.value.size"; - public static enum Counts { SUCCESS, TERMINATING, UNDEFINED, IOEXCEPTION } public static final String USAGE = "Usage : " + Generator.class.getSimpleName() + - " [ " + - " ] \n" + - "where should be a multiple of width*wrap multiplier, 25M by default \n" + - "walkers will verify random flushed loop during Generation."; + " [ " + + " ] \n" + + "Where should be a multiple of 'width' * 'wrap multiplier'.\n" + + "25M is default because default 'width' is 1M and default 'wrap multiplier' is 25.\n" + + "We write out 1M nodes and then flush the client. 
After 25 flushes, we connect \n" + + "first written nodes back to the 25th set.\n" + + "Walkers verify random flushed loops during Generation."; public Job job; @@ -1089,17 +1091,14 @@ private static SortedSet readFileToSearch(final Configuration conf, * {@link Generator} do not have any holes. */ static class Verify extends Configured implements Tool { - private static final Logger LOG = LoggerFactory.getLogger(Verify.class); protected static final BytesWritable DEF = new BytesWritable(new byte[] { 0 }); protected static final BytesWritable DEF_LOST_FAMILIES = new BytesWritable(new byte[] { 1 }); - protected Job job; public static class VerifyMapper extends TableMapper { private BytesWritable row = new BytesWritable(); private BytesWritable ref = new BytesWritable(); - private boolean multipleUnevenColumnFamilies; @Override @@ -1141,7 +1140,7 @@ public static enum Counts { } /** - * Per reducer, we output problem rows as byte arrasy so can be used as input for + * Per reducer, we output problem rows as byte arrays so can be used as input for * subsequent investigative mapreduce jobs. Each emitted value is prefaced by a one byte flag * saying what sort of emission it is. Flag is the Count enum ordinal as a short. */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 02f4f7a61c25..0702e3d839ed 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -1686,7 +1686,6 @@ public boolean balance(boolean force) throws IOException { // if hbase:meta region is in transition, result of assignment cannot be recorded // ignore the force flag in that case boolean metaInTransition = assignmentManager.isMetaRegionInTransition(); - String prefix = force && !metaInTransition ? 
"R" : "Not r"; List toPrint = regionsInTransition; int max = 5; boolean truncated = false; @@ -1694,9 +1693,12 @@ public boolean balance(boolean force) throws IOException { toPrint = regionsInTransition.subList(0, max); truncated = true; } - LOG.info(prefix + " not running balancer because " + regionsInTransition.size() + - " region(s) in transition: " + toPrint + (truncated? "(truncated list)": "")); - if (!force || metaInTransition) return false; + if (!force || metaInTransition) { + LOG.info("Not running balancer (force=" + force + ", metaRIT=" + metaInTransition + + ") because " + regionsInTransition.size() + " region(s) in transition: " + toPrint + + (truncated? "(truncated list)": "")); + return false; + } } if (this.serverManager.areDeadServersInProgress()) { LOG.info("Not running balancer because processing dead regionserver(s): " + diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 012841c5c731..21f9864af5cd 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -480,7 +480,7 @@ public class HRegionServer extends Thread implements "hbase.regionserver.hostname.disable.master.reversedns"; /** - * HBASE-18226: This config and hbase.unasfe.regionserver.hostname are mutually exclusive. + * HBASE-18226: This config and hbase.unsafe.regionserver.hostname are mutually exclusive. * Exception will be thrown if both are used. 
*/ @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG) @@ -2867,8 +2867,8 @@ private RegionServerStartupResponse reportForDuty() throws IOException { rpcServices.rpcFullScanRequestCount.reset(); rpcServices.rpcMultiRequestCount.reset(); rpcServices.rpcMutateRequestCount.reset(); - LOG.info("reportForDuty to master=" + masterServerName + " with port=" - + rpcServices.isa.getPort() + ", startcode=" + this.startcode); + LOG.info("reportForDuty to master=" + masterServerName + " with isa=" + + rpcServices.isa + ", startcode=" + this.startcode); long now = EnvironmentEdgeManager.currentTime(); int port = rpcServices.isa.getPort(); RegionServerStartupRequest.Builder request = RegionServerStartupRequest.newBuilder(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java index 413b6ba78620..c73e530c1d2a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java @@ -940,10 +940,11 @@ protected boolean isValidName(final String name) { public static List getTableDirs(final FileSystem fs, final Path rootdir) throws IOException { List tableDirs = new ArrayList<>(); - - for (FileStatus status : fs - .globStatus(new Path(rootdir, new Path(HConstants.BASE_NAMESPACE_DIR, "*")))) { - tableDirs.addAll(FSUtils.getLocalTableDirs(fs, status.getPath())); + Path baseNamespaceDir = new Path(rootdir, HConstants.BASE_NAMESPACE_DIR); + if (fs.exists(baseNamespaceDir)) { + for (FileStatus status : fs.globStatus(new Path(baseNamespaceDir, "*"))) { + tableDirs.addAll(FSUtils.getLocalTableDirs(fs, status.getPath())); + } } return tableDirs; }