From 57e608d6cc481170cbc93e4a33599d648108057d Mon Sep 17 00:00:00 2001 From: Xiaolin Ha Date: Mon, 13 Feb 2023 18:55:55 +0800 Subject: [PATCH] HBASE-21521 Expose master startup status via web UI (#4788) Signed-off-by: Bryan Beaudreault --- .../hbase/tmpl/master/MasterStatusTmpl.jamon | 1 + .../hbase/master/ActiveMasterManager.java | 15 ++- .../apache/hadoop/hbase/master/HMaster.java | 69 ++++++---- .../master/snapshot/TakeSnapshotHandler.java | 4 +- .../hadoop/hbase/monitoring/TaskGroup.java | 115 ++++++++++++++++ .../hadoop/hbase/monitoring/TaskMonitor.java | 31 ++++- .../hadoop/hbase/regionserver/HRegion.java | 5 +- .../apache/hadoop/hbase/wal/WALSplitter.java | 4 +- .../resources/hbase-webapps/master/header.jsp | 1 + .../hbase-webapps/master/startupProgress.jsp | 124 ++++++++++++++++++ .../hbase/master/AlwaysStandByHMaster.java | 7 +- .../hbase/master/TestActiveMasterManager.java | 19 ++- .../hbase/monitoring/TestTaskMonitor.java | 24 +++- 13 files changed, 370 insertions(+), 49 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/TaskGroup.java create mode 100644 hbase-server/src/main/resources/hbase-webapps/master/startupProgress.jsp diff --git a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon index 5582532a95c3..863d7e039a40 100644 --- a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon +++ b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon @@ -167,6 +167,7 @@ AssignmentManager assignmentManager = master.getAssignmentManager(); <%if HBaseConfiguration.isShowConfInServlet()%>
          <li><a href="/conf">HBase Configuration</a></li>
          </%if>
+         <li><a href="/startupProgress.jsp">Startup Progress</a></li>
  • diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java index 3f8d6d2a702d..6768a6b541fa 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hbase.ZNodeClearer; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.monitoring.MonitoredTask; +import org.apache.hadoop.hbase.monitoring.TaskGroup; import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker; import org.apache.hadoop.hbase.zookeeper.ZKListener; import org.apache.hadoop.hbase.zookeeper.ZKUtil; @@ -218,16 +219,18 @@ private void handleMasterNodeChange() { * Block until becoming the active master. Method blocks until there is not another active master * and our attempt to become the new active master is successful. This also makes sure that we are * watching the master znode so will be notified if another master dies. - * @param checkInterval the interval to check if the master is stopped - * @param startupStatus the monitor status to track the progress + * @param checkInterval the interval to check if the master is stopped + * @param startupTaskGroup the task group for master startup to track the progress * @return True if no issue becoming active master else false if another master was running or if * some other problem (zookeeper, stop flag has been set on this Master) */ - boolean blockUntilBecomingActiveMaster(int checkInterval, MonitoredTask startupStatus) { + boolean blockUntilBecomingActiveMaster(int checkInterval, TaskGroup startupTaskGroup) { + MonitoredTask blockUntilActive = + startupTaskGroup.addTask("Blocking until becoming active master"); String backupZNode = ZNodePaths .joinZNode(this.watcher.getZNodePaths().backupMasterAddressesZNode, this.sn.toString()); while (!(master.isAborted() || master.isStopped())) { - startupStatus.setStatus("Trying to register in ZK as active master"); + blockUntilActive.setStatus("Trying to register in ZK as active master"); // Try to become the active master, watch if there is another master. // Write out our ServerName as versioned bytes. 
try { @@ -246,7 +249,7 @@ boolean blockUntilBecomingActiveMaster(int checkInterval, MonitoredTask startupS ZNodeClearer.writeMyEphemeralNodeOnDisk(this.sn.toString()); // We are the master, return - startupStatus.setStatus("Successfully registered as active master."); + blockUntilActive.setStatus("Successfully registered as active master."); this.clusterHasActiveMaster.set(true); activeMasterServerName = sn; LOG.info("Registered as active master=" + this.sn); @@ -291,7 +294,7 @@ boolean blockUntilBecomingActiveMaster(int checkInterval, MonitoredTask startupS } } LOG.info(msg); - startupStatus.setStatus(msg); + blockUntilActive.setStatus(msg); } catch (KeeperException ke) { master.abort("Received an unexpected KeeperException, aborting", ke); return false; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 2e52aaf6a08b..a9b708470717 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -178,6 +178,7 @@ import org.apache.hadoop.hbase.mob.MobFileCompactionChore; import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer; import org.apache.hadoop.hbase.monitoring.MonitoredTask; +import org.apache.hadoop.hbase.monitoring.TaskGroup; import org.apache.hadoop.hbase.monitoring.TaskMonitor; import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost; import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager; @@ -444,6 +445,8 @@ public class HMaster extends HRegionServer implements MasterServices { public static final String WARMUP_BEFORE_MOVE = "hbase.master.warmup.before.move"; private static final boolean DEFAULT_WARMUP_BEFORE_MOVE = true; + private TaskGroup startupTaskGroup; + /** * Initializes the HMaster. The steps are as follows: *

@@ -452,9 +455,8 @@ public class HMaster extends HRegionServer implements MasterServices {
    * <li>Start the ActiveMasterManager.
    * </ol>
    * <p>
    - * Remaining steps of initialization occur in - * {@link #finishActiveMasterInitialization(MonitoredTask)} after the master becomes the active - * one. + * Remaining steps of initialization occur in {@link #finishActiveMasterInitialization()} after + * the master becomes the active one. */ public HMaster(final Configuration conf) throws IOException { super(conf); @@ -887,12 +889,13 @@ private void tryMigrateMetaLocationsFromZooKeeper() throws IOException, KeeperEx * Notice that now we will not schedule a special procedure to make meta online(unless the first * time where meta has not been created yet), we will rely on SCP to bring meta online. */ - private void finishActiveMasterInitialization(MonitoredTask status) + + private void finishActiveMasterInitialization() throws IOException, InterruptedException, KeeperException, ReplicationException { /* * We are active master now... go initialize components we need to run. */ - status.setStatus("Initializing Master file system"); + startupTaskGroup.addTask("Initializing Master file system"); this.masterActiveTime = EnvironmentEdgeManager.currentTime(); // TODO: Do this using Dependency Injection, using PicoContainer, Guice or Spring. @@ -905,7 +908,7 @@ private void finishActiveMasterInitialization(MonitoredTask status) // warm-up HTDs cache on master initialization if (preLoadTableDescriptors) { - status.setStatus("Pre-loading table descriptors"); + startupTaskGroup.addTask("Pre-loading table descriptors"); this.tableDescriptors.getAll(); } @@ -913,7 +916,7 @@ private void finishActiveMasterInitialization(MonitoredTask status) // only after it has checked in with the Master. At least a few tests ask Master for clusterId // before it has called its run method and before RegionServer has done the reportForDuty. ClusterId clusterId = fileSystemManager.getClusterId(); - status.setStatus("Publishing Cluster ID " + clusterId + " in ZooKeeper"); + startupTaskGroup.addTask("Publishing Cluster ID " + clusterId + " in ZooKeeper"); ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId()); this.clusterId = clusterId.toString(); @@ -932,7 +935,7 @@ private void finishActiveMasterInitialization(MonitoredTask status) } } - status.setStatus("Initialize ServerManager and schedule SCP for crash servers"); + startupTaskGroup.addTask("Initialize ServerManager and schedule SCP for crash servers"); // The below two managers must be created before loading procedures, as they will be used during // loading. // initialize master local region @@ -982,8 +985,9 @@ private void finishActiveMasterInitialization(MonitoredTask status) ? new MirroringTableStateManager(this) : new TableStateManager(this); - status.setStatus("Initializing ZK system trackers"); + startupTaskGroup.addTask("Initializing ZK system trackers"); initializeZKBasedSystemTrackers(); + // Set ourselves as active Master now our claim has succeeded up in zk. 
this.activeMaster = true; @@ -995,19 +999,19 @@ private void finishActiveMasterInitialization(MonitoredTask status) // This is for backwards compatibility // See HBASE-11393 - status.setStatus("Update TableCFs node in ZNode"); + startupTaskGroup.addTask("Update TableCFs node in ZNode"); ReplicationPeerConfigUpgrader tableCFsUpdater = new ReplicationPeerConfigUpgrader(zooKeeper, conf); tableCFsUpdater.copyTableCFs(); if (!maintenanceMode) { - status.setStatus("Initializing master coprocessors"); + startupTaskGroup.addTask("Initializing master coprocessors"); setQuotasObserver(conf); initializeCoprocessorHost(conf); } // Checking if meta needs initializing. - status.setStatus("Initializing meta table if this is a new deploy"); + startupTaskGroup.addTask("Initializing meta table if this is a new deploy"); InitMetaProcedure initMetaProc = null; // Print out state of hbase:meta on startup; helps debugging. if (!this.assignmentManager.getRegionStates().hasTableRegionStates(TableName.META_TABLE_NAME)) { @@ -1030,7 +1034,7 @@ private void finishActiveMasterInitialization(MonitoredTask status) this.balancer.updateClusterMetrics(getClusterMetricsWithoutCoprocessor()); // start up all service threads. - status.setStatus("Initializing master service threads"); + startupTaskGroup.addTask("Initializing master service threads"); startServiceThreads(); // wait meta to be initialized after we start procedure executor if (initMetaProc != null) { @@ -1046,16 +1050,16 @@ private void finishActiveMasterInitialization(MonitoredTask status) // With this as part of master initialization, it precludes our being able to start a single // server that is both Master and RegionServer. Needs more thought. TODO. String statusStr = "Wait for region servers to report in"; - status.setStatus(statusStr); - LOG.info(Objects.toString(status)); - waitForRegionServers(status); + MonitoredTask waitRegionServer = startupTaskGroup.addTask(statusStr); + LOG.info(Objects.toString(waitRegionServer)); + waitForRegionServers(waitRegionServer); // Check if master is shutting down because issue initializing regionservers or balancer. if (isStopped()) { return; } - status.setStatus("Starting assignment manager"); + startupTaskGroup.addTask("Starting assignment manager"); // FIRST HBASE:META READ!!!! // The below cannot make progress w/o hbase:meta being online. // This is the FIRST attempt at going to hbase:meta. Meta on-lining is going on in background @@ -1132,7 +1136,7 @@ private void finishActiveMasterInitialization(MonitoredTask status) this.balancer.updateClusterMetrics(getClusterMetricsWithoutCoprocessor()); // Start balancer and meta catalog janitor after meta and regions have been assigned. 
- status.setStatus("Starting balancer and catalog janitor"); + startupTaskGroup.addTask("Starting balancer and catalog janitor"); this.clusterStatusChore = new ClusterStatusChore(this, balancer); getChoreService().scheduleChore(clusterStatusChore); this.balancerChore = new BalancerChore(this); @@ -1154,7 +1158,7 @@ private void finishActiveMasterInitialization(MonitoredTask status) if (!waitForNamespaceOnline()) { return; } - status.setStatus("Starting cluster schema service"); + startupTaskGroup.addTask("Starting cluster schema service"); try { initClusterSchemaService(); } catch (IllegalStateException e) { @@ -1177,7 +1181,6 @@ private void finishActiveMasterInitialization(MonitoredTask status) } } - status.markComplete("Initialization successful"); LOG.info(String.format("Master has completed initialization %.3fsec", (EnvironmentEdgeManager.currentTime() - masterActiveTime) / 1000.0f)); this.masterFinishedInitializationTime = EnvironmentEdgeManager.currentTime(); @@ -1196,6 +1199,9 @@ private void finishActiveMasterInitialization(MonitoredTask status) } // Set master as 'initialized'. setInitialized(true); + startupTaskGroup.markComplete("Initialization successful"); + MonitoredTask status = + TaskMonitor.get().createStatus("Progress after master initialized", false, true); if (tableFamilyDesc == null && replBarrierFamilyDesc == null) { // create missing CFs in meta table after master is set to 'initialized'. @@ -1277,9 +1283,9 @@ private void finishActiveMasterInitialization(MonitoredTask status) LOG.debug("Balancer post startup initialization complete, took " + ((EnvironmentEdgeManager.currentTime() - start) / 1000) + " seconds"); } - this.rollingUpgradeChore = new RollingUpgradeChore(this); getChoreService().scheduleChore(rollingUpgradeChore); + status.markComplete("Progress after master initialized complete"); } private void createMissingCFsInMetaDuringUpgrade(TableDescriptor metaDescriptor) @@ -2387,14 +2393,19 @@ private void startActiveMasterManager(int infoPort) throws KeeperException { Threads.sleep(timeout); } } - MonitoredTask status = TaskMonitor.get().createStatus("Master startup"); - status.setDescription("Master startup"); + + // Here for the master startup process, we use TaskGroup to monitor the whole progress. + // The UI is similar to how Hadoop designed the startup page for the NameNode. + // See HBASE-21521 for more details. + // We do not cleanup the startupTaskGroup, let the startup progress information + // be permanent in the MEM. + startupTaskGroup = TaskMonitor.createTaskGroup(true, "Master startup"); try { - if (activeMasterManager.blockUntilBecomingActiveMaster(timeout, status)) { - finishActiveMasterInitialization(status); + if (activeMasterManager.blockUntilBecomingActiveMaster(timeout, startupTaskGroup)) { + finishActiveMasterInitialization(); } } catch (Throwable t) { - status.setStatus("Failed to become active: " + t.getMessage()); + startupTaskGroup.abort("Failed to become active master due to:" + t.getMessage()); LOG.error(HBaseMarkers.FATAL, "Failed to become active master", t); // HBASE-5680: Likely hadoop23 vs hadoop 20.x/1.x incompatibility if ( @@ -2408,8 +2419,6 @@ private void startActiveMasterManager(int infoPort) throws KeeperException { } else { abort("Unhandled exception. 
Starting shutdown.", t); } - } finally { - status.cleanup(); } } @@ -3099,6 +3108,10 @@ public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() { return rsFatals; } + public TaskGroup getStartupProgress() { + return startupTaskGroup; + } + /** * Shutdown the cluster. Master runs a coordinated stop of all RegionServers and then itself. */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java index e8493fa15bf9..8d4e80bdebc8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java @@ -127,8 +127,8 @@ public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices ma // prepare the verify this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, workingDirFs); // update the running tasks - this.status = TaskMonitor.get() - .createStatus("Taking " + snapshot.getType() + " snapshot on table: " + snapshotTable, true); + this.status = TaskMonitor.get().createStatus( + "Taking " + snapshot.getType() + " snapshot on table: " + snapshotTable, false, true); this.snapshotManifest = SnapshotManifest.create(conf, rootFs, workingDir, snapshot, monitor, status); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/TaskGroup.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/TaskGroup.java new file mode 100644 index 000000000000..5f2faf08a3a1 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/TaskGroup.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.monitoring; + +import java.util.Collection; +import java.util.Collections; +import java.util.concurrent.ConcurrentLinkedDeque; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The {@link TaskGroup} can be seen as a big {@link MonitoredTask}, which contains a list of sub + * monitored tasks. The monitored tasks in the group are still be managed by the + * {@link TaskMonitor}, but whether to clear/expire the monitored tasks in a task group is optional. + * Since the monitored task already has journals, which mark the phases in a task, we still also + * need a task group to monitor a big task/process because the journals in a task is serial but the + * tasks in the task group can be parallel, then we have more flexible ability to monitor the + * process. Grouping the tasks is not strictly necessary but it is cleaner for presentation to + * operators. 
We might want to display the tasks in a group in a list view where each task can be + * collapsed (probably by default) or expanded. + */ +@InterfaceAudience.Private +public class TaskGroup extends MonitoredTaskImpl { + private static final Logger LOG = LoggerFactory.getLogger(TaskGroup.class); + + /** Sub-tasks in the group */ + private final ConcurrentLinkedDeque tasks = new ConcurrentLinkedDeque<>(); + + /** Whether to ignore to track(e.g. show/clear/expire) in the singleton {@link TaskMonitor} */ + private boolean ignoreSubTasksInTaskMonitor; + + /** Used to track this task group in {@link TaskMonitor} */ + private final MonitoredTask delegate; + + public TaskGroup(boolean ignoreSubTasksInTaskMonitor, String description) { + super(true, description); + this.ignoreSubTasksInTaskMonitor = ignoreSubTasksInTaskMonitor; + this.delegate = TaskMonitor.get().createStatus(description, false, true); + } + + public synchronized MonitoredTask addTask(String description) { + return addTask(description, true); + } + + /** + * Add a new task to the group, and before that might complete the last task in the group + * @param description the description of the new task + * @param withCompleteLast whether to complete the last task in the group + * @return the added new task + */ + public synchronized MonitoredTask addTask(String description, boolean withCompleteLast) { + if (withCompleteLast) { + MonitoredTask previousTask = this.tasks.peekLast(); + if ( + previousTask != null && previousTask.getState() != State.COMPLETE + && previousTask.getState() != State.ABORTED + ) { + previousTask.markComplete("Completed"); + } + } + MonitoredTask task = + TaskMonitor.get().createStatus(description, ignoreSubTasksInTaskMonitor, true); + this.setStatus(description); + this.tasks.addLast(task); + delegate.setStatus(description); + return task; + } + + public synchronized Collection getTasks() { + return Collections.unmodifiableCollection(this.tasks); + } + + @Override + public synchronized void abort(String msg) { + setStatus(msg); + setState(State.ABORTED); + for (MonitoredTask task : tasks) { + if (task.getState() != State.COMPLETE && task.getState() != State.ABORTED) { + task.abort(msg); + } + } + delegate.abort(msg); + } + + @Override + public synchronized void markComplete(String msg) { + setState(State.COMPLETE); + setStatus(msg); + if (tasks.getLast() != null) { + tasks.getLast().markComplete(msg); + } + delegate.markComplete(msg); + } + + @Override + public synchronized void cleanup() { + this.tasks.clear(); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/TaskMonitor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/TaskMonitor.java index d012d8229ecf..81ce9fec647d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/TaskMonitor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/TaskMonitor.java @@ -88,7 +88,20 @@ public MonitoredTask createStatus(String description) { return createStatus(description, false); } - public synchronized MonitoredTask createStatus(String description, boolean enableJournal) { + public MonitoredTask createStatus(String description, boolean ignore) { + return createStatus(description, ignore, false); + } + + /** + * Create a monitored task for users to inquire about the status + * @param description description of the status + * @param ignore whether to ignore to track(e.g. 
show/clear/expire) the task in the + * {@link TaskMonitor} + * @param enableJournal enable when the task contains some stage journals + * @return a monitored task + */ + public synchronized MonitoredTask createStatus(String description, boolean ignore, + boolean enableJournal) { MonitoredTask stat = new MonitoredTaskImpl(enableJournal, description); MonitoredTask proxy = (MonitoredTask) Proxy.newProxyInstance(stat.getClass().getClassLoader(), new Class[] { MonitoredTask.class }, new PassthroughInvocationHandler<>(stat)); @@ -96,10 +109,24 @@ public synchronized MonitoredTask createStatus(String description, boolean enabl if (tasks.isFull()) { purgeExpiredTasks(); } - tasks.add(pair); + if (!ignore) { + tasks.add(pair); + } return proxy; } + /** + * Create a task group which contains a series of monitored tasks for users to inquire about the + * status + * @param ignoreSubTasksInTaskMonitor whether to ignore to track(e.g. show/clear/expire) the task + * in the {@link TaskMonitor} + * @param description description of the status + * @return a group of monitored tasks + */ + public static TaskGroup createTaskGroup(boolean ignoreSubTasksInTaskMonitor, String description) { + return new TaskGroup(ignoreSubTasksInTaskMonitor, description); + } + public synchronized MonitoredRPCHandler createRPCStatus(String description) { MonitoredRPCHandler stat = new MonitoredRPCHandlerImpl(description); MonitoredRPCHandler proxy = diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 80a594de258a..614f60b01351 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -935,7 +935,8 @@ long initialize(final CancelableProgressable reporter) throws IOException { + " should have at least one column family."); } - MonitoredTask status = TaskMonitor.get().createStatus("Initializing region " + this, true); + MonitoredTask status = + TaskMonitor.get().createStatus("Initializing region " + this, false, true); long nextSeqId = -1; try { nextSeqId = initializeRegionInternals(reporter, status); @@ -1544,7 +1545,7 @@ public Map> close(boolean abort) throws IOException { // threads attempting to close will run up against each other. MonitoredTask status = TaskMonitor.get().createStatus( "Closing region " + this.getRegionInfo().getEncodedName() + (abort ? 
" due to abort" : ""), - true); + false, true); status.setStatus("Waiting for close lock"); try { synchronized (closeLock) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALSplitter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALSplitter.java index a6463094beae..62395bff10bc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALSplitter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALSplitter.java @@ -287,8 +287,8 @@ SplitWALResult splitWAL(FileStatus walStatus, CancelableProgressable cancel) thr boolean cancelled = false; int editsCount = 0; int editsSkipped = 0; - MonitoredTask status = - TaskMonitor.get().createStatus("Splitting " + wal + " to temporary staging area.", true); + MonitoredTask status = TaskMonitor.get() + .createStatus("Splitting " + wal + " to temporary staging area.", false, true); Reader walReader = null; this.fileBeingSplit = walStatus; long startTS = EnvironmentEdgeManager.currentTime(); diff --git a/hbase-server/src/main/resources/hbase-webapps/master/header.jsp b/hbase-server/src/main/resources/hbase-webapps/master/header.jsp index 3da82c95c750..7f4fa55f59da 100644 --- a/hbase-server/src/main/resources/hbase-webapps/master/header.jsp +++ b/hbase-server/src/main/resources/hbase-webapps/master/header.jsp @@ -74,6 +74,7 @@ <% if (HBaseConfiguration.isShowConfInServlet()) { %>

    <li><a href="/conf">HBase Configuration</a></li>
  <% } %>
+   <li><a href="/startupProgress.jsp">Startup Progress</a></li>
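The link above points to the new startupProgress.jsp page added below. For context, here is a minimal illustrative sketch (not part of the patch) of how the startup task group is produced and then read back the way the page reads it, using only the APIs introduced in this change (TaskMonitor.createTaskGroup, TaskGroup.addTask, TaskGroup.getTasks) plus existing MonitoredTask accessors; the class name and task descriptions are arbitrary:

import java.util.Date;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskGroup;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;

public class StartupProgressSketch {
  public static void main(String[] args) {
    // Producer side, similar to what HMaster#startActiveMasterManager now does.
    // Passing true keeps the sub-tasks out of the shared TaskMonitor task list.
    TaskGroup startup = TaskMonitor.createTaskGroup(true, "Master startup");
    MonitoredTask fsInit = startup.addTask("Initializing Master file system");
    fsInit.setStatus("Creating MasterFileSystem");
    // addTask() marks the previous unfinished task COMPLETE before adding the next one.
    startup.addTask("Initializing ZK system trackers");
    startup.markComplete("Initialization successful");

    // Consumer side: the same calls startupProgress.jsp makes for each row of its table.
    for (MonitoredTask task : startup.getTasks()) {
      System.out.println(task.getDescription() + " | " + task.getState().name() + " | "
        + new Date(task.getStartTime()) + " | "
        + (task.getStatusTime() - task.getStartTime()) + " ms");
    }
  }
}

Because the sub-tasks are kept out of the TaskMonitor list and the group is never cleaned up, the startup history stays available to the page for the lifetime of the master process.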
  • diff --git a/hbase-server/src/main/resources/hbase-webapps/master/startupProgress.jsp b/hbase-server/src/main/resources/hbase-webapps/master/startupProgress.jsp new file mode 100644 index 000000000000..f44d67cf848c --- /dev/null +++ b/hbase-server/src/main/resources/hbase-webapps/master/startupProgress.jsp @@ -0,0 +1,124 @@ +<%-- +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +--%> +<%@ page contentType="text/html;charset=UTF-8" + import="java.util.Date" + import="java.util.Iterator" + import="java.util.List" +%> +<%@ page import="org.apache.hadoop.hbase.master.HMaster" %> +<%@ page import="org.apache.hadoop.hbase.monitoring.MonitoredTask" %> +<%@ page import="org.apache.hadoop.hbase.monitoring.TaskGroup" %> +<% + final HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER); +%> + + + + +
+<% TaskGroup startupTaskGroup = master.getStartupProgress(); %>
+<div class="container-fluid content">
+  <h1>Startup Progress (<%= startupTaskGroup == null ? "" : getStartupStatusString(startupTaskGroup) %>)</h1>
+  <table class="table table-striped">
+    <tr>
+      <th>Task</th>
+      <th>Current State</th>
+      <th>Start Time</th>
+      <th>Last status Time</th>
+      <th>Elapsed Time(ms)</th>
+      <th>Journals</th>
+    </tr>
+    <%
+      if (startupTaskGroup != null) {
+        for (MonitoredTask task : startupTaskGroup.getTasks()) { %>
+    <tr>
+      <td><%= task.getDescription() %></td>
+      <td><%= task.getState().name() %></td>
+      <td><%= new Date(task.getStartTime()) %></td>
+      <td><%= new Date(task.getStatusTime()) %></td>
+      <td><%= task.getStatusTime() - task.getStartTime() %></td>
+      <td><%= printLatestJournals(task, 30) %></td>
+    </tr>
+    <% }
+      } %>
+  </table>
+</div>
+
+<%!
+  private static String printLatestJournals(MonitoredTask task, int count) {
+    List<MonitoredTask.StatusJournalEntry> journal = task.getStatusJournal();
+    if (journal == null) {
+      return "";
+    }
+    int journalSize = journal.size();
+    StringBuilder sb = new StringBuilder();
+    int skips = journalSize - count;
+    if (skips > 0) {
+      sb.append("Current journal size is ").append(journalSize).append(", ");
+      sb.append("skip the previous ones and show the latest ").append(count).append(" journals...");
+      sb.append("<br/>");
+    }
+    Iterator<MonitoredTask.StatusJournalEntry> iter = journal.iterator();
+    MonitoredTask.StatusJournalEntry previousEntry = null;
+    int i = 0;
+    while (iter.hasNext()) {
+      MonitoredTask.StatusJournalEntry entry = iter.next();
+      if (i >= skips) {
+        sb.append(entry);
+        if (previousEntry != null) {
+          long delta = entry.getTimeStamp() - previousEntry.getTimeStamp();
+          if (delta != 0) {
+            sb.append(" (+").append(delta).append(" ms)");
+          }
+        }
+        sb.append("<br/>
    "); + previousEntry = entry; + } + i++; + } + return sb.toString(); + } + + private static String getStartupStatusString(TaskGroup startupTaskGroup) { + MonitoredTask.State currentState = startupTaskGroup.getState(); + if (currentState.equals(MonitoredTask.State.COMPLETE)) { + return "Master initialized"; + } else if (currentState.equals(MonitoredTask.State.RUNNING) | + currentState.equals(MonitoredTask.State.WAITING)) { + return "Master initialize in progress"; + } else { + return currentState.toString(); + } + } +%> diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/AlwaysStandByHMaster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/AlwaysStandByHMaster.java index 2bef48a79556..cddc38615ee8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/AlwaysStandByHMaster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/AlwaysStandByHMaster.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.monitoring.MonitoredTask; +import org.apache.hadoop.hbase.monitoring.TaskGroup; import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker; import org.apache.hadoop.hbase.zookeeper.ZKWatcher; @@ -53,9 +54,11 @@ private static class AlwaysStandByMasterManager extends ActiveMasterManager { /** * An implementation that never transitions to an active master. */ - boolean blockUntilBecomingActiveMaster(int checkInterval, MonitoredTask startupStatus) { + @Override + boolean blockUntilBecomingActiveMaster(int checkInterval, TaskGroup startupTaskGroup) { + MonitoredTask loopTask = startupTaskGroup.addTask("Stay as a standby master."); while (!(master.isAborted() || master.isStopped())) { - startupStatus.setStatus("Forever looping to stay as a standby master."); + loopTask.setStatus("Forever looping to stay as a standby master."); try { activeMasterServerName = null; try { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java index 7d7ce4f71895..0206e23cee4f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java @@ -21,6 +21,8 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.when; import java.io.IOException; import java.io.InterruptedIOException; @@ -38,6 +40,7 @@ import org.apache.hadoop.hbase.client.ClusterConnection; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.monitoring.MonitoredTask; +import org.apache.hadoop.hbase.monitoring.TaskGroup; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; @@ -100,7 +103,7 @@ public void testRestartMaster() throws IOException, KeeperException { assertFalse(activeMasterManager.getActiveMasterServerName().isPresent()); // First test becoming the active master uninterrupted - MonitoredTask status = Mockito.mock(MonitoredTask.class); + TaskGroup status = mockTaskGroup(); clusterStatusTracker.setClusterUp(); activeMasterManager.blockUntilBecomingActiveMaster(100, status); @@ 
-149,7 +152,8 @@ public void testActiveMasterManagerFromZK() throws Exception { // First test becoming the active master uninterrupted ClusterStatusTracker clusterStatusTracker = ms1.getClusterStatusTracker(); clusterStatusTracker.setClusterUp(); - activeMasterManager.blockUntilBecomingActiveMaster(100, Mockito.mock(MonitoredTask.class)); + + activeMasterManager.blockUntilBecomingActiveMaster(100, mockTaskGroup()); assertTrue(activeMasterManager.clusterHasActiveMaster.get()); assertMaster(zk, firstMasterAddress); assertMaster(zk, activeMasterManager.getActiveMasterServerName().get()); @@ -215,7 +219,7 @@ public void testBackupMasterUpdates() throws Exception { ServerName sn1 = ServerName.valueOf("localhost", 1, -1); DummyMaster master1 = new DummyMaster(zk, sn1); ActiveMasterManager activeMasterManager = master1.getActiveMasterManager(); - activeMasterManager.blockUntilBecomingActiveMaster(100, Mockito.mock(MonitoredTask.class)); + activeMasterManager.blockUntilBecomingActiveMaster(100, mockTaskGroup()); assertEquals(sn1, activeMasterManager.getActiveMasterServerName().get()); assertEquals(0, activeMasterManager.getBackupMasters().size()); // Add backup masters @@ -268,12 +272,19 @@ public WaitToBeMasterThread(ZKWatcher zk, ServerName address) throws Interrupted @Override public void run() { - manager.blockUntilBecomingActiveMaster(100, Mockito.mock(MonitoredTask.class)); + manager.blockUntilBecomingActiveMaster(100, mockTaskGroup()); LOG.info("Second master has become the active master!"); isActiveMaster = true; } } + private static TaskGroup mockTaskGroup() { + TaskGroup taskGroup = Mockito.mock(TaskGroup.class); + MonitoredTask task = Mockito.mock(MonitoredTask.class); + when(taskGroup.addTask(any())).thenReturn(task); + return taskGroup; + } + public static class NodeDeletionListener extends ZKListener { private static final Logger LOG = LoggerFactory.getLogger(NodeDeletionListener.class); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/monitoring/TestTaskMonitor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/monitoring/TestTaskMonitor.java index c84eaeac07cb..9321efe0ee52 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/monitoring/TestTaskMonitor.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/monitoring/TestTaskMonitor.java @@ -19,10 +19,12 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; @@ -218,7 +220,7 @@ public void testStatusJournal() { task.setStatus("status1"); // journal should be empty since it is disabled assertTrue(task.getStatusJournal().isEmpty()); - task = tm.createStatus("Test task with journal", true); + task = tm.createStatus("Test task with journal", false, true); task.setStatus("status2"); assertEquals(1, task.getStatusJournal().size()); assertEquals("status2", task.getStatusJournal().get(0).getStatus()); @@ -229,6 +231,26 @@ public void testStatusJournal() { tm.shutdown(); } + @Test + public void testTaskGroup() { + TaskGroup group = TaskMonitor.createTaskGroup(true, "test task group"); + group.addTask("task1"); + MonitoredTask task2 = group.addTask("task2"); + task2.setStatus("task2 status2"); + task2.setStatus("task2 status3"); + group.addTask("task3"); + group.markComplete("group 
complete"); + Collection tasks = group.getTasks(); + assertNotNull(tasks); + assertEquals(tasks.size(), 3); + for (MonitoredTask task : tasks) { + if (task.getDescription().equals("task2")) { + assertEquals(task.getStatusJournal().size(), 3); + task.prettyPrintJournal(); + } + } + } + @Test public void testClone() throws Exception { MonitoredRPCHandlerImpl monitor = new MonitoredRPCHandlerImpl("test");