From b76f6eb4f66cc551573907b746097afba15ef763 Mon Sep 17 00:00:00 2001 From: Umesh <9414umeshkumar@gmail.com> Date: Mon, 5 Aug 2024 17:59:11 +0530 Subject: [PATCH] HBASE-28690 added masterStartCode as fencing token for remote procedures (#1) * HBASE-28690 added masterStartCode as fencing token for remote procedures * HBASE-28690 comments updated * HBASE-28690 add masterStartCode for RemoteProcedureRequest * HBASE-28690 used master active time for fencing and review comments * HBASE-28690 minor comment addition * HBASE-28690 spotless apply * HBASE-28690 reduce log line length for checkstyle --------- Co-authored-by: ukumawat --- .../hbase/shaded/protobuf/ProtobufUtil.java | 4 ++- .../src/main/protobuf/Admin.proto | 6 ++++ .../main/protobuf/RegionServerStatus.proto | 5 +++ .../hbase/master/MasterRpcServices.java | 33 +++++++++++++++++-- .../procedure/RSProcedureDispatcher.java | 22 +++++++++---- .../hbase/regionserver/HRegionServer.java | 12 ++++--- .../hbase/regionserver/RSRpcServices.java | 17 ++++++---- .../regionserver/RegionServerServices.java | 20 +++++++++-- .../RemoteProcedureResultReporter.java | 5 +-- .../hbase/regionserver/SplitRequest.java | 2 +- .../handler/AssignRegionHandler.java | 17 +++++++--- .../handler/CloseRegionHandler.java | 2 +- .../handler/OpenRegionHandler.java | 7 ++-- .../handler/RSProcedureHandler.java | 9 +++-- .../handler/UnassignRegionHandler.java | 16 ++++++--- 15 files changed, 134 insertions(+), 43 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java index 7c33e123231e..0fe181b5039e 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java @@ -3054,10 +3054,12 @@ public static CloseRegionRequest buildCloseRegionRequest(ServerName server, byte } public static CloseRegionRequest buildCloseRegionRequest(ServerName server, byte[] regionName, - ServerName destinationServer, long closeProcId, boolean evictCache) { + ServerName destinationServer, long closeProcId, boolean evictCache, + long initiatingMasterActiveTime) { CloseRegionRequest.Builder builder = getBuilder(server, regionName, destinationServer, closeProcId); builder.setEvictCache(evictCache); + builder.setInitiatingMasterActiveTime(initiatingMasterActiveTime); return builder.build(); } diff --git a/hbase-protocol-shaded/src/main/protobuf/Admin.proto b/hbase-protocol-shaded/src/main/protobuf/Admin.proto index ae0bd78fbf98..c9edc2877b88 100644 --- a/hbase-protocol-shaded/src/main/protobuf/Admin.proto +++ b/hbase-protocol-shaded/src/main/protobuf/Admin.proto @@ -80,6 +80,8 @@ message OpenRegionRequest { repeated RegionOpenInfo open_info = 1; // the intended server for this RPC. optional uint64 serverStartCode = 2; + // Master active time as fencing token + optional int64 initiating_master_active_time = 3; // wall clock time from master optional uint64 master_system_time = 5; @@ -123,6 +125,8 @@ message CloseRegionRequest { optional uint64 serverStartCode = 5; optional int64 close_proc_id = 6 [default = -1]; optional bool evict_cache = 7 [default = false]; + // Master active time as fencing token + optional int64 initiating_master_active_time = 8; } message CloseRegionResponse { @@ -272,6 +276,8 @@ message RemoteProcedureRequest { required uint64 proc_id = 1; required string proc_class = 2; optional bytes proc_data = 3; + // Master active time as fencing token + optional int64 initiating_master_active_time = 4; } message ExecuteProceduresRequest { diff --git a/hbase-protocol-shaded/src/main/protobuf/RegionServerStatus.proto b/hbase-protocol-shaded/src/main/protobuf/RegionServerStatus.proto index 6aed5b467189..53751082f933 100644 --- a/hbase-protocol-shaded/src/main/protobuf/RegionServerStatus.proto +++ b/hbase-protocol-shaded/src/main/protobuf/RegionServerStatus.proto @@ -97,6 +97,9 @@ message RegionStateTransition { optional uint64 open_seq_num = 3; repeated int64 proc_id = 4; + + // Master active time as fencing token + optional int64 initiating_master_active_time = 5; enum TransitionCode { OPENED = 0; FAILED_OPEN = 1; @@ -155,6 +158,8 @@ message RemoteProcedureResult { } required Status status = 2; optional ForeignExceptionMessage error = 3; + // Master active time as fencing token + optional int64 initiating_master_active_time = 4; } message ReportProcedureDoneRequest { repeated RemoteProcedureResult result = 1; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index 86c16f80a256..d92a6e1ee0f5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.ClusterMetricsBuilder; import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.MasterNotRunningException; import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.NamespaceDescriptor; import org.apache.hadoop.hbase.Server; @@ -338,6 +339,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.QuotaProtos.GetSpaceQuotaRegionSizesResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.QuotaProtos.GetSpaceQuotaRegionSizesResponse.RegionSizes; import org.apache.hadoop.hbase.shaded.protobuf.generated.RecentLogs; +import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.FileArchiveNotificationRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.FileArchiveNotificationResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.GetLastFlushedSequenceIdRequest; @@ -1794,6 +1796,15 @@ public ReportRegionStateTransitionResponse reportRegionStateTransition(RpcContro ReportRegionStateTransitionRequest req) throws ServiceException { try { master.checkServiceStarted(); + for (RegionServerStatusProtos.RegionStateTransition transition : req.getTransitionList()) { + long procId = + transition.getProcIdCount() > 0 ? transition.getProcId(0) : Procedure.NO_PROC_ID; + // -1 is less than any possible MasterActiveCode + long initiatingMasterActiveTime = transition.hasInitiatingMasterActiveTime() + ? transition.getInitiatingMasterActiveTime() + : -1; + throwOnOldMasterStartCode(procId, initiatingMasterActiveTime); + } return master.getAssignmentManager().reportRegionStateTransition(req); } catch (IOException ioe) { throw new ServiceException(ioe); @@ -2544,8 +2555,14 @@ public ReportProcedureDoneResponse reportProcedureDone(RpcController controller, // Check Masters is up and ready for duty before progressing. Remote side will keep trying. try { this.master.checkServiceStarted(); - } catch (ServerNotRunningYetException snrye) { - throw new ServiceException(snrye); + for (RemoteProcedureResult result : request.getResultList()) { + // -1 is less than any possible MasterActiveCode + long initiatingMasterActiveTime = + result.hasInitiatingMasterActiveTime() ? result.getInitiatingMasterActiveTime() : -1; + throwOnOldMasterStartCode(result.getProcId(), initiatingMasterActiveTime); + } + } catch (IOException ioe) { + throw new ServiceException(ioe); } request.getResultList().forEach(result -> { if (result.getStatus() == RemoteProcedureResult.Status.SUCCESS) { @@ -2558,6 +2575,18 @@ public ReportProcedureDoneResponse reportProcedureDone(RpcController controller, return ReportProcedureDoneResponse.getDefaultInstance(); } + private void throwOnOldMasterStartCode(long procId, long initiatingMasterActiveTime) + throws MasterNotRunningException { + if (initiatingMasterActiveTime > master.getMasterActiveTime()) { + // procedure is initiated by new active master but report received on master with older active + // time + LOG.warn( + "Report for procId: {} and initiatingMasterAT {} received on master with activeTime {}", + procId, initiatingMasterActiveTime, master.getMasterActiveTime()); + throw new MasterNotRunningException("Another master is active"); + } + } + // HBCK Services @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java index acabc153978a..a8a0e4f62566 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.ipc.RpcConnectionConstants; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; +import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.ServerListener; import org.apache.hadoop.hbase.master.ServerManager; @@ -421,13 +422,16 @@ public void dispatchOpenRequests(final MasterProcedureEnv env, public void dispatchCloseRequests(final MasterProcedureEnv env, final List operations) { for (RegionCloseOperation op : operations) { - request.addCloseRegion(op.buildCloseRegionRequest(getServerName())); + request.addCloseRegion(op.buildCloseRegionRequest(getServerName(), + ((HMaster) env.getMasterServices()).getMasterActiveTime())); } } @Override public void dispatchServerOperations(MasterProcedureEnv env, List operations) { - operations.stream().map(o -> o.buildRequest()).forEachOrdered(request::addProc); + operations.stream() + .map(o -> o.buildRequest(((HMaster) env.getMasterServices()).getMasterActiveTime())) + .forEachOrdered(request::addProc); } // will be overridden in test. @@ -450,7 +454,9 @@ protected final void remoteCallFailed(final MasterProcedureEnv env, final IOExce private static OpenRegionRequest buildOpenRegionRequest(final MasterProcedureEnv env, final ServerName serverName, final List operations) { final OpenRegionRequest.Builder builder = OpenRegionRequest.newBuilder(); - builder.setServerStartCode(serverName.getStartcode()); + builder.setServerStartCode(serverName.getStartCode()); + builder + .setInitiatingMasterActiveTime(((HMaster) env.getMasterServices()).getMasterActiveTime()); builder.setMasterSystemTime(EnvironmentEdgeManager.currentTime()); for (RegionOpenOperation op : operations) { builder.addOpenInfo(op.buildRegionOpenInfoRequest(env)); @@ -480,9 +486,10 @@ public ServerOperation(RemoteProcedure remoteProcedure, long procId, Class rs this.rsProcData = rsProcData; } - public RemoteProcedureRequest buildRequest() { + public RemoteProcedureRequest buildRequest(long initiatingMasterActiveTime) { return RemoteProcedureRequest.newBuilder().setProcId(procId) - .setProcClass(rsProcClass.getName()).setProcData(ByteString.copyFrom(rsProcData)).build(); + .setProcClass(rsProcClass.getName()).setProcData(ByteString.copyFrom(rsProcData)) + .setInitiatingMasterActiveTime(initiatingMasterActiveTime).build(); } } @@ -526,9 +533,10 @@ public ServerName getDestinationServer() { return destinationServer; } - public CloseRegionRequest buildCloseRegionRequest(final ServerName serverName) { + public CloseRegionRequest buildCloseRegionRequest(final ServerName serverName, + long initiatingMasterActiveTime) { return ProtobufUtil.buildCloseRegionRequest(serverName, regionInfo.getRegionName(), - getDestinationServer(), procId, evictCache); + getDestinationServer(), procId, evictCache, initiatingMasterActiveTime); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index c50d964ca503..ee816cc5d32c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -2596,6 +2596,7 @@ public void postOpenDeployTasks(final PostOpenDeployContext context) throws IOEx HRegion r = context.getRegion(); long openProcId = context.getOpenProcId(); long masterSystemTime = context.getMasterSystemTime(); + long initiatingMasterActiveTime = context.getInitiatingMasterActiveTime(); rpcServices.checkOpen(); LOG.info("Post open deploy tasks for {}, pid={}, masterSystemTime={}", r.getRegionInfo().getRegionNameAsString(), openProcId, masterSystemTime); @@ -2616,7 +2617,7 @@ public void postOpenDeployTasks(final PostOpenDeployContext context) throws IOEx // Notify master if ( !reportRegionStateTransition(new RegionStateTransitionContext(TransitionCode.OPENED, - openSeqNum, openProcId, masterSystemTime, r.getRegionInfo())) + openSeqNum, openProcId, masterSystemTime, r.getRegionInfo(), initiatingMasterActiveTime)) ) { throw new IOException( "Failed to report opened region to master: " + r.getRegionInfo().getRegionNameAsString()); @@ -2677,6 +2678,7 @@ private boolean skipReportingTransition(final RegionStateTransitionContext conte for (long procId : procIds) { transition.addProcId(procId); } + transition.setInitiatingMasterActiveTime(context.getInitiatingMasterActiveTime()); return builder.build(); } @@ -4080,12 +4082,12 @@ public Connection createConnection(Configuration conf) throws IOException { this.rpcServices, this.rpcServices, new RegionServerRegistry(this)); } - void executeProcedure(long procId, RSProcedureCallable callable) { - executorService.submit(new RSProcedureHandler(this, procId, callable)); + void executeProcedure(long procId, long initiatingMasterActiveTime, RSProcedureCallable callable) { + executorService.submit(new RSProcedureHandler(this, procId,initiatingMasterActiveTime, callable)); } - public void remoteProcedureComplete(long procId, Throwable error) { - procedureResultReporter.complete(procId, error); + public void remoteProcedureComplete(long procId, long initiatingMasterActiveTime, Throwable error) { + procedureResultReporter.complete(procId,initiatingMasterActiveTime, error); } void reportProcedureDone(ReportProcedureDoneRequest request) throws IOException { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java index 193d75226bbf..85256673caa6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java @@ -3920,6 +3920,8 @@ public ClearRegionBlockCacheResponse clearRegionBlockCache(RpcController control private void executeOpenRegionProcedures(OpenRegionRequest request, Map tdCache) { long masterSystemTime = request.hasMasterSystemTime() ? request.getMasterSystemTime() : -1; + long initiatingMasterActiveTime = + request.hasInitiatingMasterActiveTime() ? request.getInitiatingMasterActiveTime() : -1; for (RegionOpenInfo regionOpenInfo : request.getOpenInfoList()) { RegionInfo regionInfo = ProtobufUtil.toRegionInfo(regionOpenInfo.getRegion()); TableName tableName = regionInfo.getTable(); @@ -3945,14 +3947,16 @@ private void executeOpenRegionProcedures(OpenRegionRequest request, } long procId = regionOpenInfo.getOpenProcId(); if (regionServer.submitRegionProcedure(procId)) { - regionServer.executorService.submit(AssignRegionHandler.create(regionServer, regionInfo, - procId, tableDesc, masterSystemTime)); + regionServer.getExecutorService().submit(AssignRegionHandler.create(regionServer, regionInfo, procId, + tableDesc, masterSystemTime, initiatingMasterActiveTime)); } } } private void executeCloseRegionProcedures(CloseRegionRequest request) { String encodedName; + long initiatingMasterActiveTime = + request.hasInitiatingMasterActiveTime() ? request.getInitiatingMasterActiveTime() : -1; try { encodedName = ProtobufUtil.getRegionEncodedName(request.getRegion()); } catch (DoNotRetryIOException e) { @@ -3964,8 +3968,8 @@ private void executeCloseRegionProcedures(CloseRegionRequest request) { long procId = request.getCloseProcId(); boolean evictCache = request.getEvictCache(); if (regionServer.submitRegionProcedure(procId)) { - regionServer.getExecutorService().submit(UnassignRegionHandler.create(regionServer, - encodedName, procId, false, destination, evictCache)); + regionServer.getExecutorService().submit(UnassignRegionHandler.create(regionServer, encodedName, procId, + false, destination, evictCache, initiatingMasterActiveTime)); } } @@ -3977,12 +3981,13 @@ private void executeProcedures(RemoteProcedureRequest request) { } catch (Exception e) { LOG.warn("Failed to instantiating remote procedure {}, pid={}", request.getProcClass(), request.getProcId(), e); - regionServer.remoteProcedureComplete(request.getProcId(), e); + regionServer.remoteProcedureComplete(request.getProcId(), request.getInitiatingMasterActiveTime(), + e); return; } callable.init(request.getProcData().toByteArray(), regionServer); LOG.debug("Executing remote procedure {}, pid={}", callable.getClass(), request.getProcId()); - regionServer.executeProcedure(request.getProcId(), callable); + regionServer.executeProcedure(request.getProcId(), request.getInitiatingMasterActiveTime(), callable); } @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java index 0906a4b44b1c..95210f0bdd49 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java @@ -91,11 +91,14 @@ class PostOpenDeployContext { private final HRegion region; private final long openProcId; private final long masterSystemTime; + private final long initiatingMasterActiveTime; - public PostOpenDeployContext(HRegion region, long openProcId, long masterSystemTime) { + public PostOpenDeployContext(HRegion region, long openProcId, long masterSystemTime, + long initiatingMasterActiveTime) { this.region = region; this.openProcId = openProcId; this.masterSystemTime = masterSystemTime; + this.initiatingMasterActiveTime = initiatingMasterActiveTime; } public HRegion getRegion() { @@ -109,6 +112,10 @@ public long getOpenProcId() { public long getMasterSystemTime() { return masterSystemTime; } + + public long getInitiatingMasterActiveTime() { + return initiatingMasterActiveTime; + } } /** @@ -121,23 +128,26 @@ class RegionStateTransitionContext { private final TransitionCode code; private final long openSeqNum; private final long masterSystemTime; + private final long initiatingMasterActiveTime; private final long[] procIds; private final RegionInfo[] hris; public RegionStateTransitionContext(TransitionCode code, long openSeqNum, long masterSystemTime, - RegionInfo... hris) { + long initiatingMasterActiveTime, RegionInfo... hris) { this.code = code; this.openSeqNum = openSeqNum; this.masterSystemTime = masterSystemTime; + this.initiatingMasterActiveTime = initiatingMasterActiveTime; this.hris = hris; this.procIds = new long[hris.length]; } public RegionStateTransitionContext(TransitionCode code, long openSeqNum, long procId, - long masterSystemTime, RegionInfo hri) { + long masterSystemTime, RegionInfo hri, long initiatingMasterActiveTime) { this.code = code; this.openSeqNum = openSeqNum; this.masterSystemTime = masterSystemTime; + this.initiatingMasterActiveTime = initiatingMasterActiveTime; this.hris = new RegionInfo[] { hri }; this.procIds = new long[] { procId }; } @@ -161,6 +171,10 @@ public RegionInfo[] getHris() { public long[] getProcIds() { return procIds; } + + public long getInitiatingMasterActiveTime() { + return initiatingMasterActiveTime; + } } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java index 817ecd42ce0b..21016fe59dd0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java @@ -51,8 +51,9 @@ public RemoteProcedureResultReporter(HRegionServer server) { this.server = server; } - public void complete(long procId, Throwable error) { - RemoteProcedureResult.Builder builder = RemoteProcedureResult.newBuilder().setProcId(procId); + public void complete(long procId, long initiatingMasterActiveTime, Throwable error) { + RemoteProcedureResult.Builder builder = RemoteProcedureResult.newBuilder().setProcId(procId) + .setInitiatingMasterActiveTime(initiatingMasterActiveTime); if (error != null) { LOG.debug("Failed to complete execution of pid={}", procId, error); builder.setStatus(RemoteProcedureResult.Status.ERROR).setError( diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java index 20a7e0c9af2f..d979a3ac82e2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java @@ -81,7 +81,7 @@ private void requestRegionSplit() { // are created just to pass the information to the reportRegionStateTransition(). if ( !server.reportRegionStateTransition(new RegionStateTransitionContext( - TransitionCode.READY_TO_SPLIT, HConstants.NO_SEQNUM, -1, parent, hri_a, hri_b)) + TransitionCode.READY_TO_SPLIT, HConstants.NO_SEQNUM, -1, -1, parent, hri_a, hri_b)) ) { LOG.error("Unable to ask master to split " + parent.getRegionNameAsString()); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/AssignRegionHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/AssignRegionHandler.java index a9ab6f502a35..559777ac95f1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/AssignRegionHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/AssignRegionHandler.java @@ -62,15 +62,20 @@ public class AssignRegionHandler extends EventHandler { private final long masterSystemTime; + // active time of the master that sent this assign request + private final long initiatingMasterActiveTime; + private final RetryCounter retryCounter; public AssignRegionHandler(HRegionServer server, RegionInfo regionInfo, long openProcId, - @Nullable TableDescriptor tableDesc, long masterSystemTime, EventType eventType) { + @Nullable TableDescriptor tableDesc, long masterSystemTime, long initiatingMasterActiveTime, + EventType eventType) { super(server, eventType); this.regionInfo = regionInfo; this.openProcId = openProcId; this.tableDesc = tableDesc; this.masterSystemTime = masterSystemTime; + this.initiatingMasterActiveTime = initiatingMasterActiveTime; this.retryCounter = HandlerUtil.getRetryCounter(); } @@ -85,7 +90,7 @@ private void cleanUpAndReportFailure(IOException error) throws IOException { rs.getRegionsInTransitionInRS().remove(regionInfo.getEncodedNameAsBytes(), Boolean.TRUE); if ( !rs.reportRegionStateTransition(new RegionStateTransitionContext(TransitionCode.FAILED_OPEN, - HConstants.NO_SEQNUM, openProcId, masterSystemTime, regionInfo)) + HConstants.NO_SEQNUM, openProcId, masterSystemTime, regionInfo, initiatingMasterActiveTime)) ) { throw new IOException( "Failed to report failed open to master: " + regionInfo.getRegionNameAsString()); @@ -153,7 +158,8 @@ public void process() throws IOException { } // From here on out, this is PONR. We can not revert back. The only way to address an // exception from here on out is to abort the region server. - rs.postOpenDeployTasks(new PostOpenDeployContext(region, openProcId, masterSystemTime)); + rs.postOpenDeployTasks( + new PostOpenDeployContext(region, openProcId, masterSystemTime, initiatingMasterActiveTime)); rs.addRegion(region); LOG.info("Opened {}", regionName); // Cache the open region procedure id after report region transition succeed. @@ -180,7 +186,8 @@ protected void handleException(Throwable t) { } public static AssignRegionHandler create(HRegionServer server, RegionInfo regionInfo, - long openProcId, TableDescriptor tableDesc, long masterSystemTime) { + long openProcId, TableDescriptor tableDesc, long masterSystemTime, + long initiatingMasterActiveTime) { EventType eventType; if (regionInfo.isMetaRegion()) { eventType = EventType.M_RS_OPEN_META; @@ -193,6 +200,6 @@ public static AssignRegionHandler create(HRegionServer server, RegionInfo region eventType = EventType.M_RS_OPEN_REGION; } return new AssignRegionHandler(server, regionInfo, openProcId, tableDesc, masterSystemTime, - eventType); + initiatingMasterActiveTime, eventType); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java index e184cb42fb91..f18e7d9ba635 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java @@ -111,7 +111,7 @@ public void process() throws IOException { this.rsServices.removeRegion(region, destination); rsServices.reportRegionStateTransition(new RegionStateTransitionContext(TransitionCode.CLOSED, - HConstants.NO_SEQNUM, Procedure.NO_PROC_ID, -1, regionInfo)); + HConstants.NO_SEQNUM, Procedure.NO_PROC_ID, -1, regionInfo, -1)); // Done! Region is closed on this RS LOG.debug("Closed {}", region.getRegionInfo().getRegionNameAsString()); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java index 898121602a4e..0430b442410c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java @@ -166,8 +166,9 @@ private void doCleanUpOnFailedOpen(HRegion region) throws IOException { cleanupFailedOpen(region); } } finally { - rsServices.reportRegionStateTransition(new RegionStateTransitionContext( - TransitionCode.FAILED_OPEN, HConstants.NO_SEQNUM, Procedure.NO_PROC_ID, -1, regionInfo)); + rsServices + .reportRegionStateTransition(new RegionStateTransitionContext(TransitionCode.FAILED_OPEN, + HConstants.NO_SEQNUM, Procedure.NO_PROC_ID, -1, regionInfo, -1)); } } @@ -253,7 +254,7 @@ static class PostOpenDeployTasksThread extends Thread { public void run() { try { this.services.postOpenDeployTasks( - new PostOpenDeployContext(region, Procedure.NO_PROC_ID, masterSystemTime)); + new PostOpenDeployContext(region, Procedure.NO_PROC_ID, masterSystemTime, -1)); } catch (Throwable e) { String msg = "Exception running postOpenDeployTasks; region=" + this.region.getRegionInfo().getEncodedName(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/RSProcedureHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/RSProcedureHandler.java index d3ecc8a51e22..0a04b1bd5f82 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/RSProcedureHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/RSProcedureHandler.java @@ -35,12 +35,17 @@ public class RSProcedureHandler extends EventHandler { private final long procId; + // active time of the master that sent this procedure request + private final long initiatingMasterActiveTime; + private final RSProcedureCallable callable; - public RSProcedureHandler(HRegionServer rs, long procId, RSProcedureCallable callable) { + public RSProcedureHandler(HRegionServer rs, long procId, long initiatingMasterActiveTime, + RSProcedureCallable callable) { super(rs, callable.getEventType()); this.procId = procId; this.callable = callable; + this.initiatingMasterActiveTime = initiatingMasterActiveTime; } @Override @@ -53,7 +58,7 @@ public void process() { LOG.error("pid=" + this.procId, t); error = t; } finally { - ((HRegionServer) server).remoteProcedureComplete(procId, error); + ((HRegionServer) server).remoteProcedureComplete(procId, initiatingMasterActiveTime, error); } } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/UnassignRegionHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/UnassignRegionHandler.java index 3c8def6db34d..a5eaffbefb97 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/UnassignRegionHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/UnassignRegionHandler.java @@ -65,13 +65,17 @@ public class UnassignRegionHandler extends EventHandler { private boolean evictCache; + // active time of the master that sent this unassign request + private final long initiatingMasterActiveTime; + public UnassignRegionHandler(HRegionServer server, String encodedName, long closeProcId, boolean abort, @Nullable ServerName destination, EventType eventType) { - this(server, encodedName, closeProcId, abort, destination, eventType, false); + this(server, encodedName, closeProcId, abort, destination, eventType, -1, false); } public UnassignRegionHandler(HRegionServer server, String encodedName, long closeProcId, - boolean abort, @Nullable ServerName destination, EventType eventType, boolean evictCache) { + boolean abort, @Nullable ServerName destination, EventType eventType, + long initiatingMasterActiveTime, boolean evictCache) { super(server, eventType); this.encodedName = encodedName; this.closeProcId = closeProcId; @@ -79,6 +83,7 @@ public UnassignRegionHandler(HRegionServer server, String encodedName, long clos this.destination = destination; this.retryCounter = HandlerUtil.getRetryCounter(); this.evictCache = evictCache; + this.initiatingMasterActiveTime = initiatingMasterActiveTime; } private HRegionServer getServer() { @@ -151,7 +156,7 @@ public void process() throws IOException { } if ( !rs.reportRegionStateTransition(new RegionStateTransitionContext(TransitionCode.CLOSED, - HConstants.NO_SEQNUM, closeProcId, -1, region.getRegionInfo())) + HConstants.NO_SEQNUM, closeProcId, -1, region.getRegionInfo(), initiatingMasterActiveTime)) ) { throw new IOException("Failed to report close to master: " + regionName); } @@ -171,7 +176,8 @@ protected void handleException(Throwable t) { } public static UnassignRegionHandler create(HRegionServer server, String encodedName, - long closeProcId, boolean abort, @Nullable ServerName destination, boolean evictCache) { + long closeProcId, boolean abort, @Nullable ServerName destination, boolean evictCache, + long initiatingMasterActiveTime) { // Just try our best to determine whether it is for closing meta. It is not the end of the world // if we put the handler into a wrong executor. Region region = server.getRegion(encodedName); @@ -179,6 +185,6 @@ public static UnassignRegionHandler create(HRegionServer server, String encodedN ? EventType.M_RS_CLOSE_META : EventType.M_RS_CLOSE_REGION; return new UnassignRegionHandler(server, encodedName, closeProcId, abort, destination, - eventType, evictCache); + eventType, initiatingMasterActiveTime, evictCache); } }