From 4399b4ca3880de59e176833557d91153137d1ab3 Mon Sep 17 00:00:00 2001 From: "lvhaiping.lhp" Date: Tue, 5 Dec 2023 21:31:50 +0800 Subject: [PATCH 1/2] Fixed the check for an ongoing Snapshot before proceeding with the merge region operation. --- .../hbase/master/assignment/MergeTableRegionsProcedure.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java index c0b47b0bc246..7d4ec71d35b1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java @@ -442,7 +442,7 @@ protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) { private boolean prepareMergeRegion(final MasterProcedureEnv env) throws IOException { // Fail if we are taking snapshot for the given table TableName tn = regionsToMerge[0].getTable(); - if (env.getMasterServices().getSnapshotManager().isTakingSnapshot(tn)) { + if (env.getMasterServices().getSnapshotManager().isTableTakingAnySnapshot(tn)) { throw new MergeRegionException("Skip merging regions " + RegionInfo.getShortNameToLog(regionsToMerge) + ", because we are snapshotting " + tn); } From d99340cef9ef4c2e4d9fc29a77566df35d97a664 Mon Sep 17 00:00:00 2001 From: "lvhaiping.lhp" Date: Mon, 11 Dec 2023 21:21:50 +0800 Subject: [PATCH 2/2] Fixed the check for an ongoing Snapshot before proceeding with the split region operation. --- .../assignment/SplitTableRegionProcedure.java | 3 +- .../TestMergeTableRegionsProcedure.java | 44 +++++++++++++++ .../TestSplitTableRegionProcedure.java | 53 +++++++++++++++++++ .../procedure/TestSnapshotProcedure.java | 24 +++++++++ 4 files changed, 123 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java index a0118cbd7b05..2e2182b25d29 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java @@ -505,7 +505,8 @@ private byte[] getSplitRow() { public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException { // Fail if we are taking snapshot for the given table if ( - env.getMasterServices().getSnapshotManager().isTakingSnapshot(getParentRegion().getTable()) + env.getMasterServices().getSnapshotManager() + .isTableTakingAnySnapshot(getParentRegion().getTable()) ) { setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog() + ", because we are taking snapshot for the table " + getParentRegion().getTable())); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java index abc6fc45ad30..c0c4e355f2bd 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hbase.master.assignment; +import 
static org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.assertProcFailed; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -33,16 +34,20 @@ import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.SnapshotDescription; +import org.apache.hadoop.hbase.client.SnapshotType; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableDescriptorBuilder; import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility; +import org.apache.hadoop.hbase.master.procedure.TestSnapshotProcedure; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.util.Bytes; @@ -59,6 +64,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; + @Category({ MasterTests.class, LargeTests.class }) public class TestMergeTableRegionsProcedure { @@ -347,6 +355,42 @@ public void testMergeWithoutPONR() throws Exception { assertRegionCount(tableName, initialRegionCount - 1); } + @Test + public void testMergingRegionWhileTakingSnapshot() throws Exception { + final TableName tableName = TableName.valueOf("testMergingRegionWhileTakingSnapshot"); + final ProcedureExecutor procExec = getMasterProcedureExecutor(); + + List tableRegions = createTable(tableName); + + ProcedureTestingUtility.waitNoProcedureRunning(procExec); + + SnapshotDescription snapshot = + new SnapshotDescription("SnapshotProcedureTest", tableName, SnapshotType.FLUSH); + SnapshotProtos.SnapshotDescription snapshotProto = + ProtobufUtil.createHBaseProtosSnapshotDesc(snapshot); + snapshotProto = SnapshotDescriptionUtils.validate(snapshotProto, + UTIL.getHBaseCluster().getMaster().getConfiguration()); + long snapshotProcId = procExec.submitProcedure( + new TestSnapshotProcedure.DelaySnapshotProcedure(procExec.getEnvironment(), snapshotProto)); + UTIL.getHBaseCluster().getMaster().getSnapshotManager().registerSnapshotProcedure(snapshotProto, + snapshotProcId); + + RegionInfo[] regionsToMerge = new RegionInfo[2]; + regionsToMerge[0] = tableRegions.get(0); + regionsToMerge[1] = tableRegions.get(1); + + long mergeProcId = procExec.submitProcedure( + new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true)); + + ProcedureTestingUtility + .waitProcedure(UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), mergeProcId); + ProcedureTestingUtility.waitProcedure( + UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), snapshotProcId); + + assertProcFailed(procExec, mergeProcId); + assertEquals(initialRegionCount, UTIL.getAdmin().getRegions(tableName).size()); + } + private List createTable(final TableName tableName) throws Exception { TableDescriptor desc = 
TableDescriptorBuilder.newBuilder(tableName) .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java index 6e25dbab48ce..6ec36e75bea2 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java @@ -41,6 +41,8 @@ import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.SnapshotDescription; +import org.apache.hadoop.hbase.client.SnapshotType; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.coprocessor.ObserverContext; @@ -50,10 +52,12 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility; +import org.apache.hadoop.hbase.master.procedure.TestSnapshotProcedure; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; @@ -69,6 +73,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; + @Category({ MasterTests.class, MediumTests.class }) public class TestSplitTableRegionProcedure { @@ -550,6 +557,52 @@ public void testSplitWithoutPONR() throws Exception { verify(tableName, splitRowNum); } + @Test + public void testSplitRegionWhileTakingSnapshot() throws Exception { + final TableName tableName = TableName.valueOf(name.getMethodName()); + final ProcedureExecutor procExec = getMasterProcedureExecutor(); + + RegionInfo[] regions = MasterProcedureTestingUtility.createTable(procExec, tableName, null, + columnFamilyName1, columnFamilyName2); + int splitRowNum = startRowNum + rowCount / 2; + byte[] splitKey = Bytes.toBytes("" + splitRowNum); + + assertTrue("not able to find a splittable region", regions != null); + assertTrue("not able to find a splittable region", regions.length == 1); + ProcedureTestingUtility.waitNoProcedureRunning(procExec); + + // task snapshot + SnapshotDescription snapshot = + new SnapshotDescription("SnapshotProcedureTest", tableName, SnapshotType.FLUSH); + SnapshotProtos.SnapshotDescription snapshotProto = + ProtobufUtil.createHBaseProtosSnapshotDesc(snapshot); + snapshotProto = SnapshotDescriptionUtils.validate(snapshotProto, + UTIL.getHBaseCluster().getMaster().getConfiguration()); + long snapshotProcId = procExec.submitProcedure( + new TestSnapshotProcedure.DelaySnapshotProcedure(procExec.getEnvironment(), snapshotProto)); + UTIL.getHBaseCluster().getMaster().getSnapshotManager().registerSnapshotProcedure(snapshotProto, + snapshotProcId); + + // 
collect AM metrics before test + collectAssignmentManagerMetrics(); + + // Split region of the table + long procId = procExec.submitProcedure( + new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey)); + // Wait the completion + ProcedureTestingUtility.waitProcedure(procExec, procId); + ProcedureTestingUtility.waitProcedure(procExec, snapshotProcId); + + ProcedureTestingUtility.assertProcFailed(procExec, procId); + ProcedureTestingUtility.assertProcNotFailed(procExec, snapshotProcId); + + assertTrue(UTIL.getMiniHBaseCluster().getRegions(tableName).size() == 1); + assertTrue(UTIL.countRows(tableName) == 0); + + assertEquals(splitSubmittedCount + 1, splitProcMetrics.getSubmittedCounter().getCount()); + assertEquals(splitFailedCount + 1, splitProcMetrics.getFailedCounter().getCount()); + } + private void deleteData(final TableName tableName, final int startDeleteRowNum) throws IOException, InterruptedException { Table t = UTIL.getConnection().getTable(tableName); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedure.java index 04442eb771d8..84a3e84763b6 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedure.java @@ -17,10 +17,12 @@ */ package org.apache.hadoop.hbase.master.procedure; +import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotState.SNAPSHOT_SNAPSHOT_ONLINE_REGIONS; import static org.junit.Assert.assertTrue; import java.io.IOException; import java.util.Optional; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtil; @@ -51,6 +53,7 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotState; import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; @@ -70,6 +73,27 @@ public class TestSnapshotProcedure { protected SnapshotDescription snapshot; protected SnapshotProtos.SnapshotDescription snapshotProto; + public static final class DelaySnapshotProcedure extends SnapshotProcedure { + public DelaySnapshotProcedure() { + } + + public DelaySnapshotProcedure(final MasterProcedureEnv env, + final SnapshotProtos.SnapshotDescription snapshot) { + super(env, snapshot); + } + + @Override + protected Flow executeFromState(MasterProcedureEnv env, + MasterProcedureProtos.SnapshotState state) + throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { + Flow flow = super.executeFromState(env, state); + if (state == SNAPSHOT_SNAPSHOT_ONLINE_REGIONS) { + TimeUnit.SECONDS.sleep(20); + } + return flow; + } + } + @Before public void setup() throws Exception { TEST_UTIL = new HBaseTestingUtil();
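
Reviewer note (not part of the applied diff): the core change swaps isTakingSnapshot(tn) for isTableTakingAnySnapshot(tn) in prepareMergeRegion/prepareSplitRegion. Below is a minimal sketch of the intent behind that swap, assuming the distinction is between handler-based snapshots and snapshots driven by a proc-v2 SnapshotProcedure (which the new tests register via registerSnapshotProcedure). The SnapshotGuardSketch class, its fields, and its constructor are hypothetical illustrations only, not HBase's actual SnapshotManager internals.

import java.util.Set;

import org.apache.hadoop.hbase.TableName;

// Hypothetical sketch -- not HBase code. It only illustrates why the broader
// check is needed before a merge/split may proceed.
final class SnapshotGuardSketch {
  private final Set<TableName> handlerSnapshots;   // tables with a handler-based snapshot running
  private final Set<TableName> procedureSnapshots; // tables with a SnapshotProcedure in flight

  SnapshotGuardSketch(Set<TableName> handlerSnapshots, Set<TableName> procedureSnapshots) {
    this.handlerSnapshots = handlerSnapshots;
    this.procedureSnapshots = procedureSnapshots;
  }

  // Mirrors the old guard: only handler-based snapshots block the region operation,
  // so a table being snapshotted by a SnapshotProcedure could still be merged or split.
  boolean isTakingSnapshot(TableName tn) {
    return handlerSnapshots.contains(tn);
  }

  // Mirrors the new guard: any in-flight snapshot of the table blocks the merge/split.
  boolean isTableTakingAnySnapshot(TableName tn) {
    return handlerSnapshots.contains(tn) || procedureSnapshots.contains(tn);
  }
}

The added tests exercise exactly this case: DelaySnapshotProcedure keeps a procedure-based snapshot in flight (sleeping after SNAPSHOT_SNAPSHOT_ONLINE_REGIONS) and is registered with the SnapshotManager, then a concurrent MergeTableRegionsProcedure/SplitTableRegionProcedure is submitted and asserted to fail while the snapshot procedure itself completes successfully.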