From ebc7d1240ccee0fd2eb3064c0f0839c30c4275ad Mon Sep 17 00:00:00 2001 From: huaxiangsun Date: Wed, 6 Apr 2022 12:11:10 -0700 Subject: [PATCH] =?UTF-8?q?HBASE-26618=20Involving=20primary=20meta=20regi?= =?UTF-8?q?on=20in=20meta=20scan=20with=20CatalogRe=E2=80=A6=20(#4321)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michael Stack --- .../CatalogReplicaLoadBalanceSimpleSelector.java | 14 +++++++------- ...stCatalogReplicaLoadBalanceSimpleSelector.java | 11 ++++++++--- .../TestMetaRegionReplicaReplicationEndpoint.java | 15 ++++++++++++--- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/CatalogReplicaLoadBalanceSimpleSelector.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/CatalogReplicaLoadBalanceSimpleSelector.java index f78dfb199c11..7590943e12b1 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/CatalogReplicaLoadBalanceSimpleSelector.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/CatalogReplicaLoadBalanceSimpleSelector.java @@ -46,15 +46,15 @@ * balancing algorithm. It maintains a stale location cache for each table. Whenever client looks * up location, it first check if the row is the stale location cache. If yes, the location from * catalog replica is stale, it will go to the primary region to look up update-to-date location; - * otherwise, it will randomly pick up a replica region for lookup. When clients receive - * RegionNotServedException from region servers, it will add these region locations to the stale - * location cache. The stale cache will be cleaned up periodically by a chore.

+ * otherwise, it will randomly pick a replica region or the primary region for lookup. When clients + * receive RegionNotServedException from region servers, they will add these region locations to the + * stale location cache. The stale cache will be cleaned up periodically by a chore.

* - * It follows a simple algorithm to choose a replica to go: + * It follows a simple algorithm to choose a meta replica region (including primary meta) to go: * *
    *
  1. If there is no stale location entry for rows it looks up, it will randomly - * pick a replica region to do lookup.
  2. + * pick a meta replica region (including primary meta) to do lookup. *
  3. If the location from the replica region is stale, client gets RegionNotServedException * from region server, in this case, it will create StaleLocationCacheEntry in * CatalogReplicaLoadBalanceReplicaSimpleSelector.
  4. @@ -141,7 +141,7 @@ public void onError(HRegionLocation loc) { } /** - * Select an random replica id. In case there is no replica region configured, return + * Select a random replica id (including the primary replica id). In case there is no replica region configured, return * the primary replica id. * @return Replica id */ @@ -155,7 +155,7 @@ private int getRandomReplicaId() { if (cachedNumOfReplicas <= 1) { return RegionInfo.DEFAULT_REPLICA_ID; } - return 1 + ThreadLocalRandom.current().nextInt(cachedNumOfReplicas - 1); + return ThreadLocalRandom.current().nextInt(cachedNumOfReplicas); } /** diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestCatalogReplicaLoadBalanceSimpleSelector.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestCatalogReplicaLoadBalanceSimpleSelector.java index 17b6d74af8c9..eaaac8034894 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestCatalogReplicaLoadBalanceSimpleSelector.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestCatalogReplicaLoadBalanceSimpleSelector.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.util.concurrent.TimeUnit; +import java.util.stream.IntStream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; @@ -105,9 +106,13 @@ public void testMetaChangeFromReplicaNoReplica() throws IOException, Interrupted return numOfReplicas; }); - assertNotEquals( - metaSelector.select(TableName.valueOf("test"), EMPTY_START_ROW, RegionLocateType.CURRENT), - RegionReplicaUtil.DEFAULT_REPLICA_ID); + // Call select() 99 times; the results should cover all replica ids. 
+ int[] replicaIdCount = new int[numOfMetaReplica]; + IntStream.range(1, 100).forEach(i -> replicaIdCount[metaSelector.select( + TableName.valueOf("test"), EMPTY_START_ROW, RegionLocateType.CURRENT)] ++); + + // Make sure each replica id is returned by select() call, including primary replica id. + IntStream.range(0, numOfMetaReplica).forEach(i -> assertNotEquals(replicaIdCount[i], 0)); // Change to No meta replica HBaseTestingUtility.setReplicas(admin, TableName.META_TABLE_NAME, 1); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestMetaRegionReplicaReplicationEndpoint.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestMetaRegionReplicaReplicationEndpoint.java index dd17eeb29ffe..7a2dc787e8d9 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestMetaRegionReplicaReplicationEndpoint.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestMetaRegionReplicaReplicationEndpoint.java @@ -519,6 +519,16 @@ private void primaryMayIncreaseReplicaNoChange(final long[] before, final long[] } } + private void primaryIncreaseReplicaIncrease(final long[] before, final long[] after) { + // Read requests increase for the primary meta replica. + assertTrue(after[RegionInfo.DEFAULT_REPLICA_ID] > before[RegionInfo.DEFAULT_REPLICA_ID]); + + // Read requests increase for the meta replica regions. + for (int i = 1; i < after.length; i++) { + assertTrue(after[i] > before[i]); + } + } + private void getMetaReplicaReadRequests(final Region[] metaRegions, final long[] counters) { int i = 0; for (Region r : metaRegions) { @@ -579,9 +589,8 @@ public void testHBaseMetaReplicaGets() throws Exception { getMetaReplicaReadRequests(metaRegions, readReqsForMetaReplicasAfterGet); - // There is no read requests increase for primary meta replica. - // For rest of meta replicas, there are more reads against them. 
- primaryNoChangeReplicaIncrease(readReqsForMetaReplicas, readReqsForMetaReplicasAfterGet); + // There are more reads against all meta replica regions, including the primary region. + primaryIncreaseReplicaIncrease(readReqsForMetaReplicas, readReqsForMetaReplicasAfterGet); // move one of regions so it meta cache may be invalid. HTU.moveRegionAndWait(userRegion.getRegionInfo(), destRs.getServerName());