From ebc7d1240ccee0fd2eb3064c0f0839c30c4275ad Mon Sep 17 00:00:00 2001
From: huaxiangsun
Date: Wed, 6 Apr 2022 12:11:10 -0700
Subject: [PATCH] =?UTF-8?q?HBASE-26618=20Involving=20primary=20meta=20regi?=
=?UTF-8?q?on=20in=20meta=20scan=20with=20CatalogRe=E2=80=A6=20(#4321)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Michael Stack
---
.../CatalogReplicaLoadBalanceSimpleSelector.java | 14 +++++++-------
...stCatalogReplicaLoadBalanceSimpleSelector.java | 11 ++++++++---
.../TestMetaRegionReplicaReplicationEndpoint.java | 15 ++++++++++++---
3 files changed, 27 insertions(+), 13 deletions(-)
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/CatalogReplicaLoadBalanceSimpleSelector.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/CatalogReplicaLoadBalanceSimpleSelector.java
index f78dfb199c11..7590943e12b1 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/CatalogReplicaLoadBalanceSimpleSelector.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/CatalogReplicaLoadBalanceSimpleSelector.java
@@ -46,15 +46,15 @@
* balancing algorithm. It maintains a stale location cache for each table. Whenever client looks
* up location, it first check if the row is the stale location cache. If yes, the location from
* catalog replica is stale, it will go to the primary region to look up update-to-date location;
- * otherwise, it will randomly pick up a replica region for lookup. When clients receive
- * RegionNotServedException from region servers, it will add these region locations to the stale
- * location cache. The stale cache will be cleaned up periodically by a chore.
+ * otherwise, it will randomly pick up a replica region or primary region for lookup. When clients
+ * receive RegionNotServedException from region servers, it will add these region locations to the
+ * stale location cache. The stale cache will be cleaned up periodically by a chore.
*
- * It follows a simple algorithm to choose a replica to go:
+ * It follows a simple algorithm to choose a meta replica region (including primary meta) to go:
*
*
* - If there is no stale location entry for rows it looks up, it will randomly
- * pick a replica region to do lookup.
+ * pick a meta replica region (including primary meta) to do lookup.
* - If the location from the replica region is stale, client gets RegionNotServedException
* from region server, in this case, it will create StaleLocationCacheEntry in
* CatalogReplicaLoadBalanceReplicaSimpleSelector.
@@ -141,7 +141,7 @@ public void onError(HRegionLocation loc) {
}
/**
- * Select an random replica id. In case there is no replica region configured, return
+ * Select a random replica id (including the primary replica id). In case there is no replica region configured, return
* the primary replica id.
* @return Replica id
*/
@@ -155,7 +155,7 @@ private int getRandomReplicaId() {
if (cachedNumOfReplicas <= 1) {
return RegionInfo.DEFAULT_REPLICA_ID;
}
- return 1 + ThreadLocalRandom.current().nextInt(cachedNumOfReplicas - 1);
+ return ThreadLocalRandom.current().nextInt(cachedNumOfReplicas);
}
/**
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestCatalogReplicaLoadBalanceSimpleSelector.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestCatalogReplicaLoadBalanceSimpleSelector.java
index 17b6d74af8c9..eaaac8034894 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestCatalogReplicaLoadBalanceSimpleSelector.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestCatalogReplicaLoadBalanceSimpleSelector.java
@@ -24,6 +24,7 @@
import java.io.IOException;
import java.util.concurrent.TimeUnit;
+import java.util.stream.IntStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
@@ -105,9 +106,13 @@ public void testMetaChangeFromReplicaNoReplica() throws IOException, Interrupted
return numOfReplicas;
});
- assertNotEquals(
- metaSelector.select(TableName.valueOf("test"), EMPTY_START_ROW, RegionLocateType.CURRENT),
- RegionReplicaUtil.DEFAULT_REPLICA_ID);
+ // Call select() 99 times; the random choice should cover every replica id.
+ int[] replicaIdCount = new int[numOfMetaReplica];
+ IntStream.range(1, 100).forEach(i -> replicaIdCount[metaSelector.select(
+ TableName.valueOf("test"), EMPTY_START_ROW, RegionLocateType.CURRENT)] ++);
+
+ // Make sure each replica id is returned by select() call, including primary replica id.
+ IntStream.range(0, numOfMetaReplica).forEach(i -> assertNotEquals(replicaIdCount[i], 0));
// Change to No meta replica
HBaseTestingUtility.setReplicas(admin, TableName.META_TABLE_NAME, 1);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestMetaRegionReplicaReplicationEndpoint.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestMetaRegionReplicaReplicationEndpoint.java
index dd17eeb29ffe..7a2dc787e8d9 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestMetaRegionReplicaReplicationEndpoint.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestMetaRegionReplicaReplicationEndpoint.java
@@ -519,6 +519,16 @@ private void primaryMayIncreaseReplicaNoChange(final long[] before, final long[]
}
}
+ private void primaryIncreaseReplicaIncrease(final long[] before, final long[] after) {
+ // Read requests must have increased for the primary meta replica.
+ assertTrue(after[RegionInfo.DEFAULT_REPLICA_ID] > before[RegionInfo.DEFAULT_REPLICA_ID]);
+
+ // Read requests must have increased for every non-primary meta replica region.
+ for (int i = 1; i < after.length; i++) {
+ assertTrue(after[i] > before[i]);
+ }
+ }
+
private void getMetaReplicaReadRequests(final Region[] metaRegions, final long[] counters) {
int i = 0;
for (Region r : metaRegions) {
@@ -579,9 +589,8 @@ public void testHBaseMetaReplicaGets() throws Exception {
getMetaReplicaReadRequests(metaRegions, readReqsForMetaReplicasAfterGet);
- // There is no read requests increase for primary meta replica.
- // For rest of meta replicas, there are more reads against them.
- primaryNoChangeReplicaIncrease(readReqsForMetaReplicas, readReqsForMetaReplicasAfterGet);
+ // There are more reads against all meta replica regions, including the primary region.
+ primaryIncreaseReplicaIncrease(readReqsForMetaReplicas, readReqsForMetaReplicasAfterGet);
// move one of regions so it meta cache may be invalid.
HTU.moveRegionAndWait(userRegion.getRegionInfo(), destRs.getServerName());