Skip to content

Commit c50e3f3

Browse files
committed
HBASE-23078 BaseLoadBalancer should consider region replicas when randomAssignment and roundRobinAssignment (#663)
Signed-off-by: stack <stack@apache.org>
1 parent b7aeed7 commit c50e3f3

File tree

5 files changed

+118
-68
lines changed

5 files changed

+118
-68
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,9 @@
3737
import org.apache.hadoop.hbase.ServerName;
3838
import org.apache.hadoop.hbase.TableName;
3939
import org.apache.hadoop.hbase.client.RegionInfo;
40-
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
4140
import org.apache.hadoop.hbase.master.RegionState;
4241
import org.apache.hadoop.hbase.master.RegionState.State;
4342
import org.apache.hadoop.hbase.util.Bytes;
44-
import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
4543
import org.apache.yetus.audience.InterfaceAudience;
4644
import org.slf4j.Logger;
4745
import org.slf4j.LoggerFactory;
@@ -746,26 +744,6 @@ public ServerStateNode addRegionToServer(final RegionStateNode regionNode) {
746744
return serverNode;
747745
}
748746

749-
public boolean isReplicaAvailableForRegion(final RegionInfo info) {
750-
// if the region info itself is a replica return true.
751-
if (!RegionReplicaUtil.isDefaultReplica(info)) {
752-
return true;
753-
}
754-
// iterate the regionsMap for the given region name. If there are replicas it should
755-
// list them in order.
756-
for (RegionStateNode node : regionsMap.tailMap(info.getRegionName()).values()) {
757-
if (!node.getTable().equals(info.getTable())
758-
|| !ServerRegionReplicaUtil.isReplicasForSameRegion(info, node.getRegionInfo())) {
759-
break;
760-
} else if (!RegionReplicaUtil.isDefaultReplica(node.getRegionInfo())) {
761-
// we have replicas
762-
return true;
763-
}
764-
}
765-
// we don have replicas
766-
return false;
767-
}
768-
769747
public ServerStateNode removeRegionFromServer(final ServerName serverName,
770748
final RegionStateNode regionNode) {
771749
ServerStateNode serverNode = getOrCreateServer(serverName);

hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*/
1919
package org.apache.hadoop.hbase.master.balancer;
2020

21+
import java.io.IOException;
2122
import java.util.ArrayList;
2223
import java.util.Arrays;
2324
import java.util.Collection;
@@ -48,12 +49,11 @@
4849
import org.apache.hadoop.hbase.TableName;
4950
import org.apache.hadoop.hbase.client.RegionInfo;
5051
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
52+
import org.apache.hadoop.hbase.client.TableDescriptor;
5153
import org.apache.hadoop.hbase.master.LoadBalancer;
5254
import org.apache.hadoop.hbase.master.MasterServices;
5355
import org.apache.hadoop.hbase.master.RackManager;
5456
import org.apache.hadoop.hbase.master.RegionPlan;
55-
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
56-
import org.apache.hadoop.hbase.master.assignment.RegionStates;
5757
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
5858
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
5959
import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
@@ -1263,7 +1263,7 @@ public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> r
12631263
return assignments;
12641264
}
12651265

1266-
Cluster cluster = createCluster(servers, regions, false);
1266+
Cluster cluster = createCluster(servers, regions);
12671267
List<RegionInfo> unassignedRegions = new ArrayList<>();
12681268

12691269
roundRobinAssignment(cluster, regions, unassignedRegions,
@@ -1319,8 +1319,24 @@ public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> r
13191319
return assignments;
13201320
}
13211321

1322-
protected Cluster createCluster(List<ServerName> servers, Collection<RegionInfo> regions,
1323-
boolean hasRegionReplica) {
1322+
protected Cluster createCluster(List<ServerName> servers, Collection<RegionInfo> regions)
1323+
throws HBaseIOException {
1324+
boolean hasRegionReplica = false;
1325+
try {
1326+
if (services != null && services.getTableDescriptors() != null) {
1327+
Map<String, TableDescriptor> tds = services.getTableDescriptors().getAll();
1328+
for (RegionInfo regionInfo : regions) {
1329+
TableDescriptor td = tds.get(regionInfo.getTable().getNameWithNamespaceInclAsString());
1330+
if (td != null && td.getRegionReplication() > 1) {
1331+
hasRegionReplica = true;
1332+
break;
1333+
}
1334+
}
1335+
}
1336+
} catch (IOException ioe) {
1337+
throw new HBaseIOException(ioe);
1338+
}
1339+
13241340
// Get the snapshot of the current assignments for the regions in question, and then create
13251341
// a cluster out of it. Note that we might have replicas already assigned to some servers
13261342
// earlier. So we want to get the snapshot to see those assignments, but this will only contain
@@ -1380,7 +1396,7 @@ public ServerName randomAssignment(RegionInfo regionInfo, List<ServerName> serve
13801396
final List<ServerName> finalServers = idleServers.isEmpty() ?
13811397
servers : idleServers;
13821398
List<RegionInfo> regions = Lists.newArrayList(regionInfo);
1383-
Cluster cluster = createCluster(finalServers, regions, false);
1399+
Cluster cluster = createCluster(finalServers, regions);
13841400
return randomAssignment(cluster, regionInfo, finalServers);
13851401
}
13861402

@@ -1452,21 +1468,9 @@ public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, Server
14521468

14531469
int numRandomAssignments = 0;
14541470
int numRetainedAssigments = 0;
1455-
boolean hasRegionReplica = false;
14561471
for (Map.Entry<RegionInfo, ServerName> entry : regions.entrySet()) {
14571472
RegionInfo region = entry.getKey();
14581473
ServerName oldServerName = entry.getValue();
1459-
// In the current set of regions even if one has region replica let us go with
1460-
// getting the entire snapshot
1461-
if (this.services != null) { // for tests
1462-
AssignmentManager am = this.services.getAssignmentManager();
1463-
if (am != null) {
1464-
RegionStates states = am.getRegionStates();
1465-
if (!hasRegionReplica && states != null && states.isReplicaAvailableForRegion(region)) {
1466-
hasRegionReplica = true;
1467-
}
1468-
}
1469-
}
14701474
List<ServerName> localServers = new ArrayList<>();
14711475
if (oldServerName != null) {
14721476
localServers = serversByHostname.get(oldServerName.getHostnameLowerCase());
@@ -1506,7 +1510,7 @@ public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, Server
15061510

15071511
// If servers from prior assignment aren't present, then lets do randomAssignment on regions.
15081512
if (randomAssignRegions.size() > 0) {
1509-
Cluster cluster = createCluster(servers, regions.keySet(), hasRegionReplica);
1513+
Cluster cluster = createCluster(servers, regions.keySet());
15101514
for (Map.Entry<ServerName, List<RegionInfo>> entry : assignments.entrySet()) {
15111515
ServerName sn = entry.getKey();
15121516
for (RegionInfo region : entry.getValue()) {

hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterOperationsForRegionReplicas.java

Lines changed: 36 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
import java.io.IOException;
2626
import java.nio.charset.StandardCharsets;
27+
import java.util.ArrayList;
2728
import java.util.Collection;
2829
import java.util.EnumSet;
2930
import java.util.HashMap;
@@ -43,6 +44,7 @@
4344
import org.apache.hadoop.hbase.MetaTableAccessor.Visitor;
4445
import org.apache.hadoop.hbase.RegionLocations;
4546
import org.apache.hadoop.hbase.ServerName;
47+
import org.apache.hadoop.hbase.StartMiniClusterOption;
4648
import org.apache.hadoop.hbase.TableName;
4749
import org.apache.hadoop.hbase.client.Admin;
4850
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
@@ -58,6 +60,7 @@
5860
import org.apache.hadoop.hbase.testclassification.MasterTests;
5961
import org.apache.hadoop.hbase.testclassification.MediumTests;
6062
import org.apache.hadoop.hbase.util.Bytes;
63+
import org.apache.hadoop.hbase.util.JVMClusterUtil;
6164
import org.junit.AfterClass;
6265
import org.junit.BeforeClass;
6366
import org.junit.ClassRule;
@@ -147,15 +150,7 @@ public void testCreateTableWithMultipleReplicas() throws Exception {
147150

148151
List<RegionInfo> hris = MetaTableAccessor.getTableRegions(ADMIN.getConnection(), tableName);
149152
assertEquals(numRegions * numReplica, hris.size());
150-
// check that the master created expected number of RegionState objects
151-
for (int i = 0; i < numRegions; i++) {
152-
for (int j = 0; j < numReplica; j++) {
153-
RegionInfo replica = RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), j);
154-
RegionState state = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
155-
.getRegionStates().getRegionState(replica);
156-
assertNotNull(state);
157-
}
158-
}
153+
assertRegionStateNotNull(hris, numRegions, numReplica);
159154

160155
List<Result> metaRows = MetaTableAccessor.fullScanRegions(ADMIN.getConnection());
161156
int numRows = 0;
@@ -184,14 +179,26 @@ public void testCreateTableWithMultipleReplicas() throws Exception {
184179
TEST_UTIL.getHBaseClusterInterface().waitForActiveAndReadyMaster();
185180
TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
186181
TEST_UTIL.waitUntilNoRegionsInTransition();
187-
for (int i = 0; i < numRegions; i++) {
188-
for (int j = 0; j < numReplica; j++) {
189-
RegionInfo replica = RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), j);
190-
RegionState state = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
191-
.getRegionStates().getRegionState(replica);
192-
assertNotNull(state);
193-
}
182+
assertRegionStateNotNull(hris, numRegions, numReplica);
183+
validateFromSnapshotFromMeta(TEST_UTIL, tableName, numRegions, numReplica,
184+
ADMIN.getConnection());
185+
186+
// Now shut the whole cluster down, and verify the assignments are kept so that the
187+
// availability constraints are met. MiniHBaseCluster chooses arbitrary ports on each
188+
// restart. This messes with our being able to test that we retain locality. Therefore,
189+
// figure current cluster ports and pass them in on next cluster start so new cluster comes
190+
// up at same coordinates -- and the assignment retention logic has a chance to cut in.
191+
List<Integer> rsports = new ArrayList<>();
192+
for (JVMClusterUtil.RegionServerThread rst : TEST_UTIL.getHBaseCluster()
193+
.getLiveRegionServerThreads()) {
194+
rsports.add(rst.getRegionServer().getRpcServer().getListenerAddress().getPort());
194195
}
196+
TEST_UTIL.shutdownMiniHBaseCluster();
197+
StartMiniClusterOption option =
198+
StartMiniClusterOption.builder().numRegionServers(numSlaves).rsPorts(rsports).build();
199+
TEST_UTIL.startMiniHBaseCluster(option);
200+
TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
201+
TEST_UTIL.waitUntilNoRegionsInTransition();
195202
validateFromSnapshotFromMeta(TEST_UTIL, tableName, numRegions, numReplica,
196203
ADMIN.getConnection());
197204

@@ -255,6 +262,19 @@ public void testCreateTableWithMultipleReplicas() throws Exception {
255262
}
256263
}
257264

265+
private void assertRegionStateNotNull(List<RegionInfo> hris, int numRegions, int numReplica) {
266+
// check that the master created expected number of RegionState objects
267+
for (int i = 0; i < numRegions; i++) {
268+
for (int j = 0; j < numReplica; j++) {
269+
RegionInfo replica = RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), j);
270+
RegionState state =
271+
TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
272+
.getRegionState(replica);
273+
assertNotNull(state);
274+
}
275+
}
276+
}
277+
258278
@Test
259279
@Ignore("Enable when we have support for alter_table- HBASE-10361")
260280
public void testIncompleteMetaTableReplicaInformation() throws Exception {

hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCPBase.java

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,11 @@
1919

2020
import static org.junit.Assert.assertEquals;
2121
import static org.junit.Assert.assertTrue;
22+
import static org.junit.Assert.fail;
2223

2324
import java.io.IOException;
25+
import java.util.ArrayList;
26+
import java.util.List;
2427

2528
import org.apache.hadoop.conf.Configuration;
2629
import org.apache.hadoop.hbase.HBaseTestingUtility;
@@ -29,11 +32,14 @@
2932
import org.apache.hadoop.hbase.ServerName;
3033
import org.apache.hadoop.hbase.TableName;
3134
import org.apache.hadoop.hbase.client.RegionInfo;
35+
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
3236
import org.apache.hadoop.hbase.client.Table;
3337
import org.apache.hadoop.hbase.master.HMaster;
3438
import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
3539
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
3640
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
41+
import org.apache.hadoop.hbase.regionserver.Region;
42+
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
3743
import org.junit.After;
3844
import org.junit.Before;
3945
import org.slf4j.Logger;
@@ -125,6 +131,7 @@ protected void testRecoveryAndDoubleExecution(boolean carryingMeta, boolean doub
125131
long procId = getSCPProcId(procExec);
126132
ProcedureTestingUtility.waitProcedure(procExec, procId);
127133
}
134+
assertReplicaDistributed(t);
128135
assertEquals(count, util.countRows(t));
129136
assertEquals(checksum, util.checksumRows(t));
130137
}
@@ -135,6 +142,36 @@ protected long getSCPProcId(ProcedureExecutor<?> procExec) {
135142
return procExec.getActiveProcIds().stream().mapToLong(Long::longValue).min().getAsLong();
136143
}
137144

145+
private void assertReplicaDistributed(Table t) throws IOException {
146+
if (t.getDescriptor().getRegionReplication() <= 1) {
147+
return;
148+
}
149+
// Assert all data came back.
150+
List<RegionInfo> regionInfos = new ArrayList<>();
151+
for (RegionServerThread rs : this.util.getMiniHBaseCluster().getRegionServerThreads()) {
152+
regionInfos.clear();
153+
for (Region r : rs.getRegionServer().getRegions(t.getName())) {
154+
LOG.info("The region is " + r.getRegionInfo() + " the location is " +
155+
rs.getRegionServer().getServerName());
156+
if (contains(regionInfos, r.getRegionInfo())) {
157+
LOG.error("Am exiting");
158+
fail("Replica regions should be assigned to different region servers");
159+
} else {
160+
regionInfos.add(r.getRegionInfo());
161+
}
162+
}
163+
}
164+
}
165+
166+
private boolean contains(List<RegionInfo> regionInfos, RegionInfo regionInfo) {
167+
for (RegionInfo info : regionInfos) {
168+
if (RegionReplicaUtil.isReplicasForSameRegion(info, regionInfo)) {
169+
return true;
170+
}
171+
}
172+
return false;
173+
}
174+
138175
protected Table createTable(final TableName tableName) throws IOException {
139176
final Table t = this.util.createTable(tableName, HBaseTestingUtility.COLUMNS,
140177
HBaseTestingUtility.KEYS_FOR_HBA_CREATE_TABLE, getRegionReplication());

hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionReplicasWithRestartScenarios.java

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,12 @@
2424
import java.io.IOException;
2525
import java.util.ArrayList;
2626
import java.util.Collection;
27+
2728
import org.apache.hadoop.conf.Configuration;
2829
import org.apache.hadoop.hbase.HBaseClassTestRule;
2930
import org.apache.hadoop.hbase.HBaseTestingUtility;
3031
import org.apache.hadoop.hbase.HConstants;
32+
import org.apache.hadoop.hbase.ServerName;
3133
import org.apache.hadoop.hbase.TableName;
3234
import org.apache.hadoop.hbase.client.RegionInfo;
3335
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
@@ -37,7 +39,6 @@
3739
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
3840
import org.apache.hadoop.hbase.util.Bytes;
3941
import org.apache.hadoop.hbase.util.RegionSplitter;
40-
import org.apache.hadoop.hdfs.DFSConfigKeys;
4142
import org.junit.After;
4243
import org.junit.AfterClass;
4344
import org.junit.Before;
@@ -64,31 +65,27 @@ public class TestRegionReplicasWithRestartScenarios {
6465

6566
private static final int NB_SERVERS = 3;
6667
private Table table;
68+
private TableName tableName;
6769

6870
private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
6971
private static final byte[] f = HConstants.CATALOG_FAMILY;
7072

7173
@BeforeClass
7274
public static void beforeClass() throws Exception {
73-
// Reduce the hdfs block size and prefetch to trigger the file-link reopen
74-
// when the file is moved to archive (e.g. compaction)
75-
HTU.getConfiguration().setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 8192);
76-
HTU.getConfiguration().setInt(DFSConfigKeys.DFS_CLIENT_READ_PREFETCH_SIZE_KEY, 1);
77-
HTU.getConfiguration().setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 128 * 1024 * 1024);
78-
HTU.getConfiguration().setInt("hbase.master.wait.on.regionservers.mintostart", 3);
75+
HTU.getConfiguration().setInt("hbase.master.wait.on.regionservers.mintostart", NB_SERVERS);
7976
HTU.startMiniCluster(NB_SERVERS);
8077
}
8178

8279
@Before
8380
public void before() throws IOException {
84-
TableName tableName = TableName.valueOf(this.name.getMethodName());
85-
// Create table then get the single region for our new table.
86-
this.table = createTableDirectlyFromHTD(tableName);
81+
this.tableName = TableName.valueOf(this.name.getMethodName());
82+
this.table = createTableDirectlyFromHTD(this.tableName);
8783
}
8884

8985
@After
9086
public void after() throws IOException {
9187
this.table.close();
88+
HTU.deleteTable(this.tableName);
9289
}
9390

9491
private static Table createTableDirectlyFromHTD(final TableName tableName) throws IOException {
@@ -125,6 +122,20 @@ private HRegionServer getTertiaryRS() {
125122

126123
@Test
127124
public void testRegionReplicasCreated() throws Exception {
125+
assertReplicaDistributed();
126+
}
127+
128+
@Test
129+
public void testWhenRestart() throws Exception {
130+
ServerName serverName = getRS().getServerName();
131+
HTU.getHBaseCluster().stopRegionServer(serverName);
132+
HTU.getHBaseCluster().waitForRegionServerToStop(serverName, 60000);
133+
HTU.getHBaseCluster().startRegionServerAndWait(60000);
134+
HTU.waitTableAvailable(this.tableName);
135+
assertReplicaDistributed();
136+
}
137+
138+
private void assertReplicaDistributed() throws Exception {
128139
Collection<HRegion> onlineRegions = getRS().getOnlineRegionsLocalContext();
129140
boolean res = checkDuplicates(onlineRegions);
130141
assertFalse(res);
@@ -150,7 +161,7 @@ private boolean checkDuplicates(Collection<HRegion> onlineRegions3) throws Excep
150161
RegionReplicaUtil.getRegionInfoForDefaultReplica(actualRegion.getRegionInfo()))) {
151162
i++;
152163
if (i > 1) {
153-
LOG.info("Duplicate found " + actualRegion.getRegionInfo() + " " +
164+
LOG.warn("Duplicate found {} and {}", actualRegion.getRegionInfo(),
154165
region.getRegionInfo());
155166
assertTrue(Bytes.equals(region.getRegionInfo().getStartKey(),
156167
actualRegion.getRegionInfo().getStartKey()));

0 commit comments

Comments
 (0)