Commit 78c56f3

Neilxzn authored and jojochuang committed
HDFS-16182.numOfReplicas is given the wrong value in BlockPlacementPolicyDefault$chooseTarget can cause DataStreamer to fail with Heterogeneous Storage. (#3320)
(cherry picked from commit 5626734) (cherry picked from commit ce3a7155335416ed4665f90f0ddd7ac3a3edd7e6)
1 parent 980c35e commit 78c56f3

2 files changed (+60, -7 lines)

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java

Lines changed: 7 additions & 7 deletions
@@ -396,7 +396,7 @@ private EnumMap<StorageType, Integer> getRequiredStorageTypes(
    * @param storageTypes storage type to be considered for target
    * @return local node of writer (not chosen node)
    */
-  private Node chooseTarget(int numOfReplicas,
+  private Node chooseTarget(final int numOfReplicas,
                             Node writer,
                             final Set<Node> excludedNodes,
                             final long blocksize,
@@ -432,7 +432,7 @@ private Node chooseTarget(int numOfReplicas,
     }
 
     try {
-      if ((numOfReplicas = requiredStorageTypes.size()) == 0) {
+      if (requiredStorageTypes.size() == 0) {
         throw new NotEnoughReplicasException(
             "All required storage types are unavailable: "
             + " unavailableStorages=" + unavailableStorages
@@ -464,10 +464,10 @@ private Node chooseTarget(int numOfReplicas,
       for (DatanodeStorageInfo resultStorage : results) {
         addToExcludedNodes(resultStorage.getDatanodeDescriptor(), oldExcludedNodes);
       }
-      // Set numOfReplicas, since it can get out of sync with the result list
+      // Set newNumOfReplicas, since it can get out of sync with the result list
       // if the NotEnoughReplicasException was thrown in chooseRandom().
-      numOfReplicas = totalReplicasExpected - results.size();
-      return chooseTarget(numOfReplicas, writer, oldExcludedNodes, blocksize,
+      int newNumOfReplicas = totalReplicasExpected - results.size();
+      return chooseTarget(newNumOfReplicas, writer, oldExcludedNodes, blocksize,
           maxNodesPerRack, results, false, storagePolicy, unavailableStorages,
           newBlock, null);
     }
@@ -486,8 +486,8 @@ private Node chooseTarget(int numOfReplicas,
         addToExcludedNodes(resultStorage.getDatanodeDescriptor(),
             oldExcludedNodes);
       }
-      numOfReplicas = totalReplicasExpected - results.size();
-      return chooseTarget(numOfReplicas, writer, oldExcludedNodes, blocksize,
+      int newNumOfReplicas = totalReplicasExpected - results.size();
+      return chooseTarget(newNumOfReplicas, writer, oldExcludedNodes, blocksize,
           maxNodesPerRack, results, false, storagePolicy, unavailableStorages,
           newBlock, null);
     }
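Taken together, the hunks above make the numOfReplicas parameter final and route the recomputed retry count through a new local, newNumOfReplicas. As a rough illustration of why the old pattern is risky (a toy sketch with hypothetical names, not the HDFS code): once a parameter is overwritten inside the method, later code can no longer see the count the caller originally asked for.

    // Toy sketch of the pattern this commit removes; names are hypothetical.
    class ParameterReassignmentSketch {
      // Pre-fix shape: the parameter doubles as scratch space for the retry
      // count, so the caller's original request is lost after the assignment.
      static String retryBefore(int requested, int placed, int totalExpected) {
        requested = totalExpected - placed; // caller's value is overwritten here
        return "retry for " + requested + " replicas (original request unknown)";
      }

      // Post-fix shape: the retry count lives in a fresh local and the
      // parameter stays final, mirroring newNumOfReplicas in the patch.
      static String retryAfter(final int requested, int placed, int totalExpected) {
        int newRequested = totalExpected - placed;
        return "retry for " + newRequested + " replicas (caller asked for " + requested + ")";
      }

      public static void main(String[] args) {
        // e.g. adding one node back to a 3-replica pipeline after a failure
        System.out.println(retryBefore(1, 2, 3));
        System.out.println(retryAfter(1, 2, 3));
      }
    }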

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockStoragePolicy.java

Lines changed: 53 additions & 0 deletions
@@ -1300,6 +1300,59 @@ public void testChooseSsdOverDisk() throws Exception {
     Assert.assertEquals(StorageType.DISK, targets[1].getStorageType());
   }
 
+  /**
+   * Consider a File with All_SSD storage policy.
+   * 1. Choose 3 DISK DNs for pipeline because SSD DNs no enough at
+   * the beginning.
+   * 2. One of DISK DNs fails And it need choose one new DN for existing.
+   * pipeline {@link DataStreamer addDatanode2ExistingPipeline()}.
+   * Make sure the number of target DNs are 3.
+   * see HDFS-16182.
+   */
+  @Test
+  public void testAddDatanode2ExistingPipelineInSsd() throws Exception {
+    BlockStoragePolicy policy = POLICY_SUITE.getPolicy(ALLSSD);
+
+    final String[] racks = {"/d1/r1", "/d2/r2", "/d3/r3", "/d4/r4", "/d5/r5",
+        "/d6/r6", "/d7/r7"};
+    final String[] hosts = {"host1", "host2", "host3", "host4", "host5",
+        "host6", "host7"};
+    final StorageType[] disks = {StorageType.DISK, StorageType.DISK, StorageType.DISK};
+
+    final DatanodeStorageInfo[] diskStorages
+        = DFSTestUtil.createDatanodeStorageInfos(7, racks, hosts, disks);
+    final DatanodeDescriptor[] dataNodes
+        = DFSTestUtil.toDatanodeDescriptor(diskStorages);
+    for (int i = 0; i < dataNodes.length; i++) {
+      BlockManagerTestUtil.updateStorage(dataNodes[i],
+          new DatanodeStorage("ssd" + i + 1, DatanodeStorage.State.NORMAL,
+              StorageType.SSD));
+    }
+
+    FileSystem.setDefaultUri(conf, "hdfs://localhost:0");
+    conf.set(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY, "0.0.0.0:0");
+    File baseDir = PathUtils.getTestDir(TestReplicationPolicy.class);
+    conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
+        new File(baseDir, "name").getPath());
+    DFSTestUtil.formatNameNode(conf);
+    NameNode namenode = new NameNode(conf);
+
+    final BlockManager bm = namenode.getNamesystem().getBlockManager();
+    BlockPlacementPolicy replicator = bm.getBlockPlacementPolicy();
+    NetworkTopology cluster = bm.getDatanodeManager().getNetworkTopology();
+    for (DatanodeDescriptor datanode : dataNodes) {
+      cluster.add(datanode);
+    }
+    // chsenDs are DISK StorageType to simulate not enough SDD Storage
+    List<DatanodeStorageInfo> chsenDs = new ArrayList<>();
+    chsenDs.add(diskStorages[0]);
+    chsenDs.add(diskStorages[1]);
+    DatanodeStorageInfo[] targets = replicator.chooseTarget("/foo", 1,
+        null, chsenDs, true,
+        new HashSet<Node>(), 0, policy, null);
+    Assert.assertEquals(3, targets.length);
+  }
+
   @Test
   public void testGetFileStoragePolicyAfterRestartNN() throws Exception {
     //HDFS8219
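To run just the new test locally, a standard Maven Surefire invocation from the root of a Hadoop source checkout should work (shown as an assumption for convenience, not part of this commit):

    mvn -pl hadoop-hdfs-project/hadoop-hdfs test -Dtest=TestBlockStoragePolicy#testAddDatanode2ExistingPipelineInSsd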
