
Commit d73f2ca

HDFS-17365. EC: Add extra redunency configuration in checkStreamerFailures to prevent data loss.
1 parent 2f1718c commit d73f2ca

File tree

3 files changed: +76, -1 lines changed


hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java

Lines changed: 44 additions & 1 deletion
@@ -73,6 +73,16 @@
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.TimeUnit;
 
+import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.Write.ECRedunency.DFS_CLIENT_EC_RS_10_4_1024k_FAILED_WRITE_BLOCK_TOLERATED;
+import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.Write.ECRedunency.DFS_CLIENT_EC_RS_10_4_1024k_FAILED_WRITE_BLOCK_TOLERATED_DEFAILT;
+import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.Write.ECRedunency.DFS_CLIENT_EC_RS_3_2_1024k_FAILED_WRITE_BLOCK_TOLERATED;
+import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.Write.ECRedunency.DFS_CLIENT_EC_RS_3_2_1024k_FAILED_WRITE_BLOCK_TOLERATED_DEFAILT;
+import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.Write.ECRedunency.DFS_CLIENT_EC_RS_6_3_1024k_FAILED_WRITE_BLOCK_TOLERATED;
+import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.Write.ECRedunency.DFS_CLIENT_EC_RS_6_3_1024k_FAILED_WRITE_BLOCK_TOLERATED_DEFAILT;
+import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.Write.ECRedunency.DFS_CLIENT_EC_RS_LEGACY_6_3_1024k_FAILED_WRITE_BLOCK_TOLERATED;
+import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.Write.ECRedunency.DFS_CLIENT_EC_RS_LEGACY_6_3_1024k_FAILED_WRITE_BLOCK_TOLERATED_DEFAILT;
+import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.Write.ECRedunency.DFS_CLIENT_EC_XOR_2_1_1024k_FAILED_WRITE_BLOCK_TOLERATED;
+import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.Write.ECRedunency.DFS_CLIENT_EC_XOR_2_1_1024k_FAILED_WRITE_BLOCK_TOLERATED_DEFAILT;
 import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.Write.RECOVER_LEASE_ON_CLOSE_EXCEPTION_DEFAULT;
 import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.Write.RECOVER_LEASE_ON_CLOSE_EXCEPTION_KEY;

@@ -286,6 +296,7 @@ private void flipDataBuffers() {
   private CompletionService<Void> flushAllExecutorCompletionService;
   private int blockGroupIndex;
   private long datanodeRestartTimeout;
+  private final int failedStreamerTolerated;
 
   /** Construct a new output stream for creating a file. */
   DFSStripedOutputStream(DFSClient dfsClient, String src, HdfsFileStatus stat,
@@ -325,6 +336,38 @@ private void flipDataBuffers() {
     currentPackets = new DFSPacket[streamers.size()];
     datanodeRestartTimeout = dfsClient.getConf().getDatanodeRestartTimeout();
     setCurrentStreamer(0);
+
+    int extraFailedStreamerToleratedTmp;
+    switch (ecPolicy.getName()) {
+    case "RS-10-4-1024k":
+      extraFailedStreamerToleratedTmp = dfsClient.getConfiguration().getInt(
+          DFS_CLIENT_EC_RS_10_4_1024k_FAILED_WRITE_BLOCK_TOLERATED,
+          DFS_CLIENT_EC_RS_10_4_1024k_FAILED_WRITE_BLOCK_TOLERATED_DEFAILT);
+      break;
+    case "RS-3-2-1024k":
+      extraFailedStreamerToleratedTmp = dfsClient.getConfiguration().getInt(
+          DFS_CLIENT_EC_RS_3_2_1024k_FAILED_WRITE_BLOCK_TOLERATED,
+          DFS_CLIENT_EC_RS_3_2_1024k_FAILED_WRITE_BLOCK_TOLERATED_DEFAILT);
+      break;
+    case "RS-6-3-1024k":
+      extraFailedStreamerToleratedTmp = dfsClient.getConfiguration().getInt(
+          DFS_CLIENT_EC_RS_6_3_1024k_FAILED_WRITE_BLOCK_TOLERATED,
+          DFS_CLIENT_EC_RS_6_3_1024k_FAILED_WRITE_BLOCK_TOLERATED_DEFAILT);
+      break;
+    case "RS-LEGACY-6-3-1024k":
+      extraFailedStreamerToleratedTmp = dfsClient.getConfiguration().getInt(
+          DFS_CLIENT_EC_RS_LEGACY_6_3_1024k_FAILED_WRITE_BLOCK_TOLERATED,
+          DFS_CLIENT_EC_RS_LEGACY_6_3_1024k_FAILED_WRITE_BLOCK_TOLERATED_DEFAILT);
+      break;
+    case "XOR-2-1-1024k":
+      extraFailedStreamerToleratedTmp = dfsClient.getConfiguration().getInt(
+          DFS_CLIENT_EC_XOR_2_1_1024k_FAILED_WRITE_BLOCK_TOLERATED,
+          DFS_CLIENT_EC_XOR_2_1_1024k_FAILED_WRITE_BLOCK_TOLERATED_DEFAILT);
+      break;
+    default:
+      extraFailedStreamerToleratedTmp = 0;
+    }
+    failedStreamerTolerated = Math.max(extraFailedStreamerToleratedTmp, 0);
   }
 
   /** Construct a new output stream for appending to a file. */
@@ -690,7 +733,7 @@ private void checkStreamerFailures(boolean isNeedFlushAllPackets)
     // 2) create new block outputstream
     newFailed = waitCreatingStreamers(healthySet);
     if (newFailed.size() + failedStreamers.size() >
-        numAllBlocks - numDataBlocks) {
+        failedStreamerTolerated) {
       // The write has failed, Close all the streamers.
       closeAllStreamers();
       throw new IOException(
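
The effect of the change is easiest to see in isolation. Below is a minimal, self-contained sketch (not the actual DFSStripedOutputStream code; the class and method names are hypothetical) of the tightened check: under RS-6-3-1024k the old condition tolerated up to numAllBlocks - numDataBlocks = 3 failed streamers, while a configured tolerance of 2 aborts the write one failure earlier, preserving a parity block of safety margin.

import java.io.IOException;

// Hypothetical standalone illustration of the new failure check.
public final class FailureCheckSketch {

  // Throws once the total number of failed streamers exceeds the
  // configured tolerance, mirroring the condition in checkStreamerFailures.
  static void checkFailures(int newFailed, int previouslyFailed,
      int failedStreamerTolerated) throws IOException {
    if (newFailed + previouslyFailed > failedStreamerTolerated) {
      throw new IOException("Write failed: " + (newFailed + previouslyFailed)
          + " failed streamers > tolerated " + failedStreamerTolerated);
    }
  }

  public static void main(String[] args) {
    try {
      checkFailures(1, 1, 2); // 2 <= 2: the write continues
      checkFailures(2, 1, 2); // 3 > 2: aborts, even though RS-6-3 could still decode
    } catch (IOException e) {
      System.out.println(e.getMessage());
    }
  }
}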

hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java

Lines changed: 19 additions & 0 deletions
@@ -425,6 +425,25 @@ interface ByteArrayManager {
         PREFIX + "count-reset-time-period-ms";
     long COUNT_RESET_TIME_PERIOD_MS_DEFAULT = 10 * MS_PER_SECOND;
   }
+
+  interface ECRedunency {
+    String DFS_CLIENT_EC_RS_10_4_1024k_FAILED_WRITE_BLOCK_TOLERATED =
+        "dfs.client.ec.RS-10-4-1024k.failed.write.block.tolerated";
+    int DFS_CLIENT_EC_RS_10_4_1024k_FAILED_WRITE_BLOCK_TOLERATED_DEFAILT = 4;
+    String DFS_CLIENT_EC_RS_3_2_1024k_FAILED_WRITE_BLOCK_TOLERATED =
+        "dfs.client.ec.RS-3-2-1024k.failed.write.block.tolerated";
+    int DFS_CLIENT_EC_RS_3_2_1024k_FAILED_WRITE_BLOCK_TOLERATED_DEFAILT = 2;
+    String DFS_CLIENT_EC_RS_6_3_1024k_FAILED_WRITE_BLOCK_TOLERATED =
+        "dfs.client.ec.RS-6-3-1024k.failed.write.block.tolerated";
+    int DFS_CLIENT_EC_RS_6_3_1024k_FAILED_WRITE_BLOCK_TOLERATED_DEFAILT = 3;
+    String DFS_CLIENT_EC_RS_LEGACY_6_3_1024k_FAILED_WRITE_BLOCK_TOLERATED =
+        "dfs.client.ec.RS-LEGACY-6-3-1024k.checkstreamer.redunency";
+    int DFS_CLIENT_EC_RS_LEGACY_6_3_1024k_FAILED_WRITE_BLOCK_TOLERATED_DEFAILT = 3;
+    String DFS_CLIENT_EC_XOR_2_1_1024k_FAILED_WRITE_BLOCK_TOLERATED =
+        "dfs.client.ec.XOR-2-1-1024k.checkstreamer.redunency";
+    int DFS_CLIENT_EC_XOR_2_1_1024k_FAILED_WRITE_BLOCK_TOLERATED_DEFAILT = 1;
+  }
 }
 
 /** dfs.client.block.write configuration properties */
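
A client can override the per-policy tolerance through the ordinary Configuration API. The snippet below is a sketch (assuming hadoop-common on the classpath) that sets and reads back the RS-6-3-1024k key defined above.

import org.apache.hadoop.conf.Configuration;

public class EcToleranceConfigExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // Tighten RS-6-3-1024k: abort the write once more than 2 streamers
    // fail, instead of the full parity count of 3.
    conf.setInt("dfs.client.ec.RS-6-3-1024k.failed.write.block.tolerated", 2);

    // DFSStripedOutputStream resolves the key by policy name, falling back
    // to the per-policy default (the parity block count) when unset.
    int tolerated = conf.getInt(
        "dfs.client.ec.RS-6-3-1024k.failed.write.block.tolerated", 3);
    System.out.println("tolerated failed streamers = " + tolerated);
  }
}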

hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

Lines changed: 13 additions & 0 deletions
@@ -3908,6 +3908,19 @@
   </description>
 </property>
 
+<property>
+  <name>dfs.client.ec.EXAMPLEECPOLICYNAME.failed.write.block.tolerated</name>
+  <value></value>
+  <description>
+    The number of failed streamers tolerated when writing with the EC policy
+    named EXAMPLEECPOLICYNAME, used to prevent potential data loss. For
+    example, with the RS-6-3-1024k policy a write can still succeed after 3
+    streamers fail, but if one of the six remaining blocks is then lost
+    before reconstruction completes, the data is lost forever. The value
+    should be configured in [0, numParityBlocks]; each policy's default is
+    its parity block count.
+  </description>
+</property>
+
 <property>
   <name>dfs.namenode.quota.init-threads</name>
   <value>12</value>
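
For instance, to keep one parity block of margin under RS-10-4-1024k (4 parity blocks, default tolerance 4), a client-side configuration might set the value to 3, mirroring the template above:

<property>
  <name>dfs.client.ec.RS-10-4-1024k.failed.write.block.tolerated</name>
  <value>3</value>
</property>

With this setting the write aborts as soon as a fourth streamer fails, so every block group that does get written retains at least 11 of its 14 blocks and can survive one further loss before reconstruction while staying decodable.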
