Skip to content

Commit 510fce0

Browse files
committed
HDFS-17365. EC: Add extra redundancy configuration in checkStreamerFailures to prevent data loss.
1 parent 2f1718c commit 510fce0

File tree

2 files changed

+21
-1
lines changed

2 files changed

+21
-1
lines changed

hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,10 @@ private void flipDataBuffers() {
286286
private CompletionService<Void> flushAllExecutorCompletionService;
287287
private int blockGroupIndex;
288288
private long datanodeRestartTimeout;
289+
public static final String DFS_CLIENT_WRITE_EC_CHECKSTREAMER_REDUNENCY_KEY =
290+
"dfs.client.write.ec.checkstreamer.redunency";
291+
public static final int DFS_CLIENT_WRITE_EC_CHECKSTREAMER_REDUNENCY_DEFAULT = 0;
292+
private final int extraStreamerRedunency;
289293

290294
/** Construct a new output stream for creating a file. */
291295
DFSStripedOutputStream(DFSClient dfsClient, String src, HdfsFileStatus stat,
@@ -325,6 +329,9 @@ private void flipDataBuffers() {
325329
currentPackets = new DFSPacket[streamers.size()];
326330
datanodeRestartTimeout = dfsClient.getConf().getDatanodeRestartTimeout();
327331
setCurrentStreamer(0);
332+
int extraStreamerTmp = dfsClient.getConfiguration().getInt(DFS_CLIENT_WRITE_EC_CHECKSTREAMER_REDUNENCY_KEY,
333+
DFS_CLIENT_WRITE_EC_CHECKSTREAMER_REDUNENCY_DEFAULT);
334+
extraStreamerRedunency = Math.max(extraStreamerTmp, 0);
328335
}
329336

330337
/** Construct a new output stream for appending to a file. */
@@ -690,7 +697,7 @@ private void checkStreamerFailures(boolean isNeedFlushAllPackets)
690697
// 2) create new block outputstream
691698
newFailed = waitCreatingStreamers(healthySet);
692699
if (newFailed.size() + failedStreamers.size() >
693-
numAllBlocks - numDataBlocks) {
700+
numAllBlocks - numDataBlocks - extraStreamerRedunency) {
694701
// The write has failed, Close all the streamers.
695702
closeAllStreamers();
696703
throw new IOException(

hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3907,6 +3907,19 @@
39073907
will fail and be retried by namenode.
39083908
</description>
39093909
</property>
3910+
3911+
<property>
3912+
<name>dfs.client.write.ec.checkstreamer.redunency</name>
3913+
<value>0</value>
3914+
<description>
3915+
Require this many extra healthy streamers in checkStreamerFailures to prevent
3916+
potential data loss. For example, with the RS-6-3-1024K EC policy, we can
3917+
write successfully even when 3 streamers have failed. But if one of the six
3918+
replicas is lost during reconstruction, we may lose the data forever.
3919+
It should be configured in the range [0, numParityBlocks]; negative values are treated as 0.
3920+
</description>
3921+
</property>
3922+
39103923

39113924
<property>
39123925
<name>dfs.namenode.quota.init-threads</name>

0 commit comments

Comments
 (0)