From 745091d44ed963532d74ae075b4b96335db61d28 Mon Sep 17 00:00:00 2001 From: "zhanghaobo@kanzhun.com" Date: Mon, 22 Jan 2024 13:03:19 +0800 Subject: [PATCH 1/8] HDFS-17344. Last packet will be splited into two parts when write block. --- .../src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java index a1bfb7f5d594e..0bc425e0f7640 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java @@ -539,6 +539,10 @@ protected void adjustChunkBoundary() { int psize = 0; if (blockSize == getStreamer().getBytesCurBlock()) { psize = writePacketSize; + } else if (blockSize - getStreamer().getBytesCurBlock() + PacketHeader.PKT_MAX_HEADER_LEN + < writePacketSize) { + psize = (int)(blockSize - getStreamer().getBytesCurBlock()) + + PacketHeader.PKT_MAX_HEADER_LEN; } else { psize = (int) Math .min(blockSize - getStreamer().getBytesCurBlock(), writePacketSize); From 4f3d4aa792967a5cf023610e7e35add591ddcbd7 Mon Sep 17 00:00:00 2001 From: zhanghaobo Date: Mon, 23 Dec 2024 14:17:02 +0800 Subject: [PATCH 2/8] fix checkstyle --- .../src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java index 0bc425e0f7640..d6b31150b7591 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java @@ -540,7 +540,7 @@ protected void adjustChunkBoundary() { if (blockSize == getStreamer().getBytesCurBlock()) { psize = writePacketSize; } else if (blockSize - getStreamer().getBytesCurBlock() + PacketHeader.PKT_MAX_HEADER_LEN - < writePacketSize) { + < writePacketSize) { psize = (int)(blockSize - getStreamer().getBytesCurBlock()) + PacketHeader.PKT_MAX_HEADER_LEN; } else { From 849b43d3ec2f095aff36589a4910150783b4cd74 Mon Sep 17 00:00:00 2001 From: zhanghaobo Date: Wed, 25 Dec 2024 15:06:50 +0800 Subject: [PATCH 3/8] fix bug --- .../java/org/apache/hadoop/hdfs/DFSOutputStream.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java index d6b31150b7591..9b045824c570a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java @@ -539,13 +539,14 @@ protected void adjustChunkBoundary() { int psize = 0; if (blockSize == getStreamer().getBytesCurBlock()) { psize = writePacketSize; - } else if (blockSize - getStreamer().getBytesCurBlock() + PacketHeader.PKT_MAX_HEADER_LEN - < writePacketSize) { - psize = (int)(blockSize - getStreamer().getBytesCurBlock()) + - PacketHeader.PKT_MAX_HEADER_LEN; } else { psize = (int) Math .min(blockSize - getStreamer().getBytesCurBlock(), writePacketSize); + if (psize < writePacketSize) { + final int chunkSize = bytesPerChecksum + getChecksumSize(); + int numPackets = (psize + bytesPerChecksum - 1) / bytesPerChecksum; + psize = PacketHeader.PKT_MAX_HEADER_LEN + numPackets * chunkSize; + } } computePacketChunkSize(psize, bytesPerChecksum); } From 770f7f08630d9d21626d836eaf044b7cb8105577 Mon Sep 17 00:00:00 2001 From: zhanghaobo Date: Wed, 25 Dec 2024 17:20:47 +0800 Subject: [PATCH 4/8] add UT --- .../hadoop/hdfs/TestDFSOutputStream.java | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSOutputStream.java index bdb91f91bc5e4..fe88ee281b3b6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSOutputStream.java @@ -549,6 +549,45 @@ public void testFirstPacketSizeInNewBlocks() throws IOException { fs.delete(new Path("/testfile.dat"), true); } + @Test(timeout = 60000) + public void testLastPacketSizeInBlocks() throws IOException { + final long blockSize = (long) 1024 * 1024; + MiniDFSCluster dfsCluster = cluster; + DistributedFileSystem fs = dfsCluster.getFileSystem(); + Configuration dfsConf = fs.getConf(); + + EnumSet flags = EnumSet.of(CreateFlag.CREATE); + try(FSDataOutputStream fos = fs.create(new Path("/testfile.dat"), + FsPermission.getDefault(), + flags, 512, (short)3, blockSize, null)) { + + DataChecksum crc32c = DataChecksum.newDataChecksum( + DataChecksum.Type.CRC32C, 512); + + long loop = 0; + Random r = new Random(); + byte[] buf = new byte[(int) blockSize]; + r.nextBytes(buf); + fos.write(buf); + fos.hflush(); + + int chunkSize = crc32c.getBytesPerChecksum() + crc32c.getChecksumSize(); + int packetContentSize = (dfsConf.getInt(DFS_CLIENT_WRITE_PACKET_SIZE_KEY, + DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT) - + PacketHeader.PKT_MAX_HEADER_LEN) / chunkSize * chunkSize; + + while (loop < 20) { + r.nextBytes(buf); + fos.write(buf); + fos.hflush(); + loop++; + Assert.assertEquals(((DFSOutputStream) fos.getWrappedStream()).packetSize, + packetContentSize); + } + } + fs.delete(new Path("/testfile.dat"), true); + } + @AfterClass public static void tearDown() { if (cluster != null) { From fa7f984eb0b19a13803d3daa952a89eb6f233af5 Mon Sep 17 00:00:00 2001 From: zhanghaobo Date: Thu, 26 Dec 2024 09:39:37 +0800 Subject: [PATCH 5/8] trigger yetus. From e709dbb6f7bd4b707c1c9e2a14c3bf138bc42008 Mon Sep 17 00:00:00 2001 From: zhanghaobo Date: Thu, 26 Dec 2024 09:39:37 +0800 Subject: [PATCH 6/8] trigger yetus. From 8e9afe5bc681edefaf750c27b5d62f5593db39b8 Mon Sep 17 00:00:00 2001 From: zhanghaobo Date: Sat, 28 Dec 2024 12:02:03 +0800 Subject: [PATCH 7/8] trigger yetus. From f40370bfa4c49f2108ee883e701e55b1d76c8f78 Mon Sep 17 00:00:00 2001 From: zhanghaobo Date: Sat, 4 Jan 2025 07:40:32 +0800 Subject: [PATCH 8/8] trigger yetus.