apache · luoyuxia · Dec 16, 2025 · Aug 28, 2025 · Dec 15, 2025 · Dec 16, 2025
diff --git a/fluss-client/src/test/java/org/apache/fluss/client/admin/FlussAdminITCase.java b/fluss-client/src/test/java/org/apache/fluss/client/admin/FlussAdminITCase.java
@@ -659,7 +659,7 @@ void testCreateTableWithInvalidProperty() {
                 .cause()
                 .isInstanceOf(InvalidConfigException.class)
                 .hasMessageContaining(
-                        "Currently, Primary Key Table only supports ARROW log format if kv format is COMPACTED.");
+                        "Currently, Primary Key Table supports ARROW or COMPACTED log format when kv format is COMPACTED.");
     }
 
     @Test

diff --git a/fluss-client/src/test/java/org/apache/fluss/client/table/FlussTableITCase.java b/fluss-client/src/test/java/org/apache/fluss/client/table/FlussTableITCase.java
@@ -730,6 +730,11 @@ void testPutAndPoll(String kvFormat) throws Exception {
         verifyAppendOrPut(false, "ARROW", kvFormat);
     }
 
+    @Test
+    void testPutAndPollCompacted() throws Exception {
+        verifyAppendOrPut(false, "COMPACTED", "COMPACTED"); // append=false; logFormat, kvFormat
+    }
+
     void verifyAppendOrPut(boolean append, String logFormat, @Nullable String kvFormat)
             throws Exception {
         Schema schema =
@@ -888,8 +893,9 @@ void testAppendAndProject(String format) throws Exception {
         }
     }
 
-    @Test
-    void testPutAndProject() throws Exception {
+    @ParameterizedTest
+    @ValueSource(strings = {"ARROW", "COMPACTED"})
+    void testPutAndProject(String changelogFormat) throws Exception {
         Schema schema =
                 Schema.newBuilder()
                         .column("a", DataTypes.INT())
@@ -898,7 +904,11 @@ void testPutAndProject() throws Exception {
                         .column("d", DataTypes.BIGINT())
                         .primaryKey("a")
                         .build();
-        TableDescriptor tableDescriptor = TableDescriptor.builder().schema(schema).build();
+        TableDescriptor tableDescriptor =
+                TableDescriptor.builder()
+                        .schema(schema)
+                        .property(ConfigOptions.TABLE_LOG_FORMAT.key(), changelogFormat)
+                        .build();
         TablePath tablePath = TablePath.of("test_db_1", "test_pk_table_1");
         createTable(tablePath, tableDescriptor, false);
 

diff --git a/fluss-common/src/main/java/org/apache/fluss/config/ConfigOptions.java b/fluss-common/src/main/java/org/apache/fluss/config/ConfigOptions.java
@@ -1230,7 +1230,7 @@ public class ConfigOptions {
                     .defaultValue(LogFormat.ARROW)
                     .withDescription(
                             "The format of the log records in log store. The default value is `arrow`. "
-                                    + "The supported formats are `arrow` and `indexed`.");
+                                    + "The supported formats are `arrow`, `indexed` and `compacted`.");
 
     public static final ConfigOption<ArrowCompressionType> TABLE_LOG_ARROW_COMPRESSION_TYPE =
             key("table.log.arrow.compression.type")

diff --git a/fluss-common/src/main/java/org/apache/fluss/metadata/LogFormat.java b/fluss-common/src/main/java/org/apache/fluss/metadata/LogFormat.java
@@ -18,10 +18,15 @@
 package org.apache.fluss.metadata;
 
 import org.apache.fluss.record.MemoryLogRecordsArrowBuilder;
+import org.apache.fluss.record.MemoryLogRecordsCompactedBuilder;
 import org.apache.fluss.record.MemoryLogRecordsIndexedBuilder;
+import org.apache.fluss.row.compacted.CompactedRow;
 import org.apache.fluss.row.indexed.IndexedRow;
 
-/** The format of the log records in log store. The supported formats are 'arrow' and 'indexed'. */
+/**
+ * The format of the log records in log store. The supported formats are 'arrow', 'indexed' and
+ * 'compacted'.
+ */
 public enum LogFormat {
 
     /**
@@ -41,18 +46,29 @@ public enum LogFormat {
      *
      * @see MemoryLogRecordsIndexedBuilder
      */
-    INDEXED;
+    INDEXED,
+
+    /**
+     * The log record batches are stored in {@link CompactedRow} format which is a compact
+     * row-oriented format optimized for primary key tables to reduce storage while trading CPU for
+     * reads.
+     *
+     * @see MemoryLogRecordsCompactedBuilder
+     */
+    COMPACTED;
 
     /**
-     * Creates a {@link LogFormat} from the given string. The string must be either 'arrow' or
-     * 'indexed'.
+     * Creates a {@link LogFormat} from the given string. The string must be either 'arrow',
+     * 'indexed' or 'compacted'.
      */
     public static LogFormat fromString(String format) {
         switch (format.toUpperCase()) {
             case "ARROW":
                 return ARROW;
             case "INDEXED":
                 return INDEXED;
+            case "COMPACTED":
+                return COMPACTED;
             default:
                 throw new IllegalArgumentException("Unsupported log format: " + format);
         }

diff --git a/fluss-common/src/main/java/org/apache/fluss/record/AbstractRowMemoryLogRecordsBuilder.java b/fluss-common/src/main/java/org/apache/fluss/record/AbstractRowMemoryLogRecordsBuilder.java
@@ -0,0 +1,227 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.record;
+
+import org.apache.fluss.memory.AbstractPagedOutputView;
+import org.apache.fluss.memory.MemorySegment;
+import org.apache.fluss.memory.MemorySegmentOutputView;
+import org.apache.fluss.record.bytesview.BytesView;
+import org.apache.fluss.record.bytesview.MultiBytesView;
+import org.apache.fluss.utils.crc.Crc32C;
+
+import java.io.IOException;
+
+import static org.apache.fluss.record.LogRecordBatchFormat.BASE_OFFSET_LENGTH;
+import static org.apache.fluss.record.LogRecordBatchFormat.LENGTH_LENGTH;
+import static org.apache.fluss.record.LogRecordBatchFormat.LOG_MAGIC_VALUE_V1;
+import static org.apache.fluss.record.LogRecordBatchFormat.NO_BATCH_SEQUENCE;
+import static org.apache.fluss.record.LogRecordBatchFormat.NO_LEADER_EPOCH;
+import static org.apache.fluss.record.LogRecordBatchFormat.NO_WRITER_ID;
+import static org.apache.fluss.record.LogRecordBatchFormat.crcOffset;
+import static org.apache.fluss.record.LogRecordBatchFormat.lastOffsetDeltaOffset;
+import static org.apache.fluss.record.LogRecordBatchFormat.recordBatchHeaderSize;
+import static org.apache.fluss.record.LogRecordBatchFormat.schemaIdOffset;
+import static org.apache.fluss.utils.Preconditions.checkArgument;
+
+/** Shared base for row-based MemoryLogRecords builders; {@code T} is the appended row type. */
+abstract class AbstractRowMemoryLogRecordsBuilder<T> implements AutoCloseable {
+    protected static final int BUILDER_DEFAULT_OFFSET = 0;
+
+    protected final long baseLogOffset;
+    protected final int schemaId;
+    // Maximum total batch size in bytes (header included) that appended records may reach.
+    protected final int writeLimit;
+    protected final byte magic;
+    protected final AbstractPagedOutputView pagedOutputView;
+    protected final MemorySegment firstSegment;
+    protected final boolean appendOnly;
+
+    private BytesView builtBuffer = null;
+    private long writerId;
+    private int batchSequence;
+    private int currentRecordNumber;
+    private int sizeInBytes;
+    private volatile boolean isClosed;
+    private boolean aborted = false;
+
+    protected AbstractRowMemoryLogRecordsBuilder(
+            long baseLogOffset,
+            int schemaId,
+            int writeLimit,
+            byte magic,
+            AbstractPagedOutputView pagedOutputView,
+            boolean appendOnly) {
+        this.appendOnly = appendOnly;
+        checkArgument(
+                schemaId <= Short.MAX_VALUE,
+                "schemaId shouldn't be greater than the max value of short: " + Short.MAX_VALUE);
+        this.baseLogOffset = baseLogOffset;
+        this.schemaId = schemaId;
+        this.writeLimit = writeLimit;
+        this.magic = magic;
+        this.pagedOutputView = pagedOutputView;
+        this.firstSegment = pagedOutputView.getCurrentSegment();
+        this.writerId = NO_WRITER_ID;
+        this.batchSequence = NO_BATCH_SEQUENCE;
+        this.currentRecordNumber = 0;
+        this.isClosed = false;
+
+        // Reserve room for the batch header; build() fills it in via writeBatchHeader().
+        int headerSize = recordBatchHeaderSize(magic);
+        this.pagedOutputView.setPosition(headerSize);
+        this.sizeInBytes = headerSize;
+    }
+
+    /** Implement to return size of the record (including length field). */
+    protected abstract int sizeOf(T row);
+
+    /** Implement to write the record and return total written bytes including length field. */
+    protected abstract int writeRecord(ChangeType changeType, T row) throws IOException;
+
+    public boolean hasRoomFor(T row) {
+        return sizeInBytes + sizeOf(row) <= writeLimit;
+    }
+
+    public void append(ChangeType changeType, T row) throws Exception {
+        appendRecord(changeType, row);
+    }
+
+    private void appendRecord(ChangeType changeType, T row) throws IOException {
+        if (aborted) {
+            throw new IllegalStateException(
+                    "Tried to append a record, but "
+                            + getClass().getSimpleName()
+                            + " has already been aborted");
+        }
+        if (isClosed) {
+            throw new IllegalStateException(
+                    "Tried to append a record, but MemoryLogRecordsBuilder is closed for record appends");
+        }
+        if (appendOnly && changeType != ChangeType.APPEND_ONLY) {
+            throw new IllegalArgumentException(
+                    "Only append-only change type is allowed for append-only row log builder, but got "
+                            + changeType);
+        }
+
+        int recordByteSizes = writeRecord(changeType, row);
+        currentRecordNumber++;
+        sizeInBytes += recordByteSizes;
+    }
+
+    public BytesView build() throws IOException {
+        if (aborted) {
+            throw new IllegalStateException("Attempting to build an aborted record batch");
+        }
+        if (builtBuffer != null) {
+            return builtBuffer;
+        }
+        writeBatchHeader();
+        builtBuffer =
+                MultiBytesView.builder()
+                        .addMemorySegmentByteViewList(pagedOutputView.getWrittenSegments())
+                        .build();
+        return builtBuffer;
+    }
+
+    public void setWriterState(long writerId, int batchBaseSequence) {
+        this.writerId = writerId;
+        this.batchSequence = batchBaseSequence;
+    }
+
+    public void resetWriterState(long writerId, int batchSequence) {
+        // drop the cached buffer so the next build() rewrites the batch header
+        this.builtBuffer = null;
+        this.writerId = writerId;
+        this.batchSequence = batchSequence;
+    }
+
+    public long writerId() {
+        return writerId;
+    }
+
+    public int batchSequence() {
+        return batchSequence;
+    }
+
+    public boolean isClosed() {
+        return isClosed;
+    }
+
+    public void abort() {
+        aborted = true;
+    }
+
+    @Override
+    public void close() throws IOException {
+        if (aborted) {
+            throw new IllegalStateException(
+                    "Cannot close "
+                            + getClass().getSimpleName()
+                            + " as it has already been aborted");
+        }
+        isClosed = true;
+    }
+
+    public int getSizeInBytes() {
+        return sizeInBytes;
+    }
+
+    // ----------------------- internal methods -------------------------------
+    private void writeBatchHeader() throws IOException {
+        // pagedOutputView doesn't support seek to previous segment,
+        // so we create a new output view on the first segment
+        MemorySegmentOutputView outputView = new MemorySegmentOutputView(firstSegment);
+        outputView.setPosition(0);
+        // write the header fields in order, starting from offset 0.
+        outputView.writeLong(baseLogOffset);
+        outputView.writeInt(sizeInBytes - BASE_OFFSET_LENGTH - LENGTH_LENGTH);
+        outputView.writeByte(magic);
+
+        // write empty timestamp which will be overridden on server side
+        outputView.writeLong(0);
+
+        // write empty leaderEpoch which will be overridden on server side
+        if (magic >= LOG_MAGIC_VALUE_V1) {
+            outputView.writeInt(NO_LEADER_EPOCH);
+        }
+
+        // write a zero crc placeholder; the real value is back-filled at the end.
+        outputView.writeUnsignedInt(0);
+
+        outputView.writeShort((short) schemaId);
+        // write the attributes byte (currently it only carries the appendOnly flag)
+        outputView.writeBoolean(appendOnly);
+        // move to the lastOffsetDelta field; the attributes byte was already written above.
+        outputView.setPosition(lastOffsetDeltaOffset(magic));
+        if (currentRecordNumber > 0) {
+            outputView.writeInt(currentRecordNumber - 1);
+        } else {
+            // If there is no record, we write 0 for field lastOffsetDelta, see the comments about
+            // the field 'lastOffsetDelta' in DefaultLogRecordBatch.
+            outputView.writeInt(0);
+        }
+        outputView.writeLong(writerId);
+        outputView.writeInt(batchSequence);
+        outputView.writeInt(currentRecordNumber);
+
+        // compute the crc over the written segments and overwrite the placeholder.
+        long crc = Crc32C.compute(pagedOutputView.getWrittenSegments(), schemaIdOffset(magic));
+        outputView.setPosition(crcOffset(magic));
+        outputView.writeUnsignedInt(crc);
+    }
+}