From 93a31e12431fb82b997137fd99270a9844f9b9d6 Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Wed, 8 May 2024 23:46:08 +0800
Subject: [PATCH 01/27] Add lightweight integer compression codec and shared EncodingUtil

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 dbms/CMakeLists.txt                           |   3 +-
 .../Compression/CompressionCodecDeltaFOR.cpp  | 161 +-------
 .../IO/Compression/CompressionCodecFOR.cpp    | 127 +-----
 dbms/src/IO/Compression/CompressionCodecFOR.h |   6 -
 .../CompressionCodecIntegerLightweight.cpp    | 367 ++++++++++++++++++
 .../CompressionCodecIntegerLightweight.h      | 111 ++++++
 .../IO/Compression/CompressionCodecRLE.cpp    |  57 +--
 dbms/src/IO/Compression/CompressionFactory.h  |   3 +
 dbms/src/IO/Compression/CompressionInfo.h     |   1 +
 dbms/src/IO/Compression/CompressionMethod.h   |   1 +
 dbms/src/IO/Compression/CompressionSettings.h |   2 +
 dbms/src/IO/Compression/EncodingUtil.cpp      | 253 ++++++++++++
 dbms/src/IO/Compression/EncodingUtil.h        | 251 ++++++++++++
 .../tests/gtest_codec_compression.cpp         |   1 +
 14 files changed, 1022 insertions(+), 322 deletions(-)
 create mode 100644 dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp
 create mode 100644 dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h
 create mode 100644 dbms/src/IO/Compression/EncodingUtil.cpp
 create mode 100644 dbms/src/IO/Compression/EncodingUtil.h
diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt
index 44da3f050bf..a7a60040260 100644
--- a/dbms/CMakeLists.txt
+++ b/dbms/CMakeLists.txt
@@ -104,8 +104,7 @@ check_then_add_sources_compile_flag (
     src/Columns/ColumnVector.cpp
     src/DataTypes/DataTypeString.cpp
     src/Interpreters/Join.cpp
-    src/IO/Compression/CompressionCodecFOR.cpp
-    src/IO/Compression/CompressionCodecDeltaFOR.cpp
+    src/IO/Compression/EncodingUtil.cpp
     src/Storages/DeltaMerge/BitmapFilter/BitmapFilter.cpp
     src/Storages/DeltaMerge/DMVersionFilterBlockInputStream.cpp
 )
diff --git a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
index f449f71e67f..d2bcbbe3262 100644
--- a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
@@ -17,14 +17,11 @@
 #include <IO/Compression/CompressionCodecDeltaFOR.h>
 #include <IO/Compression/CompressionCodecFOR.h>
 #include <IO/Compression/CompressionInfo.h>
+#include <IO/Compression/EncodingUtil.h>
 #include <common/likely.h>
 #include <common/unaligned.h>
 
 
-#if defined(__AVX2__)
-#include <immintrin.h>
-#endif
-
 namespace DB
 {
 
@@ -56,148 +53,16 @@ UInt32 CompressionCodecDeltaFOR::getMaxCompressedDataSize(UInt32 uncompressed_si
 namespace
 {
 
-template <std::integral T>
-void DeltaEncode(const T * source, UInt32 count, T * dest)
-{
-    T prev = 0;
-    for (UInt32 i = 0; i < count; ++i)
-    {
-        T curr = source[i];
-        dest[i] = curr - prev;
-        prev = curr;
-    }
-}
-
 template <std::integral T>
 UInt32 compressData(const char * source, UInt32 source_size, char * dest)
 {
     const auto count = source_size / sizeof(T);
-    DeltaEncode<T>(reinterpret_cast<const T *>(source), count, reinterpret_cast<T *>(dest));
+    DB::Compression::DeltaEncoding<T>(reinterpret_cast<const T *>(source), count, reinterpret_cast<T *>(dest));
     // Cast deltas to signed type to better compress negative values.
+    // For example, if we have a sequence of UInt8 values [3, 2, 1, 0], the deltas will be [3, -1, -1, -1]
+    // If we compress them as UInt8, we will get [3, 255, 255, 255], which is not optimal.
     using TS = typename std::make_signed<T>::type;
-    return CompressionCodecFOR::compressData<TS>(reinterpret_cast<TS *>(dest), count, dest);
-}
-
-template <std::integral T>
-void ordinaryDeltaDecode(const char * source, UInt32 source_size, char * dest)
-{
-    T accumulator{};
-    const char * const source_end = source + source_size;
-    while (source < source_end)
-    {
-        accumulator += unalignedLoad<T>(source);
-        unalignedStore<T>(dest, accumulator);
-
-        source += sizeof(T);
-        dest += sizeof(T);
-    }
-}
-
-template <std::integral T>
-void DeltaDecode(const char * source, UInt32 source_size, char * dest)
-{
-    ordinaryDeltaDecode<T>(source, source_size, dest);
-}
-
-#if defined(__AVX2__)
-// Note: using SIMD to rewrite compress does not improve performance.
-
-template <>
-void DeltaDecode<UInt32>(const char * __restrict__ raw_source, UInt32 raw_source_size, char * __restrict__ raw_dest)
-{
-    const auto * source = reinterpret_cast<const UInt32 *>(raw_source);
-    auto source_size = raw_source_size / sizeof(UInt32);
-    auto * dest = reinterpret_cast<UInt32 *>(raw_dest);
-    __m128i prev = _mm_setzero_si128();
-    size_t i = 0;
-    for (; i < source_size / 4; i++)
-    {
-        auto curr = _mm_lddqu_si128(reinterpret_cast<const __m128i *>(source) + i);
-        const auto tmp1 = _mm_add_epi32(_mm_slli_si128(curr, 8), curr);
-        const auto tmp2 = _mm_add_epi32(_mm_slli_si128(tmp1, 4), tmp1);
-        prev = _mm_add_epi32(tmp2, _mm_shuffle_epi32(prev, 0xff));
-        _mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + i, prev);
-    }
-    uint32_t lastprev = _mm_extract_epi32(prev, 3);
-    for (i = 4 * i; i < source_size; ++i)
-    {
-        lastprev = lastprev + source[i];
-        dest[i] = lastprev;
-    }
-}
-
-template <>
-void DeltaDecode<UInt64>(const char * __restrict__ raw_source, UInt32 raw_source_size, char * __restrict__ raw_dest)
-{
-    const auto * source = reinterpret_cast<const UInt64 *>(raw_source);
-    auto source_size = raw_source_size / sizeof(UInt64);
-    auto * dest = reinterpret_cast<UInt64 *>(raw_dest);
-    // AVX2 does not support shffule across 128-bit lanes, so we need to use permute.
-    __m256i prev = _mm256_setzero_si256();
-    __m256i zero = _mm256_setzero_si256();
-    size_t i = 0;
-    for (; i < source_size / 4; ++i)
-    {
-        // curr = {a0, a1, a2, a3}
-        auto curr = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(source) + i);
-        // x0 = {0, a0, a1, a2}
-        auto x0 = _mm256_blend_epi32(_mm256_permute4x64_epi64(curr, 0b10010011), zero, 0b00000011);
-        // x1 = {a0, a01, a12, a23}
-        auto x1 = _mm256_add_epi64(curr, x0);
-        // x2 = {0, 0, a0, a01}
-        auto x2 = _mm256_permute2f128_si256(x1, x1, 0b00101000);
-        // prev = prev + {a0, a01, a012, a0123}
-        prev = _mm256_add_epi64(prev, _mm256_add_epi64(x1, x2));
-        _mm256_storeu_si256(reinterpret_cast<__m256i *>(dest) + i, prev);
-        // prev = {prev[3], prev[3], prev[3], prev[3]}
-        prev = _mm256_permute4x64_epi64(prev, 0b11111111);
-    }
-    UInt64 lastprev = _mm256_extract_epi64(prev, 3);
-    for (i = 4 * i; i < source_size; ++i)
-    {
-        lastprev += source[i];
-        dest[i] = lastprev;
-    }
-}
-
-#endif
-
-template <std::integral T>
-void ordinaryDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 output_size)
-{
-    using TS = typename std::make_signed<T>::type;
-    CompressionCodecFOR::decompressData<TS>(source, source_size, dest, output_size);
-    ordinaryDeltaDecode<T>(dest, output_size, dest);
-}
-
-template <std::integral T>
-void decompressData(const char * source, UInt32 source_size, char * dest, UInt32 output_size)
-{
-    ordinaryDecompressData<T>(source, source_size, dest, output_size);
-}
-
-template <>
-void decompressData<UInt32>(const char * source, UInt32 source_size, char * dest, UInt32 output_size)
-{
-    const auto count = output_size / sizeof(UInt32);
-    auto round_size = BitpackingPrimitives::roundUpToAlgorithmGroupSize(count);
-    // Reserve enough space for the temporary buffer.
-    const auto required_size = round_size * sizeof(UInt32);
-    char tmp_buffer[required_size];
-    CompressionCodecFOR::decompressData<Int32>(source, source_size, tmp_buffer, required_size);
-    DeltaDecode<UInt32>(reinterpret_cast<const char *>(tmp_buffer), output_size, dest);
-}
-
-template <>
-void decompressData<UInt64>(const char * source, UInt32 source_size, char * dest, UInt32 output_size)
-{
-    const auto count = output_size / sizeof(UInt64);
-    const auto round_size = BitpackingPrimitives::roundUpToAlgorithmGroupSize(count);
-    // Reserve enough space for the temporary buffer.
-    const auto required_size = round_size * sizeof(UInt64);
-    char tmp_buffer[required_size];
-    CompressionCodecFOR::decompressData<Int64>(source, source_size, tmp_buffer, required_size);
-    DeltaDecode<UInt64>(reinterpret_cast<const char *>(tmp_buffer), output_size, dest);
+    return DB::CompressionCodecFOR::compressData<TS>(reinterpret_cast<TS *>(dest), count, dest);
 }
 
 } // namespace
@@ -249,16 +114,16 @@ void CompressionCodecDeltaFOR::doDecompressData(
     switch (bytes_size)
     {
     case 1:
-        decompressData<UInt8>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::DeltaForDecoding<UInt8>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     case 2:
-        decompressData<UInt16>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::DeltaForDecoding<UInt16>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     case 4:
-        decompressData<UInt32>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::DeltaForDecoding<UInt32>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     case 8:
-        decompressData<UInt64>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::DeltaForDecoding<UInt64>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     default:
         throw Exception(
@@ -293,16 +158,16 @@ void CompressionCodecDeltaFOR::ordinaryDecompress(
     switch (bytes_size)
     {
     case 1:
-        ordinaryDecompressData<UInt8>(&source[1], source_size_no_header, dest, dest_size);
+        DB::Compression::OrdinaryDeltaForDecoding<UInt8>(&source[1], source_size_no_header, dest, dest_size);
         break;
     case 2:
-        ordinaryDecompressData<UInt16>(&source[1], source_size_no_header, dest, dest_size);
+        DB::Compression::OrdinaryDeltaForDecoding<UInt16>(&source[1], source_size_no_header, dest, dest_size);
         break;
     case 4:
-        ordinaryDecompressData<UInt32>(&source[1], source_size_no_header, dest, dest_size);
+        DB::Compression::OrdinaryDeltaForDecoding<UInt32>(&source[1], source_size_no_header, dest, dest_size);
         break;
     case 8:
-        ordinaryDecompressData<UInt64>(&source[1], source_size_no_header, dest, dest_size);
+        DB::Compression::OrdinaryDeltaForDecoding<UInt64>(&source[1], source_size_no_header, dest, dest_size);
         break;
     default:
         throw Exception(
diff --git a/dbms/src/IO/Compression/CompressionCodecFOR.cpp b/dbms/src/IO/Compression/CompressionCodecFOR.cpp
index af5e46c99c2..86b21719744 100644
--- a/dbms/src/IO/Compression/CompressionCodecFOR.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecFOR.cpp
@@ -16,12 +16,10 @@
 #include <Common/Exception.h>
 #include <IO/Compression/CompressionCodecFOR.h>
 #include <IO/Compression/CompressionInfo.h>
+#include <IO/Compression/EncodingUtil.h>
 #include <common/likely.h>
 #include <common/unaligned.h>
 
-#if defined(__AVX2__)
-#include <immintrin.h>
-#endif
 
 namespace DB
 {
@@ -58,98 +56,8 @@ UInt32 CompressionCodecFOR::compressData(const T * source, UInt32 count, char *
     std::vector<T> values(count);
     values.assign(source, source + count);
     T frame_of_reference = *std::min_element(values.cbegin(), values.cend());
-    // store frame of reference
-    unalignedStore<T>(dest, frame_of_reference);
-    dest += sizeof(T);
-    if (frame_of_reference != 0)
-    {
-        for (auto & value : values)
-            value -= frame_of_reference;
-    }
-    T max_value = *std::max_element(values.cbegin(), values.cend());
-    UInt8 width = BitpackingPrimitives::minimumBitWidth(max_value);
-    // store width
-    unalignedStore<UInt8>(dest, width);
-    dest += sizeof(UInt8);
-    // if width == 0, skip bitpacking
-    if (width == 0)
-        return sizeof(T) + sizeof(UInt8);
-    auto required_size = BitpackingPrimitives::getRequiredSize(count, width);
-    // after applying frame of reference, all values are bigger than 0.
-    BitpackingPrimitives::packBuffer(reinterpret_cast<unsigned char *>(dest), values.data(), count, width);
-    return sizeof(T) + sizeof(UInt8) + required_size;
-}
-
-template <std::integral T>
-void CompressionCodecFOR::decompressData(const char * source, UInt32 source_size, char * dest, UInt32 output_size)
-{
-    const auto count = output_size / sizeof(T);
-    T frame_of_reference = unalignedLoad<T>(source);
-    source += sizeof(T);
-    auto width = unalignedLoad<UInt8>(source);
-    source += sizeof(UInt8);
-    const auto required_size = source_size - sizeof(T) - sizeof(UInt8);
-    RUNTIME_CHECK(BitpackingPrimitives::getRequiredSize(count, width) == required_size);
-    auto round_size = BitpackingPrimitives::roundUpToAlgorithmGroupSize(count);
-    if (round_size != count)
-    {
-        // Reserve enough space for the temporary buffer.
-        unsigned char tmp_buffer[round_size * sizeof(T)];
-        BitpackingPrimitives::unPackBuffer<T>(
-            tmp_buffer,
-            reinterpret_cast<const unsigned char *>(source),
-            count,
-            width);
-        CompressionCodecFOR::applyFrameOfReference(reinterpret_cast<T *>(tmp_buffer), frame_of_reference, count);
-        memcpy(dest, tmp_buffer, output_size);
-        return;
-    }
-    BitpackingPrimitives::unPackBuffer<T>(
-        reinterpret_cast<unsigned char *>(dest),
-        reinterpret_cast<const unsigned char *>(source),
-        count,
-        width);
-    CompressionCodecFOR::applyFrameOfReference(reinterpret_cast<T *>(dest), frame_of_reference, count);
-}
-
-template <std::integral T>
-void CompressionCodecFOR::applyFrameOfReference(T * dst, T frame_of_reference, UInt32 count)
-{
-    if (frame_of_reference == 0)
-        return;
-
-    UInt32 i = 0;
-#if defined(__AVX2__)
-    UInt32 aligned_count = count - count % (sizeof(__m256i) / sizeof(T));
-    for (; i < aligned_count; i += (sizeof(__m256i) / sizeof(T)))
-    {
-        // Load the data using SIMD
-        __m256i value = _mm256_loadu_si256(reinterpret_cast<__m256i *>(dst + i));
-        // Perform vectorized addition
-        if constexpr (sizeof(T) == 1)
-        {
-            value = _mm256_add_epi8(value, _mm256_set1_epi8(frame_of_reference));
-        }
-        else if constexpr (sizeof(T) == 2)
-        {
-            value = _mm256_add_epi16(value, _mm256_set1_epi16(frame_of_reference));
-        }
-        else if constexpr (sizeof(T) == 4)
-        {
-            value = _mm256_add_epi32(value, _mm256_set1_epi32(frame_of_reference));
-        }
-        else if constexpr (sizeof(T) == 8)
-        {
-            value = _mm256_add_epi64(value, _mm256_set1_epi64x(frame_of_reference));
-        }
-        // Store the result back to memory
-        _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst + i), value);
-    }
-#endif
-    for (; i < count; ++i)
-    {
-        dst[i] += frame_of_reference;
-    }
+    UInt8 width = DB::Compression::ForEncodingWidth(values, frame_of_reference);
+    return DB::Compression::ForEncoding<T, std::is_signed_v<T>>(values, frame_of_reference, width, dest);
 }
 
 UInt32 CompressionCodecFOR::doCompressData(const char * source, UInt32 source_size, char * dest) const
@@ -200,16 +108,16 @@ void CompressionCodecFOR::doDecompressData(
     switch (bytes_size)
     {
     case 1:
-        decompressData<UInt8>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::ForDecoding<UInt8>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     case 2:
-        decompressData<UInt16>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::ForDecoding<UInt16>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     case 4:
-        decompressData<UInt32>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::ForDecoding<UInt32>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     case 8:
-        decompressData<UInt64>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::ForDecoding<UInt64>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     default:
         throw Exception(
@@ -227,25 +135,4 @@ template UInt32 CompressionCodecFOR::compressData<Int16>(const Int16 * source, U
 template UInt32 CompressionCodecFOR::compressData<Int32>(const Int32 * source, UInt32 count, char * dest);
 template UInt32 CompressionCodecFOR::compressData<Int64>(const Int64 * source, UInt32 count, char * dest);
 
-template void CompressionCodecFOR::decompressData<Int8>(
-    const char * source,
-    UInt32 source_size,
-    char * dest,
-    UInt32 output_size);
-template void CompressionCodecFOR::decompressData<Int16>(
-    const char * source,
-    UInt32 source_size,
-    char * dest,
-    UInt32 output_size);
-template void CompressionCodecFOR::decompressData<Int32>(
-    const char * source,
-    UInt32 source_size,
-    char * dest,
-    UInt32 output_size);
-template void CompressionCodecFOR::decompressData<Int64>(
-    const char * source,
-    UInt32 source_size,
-    char * dest,
-    UInt32 output_size);
-
 } // namespace DB
diff --git a/dbms/src/IO/Compression/CompressionCodecFOR.h b/dbms/src/IO/Compression/CompressionCodecFOR.h
index 38798b3d8d2..75dd8b91734 100644
--- a/dbms/src/IO/Compression/CompressionCodecFOR.h
+++ b/dbms/src/IO/Compression/CompressionCodecFOR.h
@@ -34,15 +34,9 @@ class CompressionCodecFOR : public ICompressionCodec
 
     UInt8 getMethodByte() const override;
 
-    template <std::integral T>
-    static void applyFrameOfReference(T * dst, T frame_of_reference, UInt32 count);
-
     template <std::integral T>
     static UInt32 compressData(const T * source, UInt32 count, char * dest);
 
-    template <std::integral T>
-    static void decompressData(const char * source, UInt32 source_size, char * dest, UInt32 output_size);
-
 #ifndef DBMS_PUBLIC_GTEST
 protected:
 #endif
diff --git a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp
new file mode 100644
index 00000000000..9dd9445a53a
--- /dev/null
+++ b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp
@@ -0,0 +1,367 @@
+// Copyright 2024 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <Common/BitpackingPrimitives.h>
+#include <Common/Exception.h>
+#include <IO/Compression/CompressionCodecIntegerLightweight.h>
+#include <IO/Compression/CompressionInfo.h>
+#include <IO/Compression/CompressionSettings.h>
+#include <IO/Compression/EncodingUtil.h>
+#include <common/likely.h>
+#include <common/unaligned.h>
+#include <lz4.h>
+
+#include <algorithm>
+#include <limits>
+
+
+namespace DB
+{
+
+// TODO: metrics
+
+namespace ErrorCodes
+{
+extern const int CANNOT_COMPRESS;
+extern const int CANNOT_DECOMPRESS;
+} // namespace ErrorCodes
+
+CompressionCodecIntegerLightweight::CompressionCodecIntegerLightweight(UInt8 bytes_size_)
+    : bytes_size(bytes_size_) // width in bytes of one integer; doCompressData accepts only 1, 2, 4 or 8
+{}
+
+UInt8 CompressionCodecIntegerLightweight::getMethodByte() const
+{
+    return static_cast<UInt8>(CompressionMethodByte::Lightweight); // the Lightweight entry as a raw byte
+}
+
+UInt32 CompressionCodecIntegerLightweight::getMaxCompressedDataSize(UInt32 uncompressed_size) const
+{
+    // 1 byte for bytes_size, 1 byte for mode; the LZ4 fallback may expand the data, so reserve its worst case
+    return 1 + 1 + LZ4_COMPRESSBOUND(uncompressed_size);
+}
+
+template <typename T> // T: unsigned integer type selected by doCompressData from bytes_size
+size_t CompressionCodecIntegerLightweight::compressDataForType(const char * source, UInt32 source_size, char * dest)
+    const
+{
+    if (source_size % sizeof(T) != 0)
+        throw Exception(
+            ErrorCodes::CANNOT_COMPRESS,
+            "Cannot compress with lightweight codec, data size {} is not aligned to {}",
+            source_size,
+            sizeof(T));
+
+    // Load values: copy the possibly unaligned input into a typed vector
+    const size_t count = source_size / sizeof(T);
+    std::vector<T> values(count);
+    for (size_t i = 0; i < count; ++i)
+    {
+        values[i] = unalignedLoad<T>(source + i * sizeof(T));
+    }
+
+    // Analyze: ctx.analyze sets ctx.mode and, for the lightweight modes, fills `state`
+    State<T> state;
+    ctx.analyze<T>(values, state);
+
+    // Compress: the first output byte records the chosen mode, the mode-specific payload follows
+    unalignedStore<UInt8>(dest, static_cast<UInt8>(ctx.mode));
+    dest += sizeof(UInt8);
+    size_t compressed_size = 1; // accounts for the mode byte
+    switch (ctx.mode)
+    {
+    case Mode::CONSTANT:
+    {
+        compressed_size += Compression::ConstantEncoding(std::get<0>(state), dest);
+        break;
+    }
+    case Mode::CONSTANT_DELTA:
+    {
+        compressed_size += Compression::ConstantDeltaEncoding(values[0], std::get<0>(state), dest);
+        break;
+    }
+    case Mode::RLE:
+    {
+        compressed_size += Compression::RLEEncoding<T>(std::get<1>(state), dest);
+        break;
+    }
+    case Mode::FOR:
+    {
+        FORState for_state = std::get<2>(state);
+        compressed_size += Compression::ForEncoding(values, for_state.min_value, for_state.bit_width, dest);
+        break;
+    }
+    case Mode::DELTA_FOR:
+    {
+        DeltaFORState delta_for_state = std::get<3>(state); // NOTE(review): by-value copy, deltas included
+        compressed_size += Compression::ForEncoding<typename std::make_signed_t<T>, true>(
+            delta_for_state.deltas,
+            delta_for_state.min_delta_value,
+            delta_for_state.bit_width,
+            dest);
+        break;
+    }
+    case Mode::LZ4:
+    {
+        auto success = LZ4_compress_fast(
+            source,
+            dest,
+            source_size,
+            LZ4_COMPRESSBOUND(source_size), // NOTE(review): capacity promised to LZ4; `dest` must have this much room
+            CompressionSetting::getDefaultLevel(CompressionMethod::LZ4));
+        if (!success)
+            throw Exception("Cannot LZ4_compress_fast", ErrorCodes::CANNOT_COMPRESS);
+        compressed_size += success; // a non-zero result is the number of bytes written
+        break;
+    }
+    default:
+        throw Exception(
+            ErrorCodes::CANNOT_COMPRESS,
+            "Cannot compress with lightweight codec, unknown mode {}",
+            static_cast<int>(ctx.mode));
+    }
+
+    // Update statistics: ctx.update adds both sizes to the counters of the mode that was used
+    ctx.update(source_size, compressed_size);
+
+    return compressed_size;
+}
+
+template <typename T> // T: unsigned integer type selected by doDecompressData from the header byte
+void CompressionCodecIntegerLightweight::decompressDataForType(
+    const char * source,
+    UInt32 source_size,
+    char * dest,
+    UInt32 output_size) const
+{
+    auto mode = static_cast<Mode>(unalignedLoad<UInt8>(source)); // first byte: Mode stored by compressDataForType
+    source += sizeof(UInt8);
+    source_size -= sizeof(UInt8); // what remains is the mode-specific payload
+    switch (mode)
+    {
+    case Mode::CONSTANT:
+        Compression::ConstantDecoding<T>(source, source_size, dest, output_size);
+        break;
+    case Mode::CONSTANT_DELTA:
+        Compression::ConstantDeltaDecoding<T>(source, source_size, dest, output_size);
+        break;
+    case Mode::RLE:
+        Compression::RLEDecoding<T>(source, source_size, dest, output_size);
+        break;
+    case Mode::FOR:
+        Compression::ForDecoding<T>(source, source_size, dest, output_size);
+        break;
+    case Mode::DELTA_FOR:
+        Compression::DeltaForDecoding<T>(source, source_size, dest, output_size);
+        break;
+    case Mode::LZ4:
+        if (unlikely(LZ4_decompress_safe(source, dest, source_size, output_size) < 0))
+            throw Exception("Cannot LZ4_decompress_safe", ErrorCodes::CANNOT_DECOMPRESS);
+        break;
+    default: // also reached for Mode::Invalid, which has no case above
+        throw Exception(
+            ErrorCodes::CANNOT_DECOMPRESS,
+            "Cannot decompress with lightweight codec, unknown mode {}",
+            static_cast<int>(mode));
+    }
+}
+
+void CompressionCodecIntegerLightweight::CompressContext::update(size_t uncompressed_size, size_t compressed_size)
+{
+    // Every block encoded with a mode other than LZ4 is accounted as "lightweight" (lw).
+    if (mode != Mode::LZ4)
+    {
+        ++lw_counter;
+        lw_uncompressed_size += uncompressed_size;
+        lw_compressed_size += compressed_size;
+        return;
+    }
+    // The block went through the LZ4 fallback.
+    ++lz4_counter;
+    lz4_uncompressed_size += uncompressed_size;
+    lz4_compressed_size += compressed_size;
+}
+
+bool CompressionCodecIntegerLightweight::CompressContext::needAnalyze() const
+{
+    // Analysis is only given up after more than 5 LZ4 blocks; ratio = uncompressed / compressed, higher is better.
+    if (lz4_counter <= 5)
+        return true;
+    // lightweight codec is never used, or LZ4 reaches the better overall ratio: do not analyze anymore
+    const auto ratio = [](auto raw, auto packed) { return static_cast<double>(raw) / packed; };
+    return lw_counter != 0
+        && ratio(lz4_uncompressed_size, lz4_compressed_size) <= ratio(lw_uncompressed_size, lw_compressed_size);
+}
+
+template <typename T>
+void CompressionCodecIntegerLightweight::CompressContext::analyze(std::vector<T> & values, State<T> & state)
+{
+    // Chooses the encoding `mode` for `values` and fills `state` with what that mode needs.
+    if (!needAnalyze())
+    {
+        // `state` stays unset here: force LZ4 so a lightweight mode left by the previous block is not reused.
+        mode = Mode::LZ4;
+        return;
+    }
+
+    if (values.empty())
+    {
+        mode = Mode::Invalid;
+        return;
+    }
+
+    // Check CONSTANT: compare min and max, because a run longer than 255 is split into several RLE pairs
+    std::vector<std::pair<T, UInt8>> rle;
+    rle.reserve(values.size());
+    rle.emplace_back(values[0], 1);
+    for (size_t i = 1; i < values.size(); ++i)
+        if (values[i] != values[i - 1] || rle.back().second == std::numeric_limits<UInt8>::max())
+            rle.emplace_back(values[i], 1);
+        else
+            ++rle.back().second;
+    T min_value = *std::min_element(values.cbegin(), values.cend());
+    T max_value = *std::max_element(values.cbegin(), values.cend());
+    if (min_value == max_value)
+    {
+        state = min_value;
+        mode = Mode::CONSTANT;
+        return;
+    }
+
+    // Check CONSTANT_DELTA
+    using TS = std::make_signed_t<T>;
+    std::vector<TS> deltas;
+    deltas.reserve(values.size());
+    deltas.push_back(values[0]);
+    for (size_t i = 1; i < values.size(); ++i)
+        deltas.push_back(values[i] - values[i - 1]);
+    TS min_delta = *std::min_element(deltas.cbegin(), deltas.cend());
+    TS max_delta = *std::max_element(deltas.cbegin(), deltas.cend());
+    if (min_delta == max_delta)
+    {
+        state = static_cast<T>(min_delta);
+        mode = Mode::CONSTANT_DELTA;
+        return;
+    }
+
+    UInt8 delta_for_width = Compression::ForEncodingWidth(deltas, min_delta);
+    // both FOR variants store an additional T bytes for the minimum and 1 byte for the width
+    size_t delta_for_size
+        = BitpackingPrimitives::getRequiredSize(deltas.size(), delta_for_width) + sizeof(T) + sizeof(UInt8);
+    UInt8 for_width = BitpackingPrimitives::minimumBitWidth<T>(max_value - min_value);
+    size_t for_size = BitpackingPrimitives::getRequiredSize(values.size(), for_width) + sizeof(T) + sizeof(UInt8);
+    size_t origin_size = values.size() * sizeof(T);
+    size_t rle_size = Compression::RLEPairsSize(rle);
+    if (rle_size < delta_for_size && rle_size < for_size && rle_size < origin_size)
+    {
+        state = std::move(rle);
+        mode = Mode::RLE;
+    }
+    else if (for_size < delta_for_size && for_size < origin_size)
+    {
+        state = FORState<T>{min_value, for_width};
+        mode = Mode::FOR;
+    }
+    else if (delta_for_size < origin_size)
+    {
+        state = DeltaFORState<T>{deltas, min_delta, delta_for_width};
+        mode = Mode::DELTA_FOR;
+    }
+    else
+    {
+        mode = Mode::LZ4;
+    }
+}
+
+UInt32 CompressionCodecIntegerLightweight::doCompressData(const char * source, UInt32 source_size, char * dest) const
+{
+    // A zero bytes_size skips the alignment check (`x % 0` is undefined) and is rejected by the switch below.
+    if unlikely (bytes_size != 0 && source_size % bytes_size != 0)
+        throw Exception(
+            ErrorCodes::CANNOT_COMPRESS,
+            "Cannot compress with lightweight codec, data size {} is not aligned to {}",
+            source_size,
+            bytes_size);
+    dest[0] = bytes_size; // header byte: the integer width, read back by doDecompressData
+    dest += 1;
+    switch (bytes_size)
+    {
+    case 1:
+        return 1 + compressDataForType<UInt8>(source, source_size, dest);
+    case 2:
+        return 1 + compressDataForType<UInt16>(source, source_size, dest);
+    case 4:
+        return 1 + compressDataForType<UInt32>(source, source_size, dest);
+    case 8:
+        return 1 + compressDataForType<UInt64>(source, source_size, dest);
+    default:
+        throw Exception(
+            ErrorCodes::CANNOT_COMPRESS,
+            "Cannot compress with lightweight codec, unknown bytes size {}",
+            bytes_size);
+    }
+}
+
+void CompressionCodecIntegerLightweight::doDecompressData(
+    const char * source,
+    UInt32 source_size,
+    char * dest,
+    UInt32 uncompressed_size) const
+{
+    // `source` layout: 1 byte integer width, 1 byte mode, then the mode-specific payload.
+    if unlikely (source_size < 2)
+        throw Exception(
+            ErrorCodes::CANNOT_DECOMPRESS,
+            "Cannot decompress lightweight-encoded data. File has wrong header");
+
+    if (uncompressed_size == 0)
+        return;
+
+    UInt8 bytes_size = source[0]; // header byte: integer width written by doCompressData
+    if unlikely (bytes_size != 1 && bytes_size != 2 && bytes_size != 4 && bytes_size != 8)
+        throw Exception(
+            ErrorCodes::CANNOT_DECOMPRESS,
+            "Cannot decompress lightweight-encoded data. File has wrong header");
+
+    if unlikely (uncompressed_size % bytes_size != 0)
+        throw Exception(
+            ErrorCodes::CANNOT_DECOMPRESS,
+            "Cannot decompress lightweight-encoded data. Uncompressed size {} is not aligned to {}",
+            uncompressed_size,
+            bytes_size);
+    // Dispatch on the integer width; `default` cannot be reached after the validation above.
+    UInt32 source_size_no_header = source_size - 1;
+    switch (bytes_size)
+    {
+    case 1:
+        decompressDataForType<UInt8>(&source[1], source_size_no_header, dest, uncompressed_size);
+        break;
+    case 2:
+        decompressDataForType<UInt16>(&source[1], source_size_no_header, dest, uncompressed_size);
+        break;
+    case 4:
+        decompressDataForType<UInt32>(&source[1], source_size_no_header, dest, uncompressed_size);
+        break;
+    case 8:
+        decompressDataForType<UInt64>(&source[1], source_size_no_header, dest, uncompressed_size);
+        break;
+    default:
+        throw Exception(
+            ErrorCodes::CANNOT_DECOMPRESS,
+            "Cannot decompress with lightweight codec, unknown bytes size {}",
+            bytes_size);
+    }
+}
+
+} // namespace DB
diff --git a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h
new file mode 100644
index 00000000000..bbc7f8a7191
--- /dev/null
+++ b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h
@@ -0,0 +1,111 @@
+// Copyright 2024 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <IO/Compression/ICompressionCodec.h>
+
+namespace DB
+{
+
+class CompressionCodecIntegerLightweight : public ICompressionCodec // integer codec; the encodings it can pick from are listed in `Mode` below
+{
+public:
+    explicit CompressionCodecIntegerLightweight(UInt8 bytes_size_); // NOTE(review): bytes_size_ is presumably the integer width in bytes (1/2/4/8) — confirm in the .cpp
+
+    UInt8 getMethodByte() const override;
+
+protected:
+    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
+    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size)
+        const override;
+
+    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
+
+    bool isCompression() const override { return true; } // light compression
+    bool isGenericCompression() const override { return false; }
+
+private:
+    enum class Mode : UInt8 // encoding selector, one byte wide
+    {
+        Invalid = 0,
+        CONSTANT = 1, // all values are the same
+        CONSTANT_DELTA = 2, // the difference between two adjacent values is the same
+        RLE = 3, // run-length encoding
+        FOR = 4, // Frame of Reference encoding
+        DELTA_FOR = 5, // delta encoding and then FOR encoding
+        LZ4 = 6, // the above modes are not suitable, use LZ4 instead
+    };
+
+    // State shared by CONSTANT and CONSTANT_DELTA: a single value of type T
+    template <typename T>
+    using ConstantState = T;
+
+    template <typename T>
+    using RLEState = std::vector<std::pair<T, UInt8>>; // (value, run length) pairs; a UInt8 run length caps one run at 255
+
+    template <typename T>
+    struct FORState // NOTE(review): presumably the inputs later handed to the FOR encoder — confirm in the .cpp
+    {
+        T min_value;
+        UInt8 bit_width;
+    };
+
+    template <typename T>
+    struct DeltaFORState
+    {
+        using TS = typename std::make_signed_t<T>; // deltas are kept in the signed counterpart of T
+        std::vector<TS> deltas;
+        TS min_delta_value;
+        UInt8 bit_width;
+    };
+
+    // State is a tagged union (std::variant) of the per-mode states above; LZ4 has no alternative of its own
+    template <typename T>
+    using State = std::variant<ConstantState<T>, RLEState<T>, FORState<T>, DeltaFORState<T>>; // NOTE(review): std::variant/vector/pair are used but this header only includes ICompressionCodec.h — confirm they arrive transitively
+
+    class CompressContext // bookkeeping consulted when choosing a mode; behaviour lives in the .cpp
+    {
+    public:
+        CompressContext() = default;
+
+        bool needAnalyze() const;
+
+        template <typename T>
+        void analyze(std::vector<T> & values, State<T> & state); // takes both arguments by non-const reference, so it may modify them
+
+        void update(size_t uncompressed_size, size_t compressed_size);
+
+        Mode mode = Mode::LZ4; // default-initialised to LZ4
+
+    private:
+        size_t lw_uncompressed_size = 0; // NOTE(review): "lw" presumably means the lightweight modes — confirm in the .cpp
+        size_t lw_compressed_size = 0;
+        size_t lw_counter = 0;
+        size_t lz4_uncompressed_size = 0; // same three counters, presumably for the LZ4 fallback — confirm
+        size_t lz4_compressed_size = 0;
+        size_t lz4_counter = 0;
+    };
+
+    template <typename T>
+    size_t compressDataForType(const char * source, UInt32 source_size, char * dest) const;
+
+    template <typename T>
+    void decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 output_size) const;
+
+    mutable CompressContext ctx; // mutable so the const overrides above can modify it; NOTE(review): no synchronization is visible here — confirm a codec instance is never shared across threads
+    const UInt8 bytes_size;
+};
+
+} // namespace DB
diff --git a/dbms/src/IO/Compression/CompressionCodecRLE.cpp b/dbms/src/IO/Compression/CompressionCodecRLE.cpp
index c16d7535f7d..27ddd53c4a7 100644
--- a/dbms/src/IO/Compression/CompressionCodecRLE.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecRLE.cpp
@@ -15,6 +15,7 @@
 #include <Common/Exception.h>
 #include <IO/Compression/CompressionCodecRLE.h>
 #include <IO/Compression/CompressionInfo.h>
+#include <IO/Compression/EncodingUtil.h>
 #include <IO/Compression/ICompressionCodec.h>
 #include <common/unaligned.h>
 
@@ -48,24 +49,23 @@ namespace
 {
 constexpr UInt8 JUST_COPY_CODE = 0xFF;
 
-// TODO: better implementation
-template <std::integral T>
+template <typename T>
 UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
 {
     const char * source_end = source + source_size;
-    std::vector<std::pair<T, UInt16>> rle_vec;
+    DB::Compression::RLEPairs<T> rle_vec;
     rle_vec.reserve(source_size / sizeof(T));
-    static constexpr size_t RLE_PAIR_LENGTH = sizeof(T) + sizeof(UInt16);
     for (const auto * src = source; src < source_end; src += sizeof(T))
     {
         T value = unalignedLoad<T>(src);
-        if (rle_vec.empty() || rle_vec.back().first != value)
+        if (rle_vec.empty() || rle_vec.back().first != value
+            || rle_vec.back().second == std::numeric_limits<UInt8>::max())
             rle_vec.emplace_back(value, 1);
         else
             ++rle_vec.back().second;
     }
 
-    if (rle_vec.size() * RLE_PAIR_LENGTH > source_size)
+    if (DB::Compression::RLEPairsSize<T>(rle_vec) > source_size)
     {
         dest[0] = JUST_COPY_CODE;
         memcpy(&dest[1], source, source_size);
@@ -74,42 +74,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
 
     dest[0] = sizeof(T);
     dest += 1;
-    for (const auto & [value, count] : rle_vec)
-    {
-        unalignedStore<T>(dest, value);
-        dest += sizeof(T);
-        unalignedStore<UInt16>(dest, count);
-        dest += sizeof(UInt16);
-    }
-    return 1 + rle_vec.size() * RLE_PAIR_LENGTH;
-}
-
-template <std::integral T>
-void decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 output_size)
-{
-    const char * output_end = dest + output_size;
-    const char * source_end = source + source_size;
-
-    UInt8 bytes_size = source[0];
-    RUNTIME_CHECK(bytes_size == sizeof(T), bytes_size, sizeof(T));
-    source += 1;
-
-    while (source < source_end)
-    {
-        T data = unalignedLoad<T>(source);
-        source += sizeof(T);
-        auto count = unalignedLoad<UInt16>(source);
-        source += sizeof(UInt16);
-        if unlikely (dest + count * sizeof(T) > output_end)
-            throw Exception(
-                ErrorCodes::CANNOT_DECOMPRESS,
-                "Cannot decompress RLE-encoded data, output buffer is too small");
-        for (UInt16 i = 0; i < count; ++i)
-        {
-            unalignedStore<T>(dest, data);
-            dest += sizeof(T);
-        }
-    }
+    return 1 + DB::Compression::RLEEncoding<T>(rle_vec, dest);
 }
 
 } // namespace
@@ -165,16 +130,16 @@ void CompressionCodecRLE::doDecompressData(
     switch (bytes_size)
     {
     case 1:
-        decompressDataForType<UInt8>(source, source_size, dest, uncompressed_size);
+        DB::Compression::RLEDecoding<UInt8>(&source[1], source_size - 1, dest, uncompressed_size);
         break;
     case 2:
-        decompressDataForType<UInt16>(source, source_size, dest, uncompressed_size);
+        DB::Compression::RLEDecoding<UInt16>(&source[1], source_size - 1, dest, uncompressed_size);
         break;
     case 4:
-        decompressDataForType<UInt32>(source, source_size, dest, uncompressed_size);
+        DB::Compression::RLEDecoding<UInt32>(&source[1], source_size - 1, dest, uncompressed_size);
         break;
     case 8:
-        decompressDataForType<UInt64>(source, source_size, dest, uncompressed_size);
+        DB::Compression::RLEDecoding<UInt64>(&source[1], source_size - 1, dest, uncompressed_size);
         break;
     default:
         throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress RLE-encoded data. Unsupported bytes size");
diff --git a/dbms/src/IO/Compression/CompressionFactory.h b/dbms/src/IO/Compression/CompressionFactory.h
index aac621b42ad..8e1646f5550 100644
--- a/dbms/src/IO/Compression/CompressionFactory.h
+++ b/dbms/src/IO/Compression/CompressionFactory.h
@@ -17,6 +17,7 @@
 #include <Common/config.h>
 #include <IO/Compression/CompressionCodecDeltaFOR.h>
 #include <IO/Compression/CompressionCodecFOR.h>
+#include <IO/Compression/CompressionCodecIntegerLightweight.h>
 #include <IO/Compression/CompressionCodecLZ4.h>
 #include <IO/Compression/CompressionCodecMultiple.h>
 #include <IO/Compression/CompressionCodecNone.h>
@@ -49,6 +50,8 @@ class CompressionFactory
             return std::make_unique<CompressionCodecLZ4HC>(setting.level);
         case CompressionMethod::ZSTD:
             return std::make_unique<CompressionCodecZSTD>(setting.level);
+        case CompressionMethod::Lightweight:
+            return std::make_unique<CompressionCodecIntegerLightweight>(setting.type_bytes_size);
 #if USE_QPL
         case CompressionMethod::QPL:
             return std::make_unique<CompressionCodecDeflateQpl>();
diff --git a/dbms/src/IO/Compression/CompressionInfo.h b/dbms/src/IO/Compression/CompressionInfo.h
index 2d2a3d9e190..fc9866635c9 100644
--- a/dbms/src/IO/Compression/CompressionInfo.h
+++ b/dbms/src/IO/Compression/CompressionInfo.h
@@ -60,6 +60,7 @@ enum class CompressionMethodByte : UInt8
     DeltaFOR        = 0x92,
     RLE             = 0x93,
     FOR             = 0x94,
+    Lightweight     = 0x95,
     // COL_END is not a compreesion method, but a flag of column end used in compact file.
     COL_END         = 0x66,
 };
diff --git a/dbms/src/IO/Compression/CompressionMethod.h b/dbms/src/IO/Compression/CompressionMethod.h
index 88f64edb9ed..21c6a3ca007 100644
--- a/dbms/src/IO/Compression/CompressionMethod.h
+++ b/dbms/src/IO/Compression/CompressionMethod.h
@@ -25,6 +25,7 @@ enum class CompressionMethod
     ZSTD = 3, /// Experimental algorithm: https://github.com/Cyan4973/zstd
     QPL = 4, /// The Intel Query Processing Library (QPL) is an open-source library to provide high-performance query processing operations
     NONE = 5, /// No compression
+    Lightweight = 6, /// Lightweight compression
 };
 
 } // namespace DB
diff --git a/dbms/src/IO/Compression/CompressionSettings.h b/dbms/src/IO/Compression/CompressionSettings.h
index ce9df0ba06f..2f90eee5019 100644
--- a/dbms/src/IO/Compression/CompressionSettings.h
+++ b/dbms/src/IO/Compression/CompressionSettings.h
@@ -31,6 +31,7 @@ constexpr CompressionMethodByte method_byte_map[] = {
     CompressionMethodByte::ZSTD, // ZSTD
     CompressionMethodByte::QPL, // QPL
     CompressionMethodByte::NONE, // NONE
+    CompressionMethodByte::Lightweight, // Lightweight
 };
 
 const std::unordered_map<CompressionMethodByte, CompressionMethod> method_map = {
@@ -41,6 +42,7 @@ const std::unordered_map<CompressionMethodByte, CompressionMethod> method_map =
     {CompressionMethodByte::DeltaFOR, CompressionMethod::NONE},
     {CompressionMethodByte::RLE, CompressionMethod::NONE},
     {CompressionMethodByte::FOR, CompressionMethod::NONE},
+    {CompressionMethodByte::Lightweight, CompressionMethod::Lightweight},
 };
 
 struct CompressionSetting
diff --git a/dbms/src/IO/Compression/EncodingUtil.cpp b/dbms/src/IO/Compression/EncodingUtil.cpp
new file mode 100644
index 00000000000..331d3144959
--- /dev/null
+++ b/dbms/src/IO/Compression/EncodingUtil.cpp
@@ -0,0 +1,253 @@
+// Copyright 2024 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "EncodingUtil.h"
+
+#if defined(__AVX2__)
+#include <immintrin.h>
+#endif
+
+namespace DB::Compression
+{
+
+template <std::integral T> // Adds `frame_of_reference` to each of the `count` elements of `dst`, in place.
+void ApplyFrameOfReference(T * dst, T frame_of_reference, UInt32 count)
+{
+    if (frame_of_reference == 0) // adding zero changes nothing, skip the pass
+        return;
+
+    UInt32 i = 0;
+#if defined(__AVX2__)
+    UInt32 aligned_count = count - count % (sizeof(__m256i) / sizeof(T)); // number of elements covered by whole 256-bit vectors
+    for (; i < aligned_count; i += (sizeof(__m256i) / sizeof(T)))
+    {
+        // Load one 256-bit vector with an unaligned load
+        __m256i value = _mm256_loadu_si256(reinterpret_cast<__m256i *>(dst + i));
+        // Perform vectorized addition, choosing the lane width from sizeof(T)
+        if constexpr (sizeof(T) == 1)
+        {
+            value = _mm256_add_epi8(value, _mm256_set1_epi8(frame_of_reference));
+        }
+        else if constexpr (sizeof(T) == 2)
+        {
+            value = _mm256_add_epi16(value, _mm256_set1_epi16(frame_of_reference));
+        }
+        else if constexpr (sizeof(T) == 4)
+        {
+            value = _mm256_add_epi32(value, _mm256_set1_epi32(frame_of_reference));
+        }
+        else if constexpr (sizeof(T) == 8)
+        {
+            value = _mm256_add_epi64(value, _mm256_set1_epi64x(frame_of_reference));
+        }
+        // Store the result back to memory
+        _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst + i), value);
+    }
+#endif
+    for (; i < count; ++i) // scalar loop: the tail left by the AVX2 loop, or every element when AVX2 is not compiled in
+    {
+        dst[i] += frame_of_reference;
+    }
+}
+
+template void ApplyFrameOfReference<UInt8>(UInt8 *, UInt8, UInt32); // NOTE(review): only unsigned instantiations are listed, yet ForDecoding<TS> (EncodingUtil.h) calls this with signed T — confirm those link
+template void ApplyFrameOfReference<UInt16>(UInt16 *, UInt16, UInt32);
+template void ApplyFrameOfReference<UInt32>(UInt32 *, UInt32, UInt32);
+template void ApplyFrameOfReference<UInt64>(UInt64 *, UInt64, UInt32);
+
+template <std::integral T> // Subtracts `frame_of_reference` from each of the `count` elements of `dst`, in place.
+void SubtractFrameOfReference(T * dst, T frame_of_reference, UInt32 count)
+{
+    if (frame_of_reference == 0) // subtracting zero changes nothing, skip the pass
+        return;
+
+    UInt32 i = 0;
+#if defined(__AVX2__)
+    UInt32 aligned_count = count - count % (sizeof(__m256i) / sizeof(T)); // number of elements covered by whole 256-bit vectors
+    for (; i < aligned_count; i += (sizeof(__m256i) / sizeof(T)))
+    {
+        // Load one 256-bit vector with an unaligned load
+        __m256i value = _mm256_loadu_si256(reinterpret_cast<__m256i *>(dst + i));
+        // Perform vectorized subtraction, choosing the lane width from sizeof(T)
+        if constexpr (sizeof(T) == 1)
+        {
+            value = _mm256_sub_epi8(value, _mm256_set1_epi8(frame_of_reference));
+        }
+        else if constexpr (sizeof(T) == 2)
+        {
+            value = _mm256_sub_epi16(value, _mm256_set1_epi16(frame_of_reference));
+        }
+        else if constexpr (sizeof(T) == 4)
+        {
+            value = _mm256_sub_epi32(value, _mm256_set1_epi32(frame_of_reference));
+        }
+        else if constexpr (sizeof(T) == 8)
+        {
+            value = _mm256_sub_epi64(value, _mm256_set1_epi64x(frame_of_reference));
+        }
+        // Store the result back to memory
+        _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst + i), value);
+    }
+#endif
+    for (; i < count; ++i) // scalar loop: the tail left by the AVX2 loop, or every element when AVX2 is not compiled in
+    {
+        dst[i] -= frame_of_reference;
+    }
+}
+
+template void SubtractFrameOfReference<Int8>(Int8 *, Int8, UInt32); // explicit instantiations for every signed and unsigned width
+template void SubtractFrameOfReference<Int16>(Int16 *, Int16, UInt32);
+template void SubtractFrameOfReference<Int32>(Int32 *, Int32, UInt32);
+template void SubtractFrameOfReference<Int64>(Int64 *, Int64, UInt32);
+template void SubtractFrameOfReference<UInt8>(UInt8 *, UInt8, UInt32);
+template void SubtractFrameOfReference<UInt16>(UInt16 *, UInt16, UInt32);
+template void SubtractFrameOfReference<UInt32>(UInt32 *, UInt32, UInt32);
+template void SubtractFrameOfReference<UInt64>(UInt64 *, UInt64, UInt32);
+
+template <std::integral T> // Returns the bit width needed to bit-pack `values` relative to `frame_of_reference`.
+UInt8 ForEncodingWidth(std::vector<T> & values, T frame_of_reference) // NOTE(review): `values` must be non-empty — both branches dereference max_element/min_element
+{
+    if constexpr (std::is_signed_v<T>)
+    {
+        // For signed types, after subtracting frame of reference, the range of values is not always [0, max_value - min_value].
+        // For example, we have a sequence of Int8 values [-128, 1, 127], after subtracting frame of reference -128, the values are [0, -127, -1].
+        // The minimum bit width required to store the values is 8 rather than the width of `max_value - min_value = -1`.
+        // So we need to calculate the minimum bit width of the values after subtracting frame of reference.
+        SubtractFrameOfReference<T>(values.data(), frame_of_reference, values.size()); // side effect: `values` is rewritten in place (signed T only)
+        T max_value = *std::max_element(values.cbegin(), values.cend());
+        T min_value = *std::min_element(values.cbegin(), values.cend());
+        return BitpackingPrimitives::minimumBitWidth<T>(min_value, max_value);
+    }
+    else
+    {
+        T max_value = *std::max_element(values.cbegin(), values.cend()); // unsigned T: `values` is left untouched
+        return BitpackingPrimitives::minimumBitWidth<T>(max_value - frame_of_reference); // NOTE(review): assumes frame_of_reference <= max_value (presumably the minimum) — confirm at call sites
+    }
+}
+
+template UInt8 ForEncodingWidth<Int8>(std::vector<Int8> &, Int8); // explicit instantiations for every signed and unsigned width
+template UInt8 ForEncodingWidth<Int16>(std::vector<Int16> &, Int16);
+template UInt8 ForEncodingWidth<Int32>(std::vector<Int32> &, Int32);
+template UInt8 ForEncodingWidth<Int64>(std::vector<Int64> &, Int64);
+template UInt8 ForEncodingWidth<UInt8>(std::vector<UInt8> &, UInt8);
+template UInt8 ForEncodingWidth<UInt16>(std::vector<UInt16> &, UInt16);
+template UInt8 ForEncodingWidth<UInt32>(std::vector<UInt32> &, UInt32);
+template UInt8 ForEncodingWidth<UInt64>(std::vector<UInt64> &, UInt64);
+
+template <std::integral T> // Generic delta decoding; UInt32/UInt64 get AVX2 specializations below when __AVX2__ is defined.
+void DeltaDecoding(const char * source, UInt32 source_size, char * dest)
+{
+    ordinaryDeltaDecoding<T>(source, source_size, dest); // scalar running sum from EncodingUtil.h
+}
+
+#if defined(__AVX2__)
+// Note: using SIMD to rewrite compress does not improve performance.
+
+template <> // AVX2-build specialization: running sum over 32-bit deltas, four at a time in a 128-bit vector.
+void DeltaDecoding<UInt32>(const char * __restrict__ raw_source, UInt32 raw_source_size, char * __restrict__ raw_dest)
+{
+    const auto * source = reinterpret_cast<const UInt32 *>(raw_source);
+    auto source_size = raw_source_size / sizeof(UInt32); // from here on, a count of UInt32 elements rather than bytes
+    auto * dest = reinterpret_cast<UInt32 *>(raw_dest);
+    __m128i prev = _mm_setzero_si128(); // previous output vector; only its last lane is carried forward
+    size_t i = 0;
+    for (; i < source_size / 4; i++)
+    {
+        auto curr = _mm_lddqu_si128(reinterpret_cast<const __m128i *>(source) + i); // curr = {a0, a1, a2, a3}
+        const auto tmp1 = _mm_add_epi32(_mm_slli_si128(curr, 8), curr); // {a0, a1, a0+a2, a1+a3}
+        const auto tmp2 = _mm_add_epi32(_mm_slli_si128(tmp1, 4), tmp1); // {a0, a0+a1, a0+a1+a2, a0+a1+a2+a3}
+        prev = _mm_add_epi32(tmp2, _mm_shuffle_epi32(prev, 0xff)); // add the last lane of the previous result to every lane
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + i, prev);
+    }
+    uint32_t lastprev = _mm_extract_epi32(prev, 3); // running total after the vectorized part
+    for (i = 4 * i; i < source_size; ++i) // scalar tail for the remaining (< 4) elements
+    {
+        lastprev = lastprev + source[i];
+        dest[i] = lastprev;
+    }
+}
+
+template <> // AVX2-build specialization: running sum over 64-bit deltas, four at a time in a 256-bit vector.
+void DeltaDecoding<UInt64>(const char * __restrict__ raw_source, UInt32 raw_source_size, char * __restrict__ raw_dest)
+{
+    const auto * source = reinterpret_cast<const UInt64 *>(raw_source);
+    auto source_size = raw_source_size / sizeof(UInt64); // from here on, a count of UInt64 elements rather than bytes
+    auto * dest = reinterpret_cast<UInt64 *>(raw_dest);
+    // AVX2 does not support shuffle across 128-bit lanes, so we need to use permute.
+    __m256i prev = _mm256_setzero_si256();
+    __m256i zero = _mm256_setzero_si256();
+    size_t i = 0;
+    for (; i < source_size / 4; ++i)
+    {
+        // curr = {a0, a1, a2, a3}
+        auto curr = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(source) + i);
+        // x0 = {0, a0, a1, a2}
+        auto x0 = _mm256_blend_epi32(_mm256_permute4x64_epi64(curr, 0b10010011), zero, 0b00000011);
+        // x1 = {a0, a01, a12, a23}
+        auto x1 = _mm256_add_epi64(curr, x0);
+        // x2 = {0, 0, a0, a01}
+        auto x2 = _mm256_permute2f128_si256(x1, x1, 0b00101000);
+        // prev = prev + {a0, a01, a012, a0123}
+        prev = _mm256_add_epi64(prev, _mm256_add_epi64(x1, x2));
+        _mm256_storeu_si256(reinterpret_cast<__m256i *>(dest) + i, prev);
+        // prev = {prev[3], prev[3], prev[3], prev[3]}
+        prev = _mm256_permute4x64_epi64(prev, 0b11111111);
+    }
+    UInt64 lastprev = _mm256_extract_epi64(prev, 3); // running total after the vectorized part
+    for (i = 4 * i; i < source_size; ++i) // scalar tail for the remaining (< 4) elements
+    {
+        lastprev += source[i];
+        dest[i] = lastprev;
+    }
+}
+
+#endif
+
+template <std::integral T> // Generic Delta+FOR decoding; UInt32 and UInt64 are specialized below.
+void DeltaForDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+{
+    static_assert(std::is_integral<T>::value, "Integral required."); // already guaranteed by the std::integral constraint on T
+    OrdinaryDeltaForDecoding<T>(src, source_size, dest, dest_size); // FOR-decode into dest, then delta-decode in place
+}
+
+template <> // UInt32: FOR-decode into a scratch buffer, then delta-decode from it into dest.
+void DeltaForDecoding<UInt32>(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+{
+    const auto count = dest_size / sizeof(UInt32);
+    auto round_size = BitpackingPrimitives::roundUpToAlgorithmGroupSize(count);
+    // Reserve enough space for the temporary buffer: round_size elements, which is what ForDecoding is told to produce below.
+    const auto required_size = round_size * sizeof(UInt32);
+    char tmp_buffer[required_size]; // NOTE(review): runtime-sized stack array (compiler extension); stack use grows with dest_size
+    memset(tmp_buffer, 0, required_size);
+    ForDecoding<Int32>(src, source_size, tmp_buffer, required_size); // deltas are decoded as the signed counterpart
+    DeltaDecoding<UInt32>(reinterpret_cast<const char *>(tmp_buffer), dest_size, dest); // only the first dest_size bytes of the scratch buffer are consumed
+}
+
+template <> // UInt64: FOR-decode into a scratch buffer, then delta-decode from it into dest.
+void DeltaForDecoding<UInt64>(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+{
+    const auto count = dest_size / sizeof(UInt64);
+    const auto round_size = BitpackingPrimitives::roundUpToAlgorithmGroupSize(count);
+    // Reserve enough space for the temporary buffer: round_size elements, which is what ForDecoding is told to produce below.
+    const auto required_size = round_size * sizeof(UInt64);
+    char tmp_buffer[required_size]; // NOTE(review): runtime-sized stack array (compiler extension); stack use grows with dest_size
+    memset(tmp_buffer, 0, required_size);
+    ForDecoding<Int64>(src, source_size, tmp_buffer, required_size); // deltas are decoded as the signed counterpart
+    DeltaDecoding<UInt64>(reinterpret_cast<const char *>(tmp_buffer), dest_size, dest); // only the first dest_size bytes of the scratch buffer are consumed
+}
+
+template void DeltaForDecoding<UInt8>(const char *, UInt32, char *, UInt32); // 8- and 16-bit widths use the generic template
+template void DeltaForDecoding<UInt16>(const char *, UInt32, char *, UInt32); // (UInt32/UInt64 are the explicit specializations above)
+
+} // namespace DB::Compression
diff --git a/dbms/src/IO/Compression/EncodingUtil.h b/dbms/src/IO/Compression/EncodingUtil.h
new file mode 100644
index 00000000000..db39960d7ce
--- /dev/null
+++ b/dbms/src/IO/Compression/EncodingUtil.h
@@ -0,0 +1,251 @@
+// Copyright 2024 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <Common/BitpackingPrimitives.h>
+#include <Common/Exception.h>
+#include <common/types.h>
+#include <common/unaligned.h>
+
+#if defined(__AVX2__)
+#include <immintrin.h>
+#endif
+
+namespace DB::ErrorCodes
+{
+extern const int CANNOT_COMPRESS;
+extern const int CANNOT_DECOMPRESS;
+} // namespace DB::ErrorCodes
+
+namespace DB::Compression
+{
+
+/// Constant encoding
+
+template <std::integral T> // Writes the single value `constant` to `dest`.
+size_t ConstantEncoding(T constant, char * dest)
+{
+    unalignedStore<T>(dest, constant);
+    return sizeof(T); // number of bytes written
+}
+
+template <std::integral T> // Fills `dest` with dest_size / sizeof(T) copies of the value stored at `src`.
+void ConstantDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+{
+    if (source_size < sizeof(T)) // the encoded form must hold at least one T
+        throw Exception(
+            ErrorCodes::CANNOT_DECOMPRESS,
+            "Cannot use Constant decoding, data size {} is too small",
+            source_size);
+
+    T constant = unalignedLoad<T>(src);
+    for (size_t i = 0; i < dest_size / sizeof(T); ++i) // integer division: bytes beyond the last whole T are not written
+    {
+        unalignedStore<T>(dest, constant);
+        dest += sizeof(T);
+    }
+}
+
+/// Constant delta encoding
+
+template <std::integral T> // Writes `first_value` followed by `constant_delta` to `dest`.
+size_t ConstantDeltaEncoding(T first_value, T constant_delta, char * dest)
+{
+    unalignedStore<T>(dest, first_value);
+    dest += sizeof(T);
+    unalignedStore<T>(dest, constant_delta);
+    return sizeof(T) + sizeof(T); // number of bytes written
+}
+
+template <std::integral T> // Rebuilds first_value, first_value + delta, first_value + 2 * delta, ... into `dest`.
+void ConstantDeltaDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+{
+    if (source_size < sizeof(T) + sizeof(T)) // the encoded form must hold the first value and the delta
+        throw Exception(
+            ErrorCodes::CANNOT_DECOMPRESS,
+            "Cannot use ConstantDelta decoding, data size {} is too small",
+            source_size);
+
+    T first_value = unalignedLoad<T>(src);
+    T constant_delta = unalignedLoad<T>(src + sizeof(T));
+    for (size_t i = 0; i < dest_size / sizeof(T); ++i) // integer division: bytes beyond the last whole T are not written
+    {
+        unalignedStore<T>(dest, first_value);
+        first_value += constant_delta; // reused as the running value for the next element
+        dest += sizeof(T);
+    }
+}
+
+/// Run-length encoding
+
+template <std::integral T>
+using RLEPair = std::pair<T, UInt8>; // (value, run length); a UInt8 run length caps one run at 255
+template <std::integral T>
+using RLEPairs = std::vector<RLEPair<T>>; // NOTE(review): std::vector/std::pair are used but <vector>/<utility> are not included by this header — confirm they arrive transitively
+template <std::integral T>
+static constexpr size_t RLEPairLength = sizeof(T) + sizeof(UInt8); // serialized size of one pair in bytes
+
+template <std::integral T> // Serialized size in bytes of all pairs in `rle`.
+size_t RLEPairsSize(const RLEPairs<T> & rle)
+{
+    return rle.size() * RLEPairLength<T>;
+}
+
+template <std::integral T> // Serializes each pair as value (sizeof(T) bytes) then run length (1 byte).
+size_t RLEEncoding(const RLEPairs<T> & rle, char * dest)
+{
+    for (const auto & [value, count] : rle)
+    {
+        unalignedStore<T>(dest, value);
+        dest += sizeof(T);
+        unalignedStore<UInt8>(dest, count);
+        dest += sizeof(UInt8);
+    }
+    return rle.size() * RLEPairLength<T>; // number of bytes written
+}
+
+template <std::integral T> // Expands serialized (value, run length) pairs from `src` into `dest`.
+void RLEDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+{
+    if unlikely (source_size % RLEPairLength<T> != 0) // input must be a whole number of pairs
+        throw Exception(
+            ErrorCodes::CANNOT_DECOMPRESS,
+            "Cannot use RLE decoding, data size {} is not aligned to {}",
+            source_size,
+            RLEPairLength<T>);
+
+    const char * dest_end = dest + dest_size;
+    for (UInt32 i = 0; i < source_size / RLEPairLength<T>; ++i)
+    {
+        T value = unalignedLoad<T>(src);
+        src += sizeof(T);
+        auto count = unalignedLoad<UInt8>(src);
+        src += sizeof(UInt8);
+        if (dest + count * sizeof(T) > dest_end) // refuse a run that would write past dest_size; an under-filled dest is not detected
+            throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot use RLE decoding, data is too large");
+        for (UInt8 j = 0; j < count; ++j)
+        {
+            unalignedStore<T>(dest, value);
+            dest += sizeof(T);
+        }
+    }
+}
+
+/// Frame of Reference encoding
+
+template <std::integral T> // declared here, defined and explicitly instantiated in EncodingUtil.cpp
+void SubtractFrameOfReference(T * dst, T frame_of_reference, UInt32 count);
+
+template <std::integral T> // declared here, defined and explicitly instantiated in EncodingUtil.cpp
+UInt8 ForEncodingWidth(std::vector<T> & values, T frame_of_reference);
+
+template <std::integral T, bool skip_subtract_frame_of_reference = false> // Writes frame of reference, width, then the bit-packed values.
+size_t ForEncoding(std::vector<T> & values, T frame_of_reference, UInt8 width, char * dest)
+{
+    assert(!values.empty()); // NOTE(review): <cassert> is not included by this header — confirm it arrives transitively
+    if constexpr (!skip_subtract_frame_of_reference) // when true, `values` is used as-is (ForEncodingWidth already subtracts for signed T)
+        SubtractFrameOfReference(values.data(), frame_of_reference, values.size());
+    // store frame of reference
+    unalignedStore<T>(dest, frame_of_reference);
+    dest += sizeof(T);
+    // store width
+    unalignedStore<UInt8>(dest, width);
+    dest += sizeof(UInt8);
+    // if width == 0, skip bitpacking: only the header is emitted
+    if (width == 0)
+        return sizeof(T) + sizeof(UInt8);
+    auto required_size = BitpackingPrimitives::getRequiredSize(values.size(), width);
+    // after subtracting the frame of reference the values are expected to be non-negative; NOTE(review): holds when frame_of_reference is the minimum — confirm at call sites
+    BitpackingPrimitives::packBuffer(reinterpret_cast<unsigned char *>(dest), values.data(), values.size(), width);
+    return sizeof(T) + sizeof(UInt8) + required_size; // header bytes plus packed payload bytes
+}
+
+template <std::integral T> // declared here, defined in EncodingUtil.cpp
+void ApplyFrameOfReference(T * dst, T frame_of_reference, UInt32 count);
+
+template <std::integral T> // Inverse of ForEncoding: reads the header, unpacks the values, adds the frame of reference back.
+void ForDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+{
+    const auto count = dest_size / sizeof(T);
+    T frame_of_reference = unalignedLoad<T>(src); // NOTE(review): header is read before source_size is checked against sizeof(T) + 1
+    src += sizeof(T);
+    auto width = unalignedLoad<UInt8>(src);
+    src += sizeof(UInt8);
+    const auto required_size = source_size - sizeof(T) - sizeof(UInt8); // payload bytes after the header; unsigned, so it wraps if source_size is smaller than the header
+    RUNTIME_CHECK(BitpackingPrimitives::getRequiredSize(count, width) == required_size);
+    auto round_size = BitpackingPrimitives::roundUpToAlgorithmGroupSize(count);
+    if (round_size != count)
+    {
+        // Reserve enough space for the temporary buffer (round_size elements); NOTE(review): runtime-sized stack array, a compiler extension.
+        unsigned char tmp_buffer[round_size * sizeof(T)];
+        BitpackingPrimitives::unPackBuffer<T>(tmp_buffer, reinterpret_cast<const unsigned char *>(src), count, width);
+        ApplyFrameOfReference(reinterpret_cast<T *>(tmp_buffer), frame_of_reference, count);
+        memcpy(dest, tmp_buffer, dest_size); // copy only the dest_size bytes the caller asked for
+        return;
+    }
+    BitpackingPrimitives::unPackBuffer<T>( // count already equals round_size here, so unpack straight into dest
+        reinterpret_cast<unsigned char *>(dest),
+        reinterpret_cast<const unsigned char *>(src),
+        count,
+        width);
+    ApplyFrameOfReference(reinterpret_cast<T *>(dest), frame_of_reference, count);
+}
+
+/// Delta encoding
+
+template <std::integral T> // dest[i] = source[i] - source[i - 1], with an implicit source[-1] of 0.
+void DeltaEncoding(const T * source, UInt32 count, T * dest)
+{
+    T prev = 0; // so dest[0] equals source[0]
+    for (UInt32 i = 0; i < count; ++i)
+    {
+        T curr = source[i];
+        dest[i] = curr - prev;
+        prev = curr;
+    }
+}
+
+template <std::integral T> // Scalar running sum over the deltas in `source`, written to `dest`.
+void ordinaryDeltaDecoding(const char * source, UInt32 source_size, char * dest)
+{
+    T accumulator{}; // starts at zero, matching `prev = 0` in DeltaEncoding
+    const char * const source_end = source + source_size;
+    while (source < source_end) // NOTE(review): steps by sizeof(T); presumes source_size is a multiple of sizeof(T) — confirm at call sites
+    {
+        accumulator += unalignedLoad<T>(source);
+        unalignedStore<T>(dest, accumulator); // each element is loaded before it is stored, so source == dest is tolerated (see OrdinaryDeltaForDecoding)
+
+        source += sizeof(T);
+        dest += sizeof(T);
+    }
+}
+
+template <std::integral T> // declared here, defined in EncodingUtil.cpp
+void DeltaDecoding(const char * source, UInt32 source_size, char * dest);
+
+/// Delta + Frame of Reference encoding
+
+template <std::integral T> // Scalar Delta+FOR decoding: FOR-decode the deltas, then running-sum them.
+void OrdinaryDeltaForDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+{
+    using TS = typename std::make_signed_t<T>; // deltas are decoded as the signed counterpart of T
+    ForDecoding<TS>(src, source_size, dest, dest_size);
+    ordinaryDeltaDecoding<T>(dest, dest_size, dest); // in place: dest is both input and output
+}
+
+template <std::integral T> // declared here, defined in EncodingUtil.cpp
+void DeltaForDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size);
+
+} // namespace DB::Compression
diff --git a/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp b/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp
index 8beb29f22df..76781a3ac10 100644
--- a/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp
+++ b/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp
@@ -532,6 +532,7 @@ std::vector<CodecTestSequence> generatePyramidOfSequences(
 #define G(generator) generator, #generator
 
 const auto IntegerCodecsToTest = ::testing::Values(
+    CompressionMethodByte::Lightweight,
     CompressionMethodByte::DeltaFOR,
     CompressionMethodByte::FOR,
     CompressionMethodByte::RLE

From c583557c65da662909a2b69d41c19fba993292e5 Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Thu, 9 May 2024 13:54:12 +0800
Subject: [PATCH 02/27] rename

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 .../Compression/CompressionCodecDeltaFOR.cpp  | 16 ++++-----
 .../IO/Compression/CompressionCodecFOR.cpp    | 12 +++----
 .../CompressionCodecIntegerLightweight.cpp    | 10 +++---
 dbms/src/IO/Compression/EncodingUtil.cpp      | 34 +++++++++----------
 dbms/src/IO/Compression/EncodingUtil.h        | 12 +++----
 5 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
index d2bcbbe3262..73880a3424f 100644
--- a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
@@ -114,16 +114,16 @@ void CompressionCodecDeltaFOR::doDecompressData(
     switch (bytes_size)
     {
     case 1:
-        DB::Compression::DeltaForDecoding<UInt8>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::DeltaFORDecoding<UInt8>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     case 2:
-        DB::Compression::DeltaForDecoding<UInt16>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::DeltaFORDecoding<UInt16>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     case 4:
-        DB::Compression::DeltaForDecoding<UInt32>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::DeltaFORDecoding<UInt32>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     case 8:
-        DB::Compression::DeltaForDecoding<UInt64>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::DeltaFORDecoding<UInt64>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     default:
         throw Exception(
@@ -158,16 +158,16 @@ void CompressionCodecDeltaFOR::ordinaryDecompress(
     switch (bytes_size)
     {
     case 1:
-        DB::Compression::OrdinaryDeltaForDecoding<UInt8>(&source[1], source_size_no_header, dest, dest_size);
+        DB::Compression::OrdinaryDeltaFORDecoding<UInt8>(&source[1], source_size_no_header, dest, dest_size);
         break;
     case 2:
-        DB::Compression::OrdinaryDeltaForDecoding<UInt16>(&source[1], source_size_no_header, dest, dest_size);
+        DB::Compression::OrdinaryDeltaFORDecoding<UInt16>(&source[1], source_size_no_header, dest, dest_size);
         break;
     case 4:
-        DB::Compression::OrdinaryDeltaForDecoding<UInt32>(&source[1], source_size_no_header, dest, dest_size);
+        DB::Compression::OrdinaryDeltaFORDecoding<UInt32>(&source[1], source_size_no_header, dest, dest_size);
         break;
     case 8:
-        DB::Compression::OrdinaryDeltaForDecoding<UInt64>(&source[1], source_size_no_header, dest, dest_size);
+        DB::Compression::OrdinaryDeltaFORDecoding<UInt64>(&source[1], source_size_no_header, dest, dest_size);
         break;
     default:
         throw Exception(
diff --git a/dbms/src/IO/Compression/CompressionCodecFOR.cpp b/dbms/src/IO/Compression/CompressionCodecFOR.cpp
index 86b21719744..db3b7511bb0 100644
--- a/dbms/src/IO/Compression/CompressionCodecFOR.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecFOR.cpp
@@ -56,8 +56,8 @@ UInt32 CompressionCodecFOR::compressData(const T * source, UInt32 count, char *
     std::vector<T> values(count);
     values.assign(source, source + count);
     T frame_of_reference = *std::min_element(values.cbegin(), values.cend());
-    UInt8 width = DB::Compression::ForEncodingWidth(values, frame_of_reference);
-    return DB::Compression::ForEncoding<T, std::is_signed_v<T>>(values, frame_of_reference, width, dest);
+    UInt8 width = DB::Compression::FOREncodingWidth(values, frame_of_reference);
+    return DB::Compression::FOREncoding<T, std::is_signed_v<T>>(values, frame_of_reference, width, dest);
 }
 
 UInt32 CompressionCodecFOR::doCompressData(const char * source, UInt32 source_size, char * dest) const
@@ -108,16 +108,16 @@ void CompressionCodecFOR::doDecompressData(
     switch (bytes_size)
     {
     case 1:
-        DB::Compression::ForDecoding<UInt8>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::FORDecoding<UInt8>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     case 2:
-        DB::Compression::ForDecoding<UInt16>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::FORDecoding<UInt16>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     case 4:
-        DB::Compression::ForDecoding<UInt32>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::FORDecoding<UInt32>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     case 8:
-        DB::Compression::ForDecoding<UInt64>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::FORDecoding<UInt64>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     default:
         throw Exception(
diff --git a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp
index 9dd9445a53a..c0ebe0bdb69 100644
--- a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp
@@ -99,13 +99,13 @@ size_t CompressionCodecIntegerLightweight::compressDataForType(const char * sour
     case Mode::FOR:
     {
         FORState for_state = std::get<2>(state);
-        compressed_size += Compression::ForEncoding(values, for_state.min_value, for_state.bit_width, dest);
+        compressed_size += Compression::FOREncoding(values, for_state.min_value, for_state.bit_width, dest);
         break;
     }
     case Mode::DELTA_FOR:
     {
         DeltaFORState delta_for_state = std::get<3>(state);
-        compressed_size += Compression::ForEncoding<typename std::make_signed_t<T>, true>(
+        compressed_size += Compression::FOREncoding<typename std::make_signed_t<T>, true>(
             delta_for_state.deltas,
             delta_for_state.min_delta_value,
             delta_for_state.bit_width,
@@ -160,10 +160,10 @@ void CompressionCodecIntegerLightweight::decompressDataForType(
         Compression::RLEDecoding<T>(source, source_size, dest, output_size);
         break;
     case Mode::FOR:
-        Compression::ForDecoding<T>(source, source_size, dest, output_size);
+        Compression::FORDecoding<T>(source, source_size, dest, output_size);
         break;
     case Mode::DELTA_FOR:
-        Compression::DeltaForDecoding<T>(source, source_size, dest, output_size);
+        Compression::DeltaFORDecoding<T>(source, source_size, dest, output_size);
         break;
     case Mode::LZ4:
         if (unlikely(LZ4_decompress_safe(source, dest, source_size, output_size) < 0))
@@ -254,7 +254,7 @@ void CompressionCodecIntegerLightweight::CompressContext::analyze(std::vector<T>
         return;
     }
 
-    UInt8 delta_for_width = Compression::ForEncodingWidth(deltas, min_delta);
+    UInt8 delta_for_width = Compression::FOREncodingWidth(deltas, min_delta);
     // additional T bytes for min_delta, and 1 byte for width
     size_t delta_for_size
         = BitpackingPrimitives::getRequiredSize(deltas.size(), delta_for_width) + sizeof(T) + sizeof(UInt8);
diff --git a/dbms/src/IO/Compression/EncodingUtil.cpp b/dbms/src/IO/Compression/EncodingUtil.cpp
index 331d3144959..db24b7fa68b 100644
--- a/dbms/src/IO/Compression/EncodingUtil.cpp
+++ b/dbms/src/IO/Compression/EncodingUtil.cpp
@@ -116,7 +116,7 @@ template void SubtractFrameOfReference<UInt32>(UInt32 *, UInt32, UInt32);
 template void SubtractFrameOfReference<UInt64>(UInt64 *, UInt64, UInt32);
 
 template <std::integral T>
-UInt8 ForEncodingWidth(std::vector<T> & values, T frame_of_reference)
+UInt8 FOREncodingWidth(std::vector<T> & values, T frame_of_reference)
 {
     if constexpr (std::is_signed_v<T>)
     {
@@ -136,14 +136,14 @@ UInt8 ForEncodingWidth(std::vector<T> & values, T frame_of_reference)
     }
 }
 
-template UInt8 ForEncodingWidth<Int8>(std::vector<Int8> &, Int8);
-template UInt8 ForEncodingWidth<Int16>(std::vector<Int16> &, Int16);
-template UInt8 ForEncodingWidth<Int32>(std::vector<Int32> &, Int32);
-template UInt8 ForEncodingWidth<Int64>(std::vector<Int64> &, Int64);
-template UInt8 ForEncodingWidth<UInt8>(std::vector<UInt8> &, UInt8);
-template UInt8 ForEncodingWidth<UInt16>(std::vector<UInt16> &, UInt16);
-template UInt8 ForEncodingWidth<UInt32>(std::vector<UInt32> &, UInt32);
-template UInt8 ForEncodingWidth<UInt64>(std::vector<UInt64> &, UInt64);
+template UInt8 FOREncodingWidth<Int8>(std::vector<Int8> &, Int8);
+template UInt8 FOREncodingWidth<Int16>(std::vector<Int16> &, Int16);
+template UInt8 FOREncodingWidth<Int32>(std::vector<Int32> &, Int32);
+template UInt8 FOREncodingWidth<Int64>(std::vector<Int64> &, Int64);
+template UInt8 FOREncodingWidth<UInt8>(std::vector<UInt8> &, UInt8);
+template UInt8 FOREncodingWidth<UInt16>(std::vector<UInt16> &, UInt16);
+template UInt8 FOREncodingWidth<UInt32>(std::vector<UInt32> &, UInt32);
+template UInt8 FOREncodingWidth<UInt64>(std::vector<UInt64> &, UInt64);
 
 template <std::integral T>
 void DeltaDecoding(const char * source, UInt32 source_size, char * dest)
@@ -215,14 +215,14 @@ void DeltaDecoding<UInt64>(const char * __restrict__ raw_source, UInt32 raw_sour
 #endif
 
 template <std::integral T>
-void DeltaForDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+void DeltaFORDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
     static_assert(std::is_integral<T>::value, "Integral required.");
-    OrdinaryDeltaForDecoding<T>(src, source_size, dest, dest_size);
+    OrdinaryDeltaFORDecoding<T>(src, source_size, dest, dest_size);
 }
 
 template <>
-void DeltaForDecoding<UInt32>(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+void DeltaFORDecoding<UInt32>(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
     const auto count = dest_size / sizeof(UInt32);
     auto round_size = BitpackingPrimitives::roundUpToAlgorithmGroupSize(count);
@@ -230,12 +230,12 @@ void DeltaForDecoding<UInt32>(const char * src, UInt32 source_size, char * dest,
     const auto required_size = round_size * sizeof(UInt32);
     char tmp_buffer[required_size];
     memset(tmp_buffer, 0, required_size);
-    ForDecoding<Int32>(src, source_size, tmp_buffer, required_size);
+    FORDecoding<Int32>(src, source_size, tmp_buffer, required_size);
     DeltaDecoding<UInt32>(reinterpret_cast<const char *>(tmp_buffer), dest_size, dest);
 }
 
 template <>
-void DeltaForDecoding<UInt64>(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+void DeltaFORDecoding<UInt64>(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
     const auto count = dest_size / sizeof(UInt64);
     const auto round_size = BitpackingPrimitives::roundUpToAlgorithmGroupSize(count);
@@ -243,11 +243,11 @@ void DeltaForDecoding<UInt64>(const char * src, UInt32 source_size, char * dest,
     const auto required_size = round_size * sizeof(UInt64);
     char tmp_buffer[required_size];
     memset(tmp_buffer, 0, required_size);
-    ForDecoding<Int64>(src, source_size, tmp_buffer, required_size);
+    FORDecoding<Int64>(src, source_size, tmp_buffer, required_size);
     DeltaDecoding<UInt64>(reinterpret_cast<const char *>(tmp_buffer), dest_size, dest);
 }
 
-template void DeltaForDecoding<UInt8>(const char *, UInt32, char *, UInt32);
-template void DeltaForDecoding<UInt16>(const char *, UInt32, char *, UInt32);
+template void DeltaFORDecoding<UInt8>(const char *, UInt32, char *, UInt32);
+template void DeltaFORDecoding<UInt16>(const char *, UInt32, char *, UInt32);
 
 } // namespace DB::Compression
diff --git a/dbms/src/IO/Compression/EncodingUtil.h b/dbms/src/IO/Compression/EncodingUtil.h
index db39960d7ce..e07a8555304 100644
--- a/dbms/src/IO/Compression/EncodingUtil.h
+++ b/dbms/src/IO/Compression/EncodingUtil.h
@@ -149,10 +149,10 @@ template <std::integral T>
 void SubtractFrameOfReference(T * dst, T frame_of_reference, UInt32 count);
 
 template <std::integral T>
-UInt8 ForEncodingWidth(std::vector<T> & values, T frame_of_reference);
+UInt8 FOREncodingWidth(std::vector<T> & values, T frame_of_reference);
 
 template <std::integral T, bool skip_subtract_frame_of_reference = false>
-size_t ForEncoding(std::vector<T> & values, T frame_of_reference, UInt8 width, char * dest)
+size_t FOREncoding(std::vector<T> & values, T frame_of_reference, UInt8 width, char * dest)
 {
     assert(!values.empty());
     if constexpr (!skip_subtract_frame_of_reference)
@@ -176,7 +176,7 @@ template <std::integral T>
 void ApplyFrameOfReference(T * dst, T frame_of_reference, UInt32 count);
 
 template <std::integral T>
-void ForDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+void FORDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
     const auto count = dest_size / sizeof(T);
     T frame_of_reference = unalignedLoad<T>(src);
@@ -238,14 +238,14 @@ void DeltaDecoding(const char * source, UInt32 source_size, char * dest);
 /// Delta + Frame of Reference encoding
 
 template <std::integral T>
-void OrdinaryDeltaForDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+void OrdinaryDeltaFORDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
     using TS = typename std::make_signed_t<T>;
-    ForDecoding<TS>(src, source_size, dest, dest_size);
+    FORDecoding<TS>(src, source_size, dest, dest_size);
     ordinaryDeltaDecoding<T>(dest, dest_size, dest);
 }
 
 template <std::integral T>
-void DeltaForDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size);
+void DeltaFORDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size);
 
 } // namespace DB::Compression

From a8d7de5574c59d3a9ca2e4b35e0a42558c38182b Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Thu, 9 May 2024 13:56:43 +0800
Subject: [PATCH 03/27] ut: disable FOR codec in IntegerCodecsToTest

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 dbms/src/IO/Compression/tests/gtest_codec_compression.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp b/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp
index 76781a3ac10..f1381484c31 100644
--- a/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp
+++ b/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp
@@ -534,7 +534,7 @@ std::vector<CodecTestSequence> generatePyramidOfSequences(
 const auto IntegerCodecsToTest = ::testing::Values(
     CompressionMethodByte::Lightweight,
     CompressionMethodByte::DeltaFOR,
-    CompressionMethodByte::FOR,
+    // CompressionMethodByte::FOR, // disable FOR codec for now, since there are too many unit tests.
     CompressionMethodByte::RLE
 #if USE_QPL
     ,

From 86bab117212d500b34c2222870abe31d91313ec0 Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Thu, 9 May 2024 14:44:24 +0800
Subject: [PATCH 04/27] init template: ApplyFrameOfReference for signed types

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 dbms/src/IO/Compression/EncodingUtil.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/dbms/src/IO/Compression/EncodingUtil.cpp b/dbms/src/IO/Compression/EncodingUtil.cpp
index db24b7fa68b..b947bf6dd84 100644
--- a/dbms/src/IO/Compression/EncodingUtil.cpp
+++ b/dbms/src/IO/Compression/EncodingUtil.cpp
@@ -65,6 +65,10 @@ template void ApplyFrameOfReference<UInt8>(UInt8 *, UInt8, UInt32);
 template void ApplyFrameOfReference<UInt16>(UInt16 *, UInt16, UInt32);
 template void ApplyFrameOfReference<UInt32>(UInt32 *, UInt32, UInt32);
 template void ApplyFrameOfReference<UInt64>(UInt64 *, UInt64, UInt32);
+template void ApplyFrameOfReference<Int8>(Int8 *, Int8, UInt32);
+template void ApplyFrameOfReference<Int16>(Int16 *, Int16, UInt32);
+template void ApplyFrameOfReference<Int32>(Int32 *, Int32, UInt32);
+template void ApplyFrameOfReference<Int64>(Int64 *, Int64, UInt32);
 
 template <std::integral T>
 void SubtractFrameOfReference(T * dst, T frame_of_reference, UInt32 count)

From bdd4e1f6eca71ebcce0f51237deec839e25fb347 Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Fri, 10 May 2024 11:49:47 +0800
Subject: [PATCH 05/27] optimize analyze

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 .../CompressionCodecIntegerLightweight.cpp    | 126 ++++++++++--------
 .../CompressionCodecIntegerLightweight.h      |  11 +-
 2 files changed, 83 insertions(+), 54 deletions(-)

diff --git a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp
index c0ebe0bdb69..27ca11b2cf5 100644
--- a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp
@@ -49,27 +49,16 @@ UInt8 CompressionCodecIntegerLightweight::getMethodByte() const
 UInt32 CompressionCodecIntegerLightweight::getMaxCompressedDataSize(UInt32 uncompressed_size) const
 {
     // 1 byte for bytes_size, 1 byte for mode, and the rest for compressed data
-    return 1 + 1 + uncompressed_size;
+    return 1 + 1 + LZ4_COMPRESSBOUND(uncompressed_size);
 }
 
 template <typename T>
 size_t CompressionCodecIntegerLightweight::compressDataForType(const char * source, UInt32 source_size, char * dest)
     const
 {
-    if (source_size % sizeof(T) != 0)
-        throw Exception(
-            ErrorCodes::CANNOT_COMPRESS,
-            "Cannot compress with lightweight codec, data size {} is not aligned to {}",
-            source_size,
-            sizeof(T));
-
     // Load values
     const size_t count = source_size / sizeof(T);
-    std::vector<T> values(count);
-    for (size_t i = 0; i < count; ++i)
-    {
-        values[i] = unalignedLoad<T>(source + i * sizeof(T));
-    }
+    std::span<const T> values(reinterpret_cast<const T *>(source), count);
 
     // Analyze
     State<T> state;
@@ -99,7 +88,7 @@ size_t CompressionCodecIntegerLightweight::compressDataForType(const char * sour
     case Mode::FOR:
     {
         FORState for_state = std::get<2>(state);
-        compressed_size += Compression::FOREncoding(values, for_state.min_value, for_state.bit_width, dest);
+        compressed_size += Compression::FOREncoding(for_state.values, for_state.min_value, for_state.bit_width, dest);
         break;
     }
     case Mode::DELTA_FOR:
@@ -191,6 +180,12 @@ void CompressionCodecIntegerLightweight::CompressContext::update(size_t uncompre
         lw_compressed_size += compressed_size;
         ++lw_counter;
     }
+    if (mode == Mode::CONSTANT_DELTA)
+        ++constant_delta_counter;
+    if (mode == Mode::DELTA_FOR)
+        ++delta_for_counter;
+    if (mode == Mode::RLE)
+        ++rle_counter;
 }
 
 bool CompressionCodecIntegerLightweight::CompressContext::needAnalyze() const
@@ -204,78 +199,103 @@ bool CompressionCodecIntegerLightweight::CompressContext::needAnalyze() const
     return true;
 }
 
-template <typename T>
-void CompressionCodecIntegerLightweight::CompressContext::analyze(std::vector<T> & values, State<T> & state)
+bool CompressionCodecIntegerLightweight::CompressContext::needAnalyzeDelta() const
 {
-    if (!needAnalyze())
-        return;
+    return lw_counter <= 5 || constant_delta_counter != 0 || delta_for_counter != 0;
+}
 
+bool CompressionCodecIntegerLightweight::CompressContext::needAnalyzeRLE() const
+{
+    return lw_counter <= 5 || rle_counter != 0;
+}
+
+template <typename T>
+void CompressionCodecIntegerLightweight::CompressContext::analyze(std::span<const T> & values, State<T> & state)
+{
     if (values.empty())
     {
         mode = Mode::Invalid;
         return;
     }
 
+    if (!needAnalyze())
+        return;
+
     // Check CONSTANT
-    std::vector<std::pair<T, UInt8>> rle;
-    rle.reserve(values.size());
-    rle.emplace_back(values[0], 1);
-    for (size_t i = 1; i < values.size(); ++i)
-    {
-        if (values[i] != values[i - 1] || rle.back().second == std::numeric_limits<UInt8>::max())
-            rle.emplace_back(values[i], 1);
-        else
-            ++rle.back().second;
-    }
-    T min_value = *std::min_element(values.cbegin(), values.cend());
-    T max_value = *std::max_element(values.cbegin(), values.cend());
-    if (rle.size() == 1)
+    T min_value = *std::min_element(values.begin(), values.end());
+    T max_value = *std::max_element(values.begin(), values.end());
+    if (min_value == max_value)
     {
-        state = rle[0].first;
+        state = min_value;
         mode = Mode::CONSTANT;
         return;
     }
 
-    // Check CONSTANT_DELTA
     using TS = std::make_signed_t<T>;
     std::vector<TS> deltas;
-    deltas.reserve(values.size());
-    deltas.push_back(values[0]);
-    for (size_t i = 1; i < values.size(); ++i)
+    UInt8 delta_for_width = sizeof(T) * 8;
+    size_t delta_for_size = std::numeric_limits<size_t>::max();
+    TS min_delta = std::numeric_limits<TS>::min();
+    if (needAnalyzeDelta())
     {
-        deltas.push_back(values[i] - values[i - 1]);
+        // Check CONSTANT_DELTA
+        deltas.reserve(values.size());
+        deltas.push_back(values[0]);
+        for (size_t i = 1; i < values.size(); ++i)
+        {
+            deltas.push_back(values[i] - values[i - 1]);
+        }
+        min_delta = *std::min_element(deltas.cbegin(), deltas.cend());
+        if (min_delta == *std::max_element(deltas.cbegin(), deltas.cend()))
+        {
+            state = static_cast<T>(min_delta);
+            mode = Mode::CONSTANT_DELTA;
+            return;
+        }
+
+        // DELTA_FOR
+        delta_for_width = Compression::FOREncodingWidth(deltas, min_delta);
+        // additional T bytes for min_delta, and 1 byte for width
+        delta_for_size
+            = BitpackingPrimitives::getRequiredSize(deltas.size(), delta_for_width) + sizeof(T) + sizeof(UInt8);
     }
-    TS min_delta = *std::min_element(deltas.cbegin(), deltas.cend());
-    TS max_delta = *std::max_element(deltas.cbegin(), deltas.cend());
-    if (min_delta == max_delta)
+
+    // RLE
+    std::vector<std::pair<T, UInt8>> rle;
+    if (needAnalyzeRLE())
     {
-        state = static_cast<T>(min_delta);
-        mode = Mode::CONSTANT_DELTA;
-        return;
+        rle.reserve(values.size());
+        rle.emplace_back(values[0], 1);
+        for (size_t i = 1; i < values.size(); ++i)
+        {
+            if (values[i] != values[i - 1] || rle.back().second == std::numeric_limits<UInt8>::max())
+                rle.emplace_back(values[i], 1);
+            else
+                ++rle.back().second;
+        }
     }
 
-    UInt8 delta_for_width = Compression::FOREncodingWidth(deltas, min_delta);
-    // additional T bytes for min_delta, and 1 byte for width
-    size_t delta_for_size
-        = BitpackingPrimitives::getRequiredSize(deltas.size(), delta_for_width) + sizeof(T) + sizeof(UInt8);
     UInt8 for_width = BitpackingPrimitives::minimumBitWidth<T>(max_value - min_value);
     // additional T bytes for min_value, and 1 byte for width
     size_t for_size = BitpackingPrimitives::getRequiredSize(values.size(), for_width) + sizeof(T) + sizeof(UInt8);
-    size_t origin_size = values.size() * sizeof(T);
+    // Assume that the compression ratio of LZ4 is 3.0
+    // The official document says that the compression ratio of LZ4 is 2.1, https://github.com/lz4/lz4
+    size_t estimate_lz_size = values.size() * sizeof(T) / 3;
     size_t rle_size = Compression::RLEPairsSize(rle);
-    if (rle_size < delta_for_size && rle_size < for_size && rle_size < origin_size)
+    if (rle_size < delta_for_size && rle_size < for_size && rle_size < estimate_lz_size)
     {
         state = std::move(rle);
         mode = Mode::RLE;
     }
-    else if (for_size < delta_for_size && for_size < origin_size)
+    else if (for_size < delta_for_size && for_size < estimate_lz_size)
     {
-        state = FORState<T>{min_value, for_width};
+        std::vector<T> values_copy(values.begin(), values.end());
+        state = FORState<T>{std::move(values_copy), min_value, for_width};
         mode = Mode::FOR;
     }
-    else if (delta_for_size < origin_size)
+    else if (delta_for_size < estimate_lz_size)
     {
-        state = DeltaFORState<T>{deltas, min_delta, delta_for_width};
+        state = DeltaFORState<T>{std::move(deltas), min_delta, delta_for_width};
         mode = Mode::DELTA_FOR;
     }
     else
diff --git a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h
index bbc7f8a7191..f2760bcaf05 100644
--- a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h
+++ b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h
@@ -16,6 +16,9 @@
 
 #include <IO/Compression/ICompressionCodec.h>
 
+#include <span>
+
+
 namespace DB
 {
 
@@ -58,6 +61,7 @@ class CompressionCodecIntegerLightweight : public ICompressionCodec
     template <typename T>
     struct FORState
     {
+        std::vector<T> values;
         T min_value;
         UInt8 bit_width;
     };
@@ -81,9 +85,11 @@ class CompressionCodecIntegerLightweight : public ICompressionCodec
         CompressContext() = default;
 
         bool needAnalyze() const;
+        bool needAnalyzeDelta() const;
+        bool needAnalyzeRLE() const;
 
         template <typename T>
-        void analyze(std::vector<T> & values, State<T> & state);
+        void analyze(std::span<const T> & values, State<T> & state);
 
         void update(size_t uncompressed_size, size_t compressed_size);
 
@@ -96,6 +102,9 @@ class CompressionCodecIntegerLightweight : public ICompressionCodec
         size_t lz4_uncompressed_size = 0;
         size_t lz4_compressed_size = 0;
         size_t lz4_counter = 0;
+        size_t constant_delta_counter = 0;
+        size_t delta_for_counter = 0;
+        size_t rle_counter = 0;
     };
 
     template <typename T>

From 40d39a2a9f16972f0b9c49b02a425d614e4a906b Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Thu, 9 May 2024 11:39:21 +0800
Subject: [PATCH 06/27] optimize & rename

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 .../CompressionCodecIntegerLightweight.cpp    | 50 ++++++++++++++-----
 .../CompressionCodecIntegerLightweight.h      | 13 +++--
 ...cRLE.cpp => CompressionCodecRunLength.cpp} | 42 +++++++++-------
 ...CodecRLE.h => CompressionCodecRunLength.h} |  4 +-
 dbms/src/IO/Compression/CompressionFactory.h  |  6 +--
 dbms/src/IO/Compression/CompressionInfo.h     |  2 +-
 dbms/src/IO/Compression/CompressionSettings.h |  2 +-
 dbms/src/IO/Compression/EncodingUtil.h        | 41 ++++++++-------
 .../tests/gtest_codec_compression.cpp         |  2 +-
 .../Storages/DeltaMerge/File/DMFileWriter.h   | 21 ++++++--
 10 files changed, 119 insertions(+), 64 deletions(-)
 rename dbms/src/IO/Compression/{CompressionCodecRLE.cpp => CompressionCodecRunLength.cpp} (67%)
 rename dbms/src/IO/Compression/{CompressionCodecRLE.h => CompressionCodecRunLength.h} (91%)

diff --git a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp
index 27ca11b2cf5..08b87bad5b8 100644
--- a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp
@@ -52,6 +52,12 @@ UInt32 CompressionCodecIntegerLightweight::getMaxCompressedDataSize(UInt32 uncom
     return 1 + 1 + LZ4_COMPRESSBOUND(uncompressed_size);
 }
 
+CompressionCodecIntegerLightweight::~CompressionCodecIntegerLightweight()
+{
+    if (ctx.isCompression())
+        LOG_INFO(Logger::get(), "lightweight codec: {}", ctx.toDebugString());
+}
+
 template <typename T>
 size_t CompressionCodecIntegerLightweight::compressDataForType(const char * source, UInt32 source_size, char * dest)
     const
@@ -80,9 +86,9 @@ size_t CompressionCodecIntegerLightweight::compressDataForType(const char * sour
         compressed_size += Compression::ConstantDeltaEncoding(values[0], std::get<0>(state), dest);
         break;
     }
-    case Mode::RLE:
+    case Mode::RunLength:
     {
-        compressed_size += Compression::RLEEncoding<T>(std::get<1>(state), dest);
+        compressed_size += Compression::RunLengthEncoding<T>(std::get<1>(state), dest);
         break;
     }
     case Mode::FOR:
@@ -145,8 +151,8 @@ void CompressionCodecIntegerLightweight::decompressDataForType(
     case Mode::CONSTANT_DELTA:
         Compression::ConstantDeltaDecoding<T>(source, source_size, dest, output_size);
         break;
-    case Mode::RLE:
-        Compression::RLEDecoding<T>(source, source_size, dest, output_size);
+    case Mode::RunLength:
+        Compression::RunLengthDecoding<T>(source, source_size, dest, output_size);
         break;
     case Mode::FOR:
         Compression::FORDecoding<T>(source, source_size, dest, output_size);
@@ -166,6 +172,21 @@ void CompressionCodecIntegerLightweight::decompressDataForType(
     }
 }
 
+String CompressionCodecIntegerLightweight::CompressContext::toDebugString() const
+{
+    return fmt::format(
+        "lz4: {}, lightweight: {}, constant_delta: {}, delta_for: {}, rle: {}, lz4 {} -> {}, lightweight {} -> {}",
+        lz4_counter,
+        lw_counter,
+        constant_delta_counter,
+        delta_for_counter,
+        rle_counter,
+        lz4_uncompressed_size,
+        lz4_compressed_size,
+        lw_uncompressed_size,
+        lw_compressed_size);
+}
+
 void CompressionCodecIntegerLightweight::CompressContext::update(size_t uncompressed_size, size_t compressed_size)
 {
     if (mode == Mode::LZ4)
@@ -184,7 +205,7 @@ void CompressionCodecIntegerLightweight::CompressContext::update(size_t uncompre
         ++constant_delta_counter;
     if (mode == Mode::DELTA_FOR)
         ++delta_for_counter;
-    if (mode == Mode::RLE)
+    if (mode == Mode::RunLength)
         ++rle_counter;
 }
 
@@ -204,7 +225,7 @@ bool CompressionCodecIntegerLightweight::CompressContext::needAnalyzeDelta() con
     return lw_counter <= 5 || constant_delta_counter != 0 || delta_for_counter != 0;
 }
 
-bool CompressionCodecIntegerLightweight::CompressContext::needAnalyzeRLE() const
+bool CompressionCodecIntegerLightweight::CompressContext::needAnalyzeRunLength() const
 {
     return lw_counter <= 5 || rle_counter != 0;
 }
@@ -219,7 +240,10 @@ void CompressionCodecIntegerLightweight::CompressContext::analyze(std::span<cons
     }
 
     if (!needAnalyze())
+    {
+        RUNTIME_CHECK(mode == Mode::LZ4);
         return;
+    }
 
     // Check CONSTANT
     T min_value = *std::min_element(values.begin(), values.end());
@@ -260,9 +284,9 @@ void CompressionCodecIntegerLightweight::CompressContext::analyze(std::span<cons
             = BitpackingPrimitives::getRequiredSize(deltas.size(), delta_for_width) + sizeof(T) + sizeof(UInt8);
     }
 
-    // RLE
-    std::vector<std::pair<T, UInt8>> rle;
-    if (needAnalyzeRLE())
+    // RunLength
+    Compression::RunLengthPairs<T> rle;
+    if (needAnalyzeRunLength())
     {
         rle.reserve(values.size());
         rle.emplace_back(values[0], 1);
@@ -281,11 +305,11 @@ void CompressionCodecIntegerLightweight::CompressContext::analyze(std::span<cons
     // Assume that the compression ratio of LZ4 is 3.0
     // The official document says that the compression ratio of LZ4 is 2.1, https://github.com/lz4/lz4
     size_t estimate_lz_size = values.size() * sizeof(T) / 3;
-    size_t rle_size = Compression::RLEPairsSize(rle);
-    if (rle_size < delta_for_size && rle_size < for_size && rle_size < estimate_lz_size)
+    size_t rle_size = rle.empty() ? std::numeric_limits<size_t>::max() : Compression::RunLengthPairsSize(rle);
+    if (needAnalyzeRunLength() && rle_size < delta_for_size && rle_size < for_size && rle_size < estimate_lz_size)
     {
         state = std::move(rle);
-        mode = Mode::RLE;
+        mode = Mode::RunLength;
     }
     else if (for_size < delta_for_size && for_size < estimate_lz_size)
     {
@@ -293,7 +317,7 @@ void CompressionCodecIntegerLightweight::CompressContext::analyze(std::span<cons
         state = FORState<T>{std::move(values_copy), min_value, for_width};
         mode = Mode::FOR;
     }
-    else if (delta_for_size < estimate_lz_size)
+    else if (needAnalyzeDelta() && delta_for_size < estimate_lz_size)
     {
         state = DeltaFORState<T>{std::move(deltas), min_delta, delta_for_width};
         mode = Mode::DELTA_FOR;
diff --git a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h
index f2760bcaf05..76b7db18599 100644
--- a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h
+++ b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h
@@ -29,6 +29,8 @@ class CompressionCodecIntegerLightweight : public ICompressionCodec
 
     UInt8 getMethodByte() const override;
 
+    ~CompressionCodecIntegerLightweight() override;
+
 protected:
     UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
     void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size)
@@ -45,7 +47,7 @@ class CompressionCodecIntegerLightweight : public ICompressionCodec
         Invalid = 0,
         CONSTANT = 1, // all values are the same
         CONSTANT_DELTA = 2, // the difference between two adjacent values is the same
-        RLE = 3, // run-length encoding
+        RunLength = 3, // run-length encoding
         FOR = 4, // Frame of Reference encoding
         DELTA_FOR = 5, // delta encoding and then FOR encoding
         LZ4 = 6, // the above modes are not suitable, use LZ4 instead
@@ -56,7 +58,7 @@ class CompressionCodecIntegerLightweight : public ICompressionCodec
     using ConstantState = T;
 
     template <typename T>
-    using RLEState = std::vector<std::pair<T, UInt8>>;
+    using RunLengthState = std::vector<std::pair<T, UInt8>>;
 
     template <typename T>
     struct FORState
@@ -77,7 +79,7 @@ class CompressionCodecIntegerLightweight : public ICompressionCodec
 
     // State is a union of different states for different modes
     template <typename T>
-    using State = std::variant<ConstantState<T>, RLEState<T>, FORState<T>, DeltaFORState<T>>;
+    using State = std::variant<ConstantState<T>, RunLengthState<T>, FORState<T>, DeltaFORState<T>>;
 
     class CompressContext
     {
@@ -86,13 +88,16 @@ class CompressionCodecIntegerLightweight : public ICompressionCodec
 
         bool needAnalyze() const;
         bool needAnalyzeDelta() const;
-        bool needAnalyzeRLE() const;
+        bool needAnalyzeRunLength() const;
 
         template <typename T>
         void analyze(std::span<const T> & values, State<T> & state);
 
         void update(size_t uncompressed_size, size_t compressed_size);
 
+        String toDebugString() const;
+        bool isCompression() const { return lz4_counter > 0 || lw_counter > 0; }
+
         Mode mode = Mode::LZ4;
 
     private:
diff --git a/dbms/src/IO/Compression/CompressionCodecRLE.cpp b/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
similarity index 67%
rename from dbms/src/IO/Compression/CompressionCodecRLE.cpp
rename to dbms/src/IO/Compression/CompressionCodecRunLength.cpp
index 27ddd53c4a7..5ca5fb0c4b5 100644
--- a/dbms/src/IO/Compression/CompressionCodecRLE.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 #include <Common/Exception.h>
-#include <IO/Compression/CompressionCodecRLE.h>
+#include <IO/Compression/CompressionCodecRunLength.h>
 #include <IO/Compression/CompressionInfo.h>
 #include <IO/Compression/EncodingUtil.h>
 #include <IO/Compression/ICompressionCodec.h>
@@ -29,16 +29,16 @@ extern const int CANNOT_COMPRESS;
 extern const int CANNOT_DECOMPRESS;
 } // namespace ErrorCodes
 
-CompressionCodecRLE::CompressionCodecRLE(UInt8 bytes_size_)
+CompressionCodecRunLength::CompressionCodecRunLength(UInt8 bytes_size_)
     : bytes_size(bytes_size_)
 {}
 
-UInt8 CompressionCodecRLE::getMethodByte() const
+UInt8 CompressionCodecRunLength::getMethodByte() const
 {
-    return static_cast<uint8_t>(CompressionMethodByte::RLE);
+    return static_cast<uint8_t>(CompressionMethodByte::RunLength);
 }
 
-UInt32 CompressionCodecRLE::getMaxCompressedDataSize(UInt32 uncompressed_size) const
+UInt32 CompressionCodecRunLength::getMaxCompressedDataSize(UInt32 uncompressed_size) const
 {
     // If the encoded data is larger than the original data, we will store the original data
     // Additional byte is used to store the size of the data type
@@ -53,7 +53,7 @@ template <typename T>
 UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
 {
     const char * source_end = source + source_size;
-    DB::Compression::RLEPairs<T> rle_vec;
+    DB::Compression::RunLengthPairs<T> rle_vec;
     rle_vec.reserve(source_size / sizeof(T));
     for (const auto * src = source; src < source_end; src += sizeof(T))
     {
@@ -65,7 +65,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
             ++rle_vec.back().second;
     }
 
-    if (DB::Compression::RLEPairsSize<T>(rle_vec) > source_size)
+    if (DB::Compression::RunLengthPairsSize<T>(rle_vec) > source_size)
     {
         dest[0] = JUST_COPY_CODE;
         memcpy(&dest[1], source, source_size);
@@ -74,12 +74,12 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
 
     dest[0] = sizeof(T);
     dest += 1;
-    return 1 + DB::Compression::RLEEncoding<T>(rle_vec, dest);
+    return 1 + DB::Compression::RunLengthEncoding<T>(rle_vec, dest);
 }
 
 } // namespace
 
-UInt32 CompressionCodecRLE::doCompressData(const char * source, UInt32 source_size, char * dest) const
+UInt32 CompressionCodecRunLength::doCompressData(const char * source, UInt32 source_size, char * dest) const
 {
     if unlikely (source_size % bytes_size != 0)
         throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "source size {} is not aligned to {}", source_size, bytes_size);
@@ -94,18 +94,20 @@ UInt32 CompressionCodecRLE::doCompressData(const char * source, UInt32 source_si
     case 8:
         return compressDataForType<UInt64>(source, source_size, dest);
     default:
-        throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress RLE-encoded data. Unsupported bytes size");
+        throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress RunLength-encoded data. Unsupported bytes size");
     }
 }
 
-void CompressionCodecRLE::doDecompressData(
+void CompressionCodecRunLength::doDecompressData(
     const char * source,
     UInt32 source_size,
     char * dest,
     UInt32 uncompressed_size) const
 {
     if (source_size < 1)
-        throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress RLE-encoded data. File has wrong header");
+        throw Exception(
+            ErrorCodes::CANNOT_DECOMPRESS,
+            "Cannot decompress RunLength-encoded data. File has wrong header");
 
     if (uncompressed_size == 0)
         return;
@@ -114,7 +116,9 @@ void CompressionCodecRLE::doDecompressData(
     if (bytes_size == JUST_COPY_CODE)
     {
         if (source_size - 1 < uncompressed_size)
-            throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress RLE-encoded data. File has wrong header");
+            throw Exception(
+                ErrorCodes::CANNOT_DECOMPRESS,
+                "Cannot decompress RunLength-encoded data. File has wrong header");
 
         memcpy(dest, &source[1], uncompressed_size);
         return;
@@ -130,19 +134,21 @@ void CompressionCodecRLE::doDecompressData(
     switch (bytes_size)
     {
     case 1:
-        DB::Compression::RLEDecoding<UInt8>(&source[1], source_size - 1, dest, uncompressed_size);
+        DB::Compression::RunLengthDecoding<UInt8>(&source[1], source_size - 1, dest, uncompressed_size);
         break;
     case 2:
-        DB::Compression::RLEDecoding<UInt16>(&source[1], source_size - 1, dest, uncompressed_size);
+        DB::Compression::RunLengthDecoding<UInt16>(&source[1], source_size - 1, dest, uncompressed_size);
         break;
     case 4:
-        DB::Compression::RLEDecoding<UInt32>(&source[1], source_size - 1, dest, uncompressed_size);
+        DB::Compression::RunLengthDecoding<UInt32>(&source[1], source_size - 1, dest, uncompressed_size);
         break;
     case 8:
-        DB::Compression::RLEDecoding<UInt64>(&source[1], source_size - 1, dest, uncompressed_size);
+        DB::Compression::RunLengthDecoding<UInt64>(&source[1], source_size - 1, dest, uncompressed_size);
         break;
     default:
-        throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress RLE-encoded data. Unsupported bytes size");
+        throw Exception(
+            ErrorCodes::CANNOT_DECOMPRESS,
+            "Cannot decompress RunLength-encoded data. Unsupported bytes size");
     }
 }
 
diff --git a/dbms/src/IO/Compression/CompressionCodecRLE.h b/dbms/src/IO/Compression/CompressionCodecRunLength.h
similarity index 91%
rename from dbms/src/IO/Compression/CompressionCodecRLE.h
rename to dbms/src/IO/Compression/CompressionCodecRunLength.h
index b114ee13515..c3d38090346 100644
--- a/dbms/src/IO/Compression/CompressionCodecRLE.h
+++ b/dbms/src/IO/Compression/CompressionCodecRunLength.h
@@ -19,10 +19,10 @@
 namespace DB
 {
 
-class CompressionCodecRLE : public ICompressionCodec
+class CompressionCodecRunLength : public ICompressionCodec
 {
 public:
-    explicit CompressionCodecRLE(UInt8 bytes_size_);
+    explicit CompressionCodecRunLength(UInt8 bytes_size_);
 
     UInt8 getMethodByte() const override;
 
diff --git a/dbms/src/IO/Compression/CompressionFactory.h b/dbms/src/IO/Compression/CompressionFactory.h
index 8e1646f5550..06a458a5144 100644
--- a/dbms/src/IO/Compression/CompressionFactory.h
+++ b/dbms/src/IO/Compression/CompressionFactory.h
@@ -21,7 +21,7 @@
 #include <IO/Compression/CompressionCodecLZ4.h>
 #include <IO/Compression/CompressionCodecMultiple.h>
 #include <IO/Compression/CompressionCodecNone.h>
-#include <IO/Compression/CompressionCodecRLE.h>
+#include <IO/Compression/CompressionCodecRunLength.h>
 #include <IO/Compression/CompressionCodecZSTD.h>
 #include <IO/Compression/CompressionSettings.h>
 #include <IO/Compression/ICompressionCodec.h>
@@ -63,8 +63,8 @@ class CompressionFactory
         {
         case CompressionMethodByte::DeltaFOR:
             return std::make_unique<CompressionCodecDeltaFOR>(setting.type_bytes_size);
-        case CompressionMethodByte::RLE:
-            return std::make_unique<CompressionCodecRLE>(setting.type_bytes_size);
+        case CompressionMethodByte::RunLength:
+            return std::make_unique<CompressionCodecRunLength>(setting.type_bytes_size);
         case CompressionMethodByte::FOR:
             return std::make_unique<CompressionCodecFOR>(setting.type_bytes_size);
         case CompressionMethodByte::NONE:
diff --git a/dbms/src/IO/Compression/CompressionInfo.h b/dbms/src/IO/Compression/CompressionInfo.h
index fc9866635c9..c8b59c974b7 100644
--- a/dbms/src/IO/Compression/CompressionInfo.h
+++ b/dbms/src/IO/Compression/CompressionInfo.h
@@ -58,7 +58,7 @@ enum class CompressionMethodByte : UInt8
     ZSTD            = 0x90,
     Multiple        = 0x91,
     DeltaFOR        = 0x92,
-    RLE             = 0x93,
+    RunLength       = 0x93,
     FOR             = 0x94,
     Lightweight     = 0x95,
     // COL_END is not a compreesion method, but a flag of column end used in compact file.
diff --git a/dbms/src/IO/Compression/CompressionSettings.h b/dbms/src/IO/Compression/CompressionSettings.h
index 2f90eee5019..54bf73714da 100644
--- a/dbms/src/IO/Compression/CompressionSettings.h
+++ b/dbms/src/IO/Compression/CompressionSettings.h
@@ -40,7 +40,7 @@ const std::unordered_map<CompressionMethodByte, CompressionMethod> method_map =
     {CompressionMethodByte::QPL, CompressionMethod::QPL},
     {CompressionMethodByte::NONE, CompressionMethod::NONE},
     {CompressionMethodByte::DeltaFOR, CompressionMethod::NONE},
-    {CompressionMethodByte::RLE, CompressionMethod::NONE},
+    {CompressionMethodByte::RunLength, CompressionMethod::NONE},
     {CompressionMethodByte::FOR, CompressionMethod::NONE},
     {CompressionMethodByte::Lightweight, CompressionMethod::Lightweight},
 };
diff --git a/dbms/src/IO/Compression/EncodingUtil.h b/dbms/src/IO/Compression/EncodingUtil.h
index e07a8555304..da3ac50f56f 100644
--- a/dbms/src/IO/Compression/EncodingUtil.h
+++ b/dbms/src/IO/Compression/EncodingUtil.h
@@ -91,20 +91,20 @@ void ConstantDeltaDecoding(const char * src, UInt32 source_size, char * dest, UI
 /// Run-length encoding
 
 template <std::integral T>
-using RLEPair = std::pair<T, UInt8>;
+using RunLengthPair = std::pair<T, UInt8>;
 template <std::integral T>
-using RLEPairs = std::vector<RLEPair<T>>;
+using RunLengthPairs = std::vector<RunLengthPair<T>>;
 template <std::integral T>
-static constexpr size_t RLEPairLength = sizeof(T) + sizeof(UInt8);
+static constexpr size_t RunLengthPairLength = sizeof(T) + sizeof(UInt8);
 
 template <std::integral T>
-size_t RLEPairsSize(const RLEPairs<T> & rle)
+size_t RunLengthPairsSize(const RunLengthPairs<T> & rle)
 {
-    return rle.size() * RLEPairLength<T>;
+    return rle.size() * RunLengthPairLength<T>;
 }
 
 template <std::integral T>
-size_t RLEEncoding(const RLEPairs<T> & rle, char * dest)
+size_t RunLengthEncoding(const RunLengthPairs<T> & rle, char * dest)
 {
     for (const auto & [value, count] : rle)
     {
@@ -113,32 +113,40 @@ size_t RLEEncoding(const RLEPairs<T> & rle, char * dest)
         unalignedStore<UInt8>(dest, count);
         dest += sizeof(UInt8);
     }
-    return rle.size() * RLEPairLength<T>;
+    return rle.size() * RunLengthPairLength<T>;
 }
 
 template <std::integral T>
-void RLEDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+void RunLengthDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
-    if unlikely (source_size % RLEPairLength<T> != 0)
+    if unlikely (source_size % RunLengthPairLength<T> != 0)
         throw Exception(
             ErrorCodes::CANNOT_DECOMPRESS,
-            "Cannot use RLE decoding, data size {} is not aligned to {}",
+            "Cannot use RunLength decoding, data size {} is not aligned to {}",
             source_size,
-            RLEPairLength<T>);
+            RunLengthPairLength<T>);
 
     const char * dest_end = dest + dest_size;
-    for (UInt32 i = 0; i < source_size / RLEPairLength<T>; ++i)
+    for (UInt32 i = 0; i < source_size / RunLengthPairLength<T>; ++i)
     {
         T value = unalignedLoad<T>(src);
         src += sizeof(T);
         auto count = unalignedLoad<UInt8>(src);
         src += sizeof(UInt8);
-        if (dest + count * sizeof(T) > dest_end)
-            throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot use RLE decoding, data is too large");
-        for (UInt8 j = 0; j < count; ++j)
+        if (unlikely(dest + count * sizeof(T) > dest_end))
+            throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot use RunLength decoding, data is too large");
+        if constexpr (std::is_same_v<T, UInt8> || std::is_same_v<T, Int8>)
         {
-            unalignedStore<T>(dest, value);
-            dest += sizeof(T);
+            memset(dest, value, count);
+            dest += count; // memset does not move dest; step past the run so the next pair is not written over it
+        }
+        else
+        {
+            for (UInt8 j = 0; j < count; ++j)
+            {
+                unalignedStore<T>(dest, value);
+                dest += sizeof(T);
+            }
         }
     }
 }
diff --git a/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp b/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp
index f1381484c31..0c458dfdaeb 100644
--- a/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp
+++ b/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp
@@ -535,7 +535,7 @@ const auto IntegerCodecsToTest = ::testing::Values(
     CompressionMethodByte::Lightweight,
     CompressionMethodByte::DeltaFOR,
     // CompressionMethodByte::FOR, // disable FOR codec for now, since there are too many unit tests.
-    CompressionMethodByte::RLE
+    CompressionMethodByte::RunLength
 #if USE_QPL
     ,
     CompressionMethodByte::QPL
diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h
index d85185bc729..f0ee9bf8606 100644
--- a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h
+++ b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h
@@ -65,12 +65,25 @@ class DMFileWriter
                 /*flags*/ -1,
                 /*mode*/ 0666,
                 max_compress_block_size))
-            , compressed_buf(CompressedWriteBuffer<>::build(
-                  *plain_file,
-                  compression_settings,
-                  !dmfile->getConfiguration().has_value()))
             , minmaxes(do_index ? std::make_shared<MinMaxIndex>(*type) : nullptr)
         {
+            // TODO: better, now only for test
+            if (type->isInteger())
+            {
+                assert(compression_settings.settings.size() == 1);
+                CompressionSettings settings(CompressionMethod::Lightweight);
+                auto & setting = settings.settings[0];
+                setting.type_bytes_size = type->getSizeOfValueInMemory();
+                compressed_buf = CompressedWriteBuffer<>::build(*plain_file, settings, !dmfile->getConfiguration());
+            }
+            else
+            {
+                compressed_buf = CompressedWriteBuffer<>::build( //
+                    *plain_file,
+                    compression_settings,
+                    !dmfile->getConfiguration());
+            }
+
             if (!dmfile->useMetaV2())
             {
                 // will not used in DMFileFormat::V3, could be removed when v3 is default

From e79352f233aec5bf85251a6e909aa5c75ab89191 Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Mon, 3 Jun 2024 15:05:29 +0800
Subject: [PATCH 07/27] rename

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 .../Compression/CompressionCodecDeltaFOR.cpp  | 18 +++---
 .../CompressionCodecIntegerLightweight.cpp    | 16 ++---
 .../Compression/CompressionCodecRunLength.cpp | 12 ++--
 dbms/src/IO/Compression/EncodingUtil.cpp      | 62 +++++++++----------
 dbms/src/IO/Compression/EncodingUtil.h        | 39 ++++++------
 5 files changed, 74 insertions(+), 73 deletions(-)

diff --git a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
index 73880a3424f..64df56856e5 100644
--- a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
@@ -57,7 +57,7 @@ template <std::integral T>
 UInt32 compressData(const char * source, UInt32 source_size, char * dest)
 {
     const auto count = source_size / sizeof(T);
-    DB::Compression::DeltaEncoding<T>(reinterpret_cast<const T *>(source), count, reinterpret_cast<T *>(dest));
+    DB::Compression::deltaEncoding<T>(reinterpret_cast<const T *>(source), count, reinterpret_cast<T *>(dest));
     // Cast deltas to signed type to better compress negative values.
     // For example, if we have a sequence of UInt8 values [3, 2, 1, 0], the deltas will be [3, -1, -1, -1]
     // If we compress them as UInt8, we will get [3, 255, 255, 255], which is not optimal.
@@ -114,16 +114,16 @@ void CompressionCodecDeltaFOR::doDecompressData(
     switch (bytes_size)
     {
     case 1:
-        DB::Compression::DeltaFORDecoding<UInt8>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::deltaFORDecoding<UInt8>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     case 2:
-        DB::Compression::DeltaFORDecoding<UInt16>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::deltaFORDecoding<UInt16>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     case 4:
-        DB::Compression::DeltaFORDecoding<UInt32>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::deltaFORDecoding<UInt32>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     case 8:
-        DB::Compression::DeltaFORDecoding<UInt64>(&source[1], source_size_no_header, dest, uncompressed_size);
+        DB::Compression::deltaFORDecoding<UInt64>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     default:
         throw Exception(
@@ -158,16 +158,16 @@ void CompressionCodecDeltaFOR::ordinaryDecompress(
     switch (bytes_size)
     {
     case 1:
-        DB::Compression::OrdinaryDeltaFORDecoding<UInt8>(&source[1], source_size_no_header, dest, dest_size);
+        DB::Compression::ordinaryDeltaFORDecoding<UInt8>(&source[1], source_size_no_header, dest, dest_size);
         break;
     case 2:
-        DB::Compression::OrdinaryDeltaFORDecoding<UInt16>(&source[1], source_size_no_header, dest, dest_size);
+        DB::Compression::ordinaryDeltaFORDecoding<UInt16>(&source[1], source_size_no_header, dest, dest_size);
         break;
     case 4:
-        DB::Compression::OrdinaryDeltaFORDecoding<UInt32>(&source[1], source_size_no_header, dest, dest_size);
+        DB::Compression::ordinaryDeltaFORDecoding<UInt32>(&source[1], source_size_no_header, dest, dest_size);
         break;
     case 8:
-        DB::Compression::OrdinaryDeltaFORDecoding<UInt64>(&source[1], source_size_no_header, dest, dest_size);
+        DB::Compression::ordinaryDeltaFORDecoding<UInt64>(&source[1], source_size_no_header, dest, dest_size);
         break;
     default:
         throw Exception(
diff --git a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp
index 08b87bad5b8..f2962fcbdbc 100644
--- a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp
@@ -78,17 +78,17 @@ size_t CompressionCodecIntegerLightweight::compressDataForType(const char * sour
     {
     case Mode::CONSTANT:
     {
-        compressed_size += Compression::ConstantEncoding(std::get<0>(state), dest);
+        compressed_size += Compression::constantEncoding(std::get<0>(state), dest);
         break;
     }
     case Mode::CONSTANT_DELTA:
     {
-        compressed_size += Compression::ConstantDeltaEncoding(values[0], std::get<0>(state), dest);
+        compressed_size += Compression::constantDeltaEncoding(values[0], std::get<0>(state), dest);
         break;
     }
     case Mode::RunLength:
     {
-        compressed_size += Compression::RunLengthEncoding<T>(std::get<1>(state), dest);
+        compressed_size += Compression::runLengthEncoding<T>(std::get<1>(state), dest);
         break;
     }
     case Mode::FOR:
@@ -146,19 +146,19 @@ void CompressionCodecIntegerLightweight::decompressDataForType(
     switch (mode)
     {
     case Mode::CONSTANT:
-        Compression::ConstantDecoding<T>(source, source_size, dest, output_size);
+        Compression::constantDecoding<T>(source, source_size, dest, output_size);
         break;
     case Mode::CONSTANT_DELTA:
-        Compression::ConstantDeltaDecoding<T>(source, source_size, dest, output_size);
+        Compression::constantDeltaDecoding<T>(source, source_size, dest, output_size);
         break;
     case Mode::RunLength:
-        Compression::RunLengthDecoding<T>(source, source_size, dest, output_size);
+        Compression::runLengthDecoding<T>(source, source_size, dest, output_size);
         break;
     case Mode::FOR:
         Compression::FORDecoding<T>(source, source_size, dest, output_size);
         break;
     case Mode::DELTA_FOR:
-        Compression::DeltaFORDecoding<T>(source, source_size, dest, output_size);
+        Compression::deltaFORDecoding<T>(source, source_size, dest, output_size);
         break;
     case Mode::LZ4:
         if (unlikely(LZ4_decompress_safe(source, dest, source_size, output_size) < 0))
@@ -305,7 +305,7 @@ void CompressionCodecIntegerLightweight::CompressContext::analyze(std::span<cons
     // Assume that the compression ratio of LZ4 is 3.0
     // The official document says that the compression ratio of LZ4 is 2.1, https://github.com/lz4/lz4
     size_t estimate_lz_size = values.size() * sizeof(T) / 3;
-    size_t rle_size = rle.empty() ? std::numeric_limits<size_t>::max() : Compression::RunLengthPairsSize(rle);
+    size_t rle_size = rle.empty() ? std::numeric_limits<size_t>::max() : Compression::runLengthPairsSize(rle);
     if (needAnalyzeRunLength() && rle_size < delta_for_size && rle_size < for_size && rle_size < estimate_lz_size)
     {
         state = std::move(rle);
diff --git a/dbms/src/IO/Compression/CompressionCodecRunLength.cpp b/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
index 5ca5fb0c4b5..ed438ea7ce7 100644
--- a/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
@@ -65,7 +65,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
             ++rle_vec.back().second;
     }
 
-    if (DB::Compression::RunLengthPairsSize<T>(rle_vec) > source_size)
+    if (DB::Compression::runLengthPairsSize<T>(rle_vec) >= source_size)
     {
         dest[0] = JUST_COPY_CODE;
         memcpy(&dest[1], source, source_size);
@@ -74,7 +74,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
 
     dest[0] = sizeof(T);
     dest += 1;
-    return 1 + DB::Compression::RunLengthEncoding<T>(rle_vec, dest);
+    return 1 + DB::Compression::runLengthEncoding<T>(rle_vec, dest);
 }
 
 } // namespace
@@ -134,16 +134,16 @@ void CompressionCodecRunLength::doDecompressData(
     switch (bytes_size)
     {
     case 1:
-        DB::Compression::RunLengthDecoding<UInt8>(&source[1], source_size - 1, dest, uncompressed_size);
+        DB::Compression::runLengthDecoding<UInt8>(&source[1], source_size - 1, dest, uncompressed_size);
         break;
     case 2:
-        DB::Compression::RunLengthDecoding<UInt16>(&source[1], source_size - 1, dest, uncompressed_size);
+        DB::Compression::runLengthDecoding<UInt16>(&source[1], source_size - 1, dest, uncompressed_size);
         break;
     case 4:
-        DB::Compression::RunLengthDecoding<UInt32>(&source[1], source_size - 1, dest, uncompressed_size);
+        DB::Compression::runLengthDecoding<UInt32>(&source[1], source_size - 1, dest, uncompressed_size);
         break;
     case 8:
-        DB::Compression::RunLengthDecoding<UInt64>(&source[1], source_size - 1, dest, uncompressed_size);
+        DB::Compression::runLengthDecoding<UInt64>(&source[1], source_size - 1, dest, uncompressed_size);
         break;
     default:
         throw Exception(
diff --git a/dbms/src/IO/Compression/EncodingUtil.cpp b/dbms/src/IO/Compression/EncodingUtil.cpp
index b947bf6dd84..531fbe60e24 100644
--- a/dbms/src/IO/Compression/EncodingUtil.cpp
+++ b/dbms/src/IO/Compression/EncodingUtil.cpp
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "EncodingUtil.h"
+#include <IO/Compression/EncodingUtil.h>
 
 #if defined(__AVX2__)
 #include <immintrin.h>
@@ -22,7 +22,7 @@ namespace DB::Compression
 {
 
 template <std::integral T>
-void ApplyFrameOfReference(T * dst, T frame_of_reference, UInt32 count)
+void applyFrameOfReference(T * dst, T frame_of_reference, UInt32 count)
 {
     if (frame_of_reference == 0)
         return;
@@ -61,17 +61,17 @@ void ApplyFrameOfReference(T * dst, T frame_of_reference, UInt32 count)
     }
 }
 
-template void ApplyFrameOfReference<UInt8>(UInt8 *, UInt8, UInt32);
-template void ApplyFrameOfReference<UInt16>(UInt16 *, UInt16, UInt32);
-template void ApplyFrameOfReference<UInt32>(UInt32 *, UInt32, UInt32);
-template void ApplyFrameOfReference<UInt64>(UInt64 *, UInt64, UInt32);
-template void ApplyFrameOfReference<Int8>(Int8 *, Int8, UInt32);
-template void ApplyFrameOfReference<Int16>(Int16 *, Int16, UInt32);
-template void ApplyFrameOfReference<Int32>(Int32 *, Int32, UInt32);
-template void ApplyFrameOfReference<Int64>(Int64 *, Int64, UInt32);
+template void applyFrameOfReference<UInt8>(UInt8 *, UInt8, UInt32);
+template void applyFrameOfReference<UInt16>(UInt16 *, UInt16, UInt32);
+template void applyFrameOfReference<UInt32>(UInt32 *, UInt32, UInt32);
+template void applyFrameOfReference<UInt64>(UInt64 *, UInt64, UInt32);
+template void applyFrameOfReference<Int8>(Int8 *, Int8, UInt32);
+template void applyFrameOfReference<Int16>(Int16 *, Int16, UInt32);
+template void applyFrameOfReference<Int32>(Int32 *, Int32, UInt32);
+template void applyFrameOfReference<Int64>(Int64 *, Int64, UInt32);
 
 template <std::integral T>
-void SubtractFrameOfReference(T * dst, T frame_of_reference, UInt32 count)
+void subtractFrameOfReference(T * dst, T frame_of_reference, UInt32 count)
 {
     if (frame_of_reference == 0)
         return;
@@ -110,14 +110,14 @@ void SubtractFrameOfReference(T * dst, T frame_of_reference, UInt32 count)
     }
 }
 
-template void SubtractFrameOfReference<Int8>(Int8 *, Int8, UInt32);
-template void SubtractFrameOfReference<Int16>(Int16 *, Int16, UInt32);
-template void SubtractFrameOfReference<Int32>(Int32 *, Int32, UInt32);
-template void SubtractFrameOfReference<Int64>(Int64 *, Int64, UInt32);
-template void SubtractFrameOfReference<UInt8>(UInt8 *, UInt8, UInt32);
-template void SubtractFrameOfReference<UInt16>(UInt16 *, UInt16, UInt32);
-template void SubtractFrameOfReference<UInt32>(UInt32 *, UInt32, UInt32);
-template void SubtractFrameOfReference<UInt64>(UInt64 *, UInt64, UInt32);
+template void subtractFrameOfReference<Int8>(Int8 *, Int8, UInt32);
+template void subtractFrameOfReference<Int16>(Int16 *, Int16, UInt32);
+template void subtractFrameOfReference<Int32>(Int32 *, Int32, UInt32);
+template void subtractFrameOfReference<Int64>(Int64 *, Int64, UInt32);
+template void subtractFrameOfReference<UInt8>(UInt8 *, UInt8, UInt32);
+template void subtractFrameOfReference<UInt16>(UInt16 *, UInt16, UInt32);
+template void subtractFrameOfReference<UInt32>(UInt32 *, UInt32, UInt32);
+template void subtractFrameOfReference<UInt64>(UInt64 *, UInt64, UInt32);
 
 template <std::integral T>
 UInt8 FOREncodingWidth(std::vector<T> & values, T frame_of_reference)
@@ -128,7 +128,7 @@ UInt8 FOREncodingWidth(std::vector<T> & values, T frame_of_reference)
         // For example, we have a sequence of Int8 values [-128, 1, 127], after subtracting frame of reference -128, the values are [0, -127, -1].
         // The minimum bit width required to store the values is 8 rather than the width of `max_value - min_value = -1`.
         // So we need to calculate the minimum bit width of the values after subtracting frame of reference.
-        SubtractFrameOfReference<T>(values.data(), frame_of_reference, values.size());
+        subtractFrameOfReference<T>(values.data(), frame_of_reference, values.size());
         T max_value = *std::max_element(values.cbegin(), values.cend());
         T min_value = *std::min_element(values.cbegin(), values.cend());
         return BitpackingPrimitives::minimumBitWidth<T>(min_value, max_value);
@@ -150,7 +150,7 @@ template UInt8 FOREncodingWidth<UInt32>(std::vector<UInt32> &, UInt32);
 template UInt8 FOREncodingWidth<UInt64>(std::vector<UInt64> &, UInt64);
 
 template <std::integral T>
-void DeltaDecoding(const char * source, UInt32 source_size, char * dest)
+void deltaDecoding(const char * source, UInt32 source_size, char * dest)
 {
     ordinaryDeltaDecoding<T>(source, source_size, dest);
 }
@@ -159,7 +159,7 @@ void DeltaDecoding(const char * source, UInt32 source_size, char * dest)
 // Note: using SIMD to rewrite compress does not improve performance.
 
 template <>
-void DeltaDecoding<UInt32>(const char * __restrict__ raw_source, UInt32 raw_source_size, char * __restrict__ raw_dest)
+void deltaDecoding<UInt32>(const char * __restrict__ raw_source, UInt32 raw_source_size, char * __restrict__ raw_dest)
 {
     const auto * source = reinterpret_cast<const UInt32 *>(raw_source);
     auto source_size = raw_source_size / sizeof(UInt32);
@@ -183,7 +183,7 @@ void DeltaDecoding<UInt32>(const char * __restrict__ raw_source, UInt32 raw_sour
 }
 
 template <>
-void DeltaDecoding<UInt64>(const char * __restrict__ raw_source, UInt32 raw_source_size, char * __restrict__ raw_dest)
+void deltaDecoding<UInt64>(const char * __restrict__ raw_source, UInt32 raw_source_size, char * __restrict__ raw_dest)
 {
     const auto * source = reinterpret_cast<const UInt64 *>(raw_source);
     auto source_size = raw_source_size / sizeof(UInt64);
@@ -219,14 +219,14 @@ void DeltaDecoding<UInt64>(const char * __restrict__ raw_source, UInt32 raw_sour
 #endif
 
 template <std::integral T>
-void DeltaFORDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+void deltaFORDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
     static_assert(std::is_integral<T>::value, "Integral required.");
-    OrdinaryDeltaFORDecoding<T>(src, source_size, dest, dest_size);
+    ordinaryDeltaFORDecoding<T>(src, source_size, dest, dest_size);
 }
 
 template <>
-void DeltaFORDecoding<UInt32>(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+void deltaFORDecoding<UInt32>(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
     const auto count = dest_size / sizeof(UInt32);
     auto round_size = BitpackingPrimitives::roundUpToAlgorithmGroupSize(count);
@@ -235,11 +235,11 @@ void DeltaFORDecoding<UInt32>(const char * src, UInt32 source_size, char * dest,
     char tmp_buffer[required_size];
     memset(tmp_buffer, 0, required_size);
     FORDecoding<Int32>(src, source_size, tmp_buffer, required_size);
-    DeltaDecoding<UInt32>(reinterpret_cast<const char *>(tmp_buffer), dest_size, dest);
+    deltaDecoding<UInt32>(reinterpret_cast<const char *>(tmp_buffer), dest_size, dest);
 }
 
 template <>
-void DeltaFORDecoding<UInt64>(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+void deltaFORDecoding<UInt64>(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
     const auto count = dest_size / sizeof(UInt64);
     const auto round_size = BitpackingPrimitives::roundUpToAlgorithmGroupSize(count);
@@ -248,10 +248,10 @@ void DeltaFORDecoding<UInt64>(const char * src, UInt32 source_size, char * dest,
     char tmp_buffer[required_size];
     memset(tmp_buffer, 0, required_size);
     FORDecoding<Int64>(src, source_size, tmp_buffer, required_size);
-    DeltaDecoding<UInt64>(reinterpret_cast<const char *>(tmp_buffer), dest_size, dest);
+    deltaDecoding<UInt64>(reinterpret_cast<const char *>(tmp_buffer), dest_size, dest);
 }
 
-template void DeltaFORDecoding<UInt8>(const char *, UInt32, char *, UInt32);
-template void DeltaFORDecoding<UInt16>(const char *, UInt32, char *, UInt32);
+template void deltaFORDecoding<UInt8>(const char *, UInt32, char *, UInt32);
+template void deltaFORDecoding<UInt16>(const char *, UInt32, char *, UInt32);
 
 } // namespace DB::Compression
diff --git a/dbms/src/IO/Compression/EncodingUtil.h b/dbms/src/IO/Compression/EncodingUtil.h
index da3ac50f56f..2632e80b65a 100644
--- a/dbms/src/IO/Compression/EncodingUtil.h
+++ b/dbms/src/IO/Compression/EncodingUtil.h
@@ -35,16 +35,16 @@ namespace DB::Compression
 /// Constant encoding
 
 template <std::integral T>
-size_t ConstantEncoding(T constant, char * dest)
+size_t constantEncoding(T constant, char * dest)
 {
     unalignedStore<T>(dest, constant);
     return sizeof(T);
 }
 
 template <std::integral T>
-void ConstantDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+void constantDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
-    if (source_size < sizeof(T))
+    if (unlikely(source_size < sizeof(T)))
         throw Exception(
             ErrorCodes::CANNOT_DECOMPRESS,
             "Cannot use Constant decoding, data size {} is too small",
@@ -61,7 +61,7 @@ void ConstantDecoding(const char * src, UInt32 source_size, char * dest, UInt32
 /// Constant delta encoding
 
 template <std::integral T>
-size_t ConstantDeltaEncoding(T first_value, T constant_delta, char * dest)
+size_t constantDeltaEncoding(T first_value, T constant_delta, char * dest)
 {
     unalignedStore<T>(dest, first_value);
     dest += sizeof(T);
@@ -70,9 +70,9 @@ size_t ConstantDeltaEncoding(T first_value, T constant_delta, char * dest)
 }
 
 template <std::integral T>
-void ConstantDeltaDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+void constantDeltaDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
-    if (source_size < sizeof(T) + sizeof(T))
+    if (unlikely(source_size < sizeof(T) + sizeof(T)))
         throw Exception(
             ErrorCodes::CANNOT_DECOMPRESS,
             "Cannot use ConstantDelta decoding, data size {} is too small",
@@ -98,13 +98,13 @@ template <std::integral T>
 static constexpr size_t RunLengthPairLength = sizeof(T) + sizeof(UInt8);
 
 template <std::integral T>
-size_t RunLengthPairsSize(const RunLengthPairs<T> & rle)
+size_t runLengthPairsSize(const RunLengthPairs<T> & rle)
 {
     return rle.size() * RunLengthPairLength<T>;
 }
 
 template <std::integral T>
-size_t RunLengthEncoding(const RunLengthPairs<T> & rle, char * dest)
+size_t runLengthEncoding(const RunLengthPairs<T> & rle, char * dest)
 {
     for (const auto & [value, count] : rle)
     {
@@ -117,9 +117,9 @@ size_t RunLengthEncoding(const RunLengthPairs<T> & rle, char * dest)
 }
 
 template <std::integral T>
-void RunLengthDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+void runLengthDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
-    if unlikely (source_size % RunLengthPairLength<T> != 0)
+    if (unlikely(source_size % RunLengthPairLength<T> != 0))
         throw Exception(
             ErrorCodes::CANNOT_DECOMPRESS,
             "Cannot use RunLength decoding, data size {} is not aligned to {}",
@@ -138,6 +138,7 @@ void RunLengthDecoding(const char * src, UInt32 source_size, char * dest, UInt32
         if constexpr (std::is_same_v<T, UInt8> || std::is_same_v<T, Int8>)
         {
             memset(dest, value, count);
+            dest += count * sizeof(T);
         }
         else
         {
@@ -153,7 +154,7 @@ void RunLengthDecoding(const char * src, UInt32 source_size, char * dest, UInt32
 /// Frame of Reference encoding
 
 template <std::integral T>
-void SubtractFrameOfReference(T * dst, T frame_of_reference, UInt32 count);
+void subtractFrameOfReference(T * dst, T frame_of_reference, UInt32 count);
 
 template <std::integral T>
 UInt8 FOREncodingWidth(std::vector<T> & values, T frame_of_reference);
@@ -163,7 +164,7 @@ size_t FOREncoding(std::vector<T> & values, T frame_of_reference, UInt8 width, c
 {
     assert(!values.empty());
     if constexpr (!skip_subtract_frame_of_reference)
-        SubtractFrameOfReference(values.data(), frame_of_reference, values.size());
+        subtractFrameOfReference(values.data(), frame_of_reference, values.size());
     // store frame of reference
     unalignedStore<T>(dest, frame_of_reference);
     dest += sizeof(T);
@@ -180,7 +181,7 @@ size_t FOREncoding(std::vector<T> & values, T frame_of_reference, UInt8 width, c
 }
 
 template <std::integral T>
-void ApplyFrameOfReference(T * dst, T frame_of_reference, UInt32 count);
+void applyFrameOfReference(T * dst, T frame_of_reference, UInt32 count);
 
 template <std::integral T>
 void FORDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
@@ -198,7 +199,7 @@ void FORDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_
         // Reserve enough space for the temporary buffer.
         unsigned char tmp_buffer[round_size * sizeof(T)];
         BitpackingPrimitives::unPackBuffer<T>(tmp_buffer, reinterpret_cast<const unsigned char *>(src), count, width);
-        ApplyFrameOfReference(reinterpret_cast<T *>(tmp_buffer), frame_of_reference, count);
+        applyFrameOfReference(reinterpret_cast<T *>(tmp_buffer), frame_of_reference, count);
         memcpy(dest, tmp_buffer, dest_size);
         return;
     }
@@ -207,13 +208,13 @@ void FORDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_
         reinterpret_cast<const unsigned char *>(src),
         count,
         width);
-    ApplyFrameOfReference(reinterpret_cast<T *>(dest), frame_of_reference, count);
+    applyFrameOfReference(reinterpret_cast<T *>(dest), frame_of_reference, count);
 }
 
 /// Delta encoding
 
 template <std::integral T>
-void DeltaEncoding(const T * source, UInt32 count, T * dest)
+void deltaEncoding(const T * source, UInt32 count, T * dest)
 {
     T prev = 0;
     for (UInt32 i = 0; i < count; ++i)
@@ -240,12 +241,12 @@ void ordinaryDeltaDecoding(const char * source, UInt32 source_size, char * dest)
 }
 
 template <std::integral T>
-void DeltaDecoding(const char * source, UInt32 source_size, char * dest);
+void deltaDecoding(const char * source, UInt32 source_size, char * dest);
 
 /// Delta + Frame of Reference encoding
 
 template <std::integral T>
-void OrdinaryDeltaFORDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
+void ordinaryDeltaFORDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
     using TS = typename std::make_signed_t<T>;
     FORDecoding<TS>(src, source_size, dest, dest_size);
@@ -253,6 +254,6 @@ void OrdinaryDeltaFORDecoding(const char * src, UInt32 source_size, char * dest,
 }
 
 template <std::integral T>
-void DeltaFORDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size);
+void deltaFORDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size);
 
 } // namespace DB::Compression

From d621355fa83e6958700566955e1a6afd810fa6e3 Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com>
Date: Tue, 4 Jun 2024 16:45:50 +0800
Subject: [PATCH 08/27] Update dbms/src/IO/Compression/EncodingUtil.cpp

Co-authored-by: jinhelin <linjinhe33@gmail.com>
---
 dbms/src/IO/Compression/EncodingUtil.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/dbms/src/IO/Compression/EncodingUtil.cpp b/dbms/src/IO/Compression/EncodingUtil.cpp
index 531fbe60e24..aca78ee1784 100644
--- a/dbms/src/IO/Compression/EncodingUtil.cpp
+++ b/dbms/src/IO/Compression/EncodingUtil.cpp
@@ -129,9 +129,8 @@ UInt8 FOREncodingWidth(std::vector<T> & values, T frame_of_reference)
         // The minimum bit width required to store the values is 8 rather than the width of `max_value - min_value = -1`.
         // So we need to calculate the minimum bit width of the values after subtracting frame of reference.
         subtractFrameOfReference<T>(values.data(), frame_of_reference, values.size());
-        T max_value = *std::max_element(values.cbegin(), values.cend());
-        T min_value = *std::min_element(values.cbegin(), values.cend());
-        return BitpackingPrimitives::minimumBitWidth<T>(min_value, max_value);
+        auto [min_value, max_value] = std::minmax_element(values.cbegin(), values.cend());
+        return BitpackingPrimitives::minimumBitWidth<T>(*min_value, *max_value);
     }
     else
     {

From 91ac0d675fc2a54e6c3d917f15199ee99e5a8fd9 Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com>
Date: Fri, 7 Jun 2024 17:17:31 +0800
Subject: [PATCH 09/27] Update dbms/src/IO/Compression/CompressionCodecFOR.cpp

Co-authored-by: jinhelin <linjinhe33@gmail.com>
---
 dbms/src/IO/Compression/CompressionCodecFOR.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/dbms/src/IO/Compression/CompressionCodecFOR.cpp b/dbms/src/IO/Compression/CompressionCodecFOR.cpp
index db3b7511bb0..55f0fadc803 100644
--- a/dbms/src/IO/Compression/CompressionCodecFOR.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecFOR.cpp
@@ -53,8 +53,7 @@ template <std::integral T>
 UInt32 CompressionCodecFOR::compressData(const T * source, UInt32 count, char * dest)
 {
     assert(count > 0); // doCompressData ensure it
-    std::vector<T> values(count);
-    values.assign(source, source + count);
+    std::vector<T> values(source, source + count);
     T frame_of_reference = *std::min_element(values.cbegin(), values.cend());
     UInt8 width = DB::Compression::FOREncodingWidth(values, frame_of_reference);
     return DB::Compression::FOREncoding<T, std::is_signed_v<T>>(values, frame_of_reference, width, dest);

From 61fe4bf98293ccf0e86958404c6bcc6b77c79f7b Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Fri, 21 Jun 2024 15:56:42 +0800
Subject: [PATCH 10/27] work with non-integer type

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 .../Compression/CompressionCodecDeltaFOR.cpp  | 128 +++---
 .../IO/Compression/CompressionCodecDeltaFOR.h |   6 +-
 .../IO/Compression/CompressionCodecFOR.cpp    | 110 +++---
 dbms/src/IO/Compression/CompressionCodecFOR.h |   6 +-
 .../CompressionCodecLightweight.cpp           | 126 ++++++
 ...weight.h => CompressionCodecLightweight.h} |  40 +-
 ... CompressionCodecLightweight_Interger.cpp} | 366 ++++++++----------
 ...CompressionCodecLightweight_NonInteger.cpp |  53 +++
 .../Compression/CompressionCodecRunLength.cpp | 105 +++--
 .../Compression/CompressionCodecRunLength.h   |   8 +-
 dbms/src/IO/Compression/CompressionFactory.h  |   4 +-
 dbms/src/IO/Compression/CompressionInfo.h     |  11 +
 dbms/src/IO/Compression/CompressionSettings.h |   2 +-
 dbms/src/IO/Compression/EncodingUtil.h        |   7 +
 .../tests/gtest_codec_compression.cpp         |   4 +-
 .../Storages/DeltaMerge/File/DMFileWriter.h   |  21 +-
 16 files changed, 585 insertions(+), 412 deletions(-)
 create mode 100644 dbms/src/IO/Compression/CompressionCodecLightweight.cpp
 rename dbms/src/IO/Compression/{CompressionCodecIntegerLightweight.h => CompressionCodecLightweight.h} (68%)
 rename dbms/src/IO/Compression/{CompressionCodecIntegerLightweight.cpp => CompressionCodecLightweight_Interger.cpp} (63%)
 create mode 100644 dbms/src/IO/Compression/CompressionCodecLightweight_NonInteger.cpp

diff --git a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
index 64df56856e5..099dfd900ba 100644
--- a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
@@ -17,10 +17,13 @@
 #include <IO/Compression/CompressionCodecDeltaFOR.h>
 #include <IO/Compression/CompressionCodecFOR.h>
 #include <IO/Compression/CompressionInfo.h>
+#include <IO/Compression/CompressionSettings.h>
 #include <IO/Compression/EncodingUtil.h>
 #include <common/likely.h>
 #include <common/unaligned.h>
+#include <lz4.h>
 
+#include <magic_enum.hpp>
 
 namespace DB
 {
@@ -31,8 +34,8 @@ extern const int CANNOT_COMPRESS;
 extern const int CANNOT_DECOMPRESS;
 } // namespace ErrorCodes
 
-CompressionCodecDeltaFOR::CompressionCodecDeltaFOR(UInt8 bytes_size_)
-    : bytes_size(bytes_size_)
+CompressionCodecDeltaFOR::CompressionCodecDeltaFOR(CompressionDataType data_type_)
+    : data_type(data_type_)
 {}
 
 UInt8 CompressionCodecDeltaFOR::getMethodByte() const
@@ -42,12 +45,22 @@ UInt8 CompressionCodecDeltaFOR::getMethodByte() const
 
 UInt32 CompressionCodecDeltaFOR::getMaxCompressedDataSize(UInt32 uncompressed_size) const
 {
-    /**
-     *|bytes_of_original_type|frame_of_reference|width(bits)  |bitpacked data|
-     *|1 bytes               |bytes_size        |sizeof(UInt8)|required size |
-     */
-    const size_t count = uncompressed_size / bytes_size;
-    return 1 + bytes_size + sizeof(UInt8) + BitpackingPrimitives::getRequiredSize(count, bytes_size * 8);
+    switch (data_type)
+    {
+    case CompressionDataType::Int8:
+    case CompressionDataType::Int16:
+    case CompressionDataType::Int32:
+    case CompressionDataType::Int64:
+    {
+        // |bytes_of_original_type|frame_of_reference|width(bits)  |bitpacked data|
+        // |1 bytes               |bytes_size        |sizeof(UInt8)|required size |
+        auto bytes_size = magic_enum::enum_integer(data_type);
+        const size_t count = uncompressed_size / bytes_size;
+        return 1 + bytes_size + sizeof(UInt8) + BitpackingPrimitives::getRequiredSize(count, bytes_size * 8);
+    }
+    default:
+        return 1 + LZ4_COMPRESSBOUND(uncompressed_size);
+    }
 }
 
 namespace
@@ -56,35 +69,44 @@ namespace
 template <std::integral T>
 UInt32 compressData(const char * source, UInt32 source_size, char * dest)
 {
+    constexpr auto bytes_size = sizeof(T);
+    if unlikely (source_size % bytes_size != 0)
+        throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "source size {} is not aligned to {}", source_size, bytes_size);
     const auto count = source_size / sizeof(T);
     DB::Compression::deltaEncoding<T>(reinterpret_cast<const T *>(source), count, reinterpret_cast<T *>(dest));
     // Cast deltas to signed type to better compress negative values.
     // For example, if we have a sequence of UInt8 values [3, 2, 1, 0], the deltas will be [3, -1, -1, -1]
     // If we compress them as UInt8, we will get [3, 255, 255, 255], which is not optimal.
     using TS = typename std::make_signed<T>::type;
-    return DB::CompressionCodecFOR::compressData<TS>(reinterpret_cast<TS *>(dest), count, dest);
+    return DB::CompressionCodecFOR::compressData<TS>(reinterpret_cast<TS *>(dest), source_size, dest);
 }
 
 } // namespace
 
 UInt32 CompressionCodecDeltaFOR::doCompressData(const char * source, UInt32 source_size, char * dest) const
 {
-    if unlikely (source_size % bytes_size != 0)
-        throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "source size {} is not aligned to {}", source_size, bytes_size);
-    dest[0] = bytes_size;
-    size_t start_pos = 1;
-    switch (bytes_size)
+    dest[0] = magic_enum::enum_integer(data_type);
+    dest += 1;
+    switch (data_type)
     {
-    case 1:
-        return 1 + compressData<UInt8>(source, source_size, &dest[start_pos]);
-    case 2:
-        return 1 + compressData<UInt16>(source, source_size, &dest[start_pos]);
-    case 4:
-        return 1 + compressData<UInt32>(source, source_size, &dest[start_pos]);
-    case 8:
-        return 1 + compressData<UInt64>(source, source_size, &dest[start_pos]);
+    case CompressionDataType::Int8:
+        return 1 + compressData<UInt8>(source, source_size, dest);
+    case CompressionDataType::Int16:
+        return 1 + compressData<UInt16>(source, source_size, dest);
+    case CompressionDataType::Int32:
+        return 1 + compressData<UInt32>(source, source_size, dest);
+    case CompressionDataType::Int64:
+        return 1 + compressData<UInt64>(source, source_size, dest);
     default:
-        throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress DeltaFor-encoded data. Unsupported bytes size");
+        auto success = LZ4_compress_fast(
+            source,
+            dest,
+            source_size,
+            LZ4_COMPRESSBOUND(source_size),
+            CompressionSetting::getDefaultLevel(CompressionMethod::LZ4));
+        if (!success)
+            throw Exception("Cannot LZ4_compress_fast", ErrorCodes::CANNOT_COMPRESS);
+        return 1 + success;
     }
 }
 
@@ -103,32 +125,28 @@ void CompressionCodecDeltaFOR::doDecompressData(
         return;
 
     UInt8 bytes_size = source[0];
-    if unlikely (uncompressed_size % bytes_size != 0)
-        throw Exception(
-            ErrorCodes::CANNOT_DECOMPRESS,
-            "uncompressed size {} is not aligned to {}",
-            uncompressed_size,
-            bytes_size);
+    auto data_type = magic_enum::enum_cast<CompressionDataType>(bytes_size);
+    RUNTIME_CHECK(data_type.has_value());
 
     UInt32 source_size_no_header = source_size - 1;
-    switch (bytes_size)
+    switch (data_type.value())
     {
-    case 1:
+    case CompressionDataType::Int8:
         DB::Compression::deltaFORDecoding<UInt8>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
-    case 2:
+    case CompressionDataType::Int16:
         DB::Compression::deltaFORDecoding<UInt16>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
-    case 4:
+    case CompressionDataType::Int32:
         DB::Compression::deltaFORDecoding<UInt32>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
-    case 8:
+    case CompressionDataType::Int64:
         DB::Compression::deltaFORDecoding<UInt64>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     default:
-        throw Exception(
-            ErrorCodes::CANNOT_DECOMPRESS,
-            "Cannot decompress DeltaFor-encoded data. Unsupported bytes size");
+        if (unlikely(LZ4_decompress_safe(&source[1], dest, source_size_no_header, uncompressed_size) < 0))
+            throw Exception("Cannot LZ4_decompress_safe", ErrorCodes::CANNOT_DECOMPRESS);
+        break;
     }
 }
 
@@ -136,43 +154,39 @@ void CompressionCodecDeltaFOR::ordinaryDecompress(
     const char * source,
     UInt32 source_size,
     char * dest,
-    UInt32 dest_size)
+    UInt32 uncompressed_size)
 {
     if unlikely (source_size < 2)
         throw Exception(
             ErrorCodes::CANNOT_DECOMPRESS,
             "Cannot decompress DeltaFor-encoded data. File has wrong header");
 
-    if (dest_size == 0)
+    if (uncompressed_size == 0)
         return;
 
     UInt8 bytes_size = source[0];
-    if unlikely (dest_size % bytes_size != 0)
-        throw Exception(
-            ErrorCodes::CANNOT_DECOMPRESS,
-            "uncompressed size {} is not aligned to {}",
-            dest_size,
-            bytes_size);
+    auto data_type = magic_enum::enum_cast<CompressionDataType>(bytes_size);
+    RUNTIME_CHECK(data_type.has_value());
 
     UInt32 source_size_no_header = source_size - 1;
-    switch (bytes_size)
+    switch (data_type.value())
     {
-    case 1:
-        DB::Compression::ordinaryDeltaFORDecoding<UInt8>(&source[1], source_size_no_header, dest, dest_size);
+    case CompressionDataType::Int8:
+        DB::Compression::ordinaryDeltaFORDecoding<UInt8>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
-    case 2:
-        DB::Compression::ordinaryDeltaFORDecoding<UInt16>(&source[1], source_size_no_header, dest, dest_size);
+    case CompressionDataType::Int16:
+        DB::Compression::ordinaryDeltaFORDecoding<UInt16>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
-    case 4:
-        DB::Compression::ordinaryDeltaFORDecoding<UInt32>(&source[1], source_size_no_header, dest, dest_size);
+    case CompressionDataType::Int32:
+        DB::Compression::ordinaryDeltaFORDecoding<UInt32>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
-    case 8:
-        DB::Compression::ordinaryDeltaFORDecoding<UInt64>(&source[1], source_size_no_header, dest, dest_size);
+    case CompressionDataType::Int64:
+        DB::Compression::ordinaryDeltaFORDecoding<UInt64>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     default:
-        throw Exception(
-            ErrorCodes::CANNOT_DECOMPRESS,
-            "Cannot decompress DeltaFor-encoded data. Unsupported bytes size");
+        if (unlikely(LZ4_decompress_safe(&source[1], dest, source_size_no_header, uncompressed_size) < 0))
+            throw Exception("Cannot LZ4_decompress_safe", ErrorCodes::CANNOT_DECOMPRESS);
+        break;
     }
 }
 
diff --git a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.h b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.h
index 316f4be72a9..5faf713e864 100644
--- a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.h
+++ b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.h
@@ -22,11 +22,11 @@ namespace DB
 class CompressionCodecDeltaFOR : public ICompressionCodec
 {
 public:
-    explicit CompressionCodecDeltaFOR(UInt8 bytes_size_);
+    explicit CompressionCodecDeltaFOR(CompressionDataType data_type_);
 
     UInt8 getMethodByte() const override;
 
-    static void ordinaryDecompress(const char * source, UInt32 source_size, char * dest, UInt32 dest_size);
+    static void ordinaryDecompress(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size);
 
 #ifndef DBMS_PUBLIC_GTEST
 protected:
@@ -42,7 +42,7 @@ class CompressionCodecDeltaFOR : public ICompressionCodec
     bool isGenericCompression() const override { return false; }
 
 private:
-    const UInt8 bytes_size;
+    const CompressionDataType data_type;
 };
 
 } // namespace DB
diff --git a/dbms/src/IO/Compression/CompressionCodecFOR.cpp b/dbms/src/IO/Compression/CompressionCodecFOR.cpp
index 55f0fadc803..881232b8155 100644
--- a/dbms/src/IO/Compression/CompressionCodecFOR.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecFOR.cpp
@@ -16,9 +16,13 @@
 #include <Common/Exception.h>
 #include <IO/Compression/CompressionCodecFOR.h>
 #include <IO/Compression/CompressionInfo.h>
+#include <IO/Compression/CompressionSettings.h>
 #include <IO/Compression/EncodingUtil.h>
 #include <common/likely.h>
 #include <common/unaligned.h>
+#include <lz4.h>
+
+#include <magic_enum.hpp>
 
 
 namespace DB
@@ -30,8 +34,8 @@ extern const int CANNOT_COMPRESS;
 extern const int CANNOT_DECOMPRESS;
 } // namespace ErrorCodes
 
-CompressionCodecFOR::CompressionCodecFOR(UInt8 bytes_size_)
-    : bytes_size(bytes_size_)
+CompressionCodecFOR::CompressionCodecFOR(CompressionDataType data_type_)
+    : data_type(data_type_)
 {}
 
 UInt8 CompressionCodecFOR::getMethodByte() const
@@ -41,18 +45,33 @@ UInt8 CompressionCodecFOR::getMethodByte() const
 
 UInt32 CompressionCodecFOR::getMaxCompressedDataSize(UInt32 uncompressed_size) const
 {
-    /**
-     *|bytes_of_original_type|frame_of_reference|width(bits)  |bitpacked data|
-     *|1 bytes               |bytes_size        |sizeof(UInt8)|required size |
-     */
-    const size_t count = uncompressed_size / bytes_size;
-    return 1 + bytes_size + sizeof(UInt8) + BitpackingPrimitives::getRequiredSize(count, bytes_size * 8);
+    switch (data_type)
+    {
+    case CompressionDataType::Int8:
+    case CompressionDataType::Int16:
+    case CompressionDataType::Int32:
+    case CompressionDataType::Int64:
+    {
+        // |bytes_of_original_type|frame_of_reference|width(bits)  |bitpacked data|
+        // |1 bytes               |bytes_size        |sizeof(UInt8)|required size |
+        auto bytes_size = magic_enum::enum_integer(data_type);
+        const size_t count = uncompressed_size / bytes_size;
+        return 1 + bytes_size + sizeof(UInt8) + BitpackingPrimitives::getRequiredSize(count, bytes_size * 8);
+    }
+    default:
+        return 1 + LZ4_COMPRESSBOUND(uncompressed_size);
+    }
 }
 
 template <std::integral T>
-UInt32 CompressionCodecFOR::compressData(const T * source, UInt32 count, char * dest)
+UInt32 CompressionCodecFOR::compressData(const T * source, UInt32 source_size, char * dest)
 {
-    assert(count > 0); // doCompressData ensure it
+    constexpr size_t bytes_size = sizeof(T);
+    if unlikely (source_size % bytes_size != 0)
+        throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "source size {} is not aligned to {}", source_size, bytes_size);
+    auto count = source_size / bytes_size;
+    if unlikely (count == 0)
+        throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress empty data");
     std::vector<T> values(source, source + count);
     T frame_of_reference = *std::min_element(values.cbegin(), values.cend());
     UInt8 width = DB::Compression::FOREncodingWidth(values, frame_of_reference);
@@ -61,25 +80,28 @@ UInt32 CompressionCodecFOR::compressData(const T * source, UInt32 count, char *
 
 UInt32 CompressionCodecFOR::doCompressData(const char * source, UInt32 source_size, char * dest) const
 {
-    if unlikely (source_size % bytes_size != 0)
-        throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "source size {} is not aligned to {}", source_size, bytes_size);
-    dest[0] = bytes_size;
-    auto count = source_size / bytes_size;
-    switch (bytes_size)
+    dest[0] = magic_enum::enum_integer(data_type);
+    dest += 1;
+    switch (data_type)
     {
-    case 1:
-        return 1 + compressData<UInt8>(reinterpret_cast<const UInt8 *>(source), count, &dest[1]);
-    case 2:
-        return 1 + compressData<UInt16>(reinterpret_cast<const UInt16 *>(source), count, &dest[1]);
-    case 4:
-        return 1 + compressData<UInt32>(reinterpret_cast<const UInt32 *>(source), count, &dest[1]);
-    case 8:
-        return 1 + compressData<UInt64>(reinterpret_cast<const UInt64 *>(source), count, &dest[1]);
+    case CompressionDataType::Int8:
+        return 1 + compressData<UInt8>(reinterpret_cast<const UInt8 *>(source), source_size, dest);
+    case CompressionDataType::Int16:
+        return 1 + compressData<UInt16>(reinterpret_cast<const UInt16 *>(source), source_size, dest);
+    case CompressionDataType::Int32:
+        return 1 + compressData<UInt32>(reinterpret_cast<const UInt32 *>(source), source_size, dest);
+    case CompressionDataType::Int64:
+        return 1 + compressData<UInt64>(reinterpret_cast<const UInt64 *>(source), source_size, dest);
     default:
-        throw Exception(
-            ErrorCodes::CANNOT_COMPRESS,
-            "Cannot compress For-encoded data. Unsupported bytes size: {}",
-            bytes_size);
+        auto success = LZ4_compress_fast(
+            source,
+            dest,
+            source_size,
+            LZ4_COMPRESSBOUND(source_size),
+            CompressionSetting::getDefaultLevel(CompressionMethod::LZ4));
+        if (!success)
+            throw Exception("Cannot LZ4_compress_fast", ErrorCodes::CANNOT_COMPRESS);
+        return 1 + success;
     }
 }
 
@@ -96,42 +118,36 @@ void CompressionCodecFOR::doDecompressData(
         return;
 
     UInt8 bytes_size = source[0];
-    if unlikely (uncompressed_size % bytes_size != 0)
-        throw Exception(
-            ErrorCodes::CANNOT_DECOMPRESS,
-            "uncompressed size {} is not aligned to {}",
-            uncompressed_size,
-            bytes_size);
+    auto data_type = magic_enum::enum_cast<CompressionDataType>(bytes_size);
+    RUNTIME_CHECK(data_type.has_value());
 
     UInt32 source_size_no_header = source_size - 1;
-    switch (bytes_size)
+    switch (data_type.value())
     {
-    case 1:
+    case CompressionDataType::Int8:
         DB::Compression::FORDecoding<UInt8>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
-    case 2:
+    case CompressionDataType::Int16:
         DB::Compression::FORDecoding<UInt16>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
-    case 4:
+    case CompressionDataType::Int32:
         DB::Compression::FORDecoding<UInt32>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
-    case 8:
+    case CompressionDataType::Int64:
         DB::Compression::FORDecoding<UInt64>(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
     default:
-        throw Exception(
-            ErrorCodes::CANNOT_DECOMPRESS,
-            "Cannot decompress For-encoded data. Unsupported bytes size: {}",
-            bytes_size);
+        if (unlikely(LZ4_decompress_safe(&source[1], dest, source_size_no_header, uncompressed_size) < 0))
+            throw Exception("Cannot LZ4_decompress_safe", ErrorCodes::CANNOT_DECOMPRESS);
+        break;
     }
 }
 
-
 // The following instantiations are used in CompressionCodecDeltaFor.cpp
 
-template UInt32 CompressionCodecFOR::compressData<Int8>(const Int8 * source, UInt32 count, char * dest);
-template UInt32 CompressionCodecFOR::compressData<Int16>(const Int16 * source, UInt32 count, char * dest);
-template UInt32 CompressionCodecFOR::compressData<Int32>(const Int32 * source, UInt32 count, char * dest);
-template UInt32 CompressionCodecFOR::compressData<Int64>(const Int64 * source, UInt32 count, char * dest);
+template UInt32 CompressionCodecFOR::compressData<Int8>(const Int8 * source, UInt32 source_size, char * dest);
+template UInt32 CompressionCodecFOR::compressData<Int16>(const Int16 * source, UInt32 source_size, char * dest);
+template UInt32 CompressionCodecFOR::compressData<Int32>(const Int32 * source, UInt32 source_size, char * dest);
+template UInt32 CompressionCodecFOR::compressData<Int64>(const Int64 * source, UInt32 source_size, char * dest);
 
 } // namespace DB
diff --git a/dbms/src/IO/Compression/CompressionCodecFOR.h b/dbms/src/IO/Compression/CompressionCodecFOR.h
index 75dd8b91734..824c36276cf 100644
--- a/dbms/src/IO/Compression/CompressionCodecFOR.h
+++ b/dbms/src/IO/Compression/CompressionCodecFOR.h
@@ -30,12 +30,12 @@ namespace DB
 class CompressionCodecFOR : public ICompressionCodec
 {
 public:
-    explicit CompressionCodecFOR(UInt8 bytes_size_);
+    explicit CompressionCodecFOR(CompressionDataType data_type_);
 
     UInt8 getMethodByte() const override;
 
     template <std::integral T>
-    static UInt32 compressData(const T * source, UInt32 count, char * dest);
+    static UInt32 compressData(const T * source, UInt32 source_size, char * dest);
 
 #ifndef DBMS_PUBLIC_GTEST
 protected:
@@ -51,7 +51,7 @@ class CompressionCodecFOR : public ICompressionCodec
     bool isGenericCompression() const override { return false; }
 
 private:
-    const UInt8 bytes_size;
+    const CompressionDataType data_type;
 };
 
 } // namespace DB
diff --git a/dbms/src/IO/Compression/CompressionCodecLightweight.cpp b/dbms/src/IO/Compression/CompressionCodecLightweight.cpp
new file mode 100644
index 00000000000..efa0b77309d
--- /dev/null
+++ b/dbms/src/IO/Compression/CompressionCodecLightweight.cpp
@@ -0,0 +1,126 @@
+// Copyright 2024 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <Common/Exception.h>
+#include <IO/Compression/CompressionCodecLightweight.h>
+#include <common/likely.h>
+#include <lz4.h>
+
+#include <magic_enum.hpp>
+
+
+namespace DB
+{
+
+// TODO: metrics
+
+namespace ErrorCodes
+{
+extern const int CANNOT_COMPRESS;
+extern const int CANNOT_DECOMPRESS;
+} // namespace ErrorCodes
+
+CompressionCodecLightweight::CompressionCodecLightweight(CompressionDataType data_type_)
+    : data_type(data_type_) // element type; doCompressData() dispatches on it and stores it as the header byte
+{}
+
+UInt8 CompressionCodecLightweight::getMethodByte() const
+{
+    return static_cast<UInt8>(CompressionMethodByte::Lightweight); // same method byte whatever data_type is
+}
+
+UInt32 CompressionCodecLightweight::getMaxCompressedDataSize(UInt32 uncompressed_size) const
+{
+    // 1 byte for the data type, 1 byte for the integer mode (integer path only), plus the LZ4 worst-case bound
+    return 1 + 1 + LZ4_COMPRESSBOUND(uncompressed_size);
+}
+
+CompressionCodecLightweight::~CompressionCodecLightweight()
+{
+    if (ctx.isCompression()) // true once a compression has been counted, i.e. there are statistics to report
+        LOG_INFO(Logger::get(), "lightweight codec: {}", ctx.toDebugString());
+}
+
+UInt32 CompressionCodecLightweight::doCompressData(const char * source, UInt32 source_size, char * dest) const
+{
+    dest[0] = magic_enum::enum_integer(data_type); // header byte: the data type, read back by doDecompressData()
+    dest += 1;
+    switch (data_type)
+    {
+    case CompressionDataType::Int8:
+        return 1 + compressDataForInteger<UInt8>(source, source_size, dest); // 1 = the header byte above
+    case CompressionDataType::Int16:
+        return 1 + compressDataForInteger<UInt16>(source, source_size, dest);
+    case CompressionDataType::Int32:
+        return 1 + compressDataForInteger<UInt32>(source, source_size, dest);
+    case CompressionDataType::Int64:
+        return 1 + compressDataForInteger<UInt64>(source, source_size, dest);
+    case CompressionDataType::Float32:
+    case CompressionDataType::Float64:
+    case CompressionDataType::String: // floats and strings all take the LZ4-only path
+        return 1 + compressDataForNonInteger(source, source_size, dest);
+    default:
+        throw Exception(
+            ErrorCodes::CANNOT_COMPRESS,
+            "Cannot compress lightweight codec data. Invalid data type {}",
+            magic_enum::enum_name(data_type));
+    }
+}
+
+void CompressionCodecLightweight::doDecompressData(
+    const char * source,
+    UInt32 source_size,
+    char * dest,
+    UInt32 uncompressed_size) const
+{
+    if unlikely (source_size < 2) // need the data type byte plus at least one payload byte
+        throw Exception(
+            ErrorCodes::CANNOT_DECOMPRESS,
+            "Cannot decompress lightweight codec data. File has wrong header");
+
+    if (uncompressed_size == 0)
+        return;
+
+    const auto type_byte = static_cast<UInt8>(source[0]); // data type tag written by doCompressData()
+    const auto stored_type = magic_enum::enum_cast<CompressionDataType>(type_byte);
+    if unlikely (!stored_type.has_value())
+        throw Exception(
+            ErrorCodes::CANNOT_DECOMPRESS,
+            "Cannot decompress lightweight codec data. File has wrong header, unknown data type {}",
+            type_byte);
+
+    UInt32 source_size_no_header = source_size - 1;
+    switch (stored_type.value()) // dispatch on the type stored in the block, not on the member `data_type`
+    {
+    case CompressionDataType::Int8:
+        decompressDataForInteger<UInt8>(&source[1], source_size_no_header, dest, uncompressed_size);
+        break;
+    case CompressionDataType::Int16:
+        decompressDataForInteger<UInt16>(&source[1], source_size_no_header, dest, uncompressed_size);
+        break;
+    case CompressionDataType::Int32:
+        decompressDataForInteger<UInt32>(&source[1], source_size_no_header, dest, uncompressed_size);
+        break;
+    case CompressionDataType::Int64:
+        decompressDataForInteger<UInt64>(&source[1], source_size_no_header, dest, uncompressed_size);
+        break;
+    case CompressionDataType::Float32:
+    case CompressionDataType::Float64:
+    case CompressionDataType::String: // floats and strings were stored through the LZ4-only path
+        decompressDataForNonInteger(&source[1], source_size_no_header, dest, uncompressed_size);
+        break;
+    }
+}
+
+} // namespace DB
diff --git a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h b/dbms/src/IO/Compression/CompressionCodecLightweight.h
similarity index 68%
rename from dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h
rename to dbms/src/IO/Compression/CompressionCodecLightweight.h
index 76b7db18599..927ab466bce 100644
--- a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h
+++ b/dbms/src/IO/Compression/CompressionCodecLightweight.h
@@ -22,14 +22,20 @@
 namespace DB
 {
 
-class CompressionCodecIntegerLightweight : public ICompressionCodec
+/**
+ * @brief Lightweight compression codec
+ * For integer data, it supports constant, constant delta, run-length, frame of reference, delta frame of reference, and LZ4.
+ * For non-integer data, it supports LZ4.
+ * The codec selects the best mode for each block of data.
+ */
+class CompressionCodecLightweight : public ICompressionCodec
 {
 public:
-    explicit CompressionCodecIntegerLightweight(UInt8 bytes_size_);
+    explicit CompressionCodecLightweight(CompressionDataType data_type_);
 
     UInt8 getMethodByte() const override;
 
-    ~CompressionCodecIntegerLightweight() override;
+    ~CompressionCodecLightweight() override;
 
 protected:
     UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
@@ -42,7 +48,9 @@ class CompressionCodecIntegerLightweight : public ICompressionCodec
     bool isGenericCompression() const override { return false; }
 
 private:
-    enum class Mode : UInt8
+    /// Integer data
+
+    enum class IntegerMode : UInt8
     {
         Invalid = 0,
         CONSTANT = 1, // all values are the same
@@ -79,26 +87,26 @@ class CompressionCodecIntegerLightweight : public ICompressionCodec
 
     // State is a union of different states for different modes
     template <typename T>
-    using State = std::variant<ConstantState<T>, RunLengthState<T>, FORState<T>, DeltaFORState<T>>;
+    using IntegerState = std::variant<ConstantState<T>, RunLengthState<T>, FORState<T>, DeltaFORState<T>>;
 
-    class CompressContext
+    class IntegerCompressContext
     {
     public:
-        CompressContext() = default;
+        IntegerCompressContext() = default;
 
         bool needAnalyze() const;
         bool needAnalyzeDelta() const;
         bool needAnalyzeRunLength() const;
 
         template <typename T>
-        void analyze(std::span<const T> & values, State<T> & state);
+        void analyze(std::span<const T> & values, IntegerState<T> & state);
 
         void update(size_t uncompressed_size, size_t compressed_size);
 
         String toDebugString() const;
         bool isCompression() const { return lz4_counter > 0 || lw_counter > 0; }
 
-        Mode mode = Mode::LZ4;
+        IntegerMode mode = IntegerMode::LZ4;
 
     private:
         size_t lw_uncompressed_size = 0;
@@ -113,13 +121,19 @@ class CompressionCodecIntegerLightweight : public ICompressionCodec
     };
 
     template <typename T>
-    size_t compressDataForType(const char * source, UInt32 source_size, char * dest) const;
+    size_t compressDataForInteger(const char * source, UInt32 source_size, char * dest) const;
 
     template <typename T>
-    void decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 output_size) const;
+    void decompressDataForInteger(const char * source, UInt32 source_size, char * dest, UInt32 output_size) const;
+
+    /// Non-integer data
 
-    mutable CompressContext ctx;
-    const UInt8 bytes_size;
+    static size_t compressDataForNonInteger(const char * source, UInt32 source_size, char * dest);
+    static void decompressDataForNonInteger(const char * source, UInt32 source_size, char * dest, UInt32 output_size);
+
+private:
+    mutable IntegerCompressContext ctx;
+    const CompressionDataType data_type;
 };
 
 } // namespace DB
diff --git a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp b/dbms/src/IO/Compression/CompressionCodecLightweight_Interger.cpp
similarity index 63%
rename from dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp
rename to dbms/src/IO/Compression/CompressionCodecLightweight_Interger.cpp
index f2962fcbdbc..e57050f9bcb 100644
--- a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecLightweight_Interger.cpp
@@ -12,167 +12,23 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include <Common/BitpackingPrimitives.h>
 #include <Common/Exception.h>
-#include <IO/Compression/CompressionCodecIntegerLightweight.h>
-#include <IO/Compression/CompressionInfo.h>
+#include <IO/Compression/CompressionCodecLightweight.h>
 #include <IO/Compression/CompressionSettings.h>
 #include <IO/Compression/EncodingUtil.h>
-#include <common/likely.h>
-#include <common/unaligned.h>
+#include <fmt/format.h>
 #include <lz4.h>
 
-#include <algorithm>
-#include <limits>
-
-
 namespace DB
 {
 
-// TODO: metrics
-
 namespace ErrorCodes
 {
 extern const int CANNOT_COMPRESS;
 extern const int CANNOT_DECOMPRESS;
 } // namespace ErrorCodes
 
-CompressionCodecIntegerLightweight::CompressionCodecIntegerLightweight(UInt8 bytes_size_)
-    : bytes_size(bytes_size_)
-{}
-
-UInt8 CompressionCodecIntegerLightweight::getMethodByte() const
-{
-    return static_cast<UInt8>(CompressionMethodByte::Lightweight);
-}
-
-UInt32 CompressionCodecIntegerLightweight::getMaxCompressedDataSize(UInt32 uncompressed_size) const
-{
-    // 1 byte for bytes_size, 1 byte for mode, and the rest for compressed data
-    return 1 + 1 + LZ4_COMPRESSBOUND(uncompressed_size);
-}
-
-CompressionCodecIntegerLightweight::~CompressionCodecIntegerLightweight()
-{
-    if (ctx.isCompression())
-        LOG_INFO(Logger::get(), "lightweight codec: {}", ctx.toDebugString());
-}
-
-template <typename T>
-size_t CompressionCodecIntegerLightweight::compressDataForType(const char * source, UInt32 source_size, char * dest)
-    const
-{
-    // Load values
-    const size_t count = source_size / sizeof(T);
-    std::span<const T> values(reinterpret_cast<const T *>(source), count);
-
-    // Analyze
-    State<T> state;
-    ctx.analyze<T>(values, state);
-
-    // Compress
-    unalignedStore<UInt8>(dest, static_cast<UInt8>(ctx.mode));
-    dest += sizeof(UInt8);
-    size_t compressed_size = 1;
-    switch (ctx.mode)
-    {
-    case Mode::CONSTANT:
-    {
-        compressed_size += Compression::constantEncoding(std::get<0>(state), dest);
-        break;
-    }
-    case Mode::CONSTANT_DELTA:
-    {
-        compressed_size += Compression::constantDeltaEncoding(values[0], std::get<0>(state), dest);
-        break;
-    }
-    case Mode::RunLength:
-    {
-        compressed_size += Compression::runLengthEncoding<T>(std::get<1>(state), dest);
-        break;
-    }
-    case Mode::FOR:
-    {
-        FORState for_state = std::get<2>(state);
-        compressed_size += Compression::FOREncoding(for_state.values, for_state.min_value, for_state.bit_width, dest);
-        break;
-    }
-    case Mode::DELTA_FOR:
-    {
-        DeltaFORState delta_for_state = std::get<3>(state);
-        compressed_size += Compression::FOREncoding<typename std::make_signed_t<T>, true>(
-            delta_for_state.deltas,
-            delta_for_state.min_delta_value,
-            delta_for_state.bit_width,
-            dest);
-        break;
-    }
-    case Mode::LZ4:
-    {
-        auto success = LZ4_compress_fast(
-            source,
-            dest,
-            source_size,
-            LZ4_COMPRESSBOUND(source_size),
-            CompressionSetting::getDefaultLevel(CompressionMethod::LZ4));
-        if (!success)
-            throw Exception("Cannot LZ4_compress_fast", ErrorCodes::CANNOT_COMPRESS);
-        compressed_size += success;
-        break;
-    }
-    default:
-        throw Exception(
-            ErrorCodes::CANNOT_COMPRESS,
-            "Cannot compress with lightweight codec, unknown mode {}",
-            static_cast<int>(ctx.mode));
-    }
-
-    // Update statistics
-    ctx.update(source_size, compressed_size);
-
-    return compressed_size;
-}
-
-template <typename T>
-void CompressionCodecIntegerLightweight::decompressDataForType(
-    const char * source,
-    UInt32 source_size,
-    char * dest,
-    UInt32 output_size) const
-{
-    auto mode = static_cast<Mode>(unalignedLoad<UInt8>(source));
-    source += sizeof(UInt8);
-    source_size -= sizeof(UInt8);
-    switch (mode)
-    {
-    case Mode::CONSTANT:
-        Compression::constantDecoding<T>(source, source_size, dest, output_size);
-        break;
-    case Mode::CONSTANT_DELTA:
-        Compression::constantDeltaDecoding<T>(source, source_size, dest, output_size);
-        break;
-    case Mode::RunLength:
-        Compression::runLengthDecoding<T>(source, source_size, dest, output_size);
-        break;
-    case Mode::FOR:
-        Compression::FORDecoding<T>(source, source_size, dest, output_size);
-        break;
-    case Mode::DELTA_FOR:
-        Compression::deltaFORDecoding<T>(source, source_size, dest, output_size);
-        break;
-    case Mode::LZ4:
-        if (unlikely(LZ4_decompress_safe(source, dest, source_size, output_size) < 0))
-            throw Exception("Cannot LZ4_decompress_safe", ErrorCodes::CANNOT_DECOMPRESS);
-        break;
-    default:
-        throw Exception(
-            ErrorCodes::CANNOT_DECOMPRESS,
-            "Cannot decompress with lightweight codec, unknown mode {}",
-            static_cast<int>(mode));
-    }
-}
-
-String CompressionCodecIntegerLightweight::CompressContext::toDebugString() const
+String CompressionCodecLightweight::IntegerCompressContext::toDebugString() const
 {
     return fmt::format(
         "lz4: {}, lightweight: {}, constant_delta: {}, delta_for: {}, rle: {}, lz4 {} -> {}, lightweight {} -> {}",
@@ -187,9 +43,9 @@ String CompressionCodecIntegerLightweight::CompressContext::toDebugString() cons
         lw_compressed_size);
 }
 
-void CompressionCodecIntegerLightweight::CompressContext::update(size_t uncompressed_size, size_t compressed_size)
+void CompressionCodecLightweight::IntegerCompressContext::update(size_t uncompressed_size, size_t compressed_size)
 {
-    if (mode == Mode::LZ4)
+    if (mode == IntegerMode::LZ4)
     {
         lz4_uncompressed_size += uncompressed_size;
         lz4_compressed_size += compressed_size;
@@ -201,15 +57,15 @@ void CompressionCodecIntegerLightweight::CompressContext::update(size_t uncompre
         lw_compressed_size += compressed_size;
         ++lw_counter;
     }
-    if (mode == Mode::CONSTANT_DELTA)
+    if (mode == IntegerMode::CONSTANT_DELTA)
         ++constant_delta_counter;
-    if (mode == Mode::DELTA_FOR)
+    if (mode == IntegerMode::DELTA_FOR)
         ++delta_for_counter;
-    if (mode == Mode::RunLength)
+    if (mode == IntegerMode::RunLength)
         ++rle_counter;
 }
 
-bool CompressionCodecIntegerLightweight::CompressContext::needAnalyze() const
+bool CompressionCodecLightweight::IntegerCompressContext::needAnalyze() const
 {
     // lightweight codec is never used, do not analyze anymore
     if (lz4_counter > 5 && lw_counter == 0)
@@ -220,28 +76,28 @@ bool CompressionCodecIntegerLightweight::CompressContext::needAnalyze() const
     return true;
 }
 
-bool CompressionCodecIntegerLightweight::CompressContext::needAnalyzeDelta() const
+bool CompressionCodecLightweight::IntegerCompressContext::needAnalyzeDelta() const
 {
     return lw_counter <= 5 || constant_delta_counter != 0 || delta_for_counter != 0;
 }
 
-bool CompressionCodecIntegerLightweight::CompressContext::needAnalyzeRunLength() const
+bool CompressionCodecLightweight::IntegerCompressContext::needAnalyzeRunLength() const
 {
     return lw_counter <= 5 || rle_counter != 0;
 }
 
 template <typename T>
-void CompressionCodecIntegerLightweight::CompressContext::analyze(std::span<const T> & values, State<T> & state)
+void CompressionCodecLightweight::IntegerCompressContext::analyze(std::span<const T> & values, IntegerState<T> & state)
 {
     if (values.empty())
     {
-        mode = Mode::Invalid;
+        mode = IntegerMode::Invalid;
         return;
     }
 
     if (!needAnalyze())
     {
-        RUNTIME_CHECK(mode == Mode::LZ4);
+        RUNTIME_CHECK(mode == IntegerMode::LZ4);
         return;
     }
 
@@ -251,7 +107,7 @@ void CompressionCodecIntegerLightweight::CompressContext::analyze(std::span<cons
     if (min_value == max_value)
     {
         state = min_value;
-        mode = Mode::CONSTANT;
+        mode = IntegerMode::CONSTANT;
         return;
     }
 
@@ -273,7 +129,7 @@ void CompressionCodecIntegerLightweight::CompressContext::analyze(std::span<cons
         if (min_delta == *std::max_element(deltas.cbegin(), deltas.cend()))
         {
             state = static_cast<T>(min_delta);
-            mode = Mode::CONSTANT_DELTA;
+            mode = IntegerMode::CONSTANT_DELTA;
             return;
         }
 
@@ -309,103 +165,189 @@ void CompressionCodecIntegerLightweight::CompressContext::analyze(std::span<cons
     if (needAnalyzeRunLength() && rle_size < delta_for_size && rle_size < for_size && rle_size < estimate_lz_size)
     {
         state = std::move(rle);
-        mode = Mode::RunLength;
+        mode = IntegerMode::RunLength;
     }
     else if (for_size < delta_for_size && for_size < estimate_lz_size)
     {
         std::vector<T> values_copy(values.begin(), values.end());
         state = FORState<T>{std::move(values_copy), min_value, for_width};
-        mode = Mode::FOR;
+        mode = IntegerMode::FOR;
     }
     else if (needAnalyzeDelta() && delta_for_size < estimate_lz_size)
     {
         state = DeltaFORState<T>{std::move(deltas), min_delta, delta_for_width};
-        mode = Mode::DELTA_FOR;
+        mode = IntegerMode::DELTA_FOR;
     }
     else
     {
-        mode = Mode::LZ4;
+        mode = IntegerMode::LZ4;
     }
 }
 
-UInt32 CompressionCodecIntegerLightweight::doCompressData(const char * source, UInt32 source_size, char * dest) const
+template <typename T>
+size_t CompressionCodecLightweight::compressDataForInteger(const char * source, UInt32 source_size, char * dest) const
 {
+    const auto bytes_size = static_cast<UInt8>(data_type); // enum value is used as the element width, see assert
+    assert(bytes_size == sizeof(T));
     if unlikely (source_size % bytes_size != 0)
         throw Exception(
             ErrorCodes::CANNOT_COMPRESS,
-            "Cannot compress with lightweight codec, data size {} is not aligned to {}",
+            "Cannot compress with lightweight-integer codec, data size {} is not aligned to {}",
             source_size,
             bytes_size);
 
-    dest[0] = bytes_size;
-    dest += 1;
-    switch (bytes_size)
+    // View the input as `count` values of T; the span does not copy the data
+    const size_t count = source_size / bytes_size;
+    std::span<const T> values(reinterpret_cast<const T *>(source), count);
+
+    // Analyze: selects ctx.mode and fills `state` with what that mode needs
+    IntegerState<T> state;
+    ctx.analyze<T>(values, state);
+
+    // Compress: one mode byte, then the payload produced by the selected encoding
+    unalignedStore<UInt8>(dest, static_cast<UInt8>(ctx.mode));
+    dest += sizeof(UInt8);
+    size_t compressed_size = 1;
+    switch (ctx.mode)
+    {
+    case IntegerMode::CONSTANT:
     {
-    case 1:
-        return 1 + compressDataForType<UInt8>(source, source_size, dest);
-    case 2:
-        return 1 + compressDataForType<UInt16>(source, source_size, dest);
-    case 4:
-        return 1 + compressDataForType<UInt32>(source, source_size, dest);
-    case 8:
-        return 1 + compressDataForType<UInt64>(source, source_size, dest);
+        compressed_size += Compression::constantEncoding(std::get<0>(state), dest);
+        break;
+    }
+    case IntegerMode::CONSTANT_DELTA:
+    {
+        compressed_size += Compression::constantDeltaEncoding(values[0], std::get<0>(state), dest);
+        break;
+    }
+    case IntegerMode::RunLength:
+    {
+        compressed_size += Compression::runLengthEncoding<T>(std::get<1>(state), dest);
+        break;
+    }
+    case IntegerMode::FOR:
+    {
+        auto & for_state = std::get<2>(state); // by reference: a by-value copy would duplicate the vector
+        compressed_size += Compression::FOREncoding(for_state.values, for_state.min_value, for_state.bit_width, dest);
+        break;
+    }
+    case IntegerMode::DELTA_FOR:
+    {
+        auto & delta_for_state = std::get<3>(state); // by reference: avoids copying the deltas container
+        compressed_size += Compression::FOREncoding<typename std::make_signed_t<T>, true>(
+            delta_for_state.deltas,
+            delta_for_state.min_delta_value,
+            delta_for_state.bit_width,
+            dest);
+        break;
+    }
+    case IntegerMode::LZ4:
+    {
+        auto success = LZ4_compress_fast(
+            source,
+            dest,
+            source_size,
+            LZ4_COMPRESSBOUND(source_size),
+            CompressionSetting::getDefaultLevel(CompressionMethod::LZ4));
+        if (!success) // LZ4_compress_fast returns 0 on failure, otherwise the number of bytes written
+            throw Exception("Cannot LZ4_compress_fast", ErrorCodes::CANNOT_COMPRESS);
+        compressed_size += success;
+        break;
+    }
     default:
         throw Exception(
             ErrorCodes::CANNOT_COMPRESS,
-            "Cannot compress with lightweight codec, unknown bytes size {}",
-            bytes_size);
+            "Cannot compress with lightweight-integer codec, unknown mode {}",
+            static_cast<int>(ctx.mode));
     }
+
+    // Update the counters that later analyze() calls consult to skip unprofitable modes
+    ctx.update(source_size, compressed_size);
+
+    return compressed_size;
 }
 
-void CompressionCodecIntegerLightweight::doDecompressData(
+template <typename T>
+void CompressionCodecLightweight::decompressDataForInteger(
     const char * source,
     UInt32 source_size,
     char * dest,
-    UInt32 uncompressed_size) const
+    UInt32 output_size) const
 {
-    if unlikely (source_size < 2)
-        throw Exception(
-            ErrorCodes::CANNOT_DECOMPRESS,
-            "Cannot decompress lightweight-encoded data. File has wrong header");
-
-    if (uncompressed_size == 0)
-        return;
-
-    UInt8 bytes_size = source[0];
-
-    if unlikely (bytes_size != 1 && bytes_size != 2 && bytes_size != 4 && bytes_size != 8)
+    if unlikely (output_size % sizeof(T) != 0) // the output must consist of whole values of T
         throw Exception(
             ErrorCodes::CANNOT_DECOMPRESS,
-            "Cannot decompress lightweight-encoded data. File has wrong header");
+            "Cannot decompress lightweight-integer codec data. Uncompressed size {} is not aligned to {}",
+            output_size,
+            sizeof(T));
 
-    if unlikely (uncompressed_size % bytes_size != 0)
-        throw Exception(
-            ErrorCodes::CANNOT_DECOMPRESS,
-            "Cannot decompress lightweight-encoded data. Uncompressed size {} is not aligned to {}",
-            uncompressed_size,
-            bytes_size);
-
-    UInt32 source_size_no_header = source_size - 1;
-    switch (bytes_size)
+    auto mode = static_cast<IntegerMode>(unalignedLoad<UInt8>(source)); // mode byte written by the compressor
+    source += sizeof(UInt8);
+    source_size -= sizeof(UInt8); // what remains is the mode-specific payload
+    switch (mode)
     {
-    case 1:
-        decompressDataForType<UInt8>(&source[1], source_size_no_header, dest, uncompressed_size);
+    case IntegerMode::CONSTANT:
+        Compression::constantDecoding<T>(source, source_size, dest, output_size);
+        break;
+    case IntegerMode::CONSTANT_DELTA:
+        Compression::constantDeltaDecoding<T>(source, source_size, dest, output_size);
         break;
-    case 2:
-        decompressDataForType<UInt16>(&source[1], source_size_no_header, dest, uncompressed_size);
+    case IntegerMode::RunLength:
+        Compression::runLengthDecoding<T>(source, source_size, dest, output_size);
+        break;
+    case IntegerMode::FOR:
+        Compression::FORDecoding<T>(source, source_size, dest, output_size);
         break;
-    case 4:
-        decompressDataForType<UInt32>(&source[1], source_size_no_header, dest, uncompressed_size);
+    case IntegerMode::DELTA_FOR:
+        Compression::deltaFORDecoding<T>(source, source_size, dest, output_size);
         break;
-    case 8:
-        decompressDataForType<UInt64>(&source[1], source_size_no_header, dest, uncompressed_size);
+    case IntegerMode::LZ4:
+        if (unlikely(LZ4_decompress_safe(source, dest, source_size, output_size) < 0)) // negative = failure
+            throw Exception("Cannot LZ4_decompress_safe", ErrorCodes::CANNOT_DECOMPRESS);
         break;
     default:
         throw Exception(
             ErrorCodes::CANNOT_DECOMPRESS,
-            "Cannot compress with lightweight codec, unknown bytes size {}",
-            bytes_size);
+            "Cannot decompress with lightweight-integer codec, unknown mode {}",
+            static_cast<int>(mode));
     }
 }
 
+template size_t CompressionCodecLightweight::compressDataForInteger<UInt8>(
+    const char * source,
+    UInt32 source_size,
+    char * dest) const;
+template size_t CompressionCodecLightweight::compressDataForInteger<UInt16>(
+    const char * source,
+    UInt32 source_size,
+    char * dest) const;
+template size_t CompressionCodecLightweight::compressDataForInteger<UInt32>(
+    const char * source,
+    UInt32 source_size,
+    char * dest) const;
+template size_t CompressionCodecLightweight::compressDataForInteger<UInt64>(
+    const char * source,
+    UInt32 source_size,
+    char * dest) const;
+template void CompressionCodecLightweight::decompressDataForInteger<UInt8>(
+    const char * source,
+    UInt32 source_size,
+    char * dest,
+    UInt32 output_size) const;
+template void CompressionCodecLightweight::decompressDataForInteger<UInt16>(
+    const char * source,
+    UInt32 source_size,
+    char * dest,
+    UInt32 output_size) const;
+template void CompressionCodecLightweight::decompressDataForInteger<UInt32>(
+    const char * source,
+    UInt32 source_size,
+    char * dest,
+    UInt32 output_size) const;
+template void CompressionCodecLightweight::decompressDataForInteger<UInt64>(
+    const char * source,
+    UInt32 source_size,
+    char * dest,
+    UInt32 output_size) const;
+
 } // namespace DB
diff --git a/dbms/src/IO/Compression/CompressionCodecLightweight_NonInteger.cpp b/dbms/src/IO/Compression/CompressionCodecLightweight_NonInteger.cpp
new file mode 100644
index 00000000000..816d8a00b7b
--- /dev/null
+++ b/dbms/src/IO/Compression/CompressionCodecLightweight_NonInteger.cpp
@@ -0,0 +1,53 @@
+// Copyright 2024 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <Common/Exception.h>
+#include <IO/Compression/CompressionCodecLightweight.h>
+#include <IO/Compression/CompressionSettings.h>
+#include <lz4.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+extern const int CANNOT_COMPRESS;
+extern const int CANNOT_DECOMPRESS;
+} // namespace ErrorCodes
+
+size_t CompressionCodecLightweight::compressDataForNonInteger(const char * source, UInt32 source_size, char * dest)
+{
+    auto success = LZ4_compress_fast(
+        source,
+        dest,
+        source_size,
+        LZ4_COMPRESSBOUND(source_size),
+        CompressionSetting::getDefaultLevel(CompressionMethod::LZ4));
+    if (!success) // LZ4_compress_fast returns 0 on failure, otherwise the number of bytes written to dest
+        throw Exception("Cannot LZ4_compress_fast", ErrorCodes::CANNOT_COMPRESS);
+    return success;
+}
+
+
+void CompressionCodecLightweight::decompressDataForNonInteger(
+    const char * source,
+    UInt32 source_size,
+    char * dest,
+    UInt32 output_size)
+{
+    if (unlikely(LZ4_decompress_safe(source, dest, source_size, output_size) < 0)) // negative = failure
+        throw Exception("Cannot LZ4_decompress_safe", ErrorCodes::CANNOT_DECOMPRESS);
+}
+
+} // namespace DB
diff --git a/dbms/src/IO/Compression/CompressionCodecRunLength.cpp b/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
index ed438ea7ce7..e8782c176c2 100644
--- a/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
@@ -15,9 +15,13 @@
 #include <Common/Exception.h>
 #include <IO/Compression/CompressionCodecRunLength.h>
 #include <IO/Compression/CompressionInfo.h>
+#include <IO/Compression/CompressionSettings.h>
 #include <IO/Compression/EncodingUtil.h>
 #include <IO/Compression/ICompressionCodec.h>
 #include <common/unaligned.h>
+#include <lz4.h>
+
+#include <magic_enum.hpp>
 
 
 namespace DB
@@ -29,8 +33,8 @@ extern const int CANNOT_COMPRESS;
 extern const int CANNOT_DECOMPRESS;
 } // namespace ErrorCodes
 
-CompressionCodecRunLength::CompressionCodecRunLength(UInt8 bytes_size_)
-    : bytes_size(bytes_size_)
+CompressionCodecRunLength::CompressionCodecRunLength(CompressionDataType data_type_)
+    : data_type(data_type_) // element type; doCompressData() switches on it to pick the integer width
 {}
 
 UInt8 CompressionCodecRunLength::getMethodByte() const
@@ -40,18 +44,15 @@ UInt8 CompressionCodecRunLength::getMethodByte() const
 
 UInt32 CompressionCodecRunLength::getMaxCompressedDataSize(UInt32 uncompressed_size) const
 {
-    // If the encoded data is larger than the original data, we will store the original data
-    // Additional byte is used to store the size of the data type
-    return 1 + uncompressed_size;
+    return 1 + LZ4_COMPRESSBOUND(uncompressed_size); // 1 header byte + worst case of the LZ4 fallback
 }
 
-namespace
-{
-constexpr UInt8 JUST_COPY_CODE = 0xFF;
-
 template <typename T>
-UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
+UInt32 CompressionCodecRunLength::compressDataForInteger(const char * source, UInt32 source_size, char * dest) const
 {
+    constexpr auto bytes_size = sizeof(T);
+    if unlikely (source_size % bytes_size != 0)
+        throw Exception(ErrorCodes::CANNOT_COMPRESS, "source size {} is not aligned to {}", source_size, bytes_size);
     const char * source_end = source + source_size;
     DB::Compression::RunLengthPairs<T> rle_vec;
     rle_vec.reserve(source_size / sizeof(T));
@@ -67,34 +68,47 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
 
     if (DB::Compression::runLengthPairsSize<T>(rle_vec) >= source_size)
     {
-        dest[0] = JUST_COPY_CODE;
-        memcpy(&dest[1], source, source_size);
-        return 1 + source_size;
+        // RLE would not shrink the data: tag the block as String and fall back to LZ4
+        dest[0] = magic_enum::enum_integer(CompressionDataType::String);
+        dest += 1;
+        auto success = LZ4_compress_fast(
+            source,
+            dest,
+            source_size,
+            LZ4_COMPRESSBOUND(source_size),
+            CompressionSetting::getDefaultLevel(CompressionMethod::LZ4));
+        if (!success) // LZ4_compress_fast returns 0 on failure, otherwise the number of bytes written
+            throw Exception("Cannot LZ4_compress_fast", ErrorCodes::CANNOT_COMPRESS);
+        return 1 + success;
     }
 
-    dest[0] = sizeof(T);
+    dest[0] = magic_enum::enum_integer(data_type); // header byte: the integer data type of this codec
     dest += 1;
     return 1 + DB::Compression::runLengthEncoding<T>(rle_vec, dest);
 }
 
-} // namespace
-
 UInt32 CompressionCodecRunLength::doCompressData(const char * source, UInt32 source_size, char * dest) const
 {
-    if unlikely (source_size % bytes_size != 0)
-        throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "source size {} is not aligned to {}", source_size, bytes_size);
-    switch (bytes_size)
+    switch (data_type)
     {
-    case 1:
-        return compressDataForType<UInt8>(source, source_size, dest);
-    case 2:
-        return compressDataForType<UInt16>(source, source_size, dest);
-    case 4:
-        return compressDataForType<UInt32>(source, source_size, dest);
-    case 8:
-        return compressDataForType<UInt64>(source, source_size, dest);
+    case CompressionDataType::Int8:
+        return compressDataForInteger<UInt8>(source, source_size, dest);
+    case CompressionDataType::Int16:
+        return compressDataForInteger<UInt16>(source, source_size, dest);
+    case CompressionDataType::Int32:
+        return compressDataForInteger<UInt32>(source, source_size, dest);
+    case CompressionDataType::Int64:
+        return compressDataForInteger<UInt64>(source, source_size, dest);
     default:
-        throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress RunLength-encoded data. Unsupported bytes size");
+        auto success = LZ4_compress_fast(
+            source,
+            dest,
+            source_size,
+            LZ4_COMPRESSBOUND(source_size),
+            CompressionSetting::getDefaultLevel(CompressionMethod::LZ4));
+        if (!success)
+            throw Exception("Cannot LZ4_compress_fast", ErrorCodes::CANNOT_COMPRESS);
+        return 1 + success;
     }
 }
 
@@ -113,42 +127,27 @@ void CompressionCodecRunLength::doDecompressData(
         return;
 
     UInt8 bytes_size = source[0];
-    if (bytes_size == JUST_COPY_CODE)
-    {
-        if (source_size - 1 < uncompressed_size)
-            throw Exception(
-                ErrorCodes::CANNOT_DECOMPRESS,
-                "Cannot decompress RunLength-encoded data. File has wrong header");
+    auto data_type = magic_enum::enum_cast<CompressionDataType>(bytes_size);
+    RUNTIME_CHECK(data_type.has_value());
 
-        memcpy(dest, &source[1], uncompressed_size);
-        return;
-    }
-
-    if unlikely (uncompressed_size % bytes_size != 0)
-        throw Exception(
-            ErrorCodes::CANNOT_DECOMPRESS,
-            "uncompressed size {} is not aligned to {}",
-            uncompressed_size,
-            bytes_size);
-
-    switch (bytes_size)
+    switch (data_type.value())
     {
-    case 1:
+    case CompressionDataType::Int8:
         DB::Compression::runLengthDecoding<UInt8>(&source[1], source_size - 1, dest, uncompressed_size);
         break;
-    case 2:
+    case CompressionDataType::Int16:
         DB::Compression::runLengthDecoding<UInt16>(&source[1], source_size - 1, dest, uncompressed_size);
         break;
-    case 4:
+    case CompressionDataType::Int32:
         DB::Compression::runLengthDecoding<UInt32>(&source[1], source_size - 1, dest, uncompressed_size);
         break;
-    case 8:
+    case CompressionDataType::Int64:
         DB::Compression::runLengthDecoding<UInt64>(&source[1], source_size - 1, dest, uncompressed_size);
         break;
     default:
-        throw Exception(
-            ErrorCodes::CANNOT_DECOMPRESS,
-            "Cannot decompress RunLength-encoded data. Unsupported bytes size");
+        if (unlikely(LZ4_decompress_safe(&source[1], dest, source_size - 1, uncompressed_size) < 0))
+            throw Exception("Cannot LZ4_decompress_safe", ErrorCodes::CANNOT_DECOMPRESS);
+        break;
     }
 }
 
diff --git a/dbms/src/IO/Compression/CompressionCodecRunLength.h b/dbms/src/IO/Compression/CompressionCodecRunLength.h
index c3d38090346..86a401765a0 100644
--- a/dbms/src/IO/Compression/CompressionCodecRunLength.h
+++ b/dbms/src/IO/Compression/CompressionCodecRunLength.h
@@ -22,7 +22,7 @@ namespace DB
 class CompressionCodecRunLength : public ICompressionCodec
 {
 public:
-    explicit CompressionCodecRunLength(UInt8 bytes_size_);
+    explicit CompressionCodecRunLength(CompressionDataType data_type_);
 
     UInt8 getMethodByte() const override;
 
@@ -37,7 +37,11 @@ class CompressionCodecRunLength : public ICompressionCodec
     bool isGenericCompression() const override { return false; }
 
 private:
-    const UInt8 bytes_size;
+    template <typename T>
+    UInt32 compressDataForInteger(const char * source, UInt32 source_size, char * dest) const;
+
+private:
+    const CompressionDataType data_type;
 };
 
 } // namespace DB
diff --git a/dbms/src/IO/Compression/CompressionFactory.h b/dbms/src/IO/Compression/CompressionFactory.h
index 06a458a5144..5798ac72f5e 100644
--- a/dbms/src/IO/Compression/CompressionFactory.h
+++ b/dbms/src/IO/Compression/CompressionFactory.h
@@ -17,8 +17,8 @@
 #include <Common/config.h>
 #include <IO/Compression/CompressionCodecDeltaFOR.h>
 #include <IO/Compression/CompressionCodecFOR.h>
-#include <IO/Compression/CompressionCodecIntegerLightweight.h>
 #include <IO/Compression/CompressionCodecLZ4.h>
+#include <IO/Compression/CompressionCodecLightweight.h>
 #include <IO/Compression/CompressionCodecMultiple.h>
 #include <IO/Compression/CompressionCodecNone.h>
 #include <IO/Compression/CompressionCodecRunLength.h>
@@ -51,7 +51,7 @@ class CompressionFactory
         case CompressionMethod::ZSTD:
             return std::make_unique<CompressionCodecZSTD>(setting.level);
         case CompressionMethod::Lightweight:
-            return std::make_unique<CompressionCodecIntegerLightweight>(setting.type_bytes_size);
+            return std::make_unique<CompressionCodecLightweight>(setting.type_bytes_size);
 #if USE_QPL
         case CompressionMethod::QPL:
             return std::make_unique<CompressionCodecDeflateQpl>();
diff --git a/dbms/src/IO/Compression/CompressionInfo.h b/dbms/src/IO/Compression/CompressionInfo.h
index c8b59c974b7..31c631c3291 100644
--- a/dbms/src/IO/Compression/CompressionInfo.h
+++ b/dbms/src/IO/Compression/CompressionInfo.h
@@ -66,4 +66,15 @@ enum class CompressionMethodByte : UInt8
 };
 // clang-format on
 
+enum class CompressionDataType : UInt8
+{
+    Int8 = 1, // Int8/UInt8
+    Int16 = 2, // Int16/UInt16
+    Int32 = 4, // Int32/UInt32
+    Int64 = 8, // Int64/UInt64
+    Float32 = 9,
+    Float64 = 10,
+    String = 11,
+};
+
 } // namespace DB
\ No newline at end of file
diff --git a/dbms/src/IO/Compression/CompressionSettings.h b/dbms/src/IO/Compression/CompressionSettings.h
index 54bf73714da..827f66423a6 100644
--- a/dbms/src/IO/Compression/CompressionSettings.h
+++ b/dbms/src/IO/Compression/CompressionSettings.h
@@ -50,7 +50,7 @@ struct CompressionSetting
     CompressionMethod method;
     CompressionMethodByte method_byte;
     int level;
-    UInt8 type_bytes_size = 1;
+    CompressionDataType type_bytes_size = CompressionDataType::String;
 
     CompressionSetting()
         : CompressionSetting(CompressionMethod::LZ4)
diff --git a/dbms/src/IO/Compression/EncodingUtil.h b/dbms/src/IO/Compression/EncodingUtil.h
index 2632e80b65a..0d91ec188e7 100644
--- a/dbms/src/IO/Compression/EncodingUtil.h
+++ b/dbms/src/IO/Compression/EncodingUtil.h
@@ -186,6 +186,13 @@ void applyFrameOfReference(T * dst, T frame_of_reference, UInt32 count);
 template <std::integral T>
 void FORDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
+    UInt8 bytes_size = sizeof(T);
+    if unlikely (dest_size % bytes_size != 0)
+        throw Exception(
+            ErrorCodes::CANNOT_DECOMPRESS,
+            "uncompressed size {} is not aligned to {}",
+            dest_size,
+            bytes_size);
     const auto count = dest_size / sizeof(T);
     T frame_of_reference = unalignedLoad<T>(src);
     src += sizeof(T);
diff --git a/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp b/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp
index 0c458dfdaeb..cf09fbcd9de 100644
--- a/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp
+++ b/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp
@@ -351,7 +351,7 @@ CodecTestSequence generateSeq(Generator gen, const char * gen_name, B Begin = 0,
 CompressionCodecPtr makeCodec(const CompressionMethodByte method_byte, UInt8 type_byte)
 {
     CompressionSetting setting(method_byte);
-    setting.type_bytes_size = type_byte;
+    setting.type_bytes_size = magic_enum::enum_cast<CompressionDataType>(type_byte).value();
     return CompressionFactory::create(setting);
 }
 
@@ -534,7 +534,7 @@ std::vector<CodecTestSequence> generatePyramidOfSequences(
 const auto IntegerCodecsToTest = ::testing::Values(
     CompressionMethodByte::Lightweight,
     CompressionMethodByte::DeltaFOR,
-    // CompressionMethodByte::FOR, // disable FOR codec for now, since there are too many unit tests.
+    CompressionMethodByte::FOR,
     CompressionMethodByte::RunLength
 #if USE_QPL
     ,
diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h
index f0ee9bf8606..d85185bc729 100644
--- a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h
+++ b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h
@@ -65,25 +65,12 @@ class DMFileWriter
                 /*flags*/ -1,
                 /*mode*/ 0666,
                 max_compress_block_size))
+            , compressed_buf(CompressedWriteBuffer<>::build(
+                  *plain_file,
+                  compression_settings,
+                  !dmfile->getConfiguration().has_value()))
             , minmaxes(do_index ? std::make_shared<MinMaxIndex>(*type) : nullptr)
         {
-            // TODO: better, now only for test
-            if (type->isInteger())
-            {
-                assert(compression_settings.settings.size() == 1);
-                CompressionSettings settings(CompressionMethod::Lightweight);
-                auto & setting = settings.settings[0];
-                setting.type_bytes_size = type->getSizeOfValueInMemory();
-                compressed_buf = CompressedWriteBuffer<>::build(*plain_file, settings, !dmfile->getConfiguration());
-            }
-            else
-            {
-                compressed_buf = CompressedWriteBuffer<>::build( //
-                    *plain_file,
-                    compression_settings,
-                    !dmfile->getConfiguration());
-            }
-
             if (!dmfile->useMetaV2())
             {
                 // will not used in DMFileFormat::V3, could be removed when v3 is default

From e993bb0f7b934067b671896c8b57c4ca93c0548a Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Fri, 21 Jun 2024 16:22:38 +0800
Subject: [PATCH 11/27] rename: fix "Interger" typo in CompressionCodecLightweight_Integer.cpp

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 ...eight_Interger.cpp => CompressionCodecLightweight_Integer.cpp} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename dbms/src/IO/Compression/{CompressionCodecLightweight_Interger.cpp => CompressionCodecLightweight_Integer.cpp} (100%)

diff --git a/dbms/src/IO/Compression/CompressionCodecLightweight_Interger.cpp b/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
similarity index 100%
rename from dbms/src/IO/Compression/CompressionCodecLightweight_Interger.cpp
rename to dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp

From 00da398b3f4aee99f93a823f2affc04d05c1bfcc Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Mon, 24 Jun 2024 10:48:01 +0800
Subject: [PATCH 12/27] refine: replace magic numbers in the lightweight integer codec with named constants

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 .../Compression/CompressionCodecLightweight.h |  8 ++++++
 .../CompressionCodecLightweight_Integer.cpp   | 26 +++++++++----------
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/dbms/src/IO/Compression/CompressionCodecLightweight.h b/dbms/src/IO/Compression/CompressionCodecLightweight.h
index 927ab466bce..3fa893d0ee8 100644
--- a/dbms/src/IO/Compression/CompressionCodecLightweight.h
+++ b/dbms/src/IO/Compression/CompressionCodecLightweight.h
@@ -109,6 +109,14 @@ class CompressionCodecLightweight : public ICompressionCodec
         IntegerMode mode = IntegerMode::LZ4;
 
     private:
+        // The number of compressed blocks after which the context decides whether it still needs to analyze.
+        // For example:
+        // If LZ4 has been used more than COUNT_THRESHOLD times and its compression ratio is better than the lightweight codec's, do not analyze anymore.
+        static constexpr size_t COUNT_THRESHOLD = 5;
+        // Assume that the compression ratio of LZ4 is 3.0
+        // The official document says that the compression ratio of LZ4 is 2.1, https://github.com/lz4/lz4
+        static constexpr size_t ESRTIMATE_LZ4_COMPRESSION_RATIO = 3;
+
         size_t lw_uncompressed_size = 0;
         size_t lw_compressed_size = 0;
         size_t lw_counter = 0;
diff --git a/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp b/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
index e57050f9bcb..45fc51aecd7 100644
--- a/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
@@ -16,7 +16,6 @@
 #include <IO/Compression/CompressionCodecLightweight.h>
 #include <IO/Compression/CompressionSettings.h>
 #include <IO/Compression/EncodingUtil.h>
-#include <fmt/format.h>
 #include <lz4.h>
 
 namespace DB
@@ -68,22 +67,23 @@ void CompressionCodecLightweight::IntegerCompressContext::update(size_t uncompre
 bool CompressionCodecLightweight::IntegerCompressContext::needAnalyze() const
 {
     // lightweight codec is never used, do not analyze anymore
-    if (lz4_counter > 5 && lw_counter == 0)
+    if (lz4_counter > COUNT_THRESHOLD && lw_counter == 0)
         return false;
-    // if lz4 is used more than 5 times and the compression ratio is better than lightweight codec, do not analyze anymore
-    if (lz4_counter > 5 && lz4_uncompressed_size / lz4_compressed_size > lw_compressed_size / lw_uncompressed_size)
+    // if lz4 is used more than COUNT_THRESHOLD times and the compression ratio is better than lightweight codec, do not analyze anymore
+    if (lz4_counter > COUNT_THRESHOLD
+        && lz4_uncompressed_size / lz4_compressed_size > lw_compressed_size / lw_uncompressed_size)
         return false;
     return true;
 }
 
 bool CompressionCodecLightweight::IntegerCompressContext::needAnalyzeDelta() const
 {
-    return lw_counter <= 5 || constant_delta_counter != 0 || delta_for_counter != 0;
+    return lw_counter <= COUNT_THRESHOLD || constant_delta_counter != 0 || delta_for_counter != 0;
 }
 
 bool CompressionCodecLightweight::IntegerCompressContext::needAnalyzeRunLength() const
 {
-    return lw_counter <= 5 || rle_counter != 0;
+    return lw_counter <= COUNT_THRESHOLD || rle_counter != 0;
 }
 
 template <typename T>
@@ -101,6 +101,9 @@ void CompressionCodecLightweight::IntegerCompressContext::analyze(std::span<cons
         return;
     }
 
+    // additional sizeof(T) bytes for the frame of reference (min_delta for DELTA_FOR, min_value for FOR), and 1 byte for width
+    static constexpr auto ADDTIONAL_BYTES = sizeof(T) + sizeof(UInt8);
+
     // Check CONSTANT
     T min_value = *std::min_element(values.begin(), values.end());
     T max_value = *std::max_element(values.begin(), values.end());
@@ -135,9 +138,7 @@ void CompressionCodecLightweight::IntegerCompressContext::analyze(std::span<cons
 
         // DELTA_FOR
         delta_for_width = Compression::FOREncodingWidth(deltas, min_delta);
-        // additional T bytes for min_delta, and 1 byte for width
-        delta_for_size
-            = BitpackingPrimitives::getRequiredSize(deltas.size(), delta_for_width) + sizeof(T) + sizeof(UInt8);
+        delta_for_size = BitpackingPrimitives::getRequiredSize(deltas.size(), delta_for_width) + ADDTIONAL_BYTES;
     }
 
     // RunLength
@@ -156,11 +157,8 @@ void CompressionCodecLightweight::IntegerCompressContext::analyze(std::span<cons
     }
 
     UInt8 for_width = BitpackingPrimitives::minimumBitWidth<T>(max_value - min_value);
-    // additional T bytes for min_value, and 1 byte for width
-    size_t for_size = BitpackingPrimitives::getRequiredSize(values.size(), for_width) + sizeof(T) + sizeof(UInt8);
-    // Assume that the compression ratio of LZ4 is 3.0
-    // The official document says that the compression ratio of LZ4 is 2.1, https://github.com/lz4/lz4
-    size_t estimate_lz_size = values.size() * sizeof(T) / 3;
+    size_t for_size = BitpackingPrimitives::getRequiredSize(values.size(), for_width) + ADDTIONAL_BYTES;
+    size_t estimate_lz_size = values.size() * sizeof(T) / ESRTIMATE_LZ4_COMPRESSION_RATIO;
     size_t rle_size = rle.empty() ? std::numeric_limits<size_t>::max() : Compression::runLengthPairsSize(rle);
     if (needAnalyzeRunLength() && rle_size < delta_for_size && rle_size < for_size && rle_size < estimate_lz_size)
     {

From 3cf3f1ed3a98a666505e47e147084857d3342fd5 Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Mon, 24 Jun 2024 15:09:40 +0800
Subject: [PATCH 13/27] refine: dispatch CompressionFactory::create on method_byte and rename type_bytes_size to data_type

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 dbms/src/IO/Compression/CompressionFactory.h  | 31 +++++------
 dbms/src/IO/Compression/CompressionSettings.h |  2 +-
 .../tests/gtest_codec_compression.cpp         | 53 +++++--------------
 3 files changed, 27 insertions(+), 59 deletions(-)

diff --git a/dbms/src/IO/Compression/CompressionFactory.h b/dbms/src/IO/Compression/CompressionFactory.h
index 5798ac72f5e..a12b8896929 100644
--- a/dbms/src/IO/Compression/CompressionFactory.h
+++ b/dbms/src/IO/Compression/CompressionFactory.h
@@ -42,31 +42,29 @@ class CompressionFactory
 public:
     static CompressionCodecPtr create(const CompressionSetting & setting)
     {
-        switch (setting.method)
+        // LZ4 and LZ4HC have the same format, the difference is only in compression.
+        // So they have the same method byte.
+        if (setting.method == CompressionMethod::LZ4HC)
+            return std::make_unique<CompressionCodecLZ4HC>(setting.level);
+
+        switch (setting.method_byte)
         {
-        case CompressionMethod::LZ4:
+        case CompressionMethodByte::LZ4:
             return std::make_unique<CompressionCodecLZ4>(setting.level);
-        case CompressionMethod::LZ4HC:
-            return std::make_unique<CompressionCodecLZ4HC>(setting.level);
-        case CompressionMethod::ZSTD:
+        case CompressionMethodByte::ZSTD:
             return std::make_unique<CompressionCodecZSTD>(setting.level);
-        case CompressionMethod::Lightweight:
-            return std::make_unique<CompressionCodecLightweight>(setting.type_bytes_size);
 #if USE_QPL
-        case CompressionMethod::QPL:
+        case CompressionMethodByte::QPL:
             return std::make_unique<CompressionCodecDeflateQpl>();
 #endif
-        default:
-            break;
-        }
-        switch (setting.method_byte)
-        {
+        case CompressionMethodByte::Lightweight:
+            return std::make_unique<CompressionCodecLightweight>(setting.data_type);
         case CompressionMethodByte::DeltaFOR:
-            return std::make_unique<CompressionCodecDeltaFOR>(setting.type_bytes_size);
+            return std::make_unique<CompressionCodecDeltaFOR>(setting.data_type);
         case CompressionMethodByte::RunLength:
-            return std::make_unique<CompressionCodecRunLength>(setting.type_bytes_size);
+            return std::make_unique<CompressionCodecRunLength>(setting.data_type);
         case CompressionMethodByte::FOR:
-            return std::make_unique<CompressionCodecFOR>(setting.type_bytes_size);
+            return std::make_unique<CompressionCodecFOR>(setting.data_type);
         case CompressionMethodByte::NONE:
             return std::make_unique<CompressionCodecNone>();
         default:
@@ -96,7 +94,6 @@ class CompressionFactory
 private:
     static Codecs createCodecs(const CompressionSettings & settings)
     {
-        RUNTIME_CHECK(settings.settings.size() > 1);
         Codecs codecs;
         codecs.reserve(settings.settings.size());
         for (const auto & setting : settings.settings)
diff --git a/dbms/src/IO/Compression/CompressionSettings.h b/dbms/src/IO/Compression/CompressionSettings.h
index 827f66423a6..5363b0aca5d 100644
--- a/dbms/src/IO/Compression/CompressionSettings.h
+++ b/dbms/src/IO/Compression/CompressionSettings.h
@@ -50,7 +50,7 @@ struct CompressionSetting
     CompressionMethod method;
     CompressionMethodByte method_byte;
     int level;
-    CompressionDataType type_bytes_size = CompressionDataType::String;
+    CompressionDataType data_type = CompressionDataType::String;
 
     CompressionSetting()
         : CompressionSetting(CompressionMethod::LZ4)
diff --git a/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp b/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp
index cf09fbcd9de..35f80b2a296 100644
--- a/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp
+++ b/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp
@@ -351,7 +351,7 @@ CodecTestSequence generateSeq(Generator gen, const char * gen_name, B Begin = 0,
 CompressionCodecPtr makeCodec(const CompressionMethodByte method_byte, UInt8 type_byte)
 {
     CompressionSetting setting(method_byte);
-    setting.type_bytes_size = magic_enum::enum_cast<CompressionDataType>(type_byte).value();
+    setting.data_type = magic_enum::enum_cast<CompressionDataType>(type_byte).value();
     return CompressionFactory::create(setting);
 }
 
@@ -362,18 +362,21 @@ void testTranscoding(ICompressionCodec & codec, const CodecTestSequence & test_s
     const UInt32 encoded_max_size = codec.getCompressedReserveSize(static_cast<UInt32>(source_data.size()));
     PODArray<char> encoded(encoded_max_size);
 
-    assert(source_data.data() != nullptr); // Codec assumes that source buffer is not null.
-    const UInt32 encoded_size
-        = codec.compress(source_data.data(), static_cast<UInt32>(source_data.size()), encoded.data());
-
+    ASSERT_TRUE(source_data.data() != nullptr); // Codec assumes that source buffer is not null.
+    const UInt32 encoded_size = codec.compress( //
+        source_data.data(),
+        static_cast<UInt32>(source_data.size()),
+        encoded.data());
     encoded.resize(encoded_size);
 
-    PODArray<char> decoded(source_data.size());
-
-    const auto decoded_size = codec.readDecompressedBlockSize(encoded.data());
+    auto method_byte = ICompressionCodec::readMethod(encoded.data());
+    ASSERT_EQ(method_byte, codec.getMethodByte());
 
-    codec.decompress(encoded.data(), static_cast<UInt32>(encoded.size()), decoded.data(), decoded_size);
+    PODArray<char> decoded(source_data.size());
+    const auto decode_codec = CompressionFactory::createForDecompress(method_byte);
 
+    const auto decoded_size = decode_codec->readDecompressedBlockSize(encoded.data());
+    decode_codec->decompress(encoded.data(), static_cast<UInt32>(encoded.size()), decoded.data(), decoded_size);
     decoded.resize(decoded_size);
 
     ASSERT_TRUE(EqualByteContainers(test_sequence.data_type->getSizeOfValueInMemory(), source_data, decoded));
@@ -546,38 +549,6 @@ const auto IntegerCodecsToTest = ::testing::Values(
 // test cases
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
-// INSTANTIATE_TEST_CASE_P(
-//     Simple,
-//     CodecTest,
-//     ::testing::Combine(
-//         IntegerCodecsToTest,
-//         ::testing::Values(makeSeq<Float64>(
-//             1,
-//             2,
-//             3,
-//             5,
-//             7,
-//             11,
-//             13,
-//             17,
-//             23,
-//             29,
-//             31,
-//             37,
-//             41,
-//             43,
-//             47,
-//             53,
-//             59,
-//             61,
-//             67,
-//             71,
-//             73,
-//             79,
-//             83,
-//             89,
-//             97))));
-
 INSTANTIATE_TEST_CASE_P(
     SmallSequences,
     MultipleSequencesCodecTest,

From 4b5afc19976ac53de50abbc50d686f82f216c967 Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Mon, 24 Jun 2024 16:15:46 +0800
Subject: [PATCH 14/27] address comments: document the RunLength max compressed size and reuse bytes_size

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 dbms/src/IO/Compression/CompressionCodecRunLength.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/dbms/src/IO/Compression/CompressionCodecRunLength.cpp b/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
index e8782c176c2..ff1dd44649e 100644
--- a/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
@@ -44,6 +44,8 @@ UInt8 CompressionCodecRunLength::getMethodByte() const
 
 UInt32 CompressionCodecRunLength::getMaxCompressedDataSize(UInt32 uncompressed_size) const
 {
+    // If the data is not compressible as run-length encoding, we will compress it as LZ4.
+    // 1 byte for data type, and the rest for LZ4 compressed data.
     return 1 + LZ4_COMPRESSBOUND(uncompressed_size);
 }
 
@@ -55,8 +57,8 @@ UInt32 CompressionCodecRunLength::compressDataForInteger(const char * source, UI
         throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "source size {} is not aligned to {}", source_size, bytes_size);
     const char * source_end = source + source_size;
     DB::Compression::RunLengthPairs<T> rle_vec;
-    rle_vec.reserve(source_size / sizeof(T));
-    for (const auto * src = source; src < source_end; src += sizeof(T))
+    rle_vec.reserve(source_size / bytes_size);
+    for (const auto * src = source; src < source_end; src += bytes_size)
     {
         T value = unalignedLoad<T>(src);
         if (rle_vec.empty() || rle_vec.back().first != value

From d029c532045724f722e9b66d8855407581c0d6d5 Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Mon, 24 Jun 2024 17:52:55 +0800
Subject: [PATCH 15/27] address comments: explain when RunLength encoding starts a new run

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 dbms/src/IO/Compression/CompressionCodecRunLength.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/dbms/src/IO/Compression/CompressionCodecRunLength.cpp b/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
index ff1dd44649e..c6aa45065fb 100644
--- a/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
@@ -61,6 +61,9 @@ UInt32 CompressionCodecRunLength::compressDataForInteger(const char * source, UI
     for (const auto * src = source; src < source_end; src += bytes_size)
     {
         T value = unalignedLoad<T>(src);
+        // If the value differs from the previous one, or the run counter has reached the UInt8 maximum
+        // (255, so one more increment would wrap around to 0), we need to start a new run.
+        // Otherwise, we can just increment the counter.
         if (rle_vec.empty() || rle_vec.back().first != value
             || rle_vec.back().second == std::numeric_limits<UInt8>::max())
             rle_vec.emplace_back(value, 1);

From 86d6c2fd9643c2a39cd4251379fe640d01c7d4f6 Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com>
Date: Fri, 28 Jun 2024 11:57:26 +0800
Subject: [PATCH 16/27] Apply suggestions from code review

Co-authored-by: jinhelin <linjinhe33@gmail.com>
---
 dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp | 4 ++--
 dbms/src/IO/Compression/CompressionCodecDeltaFOR.h   | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
index 099dfd900ba..0305b855e39 100644
--- a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
@@ -104,7 +104,7 @@ UInt32 CompressionCodecDeltaFOR::doCompressData(const char * source, UInt32 sour
             source_size,
             LZ4_COMPRESSBOUND(source_size),
             CompressionSetting::getDefaultLevel(CompressionMethod::LZ4));
-        if (!success)
+        if (unlikely(!success))
             throw Exception("Cannot LZ4_compress_fast", ErrorCodes::CANNOT_COMPRESS);
         return 1 + success;
     }
@@ -161,7 +161,7 @@ void CompressionCodecDeltaFOR::ordinaryDecompress(
             ErrorCodes::CANNOT_DECOMPRESS,
             "Cannot decompress DeltaFor-encoded data. File has wrong header");
 
-    if (uncompressed_size == 0)
+    if (unlikely(uncompressed_size == 0))
         return;
 
     UInt8 bytes_size = source[0];
diff --git a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.h b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.h
index 5faf713e864..d4fc5f62b6d 100644
--- a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.h
+++ b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.h
@@ -26,6 +26,7 @@ class CompressionCodecDeltaFOR : public ICompressionCodec
 
     UInt8 getMethodByte() const override;
 
+    // ordinaryDecompress is only used for benchmark comparison.
     static void ordinaryDecompress(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size);
 
 #ifndef DBMS_PUBLIC_GTEST

From 9cf8b83816655c8be7664a712b0ee3f3149cc51f Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Fri, 28 Jun 2024 13:28:59 +0800
Subject: [PATCH 17/27] format

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp        | 2 +-
 dbms/src/IO/Compression/CompressionCodecFOR.cpp             | 4 ++--
 dbms/src/IO/Compression/CompressionCodecLZ4.cpp             | 2 +-
 dbms/src/IO/Compression/CompressionCodecLightweight.cpp     | 2 +-
 .../IO/Compression/CompressionCodecLightweight_Integer.cpp  | 2 +-
 .../Compression/CompressionCodecLightweight_NonInteger.cpp  | 2 +-
 dbms/src/IO/Compression/CompressionCodecRunLength.cpp       | 6 +++---
 dbms/src/IO/Compression/CompressionInfo.h                   | 2 +-
 dbms/src/Storages/KVStore/FFI/SSTReader.h                   | 2 +-
 9 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
index 0305b855e39..388d463c82a 100644
--- a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
@@ -121,7 +121,7 @@ void CompressionCodecDeltaFOR::doDecompressData(
             ErrorCodes::CANNOT_DECOMPRESS,
             "Cannot decompress DeltaFor-encoded data. File has wrong header");
 
-    if (uncompressed_size == 0)
+    if (unlikely(uncompressed_size == 0))
         return;
 
     UInt8 bytes_size = source[0];
diff --git a/dbms/src/IO/Compression/CompressionCodecFOR.cpp b/dbms/src/IO/Compression/CompressionCodecFOR.cpp
index 881232b8155..3c3b380946c 100644
--- a/dbms/src/IO/Compression/CompressionCodecFOR.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecFOR.cpp
@@ -99,7 +99,7 @@ UInt32 CompressionCodecFOR::doCompressData(const char * source, UInt32 source_si
             source_size,
             LZ4_COMPRESSBOUND(source_size),
             CompressionSetting::getDefaultLevel(CompressionMethod::LZ4));
-        if (!success)
+        if (unlikely(!success))
             throw Exception("Cannot LZ4_compress_fast", ErrorCodes::CANNOT_COMPRESS);
         return 1 + success;
     }
@@ -114,7 +114,7 @@ void CompressionCodecFOR::doDecompressData(
     if unlikely (source_size < 2)
         throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress For-encoded data. File has wrong header");
 
-    if (uncompressed_size == 0)
+    if (unlikely(uncompressed_size == 0))
         return;
 
     UInt8 bytes_size = source[0];
diff --git a/dbms/src/IO/Compression/CompressionCodecLZ4.cpp b/dbms/src/IO/Compression/CompressionCodecLZ4.cpp
index 7a7e91c6c97..5f0aa8719ae 100644
--- a/dbms/src/IO/Compression/CompressionCodecLZ4.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecLZ4.cpp
@@ -62,7 +62,7 @@ UInt32 CompressionCodecLZ4HC::doCompressData(const char * source, UInt32 source_
 {
     auto success = LZ4_compress_HC(source, dest, source_size, LZ4_COMPRESSBOUND(source_size), level);
 
-    if (!success)
+    if (unlikely(!success))
         throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress with LZ4 codec");
 
     return success;
diff --git a/dbms/src/IO/Compression/CompressionCodecLightweight.cpp b/dbms/src/IO/Compression/CompressionCodecLightweight.cpp
index efa0b77309d..df1ea3e0b3f 100644
--- a/dbms/src/IO/Compression/CompressionCodecLightweight.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecLightweight.cpp
@@ -89,7 +89,7 @@ void CompressionCodecLightweight::doDecompressData(
             ErrorCodes::CANNOT_DECOMPRESS,
             "Cannot decompress lightweight codec data. File has wrong header");
 
-    if (uncompressed_size == 0)
+    if (unlikely(uncompressed_size == 0))
         return;
 
     UInt8 bytes_size = source[0];
diff --git a/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp b/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
index 45fc51aecd7..1de05fbec82 100644
--- a/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
@@ -247,7 +247,7 @@ size_t CompressionCodecLightweight::compressDataForInteger(const char * source,
             source_size,
             LZ4_COMPRESSBOUND(source_size),
             CompressionSetting::getDefaultLevel(CompressionMethod::LZ4));
-        if (!success)
+        if (unlikely(!success))
             throw Exception("Cannot LZ4_compress_fast", ErrorCodes::CANNOT_COMPRESS);
         compressed_size += success;
         break;
diff --git a/dbms/src/IO/Compression/CompressionCodecLightweight_NonInteger.cpp b/dbms/src/IO/Compression/CompressionCodecLightweight_NonInteger.cpp
index 816d8a00b7b..efe669a2825 100644
--- a/dbms/src/IO/Compression/CompressionCodecLightweight_NonInteger.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecLightweight_NonInteger.cpp
@@ -34,7 +34,7 @@ size_t CompressionCodecLightweight::compressDataForNonInteger(const char * sourc
         source_size,
         LZ4_COMPRESSBOUND(source_size),
         CompressionSetting::getDefaultLevel(CompressionMethod::LZ4));
-    if (!success)
+    if (unlikely(!success))
         throw Exception("Cannot LZ4_compress_fast", ErrorCodes::CANNOT_COMPRESS);
     return success;
 }
diff --git a/dbms/src/IO/Compression/CompressionCodecRunLength.cpp b/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
index c6aa45065fb..08dbd908436 100644
--- a/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
@@ -82,7 +82,7 @@ UInt32 CompressionCodecRunLength::compressDataForInteger(const char * source, UI
             source_size,
             LZ4_COMPRESSBOUND(source_size),
             CompressionSetting::getDefaultLevel(CompressionMethod::LZ4));
-        if (!success)
+        if (unlikely(!success))
             throw Exception("Cannot LZ4_compress_fast", ErrorCodes::CANNOT_COMPRESS);
         return 1 + success;
     }
@@ -111,7 +111,7 @@ UInt32 CompressionCodecRunLength::doCompressData(const char * source, UInt32 sou
             source_size,
             LZ4_COMPRESSBOUND(source_size),
             CompressionSetting::getDefaultLevel(CompressionMethod::LZ4));
-        if (!success)
+        if (unlikely(!success))
             throw Exception("Cannot LZ4_compress_fast", ErrorCodes::CANNOT_COMPRESS);
         return 1 + success;
     }
@@ -128,7 +128,7 @@ void CompressionCodecRunLength::doDecompressData(
             ErrorCodes::CANNOT_DECOMPRESS,
             "Cannot decompress RunLength-encoded data. File has wrong header");
 
-    if (uncompressed_size == 0)
+    if (unlikely(uncompressed_size == 0))
         return;
 
     UInt8 bytes_size = source[0];
diff --git a/dbms/src/IO/Compression/CompressionInfo.h b/dbms/src/IO/Compression/CompressionInfo.h
index 31c631c3291..f24e99741c6 100644
--- a/dbms/src/IO/Compression/CompressionInfo.h
+++ b/dbms/src/IO/Compression/CompressionInfo.h
@@ -77,4 +77,4 @@ enum class CompressionDataType : UInt8
     String = 11,
 };
 
-} // namespace DB
\ No newline at end of file
+} // namespace DB
diff --git a/dbms/src/Storages/KVStore/FFI/SSTReader.h b/dbms/src/Storages/KVStore/FFI/SSTReader.h
index 24552eabd86..46195e216d1 100644
--- a/dbms/src/Storages/KVStore/FFI/SSTReader.h
+++ b/dbms/src/Storages/KVStore/FFI/SSTReader.h
@@ -48,7 +48,7 @@ class MonoSSTReader : public SSTReader
     BaseBuffView keyView() const override;
     BaseBuffView valueView() const override;
     void next() override;
-    SSTFormatKind sstFormatKind() const { return kind; };
+    SSTFormatKind sstFormatKind() const { return kind; }
     size_t approxSize() const override;
     std::vector<std::string> findSplitKeys(uint64_t splits_count) const override;
     void seek(BaseBuffView && view) const override;

From c83a8249c418006fc06300b1b0525be13086e571 Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Mon, 1 Jul 2024 16:49:55 +0800
Subject: [PATCH 18/27] address comments

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 dbms/src/IO/Compression/CompressionCodecLightweight.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/dbms/src/IO/Compression/CompressionCodecLightweight.h b/dbms/src/IO/Compression/CompressionCodecLightweight.h
index 3fa893d0ee8..39a00ef292d 100644
--- a/dbms/src/IO/Compression/CompressionCodecLightweight.h
+++ b/dbms/src/IO/Compression/CompressionCodecLightweight.h
@@ -94,10 +94,6 @@ class CompressionCodecLightweight : public ICompressionCodec
     public:
         IntegerCompressContext() = default;
 
-        bool needAnalyze() const;
-        bool needAnalyzeDelta() const;
-        bool needAnalyzeRunLength() const;
-
         template <typename T>
         void analyze(std::span<const T> & values, IntegerState<T> & state);
 
@@ -108,6 +104,11 @@ class CompressionCodecLightweight : public ICompressionCodec
 
         IntegerMode mode = IntegerMode::LZ4;
 
+    private:
+        bool needAnalyze() const;
+        bool needAnalyzeDelta() const;
+        bool needAnalyzeRunLength() const;
+
     private:
         // The threshold for the number of blocks to decide whether need to analyze.
         // For example:

From bf8c288b42ddeecb88989ad243a330e65fa5551e Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Wed, 3 Jul 2024 15:05:54 +0800
Subject: [PATCH 19/27] address comments & fix DeltaFor

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 .../Compression/CompressionCodecDeflateQpl.h  |  3 --
 .../Compression/CompressionCodecDeltaFOR.cpp  | 18 ++++++---
 .../IO/Compression/CompressionCodecDeltaFOR.h |  3 --
 dbms/src/IO/Compression/CompressionCodecFOR.h |  3 --
 dbms/src/IO/Compression/CompressionCodecLZ4.h |  3 --
 .../CompressionCodecLightweight.cpp           |  5 +++
 .../Compression/CompressionCodecLightweight.h |  3 --
 .../CompressionCodecLightweight_Integer.cpp   | 26 ++++++++-----
 .../Compression/CompressionCodecMultiple.cpp  |  8 ----
 .../IO/Compression/CompressionCodecMultiple.h |  3 --
 .../src/IO/Compression/CompressionCodecNone.h |  3 --
 .../Compression/CompressionCodecRunLength.h   |  3 --
 .../src/IO/Compression/CompressionCodecZSTD.h |  3 --
 dbms/src/IO/Compression/EncodingUtil.cpp      | 38 +++++++++++++++----
 dbms/src/IO/Compression/EncodingUtil.h        |  7 +++-
 dbms/src/IO/Compression/ICompressionCodec.h   |  6 ---
 16 files changed, 70 insertions(+), 65 deletions(-)

diff --git a/dbms/src/IO/Compression/CompressionCodecDeflateQpl.h b/dbms/src/IO/Compression/CompressionCodecDeflateQpl.h
index df01503cb14..131b1a21757 100644
--- a/dbms/src/IO/Compression/CompressionCodecDeflateQpl.h
+++ b/dbms/src/IO/Compression/CompressionCodecDeflateQpl.h
@@ -103,9 +103,6 @@ class CompressionCodecDeflateQpl final : public ICompressionCodec
     UInt8 getMethodByte() const override;
 
 protected:
-    bool isCompression() const override { return true; }
-    bool isGenericCompression() const override { return true; }
-
     UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
     void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size)
         const override;
diff --git a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
index 388d463c82a..f3e13490683 100644
--- a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
@@ -52,11 +52,11 @@ UInt32 CompressionCodecDeltaFOR::getMaxCompressedDataSize(UInt32 uncompressed_si
     case CompressionDataType::Int32:
     case CompressionDataType::Int64:
     {
-        // |bytes_of_original_type|frame_of_reference|width(bits)  |bitpacked data|
-        // |1 bytes               |bytes_size        |sizeof(UInt8)|required size |
+        // |bytes_of_original_type|first_value|frame_of_reference|width(bits)  |bitpacked data|
+        // |1 bytes               |bytes_size |bytes_size        |sizeof(UInt8)|required size |
         auto bytes_size = magic_enum::enum_integer(data_type);
-        const size_t count = uncompressed_size / bytes_size;
-        return 1 + bytes_size + sizeof(UInt8) + BitpackingPrimitives::getRequiredSize(count, bytes_size * 8);
+        const size_t deltas_count = uncompressed_size / bytes_size - 1;
+        return 1 + bytes_size * 2 + sizeof(UInt8) + BitpackingPrimitives::getRequiredSize(deltas_count, bytes_size * 8);
     }
     default:
         return 1 + LZ4_COMPRESSBOUND(uncompressed_size);
@@ -72,13 +72,19 @@ UInt32 compressData(const char * source, UInt32 source_size, char * dest)
     constexpr auto bytes_size = sizeof(T);
     if unlikely (source_size % bytes_size != 0)
         throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "source size {} is not aligned to {}", source_size, bytes_size);
-    const auto count = source_size / sizeof(T);
+    const auto count = source_size / bytes_size;
     DB::Compression::deltaEncoding<T>(reinterpret_cast<const T *>(source), count, reinterpret_cast<T *>(dest));
+    if (unlikely(count == 1))
+        return bytes_size;
     // Cast deltas to signed type to better compress negative values.
     // For example, if we have a sequence of UInt8 values [3, 2, 1, 0], the deltas will be [3, -1, -1, -1]
     // If we compress them as UInt8, we will get [3, 255, 255, 255], which is not optimal.
     using TS = typename std::make_signed<T>::type;
-    return DB::CompressionCodecFOR::compressData<TS>(reinterpret_cast<TS *>(dest), source_size, dest);
+    auto for_size = DB::CompressionCodecFOR::compressData<TS>(
+        reinterpret_cast<TS *>(dest + bytes_size),
+        source_size - bytes_size,
+        dest + bytes_size);
+    return bytes_size + for_size;
 }
 
 } // namespace
diff --git a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.h b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.h
index d4fc5f62b6d..9dc9687f152 100644
--- a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.h
+++ b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.h
@@ -39,9 +39,6 @@ class CompressionCodecDeltaFOR : public ICompressionCodec
 
     UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
 
-    bool isCompression() const override { return true; }
-    bool isGenericCompression() const override { return false; }
-
 private:
     const CompressionDataType data_type;
 };
diff --git a/dbms/src/IO/Compression/CompressionCodecFOR.h b/dbms/src/IO/Compression/CompressionCodecFOR.h
index 824c36276cf..3112ab65806 100644
--- a/dbms/src/IO/Compression/CompressionCodecFOR.h
+++ b/dbms/src/IO/Compression/CompressionCodecFOR.h
@@ -47,9 +47,6 @@ class CompressionCodecFOR : public ICompressionCodec
 
     UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
 
-    bool isCompression() const override { return true; }
-    bool isGenericCompression() const override { return false; }
-
 private:
     const CompressionDataType data_type;
 };
diff --git a/dbms/src/IO/Compression/CompressionCodecLZ4.h b/dbms/src/IO/Compression/CompressionCodecLZ4.h
index 70ae9048d5a..4eda28ac714 100644
--- a/dbms/src/IO/Compression/CompressionCodecLZ4.h
+++ b/dbms/src/IO/Compression/CompressionCodecLZ4.h
@@ -29,9 +29,6 @@ class CompressionCodecLZ4 : public ICompressionCodec
 protected:
     UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
 
-    bool isCompression() const override { return true; }
-    bool isGenericCompression() const override { return true; }
-
 private:
     void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size)
         const override;
diff --git a/dbms/src/IO/Compression/CompressionCodecLightweight.cpp b/dbms/src/IO/Compression/CompressionCodecLightweight.cpp
index df1ea3e0b3f..51a972340f8 100644
--- a/dbms/src/IO/Compression/CompressionCodecLightweight.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecLightweight.cpp
@@ -120,6 +120,11 @@ void CompressionCodecLightweight::doDecompressData(
     case CompressionDataType::String:
         decompressDataForNonInteger(&source[1], source_size_no_header, dest, uncompressed_size);
         break;
+    default:
+        throw Exception(
+            ErrorCodes::CANNOT_DECOMPRESS,
+            "Cannot decompress lightweight codec data. Invalid data type {}",
+            static_cast<int>(data_type.value()));
     }
 }
 
diff --git a/dbms/src/IO/Compression/CompressionCodecLightweight.h b/dbms/src/IO/Compression/CompressionCodecLightweight.h
index 39a00ef292d..f1f952fe68f 100644
--- a/dbms/src/IO/Compression/CompressionCodecLightweight.h
+++ b/dbms/src/IO/Compression/CompressionCodecLightweight.h
@@ -44,9 +44,6 @@ class CompressionCodecLightweight : public ICompressionCodec
 
     UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
 
-    bool isCompression() const override { return true; } // light compression
-    bool isGenericCompression() const override { return false; }
-
 private:
     /// Integer data
 
diff --git a/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp b/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
index 1de05fbec82..3c9c496f743 100644
--- a/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
@@ -101,12 +101,10 @@ void CompressionCodecLightweight::IntegerCompressContext::analyze(std::span<cons
         return;
     }
 
-    // additional T bytes for min_delta, and 1 byte for width
-    static constexpr auto ADDTIONAL_BYTES = sizeof(T) + sizeof(UInt8);
-
     // Check CONSTANT
-    T min_value = *std::min_element(values.begin(), values.end());
-    T max_value = *std::max_element(values.begin(), values.end());
+    auto minmax_value = std::minmax_element(values.begin(), values.end());
+    T min_value = *minmax_value.first;
+    T max_value = *minmax_value.second;
     if (min_value == max_value)
     {
         state = min_value;
@@ -122,14 +120,17 @@ void CompressionCodecLightweight::IntegerCompressContext::analyze(std::span<cons
     if (needAnalyzeDelta())
     {
         // Check CONSTANT_DELTA
-        deltas.reserve(values.size());
-        deltas.push_back(values[0]);
+
+        // If values.size() == 1, min_value == max_value and mode will be CONSTANT,
+        // so values.size() must be greater than 1 here.
+        deltas.reserve(values.size() - 1);
         for (size_t i = 1; i < values.size(); ++i)
         {
             deltas.push_back(values[i] - values[i - 1]);
         }
-        min_delta = *std::min_element(deltas.cbegin(), deltas.cend());
-        if (min_delta == *std::max_element(deltas.cbegin(), deltas.cend()))
+        auto minmax_delta = std::minmax_element(deltas.cbegin(), deltas.cend());
+        min_delta = *minmax_delta.first;
+        if (min_delta == *minmax_delta.second)
         {
             state = static_cast<T>(min_delta);
             mode = IntegerMode::CONSTANT_DELTA;
@@ -138,6 +139,8 @@ void CompressionCodecLightweight::IntegerCompressContext::analyze(std::span<cons
 
         // DELTA_FOR
         delta_for_width = Compression::FOREncodingWidth(deltas, min_delta);
+        // values[0], min_delta, 1 byte for width, and the rest for compressed data
+        static constexpr auto ADDTIONAL_BYTES = sizeof(T) + sizeof(UInt8) + sizeof(T);
         delta_for_size = BitpackingPrimitives::getRequiredSize(deltas.size(), delta_for_width) + ADDTIONAL_BYTES;
     }
 
@@ -157,6 +160,8 @@ void CompressionCodecLightweight::IntegerCompressContext::analyze(std::span<cons
     }
 
     UInt8 for_width = BitpackingPrimitives::minimumBitWidth<T>(max_value - min_value);
+    // additional T bytes for min_value (the frame of reference), and 1 byte for width
+    static constexpr auto ADDTIONAL_BYTES = sizeof(T) + sizeof(UInt8);
     size_t for_size = BitpackingPrimitives::getRequiredSize(values.size(), for_width) + ADDTIONAL_BYTES;
     size_t estimate_lz_size = values.size() * sizeof(T) / ESRTIMATE_LZ4_COMPRESSION_RATIO;
     size_t rle_size = rle.empty() ? std::numeric_limits<size_t>::max() : Compression::runLengthPairsSize(rle);
@@ -232,6 +237,9 @@ size_t CompressionCodecLightweight::compressDataForInteger(const char * source,
     case IntegerMode::DELTA_FOR:
     {
         DeltaFORState delta_for_state = std::get<3>(state);
+        unalignedStore<T>(dest, values[0]);
+        dest += sizeof(T);
+        compressed_size += sizeof(T);
         compressed_size += Compression::FOREncoding<typename std::make_signed_t<T>, true>(
             delta_for_state.deltas,
             delta_for_state.min_delta_value,
diff --git a/dbms/src/IO/Compression/CompressionCodecMultiple.cpp b/dbms/src/IO/Compression/CompressionCodecMultiple.cpp
index e39d175157d..d5577716b38 100644
--- a/dbms/src/IO/Compression/CompressionCodecMultiple.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecMultiple.cpp
@@ -120,12 +120,4 @@ std::vector<UInt8> CompressionCodecMultiple::getCodecsBytesFromData(const char *
     return result;
 }
 
-bool CompressionCodecMultiple::isCompression() const
-{
-    for (const auto & codec : codecs)
-        if (codec->isCompression())
-            return true;
-    return false;
-}
-
 } // namespace DB
diff --git a/dbms/src/IO/Compression/CompressionCodecMultiple.h b/dbms/src/IO/Compression/CompressionCodecMultiple.h
index 9d8a0041265..784718567dd 100644
--- a/dbms/src/IO/Compression/CompressionCodecMultiple.h
+++ b/dbms/src/IO/Compression/CompressionCodecMultiple.h
@@ -44,9 +44,6 @@ class CompressionCodecMultiple final : public ICompressionCodec
     void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 decompressed_size)
         const override;
 
-    bool isCompression() const override;
-    bool isGenericCompression() const override { return false; }
-
 private:
     Codecs codecs;
 };
diff --git a/dbms/src/IO/Compression/CompressionCodecNone.h b/dbms/src/IO/Compression/CompressionCodecNone.h
index b5d9eaf83cc..8716ba00e43 100644
--- a/dbms/src/IO/Compression/CompressionCodecNone.h
+++ b/dbms/src/IO/Compression/CompressionCodecNone.h
@@ -33,9 +33,6 @@ class CompressionCodecNone final : public ICompressionCodec
 
     void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size)
         const override;
-
-    bool isCompression() const override { return false; }
-    bool isGenericCompression() const override { return false; }
 };
 
 } // namespace DB
diff --git a/dbms/src/IO/Compression/CompressionCodecRunLength.h b/dbms/src/IO/Compression/CompressionCodecRunLength.h
index 86a401765a0..d8237b5079d 100644
--- a/dbms/src/IO/Compression/CompressionCodecRunLength.h
+++ b/dbms/src/IO/Compression/CompressionCodecRunLength.h
@@ -33,9 +33,6 @@ class CompressionCodecRunLength : public ICompressionCodec
 
     UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
 
-    bool isCompression() const override { return false; }
-    bool isGenericCompression() const override { return false; }
-
 private:
     template <typename T>
     UInt32 compressDataForInteger(const char * source, UInt32 source_size, char * dest) const;
diff --git a/dbms/src/IO/Compression/CompressionCodecZSTD.h b/dbms/src/IO/Compression/CompressionCodecZSTD.h
index 5e180ba8847..77c97550022 100644
--- a/dbms/src/IO/Compression/CompressionCodecZSTD.h
+++ b/dbms/src/IO/Compression/CompressionCodecZSTD.h
@@ -34,9 +34,6 @@ class CompressionCodecZSTD : public ICompressionCodec
     void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size)
         const override;
 
-    bool isCompression() const override { return true; }
-    bool isGenericCompression() const override { return true; }
-
 private:
     const int level;
 };
diff --git a/dbms/src/IO/Compression/EncodingUtil.cpp b/dbms/src/IO/Compression/EncodingUtil.cpp
index aca78ee1784..5ce62863150 100644
--- a/dbms/src/IO/Compression/EncodingUtil.cpp
+++ b/dbms/src/IO/Compression/EncodingUtil.cpp
@@ -227,26 +227,48 @@ void deltaFORDecoding(const char * src, UInt32 source_size, char * dest, UInt32
 template <>
 void deltaFORDecoding<UInt32>(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
-    const auto count = dest_size / sizeof(UInt32);
-    auto round_size = BitpackingPrimitives::roundUpToAlgorithmGroupSize(count);
+    const auto deltas_count = dest_size / sizeof(UInt32) - 1;
+    if (unlikely(deltas_count == 0))
+    {
+        memcpy(dest, src, sizeof(UInt32));
+        return;
+    }
+    auto round_size = BitpackingPrimitives::roundUpToAlgorithmGroupSize(deltas_count);
     // Reserve enough space for the temporary buffer.
-    const auto required_size = round_size * sizeof(UInt32);
+    const auto required_size = round_size * sizeof(UInt32) + sizeof(UInt32);
     char tmp_buffer[required_size];
     memset(tmp_buffer, 0, required_size);
-    FORDecoding<Int32>(src, source_size, tmp_buffer, required_size);
+    // copy the first value to the temporary buffer
+    memcpy(tmp_buffer, src, sizeof(UInt32));
+    FORDecoding<Int32>(
+        src + sizeof(UInt32),
+        source_size - sizeof(UInt32),
+        tmp_buffer + sizeof(UInt32),
+        required_size - sizeof(UInt32));
     deltaDecoding<UInt32>(reinterpret_cast<const char *>(tmp_buffer), dest_size, dest);
 }
 
 template <>
 void deltaFORDecoding<UInt64>(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
-    const auto count = dest_size / sizeof(UInt64);
-    const auto round_size = BitpackingPrimitives::roundUpToAlgorithmGroupSize(count);
+    const auto deltas_count = dest_size / sizeof(UInt64) - 1;
+    if (unlikely(deltas_count == 0))
+    {
+        memcpy(dest, src, sizeof(UInt64));
+        return;
+    }
+    const auto round_size = BitpackingPrimitives::roundUpToAlgorithmGroupSize(deltas_count);
     // Reserve enough space for the temporary buffer.
-    const auto required_size = round_size * sizeof(UInt64);
+    const auto required_size = round_size * sizeof(UInt64) + sizeof(UInt64);
     char tmp_buffer[required_size];
     memset(tmp_buffer, 0, required_size);
-    FORDecoding<Int64>(src, source_size, tmp_buffer, required_size);
+    // copy the first value to the temporary buffer
+    memcpy(tmp_buffer, src, sizeof(UInt64));
+    FORDecoding<Int64>(
+        src + sizeof(UInt64),
+        source_size - sizeof(UInt64),
+        tmp_buffer + sizeof(UInt64),
+        required_size - sizeof(UInt64));
     deltaDecoding<UInt64>(reinterpret_cast<const char *>(tmp_buffer), dest_size, dest);
 }
 
diff --git a/dbms/src/IO/Compression/EncodingUtil.h b/dbms/src/IO/Compression/EncodingUtil.h
index 0d91ec188e7..b092a46111c 100644
--- a/dbms/src/IO/Compression/EncodingUtil.h
+++ b/dbms/src/IO/Compression/EncodingUtil.h
@@ -256,7 +256,12 @@ template <std::integral T>
 void ordinaryDeltaFORDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
     using TS = typename std::make_signed_t<T>;
-    FORDecoding<TS>(src, source_size, dest, dest_size);
+    // copy first value to dest
+    memcpy(dest, src, sizeof(T));
+    if (unlikely(source_size <= sizeof(T)))
+        return;
+    // decode deltas
+    FORDecoding<TS>(src + sizeof(T), source_size - sizeof(T), dest + sizeof(T), dest_size - sizeof(T));
     ordinaryDeltaDecoding<T>(dest, dest_size, dest);
 }
 
diff --git a/dbms/src/IO/Compression/ICompressionCodec.h b/dbms/src/IO/Compression/ICompressionCodec.h
index 08b6585eef3..7542603539c 100644
--- a/dbms/src/IO/Compression/ICompressionCodec.h
+++ b/dbms/src/IO/Compression/ICompressionCodec.h
@@ -58,12 +58,6 @@ class ICompressionCodec : private boost::noncopyable
     /// Read method byte from compressed source
     static UInt8 readMethod(const char * source);
 
-    /// Return true if this codec actually compressing something. Otherwise it can be just transformation that helps compression (e.g. Delta).
-    virtual bool isCompression() const = 0;
-
-    /// Is it a generic compression algorithm like lz4, zstd. Usually it does not make sense to apply generic compression more than single time.
-    virtual bool isGenericCompression() const = 0;
-
 protected:
     /// Return size of compressed data without header
     virtual UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const { return uncompressed_size; }

From 66ea1f62bb25238b2f2e83dfcda6156c48bc700b Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com>
Date: Fri, 5 Jul 2024 17:55:24 +0800
Subject: [PATCH 20/27] Update
 dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp

---
 dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp b/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
index 3c9c496f743..8360a462955 100644
--- a/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
@@ -71,7 +71,7 @@ bool CompressionCodecLightweight::IntegerCompressContext::needAnalyze() const
         return false;
     // if lz4 is used more than COUNT_THRESHOLD times and the compression ratio is better than lightweight codec, do not analyze anymore
     if (lz4_counter > COUNT_THRESHOLD
-        && lz4_uncompressed_size / lz4_compressed_size > lw_compressed_size / lw_uncompressed_size)
+        && lz4_uncompressed_size / lz4_compressed_size > lw_uncompressed_size / lw_compressed_size)
         return false;
     return true;
 }

From f046e20be482044313888f642624c930c167b429 Mon Sep 17 00:00:00 2001
From: JaySon-Huang <tshent@qq.com>
Date: Mon, 8 Jul 2024 11:43:21 +0800
Subject: [PATCH 21/27] Add comments

---
 dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp | 10 +++++-----
 dbms/src/IO/Compression/CompressionCodecFOR.cpp      |  2 +-
 dbms/src/IO/Compression/CompressionInfo.h            |  2 ++
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
index f3e13490683..de461e05090 100644
--- a/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecDeltaFOR.cpp
@@ -122,7 +122,7 @@ void CompressionCodecDeltaFOR::doDecompressData(
     char * dest,
     UInt32 uncompressed_size) const
 {
-    if unlikely (source_size < 2)
+    if (unlikely(source_size < 2))
         throw Exception(
             ErrorCodes::CANNOT_DECOMPRESS,
             "Cannot decompress DeltaFor-encoded data. File has wrong header");
@@ -132,7 +132,7 @@ void CompressionCodecDeltaFOR::doDecompressData(
 
     UInt8 bytes_size = source[0];
     auto data_type = magic_enum::enum_cast<CompressionDataType>(bytes_size);
-    RUNTIME_CHECK(data_type.has_value());
+    RUNTIME_CHECK(data_type.has_value(), bytes_size);
 
     UInt32 source_size_no_header = source_size - 1;
     switch (data_type.value())
@@ -162,7 +162,7 @@ void CompressionCodecDeltaFOR::ordinaryDecompress(
     char * dest,
     UInt32 uncompressed_size)
 {
-    if unlikely (source_size < 2)
+    if (unlikely(source_size < 2))
         throw Exception(
             ErrorCodes::CANNOT_DECOMPRESS,
             "Cannot decompress DeltaFor-encoded data. File has wrong header");
@@ -172,9 +172,9 @@ void CompressionCodecDeltaFOR::ordinaryDecompress(
 
     UInt8 bytes_size = source[0];
     auto data_type = magic_enum::enum_cast<CompressionDataType>(bytes_size);
-    RUNTIME_CHECK(data_type.has_value());
+    RUNTIME_CHECK(data_type.has_value(), bytes_size);
 
-    UInt32 source_size_no_header = source_size - 1;
+    const UInt32 source_size_no_header = source_size - 1;
     switch (data_type.value())
     {
     case CompressionDataType::Int8:
diff --git a/dbms/src/IO/Compression/CompressionCodecFOR.cpp b/dbms/src/IO/Compression/CompressionCodecFOR.cpp
index 3c3b380946c..72f97112ecd 100644
--- a/dbms/src/IO/Compression/CompressionCodecFOR.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecFOR.cpp
@@ -111,7 +111,7 @@ void CompressionCodecFOR::doDecompressData(
     char * dest,
     UInt32 uncompressed_size) const
 {
-    if unlikely (source_size < 2)
+    if (unlikely(source_size < 2))
         throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress For-encoded data. File has wrong header");
 
     if (unlikely(uncompressed_size == 0))
diff --git a/dbms/src/IO/Compression/CompressionInfo.h b/dbms/src/IO/Compression/CompressionInfo.h
index f24e99741c6..2949f9ef1fe 100644
--- a/dbms/src/IO/Compression/CompressionInfo.h
+++ b/dbms/src/IO/Compression/CompressionInfo.h
@@ -68,10 +68,12 @@ enum class CompressionMethodByte : UInt8
 
 enum class CompressionDataType : UInt8
 {
+    // These enum values are used to represent the number of bytes of the type
     Int8 = 1, // Int8/UInt8
     Int16 = 2, // Int16/UInt16
     Int32 = 4, // Int32/UInt32
     Int64 = 8, // Int64/UInt64
+    // These enum values are not related to the number of bytes of the type
     Float32 = 9,
     Float64 = 10,
     String = 11,

From b92c5eef4c5f4024108731435165a204797e0efa Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Mon, 8 Jul 2024 11:47:58 +0800
Subject: [PATCH 22/27] add comments

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 dbms/src/IO/Compression/EncodingUtil.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/dbms/src/IO/Compression/EncodingUtil.cpp b/dbms/src/IO/Compression/EncodingUtil.cpp
index 5ce62863150..45b50ce0e1a 100644
--- a/dbms/src/IO/Compression/EncodingUtil.cpp
+++ b/dbms/src/IO/Compression/EncodingUtil.cpp
@@ -155,7 +155,11 @@ void deltaDecoding(const char * source, UInt32 source_size, char * dest)
 }
 
 #if defined(__AVX2__)
-// Note: using SIMD to rewrite compress does not improve performance.
+
+/**
+ * 1. According to microbenchmarks, SIMD encoding does not outperform the ordinary implementation.
+ * 2. A SIMD implementation for UInt16 and UInt8 is too complex and does not outperform the ordinary implementation either.
+ */
 
 template <>
 void deltaDecoding<UInt32>(const char * __restrict__ raw_source, UInt32 raw_source_size, char * __restrict__ raw_dest)

From bfdbe765eca120fc2a6e8043ab8bc58c95c5dc7c Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Mon, 8 Jul 2024 12:51:58 +0800
Subject: [PATCH 23/27] fix

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 dbms/src/IO/Compression/EncodingUtil.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/IO/Compression/EncodingUtil.h b/dbms/src/IO/Compression/EncodingUtil.h
index b092a46111c..3bfd8d3b202 100644
--- a/dbms/src/IO/Compression/EncodingUtil.h
+++ b/dbms/src/IO/Compression/EncodingUtil.h
@@ -258,7 +258,7 @@ void ordinaryDeltaFORDecoding(const char * src, UInt32 source_size, char * dest,
     using TS = typename std::make_signed_t<T>;
     // copy first value to dest
     memcpy(dest, src, sizeof(T));
-    if (unlikely(source_size <= sizeof(T)))
+    if (unlikely(source_size == sizeof(T)))
         return;
     // decode deltas
     FORDecoding<TS>(src + sizeof(T), source_size - sizeof(T), dest + sizeof(T), dest_size - sizeof(T));

From 471481f0915d59725b4f40c0033d1e05951a7cba Mon Sep 17 00:00:00 2001
From: JaySon-Huang <tshent@qq.com>
Date: Mon, 8 Jul 2024 13:49:50 +0800
Subject: [PATCH 24/27] Add sanitizer checks

---
 .../CompressionCodecLightweight_Integer.cpp   |  2 +-
 .../Compression/CompressionCodecRunLength.cpp |  2 +-
 dbms/src/IO/Compression/EncodingUtil.cpp      | 48 ++++++++++++-------
 dbms/src/IO/Compression/EncodingUtil.h        | 31 ++++++++----
 4 files changed, 53 insertions(+), 30 deletions(-)

diff --git a/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp b/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
index 8360a462955..e4ba4dad00d 100644
--- a/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
@@ -164,7 +164,7 @@ void CompressionCodecLightweight::IntegerCompressContext::analyze(std::span<cons
     static constexpr auto ADDTIONAL_BYTES = sizeof(T) + sizeof(UInt8);
     size_t for_size = BitpackingPrimitives::getRequiredSize(values.size(), for_width) + ADDTIONAL_BYTES;
     size_t estimate_lz_size = values.size() * sizeof(T) / ESRTIMATE_LZ4_COMPRESSION_RATIO;
-    size_t rle_size = rle.empty() ? std::numeric_limits<size_t>::max() : Compression::runLengthPairsSize(rle);
+    size_t rle_size = rle.empty() ? std::numeric_limits<size_t>::max() : Compression::runLengthPairsByteSize(rle);
     if (needAnalyzeRunLength() && rle_size < delta_for_size && rle_size < for_size && rle_size < estimate_lz_size)
     {
         state = std::move(rle);
diff --git a/dbms/src/IO/Compression/CompressionCodecRunLength.cpp b/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
index 08dbd908436..0364147a6d8 100644
--- a/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecRunLength.cpp
@@ -71,7 +71,7 @@ UInt32 CompressionCodecRunLength::compressDataForInteger(const char * source, UI
             ++rle_vec.back().second;
     }
 
-    if (DB::Compression::runLengthPairsSize<T>(rle_vec) >= source_size)
+    if (DB::Compression::runLengthPairsByteSize<T>(rle_vec) >= source_size)
     {
         // treat as string
         dest[0] = magic_enum::enum_integer(CompressionDataType::String);
diff --git a/dbms/src/IO/Compression/EncodingUtil.cpp b/dbms/src/IO/Compression/EncodingUtil.cpp
index 45b50ce0e1a..29717c7d32b 100644
--- a/dbms/src/IO/Compression/EncodingUtil.cpp
+++ b/dbms/src/IO/Compression/EncodingUtil.cpp
@@ -122,6 +122,8 @@ template void subtractFrameOfReference<UInt64>(UInt64 *, UInt64, UInt32);
 template <std::integral T>
 UInt8 FOREncodingWidth(std::vector<T> & values, T frame_of_reference)
 {
+    assert(!values.empty()); // caller must ensure input is not empty
+
     if constexpr (std::is_signed_v<T>)
     {
         // For signed types, after subtracting frame of reference, the range of values is not always [0, max_value - min_value].
@@ -228,55 +230,65 @@ void deltaFORDecoding(const char * src, UInt32 source_size, char * dest, UInt32
     ordinaryDeltaFORDecoding<T>(src, source_size, dest, dest_size);
 }
 
+// For UInt8/UInt16, the default implementation has better performance
+template void deltaFORDecoding<UInt8>(const char *, UInt32, char *, UInt32);
+template void deltaFORDecoding<UInt16>(const char *, UInt32, char *, UInt32);
+
 template <>
 void deltaFORDecoding<UInt32>(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
-    const auto deltas_count = dest_size / sizeof(UInt32) - 1;
+    static constexpr auto TYPE_BYTE_SIZE = sizeof(UInt32);
+    assert(source_size >= TYPE_BYTE_SIZE);
+    assert(dest_size >= TYPE_BYTE_SIZE);
+
+    const auto deltas_count = dest_size / TYPE_BYTE_SIZE - 1;
     if (unlikely(deltas_count == 0))
     {
-        memcpy(dest, src, sizeof(UInt32));
+        memcpy(dest, src, TYPE_BYTE_SIZE);
         return;
     }
     auto round_size = BitpackingPrimitives::roundUpToAlgorithmGroupSize(deltas_count);
     // Reserve enough space for the temporary buffer.
-    const auto required_size = round_size * sizeof(UInt32) + sizeof(UInt32);
+    const auto required_size = round_size * TYPE_BYTE_SIZE + TYPE_BYTE_SIZE;
     char tmp_buffer[required_size];
     memset(tmp_buffer, 0, required_size);
     // copy the first value to the temporary buffer
-    memcpy(tmp_buffer, src, sizeof(UInt32));
+    memcpy(tmp_buffer, src, TYPE_BYTE_SIZE);
     FORDecoding<Int32>(
-        src + sizeof(UInt32),
-        source_size - sizeof(UInt32),
-        tmp_buffer + sizeof(UInt32),
-        required_size - sizeof(UInt32));
+        src + TYPE_BYTE_SIZE,
+        source_size - TYPE_BYTE_SIZE,
+        tmp_buffer + TYPE_BYTE_SIZE,
+        required_size - TYPE_BYTE_SIZE);
     deltaDecoding<UInt32>(reinterpret_cast<const char *>(tmp_buffer), dest_size, dest);
 }
 
 template <>
 void deltaFORDecoding<UInt64>(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
-    const auto deltas_count = dest_size / sizeof(UInt64) - 1;
+    static constexpr auto TYPE_BYTE_SIZE = sizeof(UInt64);
+    assert(source_size >= TYPE_BYTE_SIZE);
+    assert(dest_size >= TYPE_BYTE_SIZE);
+
+    const auto deltas_count = dest_size / TYPE_BYTE_SIZE - 1;
     if (unlikely(deltas_count == 0))
     {
-        memcpy(dest, src, sizeof(UInt64));
+        memcpy(dest, src, TYPE_BYTE_SIZE);
         return;
     }
     const auto round_size = BitpackingPrimitives::roundUpToAlgorithmGroupSize(deltas_count);
     // Reserve enough space for the temporary buffer.
-    const auto required_size = round_size * sizeof(UInt64) + sizeof(UInt64);
+    const auto required_size = round_size * TYPE_BYTE_SIZE + TYPE_BYTE_SIZE;
     char tmp_buffer[required_size];
     memset(tmp_buffer, 0, required_size);
     // copy the first value to the temporary buffer
-    memcpy(tmp_buffer, src, sizeof(UInt64));
+    memcpy(tmp_buffer, src, TYPE_BYTE_SIZE);
     FORDecoding<Int64>(
-        src + sizeof(UInt64),
-        source_size - sizeof(UInt64),
-        tmp_buffer + sizeof(UInt64),
-        required_size - sizeof(UInt64));
+        src + TYPE_BYTE_SIZE,
+        source_size - TYPE_BYTE_SIZE,
+        tmp_buffer + TYPE_BYTE_SIZE,
+        required_size - TYPE_BYTE_SIZE);
     deltaDecoding<UInt64>(reinterpret_cast<const char *>(tmp_buffer), dest_size, dest);
 }
 
-template void deltaFORDecoding<UInt8>(const char *, UInt32, char *, UInt32);
-template void deltaFORDecoding<UInt16>(const char *, UInt32, char *, UInt32);
 
 } // namespace DB::Compression
diff --git a/dbms/src/IO/Compression/EncodingUtil.h b/dbms/src/IO/Compression/EncodingUtil.h
index 3bfd8d3b202..4781c2d6c74 100644
--- a/dbms/src/IO/Compression/EncodingUtil.h
+++ b/dbms/src/IO/Compression/EncodingUtil.h
@@ -90,6 +90,7 @@ void constantDeltaDecoding(const char * src, UInt32 source_size, char * dest, UI
 
 /// Run-length encoding
 
+// <value, num_of_value>
 template <std::integral T>
 using RunLengthPair = std::pair<T, UInt8>;
 template <std::integral T>
@@ -98,7 +99,7 @@ template <std::integral T>
 static constexpr size_t RunLengthPairLength = sizeof(T) + sizeof(UInt8);
 
 template <std::integral T>
-size_t runLengthPairsSize(const RunLengthPairs<T> & rle)
+size_t runLengthPairsByteSize(const RunLengthPairs<T> & rle)
 {
     return rle.size() * RunLengthPairLength<T>;
 }
@@ -134,7 +135,11 @@ void runLengthDecoding(const char * src, UInt32 source_size, char * dest, UInt32
         auto count = unalignedLoad<UInt8>(src);
         src += sizeof(UInt8);
         if (unlikely(dest + count * sizeof(T) > dest_end))
-            throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot use RunLength decoding, data is too large");
+            throw Exception(
+                ErrorCodes::CANNOT_DECOMPRESS,
+                "Cannot use RunLength decoding, data is too large, count={} elem_byte={}",
+                count,
+                sizeof(T));
         if constexpr (std::is_same_v<T, UInt8> || std::is_same_v<T, Int8>)
         {
             memset(dest, value, count);
@@ -162,7 +167,8 @@ UInt8 FOREncodingWidth(std::vector<T> & values, T frame_of_reference);
 template <std::integral T, bool skip_subtract_frame_of_reference = false>
 size_t FOREncoding(std::vector<T> & values, T frame_of_reference, UInt8 width, char * dest)
 {
-    assert(!values.empty());
+    assert(!values.empty()); // caller must ensure input is not empty
+
     if constexpr (!skip_subtract_frame_of_reference)
         subtractFrameOfReference(values.data(), frame_of_reference, values.size());
     // store frame of reference
@@ -186,25 +192,26 @@ void applyFrameOfReference(T * dst, T frame_of_reference, UInt32 count);
 template <std::integral T>
 void FORDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
-    UInt8 bytes_size = sizeof(T);
-    if unlikely (dest_size % bytes_size != 0)
+    static constexpr UInt8 BYTES_SIZE = sizeof(T);
+    if unlikely (dest_size % BYTES_SIZE != 0)
         throw Exception(
             ErrorCodes::CANNOT_DECOMPRESS,
             "uncompressed size {} is not aligned to {}",
             dest_size,
-            bytes_size);
-    const auto count = dest_size / sizeof(T);
+            BYTES_SIZE);
+
+    const auto count = dest_size / BYTES_SIZE;
     T frame_of_reference = unalignedLoad<T>(src);
-    src += sizeof(T);
+    src += BYTES_SIZE;
     auto width = unalignedLoad<UInt8>(src);
     src += sizeof(UInt8);
-    const auto required_size = source_size - sizeof(T) - sizeof(UInt8);
+    const auto required_size = source_size - BYTES_SIZE - sizeof(UInt8);
     RUNTIME_CHECK(BitpackingPrimitives::getRequiredSize(count, width) == required_size);
     auto round_size = BitpackingPrimitives::roundUpToAlgorithmGroupSize(count);
     if (round_size != count)
     {
         // Reserve enough space for the temporary buffer.
-        unsigned char tmp_buffer[round_size * sizeof(T)];
+        unsigned char tmp_buffer[round_size * BYTES_SIZE];
         BitpackingPrimitives::unPackBuffer<T>(tmp_buffer, reinterpret_cast<const unsigned char *>(src), count, width);
         applyFrameOfReference(reinterpret_cast<T *>(tmp_buffer), frame_of_reference, count);
         memcpy(dest, tmp_buffer, dest_size);
@@ -255,6 +262,10 @@ void deltaDecoding(const char * source, UInt32 source_size, char * dest);
 template <std::integral T>
 void ordinaryDeltaFORDecoding(const char * src, UInt32 source_size, char * dest, UInt32 dest_size)
 {
+    // The caller must ensure both source_size and dest_size are at least sizeof(T)
+    assert(source_size >= sizeof(T));
+    assert(dest_size >= sizeof(T));
+
     using TS = typename std::make_signed_t<T>;
     // copy first value to dest
     memcpy(dest, src, sizeof(T));

From c6ac15338637e1adce212b2a1d8b301f3378ee50 Mon Sep 17 00:00:00 2001
From: JaySon-Huang <tshent@qq.com>
Date: Mon, 8 Jul 2024 13:54:54 +0800
Subject: [PATCH 25/27] Use UInt32 for looping

---
 dbms/src/IO/Compression/EncodingUtil.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/IO/Compression/EncodingUtil.h b/dbms/src/IO/Compression/EncodingUtil.h
index 4781c2d6c74..24cad02ab5c 100644
--- a/dbms/src/IO/Compression/EncodingUtil.h
+++ b/dbms/src/IO/Compression/EncodingUtil.h
@@ -147,7 +147,7 @@ void runLengthDecoding(const char * src, UInt32 source_size, char * dest, UInt32
         }
         else
         {
-            for (UInt8 j = 0; j < count; ++j)
+            for (UInt32 j = 0; j < count; ++j)
             {
                 unalignedStore<T>(dest, value);
                 dest += sizeof(T);

From d3d14d60b9bb67de75e024ddc7e0fd35642b2025 Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <yan1579196623@gmail.com>
Date: Mon, 8 Jul 2024 15:30:39 +0800
Subject: [PATCH 26/27] assert source_size > 0

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
---
 dbms/src/IO/Compression/ICompressionCodec.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dbms/src/IO/Compression/ICompressionCodec.cpp b/dbms/src/IO/Compression/ICompressionCodec.cpp
index 1e0d2cb94e2..393669045ab 100644
--- a/dbms/src/IO/Compression/ICompressionCodec.cpp
+++ b/dbms/src/IO/Compression/ICompressionCodec.cpp
@@ -30,6 +30,7 @@ extern const int CORRUPTED_DATA;
 UInt32 ICompressionCodec::compress(const char * source, UInt32 source_size, char * dest) const
 {
     assert(source != nullptr && dest != nullptr);
+    assert(source_size > 0);
 
     dest[0] = getMethodByte();
     UInt8 header_size = getHeaderSize();

From 7b6f51d0b90e053cb92d6c5fd29825e36b5a301e Mon Sep 17 00:00:00 2001
From: JaySon-Huang <tshent@qq.com>
Date: Mon, 8 Jul 2024 15:50:46 +0800
Subject: [PATCH 27/27] Add comments

---
 dbms/src/IO/Compression/CompressionCodecLightweight.h        | 3 +++
 .../IO/Compression/CompressionCodecLightweight_Integer.cpp   | 5 +++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/dbms/src/IO/Compression/CompressionCodecLightweight.h b/dbms/src/IO/Compression/CompressionCodecLightweight.h
index f1f952fe68f..bc092d06e27 100644
--- a/dbms/src/IO/Compression/CompressionCodecLightweight.h
+++ b/dbms/src/IO/Compression/CompressionCodecLightweight.h
@@ -27,6 +27,9 @@ namespace DB
  * For integer data, it supports constant, constant delta, run-length, frame of reference, delta frame of reference, and LZ4.
  * For non-integer data, it supports LZ4.
  * The codec selects the best mode for each block of data.
+ *
+ * Note that this codec instance contains `ctx` for choosing the best compression
+ * mode for each block. Do NOT reuse the same instance for encoding data across multiple threads.
  */
 class CompressionCodecLightweight : public ICompressionCodec
 {
diff --git a/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp b/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
index e4ba4dad00d..6ce2b51bbcc 100644
--- a/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
+++ b/dbms/src/IO/Compression/CompressionCodecLightweight_Integer.cpp
@@ -121,8 +121,9 @@ void CompressionCodecLightweight::IntegerCompressContext::analyze(std::span<cons
     {
         // Check CONSTANT_DELTA
 
-        // If values.size() == 1, mode will be CONSTANT_DELTA
-        // so values.size() must be greater than 1 here.
+        // If values.size() == 1, the mode will be CONSTANT,
+        // so values.size() must be greater than 1 here and deltas will be non-empty.
+        assert(values.size() > 1);
         deltas.reserve(values.size() - 1);
         for (size_t i = 1; i < values.size(); ++i)
         {