From 2c92c817354d478f8bd3e1498ee8252e6514b7d8 Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Thu, 9 May 2024 11:39:21 +0800 Subject: [PATCH] log Signed-off-by: Lloyd-Pottiger --- .../CompressionCodecIntegerLightweight.cpp | 29 ++++++++++++++++--- .../CompressionCodecIntegerLightweight.h | 5 ++++ .../Storages/DeltaMerge/File/DMFileWriter.h | 18 +++++++++++- 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp index 27ca11b2cf5..c7a3b40ed9b 100644 --- a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp +++ b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.cpp @@ -52,6 +52,12 @@ UInt32 CompressionCodecIntegerLightweight::getMaxCompressedDataSize(UInt32 uncom return 1 + 1 + LZ4_COMPRESSBOUND(uncompressed_size); } +CompressionCodecIntegerLightweight::~CompressionCodecIntegerLightweight() +{ + if (ctx.isCompression()) + LOG_INFO(Logger::get(), "lightweight codec: {}", ctx.toDebugString()); +} + template size_t CompressionCodecIntegerLightweight::compressDataForType(const char * source, UInt32 source_size, char * dest) const @@ -166,6 +172,18 @@ void CompressionCodecIntegerLightweight::decompressDataForType( } } +String CompressionCodecIntegerLightweight::CompressContext::toDebugString() const +{ + return fmt::format( + "lz4: {} times, {} -> {}, lightweight: {} times, {} -> {}", + lz4_counter, + lz4_uncompressed_size, + lz4_compressed_size, + lw_counter, + lw_uncompressed_size, + lw_compressed_size); +} + void CompressionCodecIntegerLightweight::CompressContext::update(size_t uncompressed_size, size_t compressed_size) { if (mode == Mode::LZ4) @@ -219,7 +237,10 @@ void CompressionCodecIntegerLightweight::CompressContext::analyze(std::span> rle; + Compression::RLEPairs rle; if (needAnalyzeRLE()) { rle.reserve(values.size()); @@ -281,8 +302,8 @@ void CompressionCodecIntegerLightweight::CompressContext::analyze(std::span::max() : Compression::RLEPairsSize(rle); + if (needAnalyzeRLE() && rle_size < delta_for_size && rle_size < for_size && rle_size < estimate_lz_size) { state = std::move(rle); mode = Mode::RLE; @@ -293,7 +314,7 @@ void CompressionCodecIntegerLightweight::CompressContext::analyze(std::span{std::move(values_copy), min_value, for_width}; mode = Mode::FOR; } - else if (delta_for_size < estimate_lz_size) + else if (needAnalyzeDelta() && delta_for_size < estimate_lz_size) { state = DeltaFORState{std::move(deltas), min_delta, delta_for_width}; mode = Mode::DELTA_FOR; diff --git a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h index f2760bcaf05..20ddc9331a4 100644 --- a/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h +++ b/dbms/src/IO/Compression/CompressionCodecIntegerLightweight.h @@ -29,6 +29,8 @@ class CompressionCodecIntegerLightweight : public ICompressionCodec UInt8 getMethodByte() const override; + ~CompressionCodecIntegerLightweight() override; + protected: UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) @@ -93,6 +95,9 @@ class CompressionCodecIntegerLightweight : public ICompressionCodec void update(size_t uncompressed_size, size_t compressed_size); + String toDebugString() const; + bool isCompression() const { return lz4_counter > 0 || lw_counter > 0; } + Mode mode = Mode::LZ4; private: diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h index d6732b8da9a..8a1bafcbb66 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h @@ -65,9 +65,25 @@ class DMFileWriter /*flags*/ -1, /*mode*/ 0666, max_compress_block_size)) - , compressed_buf(CompressedWriteBuffer<>::build(*plain_file, compression_settings, !dmfile->configuration)) , minmaxes(do_index ? std::make_shared(*type) : nullptr) { + // TODO: better, now only for test + if (type->isInteger()) + { + assert(compression_settings.settings.size() == 1); + CompressionSettings settings(CompressionMethod::Lightweight); + auto & setting = settings.settings[0]; + setting.type_bytes_size = type->getSizeOfValueInMemory(); + compressed_buf = CompressedWriteBuffer<>::build(*plain_file, settings, !dmfile->configuration); + } + else + { + compressed_buf = CompressedWriteBuffer<>::build( // + *plain_file, + compression_settings, + !dmfile->configuration); + } + if (!dmfile->useMetaV2()) { mark_file = ChecksumWriteBufferBuilder::