diff --git a/ydb/core/load_test/group_write.cpp b/ydb/core/load_test/group_write.cpp index 26bb6a9c7145..8615b2c10146 100644 --- a/ydb/core/load_test/group_write.cpp +++ b/ydb/core/load_test/group_write.cpp @@ -1431,7 +1431,7 @@ class TLogWriterLoadTestActor : public TActorBootstrapped static ResultContainer GenerateBuffer(const TLogoBlobID& id) { - return GenDataForLZ4(id.BlobSize()); + return FastGenDataForLZ4(id.BlobSize()); } STRICT_STFUNC(StateFunc, diff --git a/ydb/core/util/lz4_data_generator.h b/ydb/core/util/lz4_data_generator.h index 8e1689cab343..af1d8f349cde 100644 --- a/ydb/core/util/lz4_data_generator.h +++ b/ydb/core/util/lz4_data_generator.h @@ -24,27 +24,93 @@ inline ResultContainer GenDataForLZ4(const ui64 size, const ui64 seed = 0) { return data; } -inline TString FastGenDataForLZ4(size_t size, ui64 seed) { - TString data = TString::Uninitialized(size); - +template +inline ResultContainer FastGenDataForLZ4(size_t size, ui64 seed = 0) { + ResultContainer data = ResultContainer::Uninitialized(size); + char *ptr = [&]() -> char * { + if constexpr(std::is_same::value) { + return data.Detach(); + } else { + return data.mutable_data(); + } + }(); TReallyFastRng32 rng(seed); constexpr size_t minRunLen = 32; constexpr size_t maxRunLen = 64; const size_t runLen = minRunLen + sizeof(ui32) * (rng() % ((maxRunLen - minRunLen) / sizeof(ui32) + 1)); - char run[maxRunLen]; - ui32 i; - for (i = 0; i < runLen; i += sizeof(ui32)) { - reinterpret_cast(i[run]) = rng(); - } - Y_DEBUG_ABORT_UNLESS(i == runLen); +#define UNROLL(LEN) \ + do { \ + ui64 x0, x1, x2, x3, x4 = 0, x5 = 0, x6 = 0, x7 = 0; \ + x0 = rng() | (ui64)rng() << 32; \ + x1 = rng() | (ui64)rng() << 32; \ + x2 = rng() | (ui64)rng() << 32; \ + x3 = rng() | (ui64)rng() << 32; \ + if constexpr (LEN >= 36) { \ + x4 = rng() | (ui64)rng() << 32; \ + } \ + if constexpr (LEN >= 44) { \ + x5 = rng() | (ui64)rng() << 32; \ + } \ + if constexpr (LEN >= 52) { \ + x6 = rng() | (ui64)rng() << 32; \ + } \ + if constexpr (LEN >= 60) { \ + x7 = rng() | (ui64)rng() << 32; \ + } \ + while (size >= LEN) { \ + *reinterpret_cast(ptr) = x0; \ + *reinterpret_cast(ptr + 8) = x1; \ + *reinterpret_cast(ptr + 16) = x2; \ + *reinterpret_cast(ptr + 24) = x3; \ + if constexpr (LEN == 36) { \ + *reinterpret_cast(ptr + 32) = x4; \ + } else if constexpr (LEN >= 40) { \ + *reinterpret_cast(ptr + 32) = x4; \ + } \ + if constexpr (LEN == 44) { \ + *reinterpret_cast(ptr + 40) = x5; \ + } else if constexpr (LEN >= 48) { \ + *reinterpret_cast(ptr + 40) = x5; \ + } \ + if constexpr (LEN == 52) { \ + *reinterpret_cast(ptr + 48) = x6; \ + } else if constexpr (LEN >= 56) { \ + *reinterpret_cast(ptr + 48) = x6; \ + } \ + if constexpr (LEN == 60) { \ + *reinterpret_cast(ptr + 56) = x7; \ + } else if constexpr (LEN >= 64) { \ + *reinterpret_cast(ptr + 56) = x7; \ + } \ + ptr += LEN; \ + size -= LEN; \ + } \ + for (ui64 x : {x0, x1, x2, x3, x4, x5, x6, x7}) { \ + if (size >= 8) { \ + *reinterpret_cast(ptr) = x; \ + ptr += 8; \ + size -= 8; \ + } else { \ + memcpy(ptr, &x, size); \ + break; \ + } \ + } \ + } while (false); - char *ptr = data.Detach(); - for (; size >= runLen; size -= runLen, ptr += runLen) { - memcpy(ptr, run, runLen); + switch (runLen) { + case 32: UNROLL(32); break; + case 36: UNROLL(36); break; + case 40: UNROLL(40); break; + case 44: UNROLL(44); break; + case 48: UNROLL(48); break; + case 52: UNROLL(52); break; + case 56: UNROLL(56); break; + case 60: UNROLL(60); break; + case 64: UNROLL(64); break; + default: Y_ABORT(); } - memcpy(ptr, run, size); return data; }