diff --git a/CMakeLists.txt b/CMakeLists.txt index 96f188d..d554ebf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,7 @@ install(EXPORT DLSM-Development include(CTest) if(BUILD_TESTING) set(CMAKE_CTEST_ARGUMENTS -V) # Enable verbose output on test target +add_subdirectory(tests/apps) add_subdirectory(tests/perf) add_subdirectory(tests/unit) endif() diff --git a/README.md b/README.md index dc6c160..9eedf4a 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,48 @@ This project contains scripts and snippets in C++. - Integration with [clang-format](https://clang.llvm.org/docs/ClangFormat.html) and [clang-tidy](https://clang.llvm.org/extra/clang-tidy/) - Integration with [Doxygen](https://github.com/doxygen/doxygen) +## Disruptor +Yet another implementation of [LMAX Disruptor](https://lmax-exchange.github.io/disruptor/disruptor.html) in `C++20`. +
<details><summary>Details</summary> + +#### Other known implementations + - Original [LMAX Disruptor](https://github.com/LMAX-Exchange/disruptor) in `Java` + - [Abc-Arbitrage/Disruptor-cpp](https://github.com/Abc-Arbitrage/Disruptor-cpp) + - [lewissbaker/disruptorplus](https://github.com/lewissbaker/disruptorplus) + - [Vallest/Disruptor-CPP](https://github.com/Vallest/Disruptor-CPP) + - [jeremyko/disruptorCpp-IPC](https://github.com/jeremyko/disruptorCpp-IPC) + +#### Features of [dlsm::Disruptor](include/impl/Disruptor.hpp) + - Template-based implementation with different components for customization: + - Barriers:: + - PointerBarrier - minimal container for dependencies of a sequence + - AtomicsBarrier - `std::atomic` pointers to dependencies + - OffsetsBarrier - `std::atomic` offsets to dependencies for placing in shared memory + - Waits:: + - SpinsStrategy - busy-wait based on exponential `__x86_64__` `_mm_pause()` intrinsic + - YieldStrategy - busy-wait based on `std::this_thread::yield()` + - BlockStrategy - blocking strategy based on `std::condition_variable_any` + - ShareStrategy - blocking strategy based on `pthreads` for placing in shared memory + - Sequencers:: + - `SPMC` - Single Producer Multiple Consumers pattern + - `MPMC` - Multiple Producers Multiple Consumers pattern + - Ring - adapter for external random-access container(`std::array/vector`) for ring-access to Events + - External memory injection(optional) for sequencers, useful for placement in HugePages/SharedMemory + - [Unit](tests/unit/TestDisruptor.cpp) and [Performance(latency&throughput)](tests/perf/PerfDisruptor.cpp) tests + - [dlsm::Disruptor::Graph](include/impl/DisruptorGraph.hpp) - high-level API + +#### Known defects and limitations + - Implementation of lock-free operations is not portable to `Weak Memory Model` platforms(ARM, PowerPC) + - `Claim-Timeout`/`Consume-Timeout` operations are not implemented in Sequencers(Publishers and Consumers) + - `SPSC` - Single Producer Single Consumer pattern is not 
implemented + - dlsm::Disruptor::Graph has high overhead caused by indirections and virtual calls + - dlsm::Disruptor::Graph is incomplete and unstable + +#### Latency & Throughput tests +Results of performance tests are in separate [tests/perf/Disruptor.md](tests/perf/Disruptor.md). + +</details>
+ ## Useful Scripts ```sh ./scripts/format.sh # Apply .clang-format rules on hpp/cpp files @@ -31,3 +73,4 @@ This project contains scripts and snippets in C++. ## Links and References - [Measuring Latency in Linux](http://btorpey.github.io/blog/2014/02/18/clock-sources-in-linux/) - [Weak vs. Strong Memory Models](https://preshing.com/20120930/weak-vs-strong-memory-models/) + - [Fast and/or Large Memory – Cache and Memory Hierarchy](https://cw.fel.cvut.cz/b192/_media/courses/b35apo/en/lectures/04/b35apo_lecture04-cache-en.pdf) diff --git a/docs/images/delays-Disruptor-1P4C.png b/docs/images/delays-Disruptor-1P4C.png new file mode 100644 index 0000000..ebdc058 Binary files /dev/null and b/docs/images/delays-Disruptor-1P4C.png differ diff --git a/include/impl/Disruptor.hpp b/include/impl/Disruptor.hpp new file mode 100644 index 0000000..523fe4b --- /dev/null +++ b/include/impl/Disruptor.hpp @@ -0,0 +1,1128 @@ +#pragma once + +#include // for ::pthread_condattr_setpshared() in ShareStrategy + +#include +#include +#include // for std::has_single_bit() - is power of two +#include +#include +#include +#include +#include +#include +#include +#include // for std::hardware_destructive_interference_size +#include +#include + +#include "impl/Thread.hpp" + +// Disclaimer: This implementation is not portable! The atomic operations +// below won't work as expected on platforms with Weak Memory Model, like: +// ARM, PowerPC, Itanium. This implementation was tested on x86/64 CPU. +// For portability, some std::atomic_thread_fence() are necessary. 
+namespace dlsm::Disruptor { + +constexpr std::size_t ceilingNextPowerOfTwo(const std::size_t value) { + std::size_t result = 2; + while (result < value) result <<= 1; + return result; +} + +constexpr bool isPowerOf2(const std::size_t value) { return std::has_single_bit(value); } + +static constexpr auto CacheLineSize = 64; // std::hardware_destructive_interference_size; + +struct alignas(CacheLineSize) Sequence { + using Value = std::int64_t; + static constexpr Value Initial = -1; + static constexpr Value Max = std::numeric_limits::max(); + + struct Atomic : public std::atomic { + using Base = std::atomic; + using Base::Base; + Atomic(const Atomic& that) : Base{that.load()} {} + }; + static_assert(Atomic::is_always_lock_free); + + using Ptr = const Sequence::Atomic*; + + Atomic value_; // implicit padding by alignas + + Sequence(Value v = Initial) : value_{v} {} + + bool operator==(const Sequence& that) const { return load() == that.load(); } + + inline Ptr ptr() const { return &value_; } + operator Ptr() const { return &value_; } + operator Ptr() { return &value_; } + + inline void store(const Value value) { value_.store(value, std::memory_order_release); } + inline Value load() const { return value_.load(std::memory_order_acquire); } + inline Value add(Value v) { return value_.fetch_add(v, std::memory_order_relaxed); } + inline bool cas(Value expected, Value desired) { + return value_.compare_exchange_weak(expected, desired, std::memory_order_relaxed, std::memory_order_relaxed); + // return std::atomic_compare_exchange_strong(&m_fieldsValue, &expectedSequence, nextSequence); + } +}; + +static_assert(sizeof(Sequence::Atomic) == 8); +static_assert(sizeof(Sequence) == CacheLineSize); +static_assert(alignof(Sequence) == CacheLineSize); + +template +struct Group { + constexpr static std::size_t MaxItems = N; + using Pointers = std::array; + Pointers items_{nullptr}; + + Group() = default; + Group(const Group& that) = default; + template // Add array of pointers to 
Sequence + Group(const Seq... seqs) : items_{seqs...} { + static_assert(sizeof...(seqs) <= MaxItems); + } + + std::size_t size() const { + std::size_t count = 0UL; + for (const auto& p : items_) { + if (p != nullptr) ++count; + } + return count; + } + + bool add(Sequence::Ptr ptr) { return replace(nullptr, ptr); } + bool del(Sequence::Ptr ptr) { return replace(ptr, nullptr); } + bool replace(Sequence::Ptr removable, Sequence::Ptr desired) { + for (auto& p : items_) { + if (p == removable) { + p = desired; + return true; + } + } + return false; + } +}; + +namespace Barriers { +// Barrier is: +// - Sequence number, it represents last processed item index +// - Dependencies, list of references to sequence numbers, +// whose progress blocks current processing + +template +concept Concept = requires(BarrierType b, Sequence::Ptr r, const Group<>& g, Sequence::Ptr ptr, Sequence::Value s) { + { b.cursor() } noexcept -> std::same_as; + { b.last() } noexcept -> std::same_as; + { b.release(s) } noexcept -> std::same_as; + { b.size() } noexcept -> std::same_as; + { b.contains(ptr) } noexcept -> std::same_as; + { b.add(ptr) } noexcept -> std::same_as; + { b.del(ptr) } noexcept -> std::same_as; + { b.replace(r, ptr) } noexcept -> std::same_as; + { b.replace(g, ptr) } noexcept -> std::same_as; + { b.dependencies() } noexcept -> std::same_as>; + { b.set(g) } noexcept -> std::same_as; + { b.depends(g) } noexcept -> std::same_as; + { b.minimumSequence() } noexcept -> std::same_as; +}; + +// PointerBarrier keeps dependencies as raw pointers. +// Barriers connections must be done before publishing data. 
+template +struct PointerBarrier { + constexpr static std::size_t MaxItems = N; + using Pointers = std::array; + + alignas(CacheLineSize) Sequence last_{Sequence::Initial}; + alignas(CacheLineSize) Pointers pointers_{nullptr}; + + PointerBarrier() = default; + PointerBarrier(const PointerBarrier& that) = default; + + Sequence::Ptr cursor() const noexcept { return last_; } + Sequence::Value last() const noexcept { return last_.load(); } + void release(Sequence::Value sequence) noexcept { last_.store(sequence); } + + std::size_t size() const noexcept { + std::size_t count = 0UL; + for (const auto& p : pointers_) { + if (p != nullptr) ++count; + } + return count; + } + + bool contains(const Sequence::Ptr ptr) const noexcept { + if (ptr) { + for (const auto& p : pointers_) { + if (p == ptr) return true; + } + } + return false; + } + + bool add(const Sequence::Ptr ptr) noexcept { return replace(nullptr, ptr); } + bool del(const Sequence::Ptr ptr) noexcept { return replace(ptr, nullptr); } + + bool replace(const Sequence::Ptr removable, const Sequence::Ptr desired) noexcept { + for (auto& p : pointers_) { + if (p == removable) { + p = desired; + return true; + } + } + return false; + } + + bool replace(const Group<>& removable, const Sequence::Ptr desired) noexcept { + bool replaced = false; + for (const auto& ptr : removable.items_) { + if (ptr) { + if (replaced) { + replace(ptr, nullptr); + } else { + replaced = replace(ptr, desired); + } + } + } + return replaced; + } + + Group<> dependencies() const noexcept { + Group<> result; + for (std::size_t i = 0; i < pointers_.size(); ++i) { + result.items_[i] = pointers_[i]; // NOLINT + } + + return result; + } + + void set(const Group<>& dependencies) noexcept { + for (std::size_t i = 0; i < pointers_.size(); ++i) { + pointers_[i] = dependencies.items_[i]; // NOLINT + } + } + + void depends(const Group<>& dependencies) noexcept { + set(dependencies); + last_.store(dependencies.size() ? 
minimumSequence() : Sequence::Initial); + } + + Sequence::Value minimumSequence(Sequence::Value minimum = Sequence::Max) const noexcept { + for (const auto& ptr : pointers_) { + if (ptr) { + Sequence::Value seq = ptr->load(); + if (seq < minimum) minimum = seq; + } + } + return minimum; + } +}; + +// AtomicsBarrier keeps dependencies as atomic pointers. +// Barriers connections can be changed during data publishing. +template +struct AtomicsBarrier { + constexpr static std::size_t MaxItems = N; + using Atomics = std::array, MaxItems>; + static_assert(Atomics::value_type::is_always_lock_free); + + alignas(CacheLineSize) Sequence last_{Sequence::Initial}; + alignas(CacheLineSize) Atomics pointers_{nullptr}; + + AtomicsBarrier() = default; + AtomicsBarrier(const AtomicsBarrier& that) : last_{that.last_.load()} { + for (std::size_t i = 0; auto& p : pointers_) { + p = that.pointers_[i++].load(); // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index) + } + } + + Sequence::Ptr cursor() const noexcept { return last_; } + Sequence::Value last() const noexcept { return last_.load(); } + void release(Sequence::Value sequence) noexcept { last_.store(sequence); } + + std::size_t size() const noexcept { + std::size_t count = 0UL; + for (const auto& p : pointers_) { + if (p != nullptr) ++count; + } + return count; + } + + bool contains(const Sequence::Ptr ptr) const noexcept { + if (ptr) { + for (const auto& p : pointers_) { + if (p == ptr) return true; + } + } + return false; + } + + bool add(const Sequence::Ptr ptr) noexcept { return replace(nullptr, ptr); } + bool del(const Sequence::Ptr ptr) noexcept { return replace(ptr, nullptr); } + + bool replace(const Sequence::Ptr removable, const Sequence::Ptr desired) noexcept { + for (auto& p : pointers_) { + auto expected = removable; + if (p.compare_exchange_strong(expected, desired)) { + return true; + } + } + return false; + } + + bool replace(const Group<>& removable, const Sequence::Ptr desired) noexcept { + bool replaced 
= false; + for (const auto& ptr : removable.items_) { + if (ptr) { + if (replaced) { + replace(ptr, nullptr); + } else { + replaced = replace(ptr, desired); + } + } + } + return replaced; + } + + Group<> dependencies() const noexcept { + Group<> result; + for (std::size_t i = 0; i < pointers_.size(); ++i) { + result.items_[i] = pointers_[i].load(); // NOLINT + } + + return result; + } + + void set(const Group<>& dependencies) noexcept { + for (std::size_t i = 0; i < pointers_.size(); ++i) { + pointers_[i] = dependencies.items_[i]; // NOLINT + } + } + + void depends(const Group<>& dependencies) noexcept { + set(dependencies); + last_.store(dependencies.size() ? minimumSequence() : Sequence::Initial); + } + + Sequence::Value minimumSequence(Sequence::Value minimum = Sequence::Max) const noexcept { + for (const auto& p : pointers_) { + auto ptr = p.load(std::memory_order_acquire); + if (ptr) { + Sequence::Value seq = ptr->load(); + if (seq < minimum) minimum = seq; + } + } + return minimum; + } +}; + +// OffsetsBarrier keeps dependencies as atomic offsets relative to its last. +// It can be stored in shared memory for inter-process communications. +template +struct OffsetsBarrier { + constexpr static std::size_t MaxItems = N; + using Offsets = std::array, MaxItems>; + static_assert(Offsets::value_type::is_always_lock_free); + + alignas(CacheLineSize) Sequence last_{Sequence::Initial}; + alignas(CacheLineSize) Offsets offsets_{0}; // offsets relative to last_ + + OffsetsBarrier() = default; + OffsetsBarrier(const OffsetsBarrier& that) : last_{that.last_.load()} { + for (std::size_t i = 0; auto& p : offsets_) { + auto off = that.offsets_[i].load(); // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index) + p = off ? 
offset(that.ptr(off)) : 0; + ++i; + } + } + + Sequence::Ptr cursor() const noexcept { return last_; } + Sequence::Value last() const noexcept { return last_.load(); } + void release(Sequence::Value sequence) noexcept { last_.store(sequence); } + + std::ptrdiff_t offset(const Sequence::Ptr ptr) const { return ptr - &last_.value_; } + Sequence::Ptr ptr(std::ptrdiff_t offset) const { return &last_.value_ + offset; } + + std::size_t size() const noexcept { + std::size_t count = 0UL; + for (const auto& p : offsets_) { + if (p != 0) ++count; + } + return count; + } + + bool contains(const Sequence::Ptr ptr) const noexcept { + if (ptr) { + const auto off = offset(ptr); + for (const auto& p : offsets_) { + if (p == off) return true; + } + } + return false; + } + + bool add(const Sequence::Ptr ptr) noexcept { return replace(nullptr, ptr); } + bool del(const Sequence::Ptr ptr) noexcept { return replace(ptr, nullptr); } + + bool replace(std::ptrdiff_t removable, const std::ptrdiff_t desired) noexcept { + for (auto& p : offsets_) { + auto expected = removable; + if (p.compare_exchange_strong(expected, desired)) { + return true; + } + } + return false; + } + + bool replace(const Sequence::Ptr removable, const Sequence::Ptr desired) noexcept { + const auto rem = removable ? offset(removable) : 0; + const auto off = desired ? offset(desired) : 0; + return replace(rem, off); + } + + bool replace(const Group<>& removable, const Sequence::Ptr desired) noexcept { + bool replaced = false; + const auto off = desired ? offset(desired) : 0; + for (const auto& ptr : removable.items_) { + if (ptr) { + if (replaced) { + replace(offset(ptr), 0); + } else { + replaced = replace(offset(ptr), off); + } + } + } + return replaced; + } + + Group<> dependencies() const noexcept { + Group<> result; + for (std::size_t i = 0; i < offsets_.size(); ++i) { + auto offset = offsets_[i].load(); // NOLINT + result.items_[i] = (offset != 0) ? 
ptr(offset) : nullptr; // NOLINT + } + + return result; + } + + void set(const Group<>& dependencies) noexcept { + for (std::size_t i = 0; i < offsets_.size(); ++i) { + auto ptr = dependencies.items_[i]; // NOLINT + offsets_[i] = ptr ? offset(ptr) : 0; // NOLINT + } + } + + void depends(const Group<>& dependencies) noexcept { + set(dependencies); + last_.store(dependencies.size() ? minimumSequence() : Sequence::Initial); + } + + Sequence::Value minimumSequence(Sequence::Value minimum = Sequence::Max) const noexcept { + for (const auto& offset : offsets_) { + auto off = offset.load(std::memory_order_acquire); + if (off != 0) { + Sequence::Value seq = ptr(off)->load(); + if (seq < minimum) minimum = seq; + } + } + return minimum; + } +}; + +static_assert(Concept>); +static_assert(Concept>); +static_assert(Concept>); + +} // namespace Barriers + +using Barrier = Barriers::OffsetsBarrier<8>; + +static_assert(sizeof(Barrier) == CacheLineSize * 2); +static_assert(alignof(Barrier) == CacheLineSize); +static_assert(offsetof(Barrier, last_) == 0); + +template +Group<> group(const Barrier&... dependsOn) { + return Group<>{dependsOn.cursor()...}; +} + +template +inline std::size_t available(const Barrier& b) { + auto min = b.minimumSequence(); + auto end = b.last(); + // runtime error: signed integer overflow: 9223372036854775807 - -1 cannot be represented in type 'Sequence::Value' + auto result = (min >= end) ? 
(min - end) : (end - min); + return static_cast(result); +} + +namespace Waits { + +template +concept Concept = requires(Strategy s, Sequence::Value seq, const Barrier& seqs, Sequence::Ptr sptr) { + { s.wait(seq, seqs) } -> std::same_as; + { s.wait(seq, sptr) } -> std::same_as; + { s.signalAllWhenBlocking() } -> std::same_as; +}; + +template +inline bool waitingDone(Sequence::Value& result, const Sequence::Value sequence, const Barrier& seqs) { + return (result = seqs.minimumSequence()) >= sequence; +} + +inline bool waitingDone(Sequence::Value& result, const Sequence::Value sequence, Sequence::Ptr sptr) { + return (result = sptr->load()) >= sequence; +} + +struct SpinsStrategy { + struct Spinner { + static constexpr std::uint32_t Limit = 10U; + static constexpr std::uint32_t Sleep = 20U; + inline static const std::uint32_t Initial = [] { + return std::thread::hardware_concurrency() > 1 ? 0U : Limit; + }(); + std::uint32_t iteration_ = Initial; + + void once() { + // Exponentially longer sequences of busy-waits calls + if (iteration_ < Limit) { + auto count = 2 << iteration_; + while (count-- != 0) dlsm::Thread::pause(); + } else { + if (iteration_ == Sleep) { + iteration_ = Limit - 1; + dlsm::Thread::NanoSleep::pause(); + } else { + std::this_thread::yield(); + } + } + ++iteration_; + } + }; + + SpinsStrategy() = default; + template + Sequence::Value wait(const Sequence::Value sequence, const Barrier& seqs) const { + Spinner spinner; + Sequence::Value result = 0; + while (!waitingDone(result, sequence, seqs)) spinner.once(); + return result; + } + + Sequence::Value wait(const Sequence::Value sequence, Sequence::Ptr sptr) const { + Spinner spinner; + Sequence::Value result = 0; + while (!waitingDone(result, sequence, sptr)) spinner.once(); + return result; + } + + void signalAllWhenBlocking() {} +}; + +struct YieldStrategy { + const std::size_t spinTries_; + YieldStrategy(std::size_t spinTries = 10UL) : spinTries_{spinTries} {} + + static void waitOnce(std::size_t& 
iteration) { + if (iteration == 0) { + std::this_thread::yield(); + } else { + --iteration; + } + } + + template + Sequence::Value wait(const Sequence::Value sequence, const Barrier& seqs) { + std::size_t iteration = spinTries_; + Sequence::Value result = 0; + while (!waitingDone(result, sequence, seqs)) { + waitOnce(iteration); + } + return result; + } + + Sequence::Value wait(const Sequence::Value sequence, Sequence::Ptr sptr) const { + std::size_t iteration = spinTries_; + Sequence::Value result = 0; + while (!waitingDone(result, sequence, sptr)) { + waitOnce(iteration); + } + return result; + } + + void signalAllWhenBlocking() {} +}; + +struct BlockStrategy { + std::mutex mutex_; + std::condition_variable_any cv_; + + BlockStrategy() = default; + + template + Sequence::Value wait(const Sequence::Value sequence, const Barrier& seqs) { + Sequence::Value result = 0; + std::unique_lock lock(mutex_); + cv_.wait(lock, [&]() { return waitingDone(result, sequence, seqs); }); + return result; + } + + Sequence::Value wait(const Sequence::Value sequence, Sequence::Ptr sptr) { + Sequence::Value result = 0; + std::unique_lock lock(mutex_); + cv_.wait(lock, [&]() { return waitingDone(result, sequence, sptr); }); + return result; + } + + void signalAllWhenBlocking() { + std::unique_lock lock(mutex_); + cv_.notify_all(); + } +}; + +struct ShareStrategy { + pthread_mutex_t mutex_{}; + pthread_cond_t cv_{}; + + struct Lock { + pthread_mutex_t& m_; + Lock(pthread_mutex_t& m) : m_{m} { pthread_mutex_lock(&m_); } + ~Lock() { pthread_mutex_unlock(&m_); } + }; + + ShareStrategy() { + pthread_mutexattr_t mutexattr_{}; + pthread_mutexattr_init(&mutexattr_); + pthread_mutexattr_setpshared(&mutexattr_, PTHREAD_PROCESS_SHARED); + pthread_mutex_init(&mutex_, &mutexattr_); + pthread_mutexattr_destroy(&mutexattr_); + + pthread_condattr_t cvattr_{}; + pthread_condattr_init(&cvattr_); + pthread_condattr_setpshared(&cvattr_, PTHREAD_PROCESS_SHARED); + pthread_cond_init(&cv_, &cvattr_); + 
pthread_condattr_destroy(&cvattr_); + } + ~ShareStrategy() { + pthread_mutex_destroy(&mutex_); + pthread_cond_destroy(&cv_); + } + + template + Sequence::Value wait(const Sequence::Value sequence, const Barrier& seqs) { + Sequence::Value result = 0; + + auto lock = Lock{mutex_}; + while (!waitingDone(result, sequence, seqs)) { + if (int err = pthread_cond_wait(&cv_, &mutex_); err != 0) { + throw std::system_error(err, std::generic_category(), "ShareStrategy::wait(seqs): "); + } + } + + return result; + } + + Sequence::Value wait(const Sequence::Value sequence, Sequence::Ptr sptr) { + Sequence::Value result = 0; + + auto lock = Lock{mutex_}; + while (!waitingDone(result, sequence, sptr)) { + if (int err = pthread_cond_wait(&cv_, &mutex_); err != 0) { + throw std::system_error(err, std::generic_category(), "ShareStrategy::wait(sptr): "); + } + } + + return result; + } + + void signalAllWhenBlocking() { + auto lock = Lock{mutex_}; + if (int err = pthread_cond_broadcast(&cv_); err != 0) { + throw std::system_error(err, std::generic_category(), "ShareStrategy::signalAllWhenBlocking(): "); + } + } +}; + +static_assert(Concept); +static_assert(Concept); +static_assert(Concept); +static_assert(Concept); + +} // namespace Waits + +namespace Sequencers { + +template +concept Produce = requires(ProduceType p, std::size_t amount, Sequence::Value lo, Sequence::Value hi) { + { p.available() } noexcept -> std::same_as; + { p.claim() } -> std::same_as; + { p.claim(amount) } -> std::same_as; // returns the 'end()' sequence value + { p.tryClaim(amount) } -> std::same_as; // may return Sequence::Initial + { p.publish(hi) } -> std::same_as; + { p.publish(lo, hi) } -> std::same_as; +}; + +template +concept Consume = requires(ConsumeType c, Sequence::Value next, Sequence::Value last) { + { c.available() } noexcept -> std::same_as; + { c.last() } -> std::same_as; + { c.consume(next) } -> std::same_as; + { c.consumable(next) } -> std::same_as; // max consumable or Sequence::Initial + { 
c.release(last) } -> std::same_as; +}; + +template +concept Concept = requires(SequencerType s, std::size_t count, Sequence::Value seq) { + typename SequencerType::WaitStrategy; + requires Produce; + requires Consume; + requires Consume; + { s.last() } -> std::same_as; // returns last published value + { s.next() } -> std::same_as; // returns next claimable value + { s.capacity() } -> std::same_as; + { s.published(seq) } -> std::same_as; // is published + { s.consumable(seq) } -> std::same_as; // max consumable or Sequence::Initial +}; + +template +struct Producer { + using WaitStrategy = WaitType; + using BarrierType = std::remove_reference_t; // Defines Barrier type + using BarrierStorage = Type; // Defines how a Barrier is stored, by value or reference + struct Consumer { + BarrierStorage barrier_; + Derived& producer_; + + explicit Consumer(BarrierType& b, Derived& p) : barrier_{b}, producer_{p} { + barrier_.depends(p.cursor()); // cursor points to last claimed, not published + barrier_.release(p.next() - 1); + } + + inline std::size_t available() const noexcept { + return static_cast(producer_.next() - (last() + 1)); + } + inline Group<> dependencies() const { return barrier_.dependencies(); } + inline Sequence::Ptr cursor() const { return barrier_.cursor(); } + inline Sequence::Value last() const { return barrier_.last(); } + Sequence::Value consume(Sequence::Value sequence) const { return producer_.consume(sequence); } + Sequence::Value consumable(Sequence::Value sequence) const { return producer_.consumable(sequence); } + inline void release(Sequence::Value sequence) { + barrier_.release(sequence); + producer_.wait_.signalAllWhenBlocking(); + } + }; + + struct Indirect { + BarrierStorage barrier_; + WaitStrategy& wait_; + + Indirect(BarrierType& barrier, WaitStrategy& wait, const Group<>& deps) : barrier_{barrier}, wait_{wait} { + depends(deps); + } + + template + explicit Indirect(Barrier& barrier, const Node& that) // Not a copy but linking to + : 
barrier_{barrier}, wait_{that.wait_} { + depends(group(that)); + } + + inline std::size_t available() const noexcept { + return static_cast(barrier_.minimumSequence() - last()); + } + inline void depends(const Group<>& dependencies) { barrier_.depends(dependencies); } + inline Group<> dependencies() const { return barrier_.dependencies(); } + inline Sequence::Ptr cursor() const { return barrier_.cursor(); } + inline Sequence::Value last() const { return barrier_.last(); } + inline Sequence::Value consume(Sequence::Value sequence) const { return wait_.wait(sequence, barrier_); } + Sequence::Value consumable(Sequence::Value sequence) const { + const auto published = barrier_.minimumSequence(); + return sequence <= published ? published : Sequence::Initial; + } + inline void release(Sequence::Value sequence) { + barrier_.release(sequence); + wait_.signalAllWhenBlocking(); + } + }; + + BarrierStorage barrier_; + WaitStrategy& wait_; + const std::size_t capacity_; + + Producer(BarrierType& barrier, WaitType& wait, std::size_t capacity) + : barrier_{barrier}, wait_{wait}, capacity_{capacity} { + if (!isPowerOf2(capacity_)) { + throw std::invalid_argument{"Capacity must be power-of-two, value:" + std::to_string(capacity_)}; + } + } + + inline auto add(const Sequence::Ptr ptr) noexcept { return barrier_.add(ptr); } + inline auto del(const Sequence::Ptr ptr) noexcept { return barrier_.del(ptr); } + inline auto replace(const Group<>& removable, const Sequence::Ptr desired) noexcept { + return barrier_.replace(removable, desired); + } + + void depends(const Group<>& dependencies) { barrier_.set(dependencies); } + inline Group<> dependencies() { return barrier_.dependencies(); } + inline Sequence::Ptr cursor() const { return barrier_.cursor(); } + inline Sequence::Value last() const { return barrier_.last(); } + inline std::size_t capacity() const { return capacity_; } +}; + +template +struct SPMC : Producer> { + using Base = Producer>; + + Sequence::Value 
next_{Sequence::Initial + 1}; + + SPMC(BarrierType& barrier, std::size_t capacity, WaitType& wait, std::span = {}) + : Base{barrier, wait, capacity} {} + + inline Sequence::Value next() const { return next_; } + + std::size_t available() const noexcept { + const auto n = next(); + return Base::capacity() - static_cast(n - Base::barrier_.minimumSequence(n - 1)) + 1; + } + + template + Sequence::Value claim(std::size_t count = N) { + static_assert(N > 0); + const auto amount = static_cast(std::min(count, Base::capacity())); + const auto next = next_ + amount; + const auto wrap = next - static_cast(Base::capacity()); + Base::wait_.wait(wrap, Base::barrier_); + next_ = next; + return next; + } + + template + Sequence::Value tryClaim(std::size_t count = N) { + static_assert(N > 0); + const auto amount = static_cast(std::min(count, Base::capacity())); + const auto next = next_ + amount; + const auto wrap = next - static_cast(Base::capacity()); + if (wrap > Base::barrier_.minimumSequence()) return Sequence::Initial; + next_ = next; + return next; + } + + bool published(Sequence::Value sequence) const { return sequence <= Base::barrier_.last(); } + + void publish(Sequence::Value sequence) { + Base::barrier_.release(sequence); + Base::wait_.signalAllWhenBlocking(); + } + + void publish(Sequence::Value, Sequence::Value hi) { + Base::barrier_.release(hi - 1); + Base::wait_.signalAllWhenBlocking(); + } + + Sequence::Value consume(Sequence::Value sequence) const { return Base::wait_.wait(sequence, Base::cursor()); } + Sequence::Value consumable(Sequence::Value sequence) const { + const auto published = Base::barrier_.last(); + return sequence <= published ? 
published : Sequence::Initial; + } +}; + +template +struct MPMC : Producer> { + using Base = Producer>; + + const std::size_t mask_; + std::vector internal_{}; + std::span published_{}; + + MPMC(BarrierType& barrier, std::size_t capacity, WaitType& wait, std::span external = {}) + : Base{barrier, wait, capacity}, mask_{capacity - 1}, internal_{}, published_{external} { + if (external.empty()) { // Allocate its own array for published sequences + internal_.resize(capacity); + published_ = internal_; + } else { + if (external.size() != capacity) { + throw std::invalid_argument{ + std::format("External storage size({}) != capacity({})", external.size(), capacity)}; + } + } + + for (auto& i : published_) i.store(Sequence::Initial); + } + + inline Sequence::Value next() const { return Base::barrier_.last() + 1; } + + std::size_t available() const noexcept { + return Base::capacity() - static_cast(next() - Base::barrier_.minimumSequence(next() - 1)) + 1; + } + + template + Sequence::Value claim(std::size_t count = N) { + static_assert(N > 0); + auto amount = static_cast(std::min(count, Base::capacity())); + const auto current = Base::barrier_.last_.add(amount) + 1; + const auto next = current + amount; + const auto wrap = next - static_cast(Base::capacity()); + + Sequence::Value gating = 0; + Waits::SpinsStrategy::Spinner spinner; + while (wrap > (gating = Base::barrier_.minimumSequence(current))) { + spinner.once(); + } + + return next; + } + + template + Sequence::Value tryClaim(std::size_t count = N) { + static_assert(N > 0); + const auto amount = static_cast(std::min(count, Base::capacity())); + + Sequence::Value current; // NOLINT(cppcoreguidelines-init-variables) + Sequence::Value next; // NOLINT(cppcoreguidelines-init-variables) + do { // NOLINT(cppcoreguidelines-avoid-do-while) + current = Base::barrier_.last(); + next = current + amount; + const auto wrap = next - static_cast(Base::capacity()); + if (wrap > Base::barrier_.minimumSequence()) return 
Sequence::Initial; + } while (!Base::barrier_.last_.cas(current, next)); + return next + 1; + } + + void setAvailable(Sequence::Value sequence) { + auto& seq = published_[static_cast(sequence) & mask_]; + assert(seq.load() == Sequence::Initial || + seq.load() == (sequence - static_cast(this->capacity_))); + seq.store(sequence); + } + + bool published(Sequence::Value sequence) const { + return published_[static_cast(sequence) & mask_].load() == sequence; + } + + void publish(Sequence::Value sequence) { + setAvailable(sequence); + this->wait_.signalAllWhenBlocking(); + } + + void publish(Sequence::Value lo, Sequence::Value hi) { + for (auto i = lo; i < hi; ++i) { + setAvailable(i); + } + Base::wait_.signalAllWhenBlocking(); + } + + Sequence::Value isAvailableNext(Sequence::Value lastKnownPublished) const { + // Prefetch next sequences for availability + Sequence::Value seq = lastKnownPublished + 1; + // Prefetch up to end of current cache line + static_assert(CacheLineSize / sizeof(Sequence::Atomic) == 8); + Sequence::Value limit = lastKnownPublished | (4 - 1); + for (; seq <= limit; ++seq) { + if (!published(seq)) { + return seq - 1; + } + } + + return limit; + } + + Sequence::Value consume(Sequence::Value sequence) const { + if (!published(sequence)) { + Base::wait_.wait(sequence, &published_[static_cast(sequence) & mask_]); + } + return isAvailableNext(sequence); + } + + Sequence::Value consumable(Sequence::Value sequence) const { + if (!published(sequence)) return Sequence::Initial; + return isAvailableNext(sequence); + } +}; + +static_assert(Concept>); +static_assert(Concept>); + +} // namespace Sequencers + +template +concept Indexer = requires(IndexerType p, Sequence::Value sequence) { + { p.operator[](sequence) } noexcept -> std::same_as; + { p.size() } noexcept -> std::same_as; + { p.data() } noexcept -> std::same_as; +}; + +template +struct Ring : public std::span { + using std::span::size; + using std::span::data; + const std::size_t mask_; + + explicit 
constexpr Ring(std::span buffer) : std::span{buffer}, mask_{size() - 1} { + if (data() == nullptr) { + throw std::invalid_argument{"Ring pointer is nullptr"}; + } + if (!isPowerOf2(size())) { + throw std::invalid_argument{"Ring size must be power-of-two, value:" + std::to_string(size())}; + } + } + // clang-format off + T& operator[](std::ptrdiff_t seq) noexcept { return data()[static_cast(seq) & mask_]; } + const T& operator[](std::ptrdiff_t seq) const noexcept { return data()[static_cast(seq) & mask_]; } + // clang-format on +}; + +static_assert(Indexer, int>); + +namespace Processor { + +// Describes how consumed T should be released +enum class ConsumedType : std::uint8_t { Exit, Release, Keep }; + +template +concept Handler = requires(HandlerType h, bool running, T& data, Sequence::Value sequence, std::size_t available, + std::exception_ptr eptr) { + { h.onRunning(running) } noexcept -> std::same_as; + { h.onBatch(sequence, available) } -> std::same_as; + { h.onConsume(data, sequence, available) } -> std::same_as; + { h.onTimeout(sequence) } -> std::same_as; + { h.onException(eptr, sequence) } -> std::same_as; +}; + +template +struct DefaultHandler { + using Consumed = ConsumedType; + void onRunning(bool) noexcept {} + void onBatch(Sequence::Value, std::size_t) {} + Consumed onConsume(T&, Sequence::Value, std::size_t) { return Consumed::Release; } + void onTimeout(Sequence::Value) {} + void onException(const std::exception_ptr& eptr, Sequence::Value sequence) noexcept(false) { + try { + if (eptr) std::rethrow_exception(eptr); + } catch (const std::exception& e) { + throw std::runtime_error("exception on #" + std::to_string(sequence) + " what:" + std::string{e.what()}); + } catch (...) 
{ + std::throw_with_nested(std::runtime_error("exception on #" + std::to_string(sequence))); + } + } +}; + +static_assert(Handler, int>); + +template Indexer, Handler Handler> +struct Batch { + Barrier& barrier_; + Indexer& indexer_; + Handler& handler_; + std::atomic_flag running_; + std::atomic_flag halting_; + + Batch([[maybe_unused]] const T& dummy, Barrier& barrier, Indexer& indexer, Handler& handler) + : barrier_{barrier}, indexer_{indexer}, handler_{handler} {} + + void run() { + if (running_.test_and_set()) throw std::runtime_error{"Processor is already running"}; + halting_.clear(); + handler_.onRunning(true); + + auto next = barrier_.last() + 1; + while (!halting_.test(std::memory_order_consume)) try { + const auto available = barrier_.consume(next); + handler_.onBatch(next, static_cast(available - next + 1)); + while (next <= available) { + switch (handler_.onConsume(indexer_[next], next, static_cast(available - next))) { + case ConsumedType::Exit: + halting_.test_and_set(); + [[fallthrough]]; + case ConsumedType::Release: + barrier_.release(next); + [[fallthrough]]; + case ConsumedType::Keep: + break; + } + ++next; + } + barrier_.release(available); + } catch (...) { + try { + handler_.onException(std::current_exception(), next); + } catch (...) 
{ + handler_.onRunning(false); + running_.clear(); + running_.notify_all(); + throw; + } + } + + handler_.onRunning(false); + running_.clear(); + running_.notify_all(); + } + + bool running() const noexcept { return running_.test(); } + void halt() { + halting_.test_and_set(); + running_.wait(true); + } +}; + +} // namespace Processor + +// Move template implementation to Disruptor.cpp and remove these extern +extern template struct Group<8>; +extern template struct Barriers::PointerBarrier<8>; +extern template struct Barriers::AtomicsBarrier<8>; +extern template struct Barriers::OffsetsBarrier<8>; + +extern template struct Sequencers::SPMC>; +extern template struct Sequencers::SPMC>; +extern template struct Sequencers::SPMC>; +extern template struct Sequencers::SPMC>; +extern template struct Sequencers::SPMC>; +extern template struct Sequencers::SPMC>; +extern template struct Sequencers::SPMC>; +extern template struct Sequencers::SPMC>; +extern template struct Sequencers::SPMC>; +extern template struct Sequencers::SPMC>; +extern template struct Sequencers::SPMC>; +extern template struct Sequencers::SPMC>; + +extern template struct Sequencers::SPMC&>; +extern template struct Sequencers::SPMC&>; +extern template struct Sequencers::SPMC&>; +extern template struct Sequencers::SPMC&>; +extern template struct Sequencers::SPMC&>; +extern template struct Sequencers::SPMC&>; +extern template struct Sequencers::SPMC&>; +extern template struct Sequencers::SPMC&>; +extern template struct Sequencers::SPMC&>; +extern template struct Sequencers::SPMC&>; +extern template struct Sequencers::SPMC&>; +extern template struct Sequencers::SPMC&>; + +extern template struct Sequencers::MPMC>; +extern template struct Sequencers::MPMC>; +extern template struct Sequencers::MPMC>; +extern template struct Sequencers::MPMC>; +extern template struct Sequencers::MPMC>; +extern template struct Sequencers::MPMC>; +extern template struct Sequencers::MPMC>; +extern template struct Sequencers::MPMC>; 
+extern template struct Sequencers::MPMC>; +extern template struct Sequencers::MPMC>; +extern template struct Sequencers::MPMC>; +extern template struct Sequencers::MPMC>; + +extern template struct Sequencers::MPMC&>; +extern template struct Sequencers::MPMC&>; +extern template struct Sequencers::MPMC&>; +extern template struct Sequencers::MPMC&>; +extern template struct Sequencers::MPMC&>; +extern template struct Sequencers::MPMC&>; +extern template struct Sequencers::MPMC&>; +extern template struct Sequencers::MPMC&>; +extern template struct Sequencers::MPMC&>; +extern template struct Sequencers::MPMC&>; +extern template struct Sequencers::MPMC&>; +extern template struct Sequencers::MPMC&>; + +} // namespace dlsm::Disruptor diff --git a/include/impl/DisruptorGraph.hpp b/include/impl/DisruptorGraph.hpp new file mode 100644 index 0000000..23303ea --- /dev/null +++ b/include/impl/DisruptorGraph.hpp @@ -0,0 +1,120 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "impl/Disruptor.hpp" + +namespace dlsm::Disruptor::Graph { + +enum class Type : std::uint8_t { SPSC, SPMC, MPMC }; +enum class Wait : std::uint8_t { Spins, Yield, Block, Share }; +enum class Stat : std::uint8_t { Empty = 0, Init, Ready, Updating }; + +struct Layout { + struct Graph { + Type type_{Type::SPSC}; + Wait wait_{Wait::Block}; + }; + + struct Slots { + std::size_t maxPub_{0}; + std::size_t maxSub_{0}; + std::size_t numPub_{0}; + std::size_t numSub_{0}; + }; + + struct Items { + std::size_t capacity_{0}; + std::size_t size_{0}; + std::size_t align_{1}; + std::size_t hash_{0}; + std::array type_{'\0'}; + + std::string_view type() const { return {type_.data()}; } + void type(std::string_view v) { + auto len = std::min(std::size(type_) - 1, std::size(v)); + auto end = std::copy_n(std::begin(v), len, std::begin(type_)); + std::fill(end, std::end(type_), '\0'); + } + + template + static Items create(std::size_t n = 0, std::string_view name = {}) { + 
static_assert(std::is_standard_layout_v); + auto& info = typeid(T); + auto items = Items{n, sizeof(T), alignof(T), info.hash_code()}; + items.type(name.empty() ? std::string_view{info.name()} : name); + return items; + } + }; + + Graph graph_; + Slots slots_; + Items items_; + + Layout() = default; + Layout(const Graph& g, const Slots& s, const Items& i) : graph_{g}, slots_{s}, items_{i} {} + + std::size_t size() const; + // Runtime checks, will throw exceptions + void check(const Graph& graph) const; + void check(const Slots& slots) const; + void check(const Items& items) const; + void check(const Layout& that) const; +}; + +struct IGraph { + virtual ~IGraph() = default; + + using Ptr = std::shared_ptr; + + struct IPub { + virtual ~IPub() = default; + using Ptr = std::shared_ptr; + virtual std::size_t available() const noexcept = 0; + virtual Sequence::Value next() const noexcept = 0; + virtual Sequence::Value claim(std::size_t amount = 1) = 0; + virtual Sequence::Value tryClaim(std::size_t amount = 1) = 0; + virtual void publish(Sequence::Value last) = 0; + }; + + struct ISub { + virtual ~ISub() = default; + using Ptr = std::shared_ptr; + virtual std::size_t available() const noexcept = 0; + virtual Sequence::Value last() const noexcept = 0; + virtual Sequence::Value consume(Sequence::Value next) = 0; + virtual Sequence::Value consumable(Sequence::Value next) = 0; + virtual void release(Sequence::Value last) = 0; + }; + + using SubList = std::span; + + virtual IPub::Ptr pub(std::string_view name = {}) = 0; + virtual ISub::Ptr sub(std::string_view name = {}, SubList dependencies = {}) = 0; + + virtual std::vector dependencies(std::string_view name = {}) const = 0; + virtual std::string description() const = 0; + virtual std::size_t capacity() const = 0; + virtual const Layout& layout() const = 0; + virtual const std::span items() const = 0; + + template + Ring ring(std::string_view name = {}) const { + auto& vla = layout(); + vla.check(Layout::Items::create(0, 
name)); + auto bytes = items(); + // C++23 std::start_lifetime_as_array + return Ring{{std::launder(reinterpret_cast(bytes.data())), bytes.size() / vla.items_.size_}}; + } + + static Ptr create(Type type, Wait wait, Layout::Items items = {1024}); + static Ptr inproc(const Layout& required, std::span space); + static Ptr shared(const Layout& required, const std::string& opts, std::string_view attaching = "100x1"); +}; + +} // namespace dlsm::Disruptor::Graph diff --git a/include/impl/Signal.hpp b/include/impl/Signal.hpp index e48fc3d..ecf2b08 100644 --- a/include/impl/Signal.hpp +++ b/include/impl/Signal.hpp @@ -4,6 +4,7 @@ #include #include +#include #include namespace dlsm::Signal { diff --git a/include/impl/Str.hpp b/include/impl/Str.hpp index 5efe9e5..33137c4 100644 --- a/include/impl/Str.hpp +++ b/include/impl/Str.hpp @@ -1,8 +1,10 @@ #pragma once #include +#include #include #include +#include #include #include #include diff --git a/include/impl/Transport.hpp b/include/impl/Transport.hpp index e63ce42..b49df49 100644 --- a/include/impl/Transport.hpp +++ b/include/impl/Transport.hpp @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include #include diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index dde9027..ed31926 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -12,6 +12,8 @@ target_include_directories(dlsm PUBLIC "$/include>" ) target_sources (dlsm PRIVATE + Disruptor.cpp + DisruptorGraph.cpp Logger.cpp SharedMemory.cpp Signal.cpp diff --git a/src/Disruptor.cpp b/src/Disruptor.cpp new file mode 100644 index 0000000..9d469d3 --- /dev/null +++ b/src/Disruptor.cpp @@ -0,0 +1,62 @@ +#include "impl/Disruptor.hpp" + +namespace dlsm::Disruptor { + +template struct Group<8>; +template struct Barriers::PointerBarrier<8>; +template struct Barriers::AtomicsBarrier<8>; +template struct Barriers::OffsetsBarrier<8>; + +template struct Sequencers::SPMC>; +template struct Sequencers::SPMC>; +template struct Sequencers::SPMC>; +template struct 
Sequencers::SPMC>; +template struct Sequencers::SPMC>; +template struct Sequencers::SPMC>; +template struct Sequencers::SPMC>; +template struct Sequencers::SPMC>; +template struct Sequencers::SPMC>; +template struct Sequencers::SPMC>; +template struct Sequencers::SPMC>; +template struct Sequencers::SPMC>; + +template struct Sequencers::SPMC&>; +template struct Sequencers::SPMC&>; +template struct Sequencers::SPMC&>; +template struct Sequencers::SPMC&>; +template struct Sequencers::SPMC&>; +template struct Sequencers::SPMC&>; +template struct Sequencers::SPMC&>; +template struct Sequencers::SPMC&>; +template struct Sequencers::SPMC&>; +template struct Sequencers::SPMC&>; +template struct Sequencers::SPMC&>; +template struct Sequencers::SPMC&>; + +template struct Sequencers::MPMC>; +template struct Sequencers::MPMC>; +template struct Sequencers::MPMC>; +template struct Sequencers::MPMC>; +template struct Sequencers::MPMC>; +template struct Sequencers::MPMC>; +template struct Sequencers::MPMC>; +template struct Sequencers::MPMC>; +template struct Sequencers::MPMC>; +template struct Sequencers::MPMC>; +template struct Sequencers::MPMC>; +template struct Sequencers::MPMC>; + +template struct Sequencers::MPMC&>; +template struct Sequencers::MPMC&>; +template struct Sequencers::MPMC&>; +template struct Sequencers::MPMC&>; +template struct Sequencers::MPMC&>; +template struct Sequencers::MPMC&>; +template struct Sequencers::MPMC&>; +template struct Sequencers::MPMC&>; +template struct Sequencers::MPMC&>; +template struct Sequencers::MPMC&>; +template struct Sequencers::MPMC&>; +template struct Sequencers::MPMC&>; + +} // namespace dlsm::Disruptor diff --git a/src/DisruptorGraph.cpp b/src/DisruptorGraph.cpp new file mode 100644 index 0000000..40bbd24 --- /dev/null +++ b/src/DisruptorGraph.cpp @@ -0,0 +1,564 @@ +#include "impl/DisruptorGraph.hpp" + +#include +#include +#include +#include +#include +#include + +#include "impl/Memory.hpp" +#include "impl/SharedMemory.hpp" + 
+// Need features: +// - customization BarrierStorage = Barrier for inproc and Barrier& for shared memory +// - recalculation dependencies graph after ~IPub()/~ISub() +// - implementation SPSC + +using namespace dlsm::Disruptor::Graph; + +constexpr std::string_view str(Type v) { + // C++23 std::to_underlying() + const auto i = static_cast>(v); + constexpr std::array strs{"SPSC", "SPMC", "MPMC"}; + return strs[i]; // NOLINT +} +constexpr std::string_view str(Wait v) { + // C++23 std::to_underlying() + const auto i = static_cast>(v); + constexpr std::array strs{"Spins", "Yield", "Block", "Share"}; + return strs[i]; // NOLINT +} +constexpr std::string_view str(Stat v) { + // C++23 std::to_underlying() + const auto i = static_cast>(v); + constexpr std::array strs{"Empty", "Init", "Ready", "Updating"}; + return strs[i]; // NOLINT +} + +namespace dlsm::Disruptor::Graph { + +// Slot is a Barrier with some metainformation in standard layout memory +// SHOULD be replaced with external polymorphism and C++23 std::flat_map<> +struct alignas(CacheLineSize) Slot { + Barrier barrier_; + dlsm::Str::Flat<16> name_; + + Slot() = default; + + Barrier& barrier() { return barrier_; } + std::string_view name() const { return name_; } + void name(std::string_view v) { name_ = v; } + std::vector dependencies() const { + std::vector ptrs; + static_assert(offsetof(Slot, barrier_) == 0); + static_assert(offsetof(Slot, barrier_.last_) == 0); + + for (auto ptr : barrier_.dependencies().items_) { + if (ptr) { + static_assert(std::is_same_v); + // Convert dependency const Sequence* to its aggregator const Slot* + ptrs.emplace_back(reinterpret_cast(ptr)); + } + } + return ptrs; + } +}; + +static_assert(std::is_standard_layout_v); +static_assert(alignof(Slot) == CacheLineSize); +} // namespace dlsm::Disruptor::Graph + +template <> +struct std::formatter : std::formatter { + auto format(const Layout& v, std::format_context& ctx) const { + return std::formatter::format( + std::format( + "items:{} 
slots:{} graph:{} size:{}", + std::format("'{}' {:2}@{}x{}({} bytes)", v.items_.type(), v.items_.size_, v.items_.align_, + v.items_.capacity_, v.items_.size_ * v.items_.capacity_), + std::format("{}x{}({}x{})", v.slots_.numPub_, v.slots_.numSub_, v.slots_.maxPub_, v.slots_.maxSub_), + std::format("{}({})", str(v.graph_.type_), str(v.graph_.wait_)), v.size()), + ctx); + } +}; + +namespace dlsm::Disruptor::Graph { + +template +T castTo(const auto& bytes) { + return std::launder(reinterpret_cast(std::data(bytes))); +} + +template +std::span nextTo(const U& that, std::size_t align, std::size_t size, std::size_t n) { + void* next = const_cast(&that); // NOLINT + std::size_t bytes = size * n; + std::size_t space = align + bytes; + + void* aligned = std::align(align, bytes, next, space); + // C++23 std::start_lifetime_as_array + return {std::launder(reinterpret_cast(aligned)), bytes}; +} + +template +std::span nextTo(const U& that, std::size_t n = 1) { + auto bytes = nextTo(that, alignof(T), sizeof(T), n); + return {castTo(bytes), n}; +} + +struct alignas(CacheLineSize) VLA : Layout { + std::atomic state_{Stat::Empty}; // Kind of SeqLock + + std::span slots() { + const auto count = (slots_.maxPub_ > 0 ? 1 : 0) + slots_.maxSub_; + return nextTo(*(this + 1), count); + } + + std::span wait() { + // clang-format off + const auto [align, size] = [&]() -> std::pair { + using namespace dlsm::Disruptor::Waits; + switch (graph_.wait_) { + case Wait::Spins: return {alignof(SpinsStrategy), sizeof(SpinsStrategy)}; + case Wait::Yield: return {alignof(YieldStrategy), sizeof(YieldStrategy)}; + case Wait::Block: return {alignof(BlockStrategy), sizeof(BlockStrategy)}; + case Wait::Share: return {alignof(ShareStrategy), sizeof(ShareStrategy)}; + } + return {0ULL, 0ULL}; + }(); + // clang-format on + return nextTo(*slots().end(), align, size, 1); + } + + std::span sequences() { + const auto size = (graph_.type_ == Type::MPMC) ? 
items_.capacity_ : 0; + return nextTo(*wait().end(), size); + } + + std::span items() { return nextTo(*sequences().end(), items_.align_, items_.size_, items_.capacity_); } + + std::size_t size() { + auto begin = reinterpret_cast(this); + auto end = &*items().end(); + return static_cast(end - begin) + alignof(VLA); + } + + VLA(const Layout& that) : Layout{that} {} + VLA(const Layout& that, Stat state) : Layout{that} { + dlsm::Memory::checkAlignment(&that); + const auto s = slots(); + dlsm::Memory::checkAlignment(std::data(s)); + std::uninitialized_default_construct(std::begin(s), std::end(s)); + // clang-format off + using namespace dlsm::Disruptor::Waits; + switch(graph_.wait_) { + case Wait::Spins: std::construct_at(castTo(wait())); break; + case Wait::Yield: std::construct_at(castTo(wait())); break; + case Wait::Block: std::construct_at(castTo(wait())); break; + case Wait::Share: std::construct_at(castTo(wait())); break; + } + // clang-format on + const auto seq = sequences(); + dlsm::Memory::checkAlignment(std::data(seq)); + std::uninitialized_default_construct(std::begin(seq), std::end(seq)); + // Fill items by zeroes(no ctor & dtor) + auto i = items(); + dlsm::Memory::checkAlignment(std::data(i), that.items_.align_); + std::fill(std::begin(i), std::end(i), std::byte{0}); + state_.store(state); + } + ~VLA() { + if (state_.load() == Stat::Empty) return; + std::destroy(std::begin(slots()), std::end(slots())); + // clang-format off + using namespace dlsm::Disruptor::Waits; + switch(graph_.wait_) { + case Wait::Spins: std::destroy_at(castTo(wait())); break; + case Wait::Yield: std::destroy_at(castTo(wait())); break; + case Wait::Block: std::destroy_at(castTo(wait())); break; + case Wait::Share: std::destroy_at(castTo(wait())); break; + } + // clang-format on + std::destroy(std::begin(sequences()), std::end(sequences())); + static_cast(*this) = Layout{}; // NOLINT(cppcoreguidelines-slicing) + state_.store(Stat::Empty); + } +}; + +std::size_t Layout::size() const { 
return VLA{*this}.size(); } + +void Layout::check(const Graph& that) const { + if ((graph_.type_ != that.type_) || (graph_.wait_ != that.wait_)) { + throw std::runtime_error{std::format("Layout::Graph missmatch: type:{}={} wait:{}={}", ::str(graph_.type_), + ::str(that.type_), ::str(graph_.wait_), ::str(that.wait_))}; + } +} +void Layout::check(const Slots& that) const { + if ((slots_.maxPub_ != that.maxPub_) || (slots_.maxSub_ != that.maxSub_)) { + throw std::runtime_error{std::format("Layout::Slots missmatch: maxPub:{}={} maxSub:{}={}", slots_.maxPub_, + that.maxPub_, slots_.maxSub_, that.maxSub_)}; + } +} +void Layout::check(const Items& that) const { + if ((items_.size_ != that.size_) || (items_.align_ != that.align_) || (items_.hash_ != that.hash_) || + (items_.type_ != that.type_)) { + throw std::runtime_error{std::format("Layout::Items missmatch: size:{}={} align:{}={} type:{}={}", items_.size_, + that.size_, items_.align_, that.align_, items_.type(), that.type())}; + } +} +void Layout::check(const Layout& that) const { + check(that.graph_); + check(that.slots_); + check(that.items_); +} + +struct Lock { // Kind of SeqLock with waiting by sleep + std::atomic& state_; + const Stat old_; + + Lock(std::atomic& s, Stat expected = Stat::Ready, std::string_view wait = "500x1") + : state_{s}, old_{expected} { + if (state_.compare_exchange_strong(expected, Stat::Updating)) return; + auto r = dlsm::Str::xpair(wait); + for (std::size_t i = 0; i < r.first; ++i) { + std::this_thread::sleep_for(std::chrono::milliseconds(r.second)); + expected = Stat::Ready; + if (state_.compare_exchange_strong(expected, Stat::Updating)) return; + } + + throw std::runtime_error{ + std::format("IGraph state is:{} after {}x{}ms retries", str(expected), r.first, r.second)}; + } + ~Lock() { state_.store(old_); } +}; + +struct ExternMemoryHolder { + std::vector extern_; + std::string description() const { + if (extern_.empty()) return {}; + return std::format("In ExternMemory: size:{} 
address:{}", extern_.size(), + reinterpret_cast(extern_.data())); + } +}; + +struct SharedMemoryHolder { + dlsm::SharedMemory::UPtr shared_; + std::string description() const { + if (!shared_) return {}; + return std::format("In SharedMemory:{}{} size:{} address:{}", shared_->name(), + (shared_->owner() ? "(owner)" : ""), shared_->size(), shared_->address()); + } +}; + +struct IGraphInternal : ExternMemoryHolder, SharedMemoryHolder, public IGraph { + using Ptr = std::shared_ptr; +}; + +template +struct IGraphImpl final : IGraphInternal, public std::enable_shared_from_this> { + static constexpr std::size_t Master = 0ULL; // producer(s) slot index + + VLA& vla_; + std::span slots_; + typename Sequencer::WaitStrategy& wait_; + Sequencer impl_; + + IGraphImpl(VLA& vla) + : vla_{vla}, + slots_{vla_.slots()}, + wait_{*castTo(vla_.wait())}, + impl_{slots_[Master].barrier(), vla_.items_.capacity_, wait_, vla_.sequences()} { + if (vla_.state_.load() == Stat::Init) { + slots_[Master].name("Master"); + vla_.state_.store(Stat::Ready); + } + } + + ~IGraphImpl() override { + auto lock = Lock{vla_.state_}; + if (numPub() == 0 && numSub() == 0) { + std::destroy_at(&vla_); + } + } + + struct Pub final : IPub { + IGraphImpl& graph_; + IGraph::Ptr hold_; + + Pub(IGraphImpl& graph) : graph_{graph}, hold_{graph.shared_from_this()} {} + ~Pub() override { graph_.numPub() -= 1; } + + std::size_t available() const noexcept override { return graph_.impl_.available(); } + Sequence::Value next() const noexcept override { return graph_.impl_.next(); } + Sequence::Value claim(std::size_t amount) override { return graph_.impl_.claim(amount); } + Sequence::Value tryClaim(std::size_t amount) override { return graph_.impl_.tryClaim(amount); } + void publish(Sequence::Value last) override { return graph_.impl_.publish(last); } + }; + + template + struct Sub final : ISub, Impl { + IGraphImpl& graph_; + IGraph::Ptr hold_; + Sub(IGraphImpl& graph, Slot& slot) + : Impl{slot.barrier(), graph.impl_}, 
graph_{graph}, hold_{graph.shared_from_this()} {} + ~Sub() override { graph_.numSub() -= 1; } + + std::size_t available() const noexcept override { return Impl::available(); } + Sequence::Value last() const noexcept override { return Impl::last(); }; + Sequence::Value consume(Sequence::Value next) override { return Impl::consume(next); } + Sequence::Value consumable(Sequence::Value next) override { return Impl::consumable(next); } + void release(Sequence::Value last) override { return Impl::release(last); } + }; + + using Consumer = Sub; + using Indirect = Sub; + + auto maxPub() const { return vla_.slots_.maxPub_; } + auto maxSub() const { return vla_.slots_.maxSub_; } + auto& numPub() { return vla_.slots_.numPub_; } + auto& numSub() { return vla_.slots_.numSub_; } + + Slot& construct(std::string_view name, bool& allocated) { + // Find if slot already allocated + for (auto& slot : slots_) { + if (std::string{slot.name()} == std::string{name}) { + // std::cout << "exists: " << name << std::endl; + allocated = false; + return slot; + } + } + // Allocate + if (numSub() + 1 > maxSub()) { + throw std::runtime_error{"Max consumer limit is reached:" + std::to_string(maxSub())}; + } + const auto index = (numSub() += 1); + + // std::cout << "allocate: " << name << " index: " << index << std::endl; + + auto& slot = slots_[index]; + slot.name(name); + allocated = true; + return slot; + } + + IPub::Ptr pub(std::string_view /*name*/) override { + auto lock = Lock{vla_.state_}; + if (numPub() + 1 > maxPub()) { + throw std::runtime_error{"Max producer limit is reached:" + std::to_string(maxPub())}; + } + const auto pubs = (numPub() += 1); + + if (vla_.graph_.type_ == Type::SPMC && pubs > 1) { + numPub() -= 1; + throw std::runtime_error{"One publisher is already created for SPMC"}; + } + + auto result = std::make_shared(*this); + return result; + } + ISub::Ptr sub(std::string_view name, SubList dependencies) override { + auto lock = Lock{vla_.state_}; + bool newone = false; + 
auto& slot = construct(name, newone); + + const auto dsize = dependencies.size(); + if (dsize == 0) { + auto result = std::make_shared(*this, slot); + if (newone && !impl_.add(result->cursor())) { + throw std::runtime_error("Exceeds Subscribers limit"); + } + return result; + } + + if (dsize > Group<>::MaxItems) + throw std::invalid_argument("Dependencies list exceeds limit:" + std::to_string(Group<>::MaxItems)); + + Group<> deps; + for (const auto& dep : dependencies) { + Sequence::Ptr cursor = nullptr; + for (const auto& slot : slots_) + if (slot.name() == dep) { + cursor = slot.barrier_.cursor(); + break; + } + + if (!cursor) { // Dependency is not found, allocate it + bool fallocated = false; + cursor = construct(dep, fallocated).barrier_.cursor(); + } + deps.add(cursor); + } + + auto result = std::make_shared(*this, slot); + result->depends(deps); + + // Remove dependencies from Master + impl_.replace(deps, nullptr); + + // Add dependency to this sub if it is new one + if (newone && !impl_.add(result->cursor())) { + throw std::runtime_error("Exceeds Subscribers limit"); + } + return result; + } + + std::vector dependencies(std::string_view name) const override { + auto lock = Lock{vla_.state_}; + + const auto names = [](const auto& slots) { + std::vector result{}; + result.reserve(slots.size()); + for (const auto& slot : slots) { + result.emplace_back(slot->name()); + } + return result; + }; + + if (!name.empty()) { + for (const auto& slot : slots_) { + if (slot.name() == name) { + return names(slot.dependencies()); + } + } + } + return names(slots_[Master].dependencies()); + } + + std::string description() const override { + const auto capacity = static_cast(this->capacity()); + const auto print = [&](const Slot& s) -> std::string { + const auto name = s.name(); + const auto last = s.barrier_.last(); + const auto deps = s.dependencies(); + + if (name.empty() && last == dlsm::Disruptor::Sequence::Initial && deps.empty()) return "Empty"; + + std::string dstr = 
"Empty"; + if (!deps.empty()) { + std::string list; + for (bool first = true; const auto& d : deps) { + if (first) + first = false; + else + list += ", "; + list += std::format("{:6}", d->name()); + } + dstr = std::format("{}x[{}]", deps.size(), list); + } + double available = static_cast(dlsm::Disruptor::available(s.barrier_)) / capacity * 100.0; + return std::format("name: {:8} last: {:2} full: {:2}% depends: {}", name, last, available, dstr); + }; + + auto lock = Lock{vla_.state_}; + std::string str; + str += ExternMemoryHolder::description() + '\n'; + str += SharedMemoryHolder::description() + '\n'; + str += std::format("state:{} {}\nSlot{:<2} {} pubs: {}\n", ::str(lock.old_), layout(), Master, + print(slots_[Master]), vla_.slots_.numPub_); + for (std::size_t i = Master + 1; i < slots_.size(); ++i) { + str += std::format("Slot{:<2} {}\n", i, print(slots_[i])); + } + return str; + } + + std::size_t capacity() const override { return impl_.capacity(); } + const Layout& layout() const override { return vla_; } + const std::span items() const override { return vla_.items(); } +}; + +IGraphInternal::Ptr createTypeWait(VLA& layout) { + using namespace dlsm::Disruptor::Waits; + using namespace dlsm::Disruptor::Sequencers; + // clang-format off + switch(layout.graph_.type_) { + case Type::SPSC: throw std::invalid_argument{"SPSC is not implemented"}; + case Type::SPMC: + switch(layout.graph_.wait_) { + case Wait::Spins: return std::make_shared>>(layout); + case Wait::Yield: return std::make_shared>>(layout); + case Wait::Block: return std::make_shared>>(layout); + case Wait::Share: return std::make_shared>>(layout); + } + break; + case Type::MPMC: + switch(layout.graph_.wait_) { + case Wait::Spins: return std::make_shared>>(layout); + case Wait::Yield: return std::make_shared>>(layout); + case Wait::Block: return std::make_shared>>(layout); + case Wait::Share: return std::make_shared>>(layout); + } + break; + default: break; + } + // clang-format on + throw 
std::invalid_argument{"unsupported arguments"};
+}
+
+// Builds the graph inside `allocated`, or attaches to it when another caller got there first.
+//  - required:  requested layout; Type::SPMC is accepted only with slots_.maxPub_ == 1.
+//  - allocated: storage whose state_ field drives the Empty -> Init -> Ready handshake below.
+//  - r:         retry budget parsed by dlsm::Str::xpair(): first = number of attempts,
+//               second = sleep per attempt in milliseconds (presumably written as
+//               "<attempts>x<ms>", like the default "100x1" - TODO confirm the format).
+// Returns the result of createTypeWait(allocated).
+// Throws std::invalid_argument for Type::SPMC with maxPub_ != 1.
+// NOTE(review): required.check(allocated) presumably rejects a mismatching layout as well;
+// its behaviour is not visible in this file section.
+IGraphInternal::Ptr create(const Layout& required, VLA& allocated, std::string_view r = "100x1") {
+    if (required.graph_.type_ == Type::SPMC && required.slots_.maxPub_ != 1) {
+        throw std::invalid_argument{"Type::SPMC supports only one producer, current limit:" +
+                                    std::to_string(required.slots_.maxPub_)};
+    }
+
+    auto expected = Stat::Empty;
+    // Only the caller that flips state_ from Empty to Init constructs the layout in place.
+    if (allocated.state_.compare_exchange_strong(expected, Stat::Init)) {
+        std::construct_at(&allocated, required, Stat::Init);
+        // Construction in allocated memory
+        return createTypeWait(allocated);
+    }
+
+    // After a failed exchange `expected` holds the state that was observed instead of Empty.
+    if (expected == Stat::Init) {  // Waiting construction in separate thread/process
+        auto retries = dlsm::Str::xpair(r);
+        for (std::size_t i = 0; i < retries.first; ++i) {
+            std::this_thread::sleep_for(std::chrono::milliseconds(retries.second));
+            expected = allocated.state_.load();
+            if (expected == Stat::Ready) break;
+        }
+        // NOTE(review): when the retries run out without observing Stat::Ready, execution still
+        // falls through to the attach path below - confirm that Lock/check() cope with that.
+    }
+
+    // 3) attaching to memory with external ownership(check provided, no ownership)
+    // NOTE(review): Lock is built on the same state_ field; presumably it serializes attachers
+    // until the end of this scope - confirm against the Lock definition.
+    auto lock = Lock{allocated.state_};
+    required.check(allocated);
+    // Attaching to already created memory
+    return Graph::createTypeWait(allocated);
+}
+
+// Initializes a graph in caller-provided memory; `space` stays owned by the caller.
+// Throws std::invalid_argument when `space` holds fewer bytes than required.size().
+// state_ is reset to Stat::Empty before delegating, so whatever state the memory held
+// before this call is discarded and Graph::create() starts from Empty.
+// NOTE(review): the template arguments of std::span/nextTo are not visible in this view;
+// nextTo(...).front() presumably maps the start of `space` to a VLA - TODO confirm.
+IGraphInternal::Ptr create(const Layout& required, std::span space) {
+    if (required.size() > space.size()) {
+        throw std::invalid_argument{
+            std::format("IGraph::Layout requires {} bytes, only {} provided", required.size(), space.size())};
+    }
+    // 2) construction in memory with external ownership (initialization, no ownership)
+    VLA& layout = nextTo(space.front()).front();
+    layout.state_ = Stat::Empty;
+    return Graph::create(required, layout);
+}
+
+// Creates a graph that owns its storage.
+// items.capacity_ is passed through ceilingNextPowerOfTwo() (presumably rounding it up to a
+// power of two - confirm). The first slots value is 4 for Type::MPMC and 1 otherwise, the
+// second is always 4 (presumably publisher/subscriber limits - confirm against Layout).
+IGraph::Ptr IGraph::create(Type type, Wait wait, Layout::Items items) {
+    // 1) construction in memory with internal ownership (initialization, with ownership)
+    items.capacity_ = ceilingNextPowerOfTwo(items.capacity_);
+    Layout required{{type, wait}, {(type == Type::MPMC ? 4U : 1U), 4}, items};
+    // Allocate storage for required layout and move its ownership to internal
+    auto internal = std::vector(required.size());
+    auto graph = Graph::create(required, internal);
+    // The graph was built inside `internal`; store the buffer in the graph itself.
+    graph->extern_ = std::move(internal);
+    return graph;
+}
+
+// Builds a graph in caller-provided `space` by forwarding to Graph::create(required, space).
+IGraph::Ptr IGraph::inproc(const Layout& required, std::span space) {
+    return Graph::create(required, space);
+}
+
+// Creates a graph in shared memory, or attaches to one that already exists there.
+//  - opts:      option string for dlsm::SharedMemory::create(); ",size=<required.size()>" is appended.
+//  - attaching: forwarded to Graph::create() as its retry argument.
+// Throws std::invalid_argument when required.graph_.wait_ is Wait::Block.
+// The SharedMemory handle is moved into graph->shared_.
+IGraph::Ptr IGraph::shared(const Layout& required, const std::string& opts, std::string_view attaching) {
+    if (required.graph_.wait_ == Wait::Block) {
+        throw std::invalid_argument{"Wait::Block is not allowed for Layout in Shared Memory"};
+    }
+    auto smem = dlsm::SharedMemory::create(opts + ",size=" + std::to_string(required.size()));
+    auto& allocated = smem->reference();
+    auto graph = Graph::create(required, allocated, attaching);
+    graph->shared_ = std::move(smem);
+    return graph;
+}
+
+}  // namespace dlsm::Disruptor::Graph
diff --git a/src/Str.cpp b/src/Str.cpp
index f13a2db..fa41aed 100644
--- a/src/Str.cpp
+++ b/src/Str.cpp
@@ -62,7 +62,7 @@ void copy(std::string_view src, std::span dst) {
     }
 
     auto len = std::min(std::size(dst) - 1, std::size(src));
-    auto end = std::copy(std::begin(src), std::begin(src) + len, std::begin(dst));
+    auto end = std::copy_n(std::begin(src), len, std::begin(dst));
     std::fill(end, std::end(dst), '\0');
 }
 }  // namespace dlsm::Str
\ No newline at end of file
diff --git a/src/Transport.cpp b/src/Transport.cpp
index 2992cc7..bb769ea 100644
--- a/src/Transport.cpp
+++ b/src/Transport.cpp
@@ -1,6 +1,7 @@
 #include "impl/Transport.hpp"
 
 #include
+
 #include
 #include
 #include
diff --git a/tests/apps/Broadcast.cpp b/tests/apps/Broadcast.cpp
new file mode 100644
index 0000000..9bb3fc6
--- /dev/null
+++ b/tests/apps/Broadcast.cpp
@@ -0,0 +1,86 @@
+#include
+#include
+#include
+
+#include "impl/DisruptorGraph.hpp"
+#include "impl/Signal.hpp"
+
+using namespace std::literals;
+using namespace dlsm::Disruptor::Graph;
+
+// Payload stored in the ring.
+// NOTE(review): alignas(64) presumably gives every event its own cache line - confirm intent.
+struct alignas(64) Event {
+    std::chrono::nanoseconds timestamp;  // main() passes system_clock::now().time_since_epoch()
+    std::uint64_t seqnumber;             // overwritten by produce() with the claimed sequence
+};
+
+// Prints an Event as "seq:<seqnumber> ts:<time><utc offset>". The timestamp is interpreted as
+// a duration since the system_clock epoch and rendered in the current time zone.
+std::ostream& operator<<(std::ostream& out, const Event& e) {
+    auto tp = std::chrono::system_clock::time_point{e.timestamp};
+    auto zt = std::chrono::zoned_time{std::chrono::current_zone(), tp};
+    out << std::format("seq:{} ts:{:%T%z}", e.seqnumber, zt);
+    return out;
+}
+
+// Publishes one event: claims a sequence from `pub`, copies `event` into that slot of `items`,
+// stamps the slot with its sequence number and publishes it.
+// NOTE(review): the `- 1` suggests claim() returns one past the claimed sequence - confirm.
+void produce(const Event& event, auto& pub, auto& items) {
+    const auto seq = pub.claim() - 1;
+    items[seq] = event;
+    items[seq].seqnumber = static_cast(seq);
+    // std::cout << pub.name() << " " << items[seq] << '\n';
+    pub.publish(seq);
+}
+
+// Prints the events from sub.last() + 1 up to the sequence returned by sub.consume(), then
+// releases up to the last printed one.
+// NOTE(review): assumes consume(next) returns the highest readable sequence - confirm.
+void consume(auto& sub, const auto& items) {
+    auto next = sub.last() + 1;
+    auto last = sub.consume(next);
+    for (; next <= last; ++next) {
+        auto& event = items[next];
+        std::cout << /*sub.name()*/ "sub"
+                  << " " << event << '\n';
+    }
+    // After the loop `next` is one past the last printed sequence.
+    sub.release(next - 1);
+}
+
+// Test application: argv[1] is the mode (pub|sub|mon), argv[2] the shared-memory name and the
+// optional argv[3] the number of messages (default 1).
+// NOTE(review): the mode only changes `opts`; every mode runs the same produce/consume loop.
+int main(int argc, char* argv[]) {
+    if (argc < 3 || (argv[1] != "pub"s && argv[1] != "sub"s && argv[1] != "mon"s)) {
+        std::cerr << "Usage: " << argv[0] << " pub|sub|mon /dev/shm/shared \n";
+        return -1;
+    }
+
+    const auto pub = argv[1] == "pub"s;
+    const auto sub = argv[1] == "sub"s;
+    [[maybe_unused]] const auto mon = argv[1] == "mon"s;
+
+    auto name = std::string{argv[1]};
+    auto path = std::string{argv[2]};
+    // NOTE(review): std::stoull throws on non-numeric input and nothing here catches it.
+    auto msgs = argc < 4 ? 1 : std::stoull(argv[3]);
+    auto data = Layout{{Type::MPMC, Wait::Spins}, {4, 4}, Layout::Items::create(16)};
+    auto opts = std::format("lock=on,name={}", path);
+
+    // The publisher adds purge/create options, the subscriber adds an open option.
+    if (pub) opts += ",purge=on,create=on";
+    if (sub) opts += ",open=1000x10";
+
+    dlsm::Signal::Termination watcher;
+
+    auto graph = IGraph::shared(data, opts);
+    auto items = graph->ring();
+
+    auto nameC1 = name + "C1";
+    auto nameC2 = name + "C2";
+
+    auto P1 = graph->pub(name + "P1");
+    auto C1 = graph->sub(nameC1);
+    // NOTE(review): passing C1's name presumably makes C2 depend on C1 - confirm against IGraph::sub.
+    auto C2 = graph->sub(nameC2, {{std::string_view{nameC1}}});
+
+    std::cout << std::format("{} {}", name, graph->description());
+
+    // Each iteration publishes one event and lets both subscribers consume it in order.
+    for (std::size_t i = 0; i < msgs; ++i) {
+        produce(Event{std::chrono::system_clock::now().time_since_epoch(), i}, *P1, items);
+        consume(*C1, items);
+        consume(*C2, items);
+    }
+
+    std::cout << std::format("{} {}", name, graph->description());
+
+    // Presumably blocks until a termination signal arrives - confirm against dlsm::Signal.
+    watcher.wait();
+
+    return 0;
+}
diff --git a/tests/apps/CMakeLists.txt b/tests/apps/CMakeLists.txt
new file mode 100644
index 0000000..a569d5d
--- /dev/null
+++ b/tests/apps/CMakeLists.txt
@@ -0,0 +1,5 @@
+# NOTE(review): broadcast links only dlsm below - confirm the benchmark package is needed here.
+find_package(benchmark REQUIRED benchmark)
+
+add_executable (broadcast Broadcast.cpp)
+target_include_directories(broadcast PRIVATE ../)
+target_link_libraries (broadcast PRIVATE dlsm)
diff --git a/tests/perf/CMakeLists.txt b/tests/perf/CMakeLists.txt
index 36be26d..8682513 100644
--- a/tests/perf/CMakeLists.txt
+++ b/tests/perf/CMakeLists.txt
@@ -6,6 +6,7 @@ target_include_directories(perf PRIVATE ../)
 target_link_libraries (perf PRIVATE dlsm benchmark::benchmark)
 target_sources (perf PRIVATE
     PerfClock.cpp
+    PerfDisruptor.cpp
     PerfLock.cpp
     PerfTransport.cpp
 )
diff --git a/tests/perf/Disruptor.md b/tests/perf/Disruptor.md
new file mode 100644
index 0000000..16b69ed
--- /dev/null
+++ b/tests/perf/Disruptor.md
@@ -0,0 +1,541 @@
+## Latency & Throughput tests for [dlsm::Disruptor](../../include/impl/Disruptor.hpp) are in [PerfDisruptor.cpp](PerfDisruptor.cpp)
+
+It was tested in Virtual Box on Windows 10 host, so
99% percentiles and jitter are not representative. Latency tests skip the first `10%` of measurement samples when calculating the `50%`, `90%`, `99%` metrics (the skipped samples are treated as warm-up of memory and caches). + +### Tests Environment +```bash +user@fedora:~/dlsm$ uname -a +Linux fedora 6.5.10-300.fc39.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Nov 2 20:01:06 UTC 2023 x86_64 GNU/Linux +user@fedora:~/dlsm$ lscpu +Architecture: x86_64 + CPU op-mode(s): 32-bit, 64-bit + Address sizes: 39 bits physical, 48 bits virtual + Byte Order: Little Endian +CPU(s): 12 + On-line CPU(s) list: 0-11 +Vendor ID: GenuineIntel + Model name: 12th Gen Intel(R) Core(TM) i7-12700H + CPU family: 6 + Model: 154 + Thread(s) per core: 1 + Core(s) per socket: 12 + Socket(s): 1 + Stepping: 3 + BogoMIPS: 5376.00 + Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 c + x16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx rdrand hypervisor lahf_lm abm 3dnowprefetch fsgsbase bmi1 avx2 bmi2 invpcid rdseed clflushopt arat md_clear flush_l1d arch_capabilities +Virtualization features: + Hypervisor vendor: KVM + Virtualization type: full +Caches (sum of all): + L1d: 576 KiB (12 instances) + L1i: 384 KiB (12 instances) + L2: 15 MiB (12 instances) + L3: 288 MiB (12 instances) +NUMA: + NUMA node(s): 1 + NUMA node0 CPU(s): 0-11 +Vulnerabilities: + Gather data sampling: Not affected + Itlb multihit: Not affected + L1tf: Not affected + Mds: Not affected + Meltdown: Not affected + Mmio stale data: Not affected + Retbleed: Not affected + Spec rstack overflow: Not affected + Spec store bypass: Vulnerable + Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization + Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling, PBRSB-eIBRS Not affected + Srbds: Not affected + Tsx async abort: Not affected +user@fedora:~/dlsm$ cat /proc/cmdline 
+BOOT_IMAGE=(hd0,gpt2)/vmlinuz-6.5.10-300.fc39.x86_64 root=UUID=3ce67a5d-34a5-4187-a149-a042b4e03660 ro rootflags=subvol=root rhgb quiet isolcpus=6-11 +user@fedora:~/dlsm$ cat /sys/kernel/mm/transparent_hugepage/enabled +always [madvise] never +user@fedora:~/dlsm$ numactl --hardware +available: 1 nodes (0) +node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 +node 0 size: 15977 MB +node 0 free: 3666 MB +node distances: +node 0 + 0: 10 +``` + +### Tests Results for SPMC/MPMC busy-wait In-process/Inter-process +Run by: +```user@fedora:~/dlsm$ time taskset -c 6-11 ./build/tests/perf/perf --benchmark_filter=Disruptor --benchmark_counters_tabular=true``` + +Scroll it horizontally. +
2024-04-28T03:06:58+03:00 + +```bash +2024-04-28T03:06:58+03:00 +Running ./build/tests/perf/perf +Run on (12 X 2687.99 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 24576 KiB (x12) +Load Average: 2.26, 3.71, 3.72 +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations 50% 90% 99% Max Min Pub x1 per_item(avg) +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 9.33 s 50.7 s 1 171n 596n 10.393u 0.0195771 41n 100M 93.2754n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 9.32 s 51.7 s 1 168n 624n 8.825u 0.0157792 49n 100M 93.1781n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 9.27 s 51.0 s 1 166n 584n 9.442u 0.0132266 52n 100M 92.7215n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 9.41 s 52.0 s 1 164n 587n 8.473u 0.035434 53n 100M 94.0744n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 9.48 s 54.5 s 1 169n 378n 9.518u 0.0144451 54n 100M 94.793n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time_mean 9.36 s 52.0 s 5 167.6n 553.8n 9.3302u 0.0196924 49.8n 100M 93.6085n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time_median 9.33 s 51.7 s 5 168n 587n 9.442u 0.0157792 52n 100M 93.2754n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time_stddev 0.082 s 1.48 s 5 2.70185n 99.5349n 736.093n 9.11682m 5.26308n 
0.833 822.033p +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time_cv 0.88 % 2.86 % 5 1.61% 17.97% 7.89% 46.30% 10.57% 0.00% 0.88% +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 3.41 s 20.8 s 1 1.152u 16.203u 29.877u 0.0144117 64n 100M 34.0952n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 3.28 s 19.1 s 1 1.179u 8.561u 29.671u 9.01935m 62n 100M 32.7974n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 3.38 s 19.8 s 1 1.174u 14.384u 29.853u 0.01529 61n 100M 33.7833n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 3.19 s 18.6 s 1 1.228u 12.517u 30.047u 4.49388m 53n 100M 31.8845n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 2.47 s 18.0 s 1 1.362u 5.897u 26.551u 5.23252m 64n 100M 24.7149n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time_mean 3.15 s 19.2 s 5 1.219u 11.5124u 29.1998u 9.6895m 60.8n 100M 31.455n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time_median 3.28 s 19.1 s 5 1.179u 12.517u 29.853u 9.01935m 62n 100M 32.7974n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time_stddev 0.387 s 1.07 s 5 84.6227n 4.2279u 1.48671u 5.02428m 4.54973n 0.833 3.86675n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time_cv 12.29 % 5.54 % 5 6.94% 36.72% 5.09% 51.85% 7.48% 0.00% 12.29% +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 12.0 s 64.6 s 1 231n 899n 10.268u 0.0172506 22n 100M 120.157n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 11.9 s 64.5 s 1 232n 896n 10.14u 0.0169999 67n 100M 119.421n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 12.2 s 65.5 s 1 232n 902n 11.206u 0.0262233 61n 100M 122.285n 
+DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 12.1 s 65.1 s 1 236n 894n 10.291u 0.024601 58n 100M 120.56n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 11.2 s 63.5 s 1 186n 340n 10.372u 8.19814m 57n 100M 111.998n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time_mean 11.9 s 64.6 s 5 223.4n 786.2n 10.4554u 0.0186546 53n 100M 118.884n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time_median 12.0 s 64.6 s 5 232n 896n 10.291u 0.0172506 58n 100M 120.157n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time_stddev 0.399 s 0.745 s 5 20.9952n 249.452n 427.779n 7.18841m 17.7623n 0.833 3.99075n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time_cv 3.36 % 1.15 % 5 9.40% 31.73% 4.09% 38.53% 33.51% 0.00% 3.36% +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 3.66 s 22.7 s 1 695n 12.187u 30.443u 2.83701m 62n 100M 36.5854n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 3.62 s 21.5 s 1 730n 19.688u 29.874u 8.96693m 67n 100M 36.1616n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 3.45 s 20.7 s 1 707n 13.619u 44.48u 0.0146715 67n 100M 34.5485n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 3.61 s 21.7 s 1 720n 21.18u 43.759u 2.78296m 67n 100M 36.0637n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 3.18 s 21.5 s 1 686n 8.492u 29.67u 1.0919m 67n 100M 31.8346n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time_mean 3.50 s 21.6 s 5 707.6n 15.0332u 35.6452u 6.07005m 66n 100M 35.0387n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time_median 3.61 s 21.5 s 5 707n 13.619u 30.443u 2.83701m 67n 100M 36.0637n 
+DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time_stddev 0.195 s 0.714 s 5 17.8969n 5.29949u 7.74532u 5.66604m 2.23607n 0.833 1.95003n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time_cv 5.57 % 3.30 % 5 2.53% 35.25% 21.73% 93.34% 3.39% 0.00% 5.57% +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 14.4 s 77.3 s 1 141n 183n 8.958u 0.0144793 46n 100M 144.301n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 12.4 s 66.5 s 1 125n 175n 7.09u 0.0305681 45n 100M 124.479n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 12.5 s 66.8 s 1 126n 174n 7.567u 0.0130309 43n 100M 124.727n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 12.5 s 67.1 s 1 125n 173n 6.906u 0.016571 42n 100M 124.585n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 12.5 s 69.2 s 1 125n 173n 7.977u 0.017144 47n 100M 124.665n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time_mean 12.9 s 69.4 s 5 128.4n 175.6n 7.6996u 0.0183587 44.6n 100M 128.551n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time_median 12.5 s 67.1 s 5 125n 174n 7.567u 0.016571 45n 100M 124.665n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time_stddev 0.880 s 4.54 s 5 7.05691n 4.219n 818.462n 7.02098m 2.07364n 0.833 8.80475n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time_cv 6.85 % 6.55 % 5 5.50% 2.40% 10.63% 38.24% 4.65% 0.00% 6.85% +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 8.87 s 51.0 s 1 179n 291n 10.571u 0.0204653 53n 100M 88.7241n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 8.82 s 48.9 s 1 179n 292n 9.546u 0.0117131 52n 100M 88.165n 
+DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 9.00 s 50.3 s 1 180n 293n 9.493u 0.015888 61n 100M 89.9522n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 8.80 s 49.3 s 1 178n 283n 7.967u 7.53523m 61n 100M 87.9993n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 8.76 s 51.4 s 1 179n 283n 8.341u 0.0258682 43n 100M 87.6462n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time_mean 8.85 s 50.2 s 5 179n 288.4n 9.1836u 0.016294 54n 100M 88.4974n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time_median 8.82 s 50.3 s 5 179n 291n 9.493u 0.015888 53n 100M 88.165n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time_stddev 0.090 s 1.06 s 5 707.107p 4.97996n 1.04188u 7.19253m 7.48331n 0.833 901.552p +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time_cv 1.02 % 2.12 % 5 0.40% 1.73% 11.34% 44.14% 13.86% 0.00% 1.02% +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 12.4 s 67.3 s 1 125n 177n 8.354u 7.56218m 49n 100M 124.325n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 12.4 s 66.7 s 1 124n 175n 6.644u 8.37152m 49n 100M 124.132n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 12.3 s 66.3 s 1 123n 170n 7.489u 0.0170156 46n 100M 122.869n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 12.5 s 67.6 s 1 125n 179n 7.67u 0.0152955 46n 100M 125.142n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time 12.3 s 68.2 s 1 124n 173n 7.06u 0.011449 45n 100M 122.939n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time_mean 12.4 s 67.2 s 5 124.2n 174.8n 7.4434u 0.0119388 47n 100M 123.881n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time_median 
12.4 s 67.3 s 5 124n 175n 7.489u 0.011449 46n 100M 124.132n +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time_stddev 0.097 s 0.764 s 5 836.66p 3.49285n 645.882n 4.15815m 1.87083n 0.833 969.57p +DisruptorLatency/1/4/100000000/0/iterations:1/repeats:5/process_time/manual_time_cv 0.78 % 1.14 % 5 0.67% 2.00% 8.68% 34.83% 3.98% 0.00% 0.78% +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 8.81 s 49.4 s 1 181n 290n 9.616u 0.0144294 42n 100M 88.061n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 8.75 s 49.1 s 1 181n 288n 8.711u 0.0261344 61n 100M 87.4536n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 8.74 s 48.8 s 1 181n 289n 9.085u 0.0121636 60n 100M 87.3647n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 10.0 s 55.1 s 1 191n 308n 11.045u 0.0147897 57n 100M 100.367n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time 8.81 s 51.1 s 1 182n 296n 10.421u 0.0138417 61n 100M 88.1306n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time_mean 9.03 s 50.7 s 5 183.2n 294.2n 9.7756u 0.0162718 56.2n 100M 90.2754n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time_median 8.81 s 49.4 s 5 181n 290n 9.616u 0.0144294 60n 100M 88.061n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time_stddev 0.565 s 2.63 s 5 4.38178n 8.31865n 957.039n 5.60456m 8.10555n 0.833 5.65208n +DisruptorLatency/1/4/100000000/32/iterations:1/repeats:5/process_time/manual_time_cv 6.26 % 5.19 % 5 2.39% 2.83% 9.79% 34.44% 14.42% 0.00% 6.26% +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations 50% 90% 99% Max Min Pub 
x4 per_item(avg) +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +DisruptorLatency/4/1/100000000/0/iterations:1/repeats:5/process_time/manual_time 5.22 s 27.0 s 1 395n 47.021u 54.884u 8.69645m 65n 25M 52.2439n +DisruptorLatency/4/1/100000000/0/iterations:1/repeats:5/process_time/manual_time 5.38 s 27.9 s 1 360n 3.065u 52.267u 8.5814m 42n 25M 53.7544n +DisruptorLatency/4/1/100000000/0/iterations:1/repeats:5/process_time/manual_time 5.23 s 27.2 s 1 372n 32.64u 52.497u 0.0100931 65n 25M 52.3347n +DisruptorLatency/4/1/100000000/0/iterations:1/repeats:5/process_time/manual_time 5.36 s 27.8 s 1 379n 35.132u 52.384u 0.0307617 68n 25M 53.5996n +DisruptorLatency/4/1/100000000/0/iterations:1/repeats:5/process_time/manual_time 5.31 s 28.0 s 1 361n 566n 52.353u 0.0140403 57n 25M 53.1311n +DisruptorLatency/4/1/100000000/0/iterations:1/repeats:5/process_time/manual_time_mean 5.30 s 27.6 s 5 373.4n 23.6848u 52.877u 0.0144346 59.4n 25M 53.0127n +DisruptorLatency/4/1/100000000/0/iterations:1/repeats:5/process_time/manual_time_median 5.31 s 27.8 s 5 372n 32.64u 52.384u 0.0100931 65n 25M 53.1311n +DisruptorLatency/4/1/100000000/0/iterations:1/repeats:5/process_time/manual_time_stddev 0.070 s 0.466 s 5 14.4326n 20.7091u 1.12496u 9.39103m 10.5499n 0.20825 699.868p +DisruptorLatency/4/1/100000000/0/iterations:1/repeats:5/process_time/manual_time_cv 1.32 % 1.69 % 5 3.87% 87.44% 2.13% 65.06% 17.76% 0.00% 1.32% +DisruptorLatency/4/1/100000000/32/iterations:1/repeats:5/process_time/manual_time 2.36 s 12.4 s 1 17.976u 20.155u 51.061u 0.0160589 71n 25M 23.6266n +DisruptorLatency/4/1/100000000/32/iterations:1/repeats:5/process_time/manual_time 2.51 s 13.3 s 1 17.951u 48.674u 55.345u 5.88402m 66n 25M 25.1401n +DisruptorLatency/4/1/100000000/32/iterations:1/repeats:5/process_time/manual_time 2.41 s 13.0 s 1 
17.963u 19.337u 51.256u 1.50153m 74n 25M 24.0937n +DisruptorLatency/4/1/100000000/32/iterations:1/repeats:5/process_time/manual_time 2.74 s 14.5 s 1 17.941u 49.475u 56.75u 8.56021m 70n 25M 27.3809n +DisruptorLatency/4/1/100000000/32/iterations:1/repeats:5/process_time/manual_time 2.50 s 13.9 s 1 17.969u 48.96u 56.638u 6.76251m 62n 25M 25.0487n +DisruptorLatency/4/1/100000000/32/iterations:1/repeats:5/process_time/manual_time_mean 2.51 s 13.4 s 5 17.96u 37.3202u 54.21u 7.75343m 68.6n 25M 25.058n +DisruptorLatency/4/1/100000000/32/iterations:1/repeats:5/process_time/manual_time_median 2.50 s 13.3 s 5 17.963u 48.674u 55.345u 6.76251m 70n 25M 25.0487n +DisruptorLatency/4/1/100000000/32/iterations:1/repeats:5/process_time/manual_time_stddev 0.145 s 0.814 s 5 14.0357n 16.0481u 2.84066u 5.31963m 4.66905n 0.20825 1.44753n +DisruptorLatency/4/1/100000000/32/iterations:1/repeats:5/process_time/manual_time_cv 5.78 % 6.06 % 5 0.08% 43.00% 5.24% 68.61% 6.81% 0.00% 5.78% +DisruptorLatency/4/1/100000000/0/iterations:1/repeats:5/process_time/manual_time 5.22 s 27.2 s 1 365n 4.512u 52.461u 0.0345443 44n 25M 52.186n +DisruptorLatency/4/1/100000000/0/iterations:1/repeats:5/process_time/manual_time 5.25 s 26.9 s 1 395n 42.742u 56.332u 0.0157147 35n 25M 52.4871n +DisruptorLatency/4/1/100000000/0/iterations:1/repeats:5/process_time/manual_time 5.25 s 27.2 s 1 394n 50.714u 55.603u 0.0111909 59n 25M 52.5139n +DisruptorLatency/4/1/100000000/0/iterations:1/repeats:5/process_time/manual_time 5.21 s 26.9 s 1 391n 45.953u 53.378u 2.36218m 56n 25M 52.1453n +DisruptorLatency/4/1/100000000/0/iterations:1/repeats:5/process_time/manual_time 5.16 s 27.1 s 1 391n 50.341u 55.672u 7.1939m 58n 25M 51.5701n +DisruptorLatency/4/1/100000000/0/iterations:1/repeats:5/process_time/manual_time_mean 5.22 s 27.0 s 5 387.2n 38.8524u 54.6892u 0.0142012 50.4n 25M 52.1805n +DisruptorLatency/4/1/100000000/0/iterations:1/repeats:5/process_time/manual_time_median 5.22 s 27.1 s 5 391n 45.953u 55.603u 0.0111909 56n 25M 
52.186n +DisruptorLatency/4/1/100000000/0/iterations:1/repeats:5/process_time/manual_time_stddev 0.038 s 0.143 s 5 12.5379n 19.4775u 1.67211u 0.0123942 10.5024n 0.20825 380.453p +DisruptorLatency/4/1/100000000/0/iterations:1/repeats:5/process_time/manual_time_cv 0.73 % 0.53 % 5 3.24% 50.13% 3.06% 87.28% 20.84% 0.00% 0.73% +DisruptorLatency/4/1/100000000/32/iterations:1/repeats:5/process_time/manual_time 2.61 s 13.9 s 1 18.151u 28.802u 52.341u 4.02694m 71n 25M 26.1356n +DisruptorLatency/4/1/100000000/32/iterations:1/repeats:5/process_time/manual_time 2.62 s 13.9 s 1 18.264u 24.878u 53.455u 0.0150808 69n 25M 26.2062n +DisruptorLatency/4/1/100000000/32/iterations:1/repeats:5/process_time/manual_time 2.27 s 12.1 s 1 18.1u 20.084u 52.087u 3.29571m 68n 25M 22.7498n +DisruptorLatency/4/1/100000000/32/iterations:1/repeats:5/process_time/manual_time 2.95 s 15.5 s 1 18.374u 50.574u 60.596u 0.0109504 69n 25M 29.4771n +DisruptorLatency/4/1/100000000/32/iterations:1/repeats:5/process_time/manual_time 2.58 s 14.3 s 1 18.157u 48.42u 54.874u 1.77409m 72n 25M 25.7666n +DisruptorLatency/4/1/100000000/32/iterations:1/repeats:5/process_time/manual_time_mean 2.61 s 14.0 s 5 18.2092u 34.5516u 54.6706u 7.02559m 69.8n 25M 26.0671n +DisruptorLatency/4/1/100000000/32/iterations:1/repeats:5/process_time/manual_time_median 2.61 s 13.9 s 5 18.157u 28.802u 53.455u 4.02694m 69n 25M 26.1356n +DisruptorLatency/4/1/100000000/32/iterations:1/repeats:5/process_time/manual_time_stddev 0.238 s 1.24 s 5 109.771n 14.0089u 3.4904u 5.7178m 1.64317n 0.20825 2.38469n +DisruptorLatency/4/1/100000000/32/iterations:1/repeats:5/process_time/manual_time_cv 9.15 % 8.85 % 5 0.60% 40.54% 6.38% 81.39% 2.35% 0.00% 9.15% +Reader batch sizes, top 8 of 4526: +1): 32 item batch, 4.1767%, 2610418 times +2): 736 item batch, 4.0764%, 110771 times +3): 64 item batch, 3.6406%, 1137703 times +4): 768 item batch, 2.8739%, 74841 times +5): 704 item batch, 2.8422%, 80745 times +6): 96 item batch, 0.9476%, 197407 times +7): 672 
item batch, 0.8873%, 26407 times +8): 530176 item batch, 0.8218%, 31 times +Reader batch sizes, top 8 of 4454: +1): 736 item batch, 8.2478%, 224125 times +2): 704 item batch, 5.9748%, 169738 times +3): 768 item batch, 5.6427%, 146945 times +4): 32 item batch, 2.7101%, 1693799 times +5): 64 item batch, 2.3350%, 729686 times +6): 672 item batch, 1.9245%, 57276 times +7): 800 item batch, 1.3206%, 33014 times +8): 640 item batch, 0.8431%, 26348 times +Reader batch sizes, top 8 of 4428: +1): 736 item batch, 6.9030%, 187581 times +2): 768 item batch, 5.3083%, 138238 times +3): 704 item batch, 4.4576%, 126635 times +4): 672 item batch, 1.3684%, 40726 times +5): 800 item batch, 1.2582%, 31456 times +6): 640 item batch, 0.5642%, 17632 times +7): 545536 item batch, 0.5455%, 20 times +8): 540000 item batch, 0.5130%, 19 times +Reader batch sizes, top 8 of 4459: +1): 736 item batch, 8.4155%, 228682 times +2): 704 item batch, 6.0134%, 170835 times +3): 768 item batch, 5.6323%, 146675 times +4): 32 item batch, 1.7521%, 1095076 times +5): 672 item batch, 1.6568%, 49309 times +6): 64 item batch, 1.4526%, 453927 times +7): 800 item batch, 1.2814%, 32035 times +8): 511040 item batch, 0.7155%, 28 times +Reader batch sizes, top 8 of 4537: +1): 736 item batch, 9.3446%, 253930 times +2): 704 item batch, 6.3247%, 179679 times +3): 768 item batch, 5.9360%, 154583 times +4): 32 item batch, 4.7501%, 2968817 times +5): 64 item batch, 4.2997%, 1343649 times +6): 672 item batch, 1.6975%, 50522 times +7): 800 item batch, 1.2138%, 30346 times +8): 96 item batch, 0.9079%, 189153 times +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +Benchmark Time CPU Iterations Pub x1 Sub x4 bytes_per_second items_per_second per_item(avg) 
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 2.81 s 13.7 s 1 500M 500M 10.6214Gi/s 178.198M/s 5.61173n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 2.64 s 13.0 s 1 500M 500M 11.2828Gi/s 189.293M/s 5.28281n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 2.65 s 13.0 s 1 500M 500M 11.2509Gi/s 188.759M/s 5.29777n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 2.63 s 12.9 s 1 500M 500M 11.3158Gi/s 189.847M/s 5.26739n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 2.69 s 13.2 s 1 500M 500M 11.0834Gi/s 185.949M/s 5.37782n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time_mean 2.68 s 13.2 s 5 500M 500M 11.1109Gi/s 186.409M/s 5.3675n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time_median 2.65 s 13.0 s 5 500M 500M 11.2509Gi/s 188.759M/s 5.29777n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time_stddev 0.072 s 0.297 s 5 4.165 4.165 294.763Mi/s 4.8294M/s 143.015p +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time_cv 2.66 % 2.25 % 5 0.00% 0.00% 2.59% 2.59% 2.66% +Reader batch sizes, top 8 of 3841: +1): 768 item batch, 8.4588%, 220281 times +2): 736 item batch, 7.0356%, 191184 times +3): 800 item batch, 3.8331%, 95827 times +4): 704 item batch, 2.8809%, 81845 times +5): 534176 item batch, 1.1752%, 44 times +6): 527328 item batch, 1.1074%, 42 times +7): 514368 item batch, 1.1059%, 43 times +8): 521216 item batch, 1.0424%, 40 times +Reader batch sizes, top 8 of 4513: +1): 768 item batch, 6.7796%, 176552 times +2): 736 item 
batch, 5.8031%, 157693 times +3): 800 item batch, 2.9445%, 73612 times +4): 704 item batch, 2.2984%, 65295 times +5): 64 item batch, 0.7613%, 237899 times +6): 525088 item batch, 0.6564%, 25 times +7): 523456 item batch, 0.6543%, 25 times +8): 672 item batch, 0.6327%, 18829 times +Reader batch sizes, top 8 of 4024: +1): 768 item batch, 8.1923%, 213340 times +2): 736 item batch, 7.6941%, 209078 times +3): 800 item batch, 3.4410%, 86025 times +4): 704 item batch, 3.3273%, 94525 times +5): 64 item batch, 1.3713%, 428530 times +6): 128 item batch, 1.1205%, 175076 times +7): 96 item batch, 0.9783%, 203821 times +8): 672 item batch, 0.9392%, 27952 times +Reader batch sizes, top 8 of 4644: +1): 768 item batch, 5.9593%, 155190 times +2): 736 item batch, 5.3490%, 145353 times +3): 800 item batch, 2.5952%, 64881 times +4): 704 item batch, 2.2451%, 63781 times +5): 528736 item batch, 0.8724%, 33 times +6): 519808 item batch, 0.8317%, 32 times +7): 518592 item batch, 0.7001%, 27 times +8): 529952 item batch, 0.6889%, 26 times +Reader batch sizes, top 8 of 4376: +1): 768 item batch, 7.5147%, 195696 times +2): 736 item batch, 6.6186%, 179854 times +3): 800 item batch, 3.1532%, 78829 times +4): 704 item batch, 2.4945%, 70867 times +5): 64 item batch, 0.9346%, 292047 times +6): 672 item batch, 0.6039%, 17973 times +7): 96 item batch, 0.5489%, 114358 times +8): 128 item batch, 0.5350%, 83592 times +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 2.64 s 13.0 s 1 500M 500M 11.272Gi/s 189.113M/s 5.28784n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 2.66 s 13.1 s 1 500M 500M 11.2054Gi/s 187.996M/s 5.31927n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 2.65 s 13.0 s 1 500M 500M 11.2454Gi/s 188.666M/s 5.30038n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 2.66 s 13.1 s 1 500M 500M 11.2032Gi/s 187.958M/s 5.32034n 
+DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 2.64 s 13.0 s 1 500M 500M 11.2945Gi/s 189.491M/s 5.2773n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time_mean 2.65 s 13.0 s 5 500M 500M 11.2441Gi/s 188.645M/s 5.30103n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time_median 2.65 s 13.0 s 5 500M 500M 11.2454Gi/s 188.666M/s 5.30038n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time_stddev 0.009 s 0.051 s 5 4.165 4.165 41.2658Mi/s 676.099k/s 18.9928p +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time_cv 0.36 % 0.39 % 5 0.00% 0.00% 0.36% 0.36% 0.36% +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.6571%, 249142688 times +2): 4 item batch, 0.1030%, 515203 times +3): 3 item batch, 0.0788%, 525619 times +4): 5 item batch, 0.0746%, 298235 times +5): 7 item batch, 0.0273%, 78133 times +6): 2 item batch, 0.0263%, 262774 times +7): 1 item batch, 0.0206%, 411707 times +8): 6 item batch, 0.0123%, 40911 times +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.3445%, 248361146 times +2): 5 item batch, 0.1519%, 607447 times +3): 7 item batch, 0.1389%, 396881 times +4): 6 item batch, 0.1144%, 381176 times +5): 3 item batch, 0.1039%, 692630 times +6): 2 item batch, 0.0567%, 567007 times +7): 4 item batch, 0.0531%, 265704 times +8): 1 item batch, 0.0367%, 733654 times +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.5314%, 248828614 times +2): 5 item batch, 0.1043%, 417362 times +3): 7 item batch, 0.0918%, 262307 times +4): 3 item batch, 0.0783%, 521694 times +5): 6 item batch, 0.0734%, 244748 times +6): 4 item batch, 0.0451%, 225571 times +7): 2 item batch, 0.0444%, 444123 times +8): 1 item batch, 0.0312%, 624029 times +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.3000%, 248249881 times +2): 5 item batch, 0.1587%, 634666 times +3): 7 item batch, 0.1471%, 420403 times +4): 6 item 
batch, 0.1202%, 400517 times +5): 3 item batch, 0.1108%, 738713 times +6): 2 item batch, 0.0624%, 624300 times +7): 4 item batch, 0.0600%, 299803 times +8): 1 item batch, 0.0409%, 817748 times +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.3922%, 248480436 times +2): 5 item batch, 0.1356%, 542261 times +3): 7 item batch, 0.1238%, 353742 times +4): 6 item batch, 0.1020%, 340105 times +5): 3 item batch, 0.0968%, 645056 times +6): 2 item batch, 0.0573%, 573121 times +7): 4 item batch, 0.0539%, 269657 times +8): 1 item batch, 0.0384%, 768345 times +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 7.52 s 37.0 s 1 500M 500M 3.96497Gi/s 66.5212M/s 15.0328n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 7.05 s 34.7 s 1 500M 500M 4.22896Gi/s 70.9502M/s 14.0944n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 7.05 s 34.7 s 1 500M 500M 4.22704Gi/s 70.918M/s 14.1008n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 7.04 s 34.6 s 1 500M 500M 4.23587Gi/s 71.0661M/s 14.0714n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 7.09 s 34.8 s 1 500M 500M 4.20548Gi/s 70.5562M/s 14.1731n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time_mean 7.15 s 35.1 s 5 500M 500M 4.17247Gi/s 70.0024M/s 14.2945n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time_median 7.05 s 34.7 s 5 500M 500M 4.22704Gi/s 70.918M/s 14.1008n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time_stddev 0.207 s 1.02 s 5 4.165 4.165 119.344Mi/s 1.95534M/s 414.474p +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time_cv 2.90 % 2.92 % 5 0.00% 0.00% 2.79% 2.79% 2.90% +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.9919%, 62494920 times +2): 5 item batch, 0.0018%, 1799 times +3): 7 item batch, 
0.0017%, 1226 times +4): 6 item batch, 0.0016%, 1324 times +5): 3 item batch, 0.0013%, 2125 times +6): 2 item batch, 0.0008%, 1878 times +7): 1 item batch, 0.0006%, 2912 times +8): 4 item batch, 0.0004%, 519 times +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.9960%, 62497487 times +2): 5 item batch, 0.0008%, 834 times +3): 7 item batch, 0.0007%, 526 times +4): 3 item batch, 0.0007%, 1138 times +5): 6 item batch, 0.0006%, 522 times +6): 2 item batch, 0.0004%, 1038 times +7): 4 item batch, 0.0004%, 467 times +8): 1 item batch, 0.0004%, 1762 times +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.9954%, 62497109 times +2): 5 item batch, 0.0011%, 1108 times +3): 7 item batch, 0.0010%, 691 times +4): 6 item batch, 0.0009%, 735 times +5): 3 item batch, 0.0008%, 1283 times +6): 2 item batch, 0.0004%, 978 times +7): 1 item batch, 0.0003%, 1428 times +8): 4 item batch, 0.0002%, 277 times +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.9990%, 62499360 times +2): 5 item batch, 0.0002%, 243 times +3): 7 item batch, 0.0002%, 158 times +4): 6 item batch, 0.0002%, 161 times +5): 3 item batch, 0.0002%, 282 times +6): 2 item batch, 0.0001%, 196 times +7): 1 item batch, 0.0001%, 347 times +8): 4 item batch, 0.0000%, 62 times +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.9985%, 62499066 times +2): 5 item batch, 0.0003%, 316 times +3): 7 item batch, 0.0003%, 207 times +4): 3 item batch, 0.0002%, 408 times +5): 6 item batch, 0.0002%, 200 times +6): 1 item batch, 0.0002%, 773 times +7): 2 item batch, 0.0001%, 345 times +8): 4 item batch, 0.0001%, 139 times +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +Benchmark Time CPU Iterations Pub x4 Sub x1 bytes_per_second items_per_second per_item(avg) 
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +DisruptorThroughput/4/1/500000000/32/iterations:1/repeats:5/process_time/manual_time 2.98 s 14.6 s 1 125M 500M 10.0149Gi/s 168.023M/s 5.95157n +DisruptorThroughput/4/1/500000000/32/iterations:1/repeats:5/process_time/manual_time 3.02 s 14.8 s 1 125M 500M 9.86776Gi/s 165.553M/s 6.04034n +DisruptorThroughput/4/1/500000000/32/iterations:1/repeats:5/process_time/manual_time 3.01 s 14.8 s 1 125M 500M 9.89201Gi/s 165.96M/s 6.02553n +DisruptorThroughput/4/1/500000000/32/iterations:1/repeats:5/process_time/manual_time 3.06 s 15.0 s 1 125M 500M 9.73692Gi/s 163.358M/s 6.12151n +DisruptorThroughput/4/1/500000000/32/iterations:1/repeats:5/process_time/manual_time 2.99 s 14.7 s 1 125M 500M 9.95568Gi/s 167.029M/s 5.987n +DisruptorThroughput/4/1/500000000/32/iterations:1/repeats:5/process_time/manual_time_mean 3.01 s 14.8 s 5 125M 500M 9.89346Gi/s 165.985M/s 6.02519n +DisruptorThroughput/4/1/500000000/32/iterations:1/repeats:5/process_time/manual_time_median 3.01 s 14.8 s 5 125M 500M 9.89201Gi/s 165.96M/s 6.02553n +DisruptorThroughput/4/1/500000000/32/iterations:1/repeats:5/process_time/manual_time_stddev 0.032 s 0.157 s 5 1.04125 4.165 107.148Mi/s 1.75551M/s 64.0014p +DisruptorThroughput/4/1/500000000/32/iterations:1/repeats:5/process_time/manual_time_cv 1.06 % 1.06 % 5 0.00% 0.00% 1.06% 1.06% 1.06% +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.5906%, 248976466 times +2): 5 item batch, 0.0983%, 393270 times +3): 7 item batch, 0.0865%, 247088 times +4): 6 item batch, 0.0696%, 232153 times +5): 3 item batch, 0.0667%, 444594 times +6): 2 item batch, 0.0345%, 345466 times +7): 4 item batch, 0.0309%, 154438 times +8): 1 item batch, 0.0228%, 456922 times +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.4713%, 248678300 times +2): 5 item batch, 
0.1225%, 490057 times +3): 7 item batch, 0.1063%, 303651 times +4): 6 item batch, 0.0883%, 294226 times +5): 3 item batch, 0.0870%, 579796 times +6): 2 item batch, 0.0481%, 481169 times +7): 4 item batch, 0.0442%, 221047 times +8): 1 item batch, 0.0323%, 646488 times +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.2057%, 248014249 times +2): 5 item batch, 0.1984%, 793634 times +3): 7 item batch, 0.1623%, 463687 times +4): 3 item batch, 0.1328%, 885197 times +5): 6 item batch, 0.1300%, 433381 times +6): 2 item batch, 0.0652%, 651736 times +7): 4 item batch, 0.0615%, 307388 times +8): 1 item batch, 0.0442%, 883128 times +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.5104%, 248776010 times +2): 5 item batch, 0.1093%, 437196 times +3): 7 item batch, 0.0966%, 276033 times +4): 3 item batch, 0.0802%, 534577 times +5): 6 item batch, 0.0799%, 266423 times +6): 4 item batch, 0.0465%, 232290 times +7): 2 item batch, 0.0464%, 463807 times +8): 1 item batch, 0.0307%, 614666 times +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.5235%, 248808669 times +2): 5 item batch, 0.1107%, 442871 times +3): 7 item batch, 0.0941%, 268717 times +4): 3 item batch, 0.0791%, 527140 times +5): 6 item batch, 0.0767%, 255698 times +6): 4 item batch, 0.0444%, 221973 times +7): 2 item batch, 0.0433%, 432919 times +8): 1 item batch, 0.0283%, 565936 times +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +Benchmark Time CPU Iterations Pub x1 Sub x4 bytes_per_second items_per_second per_item(avg) +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 7.38 s 
36.3 s 1 500M 500M 4.03807Gi/s 67.7477M/s 14.7607n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 7.14 s 35.1 s 1 500M 500M 4.17344Gi/s 70.0187M/s 14.2819n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 7.10 s 34.9 s 1 500M 500M 4.19871Gi/s 70.4427M/s 14.1959n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 7.42 s 36.5 s 1 500M 500M 4.01902Gi/s 67.4279M/s 14.8307n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time 7.38 s 36.4 s 1 500M 500M 4.039Gi/s 67.7631M/s 14.7573n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time_mean 7.28 s 35.8 s 5 500M 500M 4.09365Gi/s 68.68M/s 14.5653n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time_median 7.38 s 36.3 s 5 500M 500M 4.039Gi/s 67.7631M/s 14.7573n +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time_stddev 0.150 s 0.756 s 5 4.165 4.165 87.2656Mi/s 1.42976M/s 300.908p +DisruptorThroughput/1/4/500000000/32/iterations:1/repeats:5/process_time/manual_time_cv 2.07 % 2.11 % 5 0.00% 0.00% 2.08% 2.08% 2.07% +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.9986%, 62499095 times +2): 7 item batch, 0.0003%, 236 times +3): 5 item batch, 0.0003%, 260 times +4): 3 item batch, 0.0002%, 353 times +5): 6 item batch, 0.0002%, 165 times +6): 1 item batch, 0.0002%, 815 times +7): 2 item batch, 0.0002%, 388 times +8): 4 item batch, 0.0001%, 162 times +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.9569%, 62473081 times +2): 5 item batch, 0.0093%, 9317 times +3): 7 item batch, 0.0093%, 6609 times +4): 6 item batch, 0.0080%, 6706 times +5): 3 item batch, 0.0067%, 11140 times +6): 2 item batch, 0.0040%, 9925 times +7): 1 item batch, 0.0033%, 16366 times +8): 4 item batch, 0.0025%, 3158 times +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.9978%, 62498625 times +2): 7 item 
batch, 0.0005%, 345 times +3): 5 item batch, 0.0005%, 479 times +4): 6 item batch, 0.0004%, 372 times +5): 3 item batch, 0.0003%, 575 times +6): 2 item batch, 0.0002%, 499 times +7): 1 item batch, 0.0001%, 691 times +8): 4 item batch, 0.0001%, 136 times +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.9768%, 62485506 times +2): 5 item batch, 0.0044%, 4430 times +3): 7 item batch, 0.0042%, 2993 times +4): 6 item batch, 0.0041%, 3403 times +5): 3 item batch, 0.0034%, 5688 times +6): 2 item batch, 0.0027%, 6750 times +7): 1 item batch, 0.0023%, 11477 times +8): 4 item batch, 0.0021%, 2598 times +Reader batch sizes, top 8 of 8: +1): 8 item batch, 99.9884%, 62492748 times +2): 7 item batch, 0.0022%, 1557 times +3): 5 item batch, 0.0021%, 2075 times +4): 6 item batch, 0.0020%, 1676 times +5): 3 item batch, 0.0017%, 2798 times +6): 2 item batch, 0.0014%, 3514 times +7): 1 item batch, 0.0012%, 5992 times +8): 4 item batch, 0.0011%, 1318 times +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +Benchmark Time CPU Iterations Pub x4 Sub x1 bytes_per_second items_per_second per_item(avg) +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +DisruptorThroughput/4/1/500000000/32/iterations:1/repeats:5/process_time/manual_time 3.09 s 15.2 s 1 125M 500M 9.63559Gi/s 161.658M/s 6.18588n +DisruptorThroughput/4/1/500000000/32/iterations:1/repeats:5/process_time/manual_time 3.18 s 15.6 s 1 125M 500M 9.37906Gi/s 157.354M/s 6.35508n +DisruptorThroughput/4/1/500000000/32/iterations:1/repeats:5/process_time/manual_time 3.16 s 15.5 s 1 125M 500M 9.43276Gi/s 158.255M/s 6.3189n 
+DisruptorThroughput/4/1/500000000/32/iterations:1/repeats:5/process_time/manual_time 3.16 s 15.5 s 1 125M 500M 9.43433Gi/s 158.282M/s 6.31785n +DisruptorThroughput/4/1/500000000/32/iterations:1/repeats:5/process_time/manual_time 3.21 s 15.8 s 1 125M 500M 9.29507Gi/s 155.945M/s 6.4125n +DisruptorThroughput/4/1/500000000/32/iterations:1/repeats:5/process_time/manual_time_mean 3.16 s 15.5 s 5 125M 500M 9.43536Gi/s 158.299M/s 6.31804n +DisruptorThroughput/4/1/500000000/32/iterations:1/repeats:5/process_time/manual_time_median 3.16 s 15.5 s 5 125M 500M 9.43276Gi/s 158.255M/s 6.3189n +DisruptorThroughput/4/1/500000000/32/iterations:1/repeats:5/process_time/manual_time_stddev 0.042 s 0.227 s 5 1.04125 4.165 128.463Mi/s 2.10474M/s 83.3087p +DisruptorThroughput/4/1/500000000/32/iterations:1/repeats:5/process_time/manual_time_cv 1.32 % 1.46 % 5 0.00% 0.00% 1.33% 1.33% 1.32% + +real 13m48.142s +user 49m19.298s +sys 1m3.041s +``` + +
+ + diff --git a/tests/perf/PerfDisruptor.cpp b/tests/perf/PerfDisruptor.cpp new file mode 100644 index 0000000..481587f --- /dev/null +++ b/tests/perf/PerfDisruptor.cpp @@ -0,0 +1,500 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "impl/Allocator.hpp" +#include "impl/Clock.hpp" +#include "impl/Disruptor.hpp" +#include "perf/Perf.hpp" +#include "perf/Timestamps.hpp" + +using namespace std::literals; + +namespace { + +#ifdef NDEBUG +const auto repeats = 5; +const auto multiplier = 1; // for real perf measurement: 1'000; +const auto latency_items = 100'000 * multiplier; +const auto throughput_items = 500'000 * multiplier; +#else +const int repeats = 1; +const auto latency_items = 1'000; +const auto throughput_items = 1'000; +#endif // NDEBUG + +using Timestamps = Tests::Perf::Timestamps; + +/* Event stored in the ring: a nanosecond timestamp plus a sequence number; alignas(64) requests 64-byte alignment. */ struct alignas(64) Event { + std::chrono::nanoseconds timestamp; + std::uint64_t seqnumber; +}; + +std::mutex state_mutex; // NOLINT +/* Runs action while holding state_mutex. */ void synchronized(auto action) { + std::lock_guard guard(state_mutex); + action(); +}; + +/* When seq differs from expected, calls state.SkipWithError (through synchronized) with a message describing the gap. */ void checkSequence(auto& state, const auto& name, auto expected, auto seq) { + if (expected != seq) + synchronized([&] { + state.SkipWithError( + std::format("{} got gap on #{} received #{}, missing: {}", name, expected, seq, seq - expected)); + }); +} + +/* Latency benchmark. state.range(0..3) supply producers, consumers, number of events to send, and claim batch size (0 claims one event at a time). Producers stamp and publish events, consumers record a timestamp per received event, and the percentiles computed from those timestamps become counters. On the last repetition the timestamps are also written out via timestamps.write. */ template +void DisruptorLatency(benchmark::State& state) { + // clang-format off + const auto producers = static_cast(state.range(0)); + const auto consumers = static_cast(state.range(1)); + const auto tosend = static_cast(state.range(2)); + const auto batch = static_cast(state.range(3)); + // clang-format on + + static auto repeat = repeats; + if (repeat == 0) repeat = repeats; + --repeat; + + constexpr auto capacity = std::size_t(1) << 10; // 1k events in buffer + + auto buffer = std::vector>{capacity, Event{}}; + auto ring = dlsm::Disruptor::Ring{buffer}; + auto wait = typename Impl::WaitStrategy{}; + typename Impl::BarrierType barrier; + + using Ty = 
dlsm::Disruptor::Sequence::Atomic; + auto external = std::vector>(capacity); + + auto disruptor = Impl{barrier, ring.size(), wait, external}; + + /* Consumer-side handler: records a timestamp at ts_[seqnumber] for each consumed event, checks ordering via checkSequence, and returns Consumed::Exit once expected_ events were seen. */ struct CollectTimestamps : dlsm::Disruptor::Processor::DefaultHandler { + Timestamps::List& ts_; + benchmark::State& state_; + const std::string& name_; + const std::size_t expected_; + std::size_t count_ = 0ULL; + + CollectTimestamps(Timestamps::List& ts, benchmark::State& state, const std::string& name, std::size_t expected) + : ts_{ts}, state_{state}, name_{name}, expected_{expected} {} + + Consumed onConsume(Event& data, dlsm::Disruptor::Sequence::Value, std::size_t) { + ts_[data.seqnumber] = Timestamps::ts(); + + const auto expected = count_; + ++count_; + checkSequence(state_, name_, expected, data.seqnumber); + + return count_ >= expected_ ? Consumed::Exit : Consumed::Release; + } + + void report() { + if (const auto lost = expected_ - count_) state_.counters[name_ + "Lost"] = static_cast(lost); + } + }; + + for (auto _ : state) { + auto cpus = dlsm::Thread::AllCPU; + dlsm::Thread::affinity(cpus.extract()); + std::vector threads{producers + consumers}; + std::barrier sync(std::ssize(threads)); + auto begin = Timestamps::ts(), end = begin; + + state.counters["Pub x" + std::to_string(producers)] = + static_cast(tosend) / static_cast(producers); + + auto timestamps = Timestamps{1 + consumers, tosend}; + + /* Threads with i < producers act as publishers; the rest act as subscribers. */ for (std::size_t i = 0; auto& t : threads) { + t = std::jthread([&, i]() { + const auto affinity = cpus.count() > i ? cpus.at(i) : cpus; + dlsm::Thread::affinity(affinity); + + const auto name = + (i < producers) ? "Pub"s + std::to_string(i + 1) : "Sub"s + std::to_string(i + 1 - producers); + dlsm::Thread::name(name); + + std::uint64_t count = 0; + if (i < producers) { + // First #0 producer will send remaining events to disruptor + const auto tosendP = tosend / producers + ((i == 0) ? 
(tosend % producers) : 0); + auto& ts = timestamps[0]; + sync.arrive_and_wait(); + if (i == 0) { + begin = Timestamps::ts(); + } + + if (batch > 0) { + while (count < tosendP) { + const auto n = std::min(batch, tosendP - count); + const auto hi = disruptor.claim(n); + const auto lo = hi - static_cast(n); + for (auto seq = lo; seq < hi; ++seq, ++count) { + auto& event = ring[seq]; + auto useq = static_cast(seq); + + event.timestamp = ts[useq] = timestamps.ts(); + event.seqnumber = useq; + + disruptor.publish(seq); + } + // disruptor.publish(lo, hi); + } + } else { + while (count < tosendP) { + const auto seq = disruptor.claim() - 1; + auto useq = static_cast(seq); + + auto& event = ring[seq]; + event.timestamp = ts[useq] = timestamps.ts(); + event.seqnumber = useq; + + disruptor.publish(seq); + ++count; + } + } + + sync.arrive_and_wait(); + if (i == 0) { + end = Timestamps::ts(); + + if (repeat == 0) // Write timestamps to file on last repeat + timestamps.write(ts, state.name() + "-Pub"); + } + } else { + const auto id = i - producers; + auto& ts = timestamps[1 + id]; + + typename Impl::BarrierType barrier; + auto sub = typename Impl::Consumer{barrier, disruptor}; + disruptor.add(sub.cursor()); + + CollectTimestamps handler(ts, state, name, tosend); + dlsm::Disruptor::Processor::Batch p{Event{}, sub, ring, handler}; + + sync.arrive_and_wait(); + + p.run(); + + sync.arrive_and_wait(); + disruptor.del(sub.cursor()); + + synchronized([&] { handler.report(); }); + + if (repeat == 0) // Write timestamps to files on last repeat + timestamps.write(ts, state.name() + "-" + name); + } + }); + ++i; + } + /* clear() destroys the std::jthread objects, whose destructors join, so all workers have finished before end and begin are read. */ threads.clear(); + dlsm::Thread::affinity(dlsm::Thread::AllCPU); + + // Calculate delays and percentiles, Warming: skip first 10% of samples from statistics + for (const auto& p : timestamps.percentiles(10.0)) { + state.counters[p.label] = std::chrono::duration(p.value).count(); + } + + const std::chrono::duration seconds = end - begin; + 
state.SetIterationTime(seconds.count()); + state.counters["per_item(avg)"] = (seconds / tosend).count(); + } +} + +/* Throughput benchmark. state.range(0..3) supply producers, consumers, number of events to send, and claim batch size (0 claims one event at a time). Consumers are created and added to the disruptor once, before the benchmark loop; each iteration reports items and bytes processed and prints the reader batch-size histogram. */ template +void DisruptorThroughput(benchmark::State& state) { + // clang-format off + const auto producers = static_cast(state.range(0)); + const auto consumers = static_cast(state.range(1)); + const auto tosend = static_cast(state.range(2)); + const auto batch = static_cast(state.range(3)); + // clang-format on + + const std::size_t capacity = + std::min(dlsm::Disruptor::ceilingNextPowerOfTwo(tosend / 2), (std::size_t(1) << 20 /*1Mb*/)); + + auto buffer = std::vector>{capacity, Event{}}; + auto ring = dlsm::Disruptor::Ring{buffer}; + auto wait = typename Impl::WaitStrategy{}; + typename Impl::BarrierType barrier; + + auto disruptor = Impl{barrier, ring.size(), wait}; + + auto bar = std::vector(); + bar.reserve(consumers); + auto barriers = std::vector(); + barriers.reserve(consumers); + for (std::size_t i = 0; i < consumers; ++i) { + auto& b = bar.emplace_back(); + const auto& c = barriers.emplace_back(b, disruptor); + disruptor.add(c.cursor()); + } + + /* Histogram of batch sizes seen by readers: amounts[k] counts batches of k + 1 items. */ struct ReaderBatches { + using AmountSize = std::pair; + std::vector> amounts; + ReaderBatches(std::size_t capacity) : amounts(capacity) { + for (auto& v : amounts) { + std::atomic_init(&v, 0ULL); + } + } + + void add(std::size_t size) { amounts.at(size - 1).fetch_add(1); } + + /* Returns (total items, batch size) pairs for every size that occurred, largest total first. */ std::vector sorted() const { + std::vector result; + for (size_t i = 0; i < amounts.size(); ++i) { + if (amounts[i] != 0) { + result.emplace_back(amounts[i] * (i + 1), (i + 1)); + } + } + std::sort(std::rbegin(result), std::rend(result)); + return result; + } + + /* Prints up to n leading entries of sorted() with each size's percentage of all items and its batch count. */ void top(std::size_t n) const { + if (n == 0) return; + auto ordered = sorted(); + + std::size_t total = 0; + for (const AmountSize& i : ordered) total += i.first; + + std::cout << std::format("Reader batch sizes, top {} of {}:\n", n, ordered.size()); + for (std::size_t i = 0; i < n && i < ordered.size(); ++i) { + std::cout << std::format("{}): {} item batch,{:>8.4f}%, 
{} times\n", i + 1, ordered[i].second, + (double)(100 * ordered[i].first) / ((double)total), + (ordered[i].first / ordered[i].second)); + } + std::cout << std::flush; + } + }; + + struct CollectBatches : dlsm::Disruptor::Processor::DefaultHandler { + ReaderBatches& batches_; + benchmark::State& state_; + const std::string& name_; + const std::size_t expected_; + std::size_t count_ = 0ULL; + + CollectBatches(ReaderBatches& batches, benchmark::State& state, const std::string& name, std::size_t expected) + : batches_{batches}, state_{state}, name_{name}, expected_{expected} {} + + void onBatch(dlsm::Disruptor::Sequence::Value, std::size_t amount) { batches_.add(amount); } + + Consumed onConsume(Event& data, dlsm::Disruptor::Sequence::Value, std::size_t) { + const auto expected = count_; + ++count_; + checkSequence(state_, name_, expected, data.seqnumber); + + return count_ >= expected_ ? Consumed::Exit : Consumed::Keep; + } + + void report() { + if (const auto lost = expected_ - count_) state_.counters[name_ + "Lost"] = static_cast(lost); + } + }; + + for (auto _ : state) { + auto cpus = dlsm::Thread::AllCPU; + dlsm::Thread::affinity(cpus.extract()); + std::vector threads{producers + consumers}; + ReaderBatches batches{ring.size()}; + std::barrier sync(std::ssize(threads)); + + auto begin = Timestamps::ts(), end = begin; + + state.counters["Pub x" + std::to_string(producers)] = + static_cast(tosend) / static_cast(producers); + state.counters["Sub x" + std::to_string(consumers)] = static_cast(tosend); + + for (std::size_t i = 0; auto& t : threads) { + t = std::jthread([&, i]() { + const auto affinity = cpus.count() > i ? cpus.at(i) : cpus; + dlsm::Thread::affinity(affinity); + + const auto name = + (i < producers) ? 
"Pub"s + std::to_string(i + 1) : "Sub"s + std::to_string(i + 1 - producers); + dlsm::Thread::name(name); + + std::uint64_t count = 0; + if (i < producers) { + // First #0 producer will send remaining events to disruptor + const auto tosendP = tosend / producers + ((i == 0) ? (tosend % producers) : 0); + sync.arrive_and_wait(); + if (i == 0) { + begin = Timestamps::ts(); + } + + if (batch > 0) { + while (count < tosendP) { + const auto n = std::min(batch, tosendP - count); + const dlsm::Disruptor::Sequence::Value hi = disruptor.claim(n); + const dlsm::Disruptor::Sequence::Value lo = + hi - static_cast(n); + for (auto seq = lo; seq < hi; ++seq, ++count) { + auto& event = ring[seq]; + + event.seqnumber = static_cast(seq); + + // disruptor.publish(seq); + } + disruptor.publish(lo, hi); + } + } else { + while (count < tosendP) { + const auto seq = disruptor.claim() - 1; + + auto& event = ring[seq]; + event.seqnumber = static_cast(seq); + + disruptor.publish(seq); + ++count; + } + } + + sync.arrive_and_wait(); + if (i == 0) { + end = Timestamps::ts(); + } + } else { + auto& sub = barriers[i - producers]; + + CollectBatches handler(batches, state, name, tosend); + dlsm::Disruptor::Processor::Batch p{Event{}, sub, ring, handler}; + + sync.arrive_and_wait(); + + p.run(); + + sync.arrive_and_wait(); + disruptor.del(sub.cursor()); + + synchronized([&] { handler.report(); }); + } + }); + ++i; + } + threads.clear(); + + const std::chrono::duration seconds = end - begin; + state.SetIterationTime(seconds.count()); + state.SetItemsProcessed(static_cast(tosend)); + state.SetBytesProcessed(static_cast(tosend * sizeof(Event))); + state.counters["per_item(avg)"] = (seconds / tosend).count(); + + dlsm::Thread::affinity(dlsm::Thread::AllCPU); + + batches.top(8); + } +} + +/* Benchmarks Spinner::once() with iteration_ reset to state.range(0) before every call. */ void DisruptorSpinner(benchmark::State& state) { + const auto iteration = static_cast(state.range(0)); + dlsm::Disruptor::Waits::SpinsStrategy::Spinner spinner; + for (auto _ : state) { // NOLINT + spinner.iteration_ = 
iteration; + spinner.once(); + } +} +} // namespace + +using namespace dlsm::Disruptor::Waits; +using namespace dlsm::Disruptor::Barriers; +using namespace dlsm::Disruptor::Sequencers; +static constexpr std::size_t Ndeps = 4; // Max number of dependencies of Barrier +using SPMCSpinsAtomics = SPMC>; +using MPMCSpinsAtomics = MPMC>; +// Inter-process communications through shared memory requires placement Barriers +// in external shared memory and using offsets instead of pointers for dependencies +using SPMCSpinsSharing = SPMC&>; +using MPMCSpinsSharing = MPMC&>; + +/* Every Latency/Throughput registration below uses manual timing, a single iteration, and repeats repetitions. */ BENCHMARK(DisruptorLatency) + ->MeasureProcessCPUTime() + ->UseManualTime() + ->Unit(benchmark::kSecond) + ->Iterations(1) + ->Repetitions(repeats) + ->Args({1, 4, latency_items, 0}) + ->Args({1, 4, latency_items, 32}); +BENCHMARK(DisruptorLatency) + ->MeasureProcessCPUTime() + ->UseManualTime() + ->Unit(benchmark::kSecond) + ->Iterations(1) + ->Repetitions(repeats) + ->Args({1, 4, latency_items, 0}) + ->Args({1, 4, latency_items, 32}); + +BENCHMARK(DisruptorLatency) + ->MeasureProcessCPUTime() + ->UseManualTime() + ->Unit(benchmark::kSecond) + ->Iterations(1) + ->Repetitions(repeats) + ->Args({1, 4, latency_items, 0}) + ->Args({1, 4, latency_items, 32}); +BENCHMARK(DisruptorLatency) + ->MeasureProcessCPUTime() + ->UseManualTime() + ->Unit(benchmark::kSecond) + ->Iterations(1) + ->Repetitions(repeats) + ->Args({1, 4, latency_items, 0}) + ->Args({1, 4, latency_items, 32}); +BENCHMARK(DisruptorLatency) + ->MeasureProcessCPUTime() + ->UseManualTime() + ->Unit(benchmark::kSecond) + ->Iterations(1) + ->Repetitions(repeats) + ->Args({4, 1, latency_items, 0}) + ->Args({4, 1, latency_items, 32}); +BENCHMARK(DisruptorLatency) + ->MeasureProcessCPUTime() + ->UseManualTime() + ->Unit(benchmark::kSecond) + ->Iterations(1) + ->Repetitions(repeats) + ->Args({4, 1, latency_items, 0}) + ->Args({4, 1, latency_items, 32}); + +BENCHMARK(DisruptorThroughput) + ->MeasureProcessCPUTime() + ->UseManualTime() + 
->Unit(benchmark::kSecond) + ->Iterations(1) + ->Repetitions(repeats) + ->Args({1, 4, throughput_items, 32}); +BENCHMARK(DisruptorThroughput) + ->MeasureProcessCPUTime() + ->UseManualTime() + ->Unit(benchmark::kSecond) + ->Iterations(1) + ->Repetitions(repeats) + ->Args({1, 4, throughput_items, 32}); + +BENCHMARK(DisruptorThroughput) + ->MeasureProcessCPUTime() + ->UseManualTime() + ->Unit(benchmark::kSecond) + ->Iterations(1) + ->Repetitions(repeats) + ->Args({1, 4, throughput_items, 32}) + ->Args({4, 1, throughput_items, 32}); +BENCHMARK(DisruptorThroughput) + ->MeasureProcessCPUTime() + ->UseManualTime() + ->Unit(benchmark::kSecond) + ->Iterations(1) + ->Repetitions(repeats) + ->Args({1, 4, throughput_items, 32}) + ->Args({4, 1, throughput_items, 32}); + +BENCHMARK(DisruptorSpinner)->DenseRange(SpinsStrategy::Spinner::Initial, SpinsStrategy::Spinner::Sleep, 1); diff --git a/tests/perf/README.md b/tests/perf/README.md index 514cdfa..fe233b7 100644 --- a/tests/perf/README.md +++ b/tests/perf/README.md @@ -33,9 +33,10 @@ sudo sysctl vm.stat_interval=300 ## [delays.py](delays.py) This script reads binary files with `int64` samples(nanoseconds timestamps), and plots them as `master` and `signals` delays relative to `master`. 
```sh -# Display Pub1.ns as master and delays of Sub1/Sub2/Sub3/Sub4.ns signals relative to Pub1 +# Display Pub.ns as master and delays of Sub1/Sub2/Sub3/Sub4.ns signals relative to Pub.ns ./tests/perf/delays.py ./build/tests/perf/TransportPubSub-mem-* ``` +![Disruptor 1P4C Timestamps](../../docs/images/delays-Disruptor-1P4C.png) ## Threads Affinity and CPU Core Isolation ```sh diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index e68e2f6..e184d43 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -11,6 +11,8 @@ target_sources (unit PRIVATE Flat.cpp TestAllocator.cpp TestClock.cpp + TestDisruptor.cpp + TestDisruptorGraph.cpp TestLock.cpp TestLogger.cpp TestMemory.cpp diff --git a/tests/unit/TestDisruptor.cpp b/tests/unit/TestDisruptor.cpp new file mode 100644 index 0000000..c93c1ce --- /dev/null +++ b/tests/unit/TestDisruptor.cpp @@ -0,0 +1,463 @@ +#include +#include + +#include "impl/Disruptor.hpp" +#include "unit/Unit.hpp" + +using namespace dlsm::Disruptor; + +static_assert(ceilingNextPowerOfTwo(1) == 2); +static_assert(ceilingNextPowerOfTwo(3) == 4); +static_assert(ceilingNextPowerOfTwo(7) == 8); +static_assert(ceilingNextPowerOfTwo(8) == 8); + +static_assert(isPowerOf2(1) == true); +static_assert(isPowerOf2(2) == true); +static_assert(isPowerOf2(4) == true); +static_assert(isPowerOf2(0) == false); +static_assert(isPowerOf2(3) == false); +static_assert(isPowerOf2(5) == false); + +namespace { +template +void SequenceClaimPublishWaitRelease(S& p) { + EXPECT_EQ(p.capacity(), 8); + EXPECT_EQ(p.dependencies().size(), 0); + EXPECT_EQ(p.available(), p.capacity()); + + Barrier cb; + auto c = typename std::decay_t::Consumer(cb, p); + EXPECT_EQ(c.dependencies().size(), 1); + EXPECT_EQ(c.available(), 0) << "nothig is available"; + EXPECT_EQ(c.last(), p.next() - 1) << "created Consumer points to already published seq"; + p.depends(group(c)); + EXPECT_EQ(p.dependencies().size(), 1); + // EXPECT_EQ(p.last(), Sequence::Initial) << 
typeid(p).name(); + EXPECT_EQ(p.next(), 0); + + { // Claim next 1 + EXPECT_EQ(p.available(), p.capacity()) << "none was claimed"; + EXPECT_EQ(c.available(), 0) << "nothing was was claimed yet"; + EXPECT_EQ(p.claim(), 1); + EXPECT_EQ(p.next(), 1); + EXPECT_EQ(p.barrier_.minimumSequence(), -1); + EXPECT_EQ(p.available(), p.capacity() - 1) << "one was claimed"; + EXPECT_EQ(c.available(), 1) << "nothing was published yet but was claimed"; + // EXPECT_EQ(p.last(), -1); + EXPECT_EQ(p.next(), 1) << "next claimed will be 1"; + + EXPECT_FALSE(p.published(0)); + p.publish(0); + EXPECT_TRUE(p.published(0)); + + // EXPECT_EQ(p.last(), 0); + EXPECT_EQ(p.next(), 1); + EXPECT_EQ(c.available(), 1) << "one was published and is available for consumer"; + + EXPECT_EQ(p.available(), p.capacity() - 1) << "one was published but not released"; + EXPECT_EQ(c.consume(0), 0) << "consume"; + EXPECT_EQ(c.last(), Sequence::Initial); + EXPECT_EQ(c.available(), 1) << "it was not released yet and is still available"; + c.release(0); + EXPECT_EQ(p.available(), p.capacity()) << "one was published and released by consumer"; + EXPECT_EQ(c.available(), 0) << "nothing was published"; + EXPECT_EQ(c.last(), 0); + } + + Barrier ib; // Join IndirectConsumer and Claim next 4 + [[maybe_unused]] auto i = typename std::decay_t::Indirect(ib, p.wait_, group(c)); + EXPECT_EQ(i.dependencies().size(), 1); + EXPECT_EQ(i.last(), c.last()) << "created Indirect points to last Consumer seq"; + EXPECT_EQ(i.available(), 0) << "nothig is available"; + p.depends(group(i)); // replace dependency p -> c to p -> i, so graph is like p -> c -> i + { + EXPECT_EQ(p.available(), p.capacity()) << "none was claimed"; + EXPECT_EQ(c.available(), 0) << "nothing was was claimed yet"; + EXPECT_EQ(p.claim(4), 5); + EXPECT_EQ(p.available(), p.capacity() - 4) << "4 were claimed"; + // EXPECT_EQ(p.last(), 4 or 0); ??? 
+ EXPECT_EQ(p.next(), 5); + EXPECT_EQ(c.consumable(2), Sequence::Initial); + EXPECT_FALSE(p.published(4)); + p.publish(1, 5); + EXPECT_TRUE(p.published(4)); + EXPECT_FALSE(p.published(5)); + EXPECT_EQ(p.last(), 4); + EXPECT_EQ(p.next(), 5); + EXPECT_EQ(c.available(), 4); + EXPECT_EQ(i.available(), 0) << "published new 4 items, but c has not processed them yet"; + + EXPECT_EQ(p.available(), p.capacity() - 4) << "4 were published but not released"; + EXPECT_EQ(c.consumable(5), Sequence::Initial); + // EXPECT_EQ(c.consumable(2), 4); MPMC returns 3 because of limit to scan next pub seqs + EXPECT_EQ(c.consumable(4), 4); + EXPECT_EQ(c.consume(4), 4) << "consume by c"; + EXPECT_EQ(c.last(), 0); + EXPECT_EQ(c.available(), 4); + + EXPECT_EQ(i.consumable(1), Sequence::Initial); + EXPECT_EQ(i.available(), 0) << "c has not released 4 items"; + c.release(4); + + EXPECT_EQ(i.available(), 4) << "c has released 4 items and they are available"; + EXPECT_EQ(i.consumable(5), Sequence::Initial); + EXPECT_EQ(i.consumable(2), 4); + EXPECT_EQ(i.consumable(4), 4); + EXPECT_EQ(i.consume(4), 4) << "consume by i"; + i.release(4); + EXPECT_EQ(i.available(), 0); + + EXPECT_EQ(p.available(), p.capacity()) << "4 were published and released by both consumers"; + EXPECT_EQ(c.available(), 0); + EXPECT_EQ(c.last(), 4); + EXPECT_EQ(i.available(), 0); + EXPECT_EQ(i.last(), 4); + } + + { // tryClaim next 2 + EXPECT_EQ(p.available(), p.capacity()) << "none was claimed"; + EXPECT_EQ(p.next(), 5); + EXPECT_EQ(p.last(), 4); + EXPECT_EQ(p.tryClaim(2), 7); + EXPECT_EQ(p.available(), p.capacity() - 2) << "2 were claimed"; + EXPECT_EQ(c.available(), 2); + EXPECT_EQ(i.available(), 0); + EXPECT_EQ(p.next(), 7); + // EXPECT_EQ(p.last(), 6 MPMC or 4 SPMC); + EXPECT_EQ(p.tryClaim(7), Sequence::Initial) << "7 no available for immidiate claming"; + + EXPECT_FALSE(p.published(6)); + p.publish(5, 7); + EXPECT_TRUE(p.published(6)); + + EXPECT_EQ(p.available(), p.capacity() - 2) << "2 were claimed"; + EXPECT_EQ(p.last(), 
6); + EXPECT_EQ(p.next(), 7); + EXPECT_EQ(p.tryClaim(7), Sequence::Initial) << "7 no available for immidiate claming"; + EXPECT_EQ(p.available(), p.capacity() - 2) << "2 were claimed"; + EXPECT_EQ(p.last(), 6); + EXPECT_EQ(p.next(), 7); + } +} + +} // namespace + +TEST(Disruptor, Sequence) { + EXPECT_EQ(Sequence{}.load(), Sequence::Initial); + Sequence s{0ULL}; + EXPECT_EQ(s.load(), 0ULL); + s.store(42); + EXPECT_EQ(s.load(), 42ULL); +} + +TEST(Disruptor, Group) { + using Group = Group<>; + + Group g; + EXPECT_EQ(g.size(), 0UL); + Sequence seqs[Group::MaxItems] = {4, 3, 2, 1}; // NOLINT + for (const auto& s : seqs) EXPECT_TRUE(g.add(s)); // NOLINT + EXPECT_EQ(g.size(), std::size(seqs)); + Sequence additional; + EXPECT_FALSE(g.add(additional.ptr())) << "Storage overflow, insertion failed"; + EXPECT_EQ(g.size(), std::size(seqs)); + + EXPECT_TRUE(g.replace(seqs[0], nullptr)); + EXPECT_EQ(g.size(), std::size(seqs) - 1); + + EXPECT_TRUE(g.del(seqs[1].ptr())); + EXPECT_EQ(g.size(), std::size(seqs) - 2); + + Sequence replacement{42}; + EXPECT_FALSE(g.del(replacement)) << "replacement is not in Group"; + EXPECT_FALSE(g.replace(seqs[0], replacement)) << "seqs[0] has been deleted before"; + EXPECT_FALSE(g.replace(seqs[1], replacement)) << "seqs[1] has been deleted before"; + EXPECT_EQ(g.size(), std::size(seqs) - 2); + EXPECT_TRUE(g.replace(seqs[2], replacement)) << "seqs[2] deletion"; + EXPECT_EQ(g.size(), std::size(seqs) - 2); +} + +TEST(Disruptor, Barriers) { + const auto test = [](const auto& barrier) { + using Barrier = std::decay_t; + { + Barrier empty; + EXPECT_EQ(empty.size(), 0UL); + EXPECT_EQ(empty.minimumSequence(), Sequence::Max); + EXPECT_EQ(empty.minimumSequence(12UL), 12UL); + } + { + Barrier b; + Sequence A{5}, B{9}; + b.depends(Group{A.ptr(), B.ptr()}); + EXPECT_EQ(b.size(), 2); + EXPECT_TRUE(b.contains(A)); + EXPECT_TRUE(b.contains(B)); + EXPECT_EQ(b.last(), 5); + EXPECT_EQ(b.minimumSequence(), 5); + Barrier b2 = b; + EXPECT_TRUE(b2.contains(A)); + 
EXPECT_TRUE(b2.contains(B)); + EXPECT_EQ(b2.last(), 5); + EXPECT_EQ(*b2.cursor(), 5); + EXPECT_EQ(b2.minimumSequence(), 5); + b2.depends(Group{}); + EXPECT_EQ(b2.size(), 0); + EXPECT_EQ(b2.last(), Sequence::Initial); + EXPECT_EQ(b2.minimumSequence(), Sequence::Max); + EXPECT_EQ(b.last(), 5); + EXPECT_EQ(*b.cursor(), 5); + EXPECT_EQ(b.minimumSequence(), 5); + } + { + Barrier b; + Sequence seqs[Barrier::MaxItems] = {4, 3, 2, 1}; // NOLINT + for (const auto& s : seqs) EXPECT_TRUE(b.add(s)); // NOLINT + EXPECT_EQ(b.size(), std::size(seqs)); + Sequence additional; + EXPECT_FALSE(b.add(additional)) << "Storage overflow, insertion failed"; + EXPECT_EQ(b.size(), std::size(seqs)); + + EXPECT_FALSE(b.contains(nullptr)); + EXPECT_TRUE(b.contains(seqs[0])); + EXPECT_TRUE(b.replace(seqs[0].ptr(), nullptr)); + EXPECT_FALSE(b.contains(seqs[0])); + EXPECT_EQ(b.size(), std::size(seqs) - 1); + + EXPECT_TRUE(b.del(seqs[1])); + EXPECT_EQ(b.size(), std::size(seqs) - 2); + + Sequence replacement{42}; + EXPECT_FALSE(b.del(replacement)) << "replacement is not in Group"; + EXPECT_FALSE(b.replace(seqs[0].ptr(), replacement)) << "seqs[0] has been deleted before"; + EXPECT_FALSE(b.replace(seqs[1].ptr(), replacement)) << "seqs[1] has been deleted before"; + EXPECT_EQ(b.size(), std::size(seqs) - 2); + EXPECT_TRUE(b.replace(seqs[2].ptr(), replacement)) << "seqs[2] is still present, replacement must succeed"; // seqs[2] was never removed above + EXPECT_EQ(b.size(), std::size(seqs) - 2); + EXPECT_TRUE(b.contains(replacement)); + EXPECT_TRUE(b.contains(seqs[3])); + + Group<> removable{seqs[2].ptr(), seqs[3].ptr(), replacement.ptr()}; + Sequence newone{13}; + EXPECT_FALSE(b.contains(newone)); + EXPECT_TRUE(b.replace(removable, newone)); + EXPECT_FALSE(b.contains(seqs[2])); + EXPECT_FALSE(b.contains(seqs[3])); + EXPECT_FALSE(b.contains(replacement)); + EXPECT_TRUE(b.contains(newone)); + EXPECT_EQ(b.size(), std::size(seqs) - 3); + } + { + Sequence A{3}, B{2}; + Barrier b; + b.add(A); + b.add(B); + EXPECT_EQ(b.size(), 2); + EXPECT_EQ(b.minimumSequence(), 
2); + EXPECT_EQ(available(b), 3); + B.store(5); // Replace 2 -> 5 + EXPECT_EQ(b.minimumSequence(), 3); + EXPECT_EQ(available(b), 4); + b.release(6); + EXPECT_EQ(available(b), 3); + } + }; + + const auto safe = [](const auto& barrier) { + using Barrier = std::decay_t; + { // Check thread-safety of a Barrier + Barrier b; + std::barrier sync(2); + const auto test = [&](std::size_t count) { + std::array seqs = {1, 2}; + EXPECT_EQ(b.size(), 0); + sync.arrive_and_wait(); + for (std::size_t i = 0; i < count; ++i) { + EXPECT_LE(b.size(), seqs.size()); + for (const auto& s : seqs) EXPECT_TRUE(b.add(s)); + EXPECT_GE(b.size(), seqs.size()); + for (const auto& s : seqs) EXPECT_TRUE(b.contains(s)); + EXPECT_GE(b.size(), seqs.size()); + for (const auto& s : seqs) EXPECT_TRUE(b.del(s)); + EXPECT_LE(b.size(), seqs.size()); + } + sync.arrive_and_wait(); + EXPECT_EQ(b.size(), 0); + }; + + std::jthread T1(test, 10000); + std::jthread T2(test, 10000); + } + }; + + using namespace dlsm::Disruptor::Barriers; + test(PointerBarrier<>{}); + test(AtomicsBarrier<>{}); + test(OffsetsBarrier<>{}); + safe(AtomicsBarrier<>{}); + safe(OffsetsBarrier<>{}); +} + +TEST(Disruptor, Waits) { + const auto test = [](auto strategy) { + Sequence A{3}, B{2}, C{1}; + Barrier b; + b.add(A); + b.add(B); + b.add(C); + + { + std::jthread T([&] { + // const auto to = std::chrono::microseconds{10}; + EXPECT_EQ(strategy.wait(0, A), 3); + EXPECT_EQ(strategy.wait(0, b), 1); + // EXPECT_EQ(strategy.waitFor(0, g, to), 1); + // EXPECT_EQ(strategy.waitFor(5, g, to), 1); // Timeout + }); + } + + { + std::jthread T([&] { + EXPECT_EQ(strategy.wait(4, b), 5); + EXPECT_EQ(A.load(), 7); + EXPECT_EQ(B.load(), 6); + EXPECT_EQ(C.load(), 5); + }); + A.store(7); + B.store(6); + C.store(5); + strategy.signalAllWhenBlocking(); + } + }; + + using namespace dlsm::Disruptor::Waits; + test(SpinsStrategy{}); + test(YieldStrategy{}); + test(BlockStrategy{}); + test(ShareStrategy{}); +} + +TEST(Disruptor, SPMC) { + Waits::BlockStrategy 
wait; + Barrier barrier; + + { + EXPECT_THAT([&] { Sequencers::SPMC(barrier, 3, wait); }, + ThrowsMessage("Capacity must be power-of-two, value:3")); + } + + Sequencers::SPMC p(barrier, 8, wait); + SequenceClaimPublishWaitRelease(p); +} + +TEST(Disruptor, MPMC) { + Waits::BlockStrategy wait; + Barrier barrier; + + { + EXPECT_THAT([&] { Sequencers::MPMC(barrier, 3, wait); }, + ThrowsMessage("Capacity must be power-of-two, value:3")); + + std::vector external(16); + EXPECT_THAT([&] { Sequencers::MPMC(barrier, 4, wait, external); }, + ThrowsMessage("External storage size(16) != capacity(4)")); + } + + Sequencers::MPMC p(barrier, 8, wait); + EXPECT_EQ(p.published_.size(), p.capacity()); + constexpr auto I = Sequence::Initial; + EXPECT_THAT(p.published_, ElementsAreArray(std::vector{I, I, I, I, I, I, I, I})); + SequenceClaimPublishWaitRelease(p); + EXPECT_THAT(p.published_, ElementsAreArray(std::vector{0, 1, 2, 3, 4, 5, 6, I})); +} + +TEST(Disruptor, Ring) { + long A3[3] = {0, 1, 2}; // NOLINT + EXPECT_THAT([&] { Ring r(A3); }, // NOLINT + ThrowsMessage("Ring size must be power-of-two, value:3")); + + EXPECT_THAT( + [&] { + Ring r({A3, 0}); // NOLINT + }, // NOLINT + ThrowsMessage("Ring size must be power-of-two, value:0")); + + const auto test0 = [&] { Ring r({(long*)nullptr, 4}); }; + EXPECT_THAT([&] { test0(); }, ThrowsMessage("Ring pointer is nullptr")); + + auto A4 = std::array{}; + auto V8 = std::vector(8, 0); + + using R8 = Ring; + + R8 r8{V8}; + EXPECT_EQ(r8.size(), 8); + r8[1 + static_cast(r8.size())] = 42; + + const auto& cr8 = r8; + EXPECT_EQ(cr8[1], 42); + EXPECT_EQ(V8[1], 42); + + EXPECT_EQ(Ring{A4}.size(), 4); + EXPECT_EQ(Ring{V8}.size(), 8); + + long buffer[8] = {1}; // NOLINT + auto r = Ring{buffer}; + + EXPECT_EQ(r.size(), 8); + EXPECT_EQ(r.size_bytes(), sizeof(buffer)); + EXPECT_FALSE(r.empty()); + EXPECT_EQ(r[0], 1); + r[0] = 42; + EXPECT_EQ(buffer[0], 42); + EXPECT_EQ(buffer[0], r[static_cast(r.size())]); + EXPECT_EQ(&r[0], &r[static_cast(r.size())]) 
<< "wrap indexing in a ring storage"; +} + +TEST(Disruptor, Processor) { + struct TestHandler : Processor::DefaultHandler { + std::size_t batches = 0; + void onBatch(Sequence::Value, std::size_t s) { batches += s; } + Consumed onConsume(int&, Sequence::Value seq, std::size_t) { + if (seq == 0) return Consumed::Release; + if (seq == 1) return Consumed::Keep; + if (seq >= 3) throw std::invalid_argument{"Check OnException"}; + return Consumed::Exit; + } + }; + + using Impl = Sequencers::SPMC; + + typename Impl::BarrierType pbar; + typename Impl::WaitStrategy wait; + Impl p(pbar, 8, wait); + + auto buffer = std::vector{16, 0}; + auto ring = Ring{buffer}; + + typename Impl::BarrierType cbar; + auto c = Impl::Consumer{cbar, p}; + p.add(c.cursor()); + + TestHandler handler; + Processor::Batch batch{int{}, c, ring, handler}; + p.publish(p.claim() - 1); + p.publish(p.claim() - 1); + p.publish(p.claim() - 1); + + EXPECT_FALSE(batch.running()); + EXPECT_EQ(handler.batches, 0); + batch.run(); + EXPECT_EQ(handler.batches, 3); + EXPECT_FALSE(batch.running()); + batch.halt(); + EXPECT_FALSE(batch.running()); + + EXPECT_THAT( + [&] { + p.publish(p.claim() - 1); + batch.run(); + }, + ThrowsMessage("exception on #3 what:Check OnException")); + EXPECT_FALSE(batch.running()); +} diff --git a/tests/unit/TestDisruptorGraph.cpp b/tests/unit/TestDisruptorGraph.cpp new file mode 100644 index 0000000..4d6a2a2 --- /dev/null +++ b/tests/unit/TestDisruptorGraph.cpp @@ -0,0 +1,635 @@ +#include +#include +#include + +#include "impl/DisruptorGraph.hpp" +#include "unit/Unit.hpp" + +using namespace dlsm::Disruptor::Graph; + +namespace TestDisruptorGraph { +struct alignas(64) Event { + std::uint64_t number{0xFF}; // #event +}; +} // namespace TestDisruptorGraph + +using TestDisruptorGraph::Event; + +namespace { + +using Strs = std::vector; + +struct WaitProducer { + static void run(auto ring, auto& barrier, std::size_t count, const std::size_t init, const std::size_t step) { + std::uint64_t number = 
init; + + for (std::size_t i = 0; i < count; ++i) { + const auto seq = barrier.claim() - 1; + + auto& event = ring[seq]; + event.number = number; + // std::cout << std::format("Pub#{}: {} seq: {}\n", init, number, seq); + number += step; + + EXPECT_GE(seq, i) << "claimed sequence number must == #event"; + barrier.publish(seq); + } + + EXPECT_EQ(number, count * step + init); + } +}; + +struct WaitConsumer { + static void run(auto ring, auto& barrier, std::size_t count, const std::size_t producers = 1) { + dlsm::Disruptor::Sequence::Value next = barrier.last() + 1; + + std::vector expected(producers, 0); + for (std::size_t i = 0; i < producers; ++i) expected[i] = i + 1; + + for (std::size_t i = 0; i < count * producers;) { + auto available = barrier.consume(next); + EXPECT_GE(available, next) << "at least next sequence number is available"; + for (; next <= available;) { + const auto& event = ring[next]; + + auto step = event.number % producers; + if (step == 0) step = producers; + auto index = step - 1; + + // std::cout << std::format("Sub#{}: {} seq: {}\n", index+1, event.number, next); + auto& exp = expected[index]; + + EXPECT_EQ(event.number, exp) + << "#event " << event.number << " != expected: " << exp << " for: " << index + 1; + exp = event.number + producers; + + barrier.release(next); + ++next; + ++i; + } + } + + for (std::size_t i = 0; i < expected.size(); ++i) { + EXPECT_EQ(expected[i], (i + 1) + count * producers); + } + } +}; + +struct NoWaitProducer { + static void run(auto ring, auto& barrier, std::size_t count, const std::size_t init, const std::size_t step) { + std::uint64_t number = init; + for (std::size_t i = 0; i < count;) { + const auto r = barrier.tryClaim(1); + if (r == dlsm::Disruptor::Sequence::Initial) continue; + const auto seq = r - 1; + + auto& event = ring[seq]; + event.number = number; + number += step; + + EXPECT_GE(seq, i) << "claimed sequence number must == #event"; + barrier.publish(seq); + ++i; + } + EXPECT_EQ(number, count * step 
+ init); + } +}; + +struct NoWaitConsumer { + static void run(auto ring, auto& barrier, std::size_t count, const std::size_t producers = 1) { + dlsm::Disruptor::Sequence::Value next = barrier.last() + 1; + + std::vector expected(producers, 0); + for (std::size_t i = 0; i < producers; ++i) expected[i] = i + 1; + + for (std::size_t i = 0; i < count * producers;) { + const auto available = barrier.consumable(next); + if (available == dlsm::Disruptor::Sequence::Initial) continue; + EXPECT_GE(available, next) << "at least next sequence number is available"; + for (; next <= available;) { + const auto& event = ring[next]; + + auto step = event.number % producers; + if (step == 0) step = producers; + auto index = step - 1; + + // std::cout << std::format("Sub#{}: {} seq: {}\n", index+1, event.number, next); + auto& exp = expected[index]; + + EXPECT_EQ(event.number, exp) + << "#event " << event.number << " != expected: " << exp << " for: " << index + 1; + exp = event.number + producers; + + barrier.release(next); + ++next; + ++i; + } + } + + for (std::size_t i = 0; i < expected.size(); ++i) { + EXPECT_EQ(expected[i], (i + 1) + count * producers); + } + } +}; + +template +struct InExternMemory { + static constexpr std::size_t ToSend = 5000; + Layout layout; + std::vector space; + + InExternMemory(Type type, Wait wait, std::size_t pubs = 3, std::size_t subs = 3) { + auto capacity = Layout::Items::create(512); + if (type == Type::SPMC) pubs = 1; + layout = Layout{{type, wait}, {pubs, subs}, capacity}; + space.resize(layout.size()); + } + + std::jthread Produce(IGraph::Ptr& g, auto& barrier, std::size_t init = 1, std::size_t p = 1) { + return std::jthread{[&, init, p] { Producer::run(g->ring(), *barrier, ToSend, init, p); }}; + } + std::jthread Consume(IGraph::Ptr& g, auto& barrier, std::size_t p = 1) { + return std::jthread{[&, p] { Consumer::run(g->ring(), *barrier, ToSend, p); }}; + } + + void Unicast() { // Unicast: P1 –> C1 + auto graph = IGraph::inproc(layout, space); + 
auto P1 = graph->pub("P1"); + auto C1 = graph->sub("C1"); + EXPECT_THAT(graph->dependencies("P1"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + auto TP1 = Produce(graph, P1); + auto TC1 = Consume(graph, C1); + } + void Pipeline() { // Pipeline: P1 –> C1 -> C2 -> C3 + auto graph = IGraph::inproc(layout, space); + auto P1 = graph->pub("P1"); + auto C1 = graph->sub("C1"); + auto C2 = graph->sub("C2", {{"C1"}}); + auto C3 = graph->sub("C3", {{"C2"}}); + EXPECT_THAT(graph->dependencies("P1"), Strs{"C3"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("C3"), Strs{"C2"}); + auto TP1 = Produce(graph, P1); + auto TC1 = Consume(graph, C1); + auto TC2 = Consume(graph, C2); + auto TC3 = Consume(graph, C3); + } + void Sequencer() { // Sequencer: P1,P2,P3 –> C1,C2 -> C3 + + if (layout.graph_.type_ != Type::MPMC) return; // Only MPMC supports multiple Producers + + auto graph = IGraph::inproc(layout, space); + auto P1 = graph->pub("P1"); + auto P2 = graph->pub("P2"); + auto P3 = graph->pub("P3"); + EXPECT_TRUE(graph->dependencies("P1").empty()); + EXPECT_TRUE(graph->dependencies("P2").empty()); + EXPECT_TRUE(graph->dependencies("P3").empty()); + auto C1 = graph->sub("C1"); + EXPECT_THAT(graph->dependencies("P1"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("P2"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("P3"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + auto C2 = graph->sub("C2"); + EXPECT_THAT(graph->dependencies("P1"), ContainerEq(Strs{"C1", "C2"})); + EXPECT_THAT(graph->dependencies("P2"), ContainerEq(Strs{"C1", "C2"})); + EXPECT_THAT(graph->dependencies("P3"), ContainerEq(Strs{"C1", "C2"})); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{"Master"}); + auto C3 = graph->sub("C3", {{"C1", "C2"}}); + EXPECT_THAT(graph->dependencies("P1"), Strs{"C3"}); + 
EXPECT_THAT(graph->dependencies("P2"), Strs{"C3"}); + EXPECT_THAT(graph->dependencies("P3"), Strs{"C3"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C3"), ContainerEq(Strs{"C1", "C2"})); + std::size_t producers = 3; + auto TP1 = Produce(graph, P1, 1, producers); + auto TP2 = Produce(graph, P2, 2, producers); + auto TP3 = Produce(graph, P3, 3, producers); + auto TC1 = Consume(graph, C1, producers); + auto TC2 = Consume(graph, C2, producers); + auto TC3 = Consume(graph, C3, producers); + } + void Multicast() { // Multicast: P1 –> C1,C2,C3 + auto graph = IGraph::inproc(layout, space); + auto P1 = graph->pub("P1"); + auto C1 = graph->sub("C1"); + auto C2 = graph->sub("C2"); + auto C3 = graph->sub("C3"); + EXPECT_THAT(graph->dependencies("P1"), ContainerEq(Strs{"C1", "C2", "C3"})); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C3"), Strs{"Master"}); + auto TP1 = Produce(graph, P1); + auto TC1 = Consume(graph, C1); + auto TC2 = Consume(graph, C2); + auto TC3 = Consume(graph, C3); + } + void Diamond() { // Diamond: P1 –> C1,C2 -> C3 + auto graph = IGraph::inproc(layout, space); + auto P1 = graph->pub("P1"); + auto C1 = graph->sub("C1"); + auto C2 = graph->sub("C2"); + auto C3 = graph->sub("C3", {{"C1", "C2"}}); + EXPECT_THAT(graph->dependencies("P1"), Strs{"C3"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C3"), ContainerEq(Strs{"C1", "C2"})); + auto TP1 = Produce(graph, P1); + auto TC1 = Consume(graph, C1); + auto TC2 = Consume(graph, C2); + auto TC3 = Consume(graph, C3); + } + void Topologies() { + Unicast(); + Pipeline(); + Sequencer(); + Multicast(); + Diamond(); + } +}; + +template +struct InSharedMemory { + static constexpr std::size_t ToSend = 1000; 
+ const Layout layout; + + InSharedMemory(const Layout& in) : layout{in} {} + + std::string opts() const { return std::format("name=DisruptorGraph.InSharedMemory{},lock=on", ::getpid()); } + + std::jthread pub(std::barrier<>& sync, std::string name, std::size_t init = 1, std::size_t publishers = 1) const { + return std::jthread([this, name, init, publishers, &sync] { + dlsm::Thread::name(name); + std::string create = (init == 1) ? ",purge=on,create=on" : ",open=500x1"; + auto graph = IGraph::shared(layout, opts() + create); + auto pub = graph->pub(name); + sync.arrive_and_wait(); + Producer::run(graph->template ring(), *pub, ToSend, init, publishers); + if (publishers == 1) { + EXPECT_EQ(pub->next(), ToSend); + } + // std::cout << std::format("{}:{}\n", name, graph->description()); + }); + }; + + std::jthread sub(std::barrier<>& sync, std::string name, const std::vector& deps = {}, + std::size_t publishers = 1) const { + return std::jthread([this, &sync, name, deps, publishers] { + dlsm::Thread::name(name); + std::vector svdeps; + svdeps.reserve(deps.size()); + for (auto& s : deps) svdeps.emplace_back(std::string_view{s}); + auto graph = IGraph::shared(layout, opts() + ",open=500x1"); + auto sub = graph->sub(name, svdeps); + sync.arrive_and_wait(); + Consumer::run(graph->template ring(), *sub, ToSend, publishers); + EXPECT_EQ(sub->last(), (publishers * ToSend) - 1); + // std::cout << std::format("{}:{}\n", name, graph->description()); + }); + }; + + void Unicast() { // Unicast: P1 –> C1 + std::barrier sync(2); + auto P1 = pub(sync, "P1"); + auto C1 = sub(sync, "C1"); + } + void Pipeline() { // Pipeline: P1 –> C1 -> C2 -> C3 + std::barrier sync(4); + auto P1 = pub(sync, "P1"); + auto C1 = sub(sync, "C1"); + auto C2 = sub(sync, "C2", Strs{"C1"}); + auto C3 = sub(sync, "C3", Strs{"C2"}); + } + void Sequencer() { // Sequencer: P1,P2,P3 –> C1,C2 -> C3 + std::barrier sync(6); + std::size_t publishers = 3; + auto P1 = pub(sync, "P1", 1, publishers); + auto P2 = pub(sync, 
"P2", 2, publishers); + auto P3 = pub(sync, "P3", 3, publishers); + auto C1 = sub(sync, "C1", Strs{}, publishers); + auto C2 = sub(sync, "C2", Strs{}, publishers); + auto C3 = sub(sync, "C3", Strs{"C1", "C2"}, publishers); + } + void Multicast() { // Multicast: P1 –> C1,C2,C3 + std::barrier sync(4); + auto P1 = pub(sync, "P1"); + auto C1 = sub(sync, "C1"); + auto C2 = sub(sync, "C2"); + auto C3 = sub(sync, "C3"); + } + void Diamond() { // Diamond: P1 –> C1,C2 -> C3 + std::barrier sync(4); + auto P1 = pub(sync, "P1"); + auto C1 = sub(sync, "C1"); + auto C2 = sub(sync, "C2"); + auto C3 = sub(sync, "C3", Strs{"C1", "C2"}); + } + void Topologies() { + Unicast(); + Pipeline(); + if (layout.graph_.type_ == Type::MPMC) Sequencer(); // SPMC does not support multiple Producers + Multicast(); + Diamond(); + } +}; + +} // namespace + +TEST(DisruptorGraph, Layout) { + Layout layout; + EXPECT_EQ(layout.graph_.wait_, Wait::Block); + const auto empty = layout.size(); + EXPECT_GE(empty, 128); + + layout = Layout({}, {}, Layout::Items::create(0, "double")); + EXPECT_EQ(layout.size(), empty); + EXPECT_EQ(layout.items_.type(), "double"); + + EXPECT_THAT( + [&] { + layout.check(Layout::Graph{Type::MPMC, Wait::Yield}); + }, + ThrowsMessage("Layout::Graph missmatch: type:SPSC=MPMC wait:Block=Yield")); + EXPECT_THAT( + [&] { + layout.check(Layout::Slots{1, 4}); + }, + ThrowsMessage("Layout::Slots missmatch: maxPub:0=1 maxSub:0=4")); + EXPECT_THAT([&] { layout.check(Layout::Items::create(0, "float")); }, + ThrowsMessage("Layout::Items missmatch: size:8=4 align:8=4 type:double=float")); +} + +TEST(DisruptorGraph, RefCounting) { + auto i = Layout::Items::create(2); + { + auto graph = IGraph::create(Type::SPMC, Wait::Spins, i); + EXPECT_EQ(graph.use_count(), 1); + auto C1 = graph->sub("C1"); + EXPECT_EQ(graph.use_count(), 2); + EXPECT_EQ(C1.use_count(), 1); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + auto P1 = 
graph->pub("P1"); + EXPECT_EQ(graph.use_count(), 3); + EXPECT_EQ(C1.use_count(), 1); + EXPECT_EQ(P1.use_count(), 1); + C1.reset(); + EXPECT_EQ(graph.use_count(), 2); + P1.reset(); + EXPECT_EQ(graph.use_count(), 1); + // use_count() == 1 above already proves sole ownership; shared_ptr::unique() was removed in C++20 + } +} + +TEST(DisruptorGraph, Description) { + auto i = Layout::Items::create(2); + + auto graph = IGraph::create(Type::MPMC, Wait::Spins, i); + auto P1 = graph->pub("P1"); + P1->publish(P1->claim() - 1); // publish the claimed sequence, as every other claim()/publish() pair in these tests does + std::string placeholder = graph->description(); + EXPECT_FALSE(placeholder.empty()); +} + +TEST(DisruptorGraph, Linking) { + auto i = Layout::Items::create(2); + { + // Pipeline: P1 -> C1 + auto graph = IGraph::create(Type::SPMC, Wait::Spins, i); + auto C1 = graph->sub("C1"); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + } + + { + // Pipeline: P1 -> C1 -> C2 + auto graph = IGraph::create(Type::SPMC, Wait::Spins, i); + auto C1 = graph->sub("C1"); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + auto C2 = graph->sub("C2", {{"C1"}}); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C2"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{"C1"}); + } + { + // Pipeline: P1 -> C1 -> C2 + auto graph = IGraph::create(Type::SPMC, Wait::Spins, i); + auto C2 = graph->sub("C2", {{"C1"}}); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C2"}) << graph->description(); + EXPECT_THAT(graph->dependencies("C2"), Strs{"C1"}) << graph->description(); + EXPECT_THAT(graph->dependencies("C1"), Strs{}); + auto C1 = graph->sub("C1"); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C2"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{"C1"}); + } + + { + // Pipeline: P1 -> C1 -> C2 -> C3 + auto graph = IGraph::create(Type::SPMC, Wait::Spins, i); + auto C1 = graph->sub("C1"); + 
EXPECT_THAT(graph->dependencies("Master"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + auto C2 = graph->sub("C2", {{"C1"}}); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C2"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{"C1"}); + auto C3 = graph->sub("C3", {{"C2"}}); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C3"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("C3"), Strs{"C2"}); + } + { + // Pipeline: P1 -> C1 -> C2 -> C3 + auto graph = IGraph::create(Type::SPMC, Wait::Spins, i); + auto C3 = graph->sub("C3", {{"C2"}}); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C3"}) << graph->description(); + EXPECT_THAT(graph->dependencies("C3"), Strs{"C2"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{}); + auto C2 = graph->sub("C2", {{"C1"}}); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C3"}) << graph->description(); + EXPECT_THAT(graph->dependencies("C1"), Strs{}); + EXPECT_THAT(graph->dependencies("C2"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("C3"), Strs{"C2"}); + auto C1 = graph->sub("C1"); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C3"}) << graph->description(); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("C3"), Strs{"C2"}); + } + { + // Pipeline: P1 -> C1 -> C2 -> C3 + auto graph = IGraph::create(Type::SPMC, Wait::Spins, i); + auto C1 = graph->sub("C1"); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + auto C3 = graph->sub("C3", {{"C2"}}); + EXPECT_THAT(graph->dependencies("Master"), (Strs{"C1", "C3"})); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{}); + EXPECT_THAT(graph->dependencies("C3"), Strs{"C2"}); + auto 
C2 = graph->sub("C2", {{"C1"}}); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C3"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("C3"), Strs{"C2"}); + } + { + // Pipeline: P1 -> C1 -> C2 -> C3 + auto graph = IGraph::create(Type::SPMC, Wait::Spins, i); + auto C2 = graph->sub("C2", {{"C1"}}); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C2"}) << graph->description(); + EXPECT_THAT(graph->dependencies("C2"), Strs{"C1"}) << graph->description(); + EXPECT_THAT(graph->dependencies("C1"), Strs{}); + auto C3 = graph->sub("C3", {{"C2"}}); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C3"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("C3"), Strs{"C2"}); + auto C1 = graph->sub("C1"); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C3"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("C3"), Strs{"C2"}); + } + + { + // Sequencer: P1,P2,P3 –> C1,C2 -> C3 + auto graph = IGraph::create(Type::MPMC, Wait::Spins, i); + auto P1 = graph->pub("P1"); + auto P2 = graph->pub("P2"); + auto P3 = graph->pub("P3"); + EXPECT_TRUE(graph->dependencies("P1").empty()); + EXPECT_TRUE(graph->dependencies("P2").empty()); + EXPECT_TRUE(graph->dependencies("P3").empty()); + auto C1 = graph->sub("C1"); + EXPECT_THAT(graph->dependencies("P1"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("P2"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("P3"), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + auto C2 = graph->sub("C2"); + EXPECT_THAT(graph->dependencies("P1"), ContainerEq(Strs{"C1", "C2"})); + EXPECT_THAT(graph->dependencies("P2"), ContainerEq(Strs{"C1", "C2"})); + EXPECT_THAT(graph->dependencies("P3"), ContainerEq(Strs{"C1", "C2"})); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + 
EXPECT_THAT(graph->dependencies("C2"), Strs{"Master"}); + auto C3 = graph->sub("C3", {{"C1", "C2"}}); + EXPECT_THAT(graph->dependencies("P1"), Strs{"C3"}); + EXPECT_THAT(graph->dependencies("P2"), Strs{"C3"}); + EXPECT_THAT(graph->dependencies("P3"), Strs{"C3"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C3"), ContainerEq(Strs{"C1", "C2"})); + } + + { + // Diamond: P1 –> C1,C2 -> C3 + auto graph = IGraph::create(Type::MPMC, Wait::Spins, i); + auto P1 = graph->pub("P1"); + EXPECT_TRUE(graph->dependencies().empty()); + auto C1 = graph->sub("C1"); + EXPECT_THAT(graph->dependencies(), Strs{"C1"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + auto C2 = graph->sub("C2"); + EXPECT_THAT(graph->dependencies(), ContainerEq(Strs{"C1", "C2"})); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{"Master"}); + auto C3 = graph->sub("C3", {{"C1", "C2"}}); + EXPECT_THAT(graph->dependencies(), Strs{"C3"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C3"), ContainerEq(Strs{"C1", "C2"})); + } + { + // Diamond: P1 –> C1,C2 -> C3 + auto graph = IGraph::create(Type::MPMC, Wait::Spins, i); + auto P1 = graph->pub("P1"); + EXPECT_TRUE(graph->dependencies().empty()); + auto C3 = graph->sub("C3", {{"C1", "C2"}}); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C3"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{}); + EXPECT_THAT(graph->dependencies("C2"), Strs{}); + EXPECT_THAT(graph->dependencies("C3"), ContainerEq(Strs{"C1", "C2"})); + auto C1 = graph->sub("C1"); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C3"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{}); + EXPECT_THAT(graph->dependencies("C3"), ContainerEq(Strs{"C1", "C2"})); + auto C2 
= graph->sub("C2"); + EXPECT_THAT(graph->dependencies("Master"), Strs{"C3"}); + EXPECT_THAT(graph->dependencies("C1"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C2"), Strs{"Master"}); + EXPECT_THAT(graph->dependencies("C3"), ContainerEq(Strs{"C1", "C2"})); + } +} + +TEST(DisruptorGraph, NoWait) { + InExternMemory(Type::SPMC, Wait::Spins).Topologies(); + InExternMemory(Type::MPMC, Wait::Spins).Topologies(); + + InExternMemory(Type::SPMC, Wait::Spins).Topologies(); + InExternMemory(Type::MPMC, Wait::Spins).Topologies(); + + InExternMemory(Type::SPMC, Wait::Spins).Topologies(); + InExternMemory(Type::MPMC, Wait::Spins).Topologies(); +} + +TEST(DisruptorGraph, InExternMemory) { + InExternMemory(Type::SPMC, Wait::Spins).Topologies(); + InExternMemory(Type::MPMC, Wait::Spins).Topologies(); + + InExternMemory(Type::SPMC, Wait::Yield).Topologies(); + InExternMemory(Type::MPMC, Wait::Yield).Topologies(); + + InExternMemory(Type::SPMC, Wait::Block).Topologies(); + InExternMemory(Type::MPMC, Wait::Block).Topologies(); + + InExternMemory(Type::SPMC, Wait::Share).Topologies(); + InExternMemory(Type::MPMC, Wait::Share).Topologies(); + + { + auto capacity = Layout::Items{512}; + auto layout = Layout{{Type::MPMC, Wait::Block}, {1, 1}, capacity}; + std::vector lowspace{42}; + + EXPECT_THAT([&] { IGraph::inproc(layout, lowspace); }, + ThrowsMessage( + MatchesRegex(R"(IGraph::Layout requires \d+ bytes, only \d+ provided)"))); + } +} + +TEST(DisruptorGraph, InSharedMemory) { + const auto capacity = Layout::Items::create(64); + { + auto layout = Layout{{Type::SPMC, Wait::Spins}, {1, 3}, capacity}; + InSharedMemory{layout}.Topologies(); + } + { + auto layout = Layout{{Type::MPMC, Wait::Spins}, {3, 3}, capacity}; + InSharedMemory{layout}.Topologies(); + } + { + auto layout = Layout{{Type::SPMC, Wait::Share}, {1, 3}, capacity}; + InSharedMemory{layout}.Topologies(); + } + { + auto layout = Layout{{Type::MPMC, Wait::Share}, {3, 3}, capacity}; + 
InSharedMemory{layout}.Topologies(); + } + + { + auto layout = Layout{{Type::SPMC, Wait::Block}, {1, 3}, capacity}; + EXPECT_THAT([&] { IGraph::shared(layout, "opts"); }, + ThrowsMessage("Wait::Block is not allowed for Layout in Shared Memory")); + } + { + auto layout = Layout{{Type::SPMC, Wait::Yield}, {2, 3}, capacity}; + EXPECT_THAT([&] { IGraph::shared(layout, "name=DisruptorGraph.BadMaxSubs,create=on"); }, + ThrowsMessage("Type::SPMC supports only one producer, current limit:2")); + } +}