Skip to content

Commit

Permalink
added CachelineAlignment to the queue traits
Browse files Browse the repository at this point in the history
The purpose of the cacheline-size padding between class members is to
avoid false sharing when threads modify the atomic members, but the
atomic members themselves do not have to be aligned to the cacheline
size. They are very small (just 4 or 8 bytes) compared to the cacheline,
so as far as I can tell their explicit alignment does not matter: they
can be placed anywhere on a cacheline - we just need to ensure that they
are mapped to different cachelines.

To avoid problems with misaligned dynamic allocation, the requested
alignment must not be stricter than alignof(std::max_align_t), which is
16 bytes on x86_64 Linux. This value was added to the traits structs as
CachelineAlignment.

Fixes d36u9#1
  • Loading branch information
lahwaacz committed Dec 31, 2020
1 parent c76224b commit 8378bbe
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 18 deletions.
12 changes: 7 additions & 5 deletions async/bounded_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ namespace async {
// Default compile-time configuration for bounded_queue (it is the default
// argument of the queue's TRAITS template parameter).
struct bounded_traits {
    // Integer type behind the queue's atomic enqueue/dequeue indices.
    using sequence_type = uint64_t;

    // Exception handling flag.
    static constexpr bool NOEXCEPT_CHECK{false};

    // Size in bytes of the padding arrays placed between the atomic members.
    static constexpr size_t CachelineSize{64};

    // Alignment requested for the padded members; must not be larger than
    // alignof(std::max_align_t), see issue #1.
    static constexpr size_t CachelineAlignment{16};
};

Expand All @@ -27,6 +28,7 @@ template <typename T, typename TRAITS = bounded_traits> class bounded_queue {

public:
static constexpr size_t cacheline_size = TRAITS::CachelineSize;
static constexpr size_t cacheline_alignment = TRAITS::CachelineAlignment;
using seq_t = typename TRAITS::sequence_type;
explicit bounded_queue(size_t size)
: fastmodulo((size > 0 && ((size & (size - 1)) == 0))),
Expand Down Expand Up @@ -331,10 +333,10 @@ template <typename T, typename TRAITS = bounded_traits> class bounded_queue {
element *const elements; // pointer to buffer
size_t const mask; // used if fastmodulo is true
size_t const qsize; // queue size
alignas(cacheline_size) char cacheline_padding1[cacheline_size];
alignas(cacheline_size) std::atomic<seq_t> enqueueIx;
alignas(cacheline_size) char cacheline_padding2[cacheline_size];
alignas(cacheline_size) std::atomic<seq_t> dequeueIx;
alignas(cacheline_size) char cacheline_padding3[cacheline_size];
alignas(cacheline_alignment) char cacheline_padding1[cacheline_size];
alignas(cacheline_alignment) std::atomic<seq_t> enqueueIx;
alignas(cacheline_alignment) char cacheline_padding2[cacheline_size];
alignas(cacheline_alignment) std::atomic<seq_t> dequeueIx;
alignas(cacheline_alignment) char cacheline_padding3[cacheline_size];
};
} // namespace async
28 changes: 15 additions & 13 deletions async/queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ struct traits // 3-level (L3, L2, L1) depth of nested group design, total
static constexpr uint64_t Basebits = 8;
static constexpr bool NOEXCEPT_CHECK = false; // exception handling flag
static constexpr size_t CachelineSize = 64;
static constexpr size_t CachelineAlignment = 16; // must not be larger than alignof(std::max_align_t), see issue #1
};

template <typename T, typename TRAITS = traits> class queue final {
Expand All @@ -29,6 +30,7 @@ template <typename T, typename TRAITS = traits> class queue final {
return std::atomic<uint64_t>{}.is_lock_free();
}
static constexpr size_t cacheline_size = TRAITS::CachelineSize;
static constexpr size_t cacheline_alignment = TRAITS::CachelineAlignment;
static constexpr uint64_t BaseMask = getBitmask<uint64_t>(TRAITS::Basebits);
static constexpr uint64_t L1Mask = getBitmask<uint64_t>(TRAITS::L1bits)
<< TRAITS::Basebits;
Expand Down Expand Up @@ -411,17 +413,17 @@ template <typename T, typename TRAITS = traits> class queue final {
using L1container = nestedcontainer<basecontainer, L1Mask>;
using L2container = nestedcontainer<L1container, L2Mask>;
nestedcontainer<L2container, L3Mask> container;
alignas(cacheline_size) char cacheline_padding1[cacheline_size];
alignas(cacheline_size) std::atomic<uint64_t> nodeCount; // # of allocated nodes, not the #
// of elements stored in the queue
alignas(cacheline_size) char cacheline_padding2[cacheline_size];
alignas(cacheline_size) std::atomic<index> dequeueIx; // dequeue pointer
alignas(cacheline_size) char cacheline_padding3[cacheline_size];
alignas(cacheline_size) std::atomic<index> enqueueIx; // enqueue pointer
alignas(cacheline_size) char cacheline_padding4[cacheline_size];
alignas(cacheline_size) std::atomic<index> spawnIx; // spawn pointer
alignas(cacheline_size) char cacheline_padding5[cacheline_size];
alignas(cacheline_size) std::atomic<index> recycleIx; // recycle pointer
alignas(cacheline_size) char cacheline_padding6[cacheline_size];
alignas(cacheline_alignment) char cacheline_padding1[cacheline_size];
alignas(cacheline_alignment) std::atomic<uint64_t> nodeCount; // # of allocated nodes, not the #
// of elements stored in the queue
alignas(cacheline_alignment) char cacheline_padding2[cacheline_size];
alignas(cacheline_alignment) std::atomic<index> dequeueIx; // dequeue pointer
alignas(cacheline_alignment) char cacheline_padding3[cacheline_size];
alignas(cacheline_alignment) std::atomic<index> enqueueIx; // enqueue pointer
alignas(cacheline_alignment) char cacheline_padding4[cacheline_size];
alignas(cacheline_alignment) std::atomic<index> spawnIx; // spawn pointer
alignas(cacheline_alignment) char cacheline_padding5[cacheline_size];
alignas(cacheline_alignment) std::atomic<index> recycleIx; // recycle pointer
alignas(cacheline_alignment) char cacheline_padding6[cacheline_size];
};
} // namespace async
} // namespace async

0 comments on commit 8378bbe

Please sign in to comment.