diff --git a/include/ofi_atomic_queue.h b/include/ofi_atomic_queue.h index 8ffecbe2667..9d5eb70de6e 100644 --- a/include/ofi_atomic_queue.h +++ b/include/ofi_atomic_queue.h @@ -94,19 +94,28 @@ extern "C" { #define OFI_CACHE_LINE_SIZE (64) +/* + * Base address of atomic queue must be cache line aligned to maximize atomic + * value performance benefits + */ #define OFI_DECLARE_ATOMIC_Q(entrytype, name) \ struct name ## _entry { \ ofi_atomic64_t seq; \ bool noop; \ entrytype buf; \ -}; \ +} __attribute__((__aligned__(64))); \ + \ struct name { \ - int size; \ - int size_mask; \ ofi_atomic64_t write_pos; \ - char pad0[OFI_CACHE_LINE_SIZE]; \ + uint8_t pad0[OFI_CACHE_LINE_SIZE - \ + sizeof(ofi_atomic64_t)]; \ ofi_atomic64_t read_pos; \ - char pad1[OFI_CACHE_LINE_SIZE]; \ + uint8_t pad1[OFI_CACHE_LINE_SIZE - \ + sizeof(ofi_atomic64_t)]; \ + int size; \ + int size_mask; \ + uint8_t pad2[OFI_CACHE_LINE_SIZE - \ + (sizeof(int) * 2)]; \ struct name ## _entry entry[]; \ } __attribute__((__aligned__(64))); \ \ @@ -114,6 +123,7 @@ static inline void name ## _init(struct name *aq, size_t size) \ { \ size_t i; \ assert(size == roundup_power_of_two(size)); \ + assert(!((uintptr_t) aq % OFI_CACHE_LINE_SIZE)); \ aq->size = size; \ aq->size_mask = aq->size - 1; \ ofi_atomic_initialize64(&aq->write_pos, 0); \ diff --git a/prov/shm/src/smr_util.c b/prov/shm/src/smr_util.c index 190787ac935..16f157b5e80 100644 --- a/prov/shm/src/smr_util.c +++ b/prov/shm/src/smr_util.c @@ -108,8 +108,8 @@ size_t smr_calculate_size_offsets(size_t tx_count, size_t rx_count, tx_size = roundup_power_of_two(tx_count); rx_size = roundup_power_of_two(rx_count); - /* Align cmd_queue offset to 128-bit boundary. 
*/ - cmd_queue_offset = ofi_get_aligned_size(sizeof(struct smr_region), 16); + /* Align cmd_queue offset to cache line */ + cmd_queue_offset = ofi_get_aligned_size(sizeof(struct smr_region), 64); resp_queue_offset = cmd_queue_offset + sizeof(struct smr_cmd_queue) + sizeof(struct smr_cmd_queue_entry) * rx_size; inject_pool_offset = resp_queue_offset + sizeof(struct smr_resp_queue) +