Skip to content

Commit

Permalink
[SYCL][ESIMD] Move named barrier functions to supported and use new i…
Browse files Browse the repository at this point in the history
…ntrinsic (#13704)

The new intrinsic works in the drivers used in CI now, previously it
didn't. The only change is adding the `Fence` parameter for
named_barrier_signal.
I manually ran the tests on PVC.

---------

Signed-off-by: Sarnie, Nick <nick.sarnie@intel.com>
  • Loading branch information
sarnex authored May 9, 2024
1 parent 2d6bd7c commit 0b4be96
Show file tree
Hide file tree
Showing 12 changed files with 171 additions and 107 deletions.
30 changes: 30 additions & 0 deletions sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1130,4 +1130,34 @@ __ESIMD_INTRIN void __esimd_lsc_store2d_stateless(
int SurfaceHeight, int SurfacePitch, int X, int Y,
__ESIMD_DNS::vector_type_t<Ty, N> vals) __ESIMD_INTRIN_END;

/// Represents named barrier synchronization for a subgroup of threads.
/// Available only on PVC
///
/// @param mode - is wait(0) or signal(1)
///
/// @param id - barrier id
///
/// @param thread_count - number of threads, ignored in 'wait' mode
__ESIMD_INTRIN void __esimd_nbarrier(uint8_t mode, uint8_t id,
uint8_t thread_count) __ESIMD_INTRIN_END;

/// Initialize number of named barriers for a kernel
/// Available only on PVC
///
/// @param count - number of named barriers
__ESIMD_INTRIN void __esimd_nbarrier_init(uint8_t count) __ESIMD_INTRIN_END;

/// Perform signal operation on named barriers
/// Available only on PVC
/// @param id - barrier id
///
/// @param thread_role - thread role
///
/// @param num_producers - number of producers
///
/// @param num_consumers - number of consumers
__ESIMD_INTRIN void
__esimd_nbarrier_arrive(uint8_t id, uint8_t thread_role, uint8_t num_producers,
uint8_t num_consumers) __ESIMD_INTRIN_END;

/// @endcond ESIMD_DETAIL
49 changes: 49 additions & 0 deletions sycl/include/sycl/ext/intel/esimd/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9773,6 +9773,55 @@ __ESIMD_API void raw_send(__ESIMD_NS::simd<T1, n1> msg_src0, uint32_t ex_desc,

/// @} sycl_esimd_raw_send

/// @defgroup sycl_esimd_memory_nbarrier Named barrier APIs.
/// @ingroup sycl_esimd_memory

/// @addtogroup sycl_esimd_memory_nbarrier
/// @{

/// Wait on a named barrier
/// Available only on PVC
///
/// @param id - named barrier id
__ESIMD_API void named_barrier_wait(uint8_t id) {
__esimd_nbarrier(0 /*wait*/, id, 0 /*thread count*/);
}

/// Initialize number of named barriers for a kernel
/// Available only on PVC
///
/// @tparam NbarCount - number of named barriers
template <uint8_t NbarCount> __ESIMD_API void named_barrier_init() {
__esimd_nbarrier_init(NbarCount);
}

/// Perform signal operation for the given named barrier
/// Available only on PVC
///
/// @tparam Fence - fence before signaling
///
/// @param barrier_id - named barrier id
///
/// @param producer_consumer_mode - 2-bit flag to indicate if it's producer
/// mode (0x1) or consumer mode (0x2). User must ensure the input value is set
/// correctly and higher order bits are cleared.
///
/// @param num_producers - number of producers
///
/// @param num_consumers - number of consumers
template <bool Fence = true>
__ESIMD_API void
named_barrier_signal(uint8_t barrier_id, uint8_t producer_consumer_mode,
uint32_t num_producers, uint32_t num_consumers) {
if constexpr (Fence)
__esimd_fence(fence_mask::global_coherent_fence |
fence_mask::local_barrier);
__esimd_nbarrier_arrive(barrier_id, producer_consumer_mode, num_producers,
num_consumers);
}

/// @} sycl_esimd_memory_nbarrier

/// @} sycl_esimd_memory

/// @cond EXCLUDE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,57 +25,6 @@ __esimd_sbarrier(__ESIMD_ENS::split_barrier_action flag) __ESIMD_INTRIN_END;
__ESIMD_INTRIN void __esimd_wait(uint16_t value);
#endif // __SYCL_DEVICE_ONLY__

/// Represents named barrier synchronization for a subgroup of threads.
/// Available only on PVC
///
/// @param mode - is wait(0) or signal(1)
///
/// @param id - barrier id
///
/// @param thread_count - number of threads, ignored in 'wait' mode
__ESIMD_INTRIN void __esimd_nbarrier(uint8_t mode, uint8_t id,
uint8_t thread_count) __ESIMD_INTRIN_END;

/// Initialize number of named barriers for a kernel
/// Available only on PVC
///
/// @param count - number of named barriers
__ESIMD_INTRIN void __esimd_nbarrier_init(uint8_t count) __ESIMD_INTRIN_END;

/// Raw send signal to perform signal operation on named barriers
/// Available only on PVC
/// @tparam Ty - message element type
///
/// @tparam N - message length
///
/// @param is_sendc - is sendc
///
/// @param extended_descriptor - extended message descriptor
///
/// @param descriptor - message descriptor
///
/// @param msg_var - source operand of send message
///
/// @param pred - predicate for enabled channels
template <typename Ty, int N>
__ESIMD_INTRIN void __esimd_raw_send_nbarrier_signal(
uint32_t is_sendc, uint32_t extended_descriptor, uint32_t descriptor,
__ESIMD_DNS::vector_type_t<Ty, N> msg_var,
uint16_t pred = 1) __ESIMD_INTRIN_END;

/// Perform signal operation on named barriers
/// Available only on PVC
/// @param id - barrier id
///
/// @param thread_role - thread role
///
/// @param num_producers - number of producers
///
/// @param num_consumers - number of consumers
__ESIMD_INTRIN void
__esimd_nbarrier_arrive(uint8_t id, uint8_t thread_role, uint8_t num_producers,
uint8_t num_consumers) __ESIMD_INTRIN_END;

/// Memory fence.
/// Supported platforms: DG2, PVC
///
Expand Down
30 changes: 9 additions & 21 deletions sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -366,16 +366,19 @@ __ESIMD_API
/// Available only on PVC
///
/// @param id - named barrier id
__SYCL_DEPRECATED("use sycl::ext::intel::esimd::named_barrier_wait")
__ESIMD_API void named_barrier_wait(uint8_t id) {
__esimd_nbarrier(0 /*wait*/, id, 0 /*thread count*/);
__ESIMD_NS::named_barrier_wait(id);
}

/// Initialize number of named barriers for a kernel
/// Available only on PVC
///
/// @tparam NbarCount - number of named barriers
template <uint8_t NbarCount> __ESIMD_API void named_barrier_init() {
__esimd_nbarrier_init(NbarCount);
template <uint8_t NbarCount>
__SYCL_DEPRECATED("use sycl::ext::intel::esimd::named_barrier_init")
__ESIMD_API void named_barrier_init() {
__ESIMD_NS::named_barrier_init<NbarCount>();
}

/// Perform signal operation for the given named barrier
Expand All @@ -390,28 +393,13 @@ template <uint8_t NbarCount> __ESIMD_API void named_barrier_init() {
/// @param num_producers - number of producers
///
/// @param num_consumers - number of consumers
__SYCL_DEPRECATED("use sycl::ext::intel::esimd::named_barrier_signal")
__ESIMD_API void named_barrier_signal(uint8_t barrier_id,
uint8_t producer_consumer_mode,
uint32_t num_producers,
uint32_t num_consumers) {
__esimd_fence(__ESIMD_NS::fence_mask::global_coherent_fence |
__ESIMD_NS::fence_mask::local_barrier);
#ifdef __ESIMD_USE_NEW_NAMED_BARRIER_INTRIN
__esimd_nbarrier_arrive(barrier_id, producer_consumer_mode, num_producers,
num_consumers);
#else
constexpr uint32_t gateway = 3;
constexpr uint32_t barrier = 4;
constexpr uint32_t descriptor = 1 << 25 | // Message length: 1 register
0 << 12 | // Fence Data Ports: No fence
barrier; // Barrier subfunction

__ESIMD_DNS::vector_type_t<uint32_t, 8> payload = 0;
payload[2] = (num_consumers & 0xff) << 24 | (num_producers & 0xff) << 16 |
producer_consumer_mode << 14 | (barrier_id & 0b11111) << 0;
__esimd_raw_send_nbarrier_signal<uint32_t, 8>(
0 /*sendc*/, gateway, descriptor, payload, 1 /*pred*/);
#endif
__ESIMD_NS::named_barrier_signal(barrier_id, producer_consumer_mode,
num_producers, num_consumers);
}

/// Create explicit scoreboard dependency to avoid device code motion
Expand Down
18 changes: 13 additions & 5 deletions sycl/test-e2e/ESIMD/named_barriers/exec_in_order.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
// REQUIRES: gpu-intel-pvc
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// RUN: %{build} -o %t1.out -DEXP
// RUN: %{run} %t1.out
//
// Test checks support of named barrier in ESIMD kernel.
// Threads are executed in ascending order of their local ID and each thread
Expand All @@ -21,6 +23,12 @@

#include "../esimd_test_utils.hpp"

#ifdef EXP
#define NS __ESIMD_ENS
#else
#define NS __ESIMD_NS
#endif

using namespace sycl;
using namespace sycl::ext::intel::esimd;
using namespace sycl::ext::intel::experimental::esimd;
Expand Down Expand Up @@ -64,7 +72,7 @@ bool test(QueueTY q) {
Range, [=](sycl::nd_item<1> ndi) SYCL_ESIMD_KERNEL {
// Threads - 1 named barriers required
// but id 0 reserved for unnamed
named_barrier_init<Threads>();
NS::named_barrier_init<Threads>();

unsigned int idx = ndi.get_local_id(0);
// overlaping offset
Expand Down Expand Up @@ -92,8 +100,8 @@ bool test(QueueTY q) {
// and so on
if (idx > 0) {
int barrier_id = idx;
named_barrier_signal(barrier_id, flag, producers, consumers);
named_barrier_wait(barrier_id);
NS::named_barrier_signal(barrier_id, flag, producers, consumers);
NS::named_barrier_wait(barrier_id);
}

if constexpr (UseSLM)
Expand All @@ -109,8 +117,8 @@ bool test(QueueTY q) {
// and so on, but last thread skipped this block
if (idx < Threads - 1) {
int barrier_id = idx + 1;
named_barrier_signal(barrier_id, flag, producers, consumers);
named_barrier_wait(barrier_id);
NS::named_barrier_signal(barrier_id, flag, producers, consumers);
NS::named_barrier_wait(barrier_id);
}

barrier();
Expand Down
34 changes: 21 additions & 13 deletions sycl/test-e2e/ESIMD/named_barriers/exec_in_order_branched.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
// REQUIRES: gpu-intel-pvc
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// RUN: %{build} -o %t1.out -DEXP
// RUN: %{run} %t1.out
//
// Test checks support of named barrier in ESIMD kernel.
// Threads are executed in ascending order of their local ID and each thread
Expand All @@ -22,6 +24,12 @@

#include "../esimd_test_utils.hpp"

#ifdef EXP
#define NS __ESIMD_ENS
#else
#define NS __ESIMD_NS
#endif

using namespace sycl;
using namespace sycl::ext::intel::esimd;
using namespace sycl::ext::intel::experimental::esimd;
Expand Down Expand Up @@ -64,7 +72,7 @@ bool test(QueueTY q) {
Range, [=](sycl::nd_item<1> ndi) SYCL_ESIMD_KERNEL {
// Threads - 1 named barriers required
// but id 0 reserved for unnamed
named_barrier_init<Threads>();
NS::named_barrier_init<Threads>();

unsigned int idx = ndi.get_local_id(0);
// overlaping offset
Expand Down Expand Up @@ -98,13 +106,13 @@ bool test(QueueTY q) {
// T0 signals barrier 1 and locks
// waiting for first signal from T1
const int barrier_id = 1;
named_barrier_signal(barrier_id, flag, producers, consumers);
named_barrier_wait(barrier_id);
NS::named_barrier_signal(barrier_id, flag, producers, consumers);
NS::named_barrier_wait(barrier_id);
} else if (idx == 1) {
// T1 signals barrier 1 and locks, waiting for signal from T0
const int barrier_id = 1;
named_barrier_signal(barrier_id, flag, producers, consumers);
named_barrier_wait(barrier_id);
NS::named_barrier_signal(barrier_id, flag, producers, consumers);
NS::named_barrier_wait(barrier_id);

if constexpr (UseSLM) {
lsc_slm_block_store<int, VL>(off, val);
Expand All @@ -117,14 +125,14 @@ bool test(QueueTY q) {
// T1 signals barrier 2 and locks
// waiting for first signal from T2
const int barrier_id2 = 2;
named_barrier_signal(barrier_id2, flag, producers, consumers);
named_barrier_wait(barrier_id2);
NS::named_barrier_signal(barrier_id2, flag, producers, consumers);
NS::named_barrier_wait(barrier_id2);
} else if (idx == 2) {
// T2 signals barrier 2 and locks
// waiting for second signal from T1
const int barrier_id = 2;
named_barrier_signal(barrier_id, flag, producers, consumers);
named_barrier_wait(barrier_id);
NS::named_barrier_signal(barrier_id, flag, producers, consumers);
NS::named_barrier_wait(barrier_id);

if constexpr (UseSLM) {
lsc_slm_block_store<int, VL>(off, val);
Expand All @@ -136,14 +144,14 @@ bool test(QueueTY q) {

// T2 signals barrier 3 and locks, waiting for signal from T3
const int barrier_id2 = 3;
named_barrier_signal(barrier_id2, flag, producers, consumers);
named_barrier_wait(barrier_id2);
NS::named_barrier_signal(barrier_id2, flag, producers, consumers);
NS::named_barrier_wait(barrier_id2);
} else {
// T3 signals barrier 3 and locks
// waiting for second signal from T2
const int barrier_id = 3;
named_barrier_signal(barrier_id, flag, producers, consumers);
named_barrier_wait(barrier_id);
NS::named_barrier_signal(barrier_id, flag, producers, consumers);
NS::named_barrier_wait(barrier_id);

if constexpr (UseSLM) {
lsc_slm_block_store<int, VL>(off, val);
Expand Down
14 changes: 11 additions & 3 deletions sycl/test-e2e/ESIMD/named_barriers/loop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
// REQUIRES: gpu-intel-pvc
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// RUN: %{build} -o %t1.out -DEXP
// RUN: %{run} %t1.out
//
// Test checks support of named barrier in a loop in ESIMD kernel.
// SLM and surface size is 32 bytes, 16 bytes per iteration.
Expand All @@ -21,6 +23,12 @@

#include "../esimd_test_utils.hpp"

#ifdef EXP
#define NS __ESIMD_ENS
#else
#define NS __ESIMD_NS
#endif

using namespace sycl;
using namespace sycl::ext::intel::esimd;
using namespace sycl::ext::intel::experimental::esimd;
Expand Down Expand Up @@ -62,7 +70,7 @@ bool test(QueueTY q) {
// number of ints read/written by single thread
constexpr unsigned VL = SlmSize / Threads;

named_barrier_init<bnum>();
NS::named_barrier_init<bnum>();

unsigned int idx = ndi.get_local_id(0);
unsigned int off = idx * VL * sizeof(int);
Expand Down Expand Up @@ -91,8 +99,8 @@ bool test(QueueTY q) {
lsc_slm_block_store<int, SlmSize / 2>(prod_off, init);
}

named_barrier_signal(b, flag, producers, consumers);
named_barrier_wait(b); // consumers waiting for signal
NS::named_barrier_signal(b, flag, producers, consumers);
NS::named_barrier_wait(b); // consumers waiting for signal

// reading SLM
auto val = lsc_slm_block_load<int, VL>(off);
Expand Down
Loading

0 comments on commit 0b4be96

Please sign in to comment.