|
8 | 8 |
|
9 | 9 | #include <clc/clc.h> |
10 | 10 | #include <spirv/spirv.h> |
| 11 | +#include <spirv/spirv_types.h> |
11 | 12 |
|
12 | | -void __clc_amdgcn_s_waitcnt(unsigned flags); |
// Emits one __builtin_amdgcn_fence at synchronization scope `scope_memory`,
// with the ordering selected by the memory-semantics mask `semantics`, and
// then returns from the enclosing (void) function.
//
// `semantics` is evaluated exactly once. `scope_memory` is pasted verbatim
// into the builtin call.
// NOTE(review): the builtin is understood to require a string-literal scope,
// which is why this is a macro rather than a function -- confirm against the
// clang builtin documentation.
//
// Ordering selection, strongest request first, so that a mask carrying more
// than one ordering bit never yields a weaker fence than one of its bits
// asks for:
//   SequentiallyConsistent                   -> __ATOMIC_SEQ_CST
//   AcquireRelease, or Acquire with Release  -> __ATOMIC_ACQ_REL
//   Acquire                                  -> __ATOMIC_ACQUIRE
//   Release                                  -> __ATOMIC_RELEASE
//   no ordering bit                          -> __ATOMIC_ACQ_REL
//
// The expansion is a braced block ending in a plain `return;`, so it can be
// used as a complete `case` body with or without a trailing semicolon.
#define BUILTIN_FENCE(semantics, scope_memory)                                 \
  {                                                                            \
    const unsigned int clc_fence_sem = (semantics);                            \
    if (clc_fence_sem & SequentiallyConsistent) {                              \
      __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, scope_memory);                  \
    } else if ((clc_fence_sem & AcquireRelease) ||                             \
               ((clc_fence_sem & Acquire) && (clc_fence_sem & Release))) {     \
      __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, scope_memory);                  \
    } else if (clc_fence_sem & Acquire) {                                      \
      __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, scope_memory);                  \
    } else if (clc_fence_sem & Release) {                                      \
      __builtin_amdgcn_fence(__ATOMIC_RELEASE, scope_memory);                  \
    } else {                                                                   \
      __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, scope_memory);                  \
    }                                                                          \
    return;                                                                    \
  }
13 | 24 |
|
14 | | -// s_waitcnt takes 16bit argument with a combined number of maximum allowed |
15 | | -// pending operations: |
16 | | -// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages |
17 | | -// [7] -- undefined |
18 | | -// [6:4] -- exports, GDS, and mem write |
19 | | -// [3:0] -- vector memory operations |
20 | | - |
21 | | -// Newer clang supports __builtin_amdgcn_s_waitcnt |
22 | | -#if __clang_major__ >= 5 |
23 | | -#define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x) |
24 | | -#else |
25 | | -#define __waitcnt(x) __clc_amdgcn_s_waitcnt(x) |
26 | | -_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt"); |
27 | | -#endif |
28 | | - |
29 | | -_CLC_DEF _CLC_OVERLOAD void __mem_fence(cl_mem_fence_flags flags) { |
30 | | - if (flags & CLK_GLOBAL_MEM_FENCE) { |
31 | | - // scalar loads are counted with LGKM but we don't know whether |
32 | | - // the compiler turned any loads to scalar |
33 | | - __waitcnt(0); |
34 | | - } else if (flags & CLK_LOCAL_MEM_FENCE) |
35 | | - __waitcnt(0xff); // LGKM is [12:8] |
| 25 | +_CLC_DEF _CLC_OVERLOAD void __mem_fence(unsigned int scope_memory, |
| 26 | + unsigned int semantics) { |
| 27 | + switch ((enum Scope)scope_memory) { |
| 28 | + case CrossDevice: |
| 29 | + BUILTIN_FENCE(semantics, "") |
| 30 | + case Device: |
| 31 | + BUILTIN_FENCE(semantics, "agent") |
| 32 | + case Workgroup: |
| 33 | + BUILTIN_FENCE(semantics, "workgroup") |
| 34 | + case Subgroup: |
| 35 | + BUILTIN_FENCE(semantics, "wavefront") |
| 36 | + case Invocation: |
| 37 | + BUILTIN_FENCE(semantics, "singlethread") |
| 38 | + } |
36 | 39 | } |
37 | | -#undef __waitcnt |
| 40 | +#undef BUILTIN_FENCE |
38 | 41 |
|
39 | | -_CLC_OVERLOAD _CLC_DEF void __spirv_MemoryBarrier(unsigned int memory, |
| 42 | +_CLC_OVERLOAD _CLC_DEF void __spirv_MemoryBarrier(unsigned int scope_memory, |
40 | 43 | unsigned int semantics) { |
41 | | - __mem_fence(memory); |
| 44 | + __mem_fence(scope_memory, semantics); |
42 | 45 | } |
43 | 46 |
|
44 | 47 | _CLC_OVERLOAD _CLC_DEF _CLC_CONVERGENT void |
45 | | -__spirv_ControlBarrier(unsigned int scope, unsigned int memory, |
| 48 | +__spirv_ControlBarrier(unsigned int scope_execution, unsigned scope_memory, |
46 | 49 | unsigned int semantics) { |
47 | 50 | if (semantics) { |
48 | | - __mem_fence(memory); |
| 51 | + __mem_fence(scope_memory, semantics); |
49 | 52 | } |
50 | 53 | __builtin_amdgcn_s_barrier(); |
51 | 54 | } |
0 commit comments