@@ -18,37 +18,39 @@ extern __constant int __OptDisable;
1818
1919// MEMFENCE IMPLEMENTATION
2020
// Emits a memory fence when optimizations are disabled for the module.
// __builtin_IB_memfence requires compile-time-constant operands, so the
// runtime flags (flushRW, isGlobal, invalidateL1, evictL1) are dispatched
// through an if/else chain that enumerates all 16 flag combinations; the
// macro machinery below generates that chain.
void __attribute__((optnone)) __intel_memfence_optnone(bool flushRW, bool isGlobal, bool invalidateL1, bool evictL1)
{
// One branch of the dispatch chain; the trailing `else` glues consecutive
// expansions together into a single chain.
#define MEMFENCE_IF(V1, V5, V6, V7)                                             \
    if (flushRW == V1 && isGlobal == V5 && invalidateL1 == V6 && evictL1 == V7) \
    {                                                                           \
        __builtin_IB_memfence(true, V1, false, false, false, V5, V6, V7);       \
    } else

// Generate combinations for all MEMFENCE_IF cases, e.g.:
// true, true, true
// true, true, false etc.
#define MF_L3(...) MF_L2(__VA_ARGS__,false) MF_L2(__VA_ARGS__,true)
#define MF_L2(...) MF_L1(__VA_ARGS__,false) MF_L1(__VA_ARGS__,true)
#define MF_L1(...) MEMFENCE_IF(__VA_ARGS__,false) MEMFENCE_IF(__VA_ARGS__,true)
    MF_L3(false)
    MF_L3(true) {}  // empty block terminates the final dangling `else`

#undef MEMFENCE_IF
#undef MF_L3
#undef MF_L2
#undef MF_L1
}
// Optimized memfence variant: the flags are passed straight through to
// __builtin_IB_memfence and are resolved to constants by inlining and
// constant propagation later in codegen.
void __intel_memfence(bool flushRW, bool isGlobal, bool invalidateL1, bool evictL1)
{
    __builtin_IB_memfence(true, flushRW, false, false, false, isGlobal, invalidateL1, evictL1);
}
4547
46- void __intel_memfence_handler (bool flushRW , bool isGlobal , bool invalidateL1 )
48+ void __intel_memfence_handler (bool flushRW , bool isGlobal , bool invalidateL1 , bool evictL1 )
4749{
4850 if (__OptDisable )
49- __intel_memfence_optnone (flushRW , isGlobal , invalidateL1 );
51+ __intel_memfence_optnone (flushRW , isGlobal , invalidateL1 , evictL1 );
5052 else
51- __intel_memfence (flushRW , isGlobal , invalidateL1 );
53+ __intel_memfence (flushRW , isGlobal , invalidateL1 , evictL1 );
5254}
5355
5456// TYPEDMEMFENCE IMPLEMENTATION
@@ -81,6 +83,7 @@ static void __intel_atomic_work_item_fence( Scope_t Memory, uint Semantics )
8183 bool fence = Semantics & ( Acquire | Release | AcquireRelease | SequentiallyConsistent );
8284
8385 bool invalidateL1 = Semantics & ( Acquire | AcquireRelease | SequentiallyConsistent );
86+ bool evictL1 = Semantics & ( Release | AcquireRelease | SequentiallyConsistent );
8487
8588 // We always need to 'fence' image memory (aka, flush caches, drain pipelines)
8689 fence |= ( Semantics & ImageMemory );
@@ -97,12 +100,12 @@ static void __intel_atomic_work_item_fence( Scope_t Memory, uint Semantics )
97100 // although on some platforms they may be elided; platform-specific checks are performed in codegen
98101 if (Semantics & WorkgroupMemory )
99102 {
100- __intel_memfence_handler (false, false, false);
103+ __intel_memfence_handler (false, false, false, false );
101104 }
102105 if (Semantics & CrossWorkgroupMemory )
103106 {
104107 bool flushL3 = Memory == Device || Memory == CrossDevice ;
105- __intel_memfence_handler (flushL3 , true, invalidateL1 );
108+ __intel_memfence_handler (flushL3 , true, invalidateL1 , evictL1 );
106109 }
107110 }
108111}
0 commit comments