Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions llvm/lib/SYCLLowerIR/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1234,10 +1234,6 @@ void SYCLLowerESIMDLegacyPass::collectGenXVolatileType(Module &M) {
PreservedAnalyses SYCLLowerESIMDPass::run(Function &F,
FunctionAnalysisManager &FAM,
SmallPtrSet<Type *, 4> &GVTS) {
// Only consider functions marked with !sycl_explicit_simd
if (F.getMetadata("sycl_explicit_simd") == nullptr)
return PreservedAnalyses::all();

SmallVector<CallInst *, 32> ESIMDIntrCalls;
SmallVector<Instruction *, 8> ESIMDToErases;

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/SYCLLowerIR/esimd_global.ll
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ declare void @llvm.lifetime.start.p0i8(i64 immarg %0, i8* nocapture %1) #2
declare void @llvm.lifetime.end.p0i8(i64 immarg %0, i8* nocapture %1) #2

; Function Attrs: noinline norecurse nounwind
define dso_local spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE(i32 addrspace(4)* %C, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %v) local_unnamed_addr #3 !sycl_explicit_simd !12 {
define dso_local spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE(i32 addrspace(4)* %C, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %v) local_unnamed_addr #3 {
entry:
%agg.tmp = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", align 64
%0 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %v to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)*
Expand Down
68 changes: 35 additions & 33 deletions llvm/test/SYCLLowerIR/esimd_lower_intrins.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ target triple = "spir64-unknown-unknown-sycldevice"
@vg = dso_local global %"cm::gen::simd<int, 16>" zeroinitializer, align 64 #0
@vc = dso_local addrspace(1) global <32 x i32> zeroinitializer

define dso_local spir_func <32 x i32> @FUNC_1() !sycl_explicit_simd !1 {
; LowerESIMD pass should process every function,
; !sycl_explicit_simd metadata is not necessary.
define dso_local spir_func <32 x i32> @FUNC_1() {
%a_1 = alloca <32 x i64>
%1 = load <32 x i64>, <32 x i64>* %a_1
%a_2 = alloca <32 x i16>
Expand All @@ -24,7 +26,7 @@ define dso_local spir_func <32 x i32> @FUNC_1() !sycl_explicit_simd !1 {
ret <32 x i32> %ret_val
}

define dso_local spir_func <32 x i32> @FUNC_2() !sycl_explicit_simd !1 {
define dso_local spir_func <32 x i32> @FUNC_2() {
%a_1 = alloca <32 x i64>
%1 = load <32 x i64>, <32 x i64>* %a_1
%a_2 = alloca <32 x i32>
Expand All @@ -36,7 +38,7 @@ define dso_local spir_func <32 x i32> @FUNC_2() !sycl_explicit_simd !1 {
ret <32 x i32> %ret_val
}

define dso_local spir_func <32 x i32> @FUNC_3() !sycl_explicit_simd !1 {
define dso_local spir_func <32 x i32> @FUNC_3() {
%a_1 = alloca <32 x i64>
%1 = load <32 x i64>, <32 x i64>* %a_1
%a_2 = alloca <32 x i32>
Expand All @@ -50,21 +52,21 @@ define dso_local spir_func <32 x i32> @FUNC_3() !sycl_explicit_simd !1 {
ret <32 x i32> %ret_val
}

define dso_local spir_func <32 x i32> @FUNC_4() !sycl_explicit_simd !1 {
define dso_local spir_func <32 x i32> @FUNC_4() {
%ret_val = call spir_func <32 x i32> @_Z33__esimd_flat_block_read_unalignedIjLi32ELN2cm3gen9CacheHintE0ELS2_0EENS1_13__vector_typeIT_XT0_EE4typeEy(i64 0)
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.block.ld.unaligned.v32i32.i64(i64 0)
ret <32 x i32> %ret_val
}

define dso_local spir_func void @FUNC_5() !sycl_explicit_simd !1 {
define dso_local spir_func void @FUNC_5() {
%a_1 = alloca <32 x i32>
%1 = load <32 x i32>, <32 x i32>* %a_1
call spir_func void @_Z24__esimd_flat_block_writeIjLi32ELN2cm3gen9CacheHintE0ELS2_0EEvyNS1_13__vector_typeIT_XT0_EE4typeE(i64 0, <32 x i32> %1)
; CHECK: call void @llvm.genx.svm.block.st.i64.v32i32(i64 0, <32 x i32> %{{[0-9a-zA-Z_.]+}})
ret void
}

define dso_local spir_func <32 x i32> @FUNC_6() !sycl_explicit_simd !1 {
define dso_local spir_func <32 x i32> @FUNC_6() {
%a_1 = alloca <32 x i64>
%1 = load <32 x i64>, <32 x i64>* %a_1
%a_2 = alloca <32 x i16>
Expand All @@ -74,7 +76,7 @@ define dso_local spir_func <32 x i32> @FUNC_6() !sycl_explicit_simd !1 {
ret <32 x i32> %ret_val
}

define dso_local spir_func void @FUNC_7() !sycl_explicit_simd !1 {
define dso_local spir_func void @FUNC_7() {
%a_1 = alloca <32 x i64>
%1 = load <32 x i64>, <32 x i64>* %a_1
%a_2 = alloca <32 x i32>
Expand All @@ -86,7 +88,7 @@ define dso_local spir_func void @FUNC_7() !sycl_explicit_simd !1 {
ret void
}

define dso_local spir_func <16 x i16> @FUNC_8() !sycl_explicit_simd !1 {
define dso_local spir_func <16 x i16> @FUNC_8() {
%a_1 = alloca <16 x i16>
%1 = load <16 x i16>, <16 x i16>* %a_1
%a_2 = alloca <16 x i16>
Expand All @@ -96,7 +98,7 @@ define dso_local spir_func <16 x i16> @FUNC_8() !sycl_explicit_simd !1 {
ret <16 x i16> %ret_val
}

define dso_local spir_func <1 x float> @FUNC_9() !sycl_explicit_simd !1 {
define dso_local spir_func <1 x float> @FUNC_9() {
%a_1 = alloca <1 x float>
%1 = load <1 x float>, <1 x float>* %a_1
%a_2 = alloca <1 x float>
Expand All @@ -106,15 +108,15 @@ define dso_local spir_func <1 x float> @FUNC_9() !sycl_explicit_simd !1 {
ret <1 x float> %ret_val
}

define dso_local spir_func <8 x float> @FUNC_10() !sycl_explicit_simd !1 {
define dso_local spir_func <8 x float> @FUNC_10() {
%a_1 = alloca <16 x float>
%1 = load <16 x float>, <16 x float>* %a_1
%ret_val = call spir_func <8 x float> @_Z16__esimd_rdregionIfLi16ELi8ELi0ELi8ELi1ELi0EEN2cm3gen13__vector_typeIT_XT1_EE4typeENS2_IS3_XT0_EE4typeEt(<16 x float> %1, i16 zeroext 0)
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x float> @llvm.genx.rdregionf.v8f32.v16f32.i16(<16 x float> %{{[0-9a-zA-Z_.]+}}, i32 0, i32 8, i32 1, i16 0, i32 0)
ret <8 x float> %ret_val
}

define dso_local spir_func <16 x float> @FUNC_11() !sycl_explicit_simd !1 {
define dso_local spir_func <16 x float> @FUNC_11() {
%a_1 = alloca <16 x float>
%1 = load <16 x float>, <16 x float>* %a_1
%a_2 = alloca <8 x float>
Expand All @@ -124,36 +126,36 @@ define dso_local spir_func <16 x float> @FUNC_11() !sycl_explicit_simd !1 {
ret <16 x float> %ret_val
}

define dso_local spir_func <32 x i32> @FUNC_21(%opencl.image2d_ro_t addrspace(1)* %0, i32 %1, i32 %2) !sycl_explicit_simd !1 {
define dso_local spir_func <32 x i32> @FUNC_21(%opencl.image2d_ro_t addrspace(1)* %0, i32 %1, i32 %2) {
%ret_val = call spir_func <32 x i32> @_Z24__esimd_media_block_loadIiLi4ELi8E14ocl_image2d_roEN2cm3gen13__vector_typeIT_XmlT0_T1_EE4typeEjT2_jjjj(i32 0, %opencl.image2d_ro_t addrspace(1)* %0, i32 0, i32 32, i32 %1, i32 %2)
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.media.ld.v32i32(i32 0, i32 %{{[0-9a-zA-Z_.]+}}, i32 0, i32 32, i32 %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}})
ret <32 x i32> %ret_val
}

define dso_local spir_func void @FUNC_22(%opencl.image2d_wo_t addrspace(1)* %0, i32 %1, i32 %2) !sycl_explicit_simd !1 {
define dso_local spir_func void @FUNC_22(%opencl.image2d_wo_t addrspace(1)* %0, i32 %1, i32 %2) {
%a_3 = alloca <32 x i32>
%4 = load <32 x i32>, <32 x i32>* %a_3
call spir_func void @_Z25__esimd_media_block_storeIiLi4ELi8E14ocl_image2d_woEvjT2_jjjjN2cm3gen13__vector_typeIT_XmlT0_T1_EE4typeE(i32 0, %opencl.image2d_wo_t addrspace(1)* %0, i32 0, i32 32, i32 %1, i32 %2, <32 x i32> %4)
; CHECK: call void @llvm.genx.media.st.v32i32(i32 0, i32 %{{[0-9a-zA-Z_.]+}}, i32 0, i32 32, i32 %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}})
ret void
}

define dso_local spir_func <16 x i32> @FUNC_23() !sycl_explicit_simd !1 {
define dso_local spir_func <16 x i32> @FUNC_23() {
%ret_val = call spir_func <16 x i32> @_Z13__esimd_vloadIiLi16EEN2cm3gen13__vector_typeIT_XT0_EE4typeEPKS5_(<16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds (%"cm::gen::simd<int, 16>", %"cm::gen::simd<int, 16>"* @vg, i32 0, i32 0) to <16 x i32> addrspace(4)*))
; CHECK: %ret_val1 = load <16 x i32>, <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds (%"cm::gen::simd<int, 16>", %"cm::gen::simd<int, 16>"* @vg, i32 0, i32 0) to <16 x i32> addrspace(4)*), align 64
; TODO: testcase to generate this:
; CxHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* {{.*}})
ret <16 x i32> %ret_val
}

define dso_local spir_func void @FUNC_28(<32 x i32> %0) !sycl_explicit_simd !1 {
define dso_local spir_func void @FUNC_28(<32 x i32> %0) {
call spir_func void @_Z14__esimd_vstoreIiLi32EEvPN2cm3gen13__vector_typeIT_XT0_EE4typeES5_(<32 x i32> addrspace(4)* addrspacecast (<32 x i32> addrspace(1)* @vc to <32 x i32> addrspace(4)*), <32 x i32> %0)
; CHECK: store <32 x i32> %0, <32 x i32> addrspace(4)* addrspacecast (<32 x i32> addrspace(1)* @vc to <32 x i32> addrspace(4)*), align 128

ret void
}

define dso_local spir_func void @FUNC_29() !sycl_explicit_simd !1 {
define dso_local spir_func void @FUNC_29() {
%a_1 = alloca <32 x i32>
%1 = addrspacecast <32 x i32>* %a_1 to <32 x i32> addrspace(4)*
%a_2 = alloca <32 x i32>
Expand All @@ -163,15 +165,15 @@ define dso_local spir_func void @FUNC_29() !sycl_explicit_simd !1 {
ret void
}

define dso_local spir_kernel void @FUNC_30() !sycl_explicit_simd !1 {
; CHECK: define dso_local spir_kernel void @FUNC_30() !sycl_explicit_simd !1
define dso_local spir_kernel void @FUNC_30() {
; CHECK: define dso_local spir_kernel void @FUNC_30()
call spir_func void @_ZN2cl4sycl5INTEL3gpu8slm_initEj(i32 1023)
ret void
; CHECK-NEXT: ret void
}

define dso_local spir_kernel void @FUNC_31() !sycl_explicit_simd !1 {
; CHECK: define dso_local spir_kernel void @FUNC_31() !sycl_explicit_simd !1
define dso_local spir_kernel void @FUNC_31() {
; CHECK: define dso_local spir_kernel void @FUNC_31()
%call = call spir_func i64 @_Z27__spirv_LocalInvocationId_xv()
; CHECK-NEXT: %call.esimd = call <3 x i32> @llvm.genx.local.id.v3i32()
; CHECK-NEXT: %local_id.x = extractelement <3 x i32> %call.esimd, i32 0
Expand All @@ -180,7 +182,7 @@ define dso_local spir_kernel void @FUNC_31() !sycl_explicit_simd !1 {
; CHECK-NEXT: ret void
}

define dso_local spir_func <16 x i32> @FUNC_32() !sycl_explicit_simd !1 {
define dso_local spir_func <16 x i32> @FUNC_32() {
%a_1 = alloca <16 x i32>
%1 = load <16 x i32>, <16 x i32>* %a_1
%a_2 = alloca <16 x i32>
Expand All @@ -192,7 +194,7 @@ define dso_local spir_func <16 x i32> @FUNC_32() !sycl_explicit_simd !1 {
ret <16 x i32> %ret_val
}

define dso_local spir_func <16 x i32> @FUNC_33() !sycl_explicit_simd !1 {
define dso_local spir_func <16 x i32> @FUNC_33() {
%a_1 = alloca <16 x i32>
%1 = load <16 x i32>, <16 x i32>* %a_1
%a_2 = alloca <16 x i32>
Expand All @@ -204,7 +206,7 @@ define dso_local spir_func <16 x i32> @FUNC_33() !sycl_explicit_simd !1 {
ret <16 x i32> %ret_val
}

define dso_local spir_func <16 x i32> @FUNC_34() !sycl_explicit_simd !1 {
define dso_local spir_func <16 x i32> @FUNC_34() {
%a_1 = alloca <16 x i32>
%1 = load <16 x i32>, <16 x i32>* %a_1
%a_2 = alloca <16 x i32>
Expand All @@ -216,7 +218,7 @@ define dso_local spir_func <16 x i32> @FUNC_34() !sycl_explicit_simd !1 {
ret <16 x i32> %ret_val
}

define dso_local spir_func <16 x i32> @FUNC_35() !sycl_explicit_simd !1 {
define dso_local spir_func <16 x i32> @FUNC_35() {
%a_1 = alloca <16 x i32>
%1 = load <16 x i32>, <16 x i32>* %a_1
%a_2 = alloca <16 x i32>
Expand All @@ -228,7 +230,7 @@ define dso_local spir_func <16 x i32> @FUNC_35() !sycl_explicit_simd !1 {
ret <16 x i32> %ret_val
}

define dso_local spir_func <16 x i32> @FUNC_36() !sycl_explicit_simd !1 {
define dso_local spir_func <16 x i32> @FUNC_36() {
%a_1 = alloca <16 x i32>
%1 = load <16 x i32>, <16 x i32>* %a_1
%a_2 = alloca <16 x i32>
Expand All @@ -240,7 +242,7 @@ define dso_local spir_func <16 x i32> @FUNC_36() !sycl_explicit_simd !1 {
ret <16 x i32> %ret_val
}

define dso_local spir_func <16 x i32> @FUNC_37() !sycl_explicit_simd !1 {
define dso_local spir_func <16 x i32> @FUNC_37() {
%a_1 = alloca <16 x i32>
%1 = load <16 x i32>, <16 x i32>* %a_1
%a_2 = alloca <16 x i32>
Expand All @@ -252,7 +254,7 @@ define dso_local spir_func <16 x i32> @FUNC_37() !sycl_explicit_simd !1 {
ret <16 x i32> %ret_val
}

define dso_local spir_func <16 x i32> @FUNC_38() !sycl_explicit_simd !1 {
define dso_local spir_func <16 x i32> @FUNC_38() {
%a_1 = alloca <16 x i32>
%1 = load <16 x i32>, <16 x i32>* %a_1
%a_2 = alloca <16 x i32>
Expand All @@ -264,7 +266,7 @@ define dso_local spir_func <16 x i32> @FUNC_38() !sycl_explicit_simd !1 {
ret <16 x i32> %ret_val
}

define dso_local spir_func <16 x i32> @FUNC_39() !sycl_explicit_simd !1 {
define dso_local spir_func <16 x i32> @FUNC_39() {
%a_1 = alloca <16 x i32>
%1 = load <16 x i32>, <16 x i32>* %a_1
%a_2 = alloca <16 x i32>
Expand All @@ -276,25 +278,25 @@ define dso_local spir_func <16 x i32> @FUNC_39() !sycl_explicit_simd !1 {
ret <16 x i32> %ret_val
}

define dso_local spir_func <8 x i32> @FUNC_40() !sycl_explicit_simd !1 {
define dso_local spir_func <8 x i32> @FUNC_40() {
%ret_val = call spir_func <8 x i32> @_Z22__esimd_slm_block_readIiLi8EEN2cl4sycl5INTEL3gpu11vector_typeIT_XT0_EE4typeEj(i32 0)
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x i32> @llvm.genx.oword.ld.v8i32(i32 0, i32 254, i32 0)
ret <8 x i32> %ret_val
}

define dso_local spir_func void @FUNC_41() !sycl_explicit_simd !1 {
define dso_local spir_func void @FUNC_41() {
call spir_func void @_Z16__esimd_sbarrierN2cl4sycl5INTEL3gpu17EsimdSbarrierTypeE(i8 zeroext 1)
; CHECK: call void @llvm.genx.sbarrier(i8 1)
ret void
}

define dso_local spir_func void @FUNC_42() !sycl_explicit_simd !1 {
define dso_local spir_func void @FUNC_42() {
call spir_func void @_Z16__esimd_sbarrierN2cl4sycl5INTEL3gpu17EsimdSbarrierTypeE(i8 zeroext 0)
; CHECK: call void @llvm.genx.sbarrier(i8 0)
ret void
}

define dso_local spir_func <8 x i32> @FUNC_43() !sycl_explicit_simd !1 {
define dso_local spir_func <8 x i32> @FUNC_43() {
%a_1 = alloca <16 x i32>
%1 = load <16 x i32>, <16 x i32>* %a_1
%a_2 = alloca <8 x i16>
Expand All @@ -304,7 +306,7 @@ define dso_local spir_func <8 x i32> @FUNC_43() !sycl_explicit_simd !1 {
ret <8 x i32> %ret_val
}

define dso_local spir_func <16 x i32> @FUNC_44() !sycl_explicit_simd !1 {
define dso_local spir_func <16 x i32> @FUNC_44() {
%a_1 = alloca <16 x i32>
%1 = load <16 x i32>, <16 x i32>* %a_1
%a_2 = alloca <8 x i32>
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/SYCLLowerIR/scalar_fptoui.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:
target triple = "spir64-unknown-unknown-sycldevice"

; Function Attrs: convergent norecurse
define dso_local spir_func i32 @foo(float %x) !sycl_explicit_simd !1 {
define dso_local spir_func i32 @foo(float %x) {
%y = fptoui float %x to i32
; check that the scalar float to unsigned int conversion is left intact
; CHECK: %y = fptoui float %x to i32
Expand Down
21 changes: 14 additions & 7 deletions sycl/test/esimd/slm_load4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,22 @@
using namespace sycl::INTEL::gpu;
using namespace cl::sycl;

void kernel() __attribute__((sycl_device)) {
simd<uint32_t, 32> offsets(0, 1);
simd<int, 128> v1(0, 1);
template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel_call(Func kernelFunc) {
kernelFunc();
}

void caller() {
kernel_call<class EsimdKernel>([=]() SYCL_ESIMD_KERNEL {
simd<uint32_t, 32> offsets(0, 1);
simd<int, 128> v1(0, 1);

slm_init(1024);
slm_init(1024);

auto v0 = slm_load4<int, 32, ESIMD_ABGR_ENABLE>(offsets);
auto v0 = slm_load4<int, 32, ESIMD_ABGR_ENABLE>(offsets);

v0 = v0 + v1;
v0 = v0 + v1;

slm_store4<int, 32, ESIMD_ABGR_ENABLE>(v0, offsets);
slm_store4<int, 32, ESIMD_ABGR_ENABLE>(v0, offsets);
});
}