Skip to content

Commit abf8a6a

Browse files
committed
Fix/refactor esimd intrinsic tests.
Signed-off-by: Konstantin S Bobrovsky <konstantin.s.bobrovsky@intel.com>
1 parent 9623914 commit abf8a6a

File tree

3 files changed

+123
-114
lines changed

3 files changed

+123
-114
lines changed

llvm/test/SYCLLowerIR/esimd_lower_intrins.ll

Lines changed: 7 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22
; consumable by the CM back-end.
33
;
44
; RUN: opt < %s -LowerESIMD -S | FileCheck %s
5+
;
6+
; TODO: refactor all the test cases - rewrite them in C++ and move them to
7+
; sycl/test/esimd/intrins_trans.cpp for much easier maintenance without losing
8+
; testing strength. Formally, each LLVM pass should have .ll tests, but this is
9+
; not practical in this case.
10+
;
11+
; All new test cases should be added to sycl/test/esimd/intrins_trans.cpp.
512

613
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
714
target triple = "spir64-unknown-unknown"
@@ -16,77 +23,6 @@ target triple = "spir64-unknown-unknown"
1623

1724
; LowerESIMD pass should process every function,
1825
; !sycl_explicit_simd metadata is not necessary.
19-
define dso_local spir_func <32 x i32> @FUNC_1() {
20-
%a_1 = alloca <32 x i64>
21-
%1 = load <32 x i64>, <32 x i64>* %a_1
22-
%a_2 = alloca <32 x i16>
23-
%2 = load <32 x i16>, <32 x i16>* %a_2
24-
%ret_val = call spir_func <32 x i32> @_Z20__esimd_flat_atomic0ILN2cm3gen14CmAtomicOpTypeE2EjLi32ELNS1_9CacheHintE0ELS3_0EENS1_13__vector_typeIT0_XT1_EE4typeENS4_IyXT1_EE4typeENS4_ItXT1_EE4typeE(<32 x i64> %1, <32 x i16> %2)
25-
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.inc.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef)
26-
ret <32 x i32> %ret_val
27-
}
28-
29-
define dso_local spir_func <32 x i32> @FUNC_2() {
30-
%a_1 = alloca <32 x i64>
31-
%1 = load <32 x i64>, <32 x i64>* %a_1
32-
%a_2 = alloca <32 x i32>
33-
%2 = load <32 x i32>, <32 x i32>* %a_2
34-
%a_3 = alloca <32 x i16>
35-
%3 = load <32 x i16>, <32 x i16>* %a_3
36-
%ret_val = call spir_func <32 x i32> @_Z20__esimd_flat_atomic1ILN2cm3gen14CmAtomicOpTypeE0EjLi32ELNS1_9CacheHintE0ELS3_0EENS1_13__vector_typeIT0_XT1_EE4typeENS4_IyXT1_EE4typeES7_NS4_ItXT1_EE4typeE(<32 x i64> %1, <32 x i32> %2, <32 x i16> %3)
37-
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.add.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef)
38-
ret <32 x i32> %ret_val
39-
}
40-
41-
define dso_local spir_func <32 x i32> @FUNC_3() {
42-
%a_1 = alloca <32 x i64>
43-
%1 = load <32 x i64>, <32 x i64>* %a_1
44-
%a_2 = alloca <32 x i32>
45-
%2 = load <32 x i32>, <32 x i32>* %a_2
46-
%a_3 = alloca <32 x i32>
47-
%3 = load <32 x i32>, <32 x i32>* %a_3
48-
%a_4 = alloca <32 x i16>
49-
%4 = load <32 x i16>, <32 x i16>* %a_4
50-
%ret_val = call spir_func <32 x i32> @_Z20__esimd_flat_atomic2ILN2cm3gen14CmAtomicOpTypeE7EjLi32ELNS1_9CacheHintE0ELS3_0EENS1_13__vector_typeIT0_XT1_EE4typeENS4_IyXT1_EE4typeES7_S7_NS4_ItXT1_EE4typeE(<32 x i64> %1, <32 x i32> %2, <32 x i32> %3, <32 x i16> %4)
51-
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.cmpxchg.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef)
52-
ret <32 x i32> %ret_val
53-
}
54-
55-
define dso_local spir_func <32 x i32> @FUNC_4() {
56-
%ret_val = call spir_func <32 x i32> @_Z33__esimd_flat_block_read_unalignedIjLi32ELN2cm3gen9CacheHintE0ELS2_0EENS1_13__vector_typeIT_XT0_EE4typeEy(i64 0)
57-
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.block.ld.unaligned.v32i32.i64(i64 0)
58-
ret <32 x i32> %ret_val
59-
}
60-
61-
define dso_local spir_func void @FUNC_5() {
62-
%a_1 = alloca <32 x i32>
63-
%1 = load <32 x i32>, <32 x i32>* %a_1
64-
call spir_func void @_Z24__esimd_flat_block_writeIjLi32ELN2cm3gen9CacheHintE0ELS2_0EEvyNS1_13__vector_typeIT_XT0_EE4typeE(i64 0, <32 x i32> %1)
65-
; CHECK: call void @llvm.genx.svm.block.st.i64.v32i32(i64 0, <32 x i32> %{{[0-9a-zA-Z_.]+}})
66-
ret void
67-
}
68-
69-
define dso_local spir_func <32 x i32> @FUNC_6() {
70-
%a_1 = alloca <32 x i64>
71-
%1 = load <32 x i64>, <32 x i64>* %a_1
72-
%a_2 = alloca <32 x i16>
73-
%2 = load <32 x i16>, <32 x i16>* %a_2
74-
%ret_val = call spir_func <32 x i32> @_Z17__esimd_flat_readIjLi32ELi0ELN2cm3gen9CacheHintE0ELS2_0EENS1_13__vector_typeIT_XmlT0_clL_ZNS1_20ElemsPerAddrDecodingEjET1_EEE4typeENS3_IyXT0_EE4typeEiNS3_ItXT0_EE4typeE(<32 x i64> %1, i32 0, <32 x i16> %2)
75-
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.gather.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef)
76-
ret <32 x i32> %ret_val
77-
}
78-
79-
define dso_local spir_func void @FUNC_7() {
80-
%a_1 = alloca <32 x i64>
81-
%1 = load <32 x i64>, <32 x i64>* %a_1
82-
%a_2 = alloca <32 x i32>
83-
%2 = load <32 x i32>, <32 x i32>* %a_2
84-
%a_3 = alloca <32 x i16>
85-
%3 = load <32 x i16>, <32 x i16>* %a_3
86-
call spir_func void @_Z18__esimd_flat_writeIjLi32ELi0ELN2cm3gen9CacheHintE0ELS2_0EEvNS1_13__vector_typeIyXT0_EE4typeENS3_IT_XmlT0_clL_ZNS1_20ElemsPerAddrDecodingEjET1_EEE4typeEiNS3_ItXT0_EE4typeE(<32 x i64> %1, <32 x i32> %2, i32 0, <32 x i16> %3)
87-
; CHECK: call void @llvm.genx.svm.scatter.v32i1.v32i64.v32i32(<32 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}})
88-
ret void
89-
}
9026

9127
define dso_local spir_func <16 x i16> @FUNC_8() {
9228
%a_1 = alloca <16 x i16>
@@ -98,16 +34,6 @@ define dso_local spir_func <16 x i16> @FUNC_8() {
9834
ret <16 x i16> %ret_val
9935
}
10036

101-
define dso_local spir_func <1 x float> @FUNC_9() {
102-
%a_1 = alloca <1 x float>
103-
%1 = load <1 x float>, <1 x float>* %a_1
104-
%a_2 = alloca <1 x float>
105-
%2 = load <1 x float>, <1 x float>* %a_2
106-
%ret_val = call spir_func <1 x float> @_Z16__esimd_div_ieeeILi1EEN2cm3gen13__vector_typeIfXT_EE4typeES4_S4_(<1 x float> %1, <1 x float> %2)
107-
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <1 x float> @llvm.genx.ieee.div.v1f32(<1 x float> %{{[0-9a-zA-Z_.]+}}, <1 x float> %{{[0-9a-zA-Z_.]+}})
108-
ret <1 x float> %ret_val
109-
}
110-
11137
define dso_local spir_func <8 x float> @FUNC_10() {
11238
%a_1 = alloca <16 x float>
11339
%1 = load <16 x float>, <16 x float>* %a_1
@@ -126,20 +52,6 @@ define dso_local spir_func <16 x float> @FUNC_11() {
12652
ret <16 x float> %ret_val
12753
}
12854

129-
define dso_local spir_func <32 x i32> @FUNC_21(%opencl.image2d_ro_t addrspace(1)* %0, i32 %1, i32 %2) {
130-
%ret_val = call spir_func <32 x i32> @_Z24__esimd_media_block_loadIiLi4ELi8E14ocl_image2d_roEN2cm3gen13__vector_typeIT_XmlT0_T1_EE4typeEjT2_jjjj(i32 0, %opencl.image2d_ro_t addrspace(1)* %0, i32 0, i32 32, i32 %1, i32 %2)
131-
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.media.ld.v32i32(i32 0, i32 %{{[0-9a-zA-Z_.]+}}, i32 0, i32 32, i32 %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}})
132-
ret <32 x i32> %ret_val
133-
}
134-
135-
define dso_local spir_func void @FUNC_22(%opencl.image2d_wo_t addrspace(1)* %0, i32 %1, i32 %2) {
136-
%a_3 = alloca <32 x i32>
137-
%4 = load <32 x i32>, <32 x i32>* %a_3
138-
call spir_func void @_Z25__esimd_media_block_storeIiLi4ELi8E14ocl_image2d_woEvjT2_jjjjN2cm3gen13__vector_typeIT_XmlT0_T1_EE4typeE(i32 0, %opencl.image2d_wo_t addrspace(1)* %0, i32 0, i32 32, i32 %1, i32 %2, <32 x i32> %4)
139-
; CHECK: call void @llvm.genx.media.st.v32i32(i32 0, i32 %{{[0-9a-zA-Z_.]+}}, i32 0, i32 32, i32 %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}})
140-
ret void
141-
}
142-
14355
define dso_local spir_func <16 x i32> @FUNC_23() {
14456
%ret_val = call spir_func <16 x i32> @_Z13__esimd_vloadIiLi16EEN2cm3gen13__vector_typeIT_XT0_EE4typeEPKS5_(<16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds (%"cm::gen::simd<int, 16>", %"cm::gen::simd<int, 16>"* @vg, i32 0, i32 0) to <16 x i32> addrspace(4)*))
14557
; CHECK: %ret_val1 = load <16 x i32>, <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds (%"cm::gen::simd<int, 16>", %"cm::gen::simd<int, 16>"* @vg, i32 0, i32 0) to <16 x i32> addrspace(4)*), align 64
@@ -268,12 +180,6 @@ define dso_local spir_func <16 x i32> @FUNC_39() {
268180
ret <16 x i32> %ret_val
269181
}
270182

271-
define dso_local spir_func <8 x i32> @FUNC_40() {
272-
%ret_val = call spir_func <8 x i32> @_Z22__esimd_slm_block_readIiLi8EEN2cl4sycl3ext5intel3gpu11vector_typeIT_XT0_EE4typeEj(i32 0)
273-
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x i32> @llvm.genx.oword.ld.v8i32(i32 0, i32 254, i32 0)
274-
ret <8 x i32> %ret_val
275-
}
276-
277183
define dso_local spir_func void @FUNC_41() {
278184
call spir_func void @_Z16__esimd_sbarrierN2cl4sycl3ext5intel3gpu17EsimdSbarrierTypeE(i8 zeroext 1)
279185
; CHECK: call void @llvm.genx.sbarrier(i8 1)
@@ -378,16 +284,7 @@ define dso_local spir_func <32 x half> @FUNC_52() {
378284
}
379285

380286
declare dso_local i32 @_Z15__esimd_lane_idv()
381-
382-
declare dso_local spir_func <32 x i32> @_Z20__esimd_flat_atomic0ILN2cm3gen14CmAtomicOpTypeE2EjLi32ELNS1_9CacheHintE0ELS3_0EENS1_13__vector_typeIT0_XT1_EE4typeENS4_IyXT1_EE4typeENS4_ItXT1_EE4typeE(<32 x i64> %0, <32 x i16> %1)
383-
declare dso_local spir_func <32 x i32> @_Z20__esimd_flat_atomic1ILN2cm3gen14CmAtomicOpTypeE0EjLi32ELNS1_9CacheHintE0ELS3_0EENS1_13__vector_typeIT0_XT1_EE4typeENS4_IyXT1_EE4typeES7_NS4_ItXT1_EE4typeE(<32 x i64> %0, <32 x i32> %1, <32 x i16> %2)
384-
declare dso_local spir_func <32 x i32> @_Z20__esimd_flat_atomic2ILN2cm3gen14CmAtomicOpTypeE7EjLi32ELNS1_9CacheHintE0ELS3_0EENS1_13__vector_typeIT0_XT1_EE4typeENS4_IyXT1_EE4typeES7_S7_NS4_ItXT1_EE4typeE(<32 x i64> %0, <32 x i32> %1, <32 x i32> %2, <32 x i16> %3)
385-
declare dso_local spir_func <32 x i32> @_Z33__esimd_flat_block_read_unalignedIjLi32ELN2cm3gen9CacheHintE0ELS2_0EENS1_13__vector_typeIT_XT0_EE4typeEy(i64 %0)
386-
declare dso_local spir_func void @_Z24__esimd_flat_block_writeIjLi32ELN2cm3gen9CacheHintE0ELS2_0EEvyNS1_13__vector_typeIT_XT0_EE4typeE(i64 %0, <32 x i32> %1)
387-
declare dso_local spir_func <32 x i32> @_Z17__esimd_flat_readIjLi32ELi0ELN2cm3gen9CacheHintE0ELS2_0EENS1_13__vector_typeIT_XmlT0_clL_ZNS1_20ElemsPerAddrDecodingEjET1_EEE4typeENS3_IyXT0_EE4typeEiNS3_ItXT0_EE4typeE(<32 x i64> %0, i32 %1, <32 x i16> %2)
388-
declare dso_local spir_func void @_Z18__esimd_flat_writeIjLi32ELi0ELN2cm3gen9CacheHintE0ELS2_0EEvNS1_13__vector_typeIyXT0_EE4typeENS3_IT_XmlT0_clL_ZNS1_20ElemsPerAddrDecodingEjET1_EEE4typeEiNS3_ItXT0_EE4typeE(<32 x i64> %0, <32 x i32> %1, i32 %2, <32 x i16> %3)
389287
declare dso_local spir_func <16 x i16> @_Z12__esimd_sminIsLi16EEN2cm3gen13__vector_typeIT_XT0_EE4typeES5_S5_(<16 x i16> %0, <16 x i16> %1)
390-
declare dso_local spir_func <1 x float> @_Z16__esimd_div_ieeeILi1EEN2cm3gen13__vector_typeIfXT_EE4typeES4_S4_(<1 x float> %0, <1 x float> %1)
391288
declare dso_local spir_func <8 x float> @_Z16__esimd_rdregionIfLi16ELi8ELi0ELi8ELi1ELi0EEN2cm3gen13__vector_typeIT_XT1_EE4typeENS2_IS3_XT0_EE4typeEt(<16 x float> %0, i16 zeroext %1)
392289
declare dso_local spir_func <16 x float> @_Z16__esimd_wrregionIfLi16ELi8ELi0ELi8ELi1ELi0EEN2cm3gen13__vector_typeIT_XT0_EE4typeES5_NS2_IS3_XT1_EE4typeEtNS2_ItXT1_EE4typeE(<16 x float> %0, <8 x float> %1, i16 zeroext %2, <8 x i16> %3)
393290
declare dso_local spir_func <16 x i32> @_Z13__esimd_vloadIiLi16EEN2cm3gen13__vector_typeIT_XT0_EE4typeEPKS5_(<16 x i32> addrspace(4)* %0)
@@ -404,8 +301,6 @@ declare dso_local spir_func void @_Z14__esimd_vstoreIfLi1EEvPN2cm3gen13__vector_
404301
declare dso_local spir_func <16 x float> @_Z13__esimd_vloadIfLi16EEN2cm3gen13__vector_typeIT_XT0_EE4typeEPKS5_(<16 x float> addrspace(4)* %0)
405302
declare dso_local spir_func void @_Z14__esimd_vstoreIfLi8EEvPN2cm3gen13__vector_typeIT_XT0_EE4typeES5_(<8 x float> addrspace(4)* %0, <8 x float> %1)
406303
declare dso_local spir_func <8 x float> @_Z13__esimd_vloadIfLi8EEN2cm3gen13__vector_typeIT_XT0_EE4typeEPKS5_(<8 x float> addrspace(4)* %0)
407-
declare dso_local spir_func <32 x i32> @_Z24__esimd_media_block_loadIiLi4ELi8E14ocl_image2d_roEN2cm3gen13__vector_typeIT_XmlT0_T1_EE4typeEjT2_jjjj(i32 %0, %opencl.image2d_ro_t addrspace(1)* %1, i32 %2, i32 %3, i32 %4, i32 %5)
408-
declare dso_local spir_func void @_Z25__esimd_media_block_storeIiLi4ELi8E14ocl_image2d_woEvjT2_jjjjN2cm3gen13__vector_typeIT_XmlT0_T1_EE4typeE(i32 %0, %opencl.image2d_wo_t addrspace(1)* %1, i32 %2, i32 %3, i32 %4, i32 %5, <32 x i32> %6)
409304
declare dso_local spir_func <32 x i32> @_Z13__esimd_vloadIiLi32EEN2cm3gen13__vector_typeIT_XT0_EE4typeEPKS5_(<32 x i32> addrspace(4)* %0)
410305
declare dso_local spir_func void @_Z14__esimd_vstoreIfLi16EEvPN2cm3gen13__vector_typeIT_XT0_EE4typeES5_(<16 x float> addrspace(4)* %0, <16 x float> %1)
411306
declare dso_local spir_func void @_ZN2cl4sycl3ext5intel12experimental5esimd8slm_initEj(i32)
@@ -417,7 +312,6 @@ declare dso_local spir_func <16 x i32> @_Z18__esimd_uudp4a_satIjjjjLi16EEN2cl4sy
417312
declare dso_local spir_func <16 x i32> @_Z18__esimd_usdp4a_satIjiiiLi16EEN2cl4sycl3ext5intel3gpu11vector_typeIT_XT3_EE4typeENS4_IT0_XT3_EE4typeENS4_IT1_XT3_EE4typeENS4_IT2_XT3_EE4typeE(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
418313
declare dso_local spir_func <16 x i32> @_Z18__esimd_sudp4a_satIijjjLi16EEN2cl4sycl3ext5intel3gpu11vector_typeIT_XT3_EE4typeENS4_IT0_XT3_EE4typeENS4_IT1_XT3_EE4typeENS4_IT2_XT3_EE4typeE(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
419314
declare dso_local spir_func <16 x i32> @_Z18__esimd_ssdp4a_satIiiiiLi16EEN2cl4sycl3ext5intel3gpu11vector_typeIT_XT3_EE4typeENS4_IT0_XT3_EE4typeENS4_IT1_XT3_EE4typeENS4_IT2_XT3_EE4typeE(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
420-
declare dso_local spir_func <8 x i32> @_Z22__esimd_slm_block_readIiLi8EEN2cl4sycl3ext5intel3gpu11vector_typeIT_XT0_EE4typeEj(i32 %0)
421315
declare dso_local spir_func void @_Z16__esimd_sbarrierN2cl4sycl3ext5intel3gpu17EsimdSbarrierTypeE(i8 %0)
422316
declare dso_local spir_func <8 x i32> @_Z18__esimd_rdindirectIiLi16ELi8ELi0EEN2cl4sycl3ext5intel3gpu11vector_typeIT_XT1_EE4typeENS4_IS5_XT0_EE4typeENS4_ItXT1_EE4typeE(<16 x i32>, <8 x i16>)
423317
declare dso_local spir_func <16 x i32> @_Z18__esimd_wrindirectIiLi16ELi8ELi0EEN2cl4sycl3ext5intel3gpu11vector_typeIT_XT0_EE4typeES7_NS4_IS5_XT1_EE4typeENS4_ItXT1_EE4typeESB_(<16 x i32>, <8 x i32>, <8 x i16>, <8 x i16>)

sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,8 @@ __esimd_gather_scaled(__SEIEED::simd_mask_storage_t<N> pred,
486486
}
487487
#endif // __SYCL_DEVICE_ONLY__
488488

489-
// slm_block_read reads a block of data from SLM
489+
// Reads a block of data from the given surface at the given offset. The offset
490+
// must be 16-byte-aligned.
490491
template <typename Ty, int N, typename SurfIndAliasTy, int32_t IsModified = 0>
491492
__ESIMD_INTRIN __SEIEED::vector_type_t<Ty, N>
492493
__esimd_oword_ld(SurfIndAliasTy surf_ind, uint32_t addr)

sycl/test/esimd/intrins_trans.cpp

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,3 +154,117 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd<float, 16> foo() {
154154
}
155155
return d;
156156
}
157+
158+
// TODO:
159+
// 1. __esimd* intrinsic translation tests from
160+
// llvm/test/SYCLLowerIR/esimd_lower_intrins.ll should be refactored and
161+
// moved here, as the form below is much easier to maintain with the same
162+
// level of testing strength.
163+
// 2. Test cases above should be refactored to use __esimd* calls instead of
164+
// user-level APIs like gather.
165+
template <class T, int N> using vec = typename simd<T, N>::vector_type;
166+
167+
template <int N> using mask = typename simd_mask<N>::vector_type;
168+
169+
SYCL_EXTERNAL void use(const vec<float, 8> &x) SYCL_ESIMD_FUNCTION;
170+
SYCL_EXTERNAL void use(const vec<int, 8> &x) SYCL_ESIMD_FUNCTION;
171+
SYCL_EXTERNAL void use(const vec<unsigned char, 8> &x) SYCL_ESIMD_FUNCTION;
172+
173+
SYCL_EXTERNAL vec<float, 8> get8f() SYCL_ESIMD_FUNCTION;
174+
SYCL_EXTERNAL vec<int, 8> get8i() SYCL_ESIMD_FUNCTION;
175+
SYCL_EXTERNAL vec<uint64_t, 8> get8ui64() SYCL_ESIMD_FUNCTION;
176+
SYCL_EXTERNAL vec<unsigned short, 8> get8ui16() SYCL_ESIMD_FUNCTION;
177+
SYCL_EXTERNAL vec<unsigned char, 8> get8ui8() SYCL_ESIMD_FUNCTION;
178+
179+
SYCL_EXTERNAL void
180+
test_mem_intrins(uint64_t addr, const vec<float, 8> &xf,
181+
const vec<float, 8> &xi) SYCL_ESIMD_FUNCTION {
182+
{
183+
constexpr SurfaceIndex si = 0;
184+
vec<float, 8> x = __esimd_oword_ld_unaligned<float, 8>(si, 0);
185+
// CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x float> @llvm.genx.oword.ld.unaligned.v8f32(i32 0, i32 0, i32 0)
186+
use(x);
187+
}
188+
{
189+
constexpr SurfaceIndex si = 0;
190+
vec<float, 8> x = __esimd_oword_ld<float, 8>(si, 0);
191+
// CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x float> @llvm.genx.oword.ld.v8f32(i32 0, i32 0, i32 0)
192+
use(x);
193+
}
194+
{
195+
constexpr SurfaceIndex si = 0;
196+
__esimd_oword_st<float, 8>(si, 0, get8f());
197+
// CHECK-LABEL: call void @llvm.genx.oword.st.v8f32(i32 0, i32 0, <8 x float> %{{[a-zA-Z0-9.]+}})
198+
}
199+
{
200+
// TODO
201+
// vec<int, 8> x = __esimd_svm_block_ld<int, 8>(addr);
202+
} {
203+
vec<int, 8> x = __esimd_svm_block_ld_unaligned<int, 8>(addr);
204+
// CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x i32> @llvm.genx.svm.block.ld.unaligned.v8i32.i64(i64 %{{[a-zA-Z0-9.]+}})
205+
use(x);
206+
}
207+
{
208+
__esimd_svm_block_st<int, 8>(addr, get8i());
209+
// CHECK-LABEL: call void @llvm.genx.svm.block.st.i64.v8i32(i64 %{{[a-zA-Z0-9.]+}}, <8 x i32> %{{[a-zA-Z0-9.]+}})
210+
}
211+
{
212+
auto x = __esimd_svm_gather<unsigned char, 8>(get8ui64(), 0, get8ui16());
213+
// CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x i8> @llvm.genx.svm.gather.v8i8.v8i1.v8i64(<8 x i1> %{{[a-zA-Z0-9.]+}}, i32 0, <8 x i64> %{{[a-zA-Z0-9.]+}}, <8 x i8> undef)
214+
use(x);
215+
}
216+
{
217+
__esimd_svm_scatter<unsigned char, 8>(get8ui64(), get8ui8(), 0, get8ui16());
218+
// CHECK-LABEL: call void @llvm.genx.svm.scatter.v8i1.v8i64.v8i8(<8 x i1> %{{[a-zA-Z0-9.]+}}, i32 0, <8 x i64> %{{[a-zA-Z0-9.]+}}, <8 x i8> %{{[a-zA-Z0-9.]+}})
219+
}
220+
{
221+
auto x =
222+
__esimd_svm_atomic0<atomic_op::inc, int, 8>(get8ui64(), get8ui16());
223+
// CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x i32> @llvm.genx.svm.atomic.inc.v8i32.v8i1.v8i64(<8 x i1> %{{[a-zA-Z0-9.]+}}, <8 x i64> %{{[a-zA-Z0-9.]+}}, <8 x i32> undef)
224+
use(x);
225+
}
226+
{
227+
vec<float, 8> src0 = get8f();
228+
auto x = __esimd_svm_atomic1<atomic_op::fmin, float, 8>(get8ui64(), src0,
229+
get8ui16());
230+
// CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x float> @llvm.genx.svm.atomic.fmin.v8f32.v8i1.v8i64(<8 x i1> %{{[a-zA-Z0-9.]+}}, <8 x i64> %{{[a-zA-Z0-9.]+}}, <8 x float> %{{[a-zA-Z0-9.]+}}, <8 x float> undef)
231+
use(x);
232+
}
233+
{
234+
vec<float, 8> src0 = get8f();
235+
vec<float, 8> src1 = get8f();
236+
auto x = __esimd_svm_atomic2<atomic_op::fcmpwr, float, 8>(get8ui64(), src0,
237+
src1, get8ui16());
238+
// CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x float> @llvm.genx.svm.atomic.fcmpwr.v8f32.v8i1.v8i64(<8 x i1> %{{[a-zA-Z0-9.]+}}, <8 x i64> %{{[a-zA-Z0-9.]+}}, <8 x float> %{{[a-zA-Z0-9.]+}}, <8 x float> %{{[a-zA-Z0-9.]+}}, <8 x float> undef)
239+
use(x);
240+
}
241+
{
242+
constexpr SurfaceIndex si = 0;
243+
vec<float, 8> x =
244+
__esimd_media_ld<float, 2, 4, 0, SurfaceIndex, 0, 4>(si, 0, 0);
245+
// CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x float> @llvm.genx.media.ld.v8f32(i32 0, i32 0, i32 0, i32 4, i32 0, i32 0)
246+
use(x);
247+
}
248+
{
249+
constexpr SurfaceIndex si = 0;
250+
vec<float, 8> x = get8f();
251+
__esimd_media_st<float, 2, 4, 0, SurfaceIndex, 0, 4>(si, 0, 0, x);
252+
// CHECK-LABEL: call void @llvm.genx.media.st.v8f32(i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, <8 x float> %{{[a-zA-Z0-9.]+}})
253+
}
254+
}
255+
256+
SYCL_EXTERNAL void test_math_intrins() SYCL_ESIMD_FUNCTION {
257+
{
258+
vec<float, 8> x0 = get8f();
259+
vec<float, 8> x1 = get8f();
260+
auto y = __esimd_ieee_div<8>(x0, x1);
261+
// CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x float> @llvm.genx.ieee.div.v8f32(<8 x float> %{{[a-zA-Z0-9.]+}}, <8 x float> %{{[a-zA-Z0-9.]+}})
262+
use(y);
263+
}
264+
{
265+
vec<float, 8> x = get8f();
266+
auto y = __esimd_ieee_sqrt<8>(x);
267+
// CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x float> @llvm.genx.ieee.sqrt.v8f32(<8 x float> %{{[a-zA-Z0-9.]+}})
268+
use(y);
269+
}
270+
}

0 commit comments

Comments
 (0)