Skip to content

Commit 472c58e

Browse files
t4c1bader
authored and committed
[SYCL] Add tests for some half builtins (intel#880)
Co-authored-by: Alexey Bader <alexey.bader@intel.com>
1 parent ae7cbfe commit 472c58e

File tree

1 file changed

+197
-0
lines changed

1 file changed

+197
-0
lines changed

SYCL/Basic/half_builtins.cpp

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
3+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
6+
7+
// OpenCL CPU driver does not support cl_khr_fp16 extension
8+
// UNSUPPORTED: cpu && opencl
9+
10+
#include <sycl/sycl.hpp>
11+
12+
#include <cmath>
13+
#include <limits>
14+
15+
using namespace sycl;
16+
17+
// Number of float lanes in each float16 test input; also used as the
// parallel_for range and as the numerator of the per-width index bound
// (SZ_max / SZ) in the TEST_BUILTIN_* macros.
constexpr int SZ_max = 16;
18+
19+
bool check(float a, float b) {
20+
return fabs(2 * (a - b) / (a + b)) < std::numeric_limits<half>::epsilon() ||
21+
a < std::numeric_limits<half>::min();
22+
}
23+
24+
// Lane-wise overload: two N-element float vectors agree only if every pair of
// corresponding lanes passes the scalar check. Evaluation stops at the first
// lane that fails.
template <int N> bool check(vec<float, N> a, vec<float, N> b) {
  bool all_match = true;
  for (int lane = 0; all_match && lane < N; ++lane) {
    all_match = check(a[lane], b[lane]);
  }
  return all_match;
}
32+
33+
// One-argument builtin check for vectors of width SZ.
//
// Views the storage behind `A` as an array of float<SZ> vectors and compares
// NAME applied in float precision against NAME applied to the same vector
// converted to half (with the result converted back to float).
//
// Expects `A`, `i`, `err` and `check` to be visible at the expansion site and
// sets err[0] to 1 on a mismatch. Only indices below SZ_max / SZ take part,
// which keeps the vector view within SZ_max floats.
//
// The second input is not needed for a one-argument builtin, so `B` is not
// touched here, and the view is const because the input is only read.
#define TEST_BUILTIN_1_VEC_IMPL(NAME, SZ)                                      \
  {                                                                            \
    const float##SZ *va = (const float##SZ *)&A[0];                            \
    if (i < SZ_max / SZ &&                                                     \
        !check(NAME(va[i]), NAME(va[i].convert<half>()).convert<float>())) {   \
      err[0] = 1;                                                              \
    }                                                                          \
  }
43+
44+
// Vectors of size 3 need a separate test because they actually occupy the
// storage of 4 elements, so the index bound is SZ_max / 4 rather than
// SZ_max / 3.
//
// One-argument builtin check for float3: compares NAME applied in float
// precision against NAME applied to the half conversion of the same vector
// (converted back to float). Expects `A`, `i`, `err` and `check` at the
// expansion site and sets err[0] to 1 on a mismatch. `B` is not touched
// because a one-argument builtin has no second input, and the view is const
// because the input is only read.
#define TEST_BUILTIN_1_VEC3_IMPL(NAME)                                         \
  {                                                                            \
    const float3 *va = (const float3 *)&A[0];                                  \
    if (i < SZ_max / 4 &&                                                      \
        !check(NAME(va[i]), NAME(va[i].convert<half>()).convert<float>())) {   \
      err[0] = 1;                                                              \
    }                                                                          \
  }
56+
57+
// One-argument builtin check for scalars.
//
// Views the storage behind `A` as an array of floats and compares NAME applied
// to element i in float precision against NAME applied to the same element
// converted to half (with the result converted back to float).
//
// Expects `A`, `i`, `err` and `check` to be visible at the expansion site and
// sets err[0] to 1 on a mismatch. `B` is not touched because a one-argument
// builtin has no second input, and the view is const because the input is
// only read.
#define TEST_BUILTIN_1_SCAL_IMPL(NAME)                                         \
  {                                                                            \
    const float *sa = (const float *)&A[0];                                    \
    if (!check(NAME(sa[i]),                                                    \
               static_cast<float>(NAME(static_cast<half>(sa[i]))))) {          \
      err[0] = 1;                                                              \
    }                                                                          \
  }
65+
66+
// Runs the one-argument check for NAME on the scalar view and then on every
// vector width: 2, 3, 4, 8 and 16. Each step expands to its own brace-enclosed
// block, so the steps can simply be listed one after another.
#define TEST_BUILTIN_1(NAME)                                                   \
  TEST_BUILTIN_1_SCAL_IMPL(NAME)                                               \
  TEST_BUILTIN_1_VEC_IMPL(NAME, 2)                                             \
  TEST_BUILTIN_1_VEC3_IMPL(NAME)                                               \
  TEST_BUILTIN_1_VEC_IMPL(NAME, 4)                                             \
  TEST_BUILTIN_1_VEC_IMPL(NAME, 8)                                             \
  TEST_BUILTIN_1_VEC_IMPL(NAME, 16)
73+
74+
// Two-argument builtin check for vectors of width SZ.
//
// Views the storage behind `A` and `B` as arrays of float<SZ> vectors and
// compares NAME(a, b) in float precision against NAME applied to the half
// conversions of both operands (with the result converted back to float).
//
// Expects `A`, `B`, `i`, `err` and `check` at the expansion site and sets
// err[0] to 1 on a mismatch. Only indices below SZ_max / SZ take part.
#define TEST_BUILTIN_2_VEC_IMPL(NAME, SZ)                                      \
  {                                                                            \
    const float##SZ *va = (const float##SZ *)&A[0];                            \
    const float##SZ *vb = (const float##SZ *)&B[0];                            \
    if (i < SZ_max / SZ &&                                                     \
        !check(NAME(va[i], vb[i]),                                             \
               NAME(va[i].convert<half>(), vb[i].convert<half>())              \
                   .convert<float>())) {                                       \
      err[0] = 1;                                                              \
    }                                                                          \
  }
86+
87+
// Two-argument builtin check for float3.
//
// Same comparison as the generic vector variant, but the index bound is
// SZ_max / 4 instead of being derived from the element count 3.
//
// Expects `A`, `B`, `i`, `err` and `check` at the expansion site and sets
// err[0] to 1 on a mismatch.
#define TEST_BUILTIN_2_VEC3_IMPL(NAME)                                         \
  {                                                                            \
    const float3 *va = (const float3 *)&A[0];                                  \
    const float3 *vb = (const float3 *)&B[0];                                  \
    if (i < SZ_max / 4 &&                                                      \
        !check(NAME(va[i], vb[i]),                                             \
               NAME(va[i].convert<half>(), vb[i].convert<half>())              \
                   .convert<float>())) {                                       \
      err[0] = 1;                                                              \
    }                                                                          \
  }
99+
100+
// Two-argument builtin check for scalars.
//
// Views the storage behind `A` and `B` as arrays of floats and compares
// NAME(a, b) on element i in float precision against NAME applied to both
// elements converted to half (with the result converted back to float).
//
// Expects `A`, `B`, `i`, `err` and `check` at the expansion site and sets
// err[0] to 1 on a mismatch.
#define TEST_BUILTIN_2_SCAL_IMPL(NAME)                                         \
  {                                                                            \
    const float *sa = (const float *)&A[0];                                    \
    const float *sb = (const float *)&B[0];                                    \
    if (!check(NAME(sa[i], sb[i]),                                             \
               static_cast<float>(NAME(static_cast<half>(sa[i]),               \
                                       static_cast<half>(sb[i]))))) {          \
      err[0] = 1;                                                              \
    }                                                                          \
  }
108+
109+
// Runs the two-argument check for NAME on the scalar view and then on every
// vector width: 2, 3, 4, 8 and 16. Each step expands to its own brace-enclosed
// block, so the steps can simply be listed one after another.
#define TEST_BUILTIN_2(NAME)                                                   \
  TEST_BUILTIN_2_SCAL_IMPL(NAME)                                               \
  TEST_BUILTIN_2_VEC_IMPL(NAME, 2)                                             \
  TEST_BUILTIN_2_VEC3_IMPL(NAME)                                               \
  TEST_BUILTIN_2_VEC_IMPL(NAME, 4)                                             \
  TEST_BUILTIN_2_VEC_IMPL(NAME, 8)                                             \
  TEST_BUILTIN_2_VEC_IMPL(NAME, 16)
116+
117+
// Three-argument builtin check for vectors of width SZ.
//
// Views the storage behind `A`, `B` and `C` as arrays of float<SZ> vectors and
// compares NAME(a, b, c) in float precision against NAME applied to the half
// conversions of all three operands (with the result converted back to float).
//
// Expects `A`, `B`, `C`, `i`, `err` and `check` at the expansion site and sets
// err[0] to 1 on a mismatch. Only indices below SZ_max / SZ take part.
#define TEST_BUILTIN_3_VEC_IMPL(NAME, SZ)                                      \
  {                                                                            \
    const float##SZ *va = (const float##SZ *)&A[0];                            \
    const float##SZ *vb = (const float##SZ *)&B[0];                            \
    const float##SZ *vc = (const float##SZ *)&C[0];                            \
    if (i < SZ_max / SZ &&                                                     \
        !check(NAME(va[i], vb[i], vc[i]),                                      \
               NAME(va[i].convert<half>(), vb[i].convert<half>(),              \
                    vc[i].convert<half>())                                     \
                   .convert<float>())) {                                       \
      err[0] = 1;                                                              \
    }                                                                          \
  }
131+
132+
// Three-argument builtin check for float3.
//
// Same comparison as the generic vector variant, but the index bound is
// SZ_max / 4 instead of being derived from the element count 3.
//
// Expects `A`, `B`, `C`, `i`, `err` and `check` at the expansion site and sets
// err[0] to 1 on a mismatch.
#define TEST_BUILTIN_3_VEC3_IMPL(NAME)                                         \
  {                                                                            \
    const float3 *va = (const float3 *)&A[0];                                  \
    const float3 *vb = (const float3 *)&B[0];                                  \
    const float3 *vc = (const float3 *)&C[0];                                  \
    if (i < SZ_max / 4 &&                                                      \
        !check(NAME(va[i], vb[i], vc[i]),                                      \
               NAME(va[i].convert<half>(), vb[i].convert<half>(),              \
                    vc[i].convert<half>())                                     \
                   .convert<float>())) {                                       \
      err[0] = 1;                                                              \
    }                                                                          \
  }
146+
147+
// Three-argument builtin check for scalars.
//
// Views the storage behind `A`, `B` and `C` as arrays of floats and compares
// NAME(a, b, c) on element i in float precision against NAME applied to all
// three elements converted to half (with the result converted back to float).
//
// Expects `A`, `B`, `C`, `i`, `err` and `check` at the expansion site and sets
// err[0] to 1 on a mismatch.
#define TEST_BUILTIN_3_SCAL_IMPL(NAME)                                         \
  {                                                                            \
    const float *sa = (const float *)&A[0];                                    \
    const float *sb = (const float *)&B[0];                                    \
    const float *sc = (const float *)&C[0];                                    \
    if (!check(NAME(sa[i], sb[i], sc[i]),                                      \
               static_cast<float>(NAME(static_cast<half>(sa[i]),               \
                                       static_cast<half>(sb[i]),               \
                                       static_cast<half>(sc[i]))))) {          \
      err[0] = 1;                                                              \
    }                                                                          \
  }
157+
158+
// Runs the three-argument check for NAME on the scalar view and then on every
// vector width: 2, 3, 4, 8 and 16. Each step expands to its own brace-enclosed
// block, so the steps can simply be listed one after another.
#define TEST_BUILTIN_3(NAME)                                                   \
  TEST_BUILTIN_3_SCAL_IMPL(NAME)                                               \
  TEST_BUILTIN_3_VEC_IMPL(NAME, 2)                                             \
  TEST_BUILTIN_3_VEC3_IMPL(NAME)                                               \
  TEST_BUILTIN_3_VEC_IMPL(NAME, 4)                                             \
  TEST_BUILTIN_3_VEC_IMPL(NAME, 8)                                             \
  TEST_BUILTIN_3_VEC_IMPL(NAME, 16)
165+
166+
int main() {
167+
queue q;
168+
float16 a, b, c, d;
169+
for (int i = 0; i < SZ_max; i++) {
170+
a[i] = i / (float)SZ_max;
171+
b[i] = (SZ_max - i) / (float)SZ_max;
172+
c[i] = (float)(3 * i);
173+
}
174+
int err = 0;
175+
{
176+
buffer<float16> a_buf(&a, 1);
177+
buffer<float16> b_buf(&b, 1);
178+
buffer<float16> c_buf(&c, 1);
179+
buffer<int> err_buf(&err, 1);
180+
q.submit([&](handler &cgh) {
181+
auto A = a_buf.get_access<access::mode::read>(cgh);
182+
auto B = b_buf.get_access<access::mode::read>(cgh);
183+
auto C = c_buf.get_access<access::mode::read>(cgh);
184+
auto err = err_buf.get_access<access::mode::write>(cgh);
185+
cgh.parallel_for(SZ_max, [=](item<1> index) {
186+
size_t i = index.get_id(0);
187+
TEST_BUILTIN_1(fabs);
188+
TEST_BUILTIN_2(fmin);
189+
TEST_BUILTIN_2(fmax);
190+
TEST_BUILTIN_3(fma);
191+
});
192+
});
193+
}
194+
assert(err == 0);
195+
196+
return 0;
197+
}

0 commit comments

Comments
 (0)