diff --git a/SYCL/Basic/half_builtins.cpp b/SYCL/Basic/half_builtins.cpp new file mode 100755 index 0000000000..0d49bd6891 --- /dev/null +++ b/SYCL/Basic/half_builtins.cpp @@ -0,0 +1,197 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// OpenCL CPU driver does not support cl_khr_fp16 extension +// UNSUPPORTED: cpu && opencl + +#include + +#include +#include + +using namespace sycl; + +constexpr int SZ_max = 16; + +bool check(float a, float b) { + return fabs(2 * (a - b) / (a + b)) < std::numeric_limits::epsilon() || + a < std::numeric_limits::min(); +} + +template bool check(vec a, vec b) { + for (int i = 0; i < N; i++) { + if (!check(a[i], b[i])) { + return false; + } + } + return true; +} + +#define TEST_BUILTIN_1_VEC_IMPL(NAME, SZ) \ + { \ + float##SZ *a = (float##SZ *)&A[0]; \ + float##SZ *b = (float##SZ *)&B[0]; \ + if (i < SZ_max / SZ) { \ + if (!check(NAME(a[i]), NAME(a[i].convert()).convert())) { \ + err[0] = 1; \ + } \ + } \ + } + +// vectors of size 3 need separate test, as they actually have the size of 4 +// elements +#define TEST_BUILTIN_1_VEC3_IMPL(NAME) \ + { \ + float3 *a = (float3 *)&A[0]; \ + float3 *b = (float3 *)&B[0]; \ + if (i < SZ_max / 4) { \ + if (!check(NAME(a[i]), NAME(a[i].convert()).convert())) { \ + err[0] = 1; \ + } \ + } \ + } + +#define TEST_BUILTIN_1_SCAL_IMPL(NAME) \ + { \ + float *a = (float *)&A[0]; \ + float *b = (float *)&B[0]; \ + if (!check(NAME(a[i]), (float)NAME((half)a[i]))) { \ + err[0] = 1; \ + } \ + } + +#define TEST_BUILTIN_1(NAME) \ + TEST_BUILTIN_1_SCAL_IMPL(NAME) \ + TEST_BUILTIN_1_VEC_IMPL(NAME, 2) \ + TEST_BUILTIN_1_VEC3_IMPL(NAME) \ + TEST_BUILTIN_1_VEC_IMPL(NAME, 4) \ + TEST_BUILTIN_1_VEC_IMPL(NAME, 8) \ + TEST_BUILTIN_1_VEC_IMPL(NAME, 16) + +#define TEST_BUILTIN_2_VEC_IMPL(NAME, SZ) \ + { \ + float##SZ *a = (float##SZ *)&A[0]; \ + float##SZ *b = (float##SZ *)&B[0]; \ + if (i < SZ_max / SZ) { \ + if (!check(NAME(a[i], b[i]), \ + NAME(a[i].convert(), b[i].convert()) \ + .convert())) { \ + err[0] = 1; \ + } \ + } \ + } + +#define TEST_BUILTIN_2_VEC3_IMPL(NAME) \ + { \ + float3 *a = (float3 *)&A[0]; \ + float3 *b = (float3 *)&B[0]; \ + if (i < SZ_max / 4) { \ + if (!check(NAME(a[i], b[i]), \ + NAME(a[i].convert(), b[i].convert()) \ + .convert())) { \ + err[0] = 1; \ + } \ + } \ + } + +#define TEST_BUILTIN_2_SCAL_IMPL(NAME) \ + { \ + float *a = (float *)&A[0]; \ + float *b = (float *)&B[0]; \ + if (!check(NAME(a[i], b[i]), (float)NAME((half)a[i], (half)b[i]))) { \ + err[0] = 1; \ + } \ + } + +#define TEST_BUILTIN_2(NAME) \ + TEST_BUILTIN_2_SCAL_IMPL(NAME) \ + TEST_BUILTIN_2_VEC_IMPL(NAME, 2) \ + TEST_BUILTIN_2_VEC3_IMPL(NAME) \ + TEST_BUILTIN_2_VEC_IMPL(NAME, 4) \ + TEST_BUILTIN_2_VEC_IMPL(NAME, 8) \ + TEST_BUILTIN_2_VEC_IMPL(NAME, 16) + +#define TEST_BUILTIN_3_VEC_IMPL(NAME, SZ) \ + { \ + float##SZ *a = (float##SZ *)&A[0]; \ + float##SZ *b = (float##SZ *)&B[0]; \ + float##SZ *c = (float##SZ *)&C[0]; \ + if (i < SZ_max / SZ) { \ + if (!check(NAME(a[i], b[i], c[i]), \ + NAME(a[i].convert(), b[i].convert(), \ + c[i].convert()) \ + .convert())) { \ + err[0] = 1; \ + } \ + } \ + } + +#define TEST_BUILTIN_3_VEC3_IMPL(NAME) \ + { \ + float3 *a = (float3 *)&A[0]; \ + float3 *b = (float3 *)&B[0]; \ + float3 *c = (float3 *)&C[0]; \ + if (i < SZ_max / 4) { \ + if (!check(NAME(a[i], b[i], c[i]), \ + NAME(a[i].convert(), b[i].convert(), \ + c[i].convert()) \ + .convert())) { \ + err[0] = 1; \ + } \ + } \ + } + +#define TEST_BUILTIN_3_SCAL_IMPL(NAME) \ + { \ + float *a = (float *)&A[0]; \ + float *b = (float *)&B[0]; \ + float *c = (float *)&C[0]; \ + if (!check(NAME(a[i], b[i], c[i]), \ + (float)NAME((half)a[i], (half)b[i], (half)c[i]))) { \ + err[0] = 1; \ + } \ + } + +#define TEST_BUILTIN_3(NAME) \ + TEST_BUILTIN_3_SCAL_IMPL(NAME) \ + TEST_BUILTIN_3_VEC_IMPL(NAME, 2) \ + TEST_BUILTIN_3_VEC3_IMPL(NAME) \ + TEST_BUILTIN_3_VEC_IMPL(NAME, 4) \ + TEST_BUILTIN_3_VEC_IMPL(NAME, 8) \ + TEST_BUILTIN_3_VEC_IMPL(NAME, 16) + +int main() { + queue q; + float16 a, b, c, d; + for (int i = 0; i < SZ_max; i++) { + a[i] = i / (float)SZ_max; + b[i] = (SZ_max - i) / (float)SZ_max; + c[i] = (float)(3 * i); + } + int err = 0; + { + buffer a_buf(&a, 1); + buffer b_buf(&b, 1); + buffer c_buf(&c, 1); + buffer err_buf(&err, 1); + q.submit([&](handler &cgh) { + auto A = a_buf.get_access(cgh); + auto B = b_buf.get_access(cgh); + auto C = c_buf.get_access(cgh); + auto err = err_buf.get_access(cgh); + cgh.parallel_for(SZ_max, [=](item<1> index) { + size_t i = index.get_id(0); + TEST_BUILTIN_1(fabs); + TEST_BUILTIN_2(fmin); + TEST_BUILTIN_2(fmax); + TEST_BUILTIN_3(fma); + }); + }); + } + assert(err == 0); + + return 0; +}