From 159e9f64a2d435af2fbc5588dadef3003d3b1ef6 Mon Sep 17 00:00:00 2001 From: SYCL Unbound Team Date: Thu, 20 Jul 2023 16:25:32 +0100 Subject: [PATCH 1/5] [SYCL][Bindless][4/4] Add experimental implementation of SYCL bindless images extension This commit stands as the fourth, and final, commit of four to make code review easier, mostly covering the changes made to the e2e tests with the additional tests for bindless images. The bindless images extension provides a new interface for allocating, creating, and accessing images in SYCL. Image memory allocation is seperated from image handle creation, and image handles can be passed to kernels without requesting access through accessors. This approach provides much more flexibility to the user, as well as enabling programs to implement features that were impossible to implement using standard SYCL images, such as a texture atlas. In addition to providing a new interface for images, this extension also provides initial experimental support for importing external memory into SYCL. Co-authored-by: Isaac Ault Co-authored-by: Hugh Bird Co-authored-by: Duncan Brawley Co-authored-by: Przemek Malon Co-authored-by: Chedy Najjar Co-authored-by: Sean Stirling Co-authored-by: Peter Zuzek Implement revision 4 of the bindless images extension proposal: https://github.com/intel/llvm/pull/9842 --- sycl/test-e2e/CMakeLists.txt | 2 + .../bindless_images/image_get_info.cpp | 206 ++++ .../bindless_images/mipmap/mipmap_read_1D.cpp | 142 +++ .../bindless_images/mipmap/mipmap_read_2D.cpp | 150 +++ .../bindless_images/mipmap/mipmap_read_3D.cpp | 142 +++ sycl/test-e2e/bindless_images/read_1D.cpp | 143 +++ sycl/test-e2e/bindless_images/read_2D.cpp | 130 +++ .../bindless_images/read_2D_dynamic.cpp | 134 +++ sycl/test-e2e/bindless_images/read_3D.cpp | 129 +++ .../test-e2e/bindless_images/read_sampled.cpp | 947 ++++++++++++++++++ .../bindless_images/read_write_1D.cpp | 125 +++ .../read_write_1D_subregion.cpp | 136 +++ .../bindless_images/read_write_2D.cpp | 136 +++ .../read_write_2D_subregion.cpp | 153 +++ .../bindless_images/read_write_3D.cpp | 136 +++ .../read_write_3D_subregion.cpp | 174 ++++ .../bindless_images/read_write_unsampled.cpp | 664 ++++++++++++ sycl/test-e2e/bindless_images/sampling_1D.cpp | 118 +++ sycl/test-e2e/bindless_images/sampling_2D.cpp | 147 +++ .../sampling_2D_USM_shared.cpp | 152 +++ .../bindless_images/sampling_2D_half.cpp | 133 +++ sycl/test-e2e/bindless_images/sampling_3D.cpp | 127 +++ .../vulkan_interop/sampled_images.cpp | 270 +++++ .../vulkan_interop/unsampled_images.cpp | 444 ++++++++ .../vulkan_interop/vulkan_common.hpp | 407 ++++++++ sycl/test-e2e/lit.cfg.py | 17 +- sycl/test-e2e/lit.site.cfg.py.in | 4 + 27 files changed, 5464 insertions(+), 4 deletions(-) create mode 100644 sycl/test-e2e/bindless_images/image_get_info.cpp create mode 100644 sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp create mode 100644 sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp create mode 100644 sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp create mode 100644 sycl/test-e2e/bindless_images/read_1D.cpp create mode 100644 sycl/test-e2e/bindless_images/read_2D.cpp create mode 100644 sycl/test-e2e/bindless_images/read_2D_dynamic.cpp create mode 100644 sycl/test-e2e/bindless_images/read_3D.cpp create mode 100644 sycl/test-e2e/bindless_images/read_sampled.cpp create mode 100644 sycl/test-e2e/bindless_images/read_write_1D.cpp create mode 100644 sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp create mode 100644 sycl/test-e2e/bindless_images/read_write_2D.cpp create mode 100644 sycl/test-e2e/bindless_images/read_write_2D_subregion.cpp create mode 100644 sycl/test-e2e/bindless_images/read_write_3D.cpp create mode 100644 sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp create mode 100644 sycl/test-e2e/bindless_images/read_write_unsampled.cpp create mode 100644 sycl/test-e2e/bindless_images/sampling_1D.cpp create mode 100644 sycl/test-e2e/bindless_images/sampling_2D.cpp create mode 100644 sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp create mode 100644 sycl/test-e2e/bindless_images/sampling_2D_half.cpp create mode 100644 sycl/test-e2e/bindless_images/sampling_3D.cpp create mode 100644 sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp create mode 100644 sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp create mode 100644 sycl/test-e2e/bindless_images/vulkan_interop/vulkan_common.hpp diff --git a/sycl/test-e2e/CMakeLists.txt b/sycl/test-e2e/CMakeLists.txt index d7aa95a5a6c75..89ef52e4a0b75 100644 --- a/sycl/test-e2e/CMakeLists.txt +++ b/sycl/test-e2e/CMakeLists.txt @@ -24,6 +24,8 @@ endif() # Standalone. find_package(Threads REQUIRED) set(SYCL_THREADS_LIB ${CMAKE_THREAD_LIBS_INIT}) +find_package(Vulkan) + if(NOT LLVM_LIT) find_program(LLVM_LIT NAMES llvm-lit lit.py lit diff --git a/sycl/test-e2e/bindless_images/image_get_info.cpp b/sycl/test-e2e/bindless_images/image_get_info.cpp new file mode 100644 index 0000000000000..841a5c350e63d --- /dev/null +++ b/sycl/test-e2e/bindless_images/image_get_info.cpp @@ -0,0 +1,206 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +void printString(std::string name) { +#ifdef VERBOSE_PRINT + std::cout << name; +#endif +} + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + size_t height = 13; + size_t width = 7; + size_t depth = 11; + + bool validated = true; + + try { + // Submit dummy kernel to let the runtime decide the backend (CUDA) + // Without this, the default Level Zero backend is active + q.submit([&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + + // Extension: image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height, depth}, sycl::image_channel_order::r, + sycl::image_channel_type::signed_int32); + + // Extension: returns the device pointer to the allocated memory + // Input images memory + sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); + + // Extension: query for bindless image support -- device aspects + bool bindless_support = dev.has(sycl::aspect::ext_oneapi_bindless_images); + bool bindless_shared_usm_support = + dev.has(sycl::aspect::ext_oneapi_bindless_images_shared_usm); + bool usm_1d_support = + dev.has(sycl::aspect::ext_oneapi_bindless_images_1d_usm); + bool usm_2d_support = + dev.has(sycl::aspect::ext_oneapi_bindless_images_2d_usm); + +#ifdef VERBOSE_PRINT + std::cout << "bindless_images_support: " << bindless_support + << "\nbindless_images_shared_usm_support: " + << bindless_shared_usm_support + << "\nbindless_images_1d_usm_support: " << usm_1d_support + << "\nbindless_images_2d_usm_support: " << usm_2d_support << "\n"; +#endif + + // Extension: get pitch alignment information from device -- device info + // Make sure our pitch alignment queries work properly + // These can be different depending on the device so we cannot test that the + // values are correct + // But we should at least see that the query itself works + auto pitch_align = dev.get_info< + sycl::ext::oneapi::experimental::info::device::image_pitch_align>(); + auto max_pitch = dev.get_info(); + auto max_width = dev.get_info(); + auto max_height = dev.get_info(); + +#ifdef VERBOSE_PRINT + std::cout << "image_pitch_align: " << pitch_align + << "\nmax_image_linear_pitch: " << max_pitch + << "\nmax_image_linear_width: " << max_width + << "\nmax_image_linear_height: " << max_height << "\n"; +#endif + + // Extension: query for bindless image mipmaps support -- aspects & info + bool mipmap_support = dev.has(sycl::aspect::ext_oneapi_mipmap); + bool mipmap_anisotropy_support = + dev.has(sycl::aspect::ext_oneapi_mipmap_anisotropy); + float mipmap_max_anisotropy = dev.get_info< + sycl::ext::oneapi::experimental::info::device::mipmap_max_anisotropy>(); + bool mipmap_level_reference_support = + dev.has(sycl::aspect::ext_oneapi_mipmap_level_reference); + +#ifdef VERBOSE_PRINT + std::cout << "mipmap_support: " << mipmap_support + << "\nmipmap_anisotropy_support: " << mipmap_anisotropy_support + << "\nmipmap_max_anisotropy: " << mipmap_max_anisotropy + << "\nmipmap_level_reference_support: " + << mipmap_level_reference_support << "\n"; +#endif + + // Extension: query for bindless image interop support -- device aspects + bool interop_memory_import_support = + dev.has(sycl::aspect::ext_oneapi_interop_memory_import); + bool interop_memory_export_support = + dev.has(sycl::aspect::ext_oneapi_interop_memory_export); + bool interop_semaphore_import_support = + dev.has(sycl::aspect::ext_oneapi_interop_semaphore_import); + bool interop_semaphore_export_support = + dev.has(sycl::aspect::ext_oneapi_interop_semaphore_export); + +#ifdef VERBOSE_PRINT + std::cout << "interop_memory_import_support: " + << interop_memory_import_support + << "\ninterop_memory_export_support: " + << interop_memory_export_support + << "\ninterop_semaphore_import_support: " + << interop_semaphore_import_support + << "\ninterop_semaphore_export_support: " + << interop_semaphore_export_support << "\n"; +#endif + + auto rangeMem = img_mem_0.get_range(); + auto range = sycl::ext::oneapi::experimental::get_image_range( + img_mem_0.get_handle(), dev, ctxt); + if (rangeMem != range) { + printString("handle and mem object disagree on image dimensions!\n"); + validated = false; + } + if (range[0] == width) { + printString("width is correct!\n"); + } else { + printString("width is NOT correct!\n"); + validated = false; + } + if (range[1] == height) { + printString("height is correct!\n"); + } else { + printString("height is NOT correct!\n"); + validated = false; + } + if (range[2] == depth) { + printString("depth is correct!\n"); + } else { + printString("depth is NOT correct!\n"); + validated = false; + } + + auto type = img_mem_0.get_type(); + if (type == sycl::ext::oneapi::experimental::image_type::standard) { + printString("image type is correct!\n"); + } else { + printString("image type is NOT correct!\n"); + validated = false; + } + + auto ctypeMem = img_mem_0.get_channel_type(); + auto ctype = sycl::ext::oneapi::experimental::get_image_channel_type( + img_mem_0.get_handle(), dev, ctxt); + if (ctypeMem != ctype) { + printString("handle and mem object disagree on image channel type!\n"); + validated = false; + } + if (ctype == sycl::image_channel_type::signed_int32) { + printString("channel type is correct!\n"); + } else { + printString("channel type is NOT correct!\n"); + validated = false; + } + + auto corder = img_mem_0.get_channel_order(); + if (corder == sycl::image_channel_order::r) { + printString("channel order is correct!\n"); + } else { + printString("channel order is NOT correct!\n"); + validated = false; + } + + auto numchannelsMem = img_mem_0.get_num_channels(); + auto numchannels = sycl::ext::oneapi::experimental::get_image_num_channels( + img_mem_0.get_handle(), dev, ctxt); + if (numchannelsMem != numchannels) { + printString("handle and mem object disagree on number of channels!\n"); + validated = false; + } + if (numchannels == 1) { + printString("num channels is correct!\n"); + } else { + printString("num channels is NOT correct!\n"); + validated = false; + } + + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + if (validated) { + std::cout << "Test Passed!\n"; + return 0; + } + + std::cout << "Test Failed!\n"; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp new file mode 100644 index 0000000000000..12d5b540a4793 --- /dev/null +++ b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp @@ -0,0 +1,142 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + constexpr size_t N = 16; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + std::vector dataIn2(N / 2); + std::vector copyOut(N / 2); + int j = 0; + for (int i = 0; i < N; i++) { + expected[i] = i + (j + 10); + if (i % 2) + j++; + dataIn1[i] = sycl::float4(i, i, i, i); + if (i < (N / 2)) { + dataIn2[i] = sycl::float4(i + 10, i + 10, i + 10, i + 10); + copyOut[i] = sycl::float4{0, 0, 0, 0}; + } + } + + try { + + size_t width = N; + unsigned int num_levels = 2; + + // Extension: image descriptor -- number of levels + sycl::ext::oneapi::experimental::image_descriptor desc( + {width}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32, + sycl::ext::oneapi::experimental::image_type::mipmap, num_levels); + + // Extension: allocate mipmap memory on device + sycl::ext::oneapi::experimental::image_mem mip_mem(desc, dev, ctxt); + + // Extension: retrieve level 0 + sycl::ext::oneapi::experimental::image_mem_handle img_mem1 = + mip_mem.get_mip_level_mem_handle(0); + + // Extension: copy over data to device at level 0 + q.ext_oneapi_copy(dataIn1.data(), img_mem1, desc); + + // Extension: copy data to device at level 1 + q.ext_oneapi_copy(dataIn2.data(), mip_mem.get_mip_level_mem_handle(1), + desc.get_mip_level_desc(1)); + q.wait_and_throw(); + + // Extension: define a sampler object -- extended mipmap attributes + sycl::ext::oneapi::experimental::bindless_image_sampler samp( + sycl::addressing_mode::mirrored_repeat, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::nearest, sycl::filtering_mode::nearest, 0.0f, + (float)num_levels, 8.0f); + + // Extension: create a sampled image handle to represent the mipmap + sycl::ext::oneapi::experimental::sampled_image_handle mipHandle = + sycl::ext::oneapi::experimental::create_image(mip_mem, samp, desc, dev, + ctxt); + + sycl::buffer buf((float *)out.data(), N); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access(cgh, N); + + cgh.parallel_for(N, [=](sycl::id<1> id) { + float sum = 0; + float x = float(id[0] + 0.5) / (float)N; + // Extension: read mipmap level 0 with anisotropic filtering and level 1 + // with LOD + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + mipHandle, x, 0.0f, 0.0f); + sycl::float4 px2 = + sycl::ext::oneapi::experimental::read_image(mipHandle, + x, 1.0f); + + sum = px1[0] + px2[0]; + outAcc[id] = sum; + }); + }); + + q.wait_and_throw(); + + // Extension: copy data from device + q.ext_oneapi_copy(mip_mem.get_mip_level_mem_handle(1), copyOut.data(), + desc.get_mip_level_desc(1)); + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(mipHandle, dev, ctxt); + + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp new file mode 100644 index 0000000000000..32c7b53db0ce4 --- /dev/null +++ b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp @@ -0,0 +1,150 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 16; + size_t height = 16; + size_t N = width * height; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + std::vector dataIn2(N / 4); + std::vector dataIn3(N / 16); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + dataIn1[i + (width * j)] = {i + (width * j), 0, 0, 0}; + } + } + for (int i = 0; i < (N / 4); i++) { + dataIn2[i] = {i, i, i, i}; + } + for (int i = 0; i < (N / 16); i++) { + dataIn3[i] = {i, i, i, i}; + } + // Expected each x and y will repeat twice + // since mipmap level 1 is half in size + int jj = 0; + for (int i = 0; i < width - 1; i += 2) { + for (int j = 0; j < height - 1; j += 2, jj++) { + expected[j + (width * i)] = jj; + expected[j + (width * (i + 1))] = jj; + expected[(j + 1) + (width * i)] = jj; + expected[(j + 1) + (width * (i + 1))] = jj; + } + } + + try { + + size_t num_levels = 3; + + // Extension: image descriptor -- number of levels + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32, + sycl::ext::oneapi::experimental::image_type::mipmap, num_levels); + + // Extension: define a sampler object -- extended mipmap attributes + sycl::ext::oneapi::experimental::bindless_image_sampler samp( + sycl::addressing_mode::clamp, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::nearest, sycl::filtering_mode::nearest, 0.0f, + (float)num_levels, 8.0f); + + // Extension: allocate mipmap memory on device + sycl::ext::oneapi::experimental::image_mem mip_mem(desc, q); + + // Extension: copy data to device at all levels -- copy func handles desc + // sizing + q.ext_oneapi_copy(dataIn1.data(), mip_mem.get_mip_level_mem_handle(0), + desc.get_mip_level_desc(0)); + q.ext_oneapi_copy(dataIn1.data(), mip_mem.get_mip_level_mem_handle(1), + desc.get_mip_level_desc(1)); + q.ext_oneapi_copy(dataIn3.data(), mip_mem.get_mip_level_mem_handle(2), + desc.get_mip_level_desc(2)); + q.wait_and_throw(); + + // Extension: create a sampled image handle to represent the mipmap + sycl::ext::oneapi::experimental::sampled_image_handle mipHandle = + sycl::ext::oneapi::experimental::create_image(mip_mem, samp, desc, q); + + sycl::buffer buf((float *)out.data(), + sycl::range<2>{height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<2>{height, width}); + + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Normalize coordinates -- +0.5 to look towards centre of pixel + float fdim0 = float(dim0 + 0.5) / (float)width; + float fdim1 = float(dim1 + 0.5) / (float)height; + + // Extension: read mipmap level 1 with LOD + sycl::float4 px2 = + sycl::ext::oneapi::experimental::read_image( + mipHandle, sycl::float2(fdim0, fdim1), 1.0f); + + outAcc[sycl::id<2>{dim1, dim0}] = px2[0]; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(mipHandle, q); + + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp new file mode 100644 index 0000000000000..382cd639e91de --- /dev/null +++ b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp @@ -0,0 +1,142 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 4; + size_t height = 4; + size_t depth = 4; + size_t N = width * height * depth; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + std::vector dataIn2(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < depth; k++) { + expected[i + width * (j + height * k)] = i + width * (j + height * k); + dataIn1[i + width * (j + height * k)] = {i + width * (j + height * k), + 0, 0, 0}; + } + } + } + for (int i = 0; i < (N / 8); i++) { + dataIn2[i] = i; + } + + try { + + // Extension: image descriptor -- number of levels + unsigned int num_levels = 2; + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height, depth}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32, + sycl::ext::oneapi::experimental::image_type::mipmap, num_levels); + + // Extension: define a sampler object -- extended mipmap attributes + sycl::ext::oneapi::experimental::bindless_image_sampler samp( + sycl::addressing_mode::clamp, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::nearest, sycl::filtering_mode::nearest, 0.0f, + (float)num_levels, 8.0f); + + // Extension: allocate mipmap memory on device + sycl::ext::oneapi::experimental::image_mem mip_mem(desc, dev, ctxt); + + // Extension: copy data to device levels 0 and 1 + q.ext_oneapi_copy(dataIn1.data(), mip_mem.get_mip_level_mem_handle(0), + desc.get_mip_level_desc(0)); + q.ext_oneapi_copy(dataIn2.data(), mip_mem.get_mip_level_mem_handle(1), + desc.get_mip_level_desc(1)); + q.wait(); + + // Extension: create a sampled image handle to represent the mipmap + sycl::ext::oneapi::experimental::sampled_image_handle mipHandle = + sycl::ext::oneapi::experimental::create_image(mip_mem, samp, desc, dev, + ctxt); + + sycl::buffer buf((float *)out.data(), + sycl::range<3>{depth, height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<3>{depth, height, width}); + + cgh.parallel_for( + sycl::nd_range<3>{{width, height, depth}, {width, height, depth}}, + [=](sycl::nd_item<3> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + size_t dim2 = it.get_local_id(2); + + // Normalize coordinates -- +0.5 to look towards centre of pixel + float fdim0 = float(dim0 + 0.5) / (float)width; + float fdim1 = float(dim1 + 0.5) / (float)height; + float fdim2 = float(dim2 + 0.5) / (float)depth; + + // Extension: read mipmap with anisotropic filtering with zero + // viewing gradients + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + mipHandle, sycl::float4(fdim0, fdim1, fdim2, (float)0), + sycl::float4(0.0f, 0.0f, 0.0f, 0.0f), + sycl::float4(0.0f, 0.0f, 0.0f, 0.0f)); + + outAcc[sycl::id<3>{dim2, dim1, dim0}] = px1[0]; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(mipHandle, dev, ctxt); + + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/read_1D.cpp b/sycl/test-e2e/bindless_images/read_1D.cpp new file mode 100644 index 0000000000000..645e2641cd49b --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_1D.cpp @@ -0,0 +1,143 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + constexpr size_t width = 512; + std::vector out(width); + std::vector expected(width); + std::vector dataIn1(width); + std::vector dataIn2(width); + float exp = 512; + for (int i = 0; i < width; i++) { + expected[i] = exp; + dataIn1[i] = sycl::float4(i, i, i, i); + dataIn2[i] = sycl::float4(width - i, width - i, width - i, width - i); + } + + try { + // Extension: image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + + // Extension: allocate memory on device and create the handle + sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, dev, ctxt); + + // std::hash specialization to ensure `image_mem` follows common reference + // semantics + assert(std::hash{}(img_mem_0) != + std::hash{}(img_mem_1)); + + // We're able to use move semantics + // Move construct + sycl::ext::oneapi::experimental::image_mem img_mem_0_move_construct( + std::move(img_mem_0)); + // Move assign + sycl::ext::oneapi::experimental::image_mem img_mem_0_move_assign; + img_mem_0_move_assign = std::move(img_mem_0_move_construct); + + // We're able to use copy semantics + // Copy construct + sycl::ext::oneapi::experimental::image_mem img_mem_1_copy_construct( + img_mem_1); + // Copy assign + sycl::ext::oneapi::experimental::image_mem img_mem_1_copy_assign; + img_mem_1_copy_assign = img_mem_1_copy_construct; + + // Equality operators to ensure `image_mem` follows common reference + // semantics + assert(img_mem_0_move_assign != img_mem_1_copy_assign); + assert(img_mem_1 == img_mem_1_copy_assign); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 = + sycl::ext::oneapi::experimental::create_image(img_mem_0_move_assign, + desc, dev, ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 = + sycl::ext::oneapi::experimental::create_image(img_mem_1_copy_assign, + desc, dev, ctxt); + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn1.data(), img_mem_0_move_assign.get_handle(), desc); + q.ext_oneapi_copy(dataIn2.data(), img_mem_1_copy_assign.get_handle(), desc); + + q.wait_and_throw(); + + sycl::buffer buf((float *)out.data(), width); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access(cgh, width); + + cgh.parallel_for(width, [=](sycl::id<1> id) { + float sum = 0; + // Extension: read image data from handle + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + imgHandle1, int(id[0])); + sycl::float4 px2 = + sycl::ext::oneapi::experimental::read_image( + imgHandle2, int(id[0])); + + sum = px1[0] + px2[0]; + outAcc[id] = sum; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, dev, + ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, dev, + ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < width; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/read_2D.cpp b/sycl/test-e2e/bindless_images/read_2D.cpp new file mode 100644 index 0000000000000..36e49b39a1178 --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_2D.cpp @@ -0,0 +1,130 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 7; + size_t height = 3; + size_t N = width * height; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + std::vector dataIn2(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + expected[i + (width * j)] = j * 3; + dataIn1[i + (width * j)] = {j, j, j, j}; + dataIn2[i + (width * j)] = {j * 2, j * 2, j * 2, j * 2}; + } + } + + // Image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + + try { + // Extension: allocate memory on device and create the handle + sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, dev, ctxt); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 = + sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, dev, + ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 = + sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, dev, + ctxt); + + sycl::buffer buf((float *)out.data(), + sycl::range<2>{height, width}); + + // Extension: copy over data to device (handler variant) + q.submit([&](sycl::handler &cgh) { + cgh.ext_oneapi_copy(dataIn1.data(), img_mem_0.get_handle(), desc); + }); + + q.submit([&](sycl::handler &cgh) { + cgh.ext_oneapi_copy(dataIn2.data(), img_mem_1.get_handle(), desc); + }); + q.wait_and_throw(); + + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<2>{height, width}); + + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + float sum = 0; + // Extension: read image data from handle + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + imgHandle1, sycl::int2(dim0, dim1)); + sycl::float4 px2 = + sycl::ext::oneapi::experimental::read_image( + imgHandle2, sycl::int2(dim0, dim1)); + + sum = px1[0] + px2[0]; + outAcc[sycl::id<2>{dim1, dim0}] = sum; + }); + }); + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, dev, + ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, dev, + ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/read_2D_dynamic.cpp b/sycl/test-e2e/bindless_images/read_2D_dynamic.cpp new file mode 100644 index 0000000000000..69e1ee449b4a6 --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_2D_dynamic.cpp @@ -0,0 +1,134 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t numImages = 5; + size_t width = 7; + size_t height = 3; + size_t N = width * height; + std::vector out(N); + std::vector expected(N); + std::vector dataIn(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + expected[i + (width * j)] = (i + (width * j)) * numImages; + dataIn[i + (width * j)] = {i + (width * j), 0, 0, 0}; + } + } + + try { + + // Extension: image descriptor - can use the same for all images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + + // Allocate each image and save the device memory handles + std::vector> + imgAllocations; + for (int i = 0; i < numImages; i++) { + // Extension: returns the handle to the device allocated memory + imgAllocations.push_back( + std::make_shared(desc, + q)); + } + + // Copy over data to device for each image + for (int i = 0; i < numImages; i++) { + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn.data(), imgAllocations[i]->get_handle(), desc); + } + q.wait_and_throw(); + + // Create the images and return the handles + std::vector + imgHandles; + for (int i = 0; i < numImages; i++) { + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle = + sycl::ext::oneapi::experimental::create_image(*imgAllocations[i], + desc, q); + imgHandles.push_back(imgHandle); + } + + sycl::buffer buf(out.data(), sycl::range<2>{height, width}); + sycl::buffer imgHandlesBuf{imgHandles}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor outAcc{buf, cgh, sycl::write_only}; + sycl::accessor imgHandleAcc{imgHandlesBuf, cgh, sycl::read_only}; + + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Sum each image by reading their handle + float sum = 0; + for (int i = 0; i < numImages; i++) { + // Extension: read image data from handle + sum += (sycl::ext::oneapi::experimental::read_image( + imgHandleAcc[i], sycl::int2(dim0, dim1)))[0]; + } + outAcc[sycl::id<2>{dim1, dim0}] = sum; + }); + }); + + // Using image handles requires manual synchronization + q.wait_and_throw(); + + // Extension: cleanup + for (int i = 0; i < numImages; i++) { + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandles[i], q); + } + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/read_3D.cpp b/sycl/test-e2e/bindless_images/read_3D.cpp new file mode 100644 index 0000000000000..4b6c82e53b665 --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_3D.cpp @@ -0,0 +1,129 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 1024; + size_t height = 1024; + size_t depth = 16; + size_t N = width * height * depth; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + std::vector dataIn2(N); + // ROW-MAJOR + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < depth; k++) { + expected[i + width * (j + height * k)] = j * 3; + dataIn1[i + width * (j + height * k)] = {j, j, j, j}; + dataIn2[i + width * (j + height * k)] = {j * 2, j * 2, j * 2, j * 2}; + } + } + } + + try { + + // Extension: image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height, depth}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + + // Extension: allocate memory on device and create the handle + sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, dev, ctxt); + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn1.data(), img_mem_0.get_handle(), desc); + q.ext_oneapi_copy(dataIn2.data(), img_mem_1.get_handle(), desc); + q.wait_and_throw(); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 = + sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, dev, + ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 = + sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, dev, + ctxt); + + sycl::buffer buf((float *)out.data(), + sycl::range<3>{depth, height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<3>{depth, height, width}); + + cgh.parallel_for( + sycl::nd_range<3>{{width, height, depth}, {16, 16, 4}}, + [=](sycl::nd_item<3> it) { + size_t dim0 = it.get_global_id(0); + size_t dim1 = it.get_global_id(1); + size_t dim2 = it.get_global_id(2); + float sum = 0; + // Extension: read image data from handle + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + imgHandle1, sycl::int4(dim0, dim1, dim2, 0)); + sycl::float4 px2 = + sycl::ext::oneapi::experimental::read_image( + imgHandle2, sycl::int4(dim0, dim1, dim2, 0)); + + sum = px1[0] + px2[0]; + outAcc[sycl::id<3>{dim2, dim1, dim0}] = sum; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, dev, + ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, dev, + ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/read_sampled.cpp b/sycl/test-e2e/bindless_images/read_sampled.cpp new file mode 100644 index 0000000000000..f7b3c415f62bc --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_sampled.cpp @@ -0,0 +1,947 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include +#include +#include + +// Print test names and pass status +//#define VERBOSE_LV1 + +// Same as above plus sampler, offset, margin of error, largest error found and +// results of one mismatch +//#define VERBOSE_LV2 + +// Same as above but all mismatches are printed +//#define VERBOSE_LV3 + +// Helpers and utilities +struct util { + template + static void fill_rand(std::vector> &v, int seed) { + std::default_random_engine generator; + generator.seed(seed); + auto distribution = [&]() { + if constexpr (std::is_same_v) { + return std::uniform_real_distribution(0.0, 100.0); + } else if constexpr (std::is_floating_point_v) { + return std::uniform_real_distribution(0.0, 100.0); + } else { + return std::uniform_int_distribution(0, 100); + } + }(); + for (int i = 0; i < v.size(); ++i) { + sycl::vec temp; + + for (int j = 0; j < NChannels; j++) { + temp[j] = distribution(generator); + } + + v[i] = temp; + } + } + + // Return fractional part of argument + // Whole part is returned through wholeComp + static double fract(double x, double *wholeComp) { + // This fmin operation is to prevent fract from returning 1.0. + // Instead will return the largest possible floating-point number less + // than 1.0 + double fractComp = std::fmin(x - std::floor(x), 0x1.fffffep-1f); + *wholeComp = std::floor(x); + return fractComp; + } + + // Returns the two pixels to access plus the weight each of them have + static double get_common_linear_fract_and_coords_fp64(double coord, int *x0, + int *x1) { + double pixelCoord; + + // Subtract to align so that pixel center is 0.5 away from origin. + coord = coord - 0.5; + + double weight = fract(coord, &pixelCoord); + *x0 = static_cast(std::floor(pixelCoord)); + *x1 = *x0 + 1; + return weight; + } + + // Linear sampling is the process of giving a weighted linear blend + // between the nearest adjacent pixels. + // When performing linear sampling, we subtract 0.5 from the original + // coordinate to get the center-adjusted coordinate (as pixels are "centered" + // on the half-integers). For example, with original coord 3.2, we get a + // center-adjusted coord of 2.7. With 2.7, we have 70% of the pixel value will + // come from the pixel at coord 3 and 30% from the pixel value at coord 2 + + // The function arguments here are the two pixels to use and the weight to + // give each of them. + template + static sycl::vec + linearOp1D(sycl::vec pix1, sycl::vec pix2, + double weight) { + + sycl::vec weightArr(weight); + sycl::vec one(1.0f); + + sycl::vec Ti0 = pix1.template convert(); + sycl::vec Ti1 = pix2.template convert(); + + sycl::vec result; + + result = ((one - weightArr) * Ti0 + weightArr * Ti1); + + // Round to nearest whole number. + // There is no option to do this via sycl::rounding_mode. + if constexpr (std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) { + for (int i = 0; i < NChannels; i++) { + result[i] = std::round(result[i]); + } + } + + return result.template convert(); + } + + // Out of range coords return a border color + // The border color happens to be all zeros + template + static VecType clampNearest(double coordX, int width, + std::vector &input_image) { + // Due to pixel centers being 0.5 away from origin and because + // 0.5 is *not* subtracted here, rounding down gives the same results as + // rounding to nearest number if 0.5 is subtracted to account + // for pixel center + int coordXInt = static_cast(std::floor(coordX)); + + // Clamp sampling according to the SYCL spec returns a border color. + // The border color is all zeros. + // There does not appear to be any way for the user to set the border color + if (coordXInt > width - 1) { + return VecType{0}; + } + if (coordXInt < 0) { + return VecType{0}; + } + return input_image[coordXInt]; + } + + // Out of range coords are clamped to the extent. + template + static VecType clampToEdgeNearest(double coordX, int width, + std::vector &input_image) { + // Due to pixel centers being 0.5 away from origin and because + // 0.5 is *not* subtracted here, rounding down gives the same results as + // rounding to nearest number if 0.5 is subtracted to account + // for pixel center + int coordXInt = static_cast(std::floor(coordX)); + // Clamp to extent + coordXInt = std::clamp(coordXInt, 0, width - 1); + return input_image[coordXInt]; + } + + // Out of range coords are wrapped to the valid range. + template + static VecType repeatNearest(double coordX, int width, + std::vector &input_image) { + + // Convert unnormalized input coord to normalized format + double normCoordX = coordX / width; + + // Keep only the fractional component of the number and unnormalize. + double fractComp = (normCoordX - std::floor(normCoordX)); + + // Unnormalize fractComp + double unnorm = fractComp * width; + + // Due to pixel centers being 0.5 away from origin and because + // 0.5 is *not* subtracted here, rounding down gives the same results as + // rounding to nearest number if 0.5 is subtracted to account + // for pixel center + int coordXInt = static_cast(std::floor(unnorm)); + + // Handle negative coords + if (coordXInt < 0) { + coordXInt = width + coordXInt; + } + + return input_image[coordXInt]; + } + + // Out of range coordinates are flipped at every integer junction + template + static VecType mirroredRepeatNearest(double coordX, int width, + std::vector &input_image) { + + // Convert unnormalized input coord to normalized format + double normCoordX = coordX / width; + + // Round to nearest multiple of two. + // e.g. + // normCoordX == 0.3 -> result = 0 + // normCoordX == 1.3 -> result = 2 + // normCoordX == 2.4 -> result = 2 + // normCoordX == 3.42 -> result = 4 + double nearestMulOfTwo = 2.0f * std::rint(0.5f * normCoordX); + // Subtract nearestMulOfTwo from normCoordX. + // Gives the normalized form of the coord to use. + // With normCoordX=1.3, norm is set to 0.7 + // With normCoordX=2.4, norm is set to 0.4 + double norm = std::abs(normCoordX - nearestMulOfTwo); + // Unnormalize norm + double unnorm = norm * width; + // Round down and cast to int + int coordXInt = static_cast(std::floor(unnorm)); + // Constrain to valid range + coordXInt = std::min(coordXInt, width - 1); + + // This prevents when at an integer junction, having three + // accesses to pixel at normalized location 0 and 1 instead of two which is + // correct. + int oddShift = 0; + // If not at image boundry and precisely on a pixel + if (std::fmod(normCoordX, 1) != 0.0 && + std::fmod(normCoordX * width, 1) == 0.0) { + // Set oddShift to be one when the integral part of the normalized + // coords is odd. + // Otherwise set to zero. + oddShift = + std::abs(static_cast(std::fmod(std::floor(normCoordX), 2))); + } + coordXInt -= oddShift; + + return input_image[coordXInt]; + } + + // Out of range coords return a border color + // The border color is all zeros + template + static sycl::vec + clampLinear(double coordX, int width, + std::vector> &input_image) { + using VecType = sycl::vec; + // Get coords for linear sampling + int i0, i1; + double weight = get_common_linear_fract_and_coords_fp64(coordX, &i0, &i1); + + VecType pix1; + VecType pix2; + + // Clamp sampling according to the SYCL spec returns a border color. + // The border color is all zeros. + // There does not appear to be any way for the user to set the border color. + if (i0 < 0 || i0 > width - 1) { + pix1 = VecType(0); + } else { + pix1 = input_image[i0]; + } + + if (i1 < 0 || i1 > width - 1) { + pix2 = VecType(0); + } else { + pix2 = input_image[i1]; + } + + // Perform linear sampling + return linearOp1D(pix1, pix2, weight); + } + + // Out of range coords are clamped to the extent. + template + static sycl::vec + clampToEdgeLinear(double coordX, int width, + std::vector> &input_image) { + using VecType = sycl::vec; + // Get coords for linear sampling + int i0, i1; + double weight = get_common_linear_fract_and_coords_fp64(coordX, &i0, &i1); + + // Clamp to extent + i0 = std::clamp(i0, 0, width - 1); + i1 = std::clamp(i1, 0, width - 1); + + VecType pix1 = input_image[i0]; + VecType pix2 = input_image[i1]; + + // Perform linear sampling + return linearOp1D(pix1, pix2, weight); + } + + // Out of range coords are wrapped to the valid range + template + static sycl::vec + repeatLinear(double coordX, int width, + std::vector> &input_image) { + using VecType = sycl::vec; + + // Convert unnormalized input coord to normalized format + double normCoordX = coordX / width; + + double unnorm = (normCoordX - static_cast(normCoordX)) * width; + // Get coords for linear sampling + int i0, i1; + double weight = get_common_linear_fract_and_coords_fp64(unnorm, &i0, &i1); + + // Wrap linear sampling coords to valid range + if (i0 < 0) { + i0 = width + i0; + } + if (i1 < 0) { + i1 = width + i1; + } + + if (i1 > width - 1) { + i1 = i1 - width; + } + if (i0 > width - 1) { + i0 = i0 - width; + } + + VecType pix1 = input_image[i0]; + VecType pix2 = input_image[i1]; + + // Perform linear sampling + return linearOp1D(pix1, pix2, weight); + } + + // Out of range coordinates are flipped at every integer junction + template + static sycl::vec + mirroredRepeatLinear(double coordX, int width, + std::vector> &input_image) { + using VecType = sycl::vec; + + // Convert unnormalized input coord to normalized format + double normCoordX = coordX / width; + + // Round to nearest multiple of two. + // e.g. + // normCoordX == 0.3 -> result = 0 + // normCoordX == 1.3 -> result = 2 + // normCoordX == 2.4 -> result = 2 + // normCoordX == 3.42 -> result = 4 + double nearestMulOfTwo = 2.0f * std::rint(0.5f * normCoordX); + // Subtract nearestMulOfTwo from normCoordX. + // Gives the normalized form of the coord to use. + // With normCoordX=1.3, norm is set to 0.7 + // With normCoordX=2.4, norm is set to 0.4 + double norm = std::abs(normCoordX - nearestMulOfTwo); + // Unnormalize norm + double unnorm = norm * width; + + // Get coords for linear sampling + int i0, i1; + double weight = get_common_linear_fract_and_coords_fp64(unnorm, &i0, &i1); + + // get_common_linear sometimes returns numbers out of bounds. + // Handle this by wrapping to boundary. + i0 = std::max(i0, 0); + i1 = std::min(i1, width - 1); + + VecType pix1 = input_image[i0]; + VecType pix2 = input_image[i1]; + + // Perform linear sampling + return linearOp1D(pix1, pix2, weight); + } + + template > + static sycl::vec + read(sycl::range<1> globalSize, double coordX, double offset, + sycl::ext::oneapi::experimental::bindless_image_sampler &samp, + std::vector> &input_image) { + using VecType = sycl::vec; + coordX = coordX + offset; + int width = globalSize[0]; + + // Ensure that coordX always contains unnormalized coords + sycl::coordinate_normalization_mode SampNormMode = samp.coordinate; + if (SampNormMode == sycl::coordinate_normalization_mode::normalized) { + // Unnormalize + coordX = coordX * width; + } + + sycl::filtering_mode SampFiltMode = samp.filtering; + if (SampFiltMode == sycl::filtering_mode::nearest) { + + sycl::addressing_mode SampAddrMode = samp.addressing; + if (SampAddrMode == sycl::addressing_mode::clamp) { + return clampNearest(coordX, width, input_image); + } + + if (SampAddrMode == sycl::addressing_mode::clamp_to_edge) { + return clampToEdgeNearest(coordX, width, input_image); + } + + if (SampAddrMode == sycl::addressing_mode::repeat) { + if (SampNormMode == sycl::coordinate_normalization_mode::unnormalized) { + assert(false && + "Repeat addressing mode must be used with normalized coords"); + } + return repeatNearest(coordX, width, input_image); + } + + if (SampAddrMode == sycl::addressing_mode::mirrored_repeat) { + if (SampNormMode == sycl::coordinate_normalization_mode::unnormalized) { + assert(false && "Mirrored repeat addressing mode must be used with " + "normalized coords"); + } + return mirroredRepeatNearest(coordX, width, input_image); + } + + if (SampAddrMode == sycl::addressing_mode::none) { + int intCoordX = static_cast(std::floor(coordX)); + if (intCoordX < 0 || intCoordX >= width) { + assert(false && "Accessed out of bounds with addressing mode none! " + "Undefined Behaviour!"); + } + return input_image[intCoordX]; + } + + } else { // linear + sycl::addressing_mode SampAddrMode = samp.addressing; + if (SampAddrMode == sycl::addressing_mode::clamp) { + return clampLinear(coordX, width, input_image); + } + if (SampAddrMode == sycl::addressing_mode::clamp_to_edge) { + return clampToEdgeLinear(coordX, width, input_image); + } + if (SampAddrMode == sycl::addressing_mode::repeat) { + if (SampNormMode == sycl::coordinate_normalization_mode::unnormalized) { + assert(false && + "Repeat addressing mode must be used with normalized coords"); + } + return repeatLinear(coordX, width, input_image); + } + if (SampAddrMode == sycl::addressing_mode::mirrored_repeat) { + if (SampNormMode == sycl::coordinate_normalization_mode::unnormalized) { + assert(false && "Mirrored repeat addressing mode must be used with " + "normalized coords"); + } + return mirroredRepeatLinear(coordX, width, + input_image); + } + if (SampAddrMode == sycl::addressing_mode::none) { + if (coordX < 0 || coordX >= width) { + assert(false && "Accessed out of bounds with addressing mode none! " + "Undefined Behaviour!"); + } + assert(false && "filtering mode linear with addressing mode none " + "currently not supported"); + } + } + assert(false && "Invalid sampler encountered!"); + } + + // parallel_for ND bound normalized + template + static void run_ndim_test_host( + sycl::range globalSize, double offset, + sycl::ext::oneapi::experimental::bindless_image_sampler &samp, + std::vector> &input_image, + std::vector> &output) { + using VecType = sycl::vec; + bool isNorm = + (samp.coordinate == sycl::coordinate_normalization_mode::normalized); + + if constexpr (NDims == 1) { + for (int i = 0; i < globalSize[0]; i++) { + double coordX; + if (isNorm) { + coordX = (double)i / (double)globalSize[0]; + } else { + coordX = i; + } + VecType result = read( + globalSize, coordX, offset, samp, input_image); + output[i] = result; + } + } else if constexpr (NDims == 2) { + assert(false && "2d normalized not yet implemented"); + } else if constexpr (NDims == 3) { + assert(false && "3d normalized not yet implemented"); + } else { + assert(false && "Invalid dimension number set"); + } + } + + // parallel_for ND bindless normalized + template + static void run_ndim_test_device( + sycl::queue &q, sycl::range globalSize, + sycl::range localSize, double offset, + sycl::ext::oneapi::experimental::bindless_image_sampler &samp, + sycl::ext::oneapi::experimental::sampled_image_handle input_image, + sycl::buffer, NDims> &output, + sycl::range bufSize) { + using VecType = sycl::vec; + bool isNorm = + (samp.coordinate == sycl::coordinate_normalization_mode::normalized); + if constexpr (NDims == 1) { + try { + q.submit([&](sycl::handler &cgh) { + auto outAcc = output.template get_access( + cgh, bufSize); + cgh.parallel_for( + sycl::nd_range{globalSize, localSize}, + [=](sycl::nd_item it) { + size_t dim0 = it.get_global_id(0); + double coordX = 0.0; + if (isNorm) { + coordX = (double)dim0 / (double)globalSize[0]; + } else { + coordX = dim0; + } + + VecType px1 = + sycl::ext::oneapi::experimental::read_image( + input_image, float(coordX + offset)); + + outAcc[(int)dim0] = px1; + }); + }); + } catch (sycl::exception e) { + std::cerr << "\tKernel submission failed! " << e.what() << std::endl; + } catch (...) { + std::cerr << "\tKernel submission failed!" << std::endl; + } + } else if constexpr (NDims == 2) { + assert(false && "2d normalized not yet implemented"); + } else if constexpr (NDims == 3) { + assert(false && "3d normalized not yet implemented"); + } else { + assert(false && "Invalid dimension number set"); + } + } +}; + +void printTestInfo( + sycl::ext::oneapi::experimental::bindless_image_sampler &samp, + double offset) { + + sycl::addressing_mode SampAddrMode = samp.addressing; + sycl::coordinate_normalization_mode SampNormMode = samp.coordinate; + sycl::filtering_mode SampFiltMode = samp.filtering; + + std::cout << "---------------------------------------NEW " + "SAMPLER---------------------------------------\n"; + + std::cout << "addressing mode: "; + switch (SampAddrMode) { + case sycl::addressing_mode::mirrored_repeat: + std::cout << "mirrored_repeat\n"; + break; + case sycl::addressing_mode::repeat: + std::cout << "repeat\n"; + break; + case sycl::addressing_mode::clamp_to_edge: + std::cout << "clamp_to_edge\n"; + break; + case sycl::addressing_mode::clamp: + std::cout << "clamp\n"; + break; + case sycl::addressing_mode::none: + std::cout << "none\n"; + break; + } + + std::cout << "coordinate normalization mode: "; + switch (SampNormMode) { + case sycl::coordinate_normalization_mode::normalized: + std::cout << "normalized\n"; + break; + case sycl::coordinate_normalization_mode::unnormalized: + std::cout << "unnormalized\n"; + break; + } + + std::cout << "filtering mode: "; + switch (SampFiltMode) { + case sycl::filtering_mode::nearest: + std::cout << "nearest\n"; + break; + case sycl::filtering_mode::linear: + std::cout << "linear\n"; + break; + } + std::cout << "offset: " << offset << "\n"; +} + +bool isNumberWithinPercentOfNumber(float firstN, float percent, float secondN, + float &diff, float &percDiff) { + // Get absolute difference of the two numbers + diff = std::abs(firstN - secondN); + // Get the percentage difference of the two numbers + percDiff = + 100.0f * (std::abs(firstN - secondN) / (((firstN + secondN) / 2.0f))); + + // Check if perc difference is not greater then maximum allowed + return percDiff <= percent; +} + +template +bool run_test(sycl::range dims, sycl::range localSize, + double offset, + sycl::ext::oneapi::experimental::bindless_image_sampler &samp, + unsigned int seed = 0) { + using VecType = sycl::vec; + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // skip half tests if not supported + if constexpr (std::is_same_v) { + if (!dev.has(sycl::aspect::fp16)) { +#if defined(VERBOSE_LV1) || defined(VERBOSE_LV2) || defined(VERBOSE_LV3) + std::cout << "Test skipped due to lack of device support for fp16\n"; +#endif + return false; + } + } + + size_t num_elems = dims[0]; + if (NDims > 1) + num_elems *= dims[1]; + if (NDims > 2) + num_elems *= dims[2]; + + std::vector input_0(num_elems); + std::vector expected(num_elems); + std::vector actual(num_elems); + + std::srand(seed); + util::fill_rand(input_0, seed); + + { + sycl::range globalSize = dims; + util::run_ndim_test_host(globalSize, offset, samp, + input_0, expected); + } + + try { + + sycl::ext::oneapi::experimental::image_descriptor desc(dims, COrder, CType); + + sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, q); + + auto img_input = + sycl::ext::oneapi::experimental::create_image(img_mem_0, samp, desc, q); + + q.ext_oneapi_copy(input_0.data(), img_mem_0.get_handle(), desc); + q.wait_and_throw(); + + { + sycl::range bufSize = dims; + sycl::range globalSize = dims; + sycl::buffer outBuf((VecType *)actual.data(), bufSize); + q.wait_and_throw(); + util::run_ndim_test_device( + q, globalSize, localSize, offset, samp, img_input, outBuf, bufSize); + q.wait_and_throw(); + } + + // Cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(img_input, q); + + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return true; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return true; + } + + // Collect and validate output + + // The following sets the percentage margin of error. + + // The margin of error might be different for different backends. + // For CUDA, low-precision interpolation is used for linear sampling + // according to the CUDA programming guide. + // Specifically, CUDA devices uses 9 bits for the linear sampling weight with + // 8 for the fractional value. (One extra so 1.0 is exactly represented) + // 8 bits for the fractional value means there are 256 possible values + // to represent between 1 and 0. As a percentage error, (1/256) * 100 + // gives 0.390625. Meaning that the percentage error for every + // linear interpolation is up to 0.390625% away from the correct value. + // There is no error when linear sampling does not occur. + + float deviation = 0.390625f; + + // For tests using nearest filtering mode, no margin of error is expected. + if (samp.filtering == sycl::filtering_mode::nearest) { + deviation = 0.0f; + } + + bool validated = true; + float largestError = 0.0f; + float largestPercentError = 0.0f; + for (int i = 0; i < num_elems; i++) { + for (int j = 0; j < NChannels; ++j) { + bool mismatch = false; + if (actual[i][j] != expected[i][j]) { + // Nvidia GPUs have a 0.4%~ margin of error due to only using 8 bits to + // represent values between 1-0 for weights during linear interpolation. + float diff, percDiff; + if (!isNumberWithinPercentOfNumber(actual[i][j], deviation, + expected[i][j], diff, percDiff)) { + mismatch = true; + validated = false; + } + if (diff > largestError) { + largestError = diff; + } + if (percDiff > largestPercentError) { + largestPercentError = percDiff; + } + } + if (mismatch) { +#if defined(VERBOSE_LV2) || defined(VERBOSE_LV3) + std::cout << "\tResult mismatch at [" << i << "][" << j + << "] Expected: " << +DType(expected[i][j]) + << ", Actual: " << +DType(actual[i][j]) << std::endl; +#endif + +#ifndef VERBOSE_LV3 + break; +#endif + } + } +#ifndef VERBOSE_LV3 + if (!validated) { + break; + } +#endif + } + +#if defined(VERBOSE_LV2) || defined(VERBOSE_LV3) + std::cout << "largestError: " << largestError << "\n"; + std::cout << "largestPercentError: " << largestPercentError << "%" + << "\n"; + std::cout << "Margin of Error: " << deviation << "%" + << "\n"; +#endif + +#if defined(VERBOSE_LV1) || defined(VERBOSE_LV2) || defined(VERBOSE_LV3) + if (validated) { + std::cout << "\tTest passed!\n"; + } else { + std::cout << "\tTest failed!\n"; + } +#endif + + return !validated; +} + +void printTestName(std::string name) { +#if defined(VERBOSE_LV1) || defined(VERBOSE_LV2) || defined(VERBOSE_LV3) + std::cout << name; +#endif +} + +template > +bool run_tests(sycl::range dims, sycl::range localSize, + double offset, int seed, + sycl::coordinate_normalization_mode normMode) { + + // addressing_mode::none currently removed due to + // inconsistent behavour when switching between + // normalized and unnormalized coords. + sycl::addressing_mode addrModes[4] = { + sycl::addressing_mode::repeat, sycl::addressing_mode::mirrored_repeat, + sycl::addressing_mode::clamp_to_edge, sycl::addressing_mode::clamp}; + + sycl::filtering_mode filtModes[2] = {sycl::filtering_mode::nearest, + sycl::filtering_mode::linear}; + + bool failed = false; + + for (auto addrMode : addrModes) { + + for (auto filtMode : filtModes) { + + if (normMode == sycl::coordinate_normalization_mode::unnormalized) { + // These sampler combinations are not valid according to the SYCL spec + if (addrMode == sycl::addressing_mode::repeat || + addrMode == sycl::addressing_mode::mirrored_repeat) { + continue; + } + } + // Skip using offset with address_mode of none. Will cause undefined + // behaviour. + if (addrMode == sycl::addressing_mode::none && offset != 0.0) { + continue; + } + + sycl::ext::oneapi::experimental::bindless_image_sampler samp( + addrMode, normMode, filtMode); + +#if defined(VERBOSE_LV2) || defined(VERBOSE_LV3) + printTestInfo(samp, offset); +#endif + + // Tests using int data type currently disabled due to inconsistent + // rounding behaviour against non-float types smaller then 32 bit. + + printTestName("Running 1D short\n"); + failed |= + run_test(dims, localSize, offset, samp, seed); + printTestName("Running 1D short2\n"); + failed |= + run_test(dims, localSize, offset, samp, seed); + printTestName("Running 1D short4\n"); + failed |= + run_test(dims, localSize, offset, samp, seed); + + printTestName("Running 1D unsigned short\n"); + failed |= run_test< + NDims, unsigned short, 1, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::r, class ushort_1d1, class ushort_1d2>( + dims, localSize, offset, samp, seed); + printTestName("Running 1D unsigned short2\n"); + failed |= run_test< + NDims, unsigned short, 2, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::rg, class ushort2_1d1, class ushort2_1d2>( + dims, localSize, offset, samp, seed); + printTestName("Running 1D unsigned short4\n"); + failed |= + run_test(dims, localSize, offset, samp, seed); + + printTestName("Running 1D char\n"); + failed |= + run_test(dims, localSize, offset, samp, seed); + printTestName("Running 1D char2\n"); + failed |= + run_test(dims, localSize, offset, samp, seed); + printTestName("Running 1D char4\n"); + failed |= + run_test(dims, localSize, offset, samp, seed); + + printTestName("Running 1D unsigned char\n"); + failed |= run_test< + NDims, unsigned char, 1, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::r, class uchar_1d1, class uchar_1d2>( + dims, localSize, offset, samp, seed); + printTestName("Running 1D unsigned char2\n"); + failed |= run_test< + NDims, unsigned char, 2, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rg, class uchar2_1d1, class uchar2_1d2>( + dims, localSize, offset, samp, seed); + printTestName("Running 1D unsigned char4\n"); + failed |= run_test< + NDims, unsigned char, 4, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rgba, class uchar4_1d1, class uchar4_1d2>( + dims, localSize, offset, samp, seed); + + printTestName("Running 1D float\n"); + failed |= run_test(dims, localSize, offset, samp, seed); + printTestName("Running 1D float2\n"); + failed |= run_test(dims, localSize, offset, samp, seed); + printTestName("Running 1D float4\n"); + failed |= run_test(dims, localSize, offset, samp, seed); + + printTestName("Running 1D half\n"); + failed |= run_test(dims, localSize, offset, samp, seed); + printTestName("Running 1D half2\n"); + failed |= run_test(dims, localSize, offset, samp, seed); + printTestName("Running 1D half4\n"); + failed |= run_test(dims, localSize, offset, samp, seed); + + printTestName("Running 1D float - dims: 1024, local: 512\n"); + failed |= run_test({1024}, {512}, offset, samp, seed); + printTestName("Running 1D float4 - dims: 4096, local: 8\n"); + failed |= run_test({4096}, {8}, offset, samp, seed); + } + } + + return !failed; +} + +template +bool run_offset(sycl::range dims, sycl::range localSize, + double offset, int seed) { + bool normPassed = + run_tests(dims, localSize, (offset / (double)dims[0]), seed, + sycl::coordinate_normalization_mode::normalized); + bool nonormPassed = + run_tests(dims, localSize, offset, seed, + sycl::coordinate_normalization_mode::unnormalized); + return normPassed && nonormPassed; +} + +template +bool run_no_offset(sycl::range dims, sycl::range localSize, + int seed) { + bool normPassed = + run_tests(dims, localSize, 0.0, seed, + sycl::coordinate_normalization_mode::normalized); + bool nonormPassed = + run_tests(dims, localSize, 0.0, seed, + sycl::coordinate_normalization_mode::unnormalized); + return normPassed && nonormPassed; +} + +template +bool run_dim(sycl::range dims, sycl::range localSize, + double offset, int seed) { + bool offsetPassed = run_offset(dims, localSize, offset, seed); + bool noOffsetPassed = run_no_offset(dims, localSize, seed); + return offsetPassed && noOffsetPassed; +} + +bool run_all(int seed) { return run_dim<1>({512}, {32}, 20, seed); } + +int main() { + + unsigned int seed = 0; + bool result = run_all(seed); + + if (result) { + std::cout << "All tests passed!\n"; + return 0; + } + + std::cerr << "An error has occured!\n"; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/read_write_1D.cpp b/sycl/test-e2e/bindless_images/read_write_1D.cpp new file mode 100644 index 0000000000000..88e0b28a7f46a --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_write_1D.cpp @@ -0,0 +1,125 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + constexpr size_t width = 512; + std::vector out(width); + std::vector expected(width); + std::vector dataIn1(width); + std::vector dataIn2(width); + float exp = 512; + for (int i = 0; i < width; i++) { + expected[i] = exp; + dataIn1[i] = sycl::float4(i, i, i, i); + dataIn2[i] = sycl::float4(width - i, width - i, width - i, width - i); + } + + try { + // Extension: image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + + // Extension: allocate memory on device and create the handle + // Input images memory + sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, dev, ctxt); + + // Output image memory + sycl::ext::oneapi::experimental::image_mem img_mem_2(desc, dev, ctxt); + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn1.data(), img_mem_0.get_handle(), desc); + q.ext_oneapi_copy(dataIn2.data(), img_mem_1.get_handle(), desc); + q.wait_and_throw(); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgIn1 = + sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, dev, + ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle imgIn2 = + sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, dev, + ctxt); + + sycl::ext::oneapi::experimental::unsampled_image_handle imgOut = + sycl::ext::oneapi::experimental::create_image(img_mem_2, desc, dev, + ctxt); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for(width, [=](sycl::id<1> id) { + float sum = 0; + // Extension: read image data from handle + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + imgIn1, int(id[0])); + sycl::float4 px2 = + sycl::ext::oneapi::experimental::read_image( + imgIn2, int(id[0])); + + sum = px1[0] + px2[0]; + // Extension: write to image with handle + sycl::ext::oneapi::experimental::write_image( + imgOut, int(id[0]), sycl::float4(sum)); + }); + }); + + q.wait_and_throw(); + // Extension: copy data from device to host + q.ext_oneapi_copy(img_mem_2.get_handle(), out.data(), desc); + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgIn1, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgIn2, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgOut, dev, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < width; i++) { + bool mismatch = false; + if (out[i][0] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i][0] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp b/sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp new file mode 100644 index 0000000000000..0a64d934a8c49 --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp @@ -0,0 +1,136 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + constexpr size_t width = 512; + std::vector out(width); + std::vector expected(width); + std::vector dataIn1(width); + std::vector dataIn2(width); + for (int i = 0; i < width; i++) { + expected[i] = i * 3; + dataIn1[i] = i; + dataIn2[i] = i * 2; + } + + try { + // Extension: image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width}, sycl::image_channel_order::r, sycl::image_channel_type::fp32); + + // Extension: allocate memory on device and create the handle + sycl::ext::oneapi::experimental::image_mem img_mem_00(desc, q); + sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, q); + sycl::ext::oneapi::experimental::image_mem img_mem_2(desc, q); + + // We're able to use move semantics + auto img_mem_0 = std::move(img_mem_00); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 = + sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, q); + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 = + sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, q); + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle3 = + sycl::ext::oneapi::experimental::create_image(img_mem_2, desc, q); + + // Extension: copy over data to device (2 subregions) + sycl::range copySrcOffset = {0, 0, 0}; + sycl::range copyExtent = {width / 2, 1, 1}; + sycl::range srcExtent = {width, 0, 0}; + + q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, srcExtent, + img_mem_0.get_handle(), {0, 0, 0}, desc, copyExtent); + q.ext_oneapi_copy(dataIn1.data(), {width / 2, 0, 0}, srcExtent, + img_mem_0.get_handle(), {width / 2, 0, 0}, desc, + copyExtent); + + q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, srcExtent, + img_mem_1.get_handle(), {0, 0, 0}, desc, copyExtent); + q.ext_oneapi_copy(dataIn2.data(), {width / 2, 0, 0}, srcExtent, + img_mem_1.get_handle(), {width / 2, 0, 0}, desc, + copyExtent); + + q.wait_and_throw(); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for(width, [=](sycl::id<1> id) { + float sum = 0; + // Extension: read image data from handle + float px1 = sycl::ext::oneapi::experimental::read_image( + imgHandle1, int(id[0])); + float px2 = sycl::ext::oneapi::experimental::read_image( + imgHandle2, int(id[0])); + + sum = px1 + px2; + sycl::ext::oneapi::experimental::write_image(imgHandle3, + int(id[0]), sum); + }); + }); + + q.wait_and_throw(); + + // Extension: copy data from device to host (two sub-regions) + sycl::range copy_extent_2 = {width / 2, 1, 1}; + sycl::range dest_extent_0 = {width, 0, 0}; + q.ext_oneapi_copy(img_mem_2.get_handle(), {0, 0, 0}, desc, out.data(), + {0, 0, 0}, dest_extent_0, copy_extent_2); + q.ext_oneapi_copy(img_mem_2.get_handle(), {width / 2, 0, 0}, desc, + out.data(), {width / 2, 0, 0}, dest_extent_0, + copy_extent_2); + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, q); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, q); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < width; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i][0] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/read_write_2D.cpp b/sycl/test-e2e/bindless_images/read_write_2D.cpp new file mode 100644 index 0000000000000..6ac8f3c8ed36e --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_write_2D.cpp @@ -0,0 +1,136 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t height = 32; + size_t width = 32; + size_t N = height * width; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + std::vector dataIn2(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + expected[i + (width * j)] = j * 3; + dataIn1[i + (width * j)] = {j, j, j, j}; + dataIn2[i + (width * j)] = {j * 2, j * 2, j * 2, j * 2}; + } + } + + try { + // Extension: image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + + // Extension: allocate memory on device and create the handle + // Input images memory + sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, dev, ctxt); + + // Output image memory + sycl::ext::oneapi::experimental::image_mem img_mem_2(desc, dev, ctxt); + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn1.data(), img_mem_0.get_handle(), desc); + q.ext_oneapi_copy(dataIn2.data(), img_mem_1.get_handle(), desc); + q.wait_and_throw(); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgIn1 = + sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, dev, + ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle imgIn2 = + sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, dev, + ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle imgOut = + sycl::ext::oneapi::experimental::create_image(img_mem_2, desc, dev, + ctxt); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + float sum = 0; + // Extension: read image data from handle + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + imgIn1, sycl::int2(dim0, dim1)); + sycl::float4 px2 = + sycl::ext::oneapi::experimental::read_image( + imgIn2, sycl::int2(dim0, dim1)); + + sum = px1[0] + px2[0]; + + // Extension: write to image with handle + sycl::ext::oneapi::experimental::write_image( + imgOut, sycl::int2(dim0, dim1), sycl::float4(sum)); + }); + }); + + q.wait_and_throw(); + + // Extension: copy data from device to host (handler variant) + q.submit([&](sycl::handler &cgh) { + cgh.ext_oneapi_copy(img_mem_2.get_handle(), out.data(), desc); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgIn1, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgIn2, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgOut, dev, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i][0] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i][0] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/read_write_2D_subregion.cpp b/sycl/test-e2e/bindless_images/read_write_2D_subregion.cpp new file mode 100644 index 0000000000000..6f98565ce97db --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_write_2D_subregion.cpp @@ -0,0 +1,153 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 32; + size_t height = 32; + size_t N = width * height; + std::vector out(N); + std::vector expected(N / 4); + std::vector dataIn1(N / 4); + std::vector dataIn2(N / 4); + for (int i = 0; i < width / 2; i++) { + for (int j = 0; j < height / 2; j++) { + expected[j + ((height / 2) * i)] = j * 3; + dataIn1[j + ((height / 2) * i)] = j; + dataIn2[j + ((height / 2) * i)] = j * 2; + } + } + + // Image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::r, + sycl::image_channel_type::fp32); + + try { + // Extension: allocate memory on device and create the handle + sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, q); + sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, q); + sycl::ext::oneapi::experimental::image_mem img_mem_2(desc, q); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 = + sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, q); + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 = + sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, q); + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle3 = + sycl::ext::oneapi::experimental::create_image(img_mem_2, desc, q); + + // Extension: copy over data to device (four subregions/quadrants) + sycl::range copyExtent = {width / 2, height / 2, 1}; + sycl::range srcExtent = {width / 2, height / 2, 0}; + + q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, srcExtent, + img_mem_0.get_handle(), {0, 0, 0}, desc, copyExtent); + q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, srcExtent, + img_mem_0.get_handle(), {width / 2, 0, 0}, desc, + copyExtent); + q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, srcExtent, + img_mem_0.get_handle(), {0, height / 2, 0}, desc, + copyExtent); + q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, srcExtent, + img_mem_0.get_handle(), {width / 2, height / 2, 0}, desc, + copyExtent); + + q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, srcExtent, + img_mem_1.get_handle(), {0, 0, 0}, desc, copyExtent); + q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, srcExtent, + img_mem_1.get_handle(), {width / 2, 0, 0}, desc, + copyExtent); + q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, srcExtent, + img_mem_1.get_handle(), {0, height / 2, 0}, desc, + copyExtent); + q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, srcExtent, + img_mem_1.get_handle(), {width / 2, height / 2, 0}, desc, + copyExtent); + + q.wait_and_throw(); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + float sum = 0; + // Extension: read image data from handle + float px1 = sycl::ext::oneapi::experimental::read_image( + imgHandle1, sycl::int2(dim0, dim1)); + float px2 = sycl::ext::oneapi::experimental::read_image( + imgHandle2, sycl::int2(dim0, dim1)); + + sum = px1 + px2; + // Extension: write to image with handle + sycl::ext::oneapi::experimental::write_image( + imgHandle3, sycl::int2(dim0, dim1), sum); + }); + }); + q.wait_and_throw(); + + // Extension: copy data from device to host (two sub-regions) + sycl::range copy_extent_2 = {width, height / 2, 1}; + sycl::range dest_extent_0 = {width, height, 0}; + q.ext_oneapi_copy(img_mem_2.get_handle(), {0, 0, 0}, desc, out.data(), + {0, 0, 0}, dest_extent_0, copy_extent_2); + q.ext_oneapi_copy(img_mem_2.get_handle(), {0, height / 2, 0}, desc, + out.data(), {0, height / 2, 0}, dest_extent_0, + copy_extent_2); + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, q); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, q); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i % (N / 4)]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i][0] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/read_write_3D.cpp b/sycl/test-e2e/bindless_images/read_write_3D.cpp new file mode 100644 index 0000000000000..1f42edc1ab8b4 --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_write_3D.cpp @@ -0,0 +1,136 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t height = 13; + size_t width = 7; + size_t depth = 11; + size_t N = height * width * depth; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + std::vector dataIn2(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < depth; k++) { + expected[i + width * (j + height * k)] = j * 3; + dataIn1[i + width * (j + height * k)] = {j, j, j, j}; + dataIn2[i + width * (j + height * k)] = {j * 2, j * 2, j * 2, j * 2}; + } + } + } + + try { + // Extension: image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height, depth}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + + // Extension: allocate memory on device and create the handle + // Input images memory + sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, dev, ctxt); + + // Output image memory + sycl::ext::oneapi::experimental::image_mem img_mem_2(desc, dev, ctxt); + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn1.data(), img_mem_0.get_handle(), desc); + q.ext_oneapi_copy(dataIn2.data(), img_mem_1.get_handle(), desc); + q.wait_and_throw(); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgIn1 = + sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, dev, + ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle imgIn2 = + sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, dev, + ctxt); + + sycl::ext::oneapi::experimental::unsampled_image_handle imgOut = + sycl::ext::oneapi::experimental::create_image(img_mem_2, desc, dev, + ctxt); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range<3>{{width, height, depth}, {width, height, depth}}, + [=](sycl::nd_item<3> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + size_t dim2 = it.get_local_id(2); + float sum = 0; + // Extension: read image data from handle + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + imgIn1, sycl::int4(dim0, dim1, dim2, 0)); + sycl::float4 px2 = + sycl::ext::oneapi::experimental::read_image( + imgIn2, sycl::int4(dim0, dim1, dim2, 0)); + + sum = px1[0] + px2[0]; + // Extension: write to image with handle + sycl::ext::oneapi::experimental::write_image( + imgOut, sycl::int4(dim0, dim1, dim2, 0), sycl::float4(sum)); + }); + }); + + q.wait_and_throw(); + // Extension: copy data from device to host + q.ext_oneapi_copy(img_mem_2.get_handle(), out.data(), desc); + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgIn1, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgIn2, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgOut, dev, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i][0] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i][0] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp b/sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp new file mode 100644 index 0000000000000..9b4b21640dbb5 --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp @@ -0,0 +1,174 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 16; + size_t height = 16; + size_t depth = 8; + size_t N = width * height * depth; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + std::vector dataIn2(N); + // ROW-MAJOR + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < depth; k++) { + expected[k + (depth) * (j + (height)*i)] = + (k + (depth) * (j + (height)*i)) * 3; + dataIn1[k + (depth) * (j + (height)*i)] = + k + (depth) * (j + (height)*i); + dataIn2[k + (depth) * (j + (height)*i)] = + (k + (depth) * (j + (height)*i)) * 2; + } + } + } + + try { + + // Extension: image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height, depth}, sycl::image_channel_order::r, + sycl::image_channel_type::fp32); + + // Extension: allocate memory on device and create the handle + sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, q); + sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, q); + sycl::ext::oneapi::experimental::image_mem img_mem_2(desc, q); + + // Extension: copy over data to device (8 sub-regions) + sycl::range copy_extent_0 = {width / 2, height / 2, depth / 2}; + sycl::range src_extent_0 = {width, height, depth}; + + // First image with 8 sub-regions + q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, src_extent_0, + img_mem_0.get_handle(), {0, 0, 0}, desc, copy_extent_0); + q.ext_oneapi_copy(dataIn1.data(), {width / 2, 0, 0}, src_extent_0, + img_mem_0.get_handle(), {width / 2, 0, 0}, desc, + copy_extent_0); + q.ext_oneapi_copy(dataIn1.data(), {0, height / 2, 0}, src_extent_0, + img_mem_0.get_handle(), {0, height / 2, 0}, desc, + copy_extent_0); + q.ext_oneapi_copy(dataIn1.data(), {0, 0, depth / 2}, src_extent_0, + img_mem_0.get_handle(), {0, 0, depth / 2}, desc, + copy_extent_0); + q.ext_oneapi_copy(dataIn1.data(), {width / 2, height / 2, 0}, src_extent_0, + img_mem_0.get_handle(), {width / 2, height / 2, 0}, desc, + copy_extent_0); + q.ext_oneapi_copy(dataIn1.data(), {0, height / 2, depth / 2}, src_extent_0, + img_mem_0.get_handle(), {0, height / 2, depth / 2}, desc, + copy_extent_0); + q.ext_oneapi_copy(dataIn1.data(), {width / 2, 0, depth / 2}, src_extent_0, + img_mem_0.get_handle(), {width / 2, 0, depth / 2}, desc, + copy_extent_0); + q.ext_oneapi_copy(dataIn1.data(), {width / 2, height / 2, depth / 2}, + src_extent_0, img_mem_0.get_handle(), + {width / 2, height / 2, depth / 2}, desc, copy_extent_0); + + // Second image with 2 sub-regions + sycl::range copy_extent_1 = {width, height, depth / 2}; + sycl::range src_extent_1 = {width, height, depth}; + q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, src_extent_1, + img_mem_1.get_handle(), {0, 0, 0}, desc, copy_extent_1); + q.ext_oneapi_copy(dataIn2.data(), {0, 0, depth / 2}, src_extent_1, + img_mem_1.get_handle(), {0, 0, depth / 2}, desc, + copy_extent_1); + + q.wait_and_throw(); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 = + sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, q); + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 = + sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, q); + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle3 = + sycl::ext::oneapi::experimental::create_image(img_mem_2, desc, q); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range<3>{{width, height, depth}, {16, 16, 2}}, + [=](sycl::nd_item<3> it) { + size_t dim0 = it.get_global_id(0); + size_t dim1 = it.get_global_id(1); + size_t dim2 = it.get_global_id(2); + float sum = 0; + // Extension: read image data from handle + float px1 = sycl::ext::oneapi::experimental::read_image( + imgHandle1, sycl::int4(dim0, dim1, dim2, 0)); + float px2 = sycl::ext::oneapi::experimental::read_image( + imgHandle2, sycl::int4(dim0, dim1, dim2, 0)); + + sum = px1 + px2; + // Extension: write to image with handle + sycl::ext::oneapi::experimental::write_image( + imgHandle3, sycl::int4(dim0, dim1, dim2, 0), sum); + }); + }); + + q.wait_and_throw(); + + // Extension: copy data from device to host (two sub-regions) + sycl::range copy_extent_2 = {width, height, depth / 2}; + sycl::range dest_extent_0 = {width, height, depth}; + q.ext_oneapi_copy(img_mem_2.get_handle(), {0, 0, 0}, desc, out.data(), + {0, 0, 0}, dest_extent_0, copy_extent_2); + q.ext_oneapi_copy(img_mem_2.get_handle(), {0, 0, depth / 2}, desc, + out.data(), {0, 0, depth / 2}, dest_extent_0, + copy_extent_2); + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, q); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, q); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle3, q); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/read_write_unsampled.cpp b/sycl/test-e2e/bindless_images/read_write_unsampled.cpp new file mode 100644 index 0000000000000..69516ade249e1 --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_write_unsampled.cpp @@ -0,0 +1,664 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include +#include +#include + +static sycl::device dev; + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +// Helpers and utilities +struct util { + template ::value, bool> = true> + static void fill_rand(std::vector> &v) { + std::default_random_engine generator; + std::uniform_int_distribution distribution(0, 100); + for (int i = 0; i < v.size(); ++i) { + v[i] = sycl::vec(distribution(generator)); + } + } + + template ::value, bool> = true> + static void fill_rand(std::vector> &v) { + std::default_random_engine generator; + std::uniform_real_distribution distribution(0.0, 100.0); + for (int i = 0; i < v.size(); ++i) { + v[i] = sycl::vec(distribution(generator)); + } + } + + template < + typename DType, int NChannels, + std::enable_if_t::value, bool> = true> + static void fill_rand(std::vector> &v) { + std::default_random_engine generator; + std::uniform_real_distribution distribution(0.0, 100.0); + for (int i = 0; i < v.size(); ++i) { + v[i] = sycl::vec(distribution(generator)); + } + } + + template + static void add_host(const std::vector> &in_0, + const std::vector> &in_1, + std::vector> &out) { + for (int i = 0; i < out.size(); ++i) { + for (int j = 0; j < NChannels; ++j) { + out[i][j] = in_0[i][j] + in_1[i][j]; + } + } + } + + template > + static DType add_kernel(const DType in_0, const DType in_1) { + return in_0 + in_1; + } + + template 1)>> + static sycl::vec + add_kernel(const sycl::vec &in_0, + const sycl::vec &in_1) { + sycl::vec out; + for (int i = 0; i < NChannels; ++i) { + out[i] = in_0[i] + in_1[i]; + } + return out; + } + + // parallel_for 3D + template > + static void run_ndim_test( + sycl::queue q, sycl::range<3> globalSize, sycl::range<3> localSize, + sycl::ext::oneapi::experimental::unsampled_image_handle input_0, + sycl::ext::oneapi::experimental::unsampled_image_handle input_1, + sycl::ext::oneapi::experimental::unsampled_image_handle output) { + using VecType = sycl::vec; + try { + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range{globalSize, localSize}, + [=](sycl::nd_item it) { + size_t dim0 = it.get_global_id(0); + size_t dim1 = it.get_global_id(1); + size_t dim2 = it.get_global_id(2); + + if constexpr (NChannels >= 1) { + VecType px1 = + sycl::ext::oneapi::experimental::read_image( + input_0, sycl::int4(dim0, dim1, dim2, 0)); + VecType px2 = + sycl::ext::oneapi::experimental::read_image( + input_1, sycl::int4(dim0, dim1, dim2, 0)); + + auto sum = + VecType(util::add_kernel(px1, px2)); + sycl::ext::oneapi::experimental::write_image( + output, sycl::int4(dim0, dim1, dim2, 0), VecType(sum)); + } else { + DType px1 = sycl::ext::oneapi::experimental::read_image( + input_0, sycl::int4(dim0, dim1, dim2, 0)); + DType px2 = sycl::ext::oneapi::experimental::read_image( + input_1, sycl::int4(dim0, dim1, dim2, 0)); + + auto sum = DType(util::add_kernel(px1, px2)); + sycl::ext::oneapi::experimental::write_image( + output, sycl::int4(dim0, dim1, dim2, 0), DType(sum)); + } + }); + }); + } catch (sycl::exception e) { + std::cout << "\tKernel submission failed! " << e.what() << std::endl; + } catch (...) { + std::cout << "\tKernel submission failed!" << std::endl; + } + } + + // parallel_for 2D + template > + static void run_ndim_test( + sycl::queue q, sycl::range<2> globalSize, sycl::range<2> localSize, + sycl::ext::oneapi::experimental::unsampled_image_handle input_0, + sycl::ext::oneapi::experimental::unsampled_image_handle input_1, + sycl::ext::oneapi::experimental::unsampled_image_handle output) { + using VecType = sycl::vec; + try { + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range{globalSize, localSize}, + [=](sycl::nd_item it) { + size_t dim0 = it.get_global_id(0); + size_t dim1 = it.get_global_id(1); + + if constexpr (NChannels >= 1) { + VecType px1 = + sycl::ext::oneapi::experimental::read_image( + input_0, sycl::int2(dim0, dim1)); + VecType px2 = + sycl::ext::oneapi::experimental::read_image( + input_1, sycl::int2(dim0, dim1)); + + auto sum = + VecType(util::add_kernel(px1, px2)); + sycl::ext::oneapi::experimental::write_image( + output, sycl::int2(dim0, dim1), VecType(sum)); + } else { + DType px1 = sycl::ext::oneapi::experimental::read_image( + input_0, sycl::int2(dim0, dim1)); + DType px2 = sycl::ext::oneapi::experimental::read_image( + input_1, sycl::int2(dim0, dim1)); + + auto sum = DType(util::add_kernel(px1, px2)); + sycl::ext::oneapi::experimental::write_image( + output, sycl::int2(dim0, dim1), DType(sum)); + } + }); + }); + } catch (sycl::exception e) { + std::cout << "\tKernel submission failed! " << e.what() << std::endl; + } catch (...) { + std::cout << "\tKernel submission failed!" << std::endl; + } + } + + // parallel_for 1D + template > + static void run_ndim_test( + sycl::queue q, sycl::range<1> globalSize, sycl::range<1> localSize, + sycl::ext::oneapi::experimental::unsampled_image_handle input_0, + sycl::ext::oneapi::experimental::unsampled_image_handle input_1, + sycl::ext::oneapi::experimental::unsampled_image_handle output) { + using VecType = sycl::vec; + try { + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range{globalSize, localSize}, + [=](sycl::nd_item it) { + size_t dim0 = it.get_global_id(0); + + if constexpr (NChannels >= 1) { + VecType px1 = + sycl::ext::oneapi::experimental::read_image( + input_0, int(dim0)); + VecType px2 = + sycl::ext::oneapi::experimental::read_image( + input_1, int(dim0)); + + auto sum = + VecType(util::add_kernel(px1, px2)); + sycl::ext::oneapi::experimental::write_image( + output, int(dim0), VecType(sum)); + } else { + DType px1 = sycl::ext::oneapi::experimental::read_image( + input_0, int(dim0)); + DType px2 = sycl::ext::oneapi::experimental::read_image( + input_1, int(dim0)); + + auto sum = DType(util::add_kernel(px1, px2)); + sycl::ext::oneapi::experimental::write_image( + output, int(dim0), DType(sum)); + } + }); + }); + } catch (sycl::exception e) { + std::cout << "\tKernel submission failed! " << e.what() << std::endl; + } catch (...) { + std::cout << "\tKernel submission failed!" << std::endl; + } + } +}; + +template +bool run_test(sycl::range dims, sycl::range localSize, + unsigned int seed = 0) { + using VecType = sycl::vec; + + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // skip half tests if not supported + if constexpr (std::is_same_v) { + if (!dev.has(sycl::aspect::fp16)) { +#ifdef VERBOSE_PRINT + std::cout << "Test skipped due to lack of device support for fp16\n"; +#endif + return false; + } + } + + size_t num_elems = dims[0]; + if (NDims > 1) + num_elems *= dims[1]; + if (NDims > 2) + num_elems *= dims[2]; + + std::vector input_0(num_elems); + std::vector input_1(num_elems); + std::vector expected(num_elems); + std::vector actual(num_elems); + + std::srand(seed); + util::fill_rand(input_0); + util::fill_rand(input_1); + util::add_host(input_0, input_1, expected); + + try { + sycl::ext::oneapi::experimental::image_descriptor desc(dims, COrder, CType); + + // Extension: allocate memory on device and create the handle + sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, q); + sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, q); + sycl::ext::oneapi::experimental::image_mem img_mem_2(desc, q); + + auto img_input_0 = + sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, q); + auto img_input_1 = + sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, q); + auto img_output = + sycl::ext::oneapi::experimental::create_image(img_mem_2, desc, q); + + // Extension: copy over data to device + q.ext_oneapi_copy(input_0.data(), img_mem_0.get_handle(), desc); + q.ext_oneapi_copy(input_1.data(), img_mem_1.get_handle(), desc); + q.wait(); + + { + sycl::range globalSize = dims; + q.wait(); + util::run_ndim_test( + q, globalSize, localSize, img_input_0, img_input_1, img_output); + q.wait(); + + q.ext_oneapi_copy(img_mem_2.get_handle(), actual.data(), desc); + q.wait(); + } + + // Cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(img_input_0, q); + sycl::ext::oneapi::experimental::destroy_image_handle(img_input_1, q); + sycl::ext::oneapi::experimental::destroy_image_handle(img_output, q); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < num_elems; i++) { + for (int j = 0; j < NChannels; ++j) { + bool mismatch = false; + if (actual[i][j] != expected[i][j]) { + mismatch = true; + validated = false; + } + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "\tResult mismatch at [" << i << "][" << j + << "] Expected: " << +expected[i][j] + << ", Actual: " << +actual[i][j] << std::endl; +#else + break; +#endif + } + } + } +#ifdef VERBOSE_PRINT + if (validated) { + std::cout << "\tTest passed!" << std::endl; + } else { + std::cout << "\tTest failed!\n"; + } +#endif + + return !validated; +} + +void printTestName(std::string name) { +#ifdef VERBOSE_PRINT + std::cout << name; +#endif +} + +int main() { + + unsigned int seed = 0; + bool failed = false; + + printTestName("Running 1D int\n"); + failed |= + run_test<1, int, 1, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::r, class int_1d>({32}, {2}, seed); + printTestName("Running 2D int\n"); + failed |= run_test<2, int32_t, 1, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::r, class int_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D int\n"); + failed |= run_test<3, int32_t, 1, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::r, class int_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D int2\n"); + failed |= + run_test<1, int, 2, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::rg, class int2_1d>({32}, {2}, seed); + printTestName("Running 2D int2\n"); + failed |= run_test<2, int32_t, 2, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::rg, class int2_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D int2\n"); + failed |= run_test<3, int32_t, 2, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::rg, class int2_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D int4\n"); + failed |= + run_test<1, int, 4, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::rgba, class int4_1d>({32}, {2}, seed); + printTestName("Running 2D int4\n"); + failed |= run_test<2, int32_t, 4, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::rgba, class int4_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D int4\n"); + failed |= run_test<3, int32_t, 4, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::rgba, class int4_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + + printTestName("Running 1D unsigned int\n"); + failed |= + run_test<1, unsigned int, 1, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::r, class uint_1d>({32}, {2}, seed); + printTestName("Running 2D unsigned int\n"); + failed |= run_test<2, uint32_t, 1, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::r, class uint_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D unsigned int\n"); + failed |= run_test<3, uint32_t, 1, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::r, class uint_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D unsigned int2\n"); + failed |= + run_test<1, unsigned int, 2, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::rg, class uint2_1d>({32}, {2}, seed); + printTestName("Running 2D unsigned int2\n"); + failed |= run_test<2, uint32_t, 2, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::rg, class uint2_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D unsigned int2\n"); + failed |= run_test<3, uint32_t, 2, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::rg, class uint2_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D unsigned int4\n"); + failed |= + run_test<1, unsigned int, 4, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::rgba, class uint4_1d>({32}, {2}, + seed); + printTestName("Running 2D unsigned int4\n"); + failed |= run_test<2, uint32_t, 4, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::rgba, class uint4_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D unsigned int4\n"); + failed |= run_test<3, uint32_t, 4, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::rgba, class uint4_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + + printTestName("Running 1D short\n"); + failed |= + run_test<1, short, 1, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::r, class short_1d>({32}, {2}, seed); + printTestName("Running 2D short\n"); + failed |= run_test<2, short, 1, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::r, class short_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D short\n"); + failed |= run_test<3, short, 1, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::r, class short_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D short2\n"); + failed |= + run_test<1, short, 2, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::rg, class short2_1d>({32}, {2}, seed); + printTestName("Running 2D short2\n"); + failed |= run_test<2, short, 2, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::rg, class short2_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D short2\n"); + failed |= run_test<3, short, 2, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::rg, class short2_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D short4\n"); + failed |= run_test<1, short, 4, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::rgba, class short4_1d>( + {32}, {2}, seed); + printTestName("Running 2D short4\n"); + failed |= run_test<2, short, 4, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::rgba, class short4_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D short4\n"); + failed |= run_test<3, short, 4, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::rgba, class short4_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + + printTestName("Running 1D unsigned short\n"); + failed |= + run_test<1, unsigned short, 1, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::r, class ushort_1d>({32}, {2}, seed); + printTestName("Running 2D unsigned short\n"); + failed |= + run_test<2, unsigned short, 1, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::r, class ushort_2d>({2816, 32}, + {32, 32}, seed); + printTestName("Running 3D unsigned short\n"); + failed |= + run_test<3, unsigned short, 1, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::r, class ushort_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D unsigned short2\n"); + failed |= + run_test<1, unsigned short, 2, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::rg, class ushort2_1d>({32}, {2}, + seed); + printTestName("Running 2D unsigned short2\n"); + failed |= + run_test<2, unsigned short, 2, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::rg, class ushort2_2d>({2816, 32}, + {32, 32}, seed); + printTestName("Running 3D unsigned short2\n"); + failed |= + run_test<3, unsigned short, 2, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::rg, class ushort2_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D unsigned short4\n"); + failed |= + run_test<1, unsigned short, 4, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::rgba, class ushort4_1d>({32}, {2}, + seed); + printTestName("Running 2D unsigned short4\n"); + failed |= + run_test<2, unsigned short, 4, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::rgba, class ushort4_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D unsigned short4\n"); + failed |= + run_test<3, unsigned short, 4, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::rgba, class ushort4_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + + printTestName("Running 1D char\n"); + failed |= + run_test<1, signed char, 1, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::r, class char_1d>({32}, {2}, seed); + printTestName("Running 2D char\n"); + failed |= run_test<2, signed char, 1, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::r, class char_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D char\n"); + failed |= run_test<3, signed char, 1, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::r, class char_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D char2\n"); + failed |= + run_test<1, signed char, 2, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::rg, class char2_1d>({32}, {2}, seed); + printTestName("Running 2D char2\n"); + failed |= run_test<2, signed char, 2, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::rg, class char2_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D char2\n"); + failed |= run_test<3, signed char, 2, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::rg, class char2_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D char4\n"); + failed |= run_test<1, signed char, 4, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::rgba, class char4_1d>({32}, {2}, + seed); + printTestName("Running 2D char4\n"); + failed |= run_test<2, signed char, 4, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::rgba, class char4_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D char4\n"); + failed |= run_test<3, signed char, 4, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::rgba, class char4_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + + printTestName("Running 1D unsigned char\n"); + failed |= + run_test<1, unsigned char, 1, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::r, class uchar_1d>({32}, {2}, seed); + printTestName("Running 2D unsigned char\n"); + failed |= + run_test<2, unsigned char, 1, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::r, class uchar_2d>({2816, 32}, + {32, 32}, seed); + printTestName("Running 3D unsigned char\n"); + failed |= + run_test<3, unsigned char, 1, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::r, class uchar_3d>({48, 128, 32}, + {16, 16, 4}, seed); + printTestName("Running 1D unsigned char2\n"); + failed |= + run_test<1, unsigned char, 2, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rg, class uchar2_1d>({32}, {2}, seed); + printTestName("Running 2D unsigned char2\n"); + failed |= + run_test<2, unsigned char, 2, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rg, class uchar2_2d>({2816, 32}, + {32, 32}, seed); + printTestName("Running 3D unsigned char2\n"); + failed |= + run_test<3, unsigned char, 2, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rg, class uchar2_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D unsigned char4\n"); + failed |= + run_test<1, unsigned char, 4, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rgba, class uchar4_1d>({32}, {2}, + seed); + printTestName("Running 2D unsigned char4\n"); + failed |= + run_test<2, unsigned char, 4, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rgba, class uchar4_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D unsigned char4\n"); + failed |= + run_test<3, unsigned char, 4, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rgba, class uchar4_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + + printTestName("Running 1D float\n"); + failed |= run_test<1, float, 1, sycl::image_channel_type::fp32, + sycl::image_channel_order::r, class float_1d>({1024}, + {512}, seed); + printTestName("Running 2D float\n"); + failed |= run_test<2, float, 1, sycl::image_channel_type::fp32, + sycl::image_channel_order::r, class float_2d>( + {4096, 3808}, {32, 32}, seed); + printTestName("Running 3D float\n"); + failed |= run_test<3, float, 1, sycl::image_channel_type::fp32, + sycl::image_channel_order::r, class float_3d>( + {1024, 832, 32}, {16, 16, 4}, seed); + printTestName("Running 1D float2\n"); + failed |= run_test<1, float, 2, sycl::image_channel_type::fp32, + sycl::image_channel_order::rg, class float2_1d>( + {608}, {32}, seed); + printTestName("Running 2D float2\n"); + failed |= run_test<2, float, 2, sycl::image_channel_type::fp32, + sycl::image_channel_order::rg, class float2_2d>( + {3808, 4096}, {32, 32}, seed); + printTestName("Running 3D float2\n"); + failed |= run_test<3, float, 2, sycl::image_channel_type::fp32, + sycl::image_channel_order::rg, class float2_3d>( + {832, 1024, 32}, {16, 16, 4}, seed); + printTestName("Running 1D float4\n"); + failed |= run_test<1, float, 4, sycl::image_channel_type::fp32, + sycl::image_channel_order::rgba, class float4_1d>( + {1024}, {512}, seed); + printTestName("Running 2D float4\n"); + failed |= run_test<2, float, 4, sycl::image_channel_type::fp32, + sycl::image_channel_order::rgba, class float4_2d>( + {4096, 4096}, {32, 32}, seed); + printTestName("Running 3D float4\n"); + failed |= run_test<3, float, 4, sycl::image_channel_type::fp32, + sycl::image_channel_order::rgba, class float4_3d>( + {1024, 1024, 16}, {16, 16, 4}, seed); + + printTestName("Running 1D half\n"); + failed |= + run_test<1, sycl::half, 1, sycl::image_channel_type::fp16, + sycl::image_channel_order::r, class half_1d>({32}, {2}, seed); + printTestName("Running 2D half\n"); + failed |= run_test<2, sycl::half, 1, sycl::image_channel_type::fp16, + sycl::image_channel_order::r, class half_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D half\n"); + failed |= run_test<3, sycl::half, 1, sycl::image_channel_type::fp16, + sycl::image_channel_order::r, class half_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D half2\n"); + failed |= + run_test<1, sycl::half, 2, sycl::image_channel_type::fp16, + sycl::image_channel_order::rg, class half2_1d>({32}, {2}, seed); + printTestName("Running 2D half2\n"); + failed |= run_test<2, sycl::half, 2, sycl::image_channel_type::fp16, + sycl::image_channel_order::rg, class half2_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D half2\n"); + failed |= run_test<3, sycl::half, 2, sycl::image_channel_type::fp16, + sycl::image_channel_order::rg, class half2_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D half4\n"); + failed |= run_test<1, sycl::half, 4, sycl::image_channel_type::fp16, + sycl::image_channel_order::rgba, class half4_1d>({32}, {2}, + seed); + printTestName("Running 2D half4\n"); + failed |= run_test<2, sycl::half, 4, sycl::image_channel_type::fp16, + sycl::image_channel_order::rgba, class half4_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D half4\n"); + failed |= run_test<3, sycl::half, 4, sycl::image_channel_type::fp16, + sycl::image_channel_order::rgba, class half4_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + + if (failed) { + std::cerr << "An error has occured!\n"; + return 1; + } + + std::cout << "All tests passed!\n"; + return 0; +} diff --git a/sycl/test-e2e/bindless_images/sampling_1D.cpp b/sycl/test-e2e/bindless_images/sampling_1D.cpp new file mode 100644 index 0000000000000..f1fac37cea56e --- /dev/null +++ b/sycl/test-e2e/bindless_images/sampling_1D.cpp @@ -0,0 +1,118 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + +#if defined(SYCL_EXT_ONEAPI_FREE_FUNCTION_QUERIES) + assert(SYCL_EXT_ONEAPI_FREE_FUNCTION_QUERIES == 1); +#if defined(VERBOSE_PRINT) + std::cout << "SYCL_EXT_ONEAPI_BINDLESS_IMAGES is defined!" << std::endl; +#endif +#else + std::cerr << "Bindless images feature test macro is not defined!" + << std::endl; + assert(false); +#endif // defined(SYCL_EXT_ONEAPI_BINDLESS_IMAGES) + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + constexpr size_t N = 32; + size_t width = N; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + for (int i = 0; i < N; i++) { + expected[i] = i; + dataIn1[i] = float(i); + } + + try { + // Extension: image descriptor + sycl::ext::oneapi::experimental::image_descriptor desc( + {width}, sycl::image_channel_order::r, sycl::image_channel_type::fp32); + + sycl::ext::oneapi::experimental::bindless_image_sampler samp1( + sycl::addressing_mode::repeat, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::linear); + + // Extension: allocate memory on device + sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn1.data(), img_mem_0.get_handle(), desc); + q.wait_and_throw(); + + // Extension: create the image and return the handle + auto imgHandle1 = sycl::ext::oneapi::experimental::create_image( + img_mem_0, samp1, desc, dev, ctxt); + + sycl::buffer buf((float *)out.data(), N); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access(cgh, N); + + cgh.parallel_for(N, [=](sycl::id<1> id) { + // Normalize coordinate -- +0.5 to look towards centre of pixel + float x = float(id[0] + 0.5) / (float)N; + // Extension: read image data from handle + float px1 = + sycl::ext::oneapi::experimental::read_image(imgHandle1, x); + + outAcc[id] = px1; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, dev, + ctxt); + + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test passed!" << std::endl; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/sampling_2D.cpp b/sycl/test-e2e/bindless_images/sampling_2D.cpp new file mode 100644 index 0000000000000..b5c8dcf103d77 --- /dev/null +++ b/sycl/test-e2e/bindless_images/sampling_2D.cpp @@ -0,0 +1,147 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 5; + size_t height = 6; + size_t N = width * height; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + std::vector dataIn2(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + expected[i + (width * j)] = (i + (width * j)) * 3; + dataIn1[i + (width * j)] = {(i + (width * j)), 0, 0, 0}; + dataIn2[i + (width * j)] = {(i + (width * j)) * 2, 0, 0, 0}; + } + } + + try { + sycl::ext::oneapi::experimental::bindless_image_sampler samp1( + sycl::addressing_mode::repeat, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::linear); + + // Extension: image descriptor -- can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + size_t pitch = 0; + + // Extension: returns the device pointer to USM allocated pitched memory + auto img_mem_usm_0 = + sycl::ext::oneapi::experimental::pitched_alloc_device(&pitch, desc, q); + + if (img_mem_usm_0 == nullptr) { + std::cout << "Error allocating images!" << std::endl; + return 1; + } + + // Extension: allocate memory on device + sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); + + // Extension: copy over data to device for USM image (handler variant) + q.submit([&](sycl::handler &cgh) { + cgh.ext_oneapi_copy(dataIn1.data(), img_mem_usm_0, desc, pitch); + }); + + // Extension: copy over data to device for non-USM image + q.ext_oneapi_copy(dataIn2.data(), img_mem_0.get_handle(), desc); + q.wait_and_throw(); + + // Extension: create the images and return the handles + sycl::ext::oneapi::experimental::sampled_image_handle imgHandle1 = + sycl::ext::oneapi::experimental::create_image(img_mem_usm_0, pitch, + samp1, desc, dev, ctxt); + sycl::ext::oneapi::experimental::sampled_image_handle imgHandle2 = + sycl::ext::oneapi::experimental::create_image(img_mem_0, samp1, desc, + dev, ctxt); + + sycl::buffer buf((float *)out.data(), + sycl::range<2>{height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<2>{height, width}); + + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Normalize coordinates -- +0.5 to look towards centre of pixel + float fdim0 = float(dim0 + 0.5) / (float)width; + float fdim1 = float(dim1 + 0.5) / (float)height; + + // Extension: read image data from handle + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + imgHandle1, sycl::float2(fdim0, fdim1)); + sycl::float4 px2 = + sycl::ext::oneapi::experimental::read_image( + imgHandle2, sycl::float2(fdim0, fdim1)); + + outAcc[sycl::id<2>{dim1, dim0}] = px1[0] + px2[0]; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, dev, + ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, dev, + ctxt); + sycl::free(img_mem_usm_0, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp b/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp new file mode 100644 index 0000000000000..1601a205a4156 --- /dev/null +++ b/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp @@ -0,0 +1,152 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + if (!dev.has(sycl::aspect::ext_oneapi_bindless_images_shared_usm)) { + std::cout + << "images backed by USM shared allocations are not supported, skipping" + << std::endl; + return 0; + } + + // declare image data + size_t width = 5; + size_t height = 6; + size_t N = width * height; + size_t width_in_bytes = width * sizeof(float); + std::vector out(N); + std::vector expected(N); + std::vector dataIn(N); + + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + expected[i + (width * j)] = i + (width * j); + dataIn[i + (width * j)] = i + (width * j); + } + } + + try { + sycl::ext::oneapi::experimental::bindless_image_sampler samp1( + sycl::addressing_mode::clamp, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::linear); + + // Extension: image descriptor + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::r, + sycl::image_channel_type::fp32); + + auto device_pitch_align = dev.get_info< + sycl::ext::oneapi::experimental::info::device::image_pitch_align>(); + auto device_max_pitch = dev.get_info(); + + // Pitch requirements: + // - pitch % device_pitch_align == 0 + // - pitch >= width_in_bytes + // - pitch <= device_max_pitch + size_t pitch = device_pitch_align * + std::ceil(float(width_in_bytes) / float(device_pitch_align)); + assert(pitch <= device_max_pitch); + + // Shared USM allocation + auto img_mem = sycl::aligned_alloc_shared(device_pitch_align, + (pitch * height), dev, ctxt); + + if (img_mem == nullptr) { + std::cerr << "Error allocating images!" << std::endl; + return 1; + } + + // Copy to shared USM and incorporate pitch + for (size_t i = 0; i < height; i++) { + memcpy(static_cast(img_mem) + (i * pitch / sizeof(float)), + dataIn.data() + (i * width), width_in_bytes); + } + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::sampled_image_handle img_handle = + sycl::ext::oneapi::experimental::create_image(img_mem, pitch, samp1, + desc, dev, ctxt); + + sycl::buffer buf((float *)out.data(), + sycl::range<2>{height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<2>{height, width}); + + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Normalize coordinates -- +0.5 to look towards centre of pixel + float fdim0 = float(dim0 + 0.5) / (float)width; + float fdim1 = float(dim1 + 0.5) / (float)height; + + // Extension: read image data from handle + float px = sycl::ext::oneapi::experimental::read_image( + img_handle, sycl::float2(fdim0, fdim1)); + + outAcc[sycl::id<2>{dim1, dim0}] = px; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(img_handle, dev, + ctxt); + sycl::free(img_mem, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/sampling_2D_half.cpp b/sycl/test-e2e/bindless_images/sampling_2D_half.cpp new file mode 100644 index 0000000000000..4304bb6bd8aa3 --- /dev/null +++ b/sycl/test-e2e/bindless_images/sampling_2D_half.cpp @@ -0,0 +1,133 @@ +// REQUIRES: linux +// REQUIRES: cuda +// REQUIRES: aspect-fp16 + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 5; + size_t height = 6; + size_t N = width * height; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + expected[i + (width * j)] = i + (width * j); + dataIn1[i + (width * j)] = {i + (width * j), 0, 0, 0}; + } + } + + try { + sycl::ext::oneapi::experimental::bindless_image_sampler samp1( + sycl::addressing_mode::repeat, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::linear); + + unsigned int element_size_bytes = sizeof(sycl::half) * 4; + size_t width_in_bytes = width * element_size_bytes; + size_t pitch = 0; + + // Extension: returns the device pointer to USM allocated pitched memory + auto img_mem_0 = sycl::ext::oneapi::experimental::pitched_alloc_device( + &pitch, width_in_bytes, height, element_size_bytes, q); + + // Extension: image descriptor + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp16); + + if (img_mem_0 == nullptr) { + std::cout << "Error allocating images!" << std::endl; + return 1; + } + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn1.data(), img_mem_0, desc, pitch); + q.wait_and_throw(); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::sampled_image_handle imgHandle1 = + sycl::ext::oneapi::experimental::create_image(img_mem_0, pitch, samp1, + desc, dev, ctxt); + + sycl::buffer buf((sycl::half *)out.data(), + sycl::range<2>{height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<2>{height, width}); + + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Normalize coordinates -- +0.5 to look towards centre of pixel + float fdim0 = float(dim0 + 0.5) / (float)width; + float fdim1 = float(dim1 + 0.5) / (float)height; + + // Extension: read image data from handle + sycl::half4 px1 = + sycl::ext::oneapi::experimental::read_image( + imgHandle1, sycl::float2(fdim0, fdim1)); + + outAcc[sycl::id<2>{dim1, dim0}] = px1[0]; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, dev, + ctxt); + sycl::free(img_mem_0, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/sampling_3D.cpp b/sycl/test-e2e/bindless_images/sampling_3D.cpp new file mode 100644 index 0000000000000..a5d9b1cf803e1 --- /dev/null +++ b/sycl/test-e2e/bindless_images/sampling_3D.cpp @@ -0,0 +1,127 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 4; + size_t height = 6; + size_t depth = 8; + size_t N = width * height * depth; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < depth; k++) { + expected[i + width * (j + height * k)] = i + width * (j + height * k); + dataIn1[i + width * (j + height * k)] = {i + width * (j + height * k), + 0, 0, 0}; + } + } + } + + try { + // Extension: image descriptor + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height, depth}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + + sycl::ext::oneapi::experimental::bindless_image_sampler samp1( + sycl::addressing_mode::clamp, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::linear); + + // Extension: allocate memory on device + sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn1.data(), img_mem_0.get_handle(), desc); + q.wait_and_throw(); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::sampled_image_handle imgHandle1 = + sycl::ext::oneapi::experimental::create_image(img_mem_0, samp1, desc, + dev, ctxt); + + sycl::buffer buf((float *)out.data(), + sycl::range<3>{depth, height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<3>{depth, height, width}); + + cgh.parallel_for( + sycl::nd_range<3>{{width, height, depth}, {width, height, depth}}, + [=](sycl::nd_item<3> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + size_t dim2 = it.get_local_id(2); + + // Normalize coordinates -- +0.5 to look towards centre of pixel + float fdim0 = float(dim0 + 0.5) / (float)width; + float fdim1 = float(dim1 + 0.5) / (float)height; + float fdim2 = float(dim2 + 0.5) / (float)depth; + + // Extension: read image data from handle + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + imgHandle1, sycl::float4(fdim0, fdim1, fdim2, (float)0)); + + outAcc[sycl::id<3>{dim2, dim1, dim0}] = px1[0]; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, dev, + ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp new file mode 100644 index 0000000000000..dc48054844a9a --- /dev/null +++ b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp @@ -0,0 +1,270 @@ +// REQUIRES: linux +// REQUIRES: cuda +// REQUIRES: vulkan + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %link-vulkan %s -o %t.out +// RUN: %t.out + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +#include + +#include "vulkan_common.hpp" + +#include +#include +#include +#include + +// Returns true if validated correctly +bool run_sycl(int input_image_fd, size_t width, size_t height) { + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // Image descriptor - mapped to Vulkan image layout + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::r, + sycl::image_channel_type::fp32, + sycl::ext::oneapi::experimental::image_type::interop, 1 /*num_levels*/); + + sycl::ext::oneapi::experimental::bindless_image_sampler samp( + sycl::addressing_mode::repeat, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::linear); + + const size_t img_size = width * height * sizeof(float); + + // Extension: external memory descriptor + sycl::ext::oneapi::experimental::external_mem_descriptor< + sycl::ext::oneapi::experimental::external_mem_fd> + input_ext_mem_desc{input_image_fd, img_size}; + + // Extension: interop mem handle imported from file descriptor + sycl::ext::oneapi::experimental::interop_mem_handle input_interop_mem_handle = + sycl::ext::oneapi::experimental::import_external_memory( + input_ext_mem_desc, q); + + // Extension: interop mem handle imported from file descriptor + sycl::ext::oneapi::experimental::image_mem_handle input_mapped_mem_handle = + sycl::ext::oneapi::experimental::map_external_memory_array( + input_interop_mem_handle, desc, q); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::sampled_image_handle img_input = + sycl::ext::oneapi::experimental::create_image(input_mapped_mem_handle, + samp, desc, q); + + std::vector out(width * height); + + try { + sycl::buffer buf((float *)out.data(), + sycl::range<2>{height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<2>{height, width}); + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Normalize coordinates -- +0.5 to look towards centre of pixel + float fdim0 = float(dim0 + 0.5) / (float)width; + float fdim1 = float(dim1 + 0.5) / (float)height; + + // Extension: read image data from handle (Vulkan imported) + float pixel = sycl::ext::oneapi::experimental::read_image( + img_input, sycl::float2(fdim0, fdim1)); + + pixel *= 10.f; + outAcc[sycl::id<2>{dim1, dim0}] = pixel; + }); + }); + } catch (...) { + std::cerr << "Kernel submission failed!" << std::endl; + assert(false); + } + + try { + sycl::ext::oneapi::experimental::destroy_image_handle(img_input, q); + sycl::ext::oneapi::experimental::release_external_memory( + input_interop_mem_handle, q); + } catch (...) { + std::cerr << "Destroying interop memory failed!\n"; + } + + printString("Validating\n"); + bool validated = true; + for (int i = 0; i < width * height; i++) { + bool mismatch = false; + float expected = (float)(i)*10.f; + if (out[i] != expected) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected + << ", Actual: " << out[i] << "\n"; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!\n"; + return true; + } + std::cout << "Test failed!\n"; + return false; +} + +// Returns true if validated correctly +bool run_test() { + const uint32_t width = 16, height = 16; + const size_t imageSizeBytes = width * height * sizeof(float); + + printString("Creating input image\n"); + // Create input image memory + auto inputImage = vkutil::createImage( + VK_IMAGE_TYPE_2D, VK_FORMAT_R32_SFLOAT, {width, height, 1}, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_STORAGE_BIT); + auto inputImageMemoryTypeIndex = vkutil::getImageMemoryTypeIndex( + inputImage, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + auto inputMemory = + vkutil::allocateDeviceMemory(imageSizeBytes, inputImageMemoryTypeIndex); + VK_CHECK_CALL(vkBindImageMemory(vk_device, inputImage, inputMemory, + 0 /*memoryOffset*/)); + + printString("Creating staging buffers\n"); + // Create input staging memory + auto inputStagingBuffer = vkutil::createBuffer( + imageSizeBytes, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); + auto inputStagingMemoryTypeIndex = vkutil::getBufferMemoryTypeIndex( + inputStagingBuffer, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + auto inputStagingMemory = vkutil::allocateDeviceMemory( + imageSizeBytes, inputStagingMemoryTypeIndex, false /*exportable*/); + VK_CHECK_CALL(vkBindBufferMemory(vk_device, inputStagingBuffer, + inputStagingMemory, 0 /*memoryOffset*/)); + + printString("Populating staging buffer\n"); + // Populate staging memory + float *inputStagingData = nullptr; + VK_CHECK_CALL(vkMapMemory(vk_device, inputStagingMemory, 0 /*offset*/, + imageSizeBytes, 0 /*flags*/, + (void **)&inputStagingData)); + for (int i = 0; i < width * height; ++i) { + inputStagingData[i] = (float)i; + } + vkUnmapMemory(vk_device, inputStagingMemory); + + printString("Submitting image layout transition\n"); + // Transition image layouts + { + VkImageMemoryBarrier barrierInput = {}; + barrierInput.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrierInput.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + barrierInput.newLayout = VK_IMAGE_LAYOUT_GENERAL; + barrierInput.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrierInput.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrierInput.image = inputImage; + barrierInput.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrierInput.subresourceRange.levelCount = 1; + barrierInput.subresourceRange.layerCount = 1; + barrierInput.srcAccessMask = 0; + barrierInput.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + + VkCommandBufferBeginInfo cbbi = {}; + cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cbbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + VK_CHECK_CALL(vkBeginCommandBuffer(vk_computeCmdBuffer, &cbbi)); + vkCmdPipelineBarrier(vk_computeCmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrierInput); + VK_CHECK_CALL(vkEndCommandBuffer(vk_computeCmdBuffer)); + + VkSubmitInfo submission = {}; + submission.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submission.commandBufferCount = 1; + submission.pCommandBuffers = &vk_computeCmdBuffer; + + VK_CHECK_CALL(vkQueueSubmit(vk_compute_queue, 1 /*submitCount*/, + &submission, VK_NULL_HANDLE /*fence*/)); + VK_CHECK_CALL(vkQueueWaitIdle(vk_compute_queue)); + } + + printString("Copying staging memory to images\n"); + // Copy staging to main image memory + { + VkCommandBufferBeginInfo cbbi = {}; + cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cbbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + VkBufferImageCopy copyRegion = {}; + copyRegion.imageExtent = {width, height, 1}; + copyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copyRegion.imageSubresource.layerCount = 1; + + VK_CHECK_CALL(vkBeginCommandBuffer(vk_transferCmdBuffers[0], &cbbi)); + vkCmdCopyBufferToImage(vk_transferCmdBuffers[0], inputStagingBuffer, + inputImage, VK_IMAGE_LAYOUT_GENERAL, + 1 /*regionCount*/, ©Region); + VK_CHECK_CALL(vkEndCommandBuffer(vk_transferCmdBuffers[0])); + + VkSubmitInfo submission = {}; + submission.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submission.commandBufferCount = 1; + submission.pCommandBuffers = &vk_transferCmdBuffers[0]; + + VK_CHECK_CALL(vkQueueSubmit(vk_transfer_queue, 1 /*submitCount*/, + &submission, VK_NULL_HANDLE /*fence*/)); + VK_CHECK_CALL(vkQueueWaitIdle(vk_transfer_queue)); + } + + printString("Getting memory file descriptors and calling into SYCL\n"); + // Pass memory to SYCL for modification + auto input_fd = vkutil::getMemoryOpaqueFD(inputMemory); + bool validated = run_sycl(input_fd, width, height); + + // Cleanup + vkDestroyBuffer(vk_device, inputStagingBuffer, nullptr); + vkDestroyImage(vk_device, inputImage, nullptr); + vkFreeMemory(vk_device, inputStagingMemory, nullptr); + vkFreeMemory(vk_device, inputMemory, nullptr); + + return validated; +} + +int main() { + + if (vkutil::setupInstance() != VK_SUCCESS) { + std::cerr << "Instance setup failed!\n"; + return EXIT_FAILURE; + } + + if (vkutil::setupDevice("NVIDIA") != VK_SUCCESS) { + std::cerr << "Device setup failed!\n"; + return EXIT_FAILURE; + } + + if (vkutil::setupCommandBuffers() != VK_SUCCESS) { + std::cerr << "Compute pipeline setup failed!\n"; + return EXIT_FAILURE; + } + + bool validated = run_test(); + + if (vkutil::cleanup() != VK_SUCCESS) { + std::cerr << "Cleanup failed!\n"; + return EXIT_FAILURE; + } + + return validated ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp new file mode 100644 index 0000000000000..5b3f93a4c6af3 --- /dev/null +++ b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp @@ -0,0 +1,444 @@ +// REQUIRES: linux +// REQUIRES: cuda +// REQUIRES: vulkan + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %link-vulkan %s -o %t.out +// RUN: %t.out + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +#include + +#include "vulkan_common.hpp" + +#include +#include +#include +#include + +// Returns true if validated correctly +void run_sycl(int input_image_fd, int output_image_fd, + int sycl_wait_semaphore_fd, int sycl_done_semaphore_fd, + size_t width, size_t height) { + try { + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // Image descriptor - mapped to Vulkan image layout + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::rgba, + sycl::image_channel_type::unsigned_int32, + sycl::ext::oneapi::experimental::image_type::interop, 1 /*num_levels*/); + + const size_t img_size = width * height * sizeof(sycl::uint4); + + std::vector out(width * height); + + // Extension: external memory descriptor + sycl::ext::oneapi::experimental::external_mem_descriptor< + sycl::ext::oneapi::experimental::external_mem_fd> + inputExtMemDesc{input_image_fd, img_size}; + sycl::ext::oneapi::experimental::external_mem_descriptor< + sycl::ext::oneapi::experimental::external_mem_fd> + outputExtMemDesc{output_image_fd, img_size}; + + sycl::ext::oneapi::experimental::interop_mem_handle + input_interop_mem_handle = + sycl::ext::oneapi::experimental::import_external_memory( + inputExtMemDesc, dev, ctxt); + + sycl::ext::oneapi::experimental::interop_mem_handle + output_interop_mem_handle = + sycl::ext::oneapi::experimental::import_external_memory( + outputExtMemDesc, dev, ctxt); + + sycl::ext::oneapi::experimental::image_mem_handle input_mapped_mem_handle = + sycl::ext::oneapi::experimental::map_external_memory_array( + input_interop_mem_handle, desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem_handle output_mapped_mem_handle = + sycl::ext::oneapi::experimental::map_external_memory_array( + output_interop_mem_handle, desc, dev, ctxt); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle img_input = + sycl::ext::oneapi::experimental::create_image(input_mapped_mem_handle, + desc, dev, ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle img_output = + sycl::ext::oneapi::experimental::create_image(output_mapped_mem_handle, + desc, dev, ctxt); + + // Extension: import semaphores + sycl::ext::oneapi::experimental::external_semaphore_descriptor< + sycl::ext::oneapi::experimental::external_semaphore_fd> + sycl_wait_external_semaphore_desc{sycl_wait_semaphore_fd}; + + sycl::ext::oneapi::experimental::external_semaphore_descriptor< + sycl::ext::oneapi::experimental::external_semaphore_fd> + sycl_done_external_semaphore_desc{sycl_done_semaphore_fd}; + + sycl::ext::oneapi::experimental::interop_semaphore_handle + sycl_wait_interop_semaphore_handle = + sycl::ext::oneapi::experimental::import_external_semaphore( + sycl_wait_external_semaphore_desc, dev, ctxt); + + sycl::ext::oneapi::experimental::interop_semaphore_handle + sycl_done_interop_semaphore_handle = + sycl::ext::oneapi::experimental::import_external_semaphore( + sycl_done_external_semaphore_desc, dev, ctxt); + + // Extension: wait for imported semaphore + q.ext_oneapi_wait_external_semaphore(sycl_wait_interop_semaphore_handle); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {32, 32}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_global_id(0); + size_t dim1 = it.get_global_id(1); + + // Extension: read image data from handle (Vulkan imported) + sycl::uint4 pixel = + sycl::ext::oneapi::experimental::read_image( + img_input, sycl::int2(dim0, dim1)); + + pixel *= 10; + + // Extension: write image data using handle (Vulkan imported) + sycl::ext::oneapi::experimental::write_image( + img_output, sycl::int2(dim0, dim1), pixel); + }); + }); + + // Extension: signal imported semaphore + q.submit([&](sycl::handler &cgh) { + cgh.ext_oneapi_signal_external_semaphore( + sycl_done_interop_semaphore_handle); + }); + + // Wait for kernel completion before destroying external objects + q.wait_and_throw(); + + sycl::ext::oneapi::experimental::release_external_memory( + input_interop_mem_handle, dev, ctxt); + sycl::ext::oneapi::experimental::release_external_memory( + output_interop_mem_handle, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_external_semaphore( + sycl_wait_interop_semaphore_handle, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_external_semaphore( + sycl_done_interop_semaphore_handle, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(img_input, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(img_output, dev, + ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } +} + +// Returns true if validated correctly +bool run_test() { + const uint32_t width = 1024 * 4, height = 1024 * 4; + const size_t imageSizeBytes = width * height * sizeof(sycl::uint4); + + printString("Creating input image\n"); + // Create input image memory + auto inputImage = vkutil::createImage( + VK_IMAGE_TYPE_2D, VK_FORMAT_R32G32B32A32_UINT, {width, height, 1}, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_STORAGE_BIT); + auto inputImageMemoryTypeIndex = vkutil::getImageMemoryTypeIndex( + inputImage, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + auto inputMemory = + vkutil::allocateDeviceMemory(imageSizeBytes, inputImageMemoryTypeIndex); + VK_CHECK_CALL(vkBindImageMemory(vk_device, inputImage, inputMemory, + 0 /*memoryOffset*/)); + + printString("Creating output image\n"); + // Create output image memory + auto outputImage = vkutil::createImage( + VK_IMAGE_TYPE_2D, VK_FORMAT_R32G32B32A32_UINT, {width, height, 1}, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_STORAGE_BIT); + auto outputImageMemoryTypeIndex = vkutil::getImageMemoryTypeIndex( + outputImage, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + auto outputMemory = + vkutil::allocateDeviceMemory(imageSizeBytes, outputImageMemoryTypeIndex); + VK_CHECK_CALL(vkBindImageMemory(vk_device, outputImage, outputMemory, + 0 /*memoryOffset*/)); + + printString("Creating staging buffers\n"); + // Create input staging memory + auto inputStagingBuffer = vkutil::createBuffer( + imageSizeBytes, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); + auto inputStagingMemoryTypeIndex = vkutil::getBufferMemoryTypeIndex( + inputStagingBuffer, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + auto inputStagingMemory = vkutil::allocateDeviceMemory( + imageSizeBytes, inputStagingMemoryTypeIndex, false /*exportable*/); + VK_CHECK_CALL(vkBindBufferMemory(vk_device, inputStagingBuffer, + inputStagingMemory, 0 /*memoryOffset*/)); + + // Create output staging memory + auto outputStagingBuffer = vkutil::createBuffer( + imageSizeBytes, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); + auto outputStagingMemoryTypeIndex = vkutil::getBufferMemoryTypeIndex( + outputStagingBuffer, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + auto outputStagingMemory = vkutil::allocateDeviceMemory( + imageSizeBytes, outputStagingMemoryTypeIndex, false /*exportable*/); + VK_CHECK_CALL(vkBindBufferMemory(vk_device, outputStagingBuffer, + outputStagingMemory, 0 /*memoryOffset*/)); + + printString("Populating staging buffer\n"); + // Populate staging memory + sycl::vec *inputStagingData = nullptr; + VK_CHECK_CALL(vkMapMemory(vk_device, inputStagingMemory, 0 /*offset*/, + imageSizeBytes, 0 /*flags*/, + (void **)&inputStagingData)); + for (int i = 0; i < width * height; ++i) { + inputStagingData[i] = + sycl::vec{4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3}; + } + vkUnmapMemory(vk_device, inputStagingMemory); + + printString("Submitting image layout transition\n"); + // Transition image layouts + { + VkImageMemoryBarrier barrierInput = {}; + barrierInput.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrierInput.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + barrierInput.newLayout = VK_IMAGE_LAYOUT_GENERAL; + barrierInput.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrierInput.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrierInput.image = inputImage; + barrierInput.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrierInput.subresourceRange.levelCount = 1; + barrierInput.subresourceRange.layerCount = 1; + barrierInput.srcAccessMask = 0; + barrierInput.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + + VkImageMemoryBarrier barrierOutput = {}; + barrierOutput.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrierOutput.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + barrierOutput.newLayout = VK_IMAGE_LAYOUT_GENERAL; + barrierOutput.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrierOutput.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrierOutput.image = outputImage; + barrierOutput.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrierOutput.subresourceRange.levelCount = 1; + barrierOutput.subresourceRange.layerCount = 1; + barrierOutput.srcAccessMask = 0; + barrierOutput.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + + VkCommandBufferBeginInfo cbbi = {}; + cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cbbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + VK_CHECK_CALL(vkBeginCommandBuffer(vk_computeCmdBuffer, &cbbi)); + vkCmdPipelineBarrier(vk_computeCmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrierInput); + vkCmdPipelineBarrier(vk_computeCmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrierOutput); + VK_CHECK_CALL(vkEndCommandBuffer(vk_computeCmdBuffer)); + + VkSubmitInfo submission = {}; + submission.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submission.commandBufferCount = 1; + submission.pCommandBuffers = &vk_computeCmdBuffer; + + VK_CHECK_CALL(vkQueueSubmit(vk_compute_queue, 1 /*submitCount*/, + &submission, VK_NULL_HANDLE /*fence*/)); + VK_CHECK_CALL(vkQueueWaitIdle(vk_compute_queue)); + } + + // Create semaphore to later import in SYCL + printString("Creating semaphores\n"); + VkSemaphore syclWaitSemaphore; + { + VkExportSemaphoreCreateInfo esci = {}; + esci.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO; + esci.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; + + VkSemaphoreCreateInfo sci = {}; + sci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + sci.pNext = &esci; + VK_CHECK_CALL( + vkCreateSemaphore(vk_device, &sci, nullptr, &syclWaitSemaphore)); + } + + VkSemaphore syclDoneSemaphore; + { + VkExportSemaphoreCreateInfo esci = {}; + esci.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO; + esci.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; + + VkSemaphoreCreateInfo sci = {}; + sci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + sci.pNext = &esci; + VK_CHECK_CALL( + vkCreateSemaphore(vk_device, &sci, nullptr, &syclDoneSemaphore)); + } + + printString("Copying staging memory to images\n"); + // Copy staging to main image memory + { + VkCommandBufferBeginInfo cbbi = {}; + cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cbbi.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT; + + VkBufferImageCopy copyRegion = {}; + copyRegion.imageExtent = {width, height, 1}; + copyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copyRegion.imageSubresource.layerCount = 1; + + VK_CHECK_CALL(vkBeginCommandBuffer(vk_transferCmdBuffers[0], &cbbi)); + vkCmdCopyBufferToImage(vk_transferCmdBuffers[0], inputStagingBuffer, + inputImage, VK_IMAGE_LAYOUT_GENERAL, + 1 /*regionCount*/, ©Region); + VK_CHECK_CALL(vkEndCommandBuffer(vk_transferCmdBuffers[0])); + + std::vector stages{VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT}; + + VkSubmitInfo submission = {}; + submission.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submission.commandBufferCount = 1; + submission.pCommandBuffers = &vk_transferCmdBuffers[0]; + + submission.signalSemaphoreCount = 1; + submission.pSignalSemaphores = &syclWaitSemaphore; + submission.pWaitDstStageMask = stages.data(); + + VK_CHECK_CALL(vkQueueSubmit(vk_transfer_queue, 1 /*submitCount*/, + &submission, VK_NULL_HANDLE /*fence*/)); + } + + printString("Getting memory file descriptors and calling into SYCL\n"); + // Pass memory to SYCL for modification + int input_fd = vkutil::getMemoryOpaqueFD(inputMemory); + int output_fd = vkutil::getMemoryOpaqueFD(outputMemory); + + // Pass semaphores to SYCL for synchronization + int sycl_wait_semaphore_fd = vkutil::getSemaphoreOpaqueFD(syclWaitSemaphore); + int sycl_done_semaphore_fd = vkutil::getSemaphoreOpaqueFD(syclDoneSemaphore); + + run_sycl(input_fd, output_fd, sycl_wait_semaphore_fd, sycl_done_semaphore_fd, + width, height); + + printString("Copying image memory to staging memory\n"); + // Copy main image memory to staging + { + VkCommandBufferBeginInfo cbbi = {}; + cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cbbi.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT; + + VkBufferImageCopy copyRegion = {}; + copyRegion.imageExtent = {width, height, 1}; + copyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copyRegion.imageSubresource.layerCount = 1; + + VK_CHECK_CALL(vkBeginCommandBuffer(vk_transferCmdBuffers[1], &cbbi)); + vkCmdCopyImageToBuffer(vk_transferCmdBuffers[1], outputImage, + VK_IMAGE_LAYOUT_GENERAL, outputStagingBuffer, + 1 /*regionCount*/, ©Region); + VK_CHECK_CALL(vkEndCommandBuffer(vk_transferCmdBuffers[1])); + + std::vector stages{VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT}; + + VkSubmitInfo submission = {}; + submission.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submission.commandBufferCount = 1; + submission.pCommandBuffers = &vk_transferCmdBuffers[1]; + + submission.waitSemaphoreCount = 1; + submission.pWaitSemaphores = &syclDoneSemaphore; + submission.pWaitDstStageMask = stages.data(); + + VK_CHECK_CALL(vkQueueSubmit(vk_transfer_queue, 1 /*submitCount*/, + &submission, VK_NULL_HANDLE /*fence*/)); + VK_CHECK_CALL(vkQueueWaitIdle(vk_transfer_queue)); + } + + printString("Validating\n"); + // Validate that SYCL made changes to the memory + bool validated = true; + sycl::vec *outputStagingData = nullptr; + VK_CHECK_CALL(vkMapMemory(vk_device, outputStagingMemory, 0 /*offset*/, + imageSizeBytes, 0 /*flags*/, + (void **)&outputStagingData)); + for (int i = 0; i < width * height; ++i) { + sycl::vec expected = {4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3}; + expected *= 10; + for (int j = 0; j < 4; ++j) { + if (outputStagingData[i][j] != expected[j]) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! actual[" << i << "][" << j + << "] == " << outputStagingData[i][j] + << " : expected == " << expected[j] << "\n"; + validated = false; +#else + break; +#endif + } + } + if (!validated) + break; + } + vkUnmapMemory(vk_device, outputStagingMemory); + + if (validated) { + std::cout << "Test passed!\n"; + } else { + std::cout << "Test failed!\n"; + } + + // Cleanup + vkDestroyBuffer(vk_device, inputStagingBuffer, nullptr); + vkDestroyBuffer(vk_device, outputStagingBuffer, nullptr); + vkDestroyImage(vk_device, inputImage, nullptr); + vkDestroyImage(vk_device, outputImage, nullptr); + vkFreeMemory(vk_device, inputStagingMemory, nullptr); + vkFreeMemory(vk_device, outputStagingMemory, nullptr); + vkFreeMemory(vk_device, inputMemory, nullptr); + vkFreeMemory(vk_device, outputMemory, nullptr); + vkDestroySemaphore(vk_device, syclWaitSemaphore, nullptr); + vkDestroySemaphore(vk_device, syclDoneSemaphore, nullptr); + + return validated; +} + +int main() { + + if (vkutil::setupInstance() != VK_SUCCESS) { + std::cerr << "Instance setup failed!\n"; + return EXIT_FAILURE; + } + + // Currently only Nvidia devices are tested + if (vkutil::setupDevice("NVIDIA") != VK_SUCCESS) { + std::cerr << "Device setup failed!\n"; + return EXIT_FAILURE; + } + + if (vkutil::setupCommandBuffers() != VK_SUCCESS) { + std::cerr << "Command buffers setup failed!\n"; + return EXIT_FAILURE; + } + + bool validated = run_test(); + + if (vkutil::cleanup() != VK_SUCCESS) { + std::cerr << "Cleanup failed!\n"; + return EXIT_FAILURE; + } + + return validated ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_common.hpp b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_common.hpp new file mode 100644 index 0000000000000..fa71f10a7fb9d --- /dev/null +++ b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_common.hpp @@ -0,0 +1,407 @@ +#pragma once +#include + +#include +#include +#include + +void printString(std::string str) { +#ifdef VERBOSE_PRINT + std::cout << str; +#endif +} + +#define VK_CHECK_CALL_RET(call) \ + { \ + VkResult err = call; \ + if (err != VK_SUCCESS) \ + return err; \ + } + +#define VK_CHECK_CALL(call) \ + { \ + VkResult err = call; \ + if (err != VK_SUCCESS) \ + std::cerr << #call << " failed. Code: " << err << "\n"; \ + } + +static VkInstance vk_instance; +static VkPhysicalDevice vk_physical_device; +static VkDebugUtilsMessengerEXT vk_debug_messenger; +static VkDevice vk_device; +static VkQueue vk_compute_queue; +static VkQueue vk_transfer_queue; + +static PFN_vkGetMemoryFdKHR vk_getMemoryFdKHR; +static PFN_vkGetSemaphoreFdKHR vk_getSemaphoreFdKHR; + +static uint32_t vk_computeQueueFamilyIndex; +static uint32_t vk_transferQueueFamilyIndex; + +static VkCommandPool vk_computeCmdPool; +static VkCommandPool vk_transferCmdPool; + +static VkCommandBuffer vk_computeCmdBuffer; +static VkCommandBuffer vk_transferCmdBuffers[2]; + +static VKAPI_ATTR VkBool32 VKAPI_CALL +debugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, + VkDebugUtilsMessageTypeFlagsEXT messageType, + const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, + void *pUserData) { + // Only print errors from validation layer + if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { + std::cerr << pCallbackData->pMessage << "\n"; + } + return VK_FALSE; +} + +namespace vkutil { +VkResult setupInstance() { + VkApplicationInfo ai = {}; + ai.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + ai.pApplicationName = "SYCL-Vulkan-Interop"; + ai.applicationVersion = VK_MAKE_VERSION(1, 0, 0); + ai.pEngineName = ""; + ai.engineVersion = VK_MAKE_VERSION(1, 0, 0); + ai.apiVersion = VK_API_VERSION_1_0; + + uint32_t layerCount; + VK_CHECK_CALL_RET(vkEnumerateInstanceLayerProperties(&layerCount, nullptr)); + + std::vector availableLayers(layerCount); + VK_CHECK_CALL_RET( + vkEnumerateInstanceLayerProperties(&layerCount, availableLayers.data())); + + VkInstanceCreateInfo ci = {}; + ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + ci.pApplicationInfo = &ai; + std::vector extensions = { + VK_EXT_DEBUG_UTILS_EXTENSION_NAME, + VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME, + VK_KHR_EXTERNAL_FENCE_CAPABILITIES_EXTENSION_NAME, + VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME}; + ci.enabledExtensionCount = extensions.size(); + ci.ppEnabledExtensionNames = extensions.data(); + std::vector layers = {"VK_LAYER_KHRONOS_validation"}; + ci.enabledLayerCount = layers.size(); + ci.ppEnabledLayerNames = layers.data(); + + VK_CHECK_CALL_RET(vkCreateInstance(&ci, nullptr, &vk_instance)); + + VkDebugUtilsMessengerCreateInfoEXT dumci = {}; + dumci.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; + dumci.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; + dumci.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; + dumci.pfnUserCallback = debugCallback; + + auto func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr( + vk_instance, "vkCreateDebugUtilsMessengerEXT"); + if (func != nullptr) { + VK_CHECK_CALL_RET(func(vk_instance, &dumci, nullptr, &vk_debug_messenger)); + } else { + return VK_ERROR_EXTENSION_NOT_PRESENT; + } + + vk_getMemoryFdKHR = (PFN_vkGetMemoryFdKHR)vkGetInstanceProcAddr( + vk_instance, "vkGetMemoryFdKHR"); + + vk_getSemaphoreFdKHR = (PFN_vkGetSemaphoreFdKHR)vkGetInstanceProcAddr( + vk_instance, "vkGetSemaphoreFdKHR"); + + return VK_SUCCESS; +} + +VkResult setupDevice(std::string device) { + uint32_t physicalDeviceCount = 0; + VK_CHECK_CALL_RET( + vkEnumeratePhysicalDevices(vk_instance, &physicalDeviceCount, nullptr)); + if (physicalDeviceCount == 0) { + return VK_ERROR_DEVICE_LOST; + } + std::vector physicalDevices(physicalDeviceCount); + VK_CHECK_CALL_RET(vkEnumeratePhysicalDevices( + vk_instance, &physicalDeviceCount, physicalDevices.data())); + + bool foundDevice = false; + + for (int i = 0; i < physicalDeviceCount; i++) { + vk_physical_device = physicalDevices[i]; + VkPhysicalDeviceProperties props; + vkGetPhysicalDeviceProperties(vk_physical_device, &props); + std::string str(props.deviceName); + + if (str.find(device) != std::string::npos) { + foundDevice = true; + break; + } + } + + if (!foundDevice) { + std::cerr << "Failed to find suitable device!\n"; + return VK_ERROR_DEVICE_LOST; + } + + uint32_t queueFamilyCount = 0; + vkGetPhysicalDeviceQueueFamilyProperties(vk_physical_device, + &queueFamilyCount, nullptr); + std::vector queueFamilies(queueFamilyCount); + vkGetPhysicalDeviceQueueFamilyProperties( + vk_physical_device, &queueFamilyCount, queueFamilies.data()); + uint32_t i = 0; + for (auto &qf : queueFamilies) { + if (qf.queueFlags & VK_QUEUE_COMPUTE_BIT) { + vk_computeQueueFamilyIndex = i; + } + if (qf.queueFlags & VK_QUEUE_TRANSFER_BIT) { + vk_transferQueueFamilyIndex = i; + } + ++i; + } + + float queuePriority = 1.f; + + std::vector qcis; + if (vk_computeQueueFamilyIndex == vk_transferQueueFamilyIndex) { + qcis.resize(1); + qcis[0].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + qcis[0].queueFamilyIndex = vk_transferQueueFamilyIndex; + qcis[0].queueCount = 1; + qcis[0].pQueuePriorities = &queuePriority; + } else { + qcis.resize(2); + qcis[0].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + qcis[0].queueFamilyIndex = vk_transferQueueFamilyIndex; + qcis[0].queueCount = 1; + qcis[0].pQueuePriorities = &queuePriority; + + qcis[1].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + qcis[1].queueFamilyIndex = vk_computeQueueFamilyIndex; + qcis[1].queueCount = 1; + qcis[1].pQueuePriorities = &queuePriority; + } + + VkPhysicalDeviceFeatures deviceFeatures = {}; + + std::vector extensions = { + VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME, + VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME}; + + VkDeviceCreateInfo dci = {}; + dci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + dci.pQueueCreateInfos = qcis.data(); + dci.queueCreateInfoCount = qcis.size(); + dci.pEnabledFeatures = &deviceFeatures; + dci.enabledExtensionCount = extensions.size(); + dci.ppEnabledExtensionNames = extensions.data(); + + VK_CHECK_CALL_RET( + vkCreateDevice(vk_physical_device, &dci, nullptr, &vk_device)); + + vkGetDeviceQueue(vk_device, vk_transferQueueFamilyIndex, 0, + &vk_transfer_queue); + vkGetDeviceQueue(vk_device, vk_computeQueueFamilyIndex, 0, &vk_compute_queue); + + return VK_SUCCESS; +} + +VkResult setupCommandBuffers() { + VkCommandPoolCreateInfo cpci = {}; + cpci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + cpci.queueFamilyIndex = vk_computeQueueFamilyIndex; + cpci.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + VK_CHECK_CALL_RET( + vkCreateCommandPool(vk_device, &cpci, nullptr, &vk_computeCmdPool)); + + if (vk_computeQueueFamilyIndex == vk_transferQueueFamilyIndex) { + vk_transferCmdPool = vk_computeCmdPool; + } else { + VkCommandPoolCreateInfo cpci = {}; + cpci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + cpci.queueFamilyIndex = vk_transferQueueFamilyIndex; + cpci.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + VK_CHECK_CALL_RET( + vkCreateCommandPool(vk_device, &cpci, nullptr, &vk_transferCmdPool)); + } + + { + VkCommandBufferAllocateInfo cbai = {}; + cbai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + cbai.commandPool = vk_computeCmdPool; + cbai.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + cbai.commandBufferCount = 1; + VK_CHECK_CALL_RET( + vkAllocateCommandBuffers(vk_device, &cbai, &vk_computeCmdBuffer)); + } + + { + VkCommandBufferAllocateInfo cbai = {}; + cbai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + cbai.commandPool = vk_transferCmdPool; + cbai.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + cbai.commandBufferCount = 2; + VK_CHECK_CALL_RET( + vkAllocateCommandBuffers(vk_device, &cbai, vk_transferCmdBuffers)); + } + + return VK_SUCCESS; +} + +VkBuffer createBuffer(size_t size, VkBufferUsageFlags usage) { + VkBufferCreateInfo bci = {}; + bci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + bci.size = size; + bci.usage = usage; + bci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + VkBuffer buffer; + if (vkCreateBuffer(vk_device, &bci, nullptr, &buffer) != VK_SUCCESS) { + std::cerr << "Could not create buffer!\n"; + return VK_NULL_HANDLE; + } + return buffer; +} + +VkImage createImage(VkImageType type, VkFormat format, VkExtent3D extent, + VkImageUsageFlags usage, bool exportable = true) { + VkImageCreateInfo ici = {}; + ici.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + ici.imageType = type; + ici.format = format; + ici.extent = extent; + ici.mipLevels = 1; + ici.arrayLayers = 1; + // ici.tiling = VK_IMAGE_TILING_LINEAR; + ici.usage = usage; + ici.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + ici.samples = VK_SAMPLE_COUNT_1_BIT; + // ici.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; + + VkExternalMemoryImageCreateInfo emici = {}; + if (exportable) { + emici.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO; + emici.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; + + ici.pNext = &emici; + } + + VkImage image; + if (vkCreateImage(vk_device, &ici, nullptr, &image)) { + std::cerr << "Could not create image!\n"; + return VK_NULL_HANDLE; + } + return image; +} + +VkDeviceMemory allocateDeviceMemory(size_t size, uint32_t memoryTypeIndex, + bool exportable = true) { + VkMemoryAllocateInfo mai = {}; + mai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + mai.allocationSize = size; + mai.memoryTypeIndex = memoryTypeIndex; + + VkExportMemoryAllocateInfo emai = {}; + if (exportable) { + emai.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; + emai.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; + + mai.pNext = &emai; + } + + VkDeviceMemory memory; + if (vkAllocateMemory(vk_device, &mai, nullptr, &memory) != VK_SUCCESS) { + std::cerr << "Could not allocate device memory!\n"; + return VK_NULL_HANDLE; + } + return memory; +} + +uint32_t getImageMemoryTypeIndex(VkImage image, VkMemoryPropertyFlags flags) { + VkMemoryRequirements memRequirements; + vkGetImageMemoryRequirements(vk_device, image, &memRequirements); + + VkPhysicalDeviceMemoryProperties memProperties; + vkGetPhysicalDeviceMemoryProperties(vk_physical_device, &memProperties); + + for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) { + if ((memRequirements.memoryTypeBits & (1 << i)) && + (memProperties.memoryTypes[i].propertyFlags & flags) == flags) { + return i; + } + } + std::cerr << "Image memory type index not found!\n"; + return 0; +} + +uint32_t getBufferMemoryTypeIndex(VkBuffer buffer, + VkMemoryPropertyFlags flags) { + VkMemoryRequirements memRequirements; + vkGetBufferMemoryRequirements(vk_device, buffer, &memRequirements); + + VkPhysicalDeviceMemoryProperties memProperties; + vkGetPhysicalDeviceMemoryProperties(vk_physical_device, &memProperties); + + for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) { + if ((memRequirements.memoryTypeBits & (1 << i)) && + (memProperties.memoryTypes[i].propertyFlags & flags) == flags) { + return i; + } + } + std::cerr << "Buffer memory type index not found!\n"; + return 0; +} + +VkResult cleanup() { + + if (vk_computeQueueFamilyIndex == vk_transferQueueFamilyIndex) { + vkDestroyCommandPool(vk_device, vk_computeCmdPool, nullptr); + } else { + vkDestroyCommandPool(vk_device, vk_computeCmdPool, nullptr); + vkDestroyCommandPool(vk_device, vk_transferCmdPool, nullptr); + } + + auto destroyDebugUtilsMessenger = + (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr( + vk_instance, "vkDestroyDebugUtilsMessengerEXT"); + if (destroyDebugUtilsMessenger != nullptr) { + destroyDebugUtilsMessenger(vk_instance, vk_debug_messenger, nullptr); + } + vkDestroyDevice(vk_device, nullptr); + vkDestroyInstance(vk_instance, nullptr); + return VK_SUCCESS; +} + +int getMemoryOpaqueFD(VkDeviceMemory memory) { + VkMemoryGetFdInfoKHR mgfi = {}; + mgfi.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; + mgfi.memory = memory; + mgfi.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; + + int fd = 0; + if (vk_getMemoryFdKHR != nullptr) { + VK_CHECK_CALL(vk_getMemoryFdKHR(vk_device, &mgfi, &fd)); + } + return fd; +} + +int getSemaphoreOpaqueFD(VkSemaphore semaphore) { + VkSemaphoreGetFdInfoKHR sgfi = {}; + sgfi.sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR; + sgfi.semaphore = semaphore; + sgfi.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; + + int fd = 0; + if (vk_getSemaphoreFdKHR != nullptr) { + VK_CHECK_CALL(vk_getSemaphoreFdKHR(vk_device, &sgfi, &fd)); + } + return fd; +} + +} // namespace vkutil diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py index ba173a37609b0..55b08ad980f54 100644 --- a/sycl/test-e2e/lit.cfg.py +++ b/sycl/test-e2e/lit.cfg.py @@ -196,7 +196,7 @@ config.substitutions.append( ('%opencl_include_dir', config.opencl_include_dir) ) if cl_options: - config.substitutions.append( ('%sycl_options', ' ' + os.path.normpath(os.path.join(config.sycl_libs_dir + '/../lib/sycl7.lib')) + ' /I' + + config.substitutions.append( ('%sycl_options', ' ' + config.sycl_libs_dir + '/../lib/sycl7.lib /I' + config.sycl_include + ' /I' + os.path.join(config.sycl_include, 'sycl')) ) config.substitutions.append( ('%include_option', '/FI' ) ) config.substitutions.append( ('%debug_option', '/DEBUG' ) ) @@ -218,6 +218,16 @@ config.substitutions.append( ('%fPIC', ('' if platform.system() == 'Windows' else '-fPIC')) ) config.substitutions.append( ('%shared_lib', '-shared') ) + +config.substitutions.append( ('%vulkan_include_dir', config.vulkan_include_dir ) ) +config.substitutions.append( ('%vulkan_lib', config.vulkan_lib ) ) + +vulkan_lib_path = os.path.dirname(config.vulkan_lib) +config.substitutions.append( ('%link-vulkan', '-L %s -lvulkan -I %s' % (vulkan_lib_path, config.vulkan_include_dir ) ) ) + +if config.vulkan_found == "TRUE": + config.available_features.add('vulkan') + if not config.gpu_aot_target_opts: config.gpu_aot_target_opts = '"-device *"' @@ -243,8 +253,7 @@ 'ext_oneapi_cuda':('gpu'), 'ext_oneapi_level_zero':('gpu'), 'ext_oneapi_hip':('gpu'), - 'ext_intel_esimd_emulator':('gpu'), - 'native_cpu':('cpu')} + 'ext_intel_esimd_emulator':('gpu')} for d in config.sycl_devices: be, dev = d.split(':') if be not in available_devices or dev not in available_devices[be]: @@ -414,7 +423,7 @@ aspect_features = set('aspect-' + a for a in aspects) sg_size_features = set('sg-' + s for s in sg_sizes) - features = set(); + features = set() features.update(aspect_features) features.update(sg_size_features) diff --git a/sycl/test-e2e/lit.site.cfg.py.in b/sycl/test-e2e/lit.site.cfg.py.in index a96f9dbff92f9..8abc1ba7bcce1 100644 --- a/sycl/test-e2e/lit.site.cfg.py.in +++ b/sycl/test-e2e/lit.site.cfg.py.in @@ -38,6 +38,10 @@ config.external_tests = "@SYCL_EXTERNAL_TESTS@" config.extra_include = "@CMAKE_CURRENT_SOURCE_DIR@/include" config.gpu_aot_target_opts = lit_config.params.get("gpu_aot_target_opts", "@GPU_AOT_TARGET_OPTS@") +config.vulkan_include_dir = "@Vulkan_INCLUDE_DIRS@" +config.vulkan_lib = "@Vulkan_LIBRARY@" +config.vulkan_found = "@Vulkan_FOUND@" + config.run_launcher = lit_config.params.get('run_launcher', "@SYCL_E2E_RUN_LAUNCHER@") import lit.llvm From 9b4dcb45f112a1a0dd802ca889d25161b2a8ed33 Mon Sep 17 00:00:00 2001 From: Sean Stirling Date: Fri, 21 Jul 2023 13:20:04 +0100 Subject: [PATCH 2/5] Update queries with PR3 fix --- sycl/test-e2e/bindless_images/image_get_info.cpp | 8 ++++---- sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/sycl/test-e2e/bindless_images/image_get_info.cpp b/sycl/test-e2e/bindless_images/image_get_info.cpp index 841a5c350e63d..719eb171ef3ea 100644 --- a/sycl/test-e2e/bindless_images/image_get_info.cpp +++ b/sycl/test-e2e/bindless_images/image_get_info.cpp @@ -65,17 +65,17 @@ int main() { // values are correct // But we should at least see that the query itself works auto pitch_align = dev.get_info< - sycl::ext::oneapi::experimental::info::device::image_pitch_align>(); + sycl::ext::oneapi::experimental::info::device::image_row_pitch_align>(); auto max_pitch = dev.get_info(); + device::max_image_linear_row_pitch>(); auto max_width = dev.get_info(); auto max_height = dev.get_info(); #ifdef VERBOSE_PRINT - std::cout << "image_pitch_align: " << pitch_align - << "\nmax_image_linear_pitch: " << max_pitch + std::cout << "image_row_pitch_align: " << pitch_align + << "\nmax_image_linear_row_pitch: " << max_pitch << "\nmax_image_linear_width: " << max_width << "\nmax_image_linear_height: " << max_height << "\n"; #endif diff --git a/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp b/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp index 1601a205a4156..ee00a3ac48a6a 100644 --- a/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp +++ b/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp @@ -54,9 +54,10 @@ int main() { sycl::image_channel_type::fp32); auto device_pitch_align = dev.get_info< - sycl::ext::oneapi::experimental::info::device::image_pitch_align>(); - auto device_max_pitch = dev.get_info(); + sycl::ext::oneapi::experimental::info::device::image_row_pitch_align>(); + auto device_max_pitch = + dev.get_info(); // Pitch requirements: // - pitch % device_pitch_align == 0 From 4ecd21ad84d704e14864d54dfc6888204c0a7563 Mon Sep 17 00:00:00 2001 From: Sean Stirling Date: Wed, 26 Jul 2023 16:10:41 +0100 Subject: [PATCH 3/5] Address formatting issue --- sycl/test-e2e/bindless_images/read_sampled.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/test-e2e/bindless_images/read_sampled.cpp b/sycl/test-e2e/bindless_images/read_sampled.cpp index f7b3c415f62bc..0a5a57bc0f156 100644 --- a/sycl/test-e2e/bindless_images/read_sampled.cpp +++ b/sycl/test-e2e/bindless_images/read_sampled.cpp @@ -10,14 +10,14 @@ #include // Print test names and pass status -//#define VERBOSE_LV1 +// #define VERBOSE_LV1 // Same as above plus sampler, offset, margin of error, largest error found and // results of one mismatch -//#define VERBOSE_LV2 +// #define VERBOSE_LV2 // Same as above but all mismatches are printed -//#define VERBOSE_LV3 +// #define VERBOSE_LV3 // Helpers and utilities struct util { From 195d75b4da32da58de969bc2dad4353655261a73 Mon Sep 17 00:00:00 2001 From: Sean Stirling Date: Mon, 31 Jul 2023 15:21:49 +0100 Subject: [PATCH 4/5] Address merge issue with lit.cfg.py --- sycl/test-e2e/lit.cfg.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py index 55b08ad980f54..86cd5d1547ccd 100644 --- a/sycl/test-e2e/lit.cfg.py +++ b/sycl/test-e2e/lit.cfg.py @@ -196,7 +196,7 @@ config.substitutions.append( ('%opencl_include_dir', config.opencl_include_dir) ) if cl_options: - config.substitutions.append( ('%sycl_options', ' ' + config.sycl_libs_dir + '/../lib/sycl7.lib /I' + + config.substitutions.append( ('%sycl_options', ' ' + os.path.normpath(os.path.join(config.sycl_libs_dir + '/../lib/sycl7.lib')) + ' /I' + config.sycl_include + ' /I' + os.path.join(config.sycl_include, 'sycl')) ) config.substitutions.append( ('%include_option', '/FI' ) ) config.substitutions.append( ('%debug_option', '/DEBUG' ) ) @@ -253,7 +253,8 @@ 'ext_oneapi_cuda':('gpu'), 'ext_oneapi_level_zero':('gpu'), 'ext_oneapi_hip':('gpu'), - 'ext_intel_esimd_emulator':('gpu')} + 'ext_intel_esimd_emulator':('gpu'), + 'native_cpu':('cpu')} for d in config.sycl_devices: be, dev = d.split(':') if be not in available_devices or dev not in available_devices[be]: @@ -423,7 +424,7 @@ aspect_features = set('aspect-' + a for a in aspects) sg_size_features = set('sg-' + s for s in sg_sizes) - features = set() + features = set(); features.update(aspect_features) features.update(sg_size_features) From 01930b59e31661dfedb37ce5c439b1a5655ad6e8 Mon Sep 17 00:00:00 2001 From: Sean Stirling Date: Tue, 1 Aug 2023 13:48:09 +0100 Subject: [PATCH 5/5] Address feedback --- .../bindless_images/image_get_info.cpp | 106 +++++++++--------- .../bindless_images/mipmap/mipmap_read_1D.cpp | 28 ++--- .../bindless_images/mipmap/mipmap_read_2D.cpp | 24 ++-- .../bindless_images/mipmap/mipmap_read_3D.cpp | 22 ++-- sycl/test-e2e/bindless_images/read_1D.cpp | 47 ++++---- sycl/test-e2e/bindless_images/read_2D.cpp | 22 ++-- .../bindless_images/read_2D_dynamic.cpp | 8 +- sycl/test-e2e/bindless_images/read_3D.cpp | 22 ++-- .../test-e2e/bindless_images/read_sampled.cpp | 22 +--- .../bindless_images/read_write_1D.cpp | 29 +++-- .../read_write_1D_subregion.cpp | 49 ++++---- .../bindless_images/read_write_2D.cpp | 29 +++-- .../read_write_2D_subregion.cpp | 63 +++++------ .../bindless_images/read_write_3D.cpp | 29 +++-- .../read_write_3D_subregion.cpp | 95 ++++++++-------- .../bindless_images/read_write_unsampled.cpp | 8 +- sycl/test-e2e/bindless_images/sampling_1D.cpp | 33 +++--- sycl/test-e2e/bindless_images/sampling_2D.cpp | 30 ++--- .../sampling_2D_USM_shared.cpp | 59 +++++----- .../bindless_images/sampling_2D_half.cpp | 39 ++++--- sycl/test-e2e/bindless_images/sampling_3D.cpp | 31 +++-- .../vulkan_interop/sampled_images.cpp | 2 +- 22 files changed, 381 insertions(+), 416 deletions(-) diff --git a/sycl/test-e2e/bindless_images/image_get_info.cpp b/sycl/test-e2e/bindless_images/image_get_info.cpp index 719eb171ef3ea..931332b928cb4 100644 --- a/sycl/test-e2e/bindless_images/image_get_info.cpp +++ b/sycl/test-e2e/bindless_images/image_get_info.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -40,23 +40,23 @@ int main() { // Extension: returns the device pointer to the allocated memory // Input images memory - sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem(desc, dev, ctxt); // Extension: query for bindless image support -- device aspects - bool bindless_support = dev.has(sycl::aspect::ext_oneapi_bindless_images); - bool bindless_shared_usm_support = + bool bindlessSupport = dev.has(sycl::aspect::ext_oneapi_bindless_images); + bool bindlessSharedUsmSupport = dev.has(sycl::aspect::ext_oneapi_bindless_images_shared_usm); - bool usm_1d_support = + bool usm1dSupport = dev.has(sycl::aspect::ext_oneapi_bindless_images_1d_usm); - bool usm_2d_support = + bool usm2dSupport = dev.has(sycl::aspect::ext_oneapi_bindless_images_2d_usm); #ifdef VERBOSE_PRINT - std::cout << "bindless_images_support: " << bindless_support + std::cout << "bindless_images_support: " << bindlessSupport << "\nbindless_images_shared_usm_support: " - << bindless_shared_usm_support - << "\nbindless_images_1d_usm_support: " << usm_1d_support - << "\nbindless_images_2d_usm_support: " << usm_2d_support << "\n"; + << bindlessSharedUsmSupport + << "\nbindless_images_1d_usm_support: " 1dS + << "\nbindless_images_2d_usm_support: " << S << "\n"; #endif // Extension: get pitch alignment information from device -- device info @@ -64,63 +64,61 @@ int main() { // These can be different depending on the device so we cannot test that the // values are correct // But we should at least see that the query itself works - auto pitch_align = dev.get_info< + auto pitchAlign = dev.get_info< sycl::ext::oneapi::experimental::info::device::image_row_pitch_align>(); - auto max_pitch = dev.get_info(); - auto max_width = dev.get_info(); - auto max_height = dev.get_info(); + auto maxPitch = dev.get_info(); + auto maxWidth = dev.get_info(); + auto maxheight = dev.get_info(); #ifdef VERBOSE_PRINT - std::cout << "image_row_pitch_align: " << pitch_align - << "\nmax_image_linear_row_pitch: " << max_pitch - << "\nmax_image_linear_width: " << max_width - << "\nmax_image_linear_height: " << max_height << "\n"; + std::cout << "image_row_pitch_align: " << pitchAlign + << "\nmax_image_linear_row_pitch: " << maxPitch + << "\nmax_image_linear_width: " << maxWidth + << "\nmax_image_linear_height: " << maxheight << "\n"; #endif // Extension: query for bindless image mipmaps support -- aspects & info - bool mipmap_support = dev.has(sycl::aspect::ext_oneapi_mipmap); - bool mipmap_anisotropy_support = + bool mipmapSupport = dev.has(sycl::aspect::ext_oneapi_mipmap); + bool mipmapAnisotropySupport = dev.has(sycl::aspect::ext_oneapi_mipmap_anisotropy); - float mipmap_max_anisotropy = dev.get_info< + float mipmapMaxAnisotropy = dev.get_info< sycl::ext::oneapi::experimental::info::device::mipmap_max_anisotropy>(); - bool mipmap_level_reference_support = + bool mipmapLevelReferenceSupport = dev.has(sycl::aspect::ext_oneapi_mipmap_level_reference); #ifdef VERBOSE_PRINT - std::cout << "mipmap_support: " << mipmap_support - << "\nmipmap_anisotropy_support: " << mipmap_anisotropy_support - << "\nmipmap_max_anisotropy: " << mipmap_max_anisotropy - << "\nmipmap_level_reference_support: " - << mipmap_level_reference_support << "\n"; + std::cout << "mipmapSupport: " << mipmapSupport + << "\nmipmapAnisotropySupport: " << mipmapAnisotropySupport + << "\nmipmapMaxAnisotropy: " << mipmapMaxAnisotropy + << "\nmipmapLevelReferenceSupport: " + << mipmapLevelReferenceSupport << "\n"; #endif // Extension: query for bindless image interop support -- device aspects - bool interop_memory_import_support = + bool interopMemoryImportSupport = dev.has(sycl::aspect::ext_oneapi_interop_memory_import); - bool interop_memory_export_support = + bool interopMemoryExportSupport = dev.has(sycl::aspect::ext_oneapi_interop_memory_export); - bool interop_semaphore_import_support = + bool interopSemaphoreImportSupport = dev.has(sycl::aspect::ext_oneapi_interop_semaphore_import); - bool interop_semaphore_export_support = + bool interopSemaphoreExportSupport = dev.has(sycl::aspect::ext_oneapi_interop_semaphore_export); #ifdef VERBOSE_PRINT - std::cout << "interop_memory_import_support: " - << interop_memory_import_support - << "\ninterop_memory_export_support: " - << interop_memory_export_support - << "\ninterop_semaphore_import_support: " - << interop_semaphore_import_support - << "\ninterop_semaphore_export_support: " - << interop_semaphore_export_support << "\n"; + std::cout << "interopMemoryImportSupport: " << interopMemoryImportSupport + << "\ninteropMemoryExportSupport: " << interopMemoryExportSupport + << "\ninteropSemaphoreImportSupport: " + << interopSemaphoreImportSupport + << "\ninteropSemaphoreExportSupport: " + << interopSemaphoreExportSupport << "\n"; #endif - auto rangeMem = img_mem_0.get_range(); + auto rangeMem = imgMem.get_range(); auto range = sycl::ext::oneapi::experimental::get_image_range( - img_mem_0.get_handle(), dev, ctxt); + imgMem.get_handle(), dev, ctxt); if (rangeMem != range) { printString("handle and mem object disagree on image dimensions!\n"); validated = false; @@ -144,7 +142,7 @@ int main() { validated = false; } - auto type = img_mem_0.get_type(); + auto type = imgMem.get_type(); if (type == sycl::ext::oneapi::experimental::image_type::standard) { printString("image type is correct!\n"); } else { @@ -152,9 +150,9 @@ int main() { validated = false; } - auto ctypeMem = img_mem_0.get_channel_type(); + auto ctypeMem = imgMem.get_channel_type(); auto ctype = sycl::ext::oneapi::experimental::get_image_channel_type( - img_mem_0.get_handle(), dev, ctxt); + imgMem.get_handle(), dev, ctxt); if (ctypeMem != ctype) { printString("handle and mem object disagree on image channel type!\n"); validated = false; @@ -166,7 +164,7 @@ int main() { validated = false; } - auto corder = img_mem_0.get_channel_order(); + auto corder = imgMem.get_channel_order(); if (corder == sycl::image_channel_order::r) { printString("channel order is correct!\n"); } else { @@ -174,9 +172,9 @@ int main() { validated = false; } - auto numchannelsMem = img_mem_0.get_num_channels(); + auto numchannelsMem = imgMem.get_num_channels(); auto numchannels = sycl::ext::oneapi::experimental::get_image_num_channels( - img_mem_0.get_handle(), dev, ctxt); + imgMem.get_handle(), dev, ctxt); if (numchannelsMem != numchannels) { printString("handle and mem object disagree on number of channels!\n"); validated = false; @@ -190,10 +188,10 @@ int main() { } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } if (validated) { @@ -201,6 +199,6 @@ int main() { return 0; } - std::cout << "Test Failed!\n"; - return 1; + std::cout << "Test Failed!" << std::endl; + return 3; } diff --git a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp index 12d5b540a4793..41ff725849ad7 100644 --- a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp +++ b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -40,26 +40,26 @@ int main() { try { size_t width = N; - unsigned int num_levels = 2; + unsigned int numLevels = 2; // Extension: image descriptor -- number of levels sycl::ext::oneapi::experimental::image_descriptor desc( {width}, sycl::image_channel_order::rgba, sycl::image_channel_type::fp32, - sycl::ext::oneapi::experimental::image_type::mipmap, num_levels); + sycl::ext::oneapi::experimental::image_type::mipmap, numLevels); // Extension: allocate mipmap memory on device - sycl::ext::oneapi::experimental::image_mem mip_mem(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem mipMem(desc, dev, ctxt); // Extension: retrieve level 0 - sycl::ext::oneapi::experimental::image_mem_handle img_mem1 = - mip_mem.get_mip_level_mem_handle(0); + sycl::ext::oneapi::experimental::image_mem_handle imgMem1 = + mipMem.get_mip_level_mem_handle(0); // Extension: copy over data to device at level 0 - q.ext_oneapi_copy(dataIn1.data(), img_mem1, desc); + q.ext_oneapi_copy(dataIn1.data(), imgMem1, desc); // Extension: copy data to device at level 1 - q.ext_oneapi_copy(dataIn2.data(), mip_mem.get_mip_level_mem_handle(1), + q.ext_oneapi_copy(dataIn2.data(), mipMem.get_mip_level_mem_handle(1), desc.get_mip_level_desc(1)); q.wait_and_throw(); @@ -68,11 +68,11 @@ int main() { sycl::addressing_mode::mirrored_repeat, sycl::coordinate_normalization_mode::normalized, sycl::filtering_mode::nearest, sycl::filtering_mode::nearest, 0.0f, - (float)num_levels, 8.0f); + (float)numLevels, 8.0f); // Extension: create a sampled image handle to represent the mipmap sycl::ext::oneapi::experimental::sampled_image_handle mipHandle = - sycl::ext::oneapi::experimental::create_image(mip_mem, samp, desc, dev, + sycl::ext::oneapi::experimental::create_image(mipMem, samp, desc, dev, ctxt); sycl::buffer buf((float *)out.data(), N); @@ -99,7 +99,7 @@ int main() { q.wait_and_throw(); // Extension: copy data from device - q.ext_oneapi_copy(mip_mem.get_mip_level_mem_handle(1), copyOut.data(), + q.ext_oneapi_copy(mipMem.get_mip_level_mem_handle(1), copyOut.data(), desc.get_mip_level_desc(1)); q.wait_and_throw(); @@ -108,10 +108,10 @@ int main() { } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } // collect and validate output @@ -138,5 +138,5 @@ int main() { } std::cout << "Test failed!" << std::endl; - return 1; + return 3; } diff --git a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp index 32c7b53db0ce4..9120dd694e58b 100644 --- a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp +++ b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -52,37 +52,37 @@ int main() { try { - size_t num_levels = 3; + size_t numLevels = 3; // Extension: image descriptor -- number of levels sycl::ext::oneapi::experimental::image_descriptor desc( {width, height}, sycl::image_channel_order::rgba, sycl::image_channel_type::fp32, - sycl::ext::oneapi::experimental::image_type::mipmap, num_levels); + sycl::ext::oneapi::experimental::image_type::mipmap, numLevels); // Extension: define a sampler object -- extended mipmap attributes sycl::ext::oneapi::experimental::bindless_image_sampler samp( sycl::addressing_mode::clamp, sycl::coordinate_normalization_mode::normalized, sycl::filtering_mode::nearest, sycl::filtering_mode::nearest, 0.0f, - (float)num_levels, 8.0f); + (float)numLevels, 8.0f); // Extension: allocate mipmap memory on device - sycl::ext::oneapi::experimental::image_mem mip_mem(desc, q); + sycl::ext::oneapi::experimental::image_mem mipMem(desc, q); // Extension: copy data to device at all levels -- copy func handles desc // sizing - q.ext_oneapi_copy(dataIn1.data(), mip_mem.get_mip_level_mem_handle(0), + q.ext_oneapi_copy(dataIn1.data(), mipMem.get_mip_level_mem_handle(0), desc.get_mip_level_desc(0)); - q.ext_oneapi_copy(dataIn1.data(), mip_mem.get_mip_level_mem_handle(1), + q.ext_oneapi_copy(dataIn1.data(), mipMem.get_mip_level_mem_handle(1), desc.get_mip_level_desc(1)); - q.ext_oneapi_copy(dataIn3.data(), mip_mem.get_mip_level_mem_handle(2), + q.ext_oneapi_copy(dataIn3.data(), mipMem.get_mip_level_mem_handle(2), desc.get_mip_level_desc(2)); q.wait_and_throw(); // Extension: create a sampled image handle to represent the mipmap sycl::ext::oneapi::experimental::sampled_image_handle mipHandle = - sycl::ext::oneapi::experimental::create_image(mip_mem, samp, desc, q); + sycl::ext::oneapi::experimental::create_image(mipMem, samp, desc, q); sycl::buffer buf((float *)out.data(), sycl::range<2>{height, width}); @@ -116,10 +116,10 @@ int main() { } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } // collect and validate output @@ -146,5 +146,5 @@ int main() { } std::cout << "Test failed!" << std::endl; - return 1; + return 3; } diff --git a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp index 382cd639e91de..bacd6e081e9cf 100644 --- a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp +++ b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -43,32 +43,32 @@ int main() { try { // Extension: image descriptor -- number of levels - unsigned int num_levels = 2; + unsigned int numLevels = 2; sycl::ext::oneapi::experimental::image_descriptor desc( {width, height, depth}, sycl::image_channel_order::rgba, sycl::image_channel_type::fp32, - sycl::ext::oneapi::experimental::image_type::mipmap, num_levels); + sycl::ext::oneapi::experimental::image_type::mipmap, numLevels); // Extension: define a sampler object -- extended mipmap attributes sycl::ext::oneapi::experimental::bindless_image_sampler samp( sycl::addressing_mode::clamp, sycl::coordinate_normalization_mode::normalized, sycl::filtering_mode::nearest, sycl::filtering_mode::nearest, 0.0f, - (float)num_levels, 8.0f); + (float)numLevels, 8.0f); // Extension: allocate mipmap memory on device - sycl::ext::oneapi::experimental::image_mem mip_mem(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem mipMem(desc, dev, ctxt); // Extension: copy data to device levels 0 and 1 - q.ext_oneapi_copy(dataIn1.data(), mip_mem.get_mip_level_mem_handle(0), + q.ext_oneapi_copy(dataIn1.data(), mipMem.get_mip_level_mem_handle(0), desc.get_mip_level_desc(0)); - q.ext_oneapi_copy(dataIn2.data(), mip_mem.get_mip_level_mem_handle(1), + q.ext_oneapi_copy(dataIn2.data(), mipMem.get_mip_level_mem_handle(1), desc.get_mip_level_desc(1)); q.wait(); // Extension: create a sampled image handle to represent the mipmap sycl::ext::oneapi::experimental::sampled_image_handle mipHandle = - sycl::ext::oneapi::experimental::create_image(mip_mem, samp, desc, dev, + sycl::ext::oneapi::experimental::create_image(mipMem, samp, desc, dev, ctxt); sycl::buffer buf((float *)out.data(), @@ -108,10 +108,10 @@ int main() { } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } // collect and validate output @@ -138,5 +138,5 @@ int main() { } std::cout << "Test failed!" << std::endl; - return 1; + return 3; } diff --git a/sycl/test-e2e/bindless_images/read_1D.cpp b/sycl/test-e2e/bindless_images/read_1D.cpp index 645e2641cd49b..8a47c131b4a95 100644 --- a/sycl/test-e2e/bindless_images/read_1D.cpp +++ b/sycl/test-e2e/bindless_images/read_1D.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -38,46 +38,45 @@ int main() { sycl::image_channel_type::fp32); // Extension: allocate memory on device and create the handle - sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); - sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem1(desc, dev, ctxt); // std::hash specialization to ensure `image_mem` follows common reference // semantics - assert(std::hash{}(img_mem_0) != - std::hash{}(img_mem_1)); + assert(std::hash{}(imgMem0) != + std::hash{}(imgMem1)); // We're able to use move semantics // Move construct - sycl::ext::oneapi::experimental::image_mem img_mem_0_move_construct( - std::move(img_mem_0)); + sycl::ext::oneapi::experimental::image_mem imgMem0MoveConstruct( + std::move(imgMem0)); // Move assign - sycl::ext::oneapi::experimental::image_mem img_mem_0_move_assign; - img_mem_0_move_assign = std::move(img_mem_0_move_construct); + sycl::ext::oneapi::experimental::image_mem imgMem0MoveAssign; + imgMem0MoveAssign = std::move(imgMem0MoveConstruct); // We're able to use copy semantics // Copy construct - sycl::ext::oneapi::experimental::image_mem img_mem_1_copy_construct( - img_mem_1); + sycl::ext::oneapi::experimental::image_mem imgMem1CopyConstruct(imgMem1); // Copy assign - sycl::ext::oneapi::experimental::image_mem img_mem_1_copy_assign; - img_mem_1_copy_assign = img_mem_1_copy_construct; + sycl::ext::oneapi::experimental::image_mem imgMem1CopyAssign; + imgMem1CopyAssign = imgMem1CopyConstruct; // Equality operators to ensure `image_mem` follows common reference // semantics - assert(img_mem_0_move_assign != img_mem_1_copy_assign); - assert(img_mem_1 == img_mem_1_copy_assign); + assert(imgMem0MoveAssign != imgMem1CopyAssign); + assert(imgMem1 == imgMem1CopyAssign); // Extension: create the image and return the handle sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 = - sycl::ext::oneapi::experimental::create_image(img_mem_0_move_assign, - desc, dev, ctxt); + sycl::ext::oneapi::experimental::create_image(imgMem0MoveAssign, desc, + dev, ctxt); sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 = - sycl::ext::oneapi::experimental::create_image(img_mem_1_copy_assign, - desc, dev, ctxt); + sycl::ext::oneapi::experimental::create_image(imgMem1CopyAssign, desc, + dev, ctxt); // Extension: copy over data to device - q.ext_oneapi_copy(dataIn1.data(), img_mem_0_move_assign.get_handle(), desc); - q.ext_oneapi_copy(dataIn2.data(), img_mem_1_copy_assign.get_handle(), desc); + q.ext_oneapi_copy(dataIn1.data(), imgMem0MoveAssign.get_handle(), desc); + q.ext_oneapi_copy(dataIn2.data(), imgMem1CopyAssign.get_handle(), desc); q.wait_and_throw(); @@ -109,10 +108,10 @@ int main() { ctxt); } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } // collect and validate output @@ -139,5 +138,5 @@ int main() { } std::cout << "Test failed!" << std::endl; - return 1; + return 3; } diff --git a/sycl/test-e2e/bindless_images/read_2D.cpp b/sycl/test-e2e/bindless_images/read_2D.cpp index 36e49b39a1178..0ac2d2aec2350 100644 --- a/sycl/test-e2e/bindless_images/read_2D.cpp +++ b/sycl/test-e2e/bindless_images/read_2D.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -41,27 +41,25 @@ int main() { try { // Extension: allocate memory on device and create the handle - sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); - sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem1(desc, dev, ctxt); // Extension: create the image and return the handle sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 = - sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, dev, - ctxt); + sycl::ext::oneapi::experimental::create_image(imgMem0, desc, dev, ctxt); sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 = - sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, dev, - ctxt); + sycl::ext::oneapi::experimental::create_image(imgMem1, desc, dev, ctxt); sycl::buffer buf((float *)out.data(), sycl::range<2>{height, width}); // Extension: copy over data to device (handler variant) q.submit([&](sycl::handler &cgh) { - cgh.ext_oneapi_copy(dataIn1.data(), img_mem_0.get_handle(), desc); + cgh.ext_oneapi_copy(dataIn1.data(), imgMem0.get_handle(), desc); }); q.submit([&](sycl::handler &cgh) { - cgh.ext_oneapi_copy(dataIn2.data(), img_mem_1.get_handle(), desc); + cgh.ext_oneapi_copy(dataIn2.data(), imgMem1.get_handle(), desc); }); q.wait_and_throw(); @@ -96,10 +94,10 @@ int main() { ctxt); } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } // collect and validate output @@ -126,5 +124,5 @@ int main() { } std::cout << "Test failed!" << std::endl; - return 1; + return 3; } diff --git a/sycl/test-e2e/bindless_images/read_2D_dynamic.cpp b/sycl/test-e2e/bindless_images/read_2D_dynamic.cpp index 69e1ee449b4a6..2451a59f229b4 100644 --- a/sycl/test-e2e/bindless_images/read_2D_dynamic.cpp +++ b/sycl/test-e2e/bindless_images/read_2D_dynamic.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -100,10 +100,10 @@ int main() { } } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } // collect and validate output @@ -130,5 +130,5 @@ int main() { } std::cout << "Test failed!" << std::endl; - return 1; + return 3; } diff --git a/sycl/test-e2e/bindless_images/read_3D.cpp b/sycl/test-e2e/bindless_images/read_3D.cpp index 4b6c82e53b665..b71f0b5828db1 100644 --- a/sycl/test-e2e/bindless_images/read_3D.cpp +++ b/sycl/test-e2e/bindless_images/read_3D.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -46,21 +46,19 @@ int main() { sycl::image_channel_type::fp32); // Extension: allocate memory on device and create the handle - sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); - sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem1(desc, dev, ctxt); // Extension: copy over data to device - q.ext_oneapi_copy(dataIn1.data(), img_mem_0.get_handle(), desc); - q.ext_oneapi_copy(dataIn2.data(), img_mem_1.get_handle(), desc); + q.ext_oneapi_copy(dataIn1.data(), imgMem0.get_handle(), desc); + q.ext_oneapi_copy(dataIn2.data(), imgMem1.get_handle(), desc); q.wait_and_throw(); // Extension: create the image and return the handle sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 = - sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, dev, - ctxt); + sycl::ext::oneapi::experimental::create_image(imgMem0, desc, dev, ctxt); sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 = - sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, dev, - ctxt); + sycl::ext::oneapi::experimental::create_image(imgMem1, desc, dev, ctxt); sycl::buffer buf((float *)out.data(), sycl::range<3>{depth, height, width}); @@ -97,10 +95,10 @@ int main() { ctxt); } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } // collect and validate output @@ -125,5 +123,5 @@ int main() { } std::cout << "Test failed!" << std::endl; - return 1; + return 3; } diff --git a/sycl/test-e2e/bindless_images/read_sampled.cpp b/sycl/test-e2e/bindless_images/read_sampled.cpp index 0a5a57bc0f156..9d036a5ff3c5f 100644 --- a/sycl/test-e2e/bindless_images/read_sampled.cpp +++ b/sycl/test-e2e/bindless_images/read_sampled.cpp @@ -4,10 +4,10 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include #include #include +#include // Print test names and pass status // #define VERBOSE_LV1 @@ -45,17 +45,6 @@ struct util { } } - // Return fractional part of argument - // Whole part is returned through wholeComp - static double fract(double x, double *wholeComp) { - // This fmin operation is to prevent fract from returning 1.0. - // Instead will return the largest possible floating-point number less - // than 1.0 - double fractComp = std::fmin(x - std::floor(x), 0x1.fffffep-1f); - *wholeComp = std::floor(x); - return fractComp; - } - // Returns the two pixels to access plus the weight each of them have static double get_common_linear_fract_and_coords_fp64(double coord, int *x0, int *x1) { @@ -64,7 +53,7 @@ struct util { // Subtract to align so that pixel center is 0.5 away from origin. coord = coord - 0.5; - double weight = fract(coord, &pixelCoord); + double weight = sycl::fract(coord, &pixelCoord); *x0 = static_cast(std::floor(pixelCoord)); *x1 = *x0 + 1; return weight; @@ -123,10 +112,7 @@ struct util { // Clamp sampling according to the SYCL spec returns a border color. // The border color is all zeros. // There does not appear to be any way for the user to set the border color - if (coordXInt > width - 1) { - return VecType{0}; - } - if (coordXInt < 0) { + if (coordXInt > width - 1 || coordXInt < 0) { return VecType{0}; } return input_image[coordXInt]; @@ -509,8 +495,10 @@ struct util { }); } catch (sycl::exception e) { std::cerr << "\tKernel submission failed! " << e.what() << std::endl; + exit(-1); } catch (...) { std::cerr << "\tKernel submission failed!" << std::endl; + exit(-1); } } else if constexpr (NDims == 2) { assert(false && "2d normalized not yet implemented"); diff --git a/sycl/test-e2e/bindless_images/read_write_1D.cpp b/sycl/test-e2e/bindless_images/read_write_1D.cpp index 88e0b28a7f46a..eabf71e9248d8 100644 --- a/sycl/test-e2e/bindless_images/read_write_1D.cpp +++ b/sycl/test-e2e/bindless_images/read_write_1D.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -39,28 +39,25 @@ int main() { // Extension: allocate memory on device and create the handle // Input images memory - sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); - sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem1(desc, dev, ctxt); // Output image memory - sycl::ext::oneapi::experimental::image_mem img_mem_2(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem2(desc, dev, ctxt); // Extension: copy over data to device - q.ext_oneapi_copy(dataIn1.data(), img_mem_0.get_handle(), desc); - q.ext_oneapi_copy(dataIn2.data(), img_mem_1.get_handle(), desc); + q.ext_oneapi_copy(dataIn1.data(), imgMem0.get_handle(), desc); + q.ext_oneapi_copy(dataIn2.data(), imgMem1.get_handle(), desc); q.wait_and_throw(); // Extension: create the image and return the handle sycl::ext::oneapi::experimental::unsampled_image_handle imgIn1 = - sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, dev, - ctxt); + sycl::ext::oneapi::experimental::create_image(imgMem0, desc, dev, ctxt); sycl::ext::oneapi::experimental::unsampled_image_handle imgIn2 = - sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, dev, - ctxt); + sycl::ext::oneapi::experimental::create_image(imgMem1, desc, dev, ctxt); sycl::ext::oneapi::experimental::unsampled_image_handle imgOut = - sycl::ext::oneapi::experimental::create_image(img_mem_2, desc, dev, - ctxt); + sycl::ext::oneapi::experimental::create_image(imgMem2, desc, dev, ctxt); q.submit([&](sycl::handler &cgh) { cgh.parallel_for(width, [=](sycl::id<1> id) { @@ -82,7 +79,7 @@ int main() { q.wait_and_throw(); // Extension: copy data from device to host - q.ext_oneapi_copy(img_mem_2.get_handle(), out.data(), desc); + q.ext_oneapi_copy(imgMem2.get_handle(), out.data(), desc); q.wait_and_throw(); // Extension: cleanup @@ -91,10 +88,10 @@ int main() { sycl::ext::oneapi::experimental::destroy_image_handle(imgOut, dev, ctxt); } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } // collect and validate output @@ -121,5 +118,5 @@ int main() { } std::cout << "Test failed!" << std::endl; - return 1; + return 3; } diff --git a/sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp b/sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp index 0a64d934a8c49..3691a397f1942 100644 --- a/sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -36,37 +36,37 @@ int main() { {width}, sycl::image_channel_order::r, sycl::image_channel_type::fp32); // Extension: allocate memory on device and create the handle - sycl::ext::oneapi::experimental::image_mem img_mem_00(desc, q); - sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, q); - sycl::ext::oneapi::experimental::image_mem img_mem_2(desc, q); + sycl::ext::oneapi::experimental::image_mem imgMem00(desc, q); + sycl::ext::oneapi::experimental::image_mem imgMem1(desc, q); + sycl::ext::oneapi::experimental::image_mem imgMem2(desc, q); // We're able to use move semantics - auto img_mem_0 = std::move(img_mem_00); + auto imgMem0 = std::move(imgMem00); // Extension: create the image and return the handle sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 = - sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, q); + sycl::ext::oneapi::experimental::create_image(imgMem0, desc, q); sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 = - sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, q); + sycl::ext::oneapi::experimental::create_image(imgMem1, desc, q); sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle3 = - sycl::ext::oneapi::experimental::create_image(img_mem_2, desc, q); + sycl::ext::oneapi::experimental::create_image(imgMem2, desc, q); // Extension: copy over data to device (2 subregions) sycl::range copySrcOffset = {0, 0, 0}; - sycl::range copyExtent = {width / 2, 1, 1}; + sycl::range copyExtent1 = {width / 2, 1, 1}; sycl::range srcExtent = {width, 0, 0}; q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, srcExtent, - img_mem_0.get_handle(), {0, 0, 0}, desc, copyExtent); + imgMem0.get_handle(), {0, 0, 0}, desc, copyExtent1); q.ext_oneapi_copy(dataIn1.data(), {width / 2, 0, 0}, srcExtent, - img_mem_0.get_handle(), {width / 2, 0, 0}, desc, - copyExtent); + imgMem0.get_handle(), {width / 2, 0, 0}, desc, + copyExtent1); q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, srcExtent, - img_mem_1.get_handle(), {0, 0, 0}, desc, copyExtent); + imgMem1.get_handle(), {0, 0, 0}, desc, copyExtent1); q.ext_oneapi_copy(dataIn2.data(), {width / 2, 0, 0}, srcExtent, - img_mem_1.get_handle(), {width / 2, 0, 0}, desc, - copyExtent); + imgMem1.get_handle(), {width / 2, 0, 0}, desc, + copyExtent1); q.wait_and_throw(); @@ -88,13 +88,12 @@ int main() { q.wait_and_throw(); // Extension: copy data from device to host (two sub-regions) - sycl::range copy_extent_2 = {width / 2, 1, 1}; - sycl::range dest_extent_0 = {width, 0, 0}; - q.ext_oneapi_copy(img_mem_2.get_handle(), {0, 0, 0}, desc, out.data(), - {0, 0, 0}, dest_extent_0, copy_extent_2); - q.ext_oneapi_copy(img_mem_2.get_handle(), {width / 2, 0, 0}, desc, - out.data(), {width / 2, 0, 0}, dest_extent_0, - copy_extent_2); + sycl::range copyExtent2 = {width / 2, 1, 1}; + sycl::range destExtent = {width, 0, 0}; + q.ext_oneapi_copy(imgMem2.get_handle(), {0, 0, 0}, desc, out.data(), + {0, 0, 0}, destExtent, copyExtent2); + q.ext_oneapi_copy(imgMem2.get_handle(), {width / 2, 0, 0}, desc, out.data(), + {width / 2, 0, 0}, destExtent, copyExtent2); q.wait_and_throw(); // Extension: cleanup @@ -102,10 +101,10 @@ int main() { sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, q); } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } // collect and validate output @@ -132,5 +131,5 @@ int main() { } std::cout << "Test failed!" << std::endl; - return 1; + return 3; } diff --git a/sycl/test-e2e/bindless_images/read_write_2D.cpp b/sycl/test-e2e/bindless_images/read_write_2D.cpp index 6ac8f3c8ed36e..dbc5f4cc219f9 100644 --- a/sycl/test-e2e/bindless_images/read_write_2D.cpp +++ b/sycl/test-e2e/bindless_images/read_write_2D.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -42,27 +42,24 @@ int main() { // Extension: allocate memory on device and create the handle // Input images memory - sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); - sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem1(desc, dev, ctxt); // Output image memory - sycl::ext::oneapi::experimental::image_mem img_mem_2(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem2(desc, dev, ctxt); // Extension: copy over data to device - q.ext_oneapi_copy(dataIn1.data(), img_mem_0.get_handle(), desc); - q.ext_oneapi_copy(dataIn2.data(), img_mem_1.get_handle(), desc); + q.ext_oneapi_copy(dataIn1.data(), imgMem0.get_handle(), desc); + q.ext_oneapi_copy(dataIn2.data(), imgMem1.get_handle(), desc); q.wait_and_throw(); // Extension: create the image and return the handle sycl::ext::oneapi::experimental::unsampled_image_handle imgIn1 = - sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, dev, - ctxt); + sycl::ext::oneapi::experimental::create_image(imgMem0, desc, dev, ctxt); sycl::ext::oneapi::experimental::unsampled_image_handle imgIn2 = - sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, dev, - ctxt); + sycl::ext::oneapi::experimental::create_image(imgMem1, desc, dev, ctxt); sycl::ext::oneapi::experimental::unsampled_image_handle imgOut = - sycl::ext::oneapi::experimental::create_image(img_mem_2, desc, dev, - ctxt); + sycl::ext::oneapi::experimental::create_image(imgMem2, desc, dev, ctxt); q.submit([&](sycl::handler &cgh) { cgh.parallel_for( @@ -91,7 +88,7 @@ int main() { // Extension: copy data from device to host (handler variant) q.submit([&](sycl::handler &cgh) { - cgh.ext_oneapi_copy(img_mem_2.get_handle(), out.data(), desc); + cgh.ext_oneapi_copy(imgMem2.get_handle(), out.data(), desc); }); q.wait_and_throw(); @@ -102,10 +99,10 @@ int main() { sycl::ext::oneapi::experimental::destroy_image_handle(imgOut, dev, ctxt); } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } // collect and validate output @@ -132,5 +129,5 @@ int main() { } std::cout << "Test failed!" << std::endl; - return 1; + return 3; } diff --git a/sycl/test-e2e/bindless_images/read_write_2D_subregion.cpp b/sycl/test-e2e/bindless_images/read_write_2D_subregion.cpp index 6f98565ce97db..55e43eb3e7fe6 100644 --- a/sycl/test-e2e/bindless_images/read_write_2D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/read_write_2D_subregion.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -41,45 +41,45 @@ int main() { try { // Extension: allocate memory on device and create the handle - sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, q); - sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, q); - sycl::ext::oneapi::experimental::image_mem img_mem_2(desc, q); + sycl::ext::oneapi::experimental::image_mem imgMem0(desc, q); + sycl::ext::oneapi::experimental::image_mem imgMem1(desc, q); + sycl::ext::oneapi::experimental::image_mem imgMem2(desc, q); // Extension: create the image and return the handle sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 = - sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, q); + sycl::ext::oneapi::experimental::create_image(imgMem0, desc, q); sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 = - sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, q); + sycl::ext::oneapi::experimental::create_image(imgMem1, desc, q); sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle3 = - sycl::ext::oneapi::experimental::create_image(img_mem_2, desc, q); + sycl::ext::oneapi::experimental::create_image(imgMem2, desc, q); // Extension: copy over data to device (four subregions/quadrants) - sycl::range copyExtent = {width / 2, height / 2, 1}; + sycl::range copyExtent1 = {width / 2, height / 2, 1}; sycl::range srcExtent = {width / 2, height / 2, 0}; q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, srcExtent, - img_mem_0.get_handle(), {0, 0, 0}, desc, copyExtent); + imgMem0.get_handle(), {0, 0, 0}, desc, copyExtent1); q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, srcExtent, - img_mem_0.get_handle(), {width / 2, 0, 0}, desc, - copyExtent); + imgMem0.get_handle(), {width / 2, 0, 0}, desc, + copyExtent1); q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, srcExtent, - img_mem_0.get_handle(), {0, height / 2, 0}, desc, - copyExtent); + imgMem0.get_handle(), {0, height / 2, 0}, desc, + copyExtent1); q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, srcExtent, - img_mem_0.get_handle(), {width / 2, height / 2, 0}, desc, - copyExtent); + imgMem0.get_handle(), {width / 2, height / 2, 0}, desc, + copyExtent1); q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, srcExtent, - img_mem_1.get_handle(), {0, 0, 0}, desc, copyExtent); + imgMem1.get_handle(), {0, 0, 0}, desc, copyExtent1); q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, srcExtent, - img_mem_1.get_handle(), {width / 2, 0, 0}, desc, - copyExtent); + imgMem1.get_handle(), {width / 2, 0, 0}, desc, + copyExtent1); q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, srcExtent, - img_mem_1.get_handle(), {0, height / 2, 0}, desc, - copyExtent); + imgMem1.get_handle(), {0, height / 2, 0}, desc, + copyExtent1); q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, srcExtent, - img_mem_1.get_handle(), {width / 2, height / 2, 0}, desc, - copyExtent); + imgMem1.get_handle(), {width / 2, height / 2, 0}, desc, + copyExtent1); q.wait_and_throw(); @@ -105,13 +105,12 @@ int main() { q.wait_and_throw(); // Extension: copy data from device to host (two sub-regions) - sycl::range copy_extent_2 = {width, height / 2, 1}; - sycl::range dest_extent_0 = {width, height, 0}; - q.ext_oneapi_copy(img_mem_2.get_handle(), {0, 0, 0}, desc, out.data(), - {0, 0, 0}, dest_extent_0, copy_extent_2); - q.ext_oneapi_copy(img_mem_2.get_handle(), {0, height / 2, 0}, desc, - out.data(), {0, height / 2, 0}, dest_extent_0, - copy_extent_2); + sycl::range copyExtent2 = {width, height / 2, 1}; + sycl::range destExtent = {width, height, 0}; + q.ext_oneapi_copy(imgMem2.get_handle(), {0, 0, 0}, desc, out.data(), + {0, 0, 0}, destExtent, copyExtent2); + q.ext_oneapi_copy(imgMem2.get_handle(), {0, height / 2, 0}, desc, + out.data(), {0, height / 2, 0}, destExtent, copyExtent2); q.wait_and_throw(); // Extension: cleanup @@ -119,10 +118,10 @@ int main() { sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, q); } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } // collect and validate output @@ -149,5 +148,5 @@ int main() { } std::cout << "Test failed!" << std::endl; - return 1; + return 3; } diff --git a/sycl/test-e2e/bindless_images/read_write_3D.cpp b/sycl/test-e2e/bindless_images/read_write_3D.cpp index 1f42edc1ab8b4..38216599fe735 100644 --- a/sycl/test-e2e/bindless_images/read_write_3D.cpp +++ b/sycl/test-e2e/bindless_images/read_write_3D.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -45,28 +45,25 @@ int main() { // Extension: allocate memory on device and create the handle // Input images memory - sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); - sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem1(desc, dev, ctxt); // Output image memory - sycl::ext::oneapi::experimental::image_mem img_mem_2(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem2(desc, dev, ctxt); // Extension: copy over data to device - q.ext_oneapi_copy(dataIn1.data(), img_mem_0.get_handle(), desc); - q.ext_oneapi_copy(dataIn2.data(), img_mem_1.get_handle(), desc); + q.ext_oneapi_copy(dataIn1.data(), imgMem0.get_handle(), desc); + q.ext_oneapi_copy(dataIn2.data(), imgMem1.get_handle(), desc); q.wait_and_throw(); // Extension: create the image and return the handle sycl::ext::oneapi::experimental::unsampled_image_handle imgIn1 = - sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, dev, - ctxt); + sycl::ext::oneapi::experimental::create_image(imgMem0, desc, dev, ctxt); sycl::ext::oneapi::experimental::unsampled_image_handle imgIn2 = - sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, dev, - ctxt); + sycl::ext::oneapi::experimental::create_image(imgMem1, desc, dev, ctxt); sycl::ext::oneapi::experimental::unsampled_image_handle imgOut = - sycl::ext::oneapi::experimental::create_image(img_mem_2, desc, dev, - ctxt); + sycl::ext::oneapi::experimental::create_image(imgMem2, desc, dev, ctxt); q.submit([&](sycl::handler &cgh) { cgh.parallel_for( @@ -93,7 +90,7 @@ int main() { q.wait_and_throw(); // Extension: copy data from device to host - q.ext_oneapi_copy(img_mem_2.get_handle(), out.data(), desc); + q.ext_oneapi_copy(imgMem2.get_handle(), out.data(), desc); q.wait_and_throw(); // Extension: cleanup @@ -102,10 +99,10 @@ int main() { sycl::ext::oneapi::experimental::destroy_image_handle(imgOut, dev, ctxt); } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } // collect and validate output @@ -132,5 +129,5 @@ int main() { } std::cout << "Test failed!" << std::endl; - return 1; + return 3; } diff --git a/sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp b/sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp index 9b4b21640dbb5..6044162a8887d 100644 --- a/sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -49,57 +49,57 @@ int main() { sycl::image_channel_type::fp32); // Extension: allocate memory on device and create the handle - sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, q); - sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, q); - sycl::ext::oneapi::experimental::image_mem img_mem_2(desc, q); + sycl::ext::oneapi::experimental::image_mem imgMem0(desc, q); + sycl::ext::oneapi::experimental::image_mem imgMem1(desc, q); + sycl::ext::oneapi::experimental::image_mem imgMem2(desc, q); // Extension: copy over data to device (8 sub-regions) - sycl::range copy_extent_0 = {width / 2, height / 2, depth / 2}; - sycl::range src_extent_0 = {width, height, depth}; + sycl::range copyExtent1 = {width / 2, height / 2, depth / 2}; + sycl::range srcExtent1 = {width, height, depth}; // First image with 8 sub-regions - q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, src_extent_0, - img_mem_0.get_handle(), {0, 0, 0}, desc, copy_extent_0); - q.ext_oneapi_copy(dataIn1.data(), {width / 2, 0, 0}, src_extent_0, - img_mem_0.get_handle(), {width / 2, 0, 0}, desc, - copy_extent_0); - q.ext_oneapi_copy(dataIn1.data(), {0, height / 2, 0}, src_extent_0, - img_mem_0.get_handle(), {0, height / 2, 0}, desc, - copy_extent_0); - q.ext_oneapi_copy(dataIn1.data(), {0, 0, depth / 2}, src_extent_0, - img_mem_0.get_handle(), {0, 0, depth / 2}, desc, - copy_extent_0); - q.ext_oneapi_copy(dataIn1.data(), {width / 2, height / 2, 0}, src_extent_0, - img_mem_0.get_handle(), {width / 2, height / 2, 0}, desc, - copy_extent_0); - q.ext_oneapi_copy(dataIn1.data(), {0, height / 2, depth / 2}, src_extent_0, - img_mem_0.get_handle(), {0, height / 2, depth / 2}, desc, - copy_extent_0); - q.ext_oneapi_copy(dataIn1.data(), {width / 2, 0, depth / 2}, src_extent_0, - img_mem_0.get_handle(), {width / 2, 0, depth / 2}, desc, - copy_extent_0); + q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, srcExtent1, + imgMem0.get_handle(), {0, 0, 0}, desc, copyExtent1); + q.ext_oneapi_copy(dataIn1.data(), {width / 2, 0, 0}, srcExtent1, + imgMem0.get_handle(), {width / 2, 0, 0}, desc, + copyExtent1); + q.ext_oneapi_copy(dataIn1.data(), {0, height / 2, 0}, srcExtent1, + imgMem0.get_handle(), {0, height / 2, 0}, desc, + copyExtent1); + q.ext_oneapi_copy(dataIn1.data(), {0, 0, depth / 2}, srcExtent1, + imgMem0.get_handle(), {0, 0, depth / 2}, desc, + copyExtent1); + q.ext_oneapi_copy(dataIn1.data(), {width / 2, height / 2, 0}, srcExtent1, + imgMem0.get_handle(), {width / 2, height / 2, 0}, desc, + copyExtent1); + q.ext_oneapi_copy(dataIn1.data(), {0, height / 2, depth / 2}, srcExtent1, + imgMem0.get_handle(), {0, height / 2, depth / 2}, desc, + copyExtent1); + q.ext_oneapi_copy(dataIn1.data(), {width / 2, 0, depth / 2}, srcExtent1, + imgMem0.get_handle(), {width / 2, 0, depth / 2}, desc, + copyExtent1); q.ext_oneapi_copy(dataIn1.data(), {width / 2, height / 2, depth / 2}, - src_extent_0, img_mem_0.get_handle(), - {width / 2, height / 2, depth / 2}, desc, copy_extent_0); + srcExtent1, imgMem0.get_handle(), + {width / 2, height / 2, depth / 2}, desc, copyExtent1); // Second image with 2 sub-regions - sycl::range copy_extent_1 = {width, height, depth / 2}; - sycl::range src_extent_1 = {width, height, depth}; - q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, src_extent_1, - img_mem_1.get_handle(), {0, 0, 0}, desc, copy_extent_1); - q.ext_oneapi_copy(dataIn2.data(), {0, 0, depth / 2}, src_extent_1, - img_mem_1.get_handle(), {0, 0, depth / 2}, desc, - copy_extent_1); + sycl::range copyExtent2 = {width, height, depth / 2}; + sycl::range srcExtent2 = {width, height, depth}; + q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, srcExtent2, + imgMem1.get_handle(), {0, 0, 0}, desc, copyExtent2); + q.ext_oneapi_copy(dataIn2.data(), {0, 0, depth / 2}, srcExtent2, + imgMem1.get_handle(), {0, 0, depth / 2}, desc, + copyExtent2); q.wait_and_throw(); // Extension: create the image and return the handle sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 = - sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, q); + sycl::ext::oneapi::experimental::create_image(imgMem0, desc, q); sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 = - sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, q); + sycl::ext::oneapi::experimental::create_image(imgMem1, desc, q); sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle3 = - sycl::ext::oneapi::experimental::create_image(img_mem_2, desc, q); + sycl::ext::oneapi::experimental::create_image(imgMem2, desc, q); q.submit([&](sycl::handler &cgh) { cgh.parallel_for( @@ -125,13 +125,12 @@ int main() { q.wait_and_throw(); // Extension: copy data from device to host (two sub-regions) - sycl::range copy_extent_2 = {width, height, depth / 2}; - sycl::range dest_extent_0 = {width, height, depth}; - q.ext_oneapi_copy(img_mem_2.get_handle(), {0, 0, 0}, desc, out.data(), - {0, 0, 0}, dest_extent_0, copy_extent_2); - q.ext_oneapi_copy(img_mem_2.get_handle(), {0, 0, depth / 2}, desc, - out.data(), {0, 0, depth / 2}, dest_extent_0, - copy_extent_2); + sycl::range copyExtent3 = {width, height, depth / 2}; + sycl::range destExtent = {width, height, depth}; + q.ext_oneapi_copy(imgMem2.get_handle(), {0, 0, 0}, desc, out.data(), + {0, 0, 0}, destExtent, copyExtent3); + q.ext_oneapi_copy(imgMem2.get_handle(), {0, 0, depth / 2}, desc, out.data(), + {0, 0, depth / 2}, destExtent, copyExtent3); q.wait_and_throw(); // Extension: cleanup @@ -140,10 +139,10 @@ int main() { sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle3, q); } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } // collect and validate output @@ -170,5 +169,5 @@ int main() { } std::cout << "Test failed!" << std::endl; - return 1; + return 3; } diff --git a/sycl/test-e2e/bindless_images/read_write_unsampled.cpp b/sycl/test-e2e/bindless_images/read_write_unsampled.cpp index 69516ade249e1..aedd45bdf458e 100644 --- a/sycl/test-e2e/bindless_images/read_write_unsampled.cpp +++ b/sycl/test-e2e/bindless_images/read_write_unsampled.cpp @@ -4,9 +4,9 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include #include +#include #include static sycl::device dev; @@ -120,8 +120,10 @@ struct util { }); } catch (sycl::exception e) { std::cout << "\tKernel submission failed! " << e.what() << std::endl; + exit(-1); } catch (...) { std::cout << "\tKernel submission failed!" << std::endl; + exit(-1); } } @@ -168,8 +170,10 @@ struct util { }); } catch (sycl::exception e) { std::cout << "\tKernel submission failed! " << e.what() << std::endl; + exit(-1); } catch (...) { std::cout << "\tKernel submission failed!" << std::endl; + exit(-1); } } @@ -215,8 +219,10 @@ struct util { }); } catch (sycl::exception e) { std::cout << "\tKernel submission failed! " << e.what() << std::endl; + exit(-1); } catch (...) { std::cout << "\tKernel submission failed!" << std::endl; + exit(-1); } } }; diff --git a/sycl/test-e2e/bindless_images/sampling_1D.cpp b/sycl/test-e2e/bindless_images/sampling_1D.cpp index f1fac37cea56e..b6ea4a5815210 100644 --- a/sycl/test-e2e/bindless_images/sampling_1D.cpp +++ b/sycl/test-e2e/bindless_images/sampling_1D.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -14,15 +14,15 @@ class image_addition; int main() { -#if defined(SYCL_EXT_ONEAPI_FREE_FUNCTION_QUERIES) - assert(SYCL_EXT_ONEAPI_FREE_FUNCTION_QUERIES == 1); +#if defined(SYCL_EXT_ONEAPI_BINDLESS_IMAGES) + assert(SYCL_EXT_ONEAPI_BINDLESS_IMAGES == 1); #if defined(VERBOSE_PRINT) std::cout << "SYCL_EXT_ONEAPI_BINDLESS_IMAGES is defined!" << std::endl; #endif #else std::cerr << "Bindless images feature test macro is not defined!" << std::endl; - assert(false); + return 1; #endif // defined(SYCL_EXT_ONEAPI_BINDLESS_IMAGES) sycl::device dev; @@ -34,10 +34,10 @@ int main() { size_t width = N; std::vector out(N); std::vector expected(N); - std::vector dataIn1(N); + std::vector dataIn(N); for (int i = 0; i < N; i++) { expected[i] = i; - dataIn1[i] = float(i); + dataIn[i] = float(i); } try { @@ -45,21 +45,21 @@ int main() { sycl::ext::oneapi::experimental::image_descriptor desc( {width}, sycl::image_channel_order::r, sycl::image_channel_type::fp32); - sycl::ext::oneapi::experimental::bindless_image_sampler samp1( + sycl::ext::oneapi::experimental::bindless_image_sampler samp( sycl::addressing_mode::repeat, sycl::coordinate_normalization_mode::normalized, sycl::filtering_mode::linear); // Extension: allocate memory on device - sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem(desc, dev, ctxt); // Extension: copy over data to device - q.ext_oneapi_copy(dataIn1.data(), img_mem_0.get_handle(), desc); + q.ext_oneapi_copy(dataIn.data(), imgMem.get_handle(), desc); q.wait_and_throw(); // Extension: create the image and return the handle - auto imgHandle1 = sycl::ext::oneapi::experimental::create_image( - img_mem_0, samp1, desc, dev, ctxt); + auto imgHandle = sycl::ext::oneapi::experimental::create_image( + imgMem, samp, desc, dev, ctxt); sycl::buffer buf((float *)out.data(), N); q.submit([&](sycl::handler &cgh) { @@ -70,7 +70,7 @@ int main() { float x = float(id[0] + 0.5) / (float)N; // Extension: read image data from handle float px1 = - sycl::ext::oneapi::experimental::read_image(imgHandle1, x); + sycl::ext::oneapi::experimental::read_image(imgHandle, x); outAcc[id] = px1; }); @@ -79,15 +79,14 @@ int main() { q.wait_and_throw(); // Extension: cleanup - sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, dev, - ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle, dev, ctxt); } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } // collect and validate output @@ -114,5 +113,5 @@ int main() { } std::cout << "Test passed!" << std::endl; - return 1; + return 3; } diff --git a/sycl/test-e2e/bindless_images/sampling_2D.cpp b/sycl/test-e2e/bindless_images/sampling_2D.cpp index b5c8dcf103d77..c3035e8ed09ec 100644 --- a/sycl/test-e2e/bindless_images/sampling_2D.cpp +++ b/sycl/test-e2e/bindless_images/sampling_2D.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -35,7 +35,7 @@ int main() { } try { - sycl::ext::oneapi::experimental::bindless_image_sampler samp1( + sycl::ext::oneapi::experimental::bindless_image_sampler samp( sycl::addressing_mode::repeat, sycl::coordinate_normalization_mode::normalized, sycl::filtering_mode::linear); @@ -47,33 +47,33 @@ int main() { size_t pitch = 0; // Extension: returns the device pointer to USM allocated pitched memory - auto img_mem_usm_0 = + auto imgMemUSM0 = sycl::ext::oneapi::experimental::pitched_alloc_device(&pitch, desc, q); - if (img_mem_usm_0 == nullptr) { + if (imgMemUSM0 == nullptr) { std::cout << "Error allocating images!" << std::endl; return 1; } // Extension: allocate memory on device - sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem0(desc, dev, ctxt); // Extension: copy over data to device for USM image (handler variant) q.submit([&](sycl::handler &cgh) { - cgh.ext_oneapi_copy(dataIn1.data(), img_mem_usm_0, desc, pitch); + cgh.ext_oneapi_copy(dataIn1.data(), imgMemUSM0, desc, pitch); }); // Extension: copy over data to device for non-USM image - q.ext_oneapi_copy(dataIn2.data(), img_mem_0.get_handle(), desc); + q.ext_oneapi_copy(dataIn2.data(), imgMem0.get_handle(), desc); q.wait_and_throw(); // Extension: create the images and return the handles sycl::ext::oneapi::experimental::sampled_image_handle imgHandle1 = - sycl::ext::oneapi::experimental::create_image(img_mem_usm_0, pitch, - samp1, desc, dev, ctxt); + sycl::ext::oneapi::experimental::create_image(imgMemUSM0, pitch, samp, + desc, dev, ctxt); sycl::ext::oneapi::experimental::sampled_image_handle imgHandle2 = - sycl::ext::oneapi::experimental::create_image(img_mem_0, samp1, desc, - dev, ctxt); + sycl::ext::oneapi::experimental::create_image(imgMem0, samp, desc, dev, + ctxt); sycl::buffer buf((float *)out.data(), sycl::range<2>{height, width}); @@ -110,13 +110,13 @@ int main() { ctxt); sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, dev, ctxt); - sycl::free(img_mem_usm_0, ctxt); + sycl::free(imgMemUSM0, ctxt); } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } // collect and validate output @@ -143,5 +143,5 @@ int main() { } std::cout << "Test failed!" << std::endl; - return 1; + return 3; } diff --git a/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp b/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp index ee00a3ac48a6a..eaa7da3594b35 100644 --- a/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp +++ b/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp @@ -1,12 +1,13 @@ // REQUIRES: linux // REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images_shared_usm // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -19,18 +20,11 @@ int main() { sycl::queue q(dev); auto ctxt = q.get_context(); - if (!dev.has(sycl::aspect::ext_oneapi_bindless_images_shared_usm)) { - std::cout - << "images backed by USM shared allocations are not supported, skipping" - << std::endl; - return 0; - } - // declare image data size_t width = 5; size_t height = 6; size_t N = width * height; - size_t width_in_bytes = width * sizeof(float); + size_t widthInBytes = width * sizeof(float); std::vector out(N); std::vector expected(N); std::vector dataIn(N); @@ -43,7 +37,7 @@ int main() { } try { - sycl::ext::oneapi::experimental::bindless_image_sampler samp1( + sycl::ext::oneapi::experimental::bindless_image_sampler samp( sycl::addressing_mode::clamp, sycl::coordinate_normalization_mode::normalized, sycl::filtering_mode::linear); @@ -53,39 +47,39 @@ int main() { {width, height}, sycl::image_channel_order::r, sycl::image_channel_type::fp32); - auto device_pitch_align = dev.get_info< + auto devicePitchAlign = dev.get_info< sycl::ext::oneapi::experimental::info::device::image_row_pitch_align>(); - auto device_max_pitch = + auto deviceMaxPitch = dev.get_info(); // Pitch requirements: - // - pitch % device_pitch_align == 0 - // - pitch >= width_in_bytes - // - pitch <= device_max_pitch - size_t pitch = device_pitch_align * - std::ceil(float(width_in_bytes) / float(device_pitch_align)); - assert(pitch <= device_max_pitch); + // - pitch % devicePitchAlign == 0 + // - pitch >= widthInBytes + // - pitch <= deviceMaxPitch + size_t pitch = devicePitchAlign * + std::ceil(float(widthInBytes) / float(devicePitchAlign)); + assert(pitch <= deviceMaxPitch); // Shared USM allocation - auto img_mem = sycl::aligned_alloc_shared(device_pitch_align, - (pitch * height), dev, ctxt); + auto imgMem = sycl::aligned_alloc_shared(devicePitchAlign, (pitch * height), + dev, ctxt); - if (img_mem == nullptr) { + if (imgMem == nullptr) { std::cerr << "Error allocating images!" << std::endl; return 1; } // Copy to shared USM and incorporate pitch for (size_t i = 0; i < height; i++) { - memcpy(static_cast(img_mem) + (i * pitch / sizeof(float)), - dataIn.data() + (i * width), width_in_bytes); + memcpy(static_cast(imgMem) + (i * pitch / sizeof(float)), + dataIn.data() + (i * width), widthInBytes); } // Extension: create the image and return the handle - sycl::ext::oneapi::experimental::sampled_image_handle img_handle = - sycl::ext::oneapi::experimental::create_image(img_mem, pitch, samp1, - desc, dev, ctxt); + sycl::ext::oneapi::experimental::sampled_image_handle imgHandle = + sycl::ext::oneapi::experimental::create_image(imgMem, pitch, samp, desc, + dev, ctxt); sycl::buffer buf((float *)out.data(), sycl::range<2>{height, width}); @@ -105,7 +99,7 @@ int main() { // Extension: read image data from handle float px = sycl::ext::oneapi::experimental::read_image( - img_handle, sycl::float2(fdim0, fdim1)); + imgHandle, sycl::float2(fdim0, fdim1)); outAcc[sycl::id<2>{dim1, dim0}] = px; }); @@ -114,15 +108,14 @@ int main() { q.wait_and_throw(); // Extension: cleanup - sycl::ext::oneapi::experimental::destroy_image_handle(img_handle, dev, - ctxt); - sycl::free(img_mem, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle, dev, ctxt); + sycl::free(imgMem, ctxt); } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } // collect and validate output @@ -149,5 +142,5 @@ int main() { } std::cout << "Test failed!" << std::endl; - return 1; + return 3; } diff --git a/sycl/test-e2e/bindless_images/sampling_2D_half.cpp b/sycl/test-e2e/bindless_images/sampling_2D_half.cpp index 4304bb6bd8aa3..71d709e89b67e 100644 --- a/sycl/test-e2e/bindless_images/sampling_2D_half.cpp +++ b/sycl/test-e2e/bindless_images/sampling_2D_half.cpp @@ -5,8 +5,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -25,46 +25,46 @@ int main() { size_t N = width * height; std::vector out(N); std::vector expected(N); - std::vector dataIn1(N); + std::vector dataIn(N); for (int i = 0; i < width; i++) { for (int j = 0; j < height; j++) { expected[i + (width * j)] = i + (width * j); - dataIn1[i + (width * j)] = {i + (width * j), 0, 0, 0}; + dataIn[i + (width * j)] = {i + (width * j), 0, 0, 0}; } } try { - sycl::ext::oneapi::experimental::bindless_image_sampler samp1( + sycl::ext::oneapi::experimental::bindless_image_sampler samp( sycl::addressing_mode::repeat, sycl::coordinate_normalization_mode::normalized, sycl::filtering_mode::linear); - unsigned int element_size_bytes = sizeof(sycl::half) * 4; - size_t width_in_bytes = width * element_size_bytes; + unsigned int elementSizebytes = sizeof(sycl::half) * 4; + size_t widthInBytes = width * elementSizebytes; size_t pitch = 0; // Extension: returns the device pointer to USM allocated pitched memory - auto img_mem_0 = sycl::ext::oneapi::experimental::pitched_alloc_device( - &pitch, width_in_bytes, height, element_size_bytes, q); + auto imgMem = sycl::ext::oneapi::experimental::pitched_alloc_device( + &pitch, widthInBytes, height, elementSizebytes, q); // Extension: image descriptor sycl::ext::oneapi::experimental::image_descriptor desc( {width, height}, sycl::image_channel_order::rgba, sycl::image_channel_type::fp16); - if (img_mem_0 == nullptr) { + if (imgMem == nullptr) { std::cout << "Error allocating images!" << std::endl; return 1; } // Extension: copy over data to device - q.ext_oneapi_copy(dataIn1.data(), img_mem_0, desc, pitch); + q.ext_oneapi_copy(dataIn.data(), imgMem, desc, pitch); q.wait_and_throw(); // Extension: create the image and return the handle - sycl::ext::oneapi::experimental::sampled_image_handle imgHandle1 = - sycl::ext::oneapi::experimental::create_image(img_mem_0, pitch, samp1, - desc, dev, ctxt); + sycl::ext::oneapi::experimental::sampled_image_handle imgHandle = + sycl::ext::oneapi::experimental::create_image(imgMem, pitch, samp, desc, + dev, ctxt); sycl::buffer buf((sycl::half *)out.data(), sycl::range<2>{height, width}); @@ -85,7 +85,7 @@ int main() { // Extension: read image data from handle sycl::half4 px1 = sycl::ext::oneapi::experimental::read_image( - imgHandle1, sycl::float2(fdim0, fdim1)); + imgHandle, sycl::float2(fdim0, fdim1)); outAcc[sycl::id<2>{dim1, dim0}] = px1[0]; }); @@ -94,15 +94,14 @@ int main() { q.wait_and_throw(); // Extension: cleanup - sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, dev, - ctxt); - sycl::free(img_mem_0, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle, dev, ctxt); + sycl::free(imgMem, ctxt); } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } // collect and validate output @@ -129,5 +128,5 @@ int main() { } std::cout << "Test failed!" << std::endl; - return 1; + return 3; } diff --git a/sycl/test-e2e/bindless_images/sampling_3D.cpp b/sycl/test-e2e/bindless_images/sampling_3D.cpp index a5d9b1cf803e1..674bb06f0837c 100644 --- a/sycl/test-e2e/bindless_images/sampling_3D.cpp +++ b/sycl/test-e2e/bindless_images/sampling_3D.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %t.out -#include #include +#include // Uncomment to print additional test information // #define VERBOSE_PRINT @@ -25,13 +25,13 @@ int main() { size_t N = width * height * depth; std::vector out(N); std::vector expected(N); - std::vector dataIn1(N); + std::vector dataIn(N); for (int i = 0; i < width; i++) { for (int j = 0; j < height; j++) { for (int k = 0; k < depth; k++) { expected[i + width * (j + height * k)] = i + width * (j + height * k); - dataIn1[i + width * (j + height * k)] = {i + width * (j + height * k), - 0, 0, 0}; + dataIn[i + width * (j + height * k)] = {i + width * (j + height * k), 0, + 0, 0}; } } } @@ -42,22 +42,22 @@ int main() { {width, height, depth}, sycl::image_channel_order::rgba, sycl::image_channel_type::fp32); - sycl::ext::oneapi::experimental::bindless_image_sampler samp1( + sycl::ext::oneapi::experimental::bindless_image_sampler samp( sycl::addressing_mode::clamp, sycl::coordinate_normalization_mode::normalized, sycl::filtering_mode::linear); // Extension: allocate memory on device - sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem(desc, dev, ctxt); // Extension: copy over data to device - q.ext_oneapi_copy(dataIn1.data(), img_mem_0.get_handle(), desc); + q.ext_oneapi_copy(dataIn.data(), imgMem.get_handle(), desc); q.wait_and_throw(); // Extension: create the image and return the handle - sycl::ext::oneapi::experimental::sampled_image_handle imgHandle1 = - sycl::ext::oneapi::experimental::create_image(img_mem_0, samp1, desc, - dev, ctxt); + sycl::ext::oneapi::experimental::sampled_image_handle imgHandle = + sycl::ext::oneapi::experimental::create_image(imgMem, samp, desc, dev, + ctxt); sycl::buffer buf((float *)out.data(), sycl::range<3>{depth, height, width}); @@ -80,7 +80,7 @@ int main() { // Extension: read image data from handle sycl::float4 px1 = sycl::ext::oneapi::experimental::read_image( - imgHandle1, sycl::float4(fdim0, fdim1, fdim2, (float)0)); + imgHandle, sycl::float4(fdim0, fdim1, fdim2, (float)0)); outAcc[sycl::id<3>{dim2, dim1, dim0}] = px1[0]; }); @@ -89,14 +89,13 @@ int main() { q.wait_and_throw(); // Extension: cleanup - sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, dev, - ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle, dev, ctxt); } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); + return 1; } catch (...) { std::cerr << "Unknown exception caught!\n"; - exit(-1); + return 2; } // collect and validate output @@ -123,5 +122,5 @@ int main() { } std::cout << "Test failed!" << std::endl; - return 1; + return 3; } diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp index dc48054844a9a..99d6ab6ffba9d 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp @@ -84,7 +84,7 @@ bool run_sycl(int input_image_fd, size_t width, size_t height) { }); } catch (...) { std::cerr << "Kernel submission failed!" << std::endl; - assert(false); + exit(-1); } try {