diff --git a/sycl/test-e2e/CMakeLists.txt b/sycl/test-e2e/CMakeLists.txt index d7aa95a5a6c75..89ef52e4a0b75 100644 --- a/sycl/test-e2e/CMakeLists.txt +++ b/sycl/test-e2e/CMakeLists.txt @@ -24,6 +24,8 @@ endif() # Standalone. find_package(Threads REQUIRED) set(SYCL_THREADS_LIB ${CMAKE_THREAD_LIBS_INIT}) +find_package(Vulkan) + if(NOT LLVM_LIT) find_program(LLVM_LIT NAMES llvm-lit lit.py lit diff --git a/sycl/test-e2e/bindless_images/image_get_info.cpp b/sycl/test-e2e/bindless_images/image_get_info.cpp new file mode 100644 index 0000000000000..931332b928cb4 --- /dev/null +++ b/sycl/test-e2e/bindless_images/image_get_info.cpp @@ -0,0 +1,204 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +void printString(std::string name) { +#ifdef VERBOSE_PRINT + std::cout << name; +#endif +} + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + size_t height = 13; + size_t width = 7; + size_t depth = 11; + + bool validated = true; + + try { + // Submit dummy kernel to let the runtime decide the backend (CUDA) + // Without this, the default Level Zero backend is active + q.submit([&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + + // Extension: image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height, depth}, sycl::image_channel_order::r, + sycl::image_channel_type::signed_int32); + + // Extension: returns the device pointer to the allocated memory + // Input images memory + sycl::ext::oneapi::experimental::image_mem imgMem(desc, dev, ctxt); + + // Extension: query for bindless image support -- device aspects + bool bindlessSupport = dev.has(sycl::aspect::ext_oneapi_bindless_images); + bool bindlessSharedUsmSupport = + dev.has(sycl::aspect::ext_oneapi_bindless_images_shared_usm); + bool usm1dSupport = + dev.has(sycl::aspect::ext_oneapi_bindless_images_1d_usm); + bool usm2dSupport = + dev.has(sycl::aspect::ext_oneapi_bindless_images_2d_usm); + +#ifdef VERBOSE_PRINT + std::cout << "bindless_images_support: " << bindlessSupport + << "\nbindless_images_shared_usm_support: " + << bindlessSharedUsmSupport + << "\nbindless_images_1d_usm_support: " 1dS + << "\nbindless_images_2d_usm_support: " << S << "\n"; +#endif + + // Extension: get pitch alignment information from device -- device info + // Make sure our pitch alignment queries work properly + // These can be different depending on the device so we cannot test that the + // values are correct + // But we should at least see that the query itself works + auto pitchAlign = dev.get_info< + sycl::ext::oneapi::experimental::info::device::image_row_pitch_align>(); + auto maxPitch = dev.get_info(); + auto maxWidth = dev.get_info(); + auto maxheight = dev.get_info(); + +#ifdef VERBOSE_PRINT + std::cout << "image_row_pitch_align: " << pitchAlign + << "\nmax_image_linear_row_pitch: " << maxPitch + << "\nmax_image_linear_width: " << maxWidth + << "\nmax_image_linear_height: " << maxheight << "\n"; +#endif + + // Extension: query for bindless image mipmaps support -- aspects & info + bool mipmapSupport = dev.has(sycl::aspect::ext_oneapi_mipmap); + bool mipmapAnisotropySupport = + dev.has(sycl::aspect::ext_oneapi_mipmap_anisotropy); + float mipmapMaxAnisotropy = dev.get_info< + sycl::ext::oneapi::experimental::info::device::mipmap_max_anisotropy>(); + bool mipmapLevelReferenceSupport = + dev.has(sycl::aspect::ext_oneapi_mipmap_level_reference); + +#ifdef VERBOSE_PRINT + std::cout << "mipmapSupport: " << mipmapSupport + << "\nmipmapAnisotropySupport: " << mipmapAnisotropySupport + << "\nmipmapMaxAnisotropy: " << mipmapMaxAnisotropy + << "\nmipmapLevelReferenceSupport: " + << mipmapLevelReferenceSupport << "\n"; +#endif + + // Extension: query for bindless image interop support -- device aspects + bool interopMemoryImportSupport = + dev.has(sycl::aspect::ext_oneapi_interop_memory_import); + bool interopMemoryExportSupport = + dev.has(sycl::aspect::ext_oneapi_interop_memory_export); + bool interopSemaphoreImportSupport = + dev.has(sycl::aspect::ext_oneapi_interop_semaphore_import); + bool interopSemaphoreExportSupport = + dev.has(sycl::aspect::ext_oneapi_interop_semaphore_export); + +#ifdef VERBOSE_PRINT + std::cout << "interopMemoryImportSupport: " << interopMemoryImportSupport + << "\ninteropMemoryExportSupport: " << interopMemoryExportSupport + << "\ninteropSemaphoreImportSupport: " + << interopSemaphoreImportSupport + << "\ninteropSemaphoreExportSupport: " + << interopSemaphoreExportSupport << "\n"; +#endif + + auto rangeMem = imgMem.get_range(); + auto range = sycl::ext::oneapi::experimental::get_image_range( + imgMem.get_handle(), dev, ctxt); + if (rangeMem != range) { + printString("handle and mem object disagree on image dimensions!\n"); + validated = false; + } + if (range[0] == width) { + printString("width is correct!\n"); + } else { + printString("width is NOT correct!\n"); + validated = false; + } + if (range[1] == height) { + printString("height is correct!\n"); + } else { + printString("height is NOT correct!\n"); + validated = false; + } + if (range[2] == depth) { + printString("depth is correct!\n"); + } else { + printString("depth is NOT correct!\n"); + validated = false; + } + + auto type = imgMem.get_type(); + if (type == sycl::ext::oneapi::experimental::image_type::standard) { + printString("image type is correct!\n"); + } else { + printString("image type is NOT correct!\n"); + validated = false; + } + + auto ctypeMem = imgMem.get_channel_type(); + auto ctype = sycl::ext::oneapi::experimental::get_image_channel_type( + imgMem.get_handle(), dev, ctxt); + if (ctypeMem != ctype) { + printString("handle and mem object disagree on image channel type!\n"); + validated = false; + } + if (ctype == sycl::image_channel_type::signed_int32) { + printString("channel type is correct!\n"); + } else { + printString("channel type is NOT correct!\n"); + validated = false; + } + + auto corder = imgMem.get_channel_order(); + if (corder == sycl::image_channel_order::r) { + printString("channel order is correct!\n"); + } else { + printString("channel order is NOT correct!\n"); + validated = false; + } + + auto numchannelsMem = imgMem.get_num_channels(); + auto numchannels = sycl::ext::oneapi::experimental::get_image_num_channels( + imgMem.get_handle(), dev, ctxt); + if (numchannelsMem != numchannels) { + printString("handle and mem object disagree on number of channels!\n"); + validated = false; + } + if (numchannels == 1) { + printString("num channels is correct!\n"); + } else { + printString("num channels is NOT correct!\n"); + validated = false; + } + + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + if (validated) { + std::cout << "Test Passed!\n"; + return 0; + } + + std::cout << "Test Failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp new file mode 100644 index 0000000000000..41ff725849ad7 --- /dev/null +++ b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp @@ -0,0 +1,142 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + constexpr size_t N = 16; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + std::vector dataIn2(N / 2); + std::vector copyOut(N / 2); + int j = 0; + for (int i = 0; i < N; i++) { + expected[i] = i + (j + 10); + if (i % 2) + j++; + dataIn1[i] = sycl::float4(i, i, i, i); + if (i < (N / 2)) { + dataIn2[i] = sycl::float4(i + 10, i + 10, i + 10, i + 10); + copyOut[i] = sycl::float4{0, 0, 0, 0}; + } + } + + try { + + size_t width = N; + unsigned int numLevels = 2; + + // Extension: image descriptor -- number of levels + sycl::ext::oneapi::experimental::image_descriptor desc( + {width}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32, + sycl::ext::oneapi::experimental::image_type::mipmap, numLevels); + + // Extension: allocate mipmap memory on device + sycl::ext::oneapi::experimental::image_mem mipMem(desc, dev, ctxt); + + // Extension: retrieve level 0 + sycl::ext::oneapi::experimental::image_mem_handle imgMem1 = + mipMem.get_mip_level_mem_handle(0); + + // Extension: copy over data to device at level 0 + q.ext_oneapi_copy(dataIn1.data(), imgMem1, desc); + + // Extension: copy data to device at level 1 + q.ext_oneapi_copy(dataIn2.data(), mipMem.get_mip_level_mem_handle(1), + desc.get_mip_level_desc(1)); + q.wait_and_throw(); + + // Extension: define a sampler object -- extended mipmap attributes + sycl::ext::oneapi::experimental::bindless_image_sampler samp( + sycl::addressing_mode::mirrored_repeat, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::nearest, sycl::filtering_mode::nearest, 0.0f, + (float)numLevels, 8.0f); + + // Extension: create a sampled image handle to represent the mipmap + sycl::ext::oneapi::experimental::sampled_image_handle mipHandle = + sycl::ext::oneapi::experimental::create_image(mipMem, samp, desc, dev, + ctxt); + + sycl::buffer buf((float *)out.data(), N); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access(cgh, N); + + cgh.parallel_for(N, [=](sycl::id<1> id) { + float sum = 0; + float x = float(id[0] + 0.5) / (float)N; + // Extension: read mipmap level 0 with anisotropic filtering and level 1 + // with LOD + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + mipHandle, x, 0.0f, 0.0f); + sycl::float4 px2 = + sycl::ext::oneapi::experimental::read_image(mipHandle, + x, 1.0f); + + sum = px1[0] + px2[0]; + outAcc[id] = sum; + }); + }); + + q.wait_and_throw(); + + // Extension: copy data from device + q.ext_oneapi_copy(mipMem.get_mip_level_mem_handle(1), copyOut.data(), + desc.get_mip_level_desc(1)); + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(mipHandle, dev, ctxt); + + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp new file mode 100644 index 0000000000000..9120dd694e58b --- /dev/null +++ b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp @@ -0,0 +1,150 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 16; + size_t height = 16; + size_t N = width * height; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + std::vector dataIn2(N / 4); + std::vector dataIn3(N / 16); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + dataIn1[i + (width * j)] = {i + (width * j), 0, 0, 0}; + } + } + for (int i = 0; i < (N / 4); i++) { + dataIn2[i] = {i, i, i, i}; + } + for (int i = 0; i < (N / 16); i++) { + dataIn3[i] = {i, i, i, i}; + } + // Expected each x and y will repeat twice + // since mipmap level 1 is half in size + int jj = 0; + for (int i = 0; i < width - 1; i += 2) { + for (int j = 0; j < height - 1; j += 2, jj++) { + expected[j + (width * i)] = jj; + expected[j + (width * (i + 1))] = jj; + expected[(j + 1) + (width * i)] = jj; + expected[(j + 1) + (width * (i + 1))] = jj; + } + } + + try { + + size_t numLevels = 3; + + // Extension: image descriptor -- number of levels + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32, + sycl::ext::oneapi::experimental::image_type::mipmap, numLevels); + + // Extension: define a sampler object -- extended mipmap attributes + sycl::ext::oneapi::experimental::bindless_image_sampler samp( + sycl::addressing_mode::clamp, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::nearest, sycl::filtering_mode::nearest, 0.0f, + (float)numLevels, 8.0f); + + // Extension: allocate mipmap memory on device + sycl::ext::oneapi::experimental::image_mem mipMem(desc, q); + + // Extension: copy data to device at all levels -- copy func handles desc + // sizing + q.ext_oneapi_copy(dataIn1.data(), mipMem.get_mip_level_mem_handle(0), + desc.get_mip_level_desc(0)); + q.ext_oneapi_copy(dataIn1.data(), mipMem.get_mip_level_mem_handle(1), + desc.get_mip_level_desc(1)); + q.ext_oneapi_copy(dataIn3.data(), mipMem.get_mip_level_mem_handle(2), + desc.get_mip_level_desc(2)); + q.wait_and_throw(); + + // Extension: create a sampled image handle to represent the mipmap + sycl::ext::oneapi::experimental::sampled_image_handle mipHandle = + sycl::ext::oneapi::experimental::create_image(mipMem, samp, desc, q); + + sycl::buffer buf((float *)out.data(), + sycl::range<2>{height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<2>{height, width}); + + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Normalize coordinates -- +0.5 to look towards centre of pixel + float fdim0 = float(dim0 + 0.5) / (float)width; + float fdim1 = float(dim1 + 0.5) / (float)height; + + // Extension: read mipmap level 1 with LOD + sycl::float4 px2 = + sycl::ext::oneapi::experimental::read_image( + mipHandle, sycl::float2(fdim0, fdim1), 1.0f); + + outAcc[sycl::id<2>{dim1, dim0}] = px2[0]; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(mipHandle, q); + + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp new file mode 100644 index 0000000000000..bacd6e081e9cf --- /dev/null +++ b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp @@ -0,0 +1,142 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 4; + size_t height = 4; + size_t depth = 4; + size_t N = width * height * depth; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + std::vector dataIn2(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < depth; k++) { + expected[i + width * (j + height * k)] = i + width * (j + height * k); + dataIn1[i + width * (j + height * k)] = {i + width * (j + height * k), + 0, 0, 0}; + } + } + } + for (int i = 0; i < (N / 8); i++) { + dataIn2[i] = i; + } + + try { + + // Extension: image descriptor -- number of levels + unsigned int numLevels = 2; + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height, depth}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32, + sycl::ext::oneapi::experimental::image_type::mipmap, numLevels); + + // Extension: define a sampler object -- extended mipmap attributes + sycl::ext::oneapi::experimental::bindless_image_sampler samp( + sycl::addressing_mode::clamp, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::nearest, sycl::filtering_mode::nearest, 0.0f, + (float)numLevels, 8.0f); + + // Extension: allocate mipmap memory on device + sycl::ext::oneapi::experimental::image_mem mipMem(desc, dev, ctxt); + + // Extension: copy data to device levels 0 and 1 + q.ext_oneapi_copy(dataIn1.data(), mipMem.get_mip_level_mem_handle(0), + desc.get_mip_level_desc(0)); + q.ext_oneapi_copy(dataIn2.data(), mipMem.get_mip_level_mem_handle(1), + desc.get_mip_level_desc(1)); + q.wait(); + + // Extension: create a sampled image handle to represent the mipmap + sycl::ext::oneapi::experimental::sampled_image_handle mipHandle = + sycl::ext::oneapi::experimental::create_image(mipMem, samp, desc, dev, + ctxt); + + sycl::buffer buf((float *)out.data(), + sycl::range<3>{depth, height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<3>{depth, height, width}); + + cgh.parallel_for( + sycl::nd_range<3>{{width, height, depth}, {width, height, depth}}, + [=](sycl::nd_item<3> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + size_t dim2 = it.get_local_id(2); + + // Normalize coordinates -- +0.5 to look towards centre of pixel + float fdim0 = float(dim0 + 0.5) / (float)width; + float fdim1 = float(dim1 + 0.5) / (float)height; + float fdim2 = float(dim2 + 0.5) / (float)depth; + + // Extension: read mipmap with anisotropic filtering with zero + // viewing gradients + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + mipHandle, sycl::float4(fdim0, fdim1, fdim2, (float)0), + sycl::float4(0.0f, 0.0f, 0.0f, 0.0f), + sycl::float4(0.0f, 0.0f, 0.0f, 0.0f)); + + outAcc[sycl::id<3>{dim2, dim1, dim0}] = px1[0]; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(mipHandle, dev, ctxt); + + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/read_1D.cpp b/sycl/test-e2e/bindless_images/read_1D.cpp new file mode 100644 index 0000000000000..8a47c131b4a95 --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_1D.cpp @@ -0,0 +1,142 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + constexpr size_t width = 512; + std::vector out(width); + std::vector expected(width); + std::vector dataIn1(width); + std::vector dataIn2(width); + float exp = 512; + for (int i = 0; i < width; i++) { + expected[i] = exp; + dataIn1[i] = sycl::float4(i, i, i, i); + dataIn2[i] = sycl::float4(width - i, width - i, width - i, width - i); + } + + try { + // Extension: image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + + // Extension: allocate memory on device and create the handle + sycl::ext::oneapi::experimental::image_mem imgMem0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem1(desc, dev, ctxt); + + // std::hash specialization to ensure `image_mem` follows common reference + // semantics + assert(std::hash{}(imgMem0) != + std::hash{}(imgMem1)); + + // We're able to use move semantics + // Move construct + sycl::ext::oneapi::experimental::image_mem imgMem0MoveConstruct( + std::move(imgMem0)); + // Move assign + sycl::ext::oneapi::experimental::image_mem imgMem0MoveAssign; + imgMem0MoveAssign = std::move(imgMem0MoveConstruct); + + // We're able to use copy semantics + // Copy construct + sycl::ext::oneapi::experimental::image_mem imgMem1CopyConstruct(imgMem1); + // Copy assign + sycl::ext::oneapi::experimental::image_mem imgMem1CopyAssign; + imgMem1CopyAssign = imgMem1CopyConstruct; + + // Equality operators to ensure `image_mem` follows common reference + // semantics + assert(imgMem0MoveAssign != imgMem1CopyAssign); + assert(imgMem1 == imgMem1CopyAssign); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 = + sycl::ext::oneapi::experimental::create_image(imgMem0MoveAssign, desc, + dev, ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 = + sycl::ext::oneapi::experimental::create_image(imgMem1CopyAssign, desc, + dev, ctxt); + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn1.data(), imgMem0MoveAssign.get_handle(), desc); + q.ext_oneapi_copy(dataIn2.data(), imgMem1CopyAssign.get_handle(), desc); + + q.wait_and_throw(); + + sycl::buffer buf((float *)out.data(), width); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access(cgh, width); + + cgh.parallel_for(width, [=](sycl::id<1> id) { + float sum = 0; + // Extension: read image data from handle + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + imgHandle1, int(id[0])); + sycl::float4 px2 = + sycl::ext::oneapi::experimental::read_image( + imgHandle2, int(id[0])); + + sum = px1[0] + px2[0]; + outAcc[id] = sum; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, dev, + ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, dev, + ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < width; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/read_2D.cpp b/sycl/test-e2e/bindless_images/read_2D.cpp new file mode 100644 index 0000000000000..0ac2d2aec2350 --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_2D.cpp @@ -0,0 +1,128 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 7; + size_t height = 3; + size_t N = width * height; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + std::vector dataIn2(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + expected[i + (width * j)] = j * 3; + dataIn1[i + (width * j)] = {j, j, j, j}; + dataIn2[i + (width * j)] = {j * 2, j * 2, j * 2, j * 2}; + } + } + + // Image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + + try { + // Extension: allocate memory on device and create the handle + sycl::ext::oneapi::experimental::image_mem imgMem0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem1(desc, dev, ctxt); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 = + sycl::ext::oneapi::experimental::create_image(imgMem0, desc, dev, ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 = + sycl::ext::oneapi::experimental::create_image(imgMem1, desc, dev, ctxt); + + sycl::buffer buf((float *)out.data(), + sycl::range<2>{height, width}); + + // Extension: copy over data to device (handler variant) + q.submit([&](sycl::handler &cgh) { + cgh.ext_oneapi_copy(dataIn1.data(), imgMem0.get_handle(), desc); + }); + + q.submit([&](sycl::handler &cgh) { + cgh.ext_oneapi_copy(dataIn2.data(), imgMem1.get_handle(), desc); + }); + q.wait_and_throw(); + + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<2>{height, width}); + + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + float sum = 0; + // Extension: read image data from handle + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + imgHandle1, sycl::int2(dim0, dim1)); + sycl::float4 px2 = + sycl::ext::oneapi::experimental::read_image( + imgHandle2, sycl::int2(dim0, dim1)); + + sum = px1[0] + px2[0]; + outAcc[sycl::id<2>{dim1, dim0}] = sum; + }); + }); + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, dev, + ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, dev, + ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/read_2D_dynamic.cpp b/sycl/test-e2e/bindless_images/read_2D_dynamic.cpp new file mode 100644 index 0000000000000..2451a59f229b4 --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_2D_dynamic.cpp @@ -0,0 +1,134 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t numImages = 5; + size_t width = 7; + size_t height = 3; + size_t N = width * height; + std::vector out(N); + std::vector expected(N); + std::vector dataIn(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + expected[i + (width * j)] = (i + (width * j)) * numImages; + dataIn[i + (width * j)] = {i + (width * j), 0, 0, 0}; + } + } + + try { + + // Extension: image descriptor - can use the same for all images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + + // Allocate each image and save the device memory handles + std::vector> + imgAllocations; + for (int i = 0; i < numImages; i++) { + // Extension: returns the handle to the device allocated memory + imgAllocations.push_back( + std::make_shared(desc, + q)); + } + + // Copy over data to device for each image + for (int i = 0; i < numImages; i++) { + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn.data(), imgAllocations[i]->get_handle(), desc); + } + q.wait_and_throw(); + + // Create the images and return the handles + std::vector + imgHandles; + for (int i = 0; i < numImages; i++) { + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle = + sycl::ext::oneapi::experimental::create_image(*imgAllocations[i], + desc, q); + imgHandles.push_back(imgHandle); + } + + sycl::buffer buf(out.data(), sycl::range<2>{height, width}); + sycl::buffer imgHandlesBuf{imgHandles}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor outAcc{buf, cgh, sycl::write_only}; + sycl::accessor imgHandleAcc{imgHandlesBuf, cgh, sycl::read_only}; + + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Sum each image by reading their handle + float sum = 0; + for (int i = 0; i < numImages; i++) { + // Extension: read image data from handle + sum += (sycl::ext::oneapi::experimental::read_image( + imgHandleAcc[i], sycl::int2(dim0, dim1)))[0]; + } + outAcc[sycl::id<2>{dim1, dim0}] = sum; + }); + }); + + // Using image handles requires manual synchronization + q.wait_and_throw(); + + // Extension: cleanup + for (int i = 0; i < numImages; i++) { + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandles[i], q); + } + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/read_3D.cpp b/sycl/test-e2e/bindless_images/read_3D.cpp new file mode 100644 index 0000000000000..b71f0b5828db1 --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_3D.cpp @@ -0,0 +1,127 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 1024; + size_t height = 1024; + size_t depth = 16; + size_t N = width * height * depth; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + std::vector dataIn2(N); + // ROW-MAJOR + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < depth; k++) { + expected[i + width * (j + height * k)] = j * 3; + dataIn1[i + width * (j + height * k)] = {j, j, j, j}; + dataIn2[i + width * (j + height * k)] = {j * 2, j * 2, j * 2, j * 2}; + } + } + } + + try { + + // Extension: image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height, depth}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + + // Extension: allocate memory on device and create the handle + sycl::ext::oneapi::experimental::image_mem imgMem0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem1(desc, dev, ctxt); + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn1.data(), imgMem0.get_handle(), desc); + q.ext_oneapi_copy(dataIn2.data(), imgMem1.get_handle(), desc); + q.wait_and_throw(); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 = + sycl::ext::oneapi::experimental::create_image(imgMem0, desc, dev, ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 = + sycl::ext::oneapi::experimental::create_image(imgMem1, desc, dev, ctxt); + + sycl::buffer buf((float *)out.data(), + sycl::range<3>{depth, height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<3>{depth, height, width}); + + cgh.parallel_for( + sycl::nd_range<3>{{width, height, depth}, {16, 16, 4}}, + [=](sycl::nd_item<3> it) { + size_t dim0 = it.get_global_id(0); + size_t dim1 = it.get_global_id(1); + size_t dim2 = it.get_global_id(2); + float sum = 0; + // Extension: read image data from handle + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + imgHandle1, sycl::int4(dim0, dim1, dim2, 0)); + sycl::float4 px2 = + sycl::ext::oneapi::experimental::read_image( + imgHandle2, sycl::int4(dim0, dim1, dim2, 0)); + + sum = px1[0] + px2[0]; + outAcc[sycl::id<3>{dim2, dim1, dim0}] = sum; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, dev, + ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, dev, + ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/read_sampled.cpp b/sycl/test-e2e/bindless_images/read_sampled.cpp new file mode 100644 index 0000000000000..9d036a5ff3c5f --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_sampled.cpp @@ -0,0 +1,935 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include +#include +#include + +// Print test names and pass status +// #define VERBOSE_LV1 + +// Same as above plus sampler, offset, margin of error, largest error found and +// results of one mismatch +// #define VERBOSE_LV2 + +// Same as above but all mismatches are printed +// #define VERBOSE_LV3 + +// Helpers and utilities +struct util { + template + static void fill_rand(std::vector> &v, int seed) { + std::default_random_engine generator; + generator.seed(seed); + auto distribution = [&]() { + if constexpr (std::is_same_v) { + return std::uniform_real_distribution(0.0, 100.0); + } else if constexpr (std::is_floating_point_v) { + return std::uniform_real_distribution(0.0, 100.0); + } else { + return std::uniform_int_distribution(0, 100); + } + }(); + for (int i = 0; i < v.size(); ++i) { + sycl::vec temp; + + for (int j = 0; j < NChannels; j++) { + temp[j] = distribution(generator); + } + + v[i] = temp; + } + } + + // Returns the two pixels to access plus the weight each of them have + static double get_common_linear_fract_and_coords_fp64(double coord, int *x0, + int *x1) { + double pixelCoord; + + // Subtract to align so that pixel center is 0.5 away from origin. + coord = coord - 0.5; + + double weight = sycl::fract(coord, &pixelCoord); + *x0 = static_cast(std::floor(pixelCoord)); + *x1 = *x0 + 1; + return weight; + } + + // Linear sampling is the process of giving a weighted linear blend + // between the nearest adjacent pixels. + // When performing linear sampling, we subtract 0.5 from the original + // coordinate to get the center-adjusted coordinate (as pixels are "centered" + // on the half-integers). For example, with original coord 3.2, we get a + // center-adjusted coord of 2.7. With 2.7, we have 70% of the pixel value will + // come from the pixel at coord 3 and 30% from the pixel value at coord 2 + + // The function arguments here are the two pixels to use and the weight to + // give each of them. + template + static sycl::vec + linearOp1D(sycl::vec pix1, sycl::vec pix2, + double weight) { + + sycl::vec weightArr(weight); + sycl::vec one(1.0f); + + sycl::vec Ti0 = pix1.template convert(); + sycl::vec Ti1 = pix2.template convert(); + + sycl::vec result; + + result = ((one - weightArr) * Ti0 + weightArr * Ti1); + + // Round to nearest whole number. + // There is no option to do this via sycl::rounding_mode. + if constexpr (std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) { + for (int i = 0; i < NChannels; i++) { + result[i] = std::round(result[i]); + } + } + + return result.template convert(); + } + + // Out of range coords return a border color + // The border color happens to be all zeros + template + static VecType clampNearest(double coordX, int width, + std::vector &input_image) { + // Due to pixel centers being 0.5 away from origin and because + // 0.5 is *not* subtracted here, rounding down gives the same results as + // rounding to nearest number if 0.5 is subtracted to account + // for pixel center + int coordXInt = static_cast(std::floor(coordX)); + + // Clamp sampling according to the SYCL spec returns a border color. + // The border color is all zeros. + // There does not appear to be any way for the user to set the border color + if (coordXInt > width - 1 || coordXInt < 0) { + return VecType{0}; + } + return input_image[coordXInt]; + } + + // Out of range coords are clamped to the extent. + template + static VecType clampToEdgeNearest(double coordX, int width, + std::vector &input_image) { + // Due to pixel centers being 0.5 away from origin and because + // 0.5 is *not* subtracted here, rounding down gives the same results as + // rounding to nearest number if 0.5 is subtracted to account + // for pixel center + int coordXInt = static_cast(std::floor(coordX)); + // Clamp to extent + coordXInt = std::clamp(coordXInt, 0, width - 1); + return input_image[coordXInt]; + } + + // Out of range coords are wrapped to the valid range. + template + static VecType repeatNearest(double coordX, int width, + std::vector &input_image) { + + // Convert unnormalized input coord to normalized format + double normCoordX = coordX / width; + + // Keep only the fractional component of the number and unnormalize. + double fractComp = (normCoordX - std::floor(normCoordX)); + + // Unnormalize fractComp + double unnorm = fractComp * width; + + // Due to pixel centers being 0.5 away from origin and because + // 0.5 is *not* subtracted here, rounding down gives the same results as + // rounding to nearest number if 0.5 is subtracted to account + // for pixel center + int coordXInt = static_cast(std::floor(unnorm)); + + // Handle negative coords + if (coordXInt < 0) { + coordXInt = width + coordXInt; + } + + return input_image[coordXInt]; + } + + // Out of range coordinates are flipped at every integer junction + template + static VecType mirroredRepeatNearest(double coordX, int width, + std::vector &input_image) { + + // Convert unnormalized input coord to normalized format + double normCoordX = coordX / width; + + // Round to nearest multiple of two. + // e.g. + // normCoordX == 0.3 -> result = 0 + // normCoordX == 1.3 -> result = 2 + // normCoordX == 2.4 -> result = 2 + // normCoordX == 3.42 -> result = 4 + double nearestMulOfTwo = 2.0f * std::rint(0.5f * normCoordX); + // Subtract nearestMulOfTwo from normCoordX. + // Gives the normalized form of the coord to use. + // With normCoordX=1.3, norm is set to 0.7 + // With normCoordX=2.4, norm is set to 0.4 + double norm = std::abs(normCoordX - nearestMulOfTwo); + // Unnormalize norm + double unnorm = norm * width; + // Round down and cast to int + int coordXInt = static_cast(std::floor(unnorm)); + // Constrain to valid range + coordXInt = std::min(coordXInt, width - 1); + + // This prevents when at an integer junction, having three + // accesses to pixel at normalized location 0 and 1 instead of two which is + // correct. + int oddShift = 0; + // If not at image boundry and precisely on a pixel + if (std::fmod(normCoordX, 1) != 0.0 && + std::fmod(normCoordX * width, 1) == 0.0) { + // Set oddShift to be one when the integral part of the normalized + // coords is odd. + // Otherwise set to zero. + oddShift = + std::abs(static_cast(std::fmod(std::floor(normCoordX), 2))); + } + coordXInt -= oddShift; + + return input_image[coordXInt]; + } + + // Out of range coords return a border color + // The border color is all zeros + template + static sycl::vec + clampLinear(double coordX, int width, + std::vector> &input_image) { + using VecType = sycl::vec; + // Get coords for linear sampling + int i0, i1; + double weight = get_common_linear_fract_and_coords_fp64(coordX, &i0, &i1); + + VecType pix1; + VecType pix2; + + // Clamp sampling according to the SYCL spec returns a border color. + // The border color is all zeros. + // There does not appear to be any way for the user to set the border color. + if (i0 < 0 || i0 > width - 1) { + pix1 = VecType(0); + } else { + pix1 = input_image[i0]; + } + + if (i1 < 0 || i1 > width - 1) { + pix2 = VecType(0); + } else { + pix2 = input_image[i1]; + } + + // Perform linear sampling + return linearOp1D(pix1, pix2, weight); + } + + // Out of range coords are clamped to the extent. + template + static sycl::vec + clampToEdgeLinear(double coordX, int width, + std::vector> &input_image) { + using VecType = sycl::vec; + // Get coords for linear sampling + int i0, i1; + double weight = get_common_linear_fract_and_coords_fp64(coordX, &i0, &i1); + + // Clamp to extent + i0 = std::clamp(i0, 0, width - 1); + i1 = std::clamp(i1, 0, width - 1); + + VecType pix1 = input_image[i0]; + VecType pix2 = input_image[i1]; + + // Perform linear sampling + return linearOp1D(pix1, pix2, weight); + } + + // Out of range coords are wrapped to the valid range + template + static sycl::vec + repeatLinear(double coordX, int width, + std::vector> &input_image) { + using VecType = sycl::vec; + + // Convert unnormalized input coord to normalized format + double normCoordX = coordX / width; + + double unnorm = (normCoordX - static_cast(normCoordX)) * width; + // Get coords for linear sampling + int i0, i1; + double weight = get_common_linear_fract_and_coords_fp64(unnorm, &i0, &i1); + + // Wrap linear sampling coords to valid range + if (i0 < 0) { + i0 = width + i0; + } + if (i1 < 0) { + i1 = width + i1; + } + + if (i1 > width - 1) { + i1 = i1 - width; + } + if (i0 > width - 1) { + i0 = i0 - width; + } + + VecType pix1 = input_image[i0]; + VecType pix2 = input_image[i1]; + + // Perform linear sampling + return linearOp1D(pix1, pix2, weight); + } + + // Out of range coordinates are flipped at every integer junction + template + static sycl::vec + mirroredRepeatLinear(double coordX, int width, + std::vector> &input_image) { + using VecType = sycl::vec; + + // Convert unnormalized input coord to normalized format + double normCoordX = coordX / width; + + // Round to nearest multiple of two. + // e.g. + // normCoordX == 0.3 -> result = 0 + // normCoordX == 1.3 -> result = 2 + // normCoordX == 2.4 -> result = 2 + // normCoordX == 3.42 -> result = 4 + double nearestMulOfTwo = 2.0f * std::rint(0.5f * normCoordX); + // Subtract nearestMulOfTwo from normCoordX. + // Gives the normalized form of the coord to use. + // With normCoordX=1.3, norm is set to 0.7 + // With normCoordX=2.4, norm is set to 0.4 + double norm = std::abs(normCoordX - nearestMulOfTwo); + // Unnormalize norm + double unnorm = norm * width; + + // Get coords for linear sampling + int i0, i1; + double weight = get_common_linear_fract_and_coords_fp64(unnorm, &i0, &i1); + + // get_common_linear sometimes returns numbers out of bounds. + // Handle this by wrapping to boundary. + i0 = std::max(i0, 0); + i1 = std::min(i1, width - 1); + + VecType pix1 = input_image[i0]; + VecType pix2 = input_image[i1]; + + // Perform linear sampling + return linearOp1D(pix1, pix2, weight); + } + + template > + static sycl::vec + read(sycl::range<1> globalSize, double coordX, double offset, + sycl::ext::oneapi::experimental::bindless_image_sampler &samp, + std::vector> &input_image) { + using VecType = sycl::vec; + coordX = coordX + offset; + int width = globalSize[0]; + + // Ensure that coordX always contains unnormalized coords + sycl::coordinate_normalization_mode SampNormMode = samp.coordinate; + if (SampNormMode == sycl::coordinate_normalization_mode::normalized) { + // Unnormalize + coordX = coordX * width; + } + + sycl::filtering_mode SampFiltMode = samp.filtering; + if (SampFiltMode == sycl::filtering_mode::nearest) { + + sycl::addressing_mode SampAddrMode = samp.addressing; + if (SampAddrMode == sycl::addressing_mode::clamp) { + return clampNearest(coordX, width, input_image); + } + + if (SampAddrMode == sycl::addressing_mode::clamp_to_edge) { + return clampToEdgeNearest(coordX, width, input_image); + } + + if (SampAddrMode == sycl::addressing_mode::repeat) { + if (SampNormMode == sycl::coordinate_normalization_mode::unnormalized) { + assert(false && + "Repeat addressing mode must be used with normalized coords"); + } + return repeatNearest(coordX, width, input_image); + } + + if (SampAddrMode == sycl::addressing_mode::mirrored_repeat) { + if (SampNormMode == sycl::coordinate_normalization_mode::unnormalized) { + assert(false && "Mirrored repeat addressing mode must be used with " + "normalized coords"); + } + return mirroredRepeatNearest(coordX, width, input_image); + } + + if (SampAddrMode == sycl::addressing_mode::none) { + int intCoordX = static_cast(std::floor(coordX)); + if (intCoordX < 0 || intCoordX >= width) { + assert(false && "Accessed out of bounds with addressing mode none! " + "Undefined Behaviour!"); + } + return input_image[intCoordX]; + } + + } else { // linear + sycl::addressing_mode SampAddrMode = samp.addressing; + if (SampAddrMode == sycl::addressing_mode::clamp) { + return clampLinear(coordX, width, input_image); + } + if (SampAddrMode == sycl::addressing_mode::clamp_to_edge) { + return clampToEdgeLinear(coordX, width, input_image); + } + if (SampAddrMode == sycl::addressing_mode::repeat) { + if (SampNormMode == sycl::coordinate_normalization_mode::unnormalized) { + assert(false && + "Repeat addressing mode must be used with normalized coords"); + } + return repeatLinear(coordX, width, input_image); + } + if (SampAddrMode == sycl::addressing_mode::mirrored_repeat) { + if (SampNormMode == sycl::coordinate_normalization_mode::unnormalized) { + assert(false && "Mirrored repeat addressing mode must be used with " + "normalized coords"); + } + return mirroredRepeatLinear(coordX, width, + input_image); + } + if (SampAddrMode == sycl::addressing_mode::none) { + if (coordX < 0 || coordX >= width) { + assert(false && "Accessed out of bounds with addressing mode none! " + "Undefined Behaviour!"); + } + assert(false && "filtering mode linear with addressing mode none " + "currently not supported"); + } + } + assert(false && "Invalid sampler encountered!"); + } + + // parallel_for ND bound normalized + template + static void run_ndim_test_host( + sycl::range globalSize, double offset, + sycl::ext::oneapi::experimental::bindless_image_sampler &samp, + std::vector> &input_image, + std::vector> &output) { + using VecType = sycl::vec; + bool isNorm = + (samp.coordinate == sycl::coordinate_normalization_mode::normalized); + + if constexpr (NDims == 1) { + for (int i = 0; i < globalSize[0]; i++) { + double coordX; + if (isNorm) { + coordX = (double)i / (double)globalSize[0]; + } else { + coordX = i; + } + VecType result = read( + globalSize, coordX, offset, samp, input_image); + output[i] = result; + } + } else if constexpr (NDims == 2) { + assert(false && "2d normalized not yet implemented"); + } else if constexpr (NDims == 3) { + assert(false && "3d normalized not yet implemented"); + } else { + assert(false && "Invalid dimension number set"); + } + } + + // parallel_for ND bindless normalized + template + static void run_ndim_test_device( + sycl::queue &q, sycl::range globalSize, + sycl::range localSize, double offset, + sycl::ext::oneapi::experimental::bindless_image_sampler &samp, + sycl::ext::oneapi::experimental::sampled_image_handle input_image, + sycl::buffer, NDims> &output, + sycl::range bufSize) { + using VecType = sycl::vec; + bool isNorm = + (samp.coordinate == sycl::coordinate_normalization_mode::normalized); + if constexpr (NDims == 1) { + try { + q.submit([&](sycl::handler &cgh) { + auto outAcc = output.template get_access( + cgh, bufSize); + cgh.parallel_for( + sycl::nd_range{globalSize, localSize}, + [=](sycl::nd_item it) { + size_t dim0 = it.get_global_id(0); + double coordX = 0.0; + if (isNorm) { + coordX = (double)dim0 / (double)globalSize[0]; + } else { + coordX = dim0; + } + + VecType px1 = + sycl::ext::oneapi::experimental::read_image( + input_image, float(coordX + offset)); + + outAcc[(int)dim0] = px1; + }); + }); + } catch (sycl::exception e) { + std::cerr << "\tKernel submission failed! " << e.what() << std::endl; + exit(-1); + } catch (...) { + std::cerr << "\tKernel submission failed!" << std::endl; + exit(-1); + } + } else if constexpr (NDims == 2) { + assert(false && "2d normalized not yet implemented"); + } else if constexpr (NDims == 3) { + assert(false && "3d normalized not yet implemented"); + } else { + assert(false && "Invalid dimension number set"); + } + } +}; + +void printTestInfo( + sycl::ext::oneapi::experimental::bindless_image_sampler &samp, + double offset) { + + sycl::addressing_mode SampAddrMode = samp.addressing; + sycl::coordinate_normalization_mode SampNormMode = samp.coordinate; + sycl::filtering_mode SampFiltMode = samp.filtering; + + std::cout << "---------------------------------------NEW " + "SAMPLER---------------------------------------\n"; + + std::cout << "addressing mode: "; + switch (SampAddrMode) { + case sycl::addressing_mode::mirrored_repeat: + std::cout << "mirrored_repeat\n"; + break; + case sycl::addressing_mode::repeat: + std::cout << "repeat\n"; + break; + case sycl::addressing_mode::clamp_to_edge: + std::cout << "clamp_to_edge\n"; + break; + case sycl::addressing_mode::clamp: + std::cout << "clamp\n"; + break; + case sycl::addressing_mode::none: + std::cout << "none\n"; + break; + } + + std::cout << "coordinate normalization mode: "; + switch (SampNormMode) { + case sycl::coordinate_normalization_mode::normalized: + std::cout << "normalized\n"; + break; + case sycl::coordinate_normalization_mode::unnormalized: + std::cout << "unnormalized\n"; + break; + } + + std::cout << "filtering mode: "; + switch (SampFiltMode) { + case sycl::filtering_mode::nearest: + std::cout << "nearest\n"; + break; + case sycl::filtering_mode::linear: + std::cout << "linear\n"; + break; + } + std::cout << "offset: " << offset << "\n"; +} + +bool isNumberWithinPercentOfNumber(float firstN, float percent, float secondN, + float &diff, float &percDiff) { + // Get absolute difference of the two numbers + diff = std::abs(firstN - secondN); + // Get the percentage difference of the two numbers + percDiff = + 100.0f * (std::abs(firstN - secondN) / (((firstN + secondN) / 2.0f))); + + // Check if perc difference is not greater then maximum allowed + return percDiff <= percent; +} + +template +bool run_test(sycl::range dims, sycl::range localSize, + double offset, + sycl::ext::oneapi::experimental::bindless_image_sampler &samp, + unsigned int seed = 0) { + using VecType = sycl::vec; + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // skip half tests if not supported + if constexpr (std::is_same_v) { + if (!dev.has(sycl::aspect::fp16)) { +#if defined(VERBOSE_LV1) || defined(VERBOSE_LV2) || defined(VERBOSE_LV3) + std::cout << "Test skipped due to lack of device support for fp16\n"; +#endif + return false; + } + } + + size_t num_elems = dims[0]; + if (NDims > 1) + num_elems *= dims[1]; + if (NDims > 2) + num_elems *= dims[2]; + + std::vector input_0(num_elems); + std::vector expected(num_elems); + std::vector actual(num_elems); + + std::srand(seed); + util::fill_rand(input_0, seed); + + { + sycl::range globalSize = dims; + util::run_ndim_test_host(globalSize, offset, samp, + input_0, expected); + } + + try { + + sycl::ext::oneapi::experimental::image_descriptor desc(dims, COrder, CType); + + sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, q); + + auto img_input = + sycl::ext::oneapi::experimental::create_image(img_mem_0, samp, desc, q); + + q.ext_oneapi_copy(input_0.data(), img_mem_0.get_handle(), desc); + q.wait_and_throw(); + + { + sycl::range bufSize = dims; + sycl::range globalSize = dims; + sycl::buffer outBuf((VecType *)actual.data(), bufSize); + q.wait_and_throw(); + util::run_ndim_test_device( + q, globalSize, localSize, offset, samp, img_input, outBuf, bufSize); + q.wait_and_throw(); + } + + // Cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(img_input, q); + + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return true; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return true; + } + + // Collect and validate output + + // The following sets the percentage margin of error. + + // The margin of error might be different for different backends. + // For CUDA, low-precision interpolation is used for linear sampling + // according to the CUDA programming guide. + // Specifically, CUDA devices uses 9 bits for the linear sampling weight with + // 8 for the fractional value. (One extra so 1.0 is exactly represented) + // 8 bits for the fractional value means there are 256 possible values + // to represent between 1 and 0. As a percentage error, (1/256) * 100 + // gives 0.390625. Meaning that the percentage error for every + // linear interpolation is up to 0.390625% away from the correct value. + // There is no error when linear sampling does not occur. + + float deviation = 0.390625f; + + // For tests using nearest filtering mode, no margin of error is expected. + if (samp.filtering == sycl::filtering_mode::nearest) { + deviation = 0.0f; + } + + bool validated = true; + float largestError = 0.0f; + float largestPercentError = 0.0f; + for (int i = 0; i < num_elems; i++) { + for (int j = 0; j < NChannels; ++j) { + bool mismatch = false; + if (actual[i][j] != expected[i][j]) { + // Nvidia GPUs have a 0.4%~ margin of error due to only using 8 bits to + // represent values between 1-0 for weights during linear interpolation. + float diff, percDiff; + if (!isNumberWithinPercentOfNumber(actual[i][j], deviation, + expected[i][j], diff, percDiff)) { + mismatch = true; + validated = false; + } + if (diff > largestError) { + largestError = diff; + } + if (percDiff > largestPercentError) { + largestPercentError = percDiff; + } + } + if (mismatch) { +#if defined(VERBOSE_LV2) || defined(VERBOSE_LV3) + std::cout << "\tResult mismatch at [" << i << "][" << j + << "] Expected: " << +DType(expected[i][j]) + << ", Actual: " << +DType(actual[i][j]) << std::endl; +#endif + +#ifndef VERBOSE_LV3 + break; +#endif + } + } +#ifndef VERBOSE_LV3 + if (!validated) { + break; + } +#endif + } + +#if defined(VERBOSE_LV2) || defined(VERBOSE_LV3) + std::cout << "largestError: " << largestError << "\n"; + std::cout << "largestPercentError: " << largestPercentError << "%" + << "\n"; + std::cout << "Margin of Error: " << deviation << "%" + << "\n"; +#endif + +#if defined(VERBOSE_LV1) || defined(VERBOSE_LV2) || defined(VERBOSE_LV3) + if (validated) { + std::cout << "\tTest passed!\n"; + } else { + std::cout << "\tTest failed!\n"; + } +#endif + + return !validated; +} + +void printTestName(std::string name) { +#if defined(VERBOSE_LV1) || defined(VERBOSE_LV2) || defined(VERBOSE_LV3) + std::cout << name; +#endif +} + +template > +bool run_tests(sycl::range dims, sycl::range localSize, + double offset, int seed, + sycl::coordinate_normalization_mode normMode) { + + // addressing_mode::none currently removed due to + // inconsistent behavour when switching between + // normalized and unnormalized coords. + sycl::addressing_mode addrModes[4] = { + sycl::addressing_mode::repeat, sycl::addressing_mode::mirrored_repeat, + sycl::addressing_mode::clamp_to_edge, sycl::addressing_mode::clamp}; + + sycl::filtering_mode filtModes[2] = {sycl::filtering_mode::nearest, + sycl::filtering_mode::linear}; + + bool failed = false; + + for (auto addrMode : addrModes) { + + for (auto filtMode : filtModes) { + + if (normMode == sycl::coordinate_normalization_mode::unnormalized) { + // These sampler combinations are not valid according to the SYCL spec + if (addrMode == sycl::addressing_mode::repeat || + addrMode == sycl::addressing_mode::mirrored_repeat) { + continue; + } + } + // Skip using offset with address_mode of none. Will cause undefined + // behaviour. + if (addrMode == sycl::addressing_mode::none && offset != 0.0) { + continue; + } + + sycl::ext::oneapi::experimental::bindless_image_sampler samp( + addrMode, normMode, filtMode); + +#if defined(VERBOSE_LV2) || defined(VERBOSE_LV3) + printTestInfo(samp, offset); +#endif + + // Tests using int data type currently disabled due to inconsistent + // rounding behaviour against non-float types smaller then 32 bit. + + printTestName("Running 1D short\n"); + failed |= + run_test(dims, localSize, offset, samp, seed); + printTestName("Running 1D short2\n"); + failed |= + run_test(dims, localSize, offset, samp, seed); + printTestName("Running 1D short4\n"); + failed |= + run_test(dims, localSize, offset, samp, seed); + + printTestName("Running 1D unsigned short\n"); + failed |= run_test< + NDims, unsigned short, 1, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::r, class ushort_1d1, class ushort_1d2>( + dims, localSize, offset, samp, seed); + printTestName("Running 1D unsigned short2\n"); + failed |= run_test< + NDims, unsigned short, 2, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::rg, class ushort2_1d1, class ushort2_1d2>( + dims, localSize, offset, samp, seed); + printTestName("Running 1D unsigned short4\n"); + failed |= + run_test(dims, localSize, offset, samp, seed); + + printTestName("Running 1D char\n"); + failed |= + run_test(dims, localSize, offset, samp, seed); + printTestName("Running 1D char2\n"); + failed |= + run_test(dims, localSize, offset, samp, seed); + printTestName("Running 1D char4\n"); + failed |= + run_test(dims, localSize, offset, samp, seed); + + printTestName("Running 1D unsigned char\n"); + failed |= run_test< + NDims, unsigned char, 1, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::r, class uchar_1d1, class uchar_1d2>( + dims, localSize, offset, samp, seed); + printTestName("Running 1D unsigned char2\n"); + failed |= run_test< + NDims, unsigned char, 2, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rg, class uchar2_1d1, class uchar2_1d2>( + dims, localSize, offset, samp, seed); + printTestName("Running 1D unsigned char4\n"); + failed |= run_test< + NDims, unsigned char, 4, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rgba, class uchar4_1d1, class uchar4_1d2>( + dims, localSize, offset, samp, seed); + + printTestName("Running 1D float\n"); + failed |= run_test(dims, localSize, offset, samp, seed); + printTestName("Running 1D float2\n"); + failed |= run_test(dims, localSize, offset, samp, seed); + printTestName("Running 1D float4\n"); + failed |= run_test(dims, localSize, offset, samp, seed); + + printTestName("Running 1D half\n"); + failed |= run_test(dims, localSize, offset, samp, seed); + printTestName("Running 1D half2\n"); + failed |= run_test(dims, localSize, offset, samp, seed); + printTestName("Running 1D half4\n"); + failed |= run_test(dims, localSize, offset, samp, seed); + + printTestName("Running 1D float - dims: 1024, local: 512\n"); + failed |= run_test({1024}, {512}, offset, samp, seed); + printTestName("Running 1D float4 - dims: 4096, local: 8\n"); + failed |= run_test({4096}, {8}, offset, samp, seed); + } + } + + return !failed; +} + +template +bool run_offset(sycl::range dims, sycl::range localSize, + double offset, int seed) { + bool normPassed = + run_tests(dims, localSize, (offset / (double)dims[0]), seed, + sycl::coordinate_normalization_mode::normalized); + bool nonormPassed = + run_tests(dims, localSize, offset, seed, + sycl::coordinate_normalization_mode::unnormalized); + return normPassed && nonormPassed; +} + +template +bool run_no_offset(sycl::range dims, sycl::range localSize, + int seed) { + bool normPassed = + run_tests(dims, localSize, 0.0, seed, + sycl::coordinate_normalization_mode::normalized); + bool nonormPassed = + run_tests(dims, localSize, 0.0, seed, + sycl::coordinate_normalization_mode::unnormalized); + return normPassed && nonormPassed; +} + +template +bool run_dim(sycl::range dims, sycl::range localSize, + double offset, int seed) { + bool offsetPassed = run_offset(dims, localSize, offset, seed); + bool noOffsetPassed = run_no_offset(dims, localSize, seed); + return offsetPassed && noOffsetPassed; +} + +bool run_all(int seed) { return run_dim<1>({512}, {32}, 20, seed); } + +int main() { + + unsigned int seed = 0; + bool result = run_all(seed); + + if (result) { + std::cout << "All tests passed!\n"; + return 0; + } + + std::cerr << "An error has occured!\n"; + return 1; +} diff --git a/sycl/test-e2e/bindless_images/read_write_1D.cpp b/sycl/test-e2e/bindless_images/read_write_1D.cpp new file mode 100644 index 0000000000000..eabf71e9248d8 --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_write_1D.cpp @@ -0,0 +1,122 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + constexpr size_t width = 512; + std::vector out(width); + std::vector expected(width); + std::vector dataIn1(width); + std::vector dataIn2(width); + float exp = 512; + for (int i = 0; i < width; i++) { + expected[i] = exp; + dataIn1[i] = sycl::float4(i, i, i, i); + dataIn2[i] = sycl::float4(width - i, width - i, width - i, width - i); + } + + try { + // Extension: image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + + // Extension: allocate memory on device and create the handle + // Input images memory + sycl::ext::oneapi::experimental::image_mem imgMem0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem1(desc, dev, ctxt); + + // Output image memory + sycl::ext::oneapi::experimental::image_mem imgMem2(desc, dev, ctxt); + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn1.data(), imgMem0.get_handle(), desc); + q.ext_oneapi_copy(dataIn2.data(), imgMem1.get_handle(), desc); + q.wait_and_throw(); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgIn1 = + sycl::ext::oneapi::experimental::create_image(imgMem0, desc, dev, ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle imgIn2 = + sycl::ext::oneapi::experimental::create_image(imgMem1, desc, dev, ctxt); + + sycl::ext::oneapi::experimental::unsampled_image_handle imgOut = + sycl::ext::oneapi::experimental::create_image(imgMem2, desc, dev, ctxt); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for(width, [=](sycl::id<1> id) { + float sum = 0; + // Extension: read image data from handle + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + imgIn1, int(id[0])); + sycl::float4 px2 = + sycl::ext::oneapi::experimental::read_image( + imgIn2, int(id[0])); + + sum = px1[0] + px2[0]; + // Extension: write to image with handle + sycl::ext::oneapi::experimental::write_image( + imgOut, int(id[0]), sycl::float4(sum)); + }); + }); + + q.wait_and_throw(); + // Extension: copy data from device to host + q.ext_oneapi_copy(imgMem2.get_handle(), out.data(), desc); + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgIn1, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgIn2, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgOut, dev, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < width; i++) { + bool mismatch = false; + if (out[i][0] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i][0] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp b/sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp new file mode 100644 index 0000000000000..3691a397f1942 --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp @@ -0,0 +1,135 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + constexpr size_t width = 512; + std::vector out(width); + std::vector expected(width); + std::vector dataIn1(width); + std::vector dataIn2(width); + for (int i = 0; i < width; i++) { + expected[i] = i * 3; + dataIn1[i] = i; + dataIn2[i] = i * 2; + } + + try { + // Extension: image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width}, sycl::image_channel_order::r, sycl::image_channel_type::fp32); + + // Extension: allocate memory on device and create the handle + sycl::ext::oneapi::experimental::image_mem imgMem00(desc, q); + sycl::ext::oneapi::experimental::image_mem imgMem1(desc, q); + sycl::ext::oneapi::experimental::image_mem imgMem2(desc, q); + + // We're able to use move semantics + auto imgMem0 = std::move(imgMem00); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 = + sycl::ext::oneapi::experimental::create_image(imgMem0, desc, q); + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 = + sycl::ext::oneapi::experimental::create_image(imgMem1, desc, q); + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle3 = + sycl::ext::oneapi::experimental::create_image(imgMem2, desc, q); + + // Extension: copy over data to device (2 subregions) + sycl::range copySrcOffset = {0, 0, 0}; + sycl::range copyExtent1 = {width / 2, 1, 1}; + sycl::range srcExtent = {width, 0, 0}; + + q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, srcExtent, + imgMem0.get_handle(), {0, 0, 0}, desc, copyExtent1); + q.ext_oneapi_copy(dataIn1.data(), {width / 2, 0, 0}, srcExtent, + imgMem0.get_handle(), {width / 2, 0, 0}, desc, + copyExtent1); + + q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, srcExtent, + imgMem1.get_handle(), {0, 0, 0}, desc, copyExtent1); + q.ext_oneapi_copy(dataIn2.data(), {width / 2, 0, 0}, srcExtent, + imgMem1.get_handle(), {width / 2, 0, 0}, desc, + copyExtent1); + + q.wait_and_throw(); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for(width, [=](sycl::id<1> id) { + float sum = 0; + // Extension: read image data from handle + float px1 = sycl::ext::oneapi::experimental::read_image( + imgHandle1, int(id[0])); + float px2 = sycl::ext::oneapi::experimental::read_image( + imgHandle2, int(id[0])); + + sum = px1 + px2; + sycl::ext::oneapi::experimental::write_image(imgHandle3, + int(id[0]), sum); + }); + }); + + q.wait_and_throw(); + + // Extension: copy data from device to host (two sub-regions) + sycl::range copyExtent2 = {width / 2, 1, 1}; + sycl::range destExtent = {width, 0, 0}; + q.ext_oneapi_copy(imgMem2.get_handle(), {0, 0, 0}, desc, out.data(), + {0, 0, 0}, destExtent, copyExtent2); + q.ext_oneapi_copy(imgMem2.get_handle(), {width / 2, 0, 0}, desc, out.data(), + {width / 2, 0, 0}, destExtent, copyExtent2); + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, q); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, q); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < width; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i][0] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/read_write_2D.cpp b/sycl/test-e2e/bindless_images/read_write_2D.cpp new file mode 100644 index 0000000000000..dbc5f4cc219f9 --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_write_2D.cpp @@ -0,0 +1,133 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t height = 32; + size_t width = 32; + size_t N = height * width; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + std::vector dataIn2(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + expected[i + (width * j)] = j * 3; + dataIn1[i + (width * j)] = {j, j, j, j}; + dataIn2[i + (width * j)] = {j * 2, j * 2, j * 2, j * 2}; + } + } + + try { + // Extension: image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + + // Extension: allocate memory on device and create the handle + // Input images memory + sycl::ext::oneapi::experimental::image_mem imgMem0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem1(desc, dev, ctxt); + + // Output image memory + sycl::ext::oneapi::experimental::image_mem imgMem2(desc, dev, ctxt); + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn1.data(), imgMem0.get_handle(), desc); + q.ext_oneapi_copy(dataIn2.data(), imgMem1.get_handle(), desc); + q.wait_and_throw(); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgIn1 = + sycl::ext::oneapi::experimental::create_image(imgMem0, desc, dev, ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle imgIn2 = + sycl::ext::oneapi::experimental::create_image(imgMem1, desc, dev, ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle imgOut = + sycl::ext::oneapi::experimental::create_image(imgMem2, desc, dev, ctxt); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + float sum = 0; + // Extension: read image data from handle + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + imgIn1, sycl::int2(dim0, dim1)); + sycl::float4 px2 = + sycl::ext::oneapi::experimental::read_image( + imgIn2, sycl::int2(dim0, dim1)); + + sum = px1[0] + px2[0]; + + // Extension: write to image with handle + sycl::ext::oneapi::experimental::write_image( + imgOut, sycl::int2(dim0, dim1), sycl::float4(sum)); + }); + }); + + q.wait_and_throw(); + + // Extension: copy data from device to host (handler variant) + q.submit([&](sycl::handler &cgh) { + cgh.ext_oneapi_copy(imgMem2.get_handle(), out.data(), desc); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgIn1, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgIn2, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgOut, dev, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i][0] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i][0] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/read_write_2D_subregion.cpp b/sycl/test-e2e/bindless_images/read_write_2D_subregion.cpp new file mode 100644 index 0000000000000..55e43eb3e7fe6 --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_write_2D_subregion.cpp @@ -0,0 +1,152 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 32; + size_t height = 32; + size_t N = width * height; + std::vector out(N); + std::vector expected(N / 4); + std::vector dataIn1(N / 4); + std::vector dataIn2(N / 4); + for (int i = 0; i < width / 2; i++) { + for (int j = 0; j < height / 2; j++) { + expected[j + ((height / 2) * i)] = j * 3; + dataIn1[j + ((height / 2) * i)] = j; + dataIn2[j + ((height / 2) * i)] = j * 2; + } + } + + // Image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::r, + sycl::image_channel_type::fp32); + + try { + // Extension: allocate memory on device and create the handle + sycl::ext::oneapi::experimental::image_mem imgMem0(desc, q); + sycl::ext::oneapi::experimental::image_mem imgMem1(desc, q); + sycl::ext::oneapi::experimental::image_mem imgMem2(desc, q); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 = + sycl::ext::oneapi::experimental::create_image(imgMem0, desc, q); + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 = + sycl::ext::oneapi::experimental::create_image(imgMem1, desc, q); + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle3 = + sycl::ext::oneapi::experimental::create_image(imgMem2, desc, q); + + // Extension: copy over data to device (four subregions/quadrants) + sycl::range copyExtent1 = {width / 2, height / 2, 1}; + sycl::range srcExtent = {width / 2, height / 2, 0}; + + q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, srcExtent, + imgMem0.get_handle(), {0, 0, 0}, desc, copyExtent1); + q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, srcExtent, + imgMem0.get_handle(), {width / 2, 0, 0}, desc, + copyExtent1); + q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, srcExtent, + imgMem0.get_handle(), {0, height / 2, 0}, desc, + copyExtent1); + q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, srcExtent, + imgMem0.get_handle(), {width / 2, height / 2, 0}, desc, + copyExtent1); + + q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, srcExtent, + imgMem1.get_handle(), {0, 0, 0}, desc, copyExtent1); + q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, srcExtent, + imgMem1.get_handle(), {width / 2, 0, 0}, desc, + copyExtent1); + q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, srcExtent, + imgMem1.get_handle(), {0, height / 2, 0}, desc, + copyExtent1); + q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, srcExtent, + imgMem1.get_handle(), {width / 2, height / 2, 0}, desc, + copyExtent1); + + q.wait_and_throw(); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + float sum = 0; + // Extension: read image data from handle + float px1 = sycl::ext::oneapi::experimental::read_image( + imgHandle1, sycl::int2(dim0, dim1)); + float px2 = sycl::ext::oneapi::experimental::read_image( + imgHandle2, sycl::int2(dim0, dim1)); + + sum = px1 + px2; + // Extension: write to image with handle + sycl::ext::oneapi::experimental::write_image( + imgHandle3, sycl::int2(dim0, dim1), sum); + }); + }); + q.wait_and_throw(); + + // Extension: copy data from device to host (two sub-regions) + sycl::range copyExtent2 = {width, height / 2, 1}; + sycl::range destExtent = {width, height, 0}; + q.ext_oneapi_copy(imgMem2.get_handle(), {0, 0, 0}, desc, out.data(), + {0, 0, 0}, destExtent, copyExtent2); + q.ext_oneapi_copy(imgMem2.get_handle(), {0, height / 2, 0}, desc, + out.data(), {0, height / 2, 0}, destExtent, copyExtent2); + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, q); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, q); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i % (N / 4)]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i][0] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/read_write_3D.cpp b/sycl/test-e2e/bindless_images/read_write_3D.cpp new file mode 100644 index 0000000000000..38216599fe735 --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_write_3D.cpp @@ -0,0 +1,133 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t height = 13; + size_t width = 7; + size_t depth = 11; + size_t N = height * width * depth; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + std::vector dataIn2(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < depth; k++) { + expected[i + width * (j + height * k)] = j * 3; + dataIn1[i + width * (j + height * k)] = {j, j, j, j}; + dataIn2[i + width * (j + height * k)] = {j * 2, j * 2, j * 2, j * 2}; + } + } + } + + try { + // Extension: image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height, depth}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + + // Extension: allocate memory on device and create the handle + // Input images memory + sycl::ext::oneapi::experimental::image_mem imgMem0(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem imgMem1(desc, dev, ctxt); + + // Output image memory + sycl::ext::oneapi::experimental::image_mem imgMem2(desc, dev, ctxt); + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn1.data(), imgMem0.get_handle(), desc); + q.ext_oneapi_copy(dataIn2.data(), imgMem1.get_handle(), desc); + q.wait_and_throw(); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgIn1 = + sycl::ext::oneapi::experimental::create_image(imgMem0, desc, dev, ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle imgIn2 = + sycl::ext::oneapi::experimental::create_image(imgMem1, desc, dev, ctxt); + + sycl::ext::oneapi::experimental::unsampled_image_handle imgOut = + sycl::ext::oneapi::experimental::create_image(imgMem2, desc, dev, ctxt); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range<3>{{width, height, depth}, {width, height, depth}}, + [=](sycl::nd_item<3> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + size_t dim2 = it.get_local_id(2); + float sum = 0; + // Extension: read image data from handle + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + imgIn1, sycl::int4(dim0, dim1, dim2, 0)); + sycl::float4 px2 = + sycl::ext::oneapi::experimental::read_image( + imgIn2, sycl::int4(dim0, dim1, dim2, 0)); + + sum = px1[0] + px2[0]; + // Extension: write to image with handle + sycl::ext::oneapi::experimental::write_image( + imgOut, sycl::int4(dim0, dim1, dim2, 0), sycl::float4(sum)); + }); + }); + + q.wait_and_throw(); + // Extension: copy data from device to host + q.ext_oneapi_copy(imgMem2.get_handle(), out.data(), desc); + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgIn1, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgIn2, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgOut, dev, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i][0] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i][0] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp b/sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp new file mode 100644 index 0000000000000..6044162a8887d --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp @@ -0,0 +1,173 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 16; + size_t height = 16; + size_t depth = 8; + size_t N = width * height * depth; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + std::vector dataIn2(N); + // ROW-MAJOR + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < depth; k++) { + expected[k + (depth) * (j + (height)*i)] = + (k + (depth) * (j + (height)*i)) * 3; + dataIn1[k + (depth) * (j + (height)*i)] = + k + (depth) * (j + (height)*i); + dataIn2[k + (depth) * (j + (height)*i)] = + (k + (depth) * (j + (height)*i)) * 2; + } + } + } + + try { + + // Extension: image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height, depth}, sycl::image_channel_order::r, + sycl::image_channel_type::fp32); + + // Extension: allocate memory on device and create the handle + sycl::ext::oneapi::experimental::image_mem imgMem0(desc, q); + sycl::ext::oneapi::experimental::image_mem imgMem1(desc, q); + sycl::ext::oneapi::experimental::image_mem imgMem2(desc, q); + + // Extension: copy over data to device (8 sub-regions) + sycl::range copyExtent1 = {width / 2, height / 2, depth / 2}; + sycl::range srcExtent1 = {width, height, depth}; + + // First image with 8 sub-regions + q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, srcExtent1, + imgMem0.get_handle(), {0, 0, 0}, desc, copyExtent1); + q.ext_oneapi_copy(dataIn1.data(), {width / 2, 0, 0}, srcExtent1, + imgMem0.get_handle(), {width / 2, 0, 0}, desc, + copyExtent1); + q.ext_oneapi_copy(dataIn1.data(), {0, height / 2, 0}, srcExtent1, + imgMem0.get_handle(), {0, height / 2, 0}, desc, + copyExtent1); + q.ext_oneapi_copy(dataIn1.data(), {0, 0, depth / 2}, srcExtent1, + imgMem0.get_handle(), {0, 0, depth / 2}, desc, + copyExtent1); + q.ext_oneapi_copy(dataIn1.data(), {width / 2, height / 2, 0}, srcExtent1, + imgMem0.get_handle(), {width / 2, height / 2, 0}, desc, + copyExtent1); + q.ext_oneapi_copy(dataIn1.data(), {0, height / 2, depth / 2}, srcExtent1, + imgMem0.get_handle(), {0, height / 2, depth / 2}, desc, + copyExtent1); + q.ext_oneapi_copy(dataIn1.data(), {width / 2, 0, depth / 2}, srcExtent1, + imgMem0.get_handle(), {width / 2, 0, depth / 2}, desc, + copyExtent1); + q.ext_oneapi_copy(dataIn1.data(), {width / 2, height / 2, depth / 2}, + srcExtent1, imgMem0.get_handle(), + {width / 2, height / 2, depth / 2}, desc, copyExtent1); + + // Second image with 2 sub-regions + sycl::range copyExtent2 = {width, height, depth / 2}; + sycl::range srcExtent2 = {width, height, depth}; + q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, srcExtent2, + imgMem1.get_handle(), {0, 0, 0}, desc, copyExtent2); + q.ext_oneapi_copy(dataIn2.data(), {0, 0, depth / 2}, srcExtent2, + imgMem1.get_handle(), {0, 0, depth / 2}, desc, + copyExtent2); + + q.wait_and_throw(); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 = + sycl::ext::oneapi::experimental::create_image(imgMem0, desc, q); + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 = + sycl::ext::oneapi::experimental::create_image(imgMem1, desc, q); + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle3 = + sycl::ext::oneapi::experimental::create_image(imgMem2, desc, q); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range<3>{{width, height, depth}, {16, 16, 2}}, + [=](sycl::nd_item<3> it) { + size_t dim0 = it.get_global_id(0); + size_t dim1 = it.get_global_id(1); + size_t dim2 = it.get_global_id(2); + float sum = 0; + // Extension: read image data from handle + float px1 = sycl::ext::oneapi::experimental::read_image( + imgHandle1, sycl::int4(dim0, dim1, dim2, 0)); + float px2 = sycl::ext::oneapi::experimental::read_image( + imgHandle2, sycl::int4(dim0, dim1, dim2, 0)); + + sum = px1 + px2; + // Extension: write to image with handle + sycl::ext::oneapi::experimental::write_image( + imgHandle3, sycl::int4(dim0, dim1, dim2, 0), sum); + }); + }); + + q.wait_and_throw(); + + // Extension: copy data from device to host (two sub-regions) + sycl::range copyExtent3 = {width, height, depth / 2}; + sycl::range destExtent = {width, height, depth}; + q.ext_oneapi_copy(imgMem2.get_handle(), {0, 0, 0}, desc, out.data(), + {0, 0, 0}, destExtent, copyExtent3); + q.ext_oneapi_copy(imgMem2.get_handle(), {0, 0, depth / 2}, desc, out.data(), + {0, 0, depth / 2}, destExtent, copyExtent3); + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, q); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, q); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle3, q); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/read_write_unsampled.cpp b/sycl/test-e2e/bindless_images/read_write_unsampled.cpp new file mode 100644 index 0000000000000..aedd45bdf458e --- /dev/null +++ b/sycl/test-e2e/bindless_images/read_write_unsampled.cpp @@ -0,0 +1,670 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include +#include +#include + +static sycl::device dev; + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +// Helpers and utilities +struct util { + template ::value, bool> = true> + static void fill_rand(std::vector> &v) { + std::default_random_engine generator; + std::uniform_int_distribution distribution(0, 100); + for (int i = 0; i < v.size(); ++i) { + v[i] = sycl::vec(distribution(generator)); + } + } + + template ::value, bool> = true> + static void fill_rand(std::vector> &v) { + std::default_random_engine generator; + std::uniform_real_distribution distribution(0.0, 100.0); + for (int i = 0; i < v.size(); ++i) { + v[i] = sycl::vec(distribution(generator)); + } + } + + template < + typename DType, int NChannels, + std::enable_if_t::value, bool> = true> + static void fill_rand(std::vector> &v) { + std::default_random_engine generator; + std::uniform_real_distribution distribution(0.0, 100.0); + for (int i = 0; i < v.size(); ++i) { + v[i] = sycl::vec(distribution(generator)); + } + } + + template + static void add_host(const std::vector> &in_0, + const std::vector> &in_1, + std::vector> &out) { + for (int i = 0; i < out.size(); ++i) { + for (int j = 0; j < NChannels; ++j) { + out[i][j] = in_0[i][j] + in_1[i][j]; + } + } + } + + template > + static DType add_kernel(const DType in_0, const DType in_1) { + return in_0 + in_1; + } + + template 1)>> + static sycl::vec + add_kernel(const sycl::vec &in_0, + const sycl::vec &in_1) { + sycl::vec out; + for (int i = 0; i < NChannels; ++i) { + out[i] = in_0[i] + in_1[i]; + } + return out; + } + + // parallel_for 3D + template > + static void run_ndim_test( + sycl::queue q, sycl::range<3> globalSize, sycl::range<3> localSize, + sycl::ext::oneapi::experimental::unsampled_image_handle input_0, + sycl::ext::oneapi::experimental::unsampled_image_handle input_1, + sycl::ext::oneapi::experimental::unsampled_image_handle output) { + using VecType = sycl::vec; + try { + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range{globalSize, localSize}, + [=](sycl::nd_item it) { + size_t dim0 = it.get_global_id(0); + size_t dim1 = it.get_global_id(1); + size_t dim2 = it.get_global_id(2); + + if constexpr (NChannels >= 1) { + VecType px1 = + sycl::ext::oneapi::experimental::read_image( + input_0, sycl::int4(dim0, dim1, dim2, 0)); + VecType px2 = + sycl::ext::oneapi::experimental::read_image( + input_1, sycl::int4(dim0, dim1, dim2, 0)); + + auto sum = + VecType(util::add_kernel(px1, px2)); + sycl::ext::oneapi::experimental::write_image( + output, sycl::int4(dim0, dim1, dim2, 0), VecType(sum)); + } else { + DType px1 = sycl::ext::oneapi::experimental::read_image( + input_0, sycl::int4(dim0, dim1, dim2, 0)); + DType px2 = sycl::ext::oneapi::experimental::read_image( + input_1, sycl::int4(dim0, dim1, dim2, 0)); + + auto sum = DType(util::add_kernel(px1, px2)); + sycl::ext::oneapi::experimental::write_image( + output, sycl::int4(dim0, dim1, dim2, 0), DType(sum)); + } + }); + }); + } catch (sycl::exception e) { + std::cout << "\tKernel submission failed! " << e.what() << std::endl; + exit(-1); + } catch (...) { + std::cout << "\tKernel submission failed!" << std::endl; + exit(-1); + } + } + + // parallel_for 2D + template > + static void run_ndim_test( + sycl::queue q, sycl::range<2> globalSize, sycl::range<2> localSize, + sycl::ext::oneapi::experimental::unsampled_image_handle input_0, + sycl::ext::oneapi::experimental::unsampled_image_handle input_1, + sycl::ext::oneapi::experimental::unsampled_image_handle output) { + using VecType = sycl::vec; + try { + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range{globalSize, localSize}, + [=](sycl::nd_item it) { + size_t dim0 = it.get_global_id(0); + size_t dim1 = it.get_global_id(1); + + if constexpr (NChannels >= 1) { + VecType px1 = + sycl::ext::oneapi::experimental::read_image( + input_0, sycl::int2(dim0, dim1)); + VecType px2 = + sycl::ext::oneapi::experimental::read_image( + input_1, sycl::int2(dim0, dim1)); + + auto sum = + VecType(util::add_kernel(px1, px2)); + sycl::ext::oneapi::experimental::write_image( + output, sycl::int2(dim0, dim1), VecType(sum)); + } else { + DType px1 = sycl::ext::oneapi::experimental::read_image( + input_0, sycl::int2(dim0, dim1)); + DType px2 = sycl::ext::oneapi::experimental::read_image( + input_1, sycl::int2(dim0, dim1)); + + auto sum = DType(util::add_kernel(px1, px2)); + sycl::ext::oneapi::experimental::write_image( + output, sycl::int2(dim0, dim1), DType(sum)); + } + }); + }); + } catch (sycl::exception e) { + std::cout << "\tKernel submission failed! " << e.what() << std::endl; + exit(-1); + } catch (...) { + std::cout << "\tKernel submission failed!" << std::endl; + exit(-1); + } + } + + // parallel_for 1D + template > + static void run_ndim_test( + sycl::queue q, sycl::range<1> globalSize, sycl::range<1> localSize, + sycl::ext::oneapi::experimental::unsampled_image_handle input_0, + sycl::ext::oneapi::experimental::unsampled_image_handle input_1, + sycl::ext::oneapi::experimental::unsampled_image_handle output) { + using VecType = sycl::vec; + try { + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range{globalSize, localSize}, + [=](sycl::nd_item it) { + size_t dim0 = it.get_global_id(0); + + if constexpr (NChannels >= 1) { + VecType px1 = + sycl::ext::oneapi::experimental::read_image( + input_0, int(dim0)); + VecType px2 = + sycl::ext::oneapi::experimental::read_image( + input_1, int(dim0)); + + auto sum = + VecType(util::add_kernel(px1, px2)); + sycl::ext::oneapi::experimental::write_image( + output, int(dim0), VecType(sum)); + } else { + DType px1 = sycl::ext::oneapi::experimental::read_image( + input_0, int(dim0)); + DType px2 = sycl::ext::oneapi::experimental::read_image( + input_1, int(dim0)); + + auto sum = DType(util::add_kernel(px1, px2)); + sycl::ext::oneapi::experimental::write_image( + output, int(dim0), DType(sum)); + } + }); + }); + } catch (sycl::exception e) { + std::cout << "\tKernel submission failed! " << e.what() << std::endl; + exit(-1); + } catch (...) { + std::cout << "\tKernel submission failed!" << std::endl; + exit(-1); + } + } +}; + +template +bool run_test(sycl::range dims, sycl::range localSize, + unsigned int seed = 0) { + using VecType = sycl::vec; + + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // skip half tests if not supported + if constexpr (std::is_same_v) { + if (!dev.has(sycl::aspect::fp16)) { +#ifdef VERBOSE_PRINT + std::cout << "Test skipped due to lack of device support for fp16\n"; +#endif + return false; + } + } + + size_t num_elems = dims[0]; + if (NDims > 1) + num_elems *= dims[1]; + if (NDims > 2) + num_elems *= dims[2]; + + std::vector input_0(num_elems); + std::vector input_1(num_elems); + std::vector expected(num_elems); + std::vector actual(num_elems); + + std::srand(seed); + util::fill_rand(input_0); + util::fill_rand(input_1); + util::add_host(input_0, input_1, expected); + + try { + sycl::ext::oneapi::experimental::image_descriptor desc(dims, COrder, CType); + + // Extension: allocate memory on device and create the handle + sycl::ext::oneapi::experimental::image_mem img_mem_0(desc, q); + sycl::ext::oneapi::experimental::image_mem img_mem_1(desc, q); + sycl::ext::oneapi::experimental::image_mem img_mem_2(desc, q); + + auto img_input_0 = + sycl::ext::oneapi::experimental::create_image(img_mem_0, desc, q); + auto img_input_1 = + sycl::ext::oneapi::experimental::create_image(img_mem_1, desc, q); + auto img_output = + sycl::ext::oneapi::experimental::create_image(img_mem_2, desc, q); + + // Extension: copy over data to device + q.ext_oneapi_copy(input_0.data(), img_mem_0.get_handle(), desc); + q.ext_oneapi_copy(input_1.data(), img_mem_1.get_handle(), desc); + q.wait(); + + { + sycl::range globalSize = dims; + q.wait(); + util::run_ndim_test( + q, globalSize, localSize, img_input_0, img_input_1, img_output); + q.wait(); + + q.ext_oneapi_copy(img_mem_2.get_handle(), actual.data(), desc); + q.wait(); + } + + // Cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(img_input_0, q); + sycl::ext::oneapi::experimental::destroy_image_handle(img_input_1, q); + sycl::ext::oneapi::experimental::destroy_image_handle(img_output, q); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < num_elems; i++) { + for (int j = 0; j < NChannels; ++j) { + bool mismatch = false; + if (actual[i][j] != expected[i][j]) { + mismatch = true; + validated = false; + } + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "\tResult mismatch at [" << i << "][" << j + << "] Expected: " << +expected[i][j] + << ", Actual: " << +actual[i][j] << std::endl; +#else + break; +#endif + } + } + } +#ifdef VERBOSE_PRINT + if (validated) { + std::cout << "\tTest passed!" << std::endl; + } else { + std::cout << "\tTest failed!\n"; + } +#endif + + return !validated; +} + +void printTestName(std::string name) { +#ifdef VERBOSE_PRINT + std::cout << name; +#endif +} + +int main() { + + unsigned int seed = 0; + bool failed = false; + + printTestName("Running 1D int\n"); + failed |= + run_test<1, int, 1, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::r, class int_1d>({32}, {2}, seed); + printTestName("Running 2D int\n"); + failed |= run_test<2, int32_t, 1, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::r, class int_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D int\n"); + failed |= run_test<3, int32_t, 1, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::r, class int_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D int2\n"); + failed |= + run_test<1, int, 2, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::rg, class int2_1d>({32}, {2}, seed); + printTestName("Running 2D int2\n"); + failed |= run_test<2, int32_t, 2, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::rg, class int2_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D int2\n"); + failed |= run_test<3, int32_t, 2, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::rg, class int2_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D int4\n"); + failed |= + run_test<1, int, 4, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::rgba, class int4_1d>({32}, {2}, seed); + printTestName("Running 2D int4\n"); + failed |= run_test<2, int32_t, 4, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::rgba, class int4_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D int4\n"); + failed |= run_test<3, int32_t, 4, sycl::image_channel_type::signed_int32, + sycl::image_channel_order::rgba, class int4_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + + printTestName("Running 1D unsigned int\n"); + failed |= + run_test<1, unsigned int, 1, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::r, class uint_1d>({32}, {2}, seed); + printTestName("Running 2D unsigned int\n"); + failed |= run_test<2, uint32_t, 1, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::r, class uint_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D unsigned int\n"); + failed |= run_test<3, uint32_t, 1, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::r, class uint_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D unsigned int2\n"); + failed |= + run_test<1, unsigned int, 2, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::rg, class uint2_1d>({32}, {2}, seed); + printTestName("Running 2D unsigned int2\n"); + failed |= run_test<2, uint32_t, 2, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::rg, class uint2_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D unsigned int2\n"); + failed |= run_test<3, uint32_t, 2, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::rg, class uint2_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D unsigned int4\n"); + failed |= + run_test<1, unsigned int, 4, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::rgba, class uint4_1d>({32}, {2}, + seed); + printTestName("Running 2D unsigned int4\n"); + failed |= run_test<2, uint32_t, 4, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::rgba, class uint4_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D unsigned int4\n"); + failed |= run_test<3, uint32_t, 4, sycl::image_channel_type::unsigned_int32, + sycl::image_channel_order::rgba, class uint4_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + + printTestName("Running 1D short\n"); + failed |= + run_test<1, short, 1, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::r, class short_1d>({32}, {2}, seed); + printTestName("Running 2D short\n"); + failed |= run_test<2, short, 1, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::r, class short_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D short\n"); + failed |= run_test<3, short, 1, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::r, class short_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D short2\n"); + failed |= + run_test<1, short, 2, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::rg, class short2_1d>({32}, {2}, seed); + printTestName("Running 2D short2\n"); + failed |= run_test<2, short, 2, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::rg, class short2_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D short2\n"); + failed |= run_test<3, short, 2, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::rg, class short2_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D short4\n"); + failed |= run_test<1, short, 4, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::rgba, class short4_1d>( + {32}, {2}, seed); + printTestName("Running 2D short4\n"); + failed |= run_test<2, short, 4, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::rgba, class short4_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D short4\n"); + failed |= run_test<3, short, 4, sycl::image_channel_type::signed_int16, + sycl::image_channel_order::rgba, class short4_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + + printTestName("Running 1D unsigned short\n"); + failed |= + run_test<1, unsigned short, 1, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::r, class ushort_1d>({32}, {2}, seed); + printTestName("Running 2D unsigned short\n"); + failed |= + run_test<2, unsigned short, 1, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::r, class ushort_2d>({2816, 32}, + {32, 32}, seed); + printTestName("Running 3D unsigned short\n"); + failed |= + run_test<3, unsigned short, 1, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::r, class ushort_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D unsigned short2\n"); + failed |= + run_test<1, unsigned short, 2, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::rg, class ushort2_1d>({32}, {2}, + seed); + printTestName("Running 2D unsigned short2\n"); + failed |= + run_test<2, unsigned short, 2, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::rg, class ushort2_2d>({2816, 32}, + {32, 32}, seed); + printTestName("Running 3D unsigned short2\n"); + failed |= + run_test<3, unsigned short, 2, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::rg, class ushort2_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D unsigned short4\n"); + failed |= + run_test<1, unsigned short, 4, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::rgba, class ushort4_1d>({32}, {2}, + seed); + printTestName("Running 2D unsigned short4\n"); + failed |= + run_test<2, unsigned short, 4, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::rgba, class ushort4_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D unsigned short4\n"); + failed |= + run_test<3, unsigned short, 4, sycl::image_channel_type::unsigned_int16, + sycl::image_channel_order::rgba, class ushort4_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + + printTestName("Running 1D char\n"); + failed |= + run_test<1, signed char, 1, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::r, class char_1d>({32}, {2}, seed); + printTestName("Running 2D char\n"); + failed |= run_test<2, signed char, 1, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::r, class char_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D char\n"); + failed |= run_test<3, signed char, 1, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::r, class char_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D char2\n"); + failed |= + run_test<1, signed char, 2, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::rg, class char2_1d>({32}, {2}, seed); + printTestName("Running 2D char2\n"); + failed |= run_test<2, signed char, 2, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::rg, class char2_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D char2\n"); + failed |= run_test<3, signed char, 2, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::rg, class char2_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D char4\n"); + failed |= run_test<1, signed char, 4, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::rgba, class char4_1d>({32}, {2}, + seed); + printTestName("Running 2D char4\n"); + failed |= run_test<2, signed char, 4, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::rgba, class char4_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D char4\n"); + failed |= run_test<3, signed char, 4, sycl::image_channel_type::signed_int8, + sycl::image_channel_order::rgba, class char4_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + + printTestName("Running 1D unsigned char\n"); + failed |= + run_test<1, unsigned char, 1, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::r, class uchar_1d>({32}, {2}, seed); + printTestName("Running 2D unsigned char\n"); + failed |= + run_test<2, unsigned char, 1, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::r, class uchar_2d>({2816, 32}, + {32, 32}, seed); + printTestName("Running 3D unsigned char\n"); + failed |= + run_test<3, unsigned char, 1, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::r, class uchar_3d>({48, 128, 32}, + {16, 16, 4}, seed); + printTestName("Running 1D unsigned char2\n"); + failed |= + run_test<1, unsigned char, 2, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rg, class uchar2_1d>({32}, {2}, seed); + printTestName("Running 2D unsigned char2\n"); + failed |= + run_test<2, unsigned char, 2, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rg, class uchar2_2d>({2816, 32}, + {32, 32}, seed); + printTestName("Running 3D unsigned char2\n"); + failed |= + run_test<3, unsigned char, 2, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rg, class uchar2_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D unsigned char4\n"); + failed |= + run_test<1, unsigned char, 4, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rgba, class uchar4_1d>({32}, {2}, + seed); + printTestName("Running 2D unsigned char4\n"); + failed |= + run_test<2, unsigned char, 4, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rgba, class uchar4_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D unsigned char4\n"); + failed |= + run_test<3, unsigned char, 4, sycl::image_channel_type::unsigned_int8, + sycl::image_channel_order::rgba, class uchar4_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + + printTestName("Running 1D float\n"); + failed |= run_test<1, float, 1, sycl::image_channel_type::fp32, + sycl::image_channel_order::r, class float_1d>({1024}, + {512}, seed); + printTestName("Running 2D float\n"); + failed |= run_test<2, float, 1, sycl::image_channel_type::fp32, + sycl::image_channel_order::r, class float_2d>( + {4096, 3808}, {32, 32}, seed); + printTestName("Running 3D float\n"); + failed |= run_test<3, float, 1, sycl::image_channel_type::fp32, + sycl::image_channel_order::r, class float_3d>( + {1024, 832, 32}, {16, 16, 4}, seed); + printTestName("Running 1D float2\n"); + failed |= run_test<1, float, 2, sycl::image_channel_type::fp32, + sycl::image_channel_order::rg, class float2_1d>( + {608}, {32}, seed); + printTestName("Running 2D float2\n"); + failed |= run_test<2, float, 2, sycl::image_channel_type::fp32, + sycl::image_channel_order::rg, class float2_2d>( + {3808, 4096}, {32, 32}, seed); + printTestName("Running 3D float2\n"); + failed |= run_test<3, float, 2, sycl::image_channel_type::fp32, + sycl::image_channel_order::rg, class float2_3d>( + {832, 1024, 32}, {16, 16, 4}, seed); + printTestName("Running 1D float4\n"); + failed |= run_test<1, float, 4, sycl::image_channel_type::fp32, + sycl::image_channel_order::rgba, class float4_1d>( + {1024}, {512}, seed); + printTestName("Running 2D float4\n"); + failed |= run_test<2, float, 4, sycl::image_channel_type::fp32, + sycl::image_channel_order::rgba, class float4_2d>( + {4096, 4096}, {32, 32}, seed); + printTestName("Running 3D float4\n"); + failed |= run_test<3, float, 4, sycl::image_channel_type::fp32, + sycl::image_channel_order::rgba, class float4_3d>( + {1024, 1024, 16}, {16, 16, 4}, seed); + + printTestName("Running 1D half\n"); + failed |= + run_test<1, sycl::half, 1, sycl::image_channel_type::fp16, + sycl::image_channel_order::r, class half_1d>({32}, {2}, seed); + printTestName("Running 2D half\n"); + failed |= run_test<2, sycl::half, 1, sycl::image_channel_type::fp16, + sycl::image_channel_order::r, class half_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D half\n"); + failed |= run_test<3, sycl::half, 1, sycl::image_channel_type::fp16, + sycl::image_channel_order::r, class half_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D half2\n"); + failed |= + run_test<1, sycl::half, 2, sycl::image_channel_type::fp16, + sycl::image_channel_order::rg, class half2_1d>({32}, {2}, seed); + printTestName("Running 2D half2\n"); + failed |= run_test<2, sycl::half, 2, sycl::image_channel_type::fp16, + sycl::image_channel_order::rg, class half2_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D half2\n"); + failed |= run_test<3, sycl::half, 2, sycl::image_channel_type::fp16, + sycl::image_channel_order::rg, class half2_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + printTestName("Running 1D half4\n"); + failed |= run_test<1, sycl::half, 4, sycl::image_channel_type::fp16, + sycl::image_channel_order::rgba, class half4_1d>({32}, {2}, + seed); + printTestName("Running 2D half4\n"); + failed |= run_test<2, sycl::half, 4, sycl::image_channel_type::fp16, + sycl::image_channel_order::rgba, class half4_2d>( + {2816, 32}, {32, 32}, seed); + printTestName("Running 3D half4\n"); + failed |= run_test<3, sycl::half, 4, sycl::image_channel_type::fp16, + sycl::image_channel_order::rgba, class half4_3d>( + {48, 128, 32}, {16, 16, 4}, seed); + + if (failed) { + std::cerr << "An error has occured!\n"; + return 1; + } + + std::cout << "All tests passed!\n"; + return 0; +} diff --git a/sycl/test-e2e/bindless_images/sampling_1D.cpp b/sycl/test-e2e/bindless_images/sampling_1D.cpp new file mode 100644 index 0000000000000..b6ea4a5815210 --- /dev/null +++ b/sycl/test-e2e/bindless_images/sampling_1D.cpp @@ -0,0 +1,117 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + +#if defined(SYCL_EXT_ONEAPI_BINDLESS_IMAGES) + assert(SYCL_EXT_ONEAPI_BINDLESS_IMAGES == 1); +#if defined(VERBOSE_PRINT) + std::cout << "SYCL_EXT_ONEAPI_BINDLESS_IMAGES is defined!" << std::endl; +#endif +#else + std::cerr << "Bindless images feature test macro is not defined!" + << std::endl; + return 1; +#endif // defined(SYCL_EXT_ONEAPI_BINDLESS_IMAGES) + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + constexpr size_t N = 32; + size_t width = N; + std::vector out(N); + std::vector expected(N); + std::vector dataIn(N); + for (int i = 0; i < N; i++) { + expected[i] = i; + dataIn[i] = float(i); + } + + try { + // Extension: image descriptor + sycl::ext::oneapi::experimental::image_descriptor desc( + {width}, sycl::image_channel_order::r, sycl::image_channel_type::fp32); + + sycl::ext::oneapi::experimental::bindless_image_sampler samp( + sycl::addressing_mode::repeat, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::linear); + + // Extension: allocate memory on device + sycl::ext::oneapi::experimental::image_mem imgMem(desc, dev, ctxt); + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn.data(), imgMem.get_handle(), desc); + q.wait_and_throw(); + + // Extension: create the image and return the handle + auto imgHandle = sycl::ext::oneapi::experimental::create_image( + imgMem, samp, desc, dev, ctxt); + + sycl::buffer buf((float *)out.data(), N); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access(cgh, N); + + cgh.parallel_for(N, [=](sycl::id<1> id) { + // Normalize coordinate -- +0.5 to look towards centre of pixel + float x = float(id[0] + 0.5) / (float)N; + // Extension: read image data from handle + float px1 = + sycl::ext::oneapi::experimental::read_image(imgHandle, x); + + outAcc[id] = px1; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle, dev, ctxt); + + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test passed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/sampling_2D.cpp b/sycl/test-e2e/bindless_images/sampling_2D.cpp new file mode 100644 index 0000000000000..c3035e8ed09ec --- /dev/null +++ b/sycl/test-e2e/bindless_images/sampling_2D.cpp @@ -0,0 +1,147 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 5; + size_t height = 6; + size_t N = width * height; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N); + std::vector dataIn2(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + expected[i + (width * j)] = (i + (width * j)) * 3; + dataIn1[i + (width * j)] = {(i + (width * j)), 0, 0, 0}; + dataIn2[i + (width * j)] = {(i + (width * j)) * 2, 0, 0, 0}; + } + } + + try { + sycl::ext::oneapi::experimental::bindless_image_sampler samp( + sycl::addressing_mode::repeat, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::linear); + + // Extension: image descriptor -- can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + size_t pitch = 0; + + // Extension: returns the device pointer to USM allocated pitched memory + auto imgMemUSM0 = + sycl::ext::oneapi::experimental::pitched_alloc_device(&pitch, desc, q); + + if (imgMemUSM0 == nullptr) { + std::cout << "Error allocating images!" << std::endl; + return 1; + } + + // Extension: allocate memory on device + sycl::ext::oneapi::experimental::image_mem imgMem0(desc, dev, ctxt); + + // Extension: copy over data to device for USM image (handler variant) + q.submit([&](sycl::handler &cgh) { + cgh.ext_oneapi_copy(dataIn1.data(), imgMemUSM0, desc, pitch); + }); + + // Extension: copy over data to device for non-USM image + q.ext_oneapi_copy(dataIn2.data(), imgMem0.get_handle(), desc); + q.wait_and_throw(); + + // Extension: create the images and return the handles + sycl::ext::oneapi::experimental::sampled_image_handle imgHandle1 = + sycl::ext::oneapi::experimental::create_image(imgMemUSM0, pitch, samp, + desc, dev, ctxt); + sycl::ext::oneapi::experimental::sampled_image_handle imgHandle2 = + sycl::ext::oneapi::experimental::create_image(imgMem0, samp, desc, dev, + ctxt); + + sycl::buffer buf((float *)out.data(), + sycl::range<2>{height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<2>{height, width}); + + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Normalize coordinates -- +0.5 to look towards centre of pixel + float fdim0 = float(dim0 + 0.5) / (float)width; + float fdim1 = float(dim1 + 0.5) / (float)height; + + // Extension: read image data from handle + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + imgHandle1, sycl::float2(fdim0, fdim1)); + sycl::float4 px2 = + sycl::ext::oneapi::experimental::read_image( + imgHandle2, sycl::float2(fdim0, fdim1)); + + outAcc[sycl::id<2>{dim1, dim0}] = px1[0] + px2[0]; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, dev, + ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, dev, + ctxt); + sycl::free(imgMemUSM0, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp b/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp new file mode 100644 index 0000000000000..eaa7da3594b35 --- /dev/null +++ b/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp @@ -0,0 +1,146 @@ +// REQUIRES: linux +// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images_shared_usm + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 5; + size_t height = 6; + size_t N = width * height; + size_t widthInBytes = width * sizeof(float); + std::vector out(N); + std::vector expected(N); + std::vector dataIn(N); + + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + expected[i + (width * j)] = i + (width * j); + dataIn[i + (width * j)] = i + (width * j); + } + } + + try { + sycl::ext::oneapi::experimental::bindless_image_sampler samp( + sycl::addressing_mode::clamp, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::linear); + + // Extension: image descriptor + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::r, + sycl::image_channel_type::fp32); + + auto devicePitchAlign = dev.get_info< + sycl::ext::oneapi::experimental::info::device::image_row_pitch_align>(); + auto deviceMaxPitch = + dev.get_info(); + + // Pitch requirements: + // - pitch % devicePitchAlign == 0 + // - pitch >= widthInBytes + // - pitch <= deviceMaxPitch + size_t pitch = devicePitchAlign * + std::ceil(float(widthInBytes) / float(devicePitchAlign)); + assert(pitch <= deviceMaxPitch); + + // Shared USM allocation + auto imgMem = sycl::aligned_alloc_shared(devicePitchAlign, (pitch * height), + dev, ctxt); + + if (imgMem == nullptr) { + std::cerr << "Error allocating images!" << std::endl; + return 1; + } + + // Copy to shared USM and incorporate pitch + for (size_t i = 0; i < height; i++) { + memcpy(static_cast(imgMem) + (i * pitch / sizeof(float)), + dataIn.data() + (i * width), widthInBytes); + } + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::sampled_image_handle imgHandle = + sycl::ext::oneapi::experimental::create_image(imgMem, pitch, samp, desc, + dev, ctxt); + + sycl::buffer buf((float *)out.data(), + sycl::range<2>{height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<2>{height, width}); + + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Normalize coordinates -- +0.5 to look towards centre of pixel + float fdim0 = float(dim0 + 0.5) / (float)width; + float fdim1 = float(dim1 + 0.5) / (float)height; + + // Extension: read image data from handle + float px = sycl::ext::oneapi::experimental::read_image( + imgHandle, sycl::float2(fdim0, fdim1)); + + outAcc[sycl::id<2>{dim1, dim0}] = px; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle, dev, ctxt); + sycl::free(imgMem, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/sampling_2D_half.cpp b/sycl/test-e2e/bindless_images/sampling_2D_half.cpp new file mode 100644 index 0000000000000..71d709e89b67e --- /dev/null +++ b/sycl/test-e2e/bindless_images/sampling_2D_half.cpp @@ -0,0 +1,132 @@ +// REQUIRES: linux +// REQUIRES: cuda +// REQUIRES: aspect-fp16 + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 5; + size_t height = 6; + size_t N = width * height; + std::vector out(N); + std::vector expected(N); + std::vector dataIn(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + expected[i + (width * j)] = i + (width * j); + dataIn[i + (width * j)] = {i + (width * j), 0, 0, 0}; + } + } + + try { + sycl::ext::oneapi::experimental::bindless_image_sampler samp( + sycl::addressing_mode::repeat, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::linear); + + unsigned int elementSizebytes = sizeof(sycl::half) * 4; + size_t widthInBytes = width * elementSizebytes; + size_t pitch = 0; + + // Extension: returns the device pointer to USM allocated pitched memory + auto imgMem = sycl::ext::oneapi::experimental::pitched_alloc_device( + &pitch, widthInBytes, height, elementSizebytes, q); + + // Extension: image descriptor + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp16); + + if (imgMem == nullptr) { + std::cout << "Error allocating images!" << std::endl; + return 1; + } + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn.data(), imgMem, desc, pitch); + q.wait_and_throw(); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::sampled_image_handle imgHandle = + sycl::ext::oneapi::experimental::create_image(imgMem, pitch, samp, desc, + dev, ctxt); + + sycl::buffer buf((sycl::half *)out.data(), + sycl::range<2>{height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<2>{height, width}); + + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Normalize coordinates -- +0.5 to look towards centre of pixel + float fdim0 = float(dim0 + 0.5) / (float)width; + float fdim1 = float(dim1 + 0.5) / (float)height; + + // Extension: read image data from handle + sycl::half4 px1 = + sycl::ext::oneapi::experimental::read_image( + imgHandle, sycl::float2(fdim0, fdim1)); + + outAcc[sycl::id<2>{dim1, dim0}] = px1[0]; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle, dev, ctxt); + sycl::free(imgMem, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/sampling_3D.cpp b/sycl/test-e2e/bindless_images/sampling_3D.cpp new file mode 100644 index 0000000000000..674bb06f0837c --- /dev/null +++ b/sycl/test-e2e/bindless_images/sampling_3D.cpp @@ -0,0 +1,126 @@ +// REQUIRES: linux +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %t.out + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 4; + size_t height = 6; + size_t depth = 8; + size_t N = width * height * depth; + std::vector out(N); + std::vector expected(N); + std::vector dataIn(N); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < depth; k++) { + expected[i + width * (j + height * k)] = i + width * (j + height * k); + dataIn[i + width * (j + height * k)] = {i + width * (j + height * k), 0, + 0, 0}; + } + } + } + + try { + // Extension: image descriptor + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height, depth}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + + sycl::ext::oneapi::experimental::bindless_image_sampler samp( + sycl::addressing_mode::clamp, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::linear); + + // Extension: allocate memory on device + sycl::ext::oneapi::experimental::image_mem imgMem(desc, dev, ctxt); + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn.data(), imgMem.get_handle(), desc); + q.wait_and_throw(); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::sampled_image_handle imgHandle = + sycl::ext::oneapi::experimental::create_image(imgMem, samp, desc, dev, + ctxt); + + sycl::buffer buf((float *)out.data(), + sycl::range<3>{depth, height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<3>{depth, height, width}); + + cgh.parallel_for( + sycl::nd_range<3>{{width, height, depth}, {width, height, depth}}, + [=](sycl::nd_item<3> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + size_t dim2 = it.get_local_id(2); + + // Normalize coordinates -- +0.5 to look towards centre of pixel + float fdim0 = float(dim0 + 0.5) / (float)width; + float fdim1 = float(dim1 + 0.5) / (float)height; + float fdim2 = float(dim2 + 0.5) / (float)depth; + + // Extension: read image data from handle + sycl::float4 px1 = + sycl::ext::oneapi::experimental::read_image( + imgHandle, sycl::float4(fdim0, fdim1, fdim2, (float)0)); + + outAcc[sycl::id<3>{dim2, dim1, dim0}] = px1[0]; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle, dev, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp new file mode 100644 index 0000000000000..99d6ab6ffba9d --- /dev/null +++ b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp @@ -0,0 +1,270 @@ +// REQUIRES: linux +// REQUIRES: cuda +// REQUIRES: vulkan + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %link-vulkan %s -o %t.out +// RUN: %t.out + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +#include + +#include "vulkan_common.hpp" + +#include +#include +#include +#include + +// Returns true if validated correctly +bool run_sycl(int input_image_fd, size_t width, size_t height) { + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // Image descriptor - mapped to Vulkan image layout + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::r, + sycl::image_channel_type::fp32, + sycl::ext::oneapi::experimental::image_type::interop, 1 /*num_levels*/); + + sycl::ext::oneapi::experimental::bindless_image_sampler samp( + sycl::addressing_mode::repeat, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::linear); + + const size_t img_size = width * height * sizeof(float); + + // Extension: external memory descriptor + sycl::ext::oneapi::experimental::external_mem_descriptor< + sycl::ext::oneapi::experimental::external_mem_fd> + input_ext_mem_desc{input_image_fd, img_size}; + + // Extension: interop mem handle imported from file descriptor + sycl::ext::oneapi::experimental::interop_mem_handle input_interop_mem_handle = + sycl::ext::oneapi::experimental::import_external_memory( + input_ext_mem_desc, q); + + // Extension: interop mem handle imported from file descriptor + sycl::ext::oneapi::experimental::image_mem_handle input_mapped_mem_handle = + sycl::ext::oneapi::experimental::map_external_memory_array( + input_interop_mem_handle, desc, q); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::sampled_image_handle img_input = + sycl::ext::oneapi::experimental::create_image(input_mapped_mem_handle, + samp, desc, q); + + std::vector out(width * height); + + try { + sycl::buffer buf((float *)out.data(), + sycl::range<2>{height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<2>{height, width}); + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Normalize coordinates -- +0.5 to look towards centre of pixel + float fdim0 = float(dim0 + 0.5) / (float)width; + float fdim1 = float(dim1 + 0.5) / (float)height; + + // Extension: read image data from handle (Vulkan imported) + float pixel = sycl::ext::oneapi::experimental::read_image( + img_input, sycl::float2(fdim0, fdim1)); + + pixel *= 10.f; + outAcc[sycl::id<2>{dim1, dim0}] = pixel; + }); + }); + } catch (...) { + std::cerr << "Kernel submission failed!" << std::endl; + exit(-1); + } + + try { + sycl::ext::oneapi::experimental::destroy_image_handle(img_input, q); + sycl::ext::oneapi::experimental::release_external_memory( + input_interop_mem_handle, q); + } catch (...) { + std::cerr << "Destroying interop memory failed!\n"; + } + + printString("Validating\n"); + bool validated = true; + for (int i = 0; i < width * height; i++) { + bool mismatch = false; + float expected = (float)(i)*10.f; + if (out[i] != expected) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected + << ", Actual: " << out[i] << "\n"; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!\n"; + return true; + } + std::cout << "Test failed!\n"; + return false; +} + +// Returns true if validated correctly +bool run_test() { + const uint32_t width = 16, height = 16; + const size_t imageSizeBytes = width * height * sizeof(float); + + printString("Creating input image\n"); + // Create input image memory + auto inputImage = vkutil::createImage( + VK_IMAGE_TYPE_2D, VK_FORMAT_R32_SFLOAT, {width, height, 1}, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_STORAGE_BIT); + auto inputImageMemoryTypeIndex = vkutil::getImageMemoryTypeIndex( + inputImage, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + auto inputMemory = + vkutil::allocateDeviceMemory(imageSizeBytes, inputImageMemoryTypeIndex); + VK_CHECK_CALL(vkBindImageMemory(vk_device, inputImage, inputMemory, + 0 /*memoryOffset*/)); + + printString("Creating staging buffers\n"); + // Create input staging memory + auto inputStagingBuffer = vkutil::createBuffer( + imageSizeBytes, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); + auto inputStagingMemoryTypeIndex = vkutil::getBufferMemoryTypeIndex( + inputStagingBuffer, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + auto inputStagingMemory = vkutil::allocateDeviceMemory( + imageSizeBytes, inputStagingMemoryTypeIndex, false /*exportable*/); + VK_CHECK_CALL(vkBindBufferMemory(vk_device, inputStagingBuffer, + inputStagingMemory, 0 /*memoryOffset*/)); + + printString("Populating staging buffer\n"); + // Populate staging memory + float *inputStagingData = nullptr; + VK_CHECK_CALL(vkMapMemory(vk_device, inputStagingMemory, 0 /*offset*/, + imageSizeBytes, 0 /*flags*/, + (void **)&inputStagingData)); + for (int i = 0; i < width * height; ++i) { + inputStagingData[i] = (float)i; + } + vkUnmapMemory(vk_device, inputStagingMemory); + + printString("Submitting image layout transition\n"); + // Transition image layouts + { + VkImageMemoryBarrier barrierInput = {}; + barrierInput.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrierInput.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + barrierInput.newLayout = VK_IMAGE_LAYOUT_GENERAL; + barrierInput.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrierInput.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrierInput.image = inputImage; + barrierInput.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrierInput.subresourceRange.levelCount = 1; + barrierInput.subresourceRange.layerCount = 1; + barrierInput.srcAccessMask = 0; + barrierInput.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + + VkCommandBufferBeginInfo cbbi = {}; + cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cbbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + VK_CHECK_CALL(vkBeginCommandBuffer(vk_computeCmdBuffer, &cbbi)); + vkCmdPipelineBarrier(vk_computeCmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrierInput); + VK_CHECK_CALL(vkEndCommandBuffer(vk_computeCmdBuffer)); + + VkSubmitInfo submission = {}; + submission.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submission.commandBufferCount = 1; + submission.pCommandBuffers = &vk_computeCmdBuffer; + + VK_CHECK_CALL(vkQueueSubmit(vk_compute_queue, 1 /*submitCount*/, + &submission, VK_NULL_HANDLE /*fence*/)); + VK_CHECK_CALL(vkQueueWaitIdle(vk_compute_queue)); + } + + printString("Copying staging memory to images\n"); + // Copy staging to main image memory + { + VkCommandBufferBeginInfo cbbi = {}; + cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cbbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + VkBufferImageCopy copyRegion = {}; + copyRegion.imageExtent = {width, height, 1}; + copyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copyRegion.imageSubresource.layerCount = 1; + + VK_CHECK_CALL(vkBeginCommandBuffer(vk_transferCmdBuffers[0], &cbbi)); + vkCmdCopyBufferToImage(vk_transferCmdBuffers[0], inputStagingBuffer, + inputImage, VK_IMAGE_LAYOUT_GENERAL, + 1 /*regionCount*/, ©Region); + VK_CHECK_CALL(vkEndCommandBuffer(vk_transferCmdBuffers[0])); + + VkSubmitInfo submission = {}; + submission.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submission.commandBufferCount = 1; + submission.pCommandBuffers = &vk_transferCmdBuffers[0]; + + VK_CHECK_CALL(vkQueueSubmit(vk_transfer_queue, 1 /*submitCount*/, + &submission, VK_NULL_HANDLE /*fence*/)); + VK_CHECK_CALL(vkQueueWaitIdle(vk_transfer_queue)); + } + + printString("Getting memory file descriptors and calling into SYCL\n"); + // Pass memory to SYCL for modification + auto input_fd = vkutil::getMemoryOpaqueFD(inputMemory); + bool validated = run_sycl(input_fd, width, height); + + // Cleanup + vkDestroyBuffer(vk_device, inputStagingBuffer, nullptr); + vkDestroyImage(vk_device, inputImage, nullptr); + vkFreeMemory(vk_device, inputStagingMemory, nullptr); + vkFreeMemory(vk_device, inputMemory, nullptr); + + return validated; +} + +int main() { + + if (vkutil::setupInstance() != VK_SUCCESS) { + std::cerr << "Instance setup failed!\n"; + return EXIT_FAILURE; + } + + if (vkutil::setupDevice("NVIDIA") != VK_SUCCESS) { + std::cerr << "Device setup failed!\n"; + return EXIT_FAILURE; + } + + if (vkutil::setupCommandBuffers() != VK_SUCCESS) { + std::cerr << "Compute pipeline setup failed!\n"; + return EXIT_FAILURE; + } + + bool validated = run_test(); + + if (vkutil::cleanup() != VK_SUCCESS) { + std::cerr << "Cleanup failed!\n"; + return EXIT_FAILURE; + } + + return validated ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp new file mode 100644 index 0000000000000..5b3f93a4c6af3 --- /dev/null +++ b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp @@ -0,0 +1,444 @@ +// REQUIRES: linux +// REQUIRES: cuda +// REQUIRES: vulkan + +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %link-vulkan %s -o %t.out +// RUN: %t.out + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +#include + +#include "vulkan_common.hpp" + +#include +#include +#include +#include + +// Returns true if validated correctly +void run_sycl(int input_image_fd, int output_image_fd, + int sycl_wait_semaphore_fd, int sycl_done_semaphore_fd, + size_t width, size_t height) { + try { + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // Image descriptor - mapped to Vulkan image layout + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, sycl::image_channel_order::rgba, + sycl::image_channel_type::unsigned_int32, + sycl::ext::oneapi::experimental::image_type::interop, 1 /*num_levels*/); + + const size_t img_size = width * height * sizeof(sycl::uint4); + + std::vector out(width * height); + + // Extension: external memory descriptor + sycl::ext::oneapi::experimental::external_mem_descriptor< + sycl::ext::oneapi::experimental::external_mem_fd> + inputExtMemDesc{input_image_fd, img_size}; + sycl::ext::oneapi::experimental::external_mem_descriptor< + sycl::ext::oneapi::experimental::external_mem_fd> + outputExtMemDesc{output_image_fd, img_size}; + + sycl::ext::oneapi::experimental::interop_mem_handle + input_interop_mem_handle = + sycl::ext::oneapi::experimental::import_external_memory( + inputExtMemDesc, dev, ctxt); + + sycl::ext::oneapi::experimental::interop_mem_handle + output_interop_mem_handle = + sycl::ext::oneapi::experimental::import_external_memory( + outputExtMemDesc, dev, ctxt); + + sycl::ext::oneapi::experimental::image_mem_handle input_mapped_mem_handle = + sycl::ext::oneapi::experimental::map_external_memory_array( + input_interop_mem_handle, desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem_handle output_mapped_mem_handle = + sycl::ext::oneapi::experimental::map_external_memory_array( + output_interop_mem_handle, desc, dev, ctxt); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle img_input = + sycl::ext::oneapi::experimental::create_image(input_mapped_mem_handle, + desc, dev, ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle img_output = + sycl::ext::oneapi::experimental::create_image(output_mapped_mem_handle, + desc, dev, ctxt); + + // Extension: import semaphores + sycl::ext::oneapi::experimental::external_semaphore_descriptor< + sycl::ext::oneapi::experimental::external_semaphore_fd> + sycl_wait_external_semaphore_desc{sycl_wait_semaphore_fd}; + + sycl::ext::oneapi::experimental::external_semaphore_descriptor< + sycl::ext::oneapi::experimental::external_semaphore_fd> + sycl_done_external_semaphore_desc{sycl_done_semaphore_fd}; + + sycl::ext::oneapi::experimental::interop_semaphore_handle + sycl_wait_interop_semaphore_handle = + sycl::ext::oneapi::experimental::import_external_semaphore( + sycl_wait_external_semaphore_desc, dev, ctxt); + + sycl::ext::oneapi::experimental::interop_semaphore_handle + sycl_done_interop_semaphore_handle = + sycl::ext::oneapi::experimental::import_external_semaphore( + sycl_done_external_semaphore_desc, dev, ctxt); + + // Extension: wait for imported semaphore + q.ext_oneapi_wait_external_semaphore(sycl_wait_interop_semaphore_handle); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {32, 32}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_global_id(0); + size_t dim1 = it.get_global_id(1); + + // Extension: read image data from handle (Vulkan imported) + sycl::uint4 pixel = + sycl::ext::oneapi::experimental::read_image( + img_input, sycl::int2(dim0, dim1)); + + pixel *= 10; + + // Extension: write image data using handle (Vulkan imported) + sycl::ext::oneapi::experimental::write_image( + img_output, sycl::int2(dim0, dim1), pixel); + }); + }); + + // Extension: signal imported semaphore + q.submit([&](sycl::handler &cgh) { + cgh.ext_oneapi_signal_external_semaphore( + sycl_done_interop_semaphore_handle); + }); + + // Wait for kernel completion before destroying external objects + q.wait_and_throw(); + + sycl::ext::oneapi::experimental::release_external_memory( + input_interop_mem_handle, dev, ctxt); + sycl::ext::oneapi::experimental::release_external_memory( + output_interop_mem_handle, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_external_semaphore( + sycl_wait_interop_semaphore_handle, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_external_semaphore( + sycl_done_interop_semaphore_handle, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(img_input, dev, ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(img_output, dev, + ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + exit(-1); + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + exit(-1); + } +} + +// Returns true if validated correctly +bool run_test() { + const uint32_t width = 1024 * 4, height = 1024 * 4; + const size_t imageSizeBytes = width * height * sizeof(sycl::uint4); + + printString("Creating input image\n"); + // Create input image memory + auto inputImage = vkutil::createImage( + VK_IMAGE_TYPE_2D, VK_FORMAT_R32G32B32A32_UINT, {width, height, 1}, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_STORAGE_BIT); + auto inputImageMemoryTypeIndex = vkutil::getImageMemoryTypeIndex( + inputImage, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + auto inputMemory = + vkutil::allocateDeviceMemory(imageSizeBytes, inputImageMemoryTypeIndex); + VK_CHECK_CALL(vkBindImageMemory(vk_device, inputImage, inputMemory, + 0 /*memoryOffset*/)); + + printString("Creating output image\n"); + // Create output image memory + auto outputImage = vkutil::createImage( + VK_IMAGE_TYPE_2D, VK_FORMAT_R32G32B32A32_UINT, {width, height, 1}, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_STORAGE_BIT); + auto outputImageMemoryTypeIndex = vkutil::getImageMemoryTypeIndex( + outputImage, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + auto outputMemory = + vkutil::allocateDeviceMemory(imageSizeBytes, outputImageMemoryTypeIndex); + VK_CHECK_CALL(vkBindImageMemory(vk_device, outputImage, outputMemory, + 0 /*memoryOffset*/)); + + printString("Creating staging buffers\n"); + // Create input staging memory + auto inputStagingBuffer = vkutil::createBuffer( + imageSizeBytes, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); + auto inputStagingMemoryTypeIndex = vkutil::getBufferMemoryTypeIndex( + inputStagingBuffer, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + auto inputStagingMemory = vkutil::allocateDeviceMemory( + imageSizeBytes, inputStagingMemoryTypeIndex, false /*exportable*/); + VK_CHECK_CALL(vkBindBufferMemory(vk_device, inputStagingBuffer, + inputStagingMemory, 0 /*memoryOffset*/)); + + // Create output staging memory + auto outputStagingBuffer = vkutil::createBuffer( + imageSizeBytes, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); + auto outputStagingMemoryTypeIndex = vkutil::getBufferMemoryTypeIndex( + outputStagingBuffer, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + auto outputStagingMemory = vkutil::allocateDeviceMemory( + imageSizeBytes, outputStagingMemoryTypeIndex, false /*exportable*/); + VK_CHECK_CALL(vkBindBufferMemory(vk_device, outputStagingBuffer, + outputStagingMemory, 0 /*memoryOffset*/)); + + printString("Populating staging buffer\n"); + // Populate staging memory + sycl::vec *inputStagingData = nullptr; + VK_CHECK_CALL(vkMapMemory(vk_device, inputStagingMemory, 0 /*offset*/, + imageSizeBytes, 0 /*flags*/, + (void **)&inputStagingData)); + for (int i = 0; i < width * height; ++i) { + inputStagingData[i] = + sycl::vec{4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3}; + } + vkUnmapMemory(vk_device, inputStagingMemory); + + printString("Submitting image layout transition\n"); + // Transition image layouts + { + VkImageMemoryBarrier barrierInput = {}; + barrierInput.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrierInput.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + barrierInput.newLayout = VK_IMAGE_LAYOUT_GENERAL; + barrierInput.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrierInput.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrierInput.image = inputImage; + barrierInput.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrierInput.subresourceRange.levelCount = 1; + barrierInput.subresourceRange.layerCount = 1; + barrierInput.srcAccessMask = 0; + barrierInput.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + + VkImageMemoryBarrier barrierOutput = {}; + barrierOutput.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrierOutput.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + barrierOutput.newLayout = VK_IMAGE_LAYOUT_GENERAL; + barrierOutput.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrierOutput.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrierOutput.image = outputImage; + barrierOutput.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrierOutput.subresourceRange.levelCount = 1; + barrierOutput.subresourceRange.layerCount = 1; + barrierOutput.srcAccessMask = 0; + barrierOutput.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + + VkCommandBufferBeginInfo cbbi = {}; + cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cbbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + VK_CHECK_CALL(vkBeginCommandBuffer(vk_computeCmdBuffer, &cbbi)); + vkCmdPipelineBarrier(vk_computeCmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrierInput); + vkCmdPipelineBarrier(vk_computeCmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrierOutput); + VK_CHECK_CALL(vkEndCommandBuffer(vk_computeCmdBuffer)); + + VkSubmitInfo submission = {}; + submission.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submission.commandBufferCount = 1; + submission.pCommandBuffers = &vk_computeCmdBuffer; + + VK_CHECK_CALL(vkQueueSubmit(vk_compute_queue, 1 /*submitCount*/, + &submission, VK_NULL_HANDLE /*fence*/)); + VK_CHECK_CALL(vkQueueWaitIdle(vk_compute_queue)); + } + + // Create semaphore to later import in SYCL + printString("Creating semaphores\n"); + VkSemaphore syclWaitSemaphore; + { + VkExportSemaphoreCreateInfo esci = {}; + esci.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO; + esci.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; + + VkSemaphoreCreateInfo sci = {}; + sci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + sci.pNext = &esci; + VK_CHECK_CALL( + vkCreateSemaphore(vk_device, &sci, nullptr, &syclWaitSemaphore)); + } + + VkSemaphore syclDoneSemaphore; + { + VkExportSemaphoreCreateInfo esci = {}; + esci.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO; + esci.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; + + VkSemaphoreCreateInfo sci = {}; + sci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + sci.pNext = &esci; + VK_CHECK_CALL( + vkCreateSemaphore(vk_device, &sci, nullptr, &syclDoneSemaphore)); + } + + printString("Copying staging memory to images\n"); + // Copy staging to main image memory + { + VkCommandBufferBeginInfo cbbi = {}; + cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cbbi.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT; + + VkBufferImageCopy copyRegion = {}; + copyRegion.imageExtent = {width, height, 1}; + copyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copyRegion.imageSubresource.layerCount = 1; + + VK_CHECK_CALL(vkBeginCommandBuffer(vk_transferCmdBuffers[0], &cbbi)); + vkCmdCopyBufferToImage(vk_transferCmdBuffers[0], inputStagingBuffer, + inputImage, VK_IMAGE_LAYOUT_GENERAL, + 1 /*regionCount*/, ©Region); + VK_CHECK_CALL(vkEndCommandBuffer(vk_transferCmdBuffers[0])); + + std::vector stages{VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT}; + + VkSubmitInfo submission = {}; + submission.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submission.commandBufferCount = 1; + submission.pCommandBuffers = &vk_transferCmdBuffers[0]; + + submission.signalSemaphoreCount = 1; + submission.pSignalSemaphores = &syclWaitSemaphore; + submission.pWaitDstStageMask = stages.data(); + + VK_CHECK_CALL(vkQueueSubmit(vk_transfer_queue, 1 /*submitCount*/, + &submission, VK_NULL_HANDLE /*fence*/)); + } + + printString("Getting memory file descriptors and calling into SYCL\n"); + // Pass memory to SYCL for modification + int input_fd = vkutil::getMemoryOpaqueFD(inputMemory); + int output_fd = vkutil::getMemoryOpaqueFD(outputMemory); + + // Pass semaphores to SYCL for synchronization + int sycl_wait_semaphore_fd = vkutil::getSemaphoreOpaqueFD(syclWaitSemaphore); + int sycl_done_semaphore_fd = vkutil::getSemaphoreOpaqueFD(syclDoneSemaphore); + + run_sycl(input_fd, output_fd, sycl_wait_semaphore_fd, sycl_done_semaphore_fd, + width, height); + + printString("Copying image memory to staging memory\n"); + // Copy main image memory to staging + { + VkCommandBufferBeginInfo cbbi = {}; + cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cbbi.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT; + + VkBufferImageCopy copyRegion = {}; + copyRegion.imageExtent = {width, height, 1}; + copyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copyRegion.imageSubresource.layerCount = 1; + + VK_CHECK_CALL(vkBeginCommandBuffer(vk_transferCmdBuffers[1], &cbbi)); + vkCmdCopyImageToBuffer(vk_transferCmdBuffers[1], outputImage, + VK_IMAGE_LAYOUT_GENERAL, outputStagingBuffer, + 1 /*regionCount*/, ©Region); + VK_CHECK_CALL(vkEndCommandBuffer(vk_transferCmdBuffers[1])); + + std::vector stages{VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT}; + + VkSubmitInfo submission = {}; + submission.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submission.commandBufferCount = 1; + submission.pCommandBuffers = &vk_transferCmdBuffers[1]; + + submission.waitSemaphoreCount = 1; + submission.pWaitSemaphores = &syclDoneSemaphore; + submission.pWaitDstStageMask = stages.data(); + + VK_CHECK_CALL(vkQueueSubmit(vk_transfer_queue, 1 /*submitCount*/, + &submission, VK_NULL_HANDLE /*fence*/)); + VK_CHECK_CALL(vkQueueWaitIdle(vk_transfer_queue)); + } + + printString("Validating\n"); + // Validate that SYCL made changes to the memory + bool validated = true; + sycl::vec *outputStagingData = nullptr; + VK_CHECK_CALL(vkMapMemory(vk_device, outputStagingMemory, 0 /*offset*/, + imageSizeBytes, 0 /*flags*/, + (void **)&outputStagingData)); + for (int i = 0; i < width * height; ++i) { + sycl::vec expected = {4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3}; + expected *= 10; + for (int j = 0; j < 4; ++j) { + if (outputStagingData[i][j] != expected[j]) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! actual[" << i << "][" << j + << "] == " << outputStagingData[i][j] + << " : expected == " << expected[j] << "\n"; + validated = false; +#else + break; +#endif + } + } + if (!validated) + break; + } + vkUnmapMemory(vk_device, outputStagingMemory); + + if (validated) { + std::cout << "Test passed!\n"; + } else { + std::cout << "Test failed!\n"; + } + + // Cleanup + vkDestroyBuffer(vk_device, inputStagingBuffer, nullptr); + vkDestroyBuffer(vk_device, outputStagingBuffer, nullptr); + vkDestroyImage(vk_device, inputImage, nullptr); + vkDestroyImage(vk_device, outputImage, nullptr); + vkFreeMemory(vk_device, inputStagingMemory, nullptr); + vkFreeMemory(vk_device, outputStagingMemory, nullptr); + vkFreeMemory(vk_device, inputMemory, nullptr); + vkFreeMemory(vk_device, outputMemory, nullptr); + vkDestroySemaphore(vk_device, syclWaitSemaphore, nullptr); + vkDestroySemaphore(vk_device, syclDoneSemaphore, nullptr); + + return validated; +} + +int main() { + + if (vkutil::setupInstance() != VK_SUCCESS) { + std::cerr << "Instance setup failed!\n"; + return EXIT_FAILURE; + } + + // Currently only Nvidia devices are tested + if (vkutil::setupDevice("NVIDIA") != VK_SUCCESS) { + std::cerr << "Device setup failed!\n"; + return EXIT_FAILURE; + } + + if (vkutil::setupCommandBuffers() != VK_SUCCESS) { + std::cerr << "Command buffers setup failed!\n"; + return EXIT_FAILURE; + } + + bool validated = run_test(); + + if (vkutil::cleanup() != VK_SUCCESS) { + std::cerr << "Cleanup failed!\n"; + return EXIT_FAILURE; + } + + return validated ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_common.hpp b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_common.hpp new file mode 100644 index 0000000000000..fa71f10a7fb9d --- /dev/null +++ b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_common.hpp @@ -0,0 +1,407 @@ +#pragma once +#include + +#include +#include +#include + +void printString(std::string str) { +#ifdef VERBOSE_PRINT + std::cout << str; +#endif +} + +#define VK_CHECK_CALL_RET(call) \ + { \ + VkResult err = call; \ + if (err != VK_SUCCESS) \ + return err; \ + } + +#define VK_CHECK_CALL(call) \ + { \ + VkResult err = call; \ + if (err != VK_SUCCESS) \ + std::cerr << #call << " failed. Code: " << err << "\n"; \ + } + +static VkInstance vk_instance; +static VkPhysicalDevice vk_physical_device; +static VkDebugUtilsMessengerEXT vk_debug_messenger; +static VkDevice vk_device; +static VkQueue vk_compute_queue; +static VkQueue vk_transfer_queue; + +static PFN_vkGetMemoryFdKHR vk_getMemoryFdKHR; +static PFN_vkGetSemaphoreFdKHR vk_getSemaphoreFdKHR; + +static uint32_t vk_computeQueueFamilyIndex; +static uint32_t vk_transferQueueFamilyIndex; + +static VkCommandPool vk_computeCmdPool; +static VkCommandPool vk_transferCmdPool; + +static VkCommandBuffer vk_computeCmdBuffer; +static VkCommandBuffer vk_transferCmdBuffers[2]; + +static VKAPI_ATTR VkBool32 VKAPI_CALL +debugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, + VkDebugUtilsMessageTypeFlagsEXT messageType, + const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, + void *pUserData) { + // Only print errors from validation layer + if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { + std::cerr << pCallbackData->pMessage << "\n"; + } + return VK_FALSE; +} + +namespace vkutil { +VkResult setupInstance() { + VkApplicationInfo ai = {}; + ai.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + ai.pApplicationName = "SYCL-Vulkan-Interop"; + ai.applicationVersion = VK_MAKE_VERSION(1, 0, 0); + ai.pEngineName = ""; + ai.engineVersion = VK_MAKE_VERSION(1, 0, 0); + ai.apiVersion = VK_API_VERSION_1_0; + + uint32_t layerCount; + VK_CHECK_CALL_RET(vkEnumerateInstanceLayerProperties(&layerCount, nullptr)); + + std::vector availableLayers(layerCount); + VK_CHECK_CALL_RET( + vkEnumerateInstanceLayerProperties(&layerCount, availableLayers.data())); + + VkInstanceCreateInfo ci = {}; + ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + ci.pApplicationInfo = &ai; + std::vector extensions = { + VK_EXT_DEBUG_UTILS_EXTENSION_NAME, + VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME, + VK_KHR_EXTERNAL_FENCE_CAPABILITIES_EXTENSION_NAME, + VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME}; + ci.enabledExtensionCount = extensions.size(); + ci.ppEnabledExtensionNames = extensions.data(); + std::vector layers = {"VK_LAYER_KHRONOS_validation"}; + ci.enabledLayerCount = layers.size(); + ci.ppEnabledLayerNames = layers.data(); + + VK_CHECK_CALL_RET(vkCreateInstance(&ci, nullptr, &vk_instance)); + + VkDebugUtilsMessengerCreateInfoEXT dumci = {}; + dumci.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; + dumci.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; + dumci.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; + dumci.pfnUserCallback = debugCallback; + + auto func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr( + vk_instance, "vkCreateDebugUtilsMessengerEXT"); + if (func != nullptr) { + VK_CHECK_CALL_RET(func(vk_instance, &dumci, nullptr, &vk_debug_messenger)); + } else { + return VK_ERROR_EXTENSION_NOT_PRESENT; + } + + vk_getMemoryFdKHR = (PFN_vkGetMemoryFdKHR)vkGetInstanceProcAddr( + vk_instance, "vkGetMemoryFdKHR"); + + vk_getSemaphoreFdKHR = (PFN_vkGetSemaphoreFdKHR)vkGetInstanceProcAddr( + vk_instance, "vkGetSemaphoreFdKHR"); + + return VK_SUCCESS; +} + +VkResult setupDevice(std::string device) { + uint32_t physicalDeviceCount = 0; + VK_CHECK_CALL_RET( + vkEnumeratePhysicalDevices(vk_instance, &physicalDeviceCount, nullptr)); + if (physicalDeviceCount == 0) { + return VK_ERROR_DEVICE_LOST; + } + std::vector physicalDevices(physicalDeviceCount); + VK_CHECK_CALL_RET(vkEnumeratePhysicalDevices( + vk_instance, &physicalDeviceCount, physicalDevices.data())); + + bool foundDevice = false; + + for (int i = 0; i < physicalDeviceCount; i++) { + vk_physical_device = physicalDevices[i]; + VkPhysicalDeviceProperties props; + vkGetPhysicalDeviceProperties(vk_physical_device, &props); + std::string str(props.deviceName); + + if (str.find(device) != std::string::npos) { + foundDevice = true; + break; + } + } + + if (!foundDevice) { + std::cerr << "Failed to find suitable device!\n"; + return VK_ERROR_DEVICE_LOST; + } + + uint32_t queueFamilyCount = 0; + vkGetPhysicalDeviceQueueFamilyProperties(vk_physical_device, + &queueFamilyCount, nullptr); + std::vector queueFamilies(queueFamilyCount); + vkGetPhysicalDeviceQueueFamilyProperties( + vk_physical_device, &queueFamilyCount, queueFamilies.data()); + uint32_t i = 0; + for (auto &qf : queueFamilies) { + if (qf.queueFlags & VK_QUEUE_COMPUTE_BIT) { + vk_computeQueueFamilyIndex = i; + } + if (qf.queueFlags & VK_QUEUE_TRANSFER_BIT) { + vk_transferQueueFamilyIndex = i; + } + ++i; + } + + float queuePriority = 1.f; + + std::vector qcis; + if (vk_computeQueueFamilyIndex == vk_transferQueueFamilyIndex) { + qcis.resize(1); + qcis[0].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + qcis[0].queueFamilyIndex = vk_transferQueueFamilyIndex; + qcis[0].queueCount = 1; + qcis[0].pQueuePriorities = &queuePriority; + } else { + qcis.resize(2); + qcis[0].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + qcis[0].queueFamilyIndex = vk_transferQueueFamilyIndex; + qcis[0].queueCount = 1; + qcis[0].pQueuePriorities = &queuePriority; + + qcis[1].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + qcis[1].queueFamilyIndex = vk_computeQueueFamilyIndex; + qcis[1].queueCount = 1; + qcis[1].pQueuePriorities = &queuePriority; + } + + VkPhysicalDeviceFeatures deviceFeatures = {}; + + std::vector extensions = { + VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME, + VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME}; + + VkDeviceCreateInfo dci = {}; + dci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + dci.pQueueCreateInfos = qcis.data(); + dci.queueCreateInfoCount = qcis.size(); + dci.pEnabledFeatures = &deviceFeatures; + dci.enabledExtensionCount = extensions.size(); + dci.ppEnabledExtensionNames = extensions.data(); + + VK_CHECK_CALL_RET( + vkCreateDevice(vk_physical_device, &dci, nullptr, &vk_device)); + + vkGetDeviceQueue(vk_device, vk_transferQueueFamilyIndex, 0, + &vk_transfer_queue); + vkGetDeviceQueue(vk_device, vk_computeQueueFamilyIndex, 0, &vk_compute_queue); + + return VK_SUCCESS; +} + +VkResult setupCommandBuffers() { + VkCommandPoolCreateInfo cpci = {}; + cpci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + cpci.queueFamilyIndex = vk_computeQueueFamilyIndex; + cpci.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + VK_CHECK_CALL_RET( + vkCreateCommandPool(vk_device, &cpci, nullptr, &vk_computeCmdPool)); + + if (vk_computeQueueFamilyIndex == vk_transferQueueFamilyIndex) { + vk_transferCmdPool = vk_computeCmdPool; + } else { + VkCommandPoolCreateInfo cpci = {}; + cpci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + cpci.queueFamilyIndex = vk_transferQueueFamilyIndex; + cpci.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + VK_CHECK_CALL_RET( + vkCreateCommandPool(vk_device, &cpci, nullptr, &vk_transferCmdPool)); + } + + { + VkCommandBufferAllocateInfo cbai = {}; + cbai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + cbai.commandPool = vk_computeCmdPool; + cbai.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + cbai.commandBufferCount = 1; + VK_CHECK_CALL_RET( + vkAllocateCommandBuffers(vk_device, &cbai, &vk_computeCmdBuffer)); + } + + { + VkCommandBufferAllocateInfo cbai = {}; + cbai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + cbai.commandPool = vk_transferCmdPool; + cbai.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + cbai.commandBufferCount = 2; + VK_CHECK_CALL_RET( + vkAllocateCommandBuffers(vk_device, &cbai, vk_transferCmdBuffers)); + } + + return VK_SUCCESS; +} + +VkBuffer createBuffer(size_t size, VkBufferUsageFlags usage) { + VkBufferCreateInfo bci = {}; + bci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + bci.size = size; + bci.usage = usage; + bci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + VkBuffer buffer; + if (vkCreateBuffer(vk_device, &bci, nullptr, &buffer) != VK_SUCCESS) { + std::cerr << "Could not create buffer!\n"; + return VK_NULL_HANDLE; + } + return buffer; +} + +VkImage createImage(VkImageType type, VkFormat format, VkExtent3D extent, + VkImageUsageFlags usage, bool exportable = true) { + VkImageCreateInfo ici = {}; + ici.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + ici.imageType = type; + ici.format = format; + ici.extent = extent; + ici.mipLevels = 1; + ici.arrayLayers = 1; + // ici.tiling = VK_IMAGE_TILING_LINEAR; + ici.usage = usage; + ici.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + ici.samples = VK_SAMPLE_COUNT_1_BIT; + // ici.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; + + VkExternalMemoryImageCreateInfo emici = {}; + if (exportable) { + emici.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO; + emici.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; + + ici.pNext = &emici; + } + + VkImage image; + if (vkCreateImage(vk_device, &ici, nullptr, &image)) { + std::cerr << "Could not create image!\n"; + return VK_NULL_HANDLE; + } + return image; +} + +VkDeviceMemory allocateDeviceMemory(size_t size, uint32_t memoryTypeIndex, + bool exportable = true) { + VkMemoryAllocateInfo mai = {}; + mai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + mai.allocationSize = size; + mai.memoryTypeIndex = memoryTypeIndex; + + VkExportMemoryAllocateInfo emai = {}; + if (exportable) { + emai.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; + emai.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; + + mai.pNext = &emai; + } + + VkDeviceMemory memory; + if (vkAllocateMemory(vk_device, &mai, nullptr, &memory) != VK_SUCCESS) { + std::cerr << "Could not allocate device memory!\n"; + return VK_NULL_HANDLE; + } + return memory; +} + +uint32_t getImageMemoryTypeIndex(VkImage image, VkMemoryPropertyFlags flags) { + VkMemoryRequirements memRequirements; + vkGetImageMemoryRequirements(vk_device, image, &memRequirements); + + VkPhysicalDeviceMemoryProperties memProperties; + vkGetPhysicalDeviceMemoryProperties(vk_physical_device, &memProperties); + + for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) { + if ((memRequirements.memoryTypeBits & (1 << i)) && + (memProperties.memoryTypes[i].propertyFlags & flags) == flags) { + return i; + } + } + std::cerr << "Image memory type index not found!\n"; + return 0; +} + +uint32_t getBufferMemoryTypeIndex(VkBuffer buffer, + VkMemoryPropertyFlags flags) { + VkMemoryRequirements memRequirements; + vkGetBufferMemoryRequirements(vk_device, buffer, &memRequirements); + + VkPhysicalDeviceMemoryProperties memProperties; + vkGetPhysicalDeviceMemoryProperties(vk_physical_device, &memProperties); + + for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) { + if ((memRequirements.memoryTypeBits & (1 << i)) && + (memProperties.memoryTypes[i].propertyFlags & flags) == flags) { + return i; + } + } + std::cerr << "Buffer memory type index not found!\n"; + return 0; +} + +VkResult cleanup() { + + if (vk_computeQueueFamilyIndex == vk_transferQueueFamilyIndex) { + vkDestroyCommandPool(vk_device, vk_computeCmdPool, nullptr); + } else { + vkDestroyCommandPool(vk_device, vk_computeCmdPool, nullptr); + vkDestroyCommandPool(vk_device, vk_transferCmdPool, nullptr); + } + + auto destroyDebugUtilsMessenger = + (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr( + vk_instance, "vkDestroyDebugUtilsMessengerEXT"); + if (destroyDebugUtilsMessenger != nullptr) { + destroyDebugUtilsMessenger(vk_instance, vk_debug_messenger, nullptr); + } + vkDestroyDevice(vk_device, nullptr); + vkDestroyInstance(vk_instance, nullptr); + return VK_SUCCESS; +} + +int getMemoryOpaqueFD(VkDeviceMemory memory) { + VkMemoryGetFdInfoKHR mgfi = {}; + mgfi.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; + mgfi.memory = memory; + mgfi.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; + + int fd = 0; + if (vk_getMemoryFdKHR != nullptr) { + VK_CHECK_CALL(vk_getMemoryFdKHR(vk_device, &mgfi, &fd)); + } + return fd; +} + +int getSemaphoreOpaqueFD(VkSemaphore semaphore) { + VkSemaphoreGetFdInfoKHR sgfi = {}; + sgfi.sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR; + sgfi.semaphore = semaphore; + sgfi.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; + + int fd = 0; + if (vk_getSemaphoreFdKHR != nullptr) { + VK_CHECK_CALL(vk_getSemaphoreFdKHR(vk_device, &sgfi, &fd)); + } + return fd; +} + +} // namespace vkutil diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py index ba173a37609b0..86cd5d1547ccd 100644 --- a/sycl/test-e2e/lit.cfg.py +++ b/sycl/test-e2e/lit.cfg.py @@ -218,6 +218,16 @@ config.substitutions.append( ('%fPIC', ('' if platform.system() == 'Windows' else '-fPIC')) ) config.substitutions.append( ('%shared_lib', '-shared') ) + +config.substitutions.append( ('%vulkan_include_dir', config.vulkan_include_dir ) ) +config.substitutions.append( ('%vulkan_lib', config.vulkan_lib ) ) + +vulkan_lib_path = os.path.dirname(config.vulkan_lib) +config.substitutions.append( ('%link-vulkan', '-L %s -lvulkan -I %s' % (vulkan_lib_path, config.vulkan_include_dir ) ) ) + +if config.vulkan_found == "TRUE": + config.available_features.add('vulkan') + if not config.gpu_aot_target_opts: config.gpu_aot_target_opts = '"-device *"' diff --git a/sycl/test-e2e/lit.site.cfg.py.in b/sycl/test-e2e/lit.site.cfg.py.in index a96f9dbff92f9..8abc1ba7bcce1 100644 --- a/sycl/test-e2e/lit.site.cfg.py.in +++ b/sycl/test-e2e/lit.site.cfg.py.in @@ -38,6 +38,10 @@ config.external_tests = "@SYCL_EXTERNAL_TESTS@" config.extra_include = "@CMAKE_CURRENT_SOURCE_DIR@/include" config.gpu_aot_target_opts = lit_config.params.get("gpu_aot_target_opts", "@GPU_AOT_TARGET_OPTS@") +config.vulkan_include_dir = "@Vulkan_INCLUDE_DIRS@" +config.vulkan_lib = "@Vulkan_LIBRARY@" +config.vulkan_found = "@Vulkan_FOUND@" + config.run_launcher = lit_config.params.get('run_launcher', "@SYCL_E2E_RUN_LAUNCHER@") import lit.llvm