diff --git a/.github/workflows/build-hw-reusable.yml b/.github/workflows/build-hw-reusable.yml index 8c8f28a868..359f56a5c6 100644 --- a/.github/workflows/build-hw-reusable.yml +++ b/.github/workflows/build-hw-reusable.yml @@ -32,7 +32,8 @@ jobs: {name: "${{inputs.name}}", platform: "${{inputs.platform}}"}, ] build_type: [Debug, Release] - compiler: [{c: gcc, cxx: g++}, {c: clang, cxx: clang++}] + compiler: [{c: clang, cxx: clang++}] + repeat: [1, 2, 3, 4, 5, 6] # TODO: The latest L0 loader segfaults when built with clang. exclude: - adapter: {name: L0, platform: ""} diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 6f04308154..971730a668 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -160,12 +160,12 @@ jobs: working-directory: ${{github.workspace}}/build run: ctest -C ${{matrix.build_type}} --output-on-failure -L "fuzz-short" --verbose - level-zero: - if: github.repository == 'oneapi-src/unified-runtime' - name: Level Zero - uses: ./.github/workflows/build-hw-reusable.yml - with: - name: L0 + #level-zero: + # if: github.repository == 'oneapi-src/unified-runtime' + # name: Level Zero + # uses: ./.github/workflows/build-hw-reusable.yml + # with: + # name: L0 opencl: if: github.repository == 'oneapi-src/unified-runtime' @@ -175,26 +175,26 @@ jobs: name: OPENCL platform: "Intel(R) OpenCL" - cuda: - if: github.repository == 'oneapi-src/unified-runtime' - name: CUDA - uses: ./.github/workflows/build-hw-reusable.yml - with: - name: CUDA - - hip: - if: github.repository == 'oneapi-src/unified-runtime' - name: HIP - uses: ./.github/workflows/build-hw-reusable.yml - with: - name: HIP - - native-cpu: - if: github.repository == 'oneapi-src/unified-runtime' - name: Native CPU - uses: ./.github/workflows/build-hw-reusable.yml - with: - name: NATIVE_CPU + #cuda: + # if: github.repository == 'oneapi-src/unified-runtime' + # name: CUDA + # uses: ./.github/workflows/build-hw-reusable.yml + # with: + # name: CUDA + + #hip: + # if: github.repository == 'oneapi-src/unified-runtime' + # name: HIP + # uses: ./.github/workflows/build-hw-reusable.yml + # with: + # name: HIP + + #native-cpu: + # if: github.repository == 'oneapi-src/unified-runtime' + # name: Native CPU + # uses: ./.github/workflows/build-hw-reusable.yml + # with: + # name: NATIVE_CPU windows-build: name: Build - Windows diff --git a/test/conformance/device_code/CMakeLists.txt b/test/conformance/device_code/CMakeLists.txt index 1419604b9d..622868a2a7 100644 --- a/test/conformance/device_code/CMakeLists.txt +++ b/test/conformance/device_code/CMakeLists.txt @@ -107,11 +107,26 @@ macro(add_device_binary SOURCE_FILE) COMMAND ${CMAKE_COMMAND} -E env ${EXTRA_ENV} ${UR_DEVICE_CODE_EXTRACTOR} --stem="${TRIPLE}.bin" ${EXE_PATH} + COMMAND md5sum "${BIN_PATH}" + WORKING_DIRECTORY "${DEVICE_BINARY_DIR}" DEPENDS ${SOURCE_FILE} ) add_custom_target(generate_${KERNEL_NAME}_${TRIPLE} DEPENDS ${BIN_PATH}) add_dependencies(generate_device_binaries generate_${KERNEL_NAME}_${TRIPLE}) + + if(${KERNEL_NAME} MATCHES "sampler_read") + add_custom_command(OUTPUT "${EXE_PATH}.cmakemarker" + COMMAND ${CMAKE_COMMAND} -E env ${EXTRA_ENV} "${EXE_PATH}" + + COMMAND "touch" "${EXE_PATH}.cmakemarker" + + WORKING_DIRECTORY "${DEVICE_BINARY_DIR}" + DEPENDS ${BIN_PATH} + ) + add_custom_target(test_test_test DEPENDS "${EXE_PATH}.cmakemarker") + add_dependencies(generate_device_binaries test_test_test) + endif() endforeach() set(IH_PATH "${DEVICE_BINARY_DIR}/${KERNEL_NAME}.ih") @@ -149,6 +164,7 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/build_failure.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fixed_wg_size.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/standard_types.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/subgroup.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/sampler_read.cpp) set(KERNEL_HEADER ${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/kernel_entry_points.h) add_custom_command(OUTPUT ${KERNEL_HEADER} diff --git a/test/conformance/device_code/sampler_read.cpp b/test/conformance/device_code/sampler_read.cpp new file mode 100644 index 0000000000..713bf70197 --- /dev/null +++ b/test/conformance/device_code/sampler_read.cpp @@ -0,0 +1,49 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include + +void run_test(sycl::coordinate_normalization_mode norm_mode, sycl::addressing_mode addr_mode, sycl::filtering_mode filter_mode) { + std::cout << "Combination: " << (int)norm_mode << " - " << (int)addr_mode << " - " << (int)filter_mode << "\n"; + sycl::queue sycl_queue; + + const int height = 4; + const int width = 4; + auto image_range = sycl::range<2>(height, width); + const int channels = 4; + + sycl::float2 coord{0.0, 0.0}; + float result{0.0}; + sycl::buffer result_buff(&result, sycl::range<1>{1}); + + std::vector in_data(height * width * channels, 0.5f); + sycl::image<2> image_in(in_data.data(), sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32, image_range); + + sycl_queue.submit([&](sycl::handler &cgh) { + sycl::accessor + in_acc(image_in, cgh); + sycl::accessor result_acc( + result_buff, cgh); + + sycl::sampler smpl(norm_mode, addr_mode, filter_mode); + + cgh.single_task( + [=]() { result_acc[0] = in_acc.read(coord, smpl)[3]; }); + }); +} + +int main() { + for (auto n : {sycl::coordinate_normalization_mode::normalized, sycl::coordinate_normalization_mode::unnormalized}) { + for (auto a : {sycl::addressing_mode::none, sycl::addressing_mode::clamp_to_edge, sycl::addressing_mode::clamp, sycl::addressing_mode::repeat, sycl::addressing_mode::mirrored_repeat}) { + for (auto f : {sycl::filtering_mode::linear, sycl::filtering_mode::nearest}) { + run_test(n, a, f); + } + } + } + return 0; +} diff --git a/test/conformance/kernel/urKernelSetArgSampler.cpp b/test/conformance/kernel/urKernelSetArgSampler.cpp index 017c89ba14..5891e01742 100644 --- a/test/conformance/kernel/urKernelSetArgSampler.cpp +++ b/test/conformance/kernel/urKernelSetArgSampler.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2024 Intel Corporation // Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -40,23 +40,8 @@ struct urKernelSetArgSamplerTestWithParam ur_sampler_handle_t sampler = nullptr; }; -UUR_TEST_SUITE_P( - urKernelSetArgSamplerTestWithParam, - ::testing::Combine( - ::testing::Values(true, false), - ::testing::Values(UR_SAMPLER_ADDRESSING_MODE_NONE, - UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE, - UR_SAMPLER_ADDRESSING_MODE_CLAMP, - UR_SAMPLER_ADDRESSING_MODE_REPEAT, - UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT), - ::testing::Values(UR_SAMPLER_FILTER_MODE_NEAREST, - UR_SAMPLER_FILTER_MODE_LINEAR)), - uur::deviceTestWithParamPrinter); - -TEST_P(urKernelSetArgSamplerTestWithParam, Success) { - uint32_t arg_index = 2; - ASSERT_SUCCESS(urKernelSetArgSampler(kernel, arg_index, nullptr, sampler)); -} +UUR_TEST_SUITE_P(urKernelSetArgSamplerTestWithParam, uur::sampler_values, + uur::deviceTestWithParamPrinter); struct urKernelSetArgSamplerTest : uur::urBaseKernelTest { void SetUp() { @@ -85,11 +70,65 @@ struct urKernelSetArgSamplerTest : uur::urBaseKernelTest { UUR_INSTANTIATE_KERNEL_TEST_SUITE_P(urKernelSetArgSamplerTest); -TEST_P(urKernelSetArgSamplerTest, SuccessWithProps) { - ur_kernel_arg_sampler_properties_t props{ - UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES, nullptr}; - size_t arg_index = 2; - ASSERT_SUCCESS(urKernelSetArgSampler(kernel, arg_index, &props, sampler)); +struct urKernelSetArgSamplerReadTestWithParam + : uur::urBaseKernelExecutionTestWithParam { + + void SetUp() { + program_name = "sampler_read"; + UUR_RETURN_ON_FATAL_FAILURE( + urBaseKernelExecutionTestWithParam::SetUp()); + + // Images and samplers are not available on AMD + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + if (backend == UR_PLATFORM_BACKEND_HIP) { + GTEST_SKIP() << "Sampler are not supported on hip."; + } + + const auto param = getParam(); + normalized = std::get<0>(param); + addr_mode = std::get<1>(param); + filter_mode = std::get<2>(param); + + // This is an invalid combination + if (!normalized && + addr_mode == UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT) { + GTEST_SKIP() + << "Sampler can't use unnormalised repeat addressing mode"; + } + + ur_sampler_desc_t _sampler_desc = { + UR_STRUCTURE_TYPE_SAMPLER_DESC, /* sType */ + nullptr, /* pNext */ + normalized, /* normalizedCoords */ + addr_mode, /* addressingMode */ + filter_mode /* filterMode */ + }; + ASSERT_SUCCESS(urSamplerCreate(context, &_sampler_desc, &sampler)); + } + + void TearDown() { + if (sampler) { + ASSERT_SUCCESS(urSamplerRelease(sampler)); + } + UUR_RETURN_ON_FATAL_FAILURE( + urBaseKernelExecutionTestWithParam::TearDown()); + } + + ur_sampler_handle_t sampler = nullptr; + bool normalized; + ur_sampler_addressing_mode_t addr_mode; + ur_sampler_filter_mode_t filter_mode; + std::array offset = {0, 0}; + std::array size = {1, 1}; +}; +UUR_TEST_SUITE_P(urKernelSetArgSamplerReadTestWithParam, uur::sampler_values, + uur::deviceTestWithParamPrinter); + +TEST_P(urKernelSetArgSamplerTestWithParam, Success) { + uint32_t arg_index = 2; + ASSERT_SUCCESS(urKernelSetArgSampler(kernel, arg_index, nullptr, sampler)); } TEST_P(urKernelSetArgSamplerTest, InvalidNullHandleKernel) { @@ -111,3 +150,102 @@ TEST_P(urKernelSetArgSamplerTest, InvalidKernelArgumentIndex) { UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX, urKernelSetArgSampler(kernel, num_kernel_args + 1, nullptr, sampler)); } + +namespace { +template +using ImageDataArray = std::array, W>, H>; +} + +/* +TEST_P(urKernelSetArgSamplerReadTestWithParam, ReadSamplerFilter) { + std::array S({0.0, 0.0, 0.0, 1.0}); + std::array E({0.0, 0.0, 0.0, 0.0}); + + ImageDataArray<4, 4, float> input_image{{ + {S, S, S, S}, + {S, E, E, S}, + {S, E, E, S}, + {S, E, E, S}, + }}; + + ur_mem_handle_t result = nullptr; + AddBuffer1DArg(sizeof(float) * 1, &result); + ur_mem_handle_t image = nullptr; + AddInputFloatImage<4, 4>(input_image.data(), &image, + UR_IMAGE_CHANNEL_TYPE_FLOAT, + UR_IMAGE_CHANNEL_ORDER_RGBA); + if (normalized) { + AddPodArg(std::array({1.0 / 4.0, 2.0 / 4.0})); + } else { + AddPodArg(std::array({1.0, 2.0})); + } + ASSERT_SUCCESS(urKernelSetArgSampler(kernel, 4, nullptr, sampler)); + ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, 2, offset.data(), + size.data(), nullptr, 0, nullptr, + nullptr)); + + float result_read = -1.0; + ASSERT_SUCCESS(urEnqueueMemBufferRead(queue, result, true, 0, sizeof(float), + &result_read, 0, nullptr, nullptr)); + + if (filter_mode == UR_SAMPLER_FILTER_MODE_LINEAR) { + ASSERT_FLOAT_EQ(result_read, 0.5); + } else { + ASSERT_FLOAT_EQ(result_read, 0.0); + } +} +*/ +TEST_P(urKernelSetArgSamplerReadTestWithParam, ReadSamplerAddressMode) { + std::array S({0.0, 0.0, 0.0, 1.0}); + std::array G({0.0, 0.0, 0.0, 0.5}); + + ImageDataArray<4, 4, float> input_image{{ + {S, S, G, G}, + {S, S, G, G}, + {G, G, S, S}, + {G, G, S, S}, + }}; + + std::cout << "----\n"; + std::cout << "Mode: " << addr_mode << "\n"; + std::cout << "Filter: " << filter_mode << "\n"; + + ur_mem_handle_t result = nullptr; + AddBuffer1DArg(sizeof(float) * 1, &result); + ur_mem_handle_t image = nullptr; + AddInputFloatImage<4, 4>(input_image.data(), &image, + UR_IMAGE_CHANNEL_TYPE_FLOAT, + UR_IMAGE_CHANNEL_ORDER_RGBA); + if (normalized) { + AddPodArg(std::array({0.5 / 4.0, 4.5 / 4.0})); + } else { + AddPodArg(std::array({0.5, 4.5})); + } + ASSERT_SUCCESS(urKernelSetArgSampler(kernel, 4, nullptr, sampler)); + ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, 2, offset.data(), + size.data(), nullptr, 0, nullptr, + nullptr)); + + float result_read = -1.0; + ASSERT_SUCCESS(urEnqueueMemBufferRead(queue, result, true, 0, sizeof(float), + &result_read, 0, nullptr, nullptr)); + + switch (addr_mode) { + case UR_SAMPLER_ADDRESSING_MODE_CLAMP: + // Border colour in OpenCL is fully transparent black + ASSERT_FLOAT_EQ(result_read, 0.0); + break; + case UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE: + case UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT: + ASSERT_FLOAT_EQ(result_read, 0.5); + break; + case UR_SAMPLER_ADDRESSING_MODE_REPEAT: + ASSERT_FLOAT_EQ(result_read, 1.0); + case UR_SAMPLER_ADDRESSING_MODE_NONE: + // Behaviour unspecified + break; + case UR_SAMPLER_ADDRESSING_MODE_FORCE_UINT32: + GTEST_FAIL() << "Unknown address mode"; + break; + } +} diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index cf64aa13d3..66a4a9a975 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -1116,6 +1116,16 @@ std::string deviceTestWithParamPrinter( using SamplerCreateParamT = std::tuple; +// AAA +const auto sampler_values = ::testing::Combine( + ::testing::Values(false), + ::testing::Values(//UR_SAMPLER_ADDRESSING_MODE_NONE, + //UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE, + //UR_SAMPLER_ADDRESSING_MODE_CLAMP, + UR_SAMPLER_ADDRESSING_MODE_REPEAT, + UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT), + ::testing::Values(//UR_SAMPLER_FILTER_MODE_NEAREST, + UR_SAMPLER_FILTER_MODE_LINEAR)); template <> std::string deviceTestWithParamPrinter( @@ -1317,6 +1327,37 @@ struct KernelLaunchHelper { *out_buffer = mem_handle; } + // Adds a kernel arg representing a sycl buffer constructed with a 1D range. + template + void AddInputFloatImage(void *data, ur_mem_handle_t *out_buffer, + ur_image_channel_type_t channel_type, + ur_image_channel_order_t channel_order) { + ur_mem_handle_t mem_handle; + ur_image_format_t image_format{channel_order, channel_type}; + ur_image_desc_t image_desc{ + UR_STRUCTURE_TYPE_IMAGE_DESC, ///< [in] type of this structure + nullptr, ///< [in][optional] pointer to extension-specific structure + UR_MEM_TYPE_IMAGE2D, ///< [in] memory object type + W, ///< [in] image width + H, ///< [in] image height + 1, ///< [in] image depth + 1, ///< [in] image array size + 0, ///< [in] image row pitch + 0, ///< [in] image slice pitch + 0, ///< [in] number of MIP levels + 0 ///< [in] number of samples + }; + + ASSERT_SUCCESS(urMemImageCreate( + context, UR_MEM_FLAG_READ_WRITE | UR_MEM_FLAG_USE_HOST_POINTER, + &image_format, &image_desc, data, &mem_handle)); + ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, current_arg_index, nullptr, + mem_handle)); + + current_arg_index++; + *out_buffer = mem_handle; + } + template void AddPodArg(T data) { ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index, sizeof(data), nullptr, &data)); @@ -1379,6 +1420,16 @@ struct urBaseKernelExecutionTestWithParam : urBaseKernelTestWithParam { buffer_args.push_back(*out_buffer); } + // Adds a kernel arg representing a sycl buffer constructed with a W*H range. + template + void AddInputFloatImage(void *data, ur_mem_handle_t *out_buffer, + ur_image_channel_type_t channel_type, + ur_image_channel_order_t channel_order) { + helper.AddInputFloatImage(data, out_buffer, channel_type, + channel_order); + buffer_args.push_back(*out_buffer); + } + template void AddPodArg(K data) { helper.AddPodArg(data); } void Launch1DRange(size_t global_size, size_t local_size = 1) {