From 64256ea6e419ab351954e065d366909e4cc65a11 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Wed, 26 Mar 2025 06:12:42 -0700 Subject: [PATCH 1/4] [SYCL] Switch to use plain array in `sycl::vec` in more cases The problem with using `std::array` in `sycl::vec` is that we cannot compile it in some environments (namely, Windows) because the former may use something that is illegal in SYCL device code. intel/llvm#17025 fixed that, but only did so under preview breaking changes mode, which does not satisfy some of our customers immediately. This PR introduces two main changes: - it allows to opt-in for new behavior through passing `-D__SYCL_USE_NEW_VEC_IMPL=1` macro without using `-fpreview-breaking-changes` flag. That allows for a more gradual opt-in from customers who are interested in this fix - it switches the imlpementation to use the new approach with C-style arrays if their size and alignment is the same as for the corresponding `std::array` - in that case their memory layout is expected to be absolutely the same and therefore it should be safe to use the new approach without fear of some ABI incompatibilities. This allows for customers to benefit from the fix without specifying any extra macro (which should be the case for the most common platforms out there) --- .../detail/type_traits/vec_marray_traits.hpp | 8 -- sycl/include/sycl/detail/vector_convert.hpp | 2 - sycl/include/sycl/vector.hpp | 45 ++++++++-- sycl/test/abi/layout_vec.cpp | 6 +- sycl/test/basic_tests/vectors/storage.cpp | 42 ++++++++++ .../vector/bf16_builtins.cpp | 10 +-- .../vector/convert_bfloat.cpp | 8 +- .../check_device_code/vector/math_ops.cpp | 84 +++++++++---------- 8 files changed, 132 insertions(+), 73 deletions(-) create mode 100644 sycl/test/basic_tests/vectors/storage.cpp diff --git a/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp b/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp index dfad1319f7b0e..6ce39bf6a072a 100644 --- a/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp +++ b/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp @@ -8,14 +8,6 @@ #pragma once -#ifndef __SYCL_USE_NEW_VEC_IMPL -#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) -#define __SYCL_USE_NEW_VEC_IMPL 1 -#else -#define __SYCL_USE_NEW_VEC_IMPL 0 -#endif -#endif - #include #include diff --git a/sycl/include/sycl/detail/vector_convert.hpp b/sycl/include/sycl/detail/vector_convert.hpp index 6729dfd151100..bd414b01510a8 100644 --- a/sycl/include/sycl/detail/vector_convert.hpp +++ b/sycl/include/sycl/detail/vector_convert.hpp @@ -57,9 +57,7 @@ #include // for is_sigeninteger, is_s... #include // for errc -#ifdef __INTEL_PREVIEW_BREAKING_CHANGES #include -#endif #include // bfloat16 #include diff --git a/sycl/include/sycl/vector.hpp b/sycl/include/sycl/vector.hpp index e8e2056191d03..9b5e1413f69dc 100644 --- a/sycl/include/sycl/vector.hpp +++ b/sycl/include/sycl/vector.hpp @@ -22,6 +22,15 @@ #endif #endif // __clang__ +// See vec::DataType definitions for more details +#ifndef __SYCL_USE_NEW_VEC_IMPL +#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) +#define __SYCL_USE_NEW_VEC_IMPL 1 +#else +#define __SYCL_USE_NEW_VEC_IMPL 0 +#endif +#endif + #if !defined(__HAS_EXT_VECTOR_TYPE__) && defined(__SYCL_DEVICE_ONLY__) #error "SYCL device compiler is built without ext_vector_type support" #endif @@ -86,6 +95,9 @@ struct elem { }; namespace detail { +// To be defined in tests, trick to access vec's private methods +template class vec_base_test; + template class OperationCurrentT, int... Indexes> class SwizzleOp; @@ -144,17 +156,34 @@ template class vec_base { static constexpr size_t AdjustedNum = (NumElements == 3) ? 4 : NumElements; // This represent type of underlying value. There should be only one field // in the class, so vec should be equal to float16 in memory. -#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) && \ - defined(__SYCL_USE_NEW_VEC_IMPL) - using DataType = DataT[AdjustedNum]; + // + // In intel/llvm#14130 we incorrectly used std::array as an underlying storage + // for vec data. The problem with std::array is that it comes from the C++ + // STL headers which we do not control and they may use something that is + // illegal in SYCL device code. One of specific examples is use of debug + // assertions in MSVC's STL implementation. + // + // The better approach is to use plain C++ array, but the problem here is that + // C++ specification does not provide any guarantees about the memory layout + // of std::array and therefore directly switching to it would technically be + // an ABI-break, even though the practical chances of encountering the issue + // are low. + // + // To play it safe, we only switch to use plain array if both its size and + // alignment match those of std::array, or unless the new behavior is forced + // via __SYCL_USE_NEW_VEC_IMPL or preview breaking changes mode. + using DataType = std::conditional_t< +#if __SYCL_USE_NEW_VEC_IMPL + true, #else - using DataType = std::array; - // Assuming that std::array has the same size as the underlying array. - // C++ standard does not guarantee that, but it is true for most popular - // implementations. - static_assert(sizeof(DataType) == sizeof(DataT[AdjustedNum])); + sizeof(std::array) == sizeof(DataT[AdjustedNum]) && + alignof(std::array) == + alignof(DataT[AdjustedNum]), #endif + DataT[AdjustedNum], std::array>; + // To allow testing of private methods + template friend class detail::vec_base_test; protected: // fields // Alignment is the same as size, to a maximum size of 64. SPEC requires diff --git a/sycl/test/abi/layout_vec.cpp b/sycl/test/abi/layout_vec.cpp index 7824f84e7bdd2..20ea608a88add 100644 --- a/sycl/test/abi/layout_vec.cpp +++ b/sycl/test/abi/layout_vec.cpp @@ -12,8 +12,7 @@ SYCL_EXTERNAL void foo(sycl::vec) {} // CHECK: 0 | class sycl::vec // ignore empty base classes -// CHECK: 0 | struct std::array m_Data -// CHECK-NEXT: 0 | typename {{.+}}::_Type _M_elems +// CHECK: 0 | DataType m_Data // CHECK-NEXT: | [sizeof=16, dsize=16, align=16, // CHECK-NEXT: | nvsize=16, nvalign=16] @@ -23,7 +22,6 @@ SYCL_EXTERNAL void foo(sycl::vec) {} // CHECK: 0 | class sycl::vec<_Bool, 16> // ignore empty base classes -// CHECK: 0 | struct std::array<_Bool, 16> m_Data -// CHECK-NEXT: 0 | typename {{.+}}::_Type _M_elems +// CHECK: 0 | DataType m_Data // CHECK-NEXT: | [sizeof=16, dsize=16, align=16, // CHECK-NEXT: | nvsize=16, nvalign=16] diff --git a/sycl/test/basic_tests/vectors/storage.cpp b/sycl/test/basic_tests/vectors/storage.cpp new file mode 100644 index 0000000000000..f4dbf5080ceb3 --- /dev/null +++ b/sycl/test/basic_tests/vectors/storage.cpp @@ -0,0 +1,42 @@ +// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only +// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only -fpreview-breaking-changes +// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only -D__SYCL_USE_NEW_VEC_IMPL=1 +// expected-no-diagnostics + +#include + +#include + +namespace sycl { +namespace detail { +template class vec_base_test { +public: + static void do_check() { + constexpr bool uses_std_array = + std::is_same_v::DataType, std::array>; + constexpr bool uses_plain_array = + std::is_same_v::DataType, T[N]>; + + constexpr bool std_array_and_plain_array_have_the_same_layout = + sizeof(std::array) == sizeof(T[N]) && + alignof(std::array) == alignof(T[N]); + +#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) || __SYCL_USE_NEW_VEC_IMPL + static_assert(uses_plain_array, + "We must use plain array regardless of " + "layout, because user is opted-in for a potential ABI-break"); +#else + static_assert(std_array_and_plain_array_have_the_same_layout == + uses_plain_array, + "If layouts are the same, we must use safer plain array " + "instead of std::array, or vice versa"); + static_assert( + !std_array_and_plain_array_have_the_same_layout == uses_std_array, + "If layouts are not the same, we must use std::array to preserve ABI"); +#endif + } +}; +} // namespace detail +} // namespace sycl + +int main() { sycl::detail::vec_base_test::do_check(); } diff --git a/sycl/test/check_device_code/vector/bf16_builtins.cpp b/sycl/test/check_device_code/vector/bf16_builtins.cpp index f35df57422a77..29baa999513c4 100644 --- a/sycl/test/check_device_code/vector/bf16_builtins.cpp +++ b/sycl/test/check_device_code/vector/bf16_builtins.cpp @@ -69,7 +69,7 @@ SYCL_EXTERNAL auto TestFMin(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestFMaxN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.71") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.71") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.71") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.70") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.70") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.70") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I12_I:%.*]] = alloca <3 x float>, align 16 // CHECK-NEXT: [[DST_I_I_I_I13_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -123,7 +123,7 @@ SYCL_EXTERNAL auto TestFMax(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z9TestIsNanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.149") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.188") align 8 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.146") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.184") align 8 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <4 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4 @@ -149,7 +149,7 @@ SYCL_EXTERNAL auto TestIsNan(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestFabsN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.342") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.342") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.336") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.336") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -185,7 +185,7 @@ SYCL_EXTERNAL auto TestFabs(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestCeilN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.342") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.342") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.336") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.336") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -221,7 +221,7 @@ SYCL_EXTERNAL auto TestCeil(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestFMAN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEES5_S5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.420") align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.420") align 32 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.420") align 32 captures(none) [[B:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.420") align 32 captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.412") align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[B:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I14_I:%.*]] = alloca <16 x float>, align 64 // CHECK-NEXT: [[DST_I_I_I_I15_I:%.*]] = alloca [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 diff --git a/sycl/test/check_device_code/vector/convert_bfloat.cpp b/sycl/test/check_device_code/vector/convert_bfloat.cpp index 4060167a04eef..1826eb03c3ff4 100644 --- a/sycl/test/check_device_code/vector/convert_bfloat.cpp +++ b/sycl/test/check_device_code/vector/convert_bfloat.cpp @@ -63,7 +63,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestBFtointDeviceRZRN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.71") align 16 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.70") align 16 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) // CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]] @@ -90,7 +90,7 @@ SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z20TestBFtointDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi1EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.110") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 2 captures(none) dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.108") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 2 captures(none) dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(4) [[INP]], align 2, !tbaa [[TBAA11]], !noalias [[META25]] @@ -182,7 +182,7 @@ SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestLLtoBFDeviceRTPRN4sycl3_V13vecIxLi1EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.149") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.146") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[INP]], align 8, !tbaa [[TBAA46:![0-9]+]], !noalias [[META43]] @@ -195,7 +195,7 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z22TestShorttoBFDeviceRTNRN4sycl3_V13vecIsLi2EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.229") align 4 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 4 captures(none) dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.224") align 4 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 4 captures(none) dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA11]], !noalias [[META49]] diff --git a/sycl/test/check_device_code/vector/math_ops.cpp b/sycl/test/check_device_code/vector/math_ops.cpp index d8770417b1021..a5cf543f363d3 100644 --- a/sycl/test/check_device_code/vector/math_ops.cpp +++ b/sycl/test/check_device_code/vector/math_ops.cpp @@ -46,7 +46,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIcLi16EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.74") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.74") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.74") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META29:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.73") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.73") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.73") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META29:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) @@ -60,7 +60,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // std::byte does not support '+'. Therefore, using bitwise XOR as a substitute. // CHECK-LABEL: define dso_local spir_func void @_Z7TestXorN4sycl3_V13vecISt4byteLi8EEES3_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.115") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.115") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.115") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META37:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.113") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.113") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.113") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META37:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) @@ -75,7 +75,7 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIbLi4EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.126") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.126") align 4 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.126") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.123") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) @@ -90,7 +90,7 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_6detail9half_impl4halfELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.167") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.167") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.167") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_used_aspects [[META60:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.163") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.163") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.163") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_used_aspects [[META60:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) @@ -104,10 +104,10 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.208") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.208") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.208") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !srcloc [[META72:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.203") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !srcloc [[META72:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.208", align 8 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.203", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META73:![0-9]+]]) @@ -121,17 +121,17 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 3 // CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILPLINS0_3EXT6ONEAPI8BFLOAT16EEENST9ENABLE_IFIX24IS_OP_AVAILABLE_FOR_TYPEIST4PLUSIVET_EENS0_3VECIS5_LI3EEEE4TYPEERKSB_SF__EXIT:%.*]] // CHECK: for.body.i.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: [[ARRAYIDX_I_I12_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[ARRAYIDX_I12_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I_I]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META80:![0-9]+]] -// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8:[0-9]+]], !noalias [[META83:![0-9]+]] -// CHECK-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I12_I_I]]) #[[ATTR8]], !noalias [[META83]] +// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I]]) #[[ATTR8:[0-9]+]], !noalias [[META83:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I12_I_I]]) #[[ATTR8]], !noalias [[META83]] // CHECK-NEXT: [[ADD_I_I_I_I:%.*]] = fadd float [[CALL_I_I_I_I_I_I]], [[CALL_I_I2_I_I_I_I]] // CHECK-NEXT: store float [[ADD_I_I_I_I]], ptr [[REF_TMP_I_I_I_I]], align 4, !tbaa [[TBAA86:![0-9]+]], !noalias [[META83]] // CHECK-NEXT: [[CALL_I_I3_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I_I]]) #[[ATTR8]], !noalias [[META83]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META80]] -// CHECK-NEXT: [[ARRAYIDX_I_I14_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: store i16 [[CALL_I_I3_I_I_I_I]], ptr [[ARRAYIDX_I_I14_I_I]], align 2, !tbaa [[TBAA88:![0-9]+]], !noalias [[META79]] +// CHECK-NEXT: [[ARRAYIDX_I14_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[CALL_I_I3_I_I_I_I]], ptr [[ARRAYIDX_I14_I_I]], align 2, !tbaa [[TBAA88:![0-9]+]], !noalias [[META79]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP90:![0-9]+]] // CHECK: _ZN4sycl3_V16detailplINS0_3ext6oneapi8bfloat16EEENSt9enable_ifIX24is_op_available_for_typeISt4plusIvET_EENS0_3vecIS5_Li3EEEE4typeERKSB_SF_.exit: @@ -148,7 +148,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, /***************** Binary Logical Ops *******************/ // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIiLi16EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.247") align 64 captures(none) initializes((0, 64)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.247") align 64 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.247") align 64 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META92:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.241") align 64 captures(none) initializes((0, 64)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.241") align 64 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.241") align 64 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META92:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META93:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META96:![0-9]+]]) @@ -164,7 +164,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func noundef range(i8 -1, 1) <3 x i8> @_Z15TestGreaterThanN4sycl3_V13vecISt4byteLi3EEES3_( -// CHECK-SAME: ptr noundef readonly byval(%"class.sycl::_V1::vec.289") align 4 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.289") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] !srcloc [[META100:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr noundef readonly byval(%"class.sycl::_V1::vec.282") align 4 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.282") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] !srcloc [[META100:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i8>, ptr [[A]], align 1 // CHECK-NEXT: [[LOADVECN_I_I2:%.*]] = load <4 x i8>, ptr [[B]], align 1 @@ -178,7 +178,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIbLi2EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.298") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.337") align 2 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.337") align 2 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META101:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.290") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.328") align 2 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.328") align 2 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META101:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) @@ -194,7 +194,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.380") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.419") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.419") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META112:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.370") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META112:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META113:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META116:![0-9]+]]) @@ -210,9 +210,9 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.462") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.501") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.501") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META120:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.450") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.488") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.488") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META120:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.462", align 8 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.450", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META121:![0-9]+]]) @@ -225,14 +225,14 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 4 // CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILGTINS0_3EXT6ONEAPI8BFLOAT16EEENST9ENABLE_IFIX24IS_OP_AVAILABLE_FOR_TYPEIST7GREATERIVET_EENS0_3VECISLI4EEEE4TYPEERKNSA_IS5_LI4EEESG__EXIT:%.*]] // CHECK: for.body.i.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: [[ARRAYIDX_I_I14_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: [[CALL_I_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8]], !noalias [[META127]] -// CHECK-NEXT: [[CALL_I_I2_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I14_I_I]]) #[[ATTR8]], !noalias [[META127]] +// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[ARRAYIDX_I14_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[CALL_I_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I]]) #[[ATTR8]], !noalias [[META127]] +// CHECK-NEXT: [[CALL_I_I2_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I14_I_I]]) #[[ATTR8]], !noalias [[META127]] // CHECK-NEXT: [[CMP_I_I_I_I_I:%.*]] = fcmp ogt float [[CALL_I_I_I_I_I_I_I]], [[CALL_I_I2_I_I_I_I_I]] // CHECK-NEXT: [[CONV6_I_I:%.*]] = sext i1 [[CMP_I_I_I_I_I]] to i16 -// CHECK-NEXT: [[ARRAYIDX_I_I16_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: store i16 [[CONV6_I_I]], ptr [[ARRAYIDX_I_I16_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META127]] +// CHECK-NEXT: [[ARRAYIDX_I16_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[CONV6_I_I]], ptr [[ARRAYIDX_I16_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META127]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP128:![0-9]+]] // CHECK: _ZN4sycl3_V16detailgtINS0_3ext6oneapi8bfloat16EEENSt9enable_ifIX24is_op_available_for_typeISt7greaterIvET_EENS0_3vecIsLi4EEEE4typeERKNSA_IS5_Li4EEESG_.exit: @@ -249,7 +249,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, /********************** Unary Ops **********************/ // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIiLi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.539") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.539") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META129:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.526") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.526") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META129:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META130:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META133:![0-9]+]]) @@ -264,7 +264,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecIiLi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.579") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.579") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META137:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.565") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.565") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META137:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META138:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META141:![0-9]+]]) @@ -277,7 +277,7 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // Negation is not valid for std::byte. Therefore, using bitwise negation. // CHECK-LABEL: define dso_local spir_func void @_Z19TestBitwiseNegationN4sycl3_V13vecISt4byteLi16EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.618") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.618") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META145:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.604") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.604") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META145:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META146:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META149:![0-9]+]]) @@ -289,7 +289,7 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } SYCL_EXTERNAL auto TestBitwiseNegation(vec a) { return ~a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIbLi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.628") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.126") align 4 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META153:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.613") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META153:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META154:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META157:![0-9]+]]) @@ -302,7 +302,7 @@ SYCL_EXTERNAL auto TestBitwiseNegation(vec a) { return ~a; } SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_6detail9half_impl4halfELi2EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.668") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.707") align 4 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META164:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.652") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.690") align 4 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META164:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META165:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META168:![0-9]+]]) @@ -315,7 +315,7 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.419") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.419") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META175:![0-9]+]] !sycl_used_aspects [[META60]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.408") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META175:![0-9]+]] !sycl_used_aspects [[META60]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META176:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META179:![0-9]+]]) @@ -327,9 +327,9 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.748") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.208") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META183:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.730") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META183:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.748", align 8 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.730", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META184:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META187:![0-9]+]]) @@ -341,12 +341,12 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 3 // CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILNTERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI3EEE_EXIT:%.*]] // CHECK: for.body.i.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: [[CALL_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8]], !noalias [[META190]] +// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[CALL_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I]]) #[[ATTR8]], !noalias [[META190]] // CHECK-NEXT: [[TOBOOL_I_I_I:%.*]] = fcmp oeq float [[CALL_I_I_I_I_I]], 0.000000e+00 // CHECK-NEXT: [[CONV2_I_I:%.*]] = sext i1 [[TOBOOL_I_I_I]] to i16 -// CHECK-NEXT: [[ARRAYIDX_I_I9_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: store i16 [[CONV2_I_I]], ptr [[ARRAYIDX_I_I9_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META190]] +// CHECK-NEXT: [[ARRAYIDX_I9_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[CONV2_I_I]], ptr [[ARRAYIDX_I9_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META190]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP191:![0-9]+]] // CHECK: _ZN4sycl3_V16detailntERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi3EEE.exit: @@ -358,10 +358,10 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.786") align 32 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.786") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META192:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.768") align 32 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.768") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META192:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.786", align 32 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.768", align 32 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[RES_I_I]]), !noalias [[META193:![0-9]+]] // CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[RES_I_I]], i8 0, i64 32, i1 false), !noalias [[META196:![0-9]+]] @@ -372,15 +372,15 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 16 // CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILNGERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI16EEE_EXIT:%.*]] // CHECK: for.body.i.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META199:![0-9]+]] -// CHECK-NEXT: [[CALL_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8]], !noalias [[META202:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I]]) #[[ATTR8]], !noalias [[META202:![0-9]+]] // CHECK-NEXT: [[FNEG_I_I_I_I:%.*]] = fneg float [[CALL_I_I_I_I_I]] // CHECK-NEXT: store float [[FNEG_I_I_I_I]], ptr [[REF_TMP_I_I_I_I]], align 4, !tbaa [[TBAA86]], !noalias [[META202]] // CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I_I]]) #[[ATTR8]], !noalias [[META202]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META199]] -// CHECK-NEXT: [[ARRAYIDX_I_I7_I_I:%.*]] = getelementptr inbounds [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: store i16 [[CALL_I_I_I_I_I_I]], ptr [[ARRAYIDX_I_I7_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META196]] +// CHECK-NEXT: [[ARRAYIDX_I7_I_I:%.*]] = getelementptr inbounds [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[CALL_I_I_I_I_I_I]], ptr [[ARRAYIDX_I7_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META196]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP205:![0-9]+]] // CHECK: _ZN4sycl3_V16detailngERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi16EEE.exit: From 3338ce0c0790b5f2e23cf83fb8e6e3fc7d23d4ec Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Wed, 26 Mar 2025 07:33:29 -0700 Subject: [PATCH 2/4] Fix Windows regression tests --- sycl/test/regression/vec_array_windows.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/sycl/test/regression/vec_array_windows.cpp b/sycl/test/regression/vec_array_windows.cpp index f96edbf0153a6..d9511de372f54 100644 --- a/sycl/test/regression/vec_array_windows.cpp +++ b/sycl/test/regression/vec_array_windows.cpp @@ -10,7 +10,14 @@ #include -// expected-error@* {{SYCL kernel cannot call a variadic function}} -// expected-error@* {{SYCL kernel cannot call an undefined function without SYCL_EXTERNAL attribute}} -// expected-error@* {{SYCL kernel cannot call an undefined function without SYCL_EXTERNAL attribute}} +// expected-no-diagnostics +// +// Our current implementation automatically opts-in for a new implementation if +// that is possible without breaking ABI. +// However, depending on the environment (used STL implementation, in +// particular) it may not be the case. Therefore, the lines below are kept for +// reference of how an error would look like in a problematic environment. +// not-expected-error@* {{SYCL kernel cannot call a variadic function}} +// not-expected-error@* {{SYCL kernel cannot call an undefined function without SYCL_EXTERNAL attribute}} +// not-expected-error@* {{SYCL kernel cannot call an undefined function without SYCL_EXTERNAL attribute}} SYCL_EXTERNAL auto GetFirstElement(sycl::vec v) { return v[0]; } From 0c1c53d61ae8ad19f6ba50c7de2256609eebb48e Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Thu, 27 Mar 2025 06:43:39 -0700 Subject: [PATCH 3/4] Use different macro name --- sycl/include/sycl/vector.hpp | 10 +++++----- sycl/test/basic_tests/vectors/storage.cpp | 5 +++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/sycl/include/sycl/vector.hpp b/sycl/include/sycl/vector.hpp index 9b5e1413f69dc..21ba0b51f3fb4 100644 --- a/sycl/include/sycl/vector.hpp +++ b/sycl/include/sycl/vector.hpp @@ -23,11 +23,11 @@ #endif // __clang__ // See vec::DataType definitions for more details -#ifndef __SYCL_USE_NEW_VEC_IMPL +#ifndef __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE #if defined(__INTEL_PREVIEW_BREAKING_CHANGES) -#define __SYCL_USE_NEW_VEC_IMPL 1 +#define __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE 1 #else -#define __SYCL_USE_NEW_VEC_IMPL 0 +#define __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE 0 #endif #endif @@ -171,9 +171,9 @@ template class vec_base { // // To play it safe, we only switch to use plain array if both its size and // alignment match those of std::array, or unless the new behavior is forced - // via __SYCL_USE_NEW_VEC_IMPL or preview breaking changes mode. + // via __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE or preview breaking changes mode. using DataType = std::conditional_t< -#if __SYCL_USE_NEW_VEC_IMPL +#if __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE true, #else sizeof(std::array) == sizeof(DataT[AdjustedNum]) && diff --git a/sycl/test/basic_tests/vectors/storage.cpp b/sycl/test/basic_tests/vectors/storage.cpp index f4dbf5080ceb3..bbb14cfe24227 100644 --- a/sycl/test/basic_tests/vectors/storage.cpp +++ b/sycl/test/basic_tests/vectors/storage.cpp @@ -1,6 +1,6 @@ // RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only // RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only -fpreview-breaking-changes -// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only -D__SYCL_USE_NEW_VEC_IMPL=1 +// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only -D__SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE=1 // expected-no-diagnostics #include @@ -21,7 +21,8 @@ template class vec_base_test { sizeof(std::array) == sizeof(T[N]) && alignof(std::array) == alignof(T[N]); -#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) || __SYCL_USE_NEW_VEC_IMPL +#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) || \ + __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE static_assert(uses_plain_array, "We must use plain array regardless of " "layout, because user is opted-in for a potential ABI-break"); From c75c1024baba0481b7e042bdb461d778fbc0194c Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Thu, 27 Mar 2025 07:30:59 -0700 Subject: [PATCH 4/4] Apply clang-format --- sycl/include/sycl/vector.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/include/sycl/vector.hpp b/sycl/include/sycl/vector.hpp index 21ba0b51f3fb4..a10a7445602f4 100644 --- a/sycl/include/sycl/vector.hpp +++ b/sycl/include/sycl/vector.hpp @@ -184,6 +184,7 @@ template class vec_base { // To allow testing of private methods template friend class detail::vec_base_test; + protected: // fields // Alignment is the same as size, to a maximum size of 64. SPEC requires