From 07ff6c2903452eda62f3fe9d7dee8bc1f4f7f153 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Fri, 10 May 2024 08:58:00 -0700 Subject: [PATCH 1/2] [SYCL] Optimize `sycl::detail::memcpy` --- sycl/include/sycl/detail/memcpy.hpp | 12 +- .../check_device_code/vector/vector_as.cpp | 28 +- .../vector/vector_math_ops.cpp | 314 +++++++----------- 3 files changed, 132 insertions(+), 222 deletions(-) diff --git a/sycl/include/sycl/detail/memcpy.hpp b/sycl/include/sycl/detail/memcpy.hpp index ff96bf3e688f1..9e2eac2b30b7c 100644 --- a/sycl/include/sycl/detail/memcpy.hpp +++ b/sycl/include/sycl/detail/memcpy.hpp @@ -8,17 +8,17 @@ #pragma once -#include +#include namespace sycl { inline namespace _V1 { namespace detail { inline void memcpy(void *Dst, const void *Src, size_t Size) { - char *Destination = reinterpret_cast(Dst); - const char *Source = reinterpret_cast(Src); - for (size_t I = 0; I < Size; ++I) { - Destination[I] = Source[I]; - } +#ifdef __SYCL_DEVICE_ONLY__ + __builtin_memcpy(Dst, Src, Size); +#else + std::memcpy(Dst, Src, Size); +#endif } } // namespace detail } // namespace _V1 diff --git a/sycl/test/check_device_code/vector/vector_as.cpp b/sycl/test/check_device_code/vector/vector_as.cpp index 1079d4658f9ab..f42fb56b58791 100644 --- a/sycl/test/check_device_code/vector/vector_as.cpp +++ b/sycl/test/check_device_code/vector/vector_as.cpp @@ -19,36 +19,12 @@ template SYCL_EXTERNAL sycl::vec sycl::vec::as, ptr [[A]], align 8, {{.*}} -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B]], align 8, {{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A]], align 8, !tbaa [[TBAA10:![0-9]+]], !noalias [[META7]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B]], align 8, !tbaa [[TBAA10]], !noalias [[META7]] // CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <2 x i32> [[ADD_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, {{.*}} +// CHECK-NEXT: store <2 x i32> [[ADD_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA10]], !alias.scope [[META7]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIfLi3EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.0") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.0") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.0") align 16 [[B:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.0") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.0") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.0") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META13:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: [[LOADVEC4_I:%.*]] = load <4 x float>, ptr [[A]], align 16, {{.*}} -// CHECK-NEXT: [[LOADVEC42_I:%.*]] = load <4 x float>, ptr [[B]], align 16, {{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) +// CHECK-NEXT: [[LOADVEC4_I:%.*]] = load <4 x float>, ptr [[A]], align 16, !noalias [[META14]] +// CHECK-NEXT: [[LOADVEC42_I:%.*]] = load <4 x float>, ptr [[B]], align 16, !noalias [[META14]] // CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x float> [[LOADVEC4_I]], [[LOADVEC42_I]] // CHECK-NEXT: [[EXTRACTVEC5_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC5_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, {{.*}} +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC5_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA10]], !alias.scope [[META14]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIcLi16EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.1") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.1") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.1") align 16 [[B:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.1") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.1") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.1") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META17:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, {{.*}} -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[B]], align 16, {{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA10]], !noalias [[META18]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[B]], align 16, !tbaa [[TBAA10]], !noalias [[META18]] // CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <16 x i8> [[ADD_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, {{.*}} +// CHECK-NEXT: store <16 x i8> [[ADD_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA10]], !alias.scope [[META18]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // std::byte does not support '+'. Therefore, using bitwise XOR as a substitute. // CHECK-LABEL: define dso_local spir_func void @_Z7TestXorN4sycl3_V13vecISt4byteLi8EEES3_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.2") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.2") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.2") align 8 [[B:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.2") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.2") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.2") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META21:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 8, {{.*}} -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[B]], align 8, {{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 8, !tbaa [[TBAA10]], !noalias [[META22]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[B]], align 8, !tbaa [[TBAA10]], !noalias [[META22]] // CHECK-NEXT: [[XOR_I:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <8 x i8> [[XOR_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, {{.*}} +// CHECK-NEXT: store <8 x i8> [[XOR_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA10]], !alias.scope [[META22]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestXor(vec a, vec b) { @@ -67,11 +68,11 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIbLi4EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.3") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.3") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.3") align 4 [[B:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.3") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.3") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.3") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !srcloc [[META25:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, {{.*}} -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[B]], align 4, {{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA10]], !noalias [[META26]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[B]], align 4, !tbaa [[TBAA10]], !noalias [[META26]] // CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]] // CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK: for.cond.i.i: @@ -88,40 +89,40 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK: _ZN4sycl3_V1plERKNS0_3vecIbLi4EEES4_.exit: -// CHECK-NEXT: store <4 x i8> [[VECINS_I_I6_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, {{.*}} +// CHECK-NEXT: store <4 x i8> [[VECINS_I_I6_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META26]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_6detail9half_impl4halfELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.4") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.4") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.4") align 8 [[B:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.4") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.4") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.4") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META31:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: [[LOADVEC4_I:%.*]] = load <4 x half>, ptr [[A]], align 8, {{.*}} -// CHECK-NEXT: [[LOADVEC42_I:%.*]] = load <4 x half>, ptr [[B]], align 8, {{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META32:![0-9]+]]) +// CHECK-NEXT: [[LOADVEC4_I:%.*]] = load <4 x half>, ptr [[A]], align 8, !noalias [[META32]] +// CHECK-NEXT: [[LOADVEC42_I:%.*]] = load <4 x half>, ptr [[B]], align 8, !noalias [[META32]] // CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x half> [[LOADVEC4_I]], [[LOADVEC42_I]] // CHECK-NEXT: [[EXTRACTVEC5_I:%.*]] = shufflevector <4 x half> [[TMP0]], <4 x half> poison, <4 x i32> -// CHECK-NEXT: store <4 x half> [[EXTRACTVEC5_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, {{.*}} +// CHECK-NEXT: store <4 x half> [[EXTRACTVEC5_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA10]], !alias.scope [[META32]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable sret(%"class.sycl::_V1::vec.5") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 8 [[B:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable sret(%"class.sycl::_V1::vec.5") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] !srcloc [[META35:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I:%.*]] = alloca float, align 4 // CHECK-NEXT: [[REF_TMP1_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::bfloat16", align 2 // CHECK-NEXT: [[REF_TMP3_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::bfloat16", align 2 -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META36:![0-9]+]]) // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 2, ptr nonnull [[REF_TMP1_I]]) // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 2, ptr nonnull [[REF_TMP3_I]]) // CHECK-NEXT: [[REF_TMP1_ASCAST_I:%.*]] = addrspacecast ptr [[REF_TMP1_I]] to ptr addrspace(4) // CHECK-NEXT: [[REF_TMP3_ASCAST_I:%.*]] = addrspacecast ptr [[REF_TMP3_I]] to ptr addrspace(4) // CHECK-NEXT: [[REF_TMP_ASCAST_I_I:%.*]] = addrspacecast ptr [[REF_TMP_I_I]] to ptr addrspace(4) -// CHECK-NEXT: [[AGG_RESULT_PROMOTED_I:%.*]] = load <3 x i16>, ptr addrspace(4) [[AGG_RESULT]], align 8, {{.*}} -// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x i16>, ptr [[A]], align 8, {{.*}} +// CHECK-NEXT: [[AGG_RESULT_PROMOTED_I:%.*]] = load <3 x i16>, ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META36]] +// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x i16>, ptr [[A]], align 8, !noalias [[META39:![0-9]+]] // CHECK-NEXT: [[EXTRACTVEC_I_I_I:%.*]] = shufflevector <4 x i16> [[LOADVEC4_I_I_I]], <4 x i16> poison, <3 x i32> -// CHECK-NEXT: [[LOADVEC4_I_I9_I:%.*]] = load <4 x i16>, ptr [[B]], align 8, {{.*}} +// CHECK-NEXT: [[LOADVEC4_I_I9_I:%.*]] = load <4 x i16>, ptr [[B]], align 8, !noalias [[META44:![0-9]+]] // CHECK-NEXT: [[EXTRACTVEC_I_I10_I:%.*]] = shufflevector <4 x i16> [[LOADVEC4_I_I9_I]], <4 x i16> poison, <3 x i32> // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: @@ -131,26 +132,26 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1PLERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI3EEES7__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = trunc nuw nsw i64 [[I_0_I]] to i32 -// CHECK-NEXT: call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: call void @llvm.experimental.noalias.{{.*}} +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META50:![0-9]+]]) // CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <3 x i16> [[EXTRACTVEC_I_I_I]], i32 [[CONV_I]] -// CHECK-NEXT: store i16 [[VECEXT_I_I_I]], ptr [[REF_TMP1_I]], align 2, {{.*}} -// CHECK-NEXT: call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: call void @llvm.experimental.noalias.{{.*}} +// CHECK-NEXT: store i16 [[VECEXT_I_I_I]], ptr [[REF_TMP1_I]], align 2, !alias.scope [[META51:![0-9]+]], !noalias [[META36]] +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META56:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META57:![0-9]+]]) // CHECK-NEXT: [[VECEXT_I_I11_I:%.*]] = extractelement <3 x i16> [[EXTRACTVEC_I_I10_I]], i32 [[CONV_I]] -// CHECK-NEXT: store i16 [[VECEXT_I_I11_I]], ptr [[REF_TMP3_I]], align 2, {{.*}} -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I]]), {{.*}} -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[REF_TMP1_ASCAST_I]]) #[[ATTR9:[0-9]+]], {{.*}} -// CHECK-NEXT: [[CALL_I_I2_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[REF_TMP3_ASCAST_I]]) #[[ATTR9]], {{.*}} +// CHECK-NEXT: store i16 [[VECEXT_I_I11_I]], ptr [[REF_TMP3_I]], align 2, !alias.scope [[META58:![0-9]+]], !noalias [[META36]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I]]), !noalias [[META36]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[REF_TMP1_ASCAST_I]]) #[[ATTR9:[0-9]+]], !noalias [[META63:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I2_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[REF_TMP3_ASCAST_I]]) #[[ATTR9]], !noalias [[META63]] // CHECK-NEXT: [[ADD_I_I:%.*]] = fadd float [[CALL_I_I_I_I]], [[CALL_I_I2_I_I]] -// CHECK-NEXT: store float [[ADD_I_I]], ptr [[REF_TMP_I_I]], align 4, {{.*}} -// CHECK-NEXT: [[CALL_I_I3_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I]]) #[[ATTR9]], {{.*}} -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I]]), {{.*}} +// CHECK-NEXT: store float [[ADD_I_I]], ptr [[REF_TMP_I_I]], align 4, !tbaa [[TBAA66:![0-9]+]], !noalias [[META63]] +// CHECK-NEXT: [[CALL_I_I3_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I]]) #[[ATTR9]], !noalias [[META63]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I]]), !noalias [[META36]] // CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <3 x i16> [[VECINS_I_I12_I]], i16 [[CALL_I_I3_I_I]], i32 [[CONV_I]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK: _ZN4sycl3_V1plERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi3EEES7_.exit: -// CHECK-NEXT: store <3 x i16> [[VECINS_I_I12_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, {{.*}} +// CHECK-NEXT: store <3 x i16> [[VECINS_I_I12_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META36]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 2, ptr nonnull [[REF_TMP1_I]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 2, ptr nonnull [[REF_TMP3_I]]) // CHECK-NEXT: ret void @@ -163,14 +164,14 @@ SYCL_EXTERNAL auto TestAdd(vec a, /***************** Binary Logical Ops *******************/ // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIiLi16EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.6") align 64 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.6") align 64 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.6") align 64 [[B:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.6") align 64 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.6") align 64 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.6") align 64 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META69:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A]], align 64, {{.*}} -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[B]], align 64, {{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META70:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A]], align 64, !tbaa [[TBAA10]], !noalias [[META70]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[B]], align 64, !tbaa [[TBAA10]], !noalias [[META70]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <16 x i32> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i32> -// CHECK-NEXT: store <16 x i32> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 64, {{.*}} +// CHECK-NEXT: store <16 x i32> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 64, !tbaa [[TBAA10]], !alias.scope [[META70]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { @@ -178,16 +179,16 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecISt4byteLi3EEES3_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.7") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.8") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.8") align 4 [[B:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.7") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.8") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.8") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META73:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: [[LOADVEC4_I:%.*]] = load <4 x i8>, ptr [[A]], align 4, {{.*}} -// CHECK-NEXT: [[LOADVEC42_I:%.*]] = load <4 x i8>, ptr [[B]], align 4, {{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META74:![0-9]+]]) +// CHECK-NEXT: [[LOADVEC4_I:%.*]] = load <4 x i8>, ptr [[A]], align 4, !noalias [[META74]] +// CHECK-NEXT: [[LOADVEC42_I:%.*]] = load <4 x i8>, ptr [[B]], align 4, !noalias [[META74]] // CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <4 x i8> [[LOADVEC4_I]], [[LOADVEC42_I]] // CHECK-NEXT: [[CMP_I:%.*]] = shufflevector <4 x i1> [[TMP0]], <4 x i1> poison, <3 x i32> // CHECK-NEXT: [[SEXT_I:%.*]] = sext <3 x i1> [[CMP_I]] to <3 x i8> // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <3 x i8> [[SEXT_I]], <3 x i8> poison, <4 x i32> -// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, {{.*}} +// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA10]], !alias.scope [[META74]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { @@ -195,14 +196,14 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIbLi2EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.9") align 2 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.10") align 2 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.10") align 2 [[B:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.9") align 2 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.10") align 2 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.10") align 2 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META77:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr [[A]], align 2, {{.*}} -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[B]], align 2, {{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META78:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr [[A]], align 2, !tbaa [[TBAA10]], !noalias [[META78]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[B]], align 2, !tbaa [[TBAA10]], !noalias [[META78]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <2 x i8> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i8> -// CHECK-NEXT: store <2 x i8> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 2, {{.*}} +// CHECK-NEXT: store <2 x i8> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 2, !tbaa [[TBAA10]], !alias.scope [[META78]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { @@ -210,14 +211,14 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.11") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.12") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.12") align 16 [[B:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.11") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.12") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.12") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META81:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, {{.*}} -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B]], align 16, {{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META82:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA10]], !noalias [[META82]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B]], align 16, !tbaa [[TBAA10]], !noalias [[META82]] // CHECK-NEXT: [[CMP_I:%.*]] = fcmp ogt <8 x half> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> -// CHECK-NEXT: store <8 x i16> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, {{.*}} +// CHECK-NEXT: store <8 x i16> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA10]], !alias.scope [[META82]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { @@ -228,14 +229,14 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { // For example, vec{-0.5, 3.333} < vec{6.0, 6.666} results // into {-1, -1} on host but {0, -1} on device. // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.13") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.14") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.14") align 8 [[B:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.13") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.14") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.14") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META85:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[A]], align 8, {{.*}} -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[B]], align 8, {{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META86:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[A]], align 8, !tbaa [[TBAA10]], !noalias [[META86]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[B]], align 8, !tbaa [[TBAA10]], !noalias [[META86]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <4 x i16> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> -// CHECK-NEXT: store <4 x i16> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, {{.*}} +// CHECK-NEXT: store <4 x i16> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA10]], !alias.scope [[META86]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, @@ -246,178 +247,111 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, /********************** Unary Ops **********************/ // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIiLi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.15") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.15") align 16 [[A:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.15") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.15") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META89:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca %"class.sycl::_V1::vec.15", align 16 -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[REF_TMP_I]]) -// CHECK-NEXT: [[LOADVEC4_I:%.*]] = load <4 x i32>, ptr [[A]], align 16, {{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META90:![0-9]+]]) +// CHECK-NEXT: [[LOADVEC4_I:%.*]] = load <4 x i32>, ptr [[A]], align 16, !noalias [[META90]] // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x i32> [[LOADVEC4_I]], <4 x i32> poison, <3 x i32> // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <3 x i32> [[EXTRACTVEC_I]], zeroinitializer // CHECK-NEXT: [[SEXT_I:%.*]] = sext <3 x i1> [[CMP_I]] to <3 x i32> // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <3 x i32> [[SEXT_I]], <3 x i32> poison, <4 x i32> -// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I_I]], ptr [[REF_TMP_I]], align 16, {{.*}} -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] -// CHECK: for.cond.i.i.i: -// CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] -// CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i64 [[I_0_I_I_I]], 16 -// CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V1NTERKNS0_3VECIILI3EEE_EXIT:%.*]] -// CHECK: for.body.i.i.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP_I]], i64 [[I_0_I_I_I]] -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX_I_I_I]], align 1, {{.*}} -// CHECK-NEXT: [[ARRAYIDX1_I_I_I:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I_I_I]] -// CHECK-NEXT: store i8 [[TMP0]], ptr addrspace(4) [[ARRAYIDX1_I_I_I]], align 1, {{.*}} -// CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i64 [[I_0_I_I_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP97:![0-9]+]] -// CHECK: _ZN4sycl3_V1ntERKNS0_3vecIiLi3EEE.exit: -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[REF_TMP_I]]) +// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META90]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecIiLi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.16") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.16") align 16 [[A:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.16") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.16") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META93:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16, {{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META94:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16, !tbaa [[TBAA10]], !noalias [[META94]] // CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, [[TMP0]] -// CHECK-NEXT: store <4 x i32> [[SUB_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, {{.*}} +// CHECK-NEXT: store <4 x i32> [[SUB_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA10]], !alias.scope [[META94]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // Negation is not valid for std::byte. Therefore, using bitwise negation. // CHECK-LABEL: define dso_local spir_func void @_Z19TestBitwiseNegationN4sycl3_V13vecISt4byteLi16EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.17") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.17") align 16 [[A:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.17") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.17") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META97:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, {{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META98:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA10]], !noalias [[META98]] // CHECK-NEXT: [[NOT_I:%.*]] = xor <16 x i8> [[TMP0]], -// CHECK-NEXT: store <16 x i8> [[NOT_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, {{.*}} +// CHECK-NEXT: store <16 x i8> [[NOT_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA10]], !alias.scope [[META98]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBitwiseNegation(vec a) { return ~a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIbLi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.18") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.3") align 4 [[A:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.18") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.3") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META101:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca %"class.sycl::_V1::vec.3", align 4 -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, {{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA10]], !noalias [[META102]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i8> [[TMP0]], zeroinitializer // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i8> -// CHECK-NEXT: store <4 x i8> [[SEXT_I]], ptr [[REF_TMP_I]], align 4, {{.*}} -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] -// CHECK: for.cond.i.i.i: -// CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] -// CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i64 [[I_0_I_I_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V1NTERKNS0_3VECIBLI4EEE_EXIT:%.*]] -// CHECK: for.body.i.i.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP_I]], i64 [[I_0_I_I_I]] -// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX_I_I_I]], align 1, {{.*}} -// CHECK-NEXT: [[ARRAYIDX1_I_I_I:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I_I_I]] -// CHECK-NEXT: store i8 [[TMP1]], ptr addrspace(4) [[ARRAYIDX1_I_I_I]], align 1, {{.*}} -// CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i64 [[I_0_I_I_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP97]] -// CHECK: _ZN4sycl3_V1ntERKNS0_3vecIbLi4EEE.exit: -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I]]) +// CHECK-NEXT: store <4 x i8> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META105:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_6detail9half_impl4halfELi2EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.19") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.20") align 4 [[A:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.19") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.20") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META108:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca %"class.sycl::_V1::vec.20", align 4 -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A]], align 4, {{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META109:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A]], align 4, !tbaa [[TBAA10]], !noalias [[META109]] // CHECK-NEXT: [[CMP_I:%.*]] = fcmp oeq <2 x half> [[TMP0]], zeroinitializer // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i16> -// CHECK-NEXT: store <2 x i16> [[SEXT_I]], ptr [[REF_TMP_I]], align 4, {{.*}} -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] -// CHECK: for.cond.i.i.i: -// CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] -// CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i64 [[I_0_I_I_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V1NTERKNS0_3VECINS0_6DETAIL9HALF_IMPL4HALFELI2EEE_EXIT:%.*]] -// CHECK: for.body.i.i.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP_I]], i64 [[I_0_I_I_I]] -// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX_I_I_I]], align 1, {{.*}} -// CHECK-NEXT: [[ARRAYIDX1_I_I_I:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I_I_I]] -// CHECK-NEXT: store i8 [[TMP1]], ptr addrspace(4) [[ARRAYIDX1_I_I_I]], align 1, {{.*}} -// CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i64 [[I_0_I_I_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP97]] -// CHECK: _ZN4sycl3_V1ntERKNS0_3vecINS0_6detail9half_impl4halfELi2EEE.exit: -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I]]) +// CHECK-NEXT: store <2 x i16> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META112:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.12") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.12") align 16 [[A:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.12") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.12") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META115:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, {{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META116:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA10]], !noalias [[META116]] // CHECK-NEXT: [[FNEG_I:%.*]] = fneg <8 x half> [[TMP0]] -// CHECK-NEXT: store <8 x half> [[FNEG_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, {{.*}} +// CHECK-NEXT: store <8 x half> [[FNEG_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA10]], !alias.scope [[META116]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.21") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 8 [[A:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.21") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META119:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RET_I:%.*]] = alloca %"class.sycl::_V1::vec.5", align 8 // CHECK-NEXT: [[REF_TMP1_I:%.*]] = alloca float, align 4 // CHECK-NEXT: [[REF_TMP2_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::bfloat16", align 2 -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RET_I]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META120:![0-9]+]]) // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP1_I]]) // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 2, ptr nonnull [[REF_TMP2_I]]) // CHECK-NEXT: [[REF_TMP1_ASCAST_I:%.*]] = addrspacecast ptr [[REF_TMP1_I]] to ptr addrspace(4) // CHECK-NEXT: [[REF_TMP2_ASCAST_I:%.*]] = addrspacecast ptr [[REF_TMP2_I]] to ptr addrspace(4) -// CHECK-NEXT: store i64 0, ptr [[RET_I]], align 8, {{.*}} -// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x i16>, ptr [[A]], align 8, {{.*}} +// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x i16>, ptr [[A]], align 8, !noalias [[META123:![0-9]+]] // CHECK-NEXT: [[EXTRACTVEC_I_I_I:%.*]] = shufflevector <4 x i16> [[LOADVEC4_I_I_I]], <4 x i16> poison, <3 x i32> // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: -// CHECK-NEXT: [[TMP0:%.*]] = phi <3 x i16> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[VECINS_I_I_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-NEXT: [[RET_SROA_0_0_I:%.*]] = phi <3 x i16> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[VECINS_I_I_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY_I]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i64 [[I_0_I]], 3 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[FOR_END_I:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1NTERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI3EEE_EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = trunc nuw nsw i64 [[I_0_I]] to i32 // CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <3 x i16> [[EXTRACTVEC_I_I_I]], i32 [[CONV_I]] -// CHECK-NEXT: store i16 [[VECEXT_I_I_I]], ptr [[REF_TMP2_I]], align 2, {{.*}} -// CHECK-NEXT: [[CALL_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[REF_TMP2_ASCAST_I]]) #[[ATTR9]], {{.*}} +// CHECK-NEXT: store i16 [[VECEXT_I_I_I]], ptr [[REF_TMP2_I]], align 2, !tbaa [[TBAA128:![0-9]+]], !alias.scope [[META130:![0-9]+]], !noalias [[META120]] +// CHECK-NEXT: [[CALL_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[REF_TMP2_ASCAST_I]]) #[[ATTR9]], !noalias [[META120]] // CHECK-NEXT: [[CMP_I_I:%.*]] = fcmp oeq float [[CALL_I_I_I]], 0.000000e+00 // CHECK-NEXT: [[CONV4_I:%.*]] = uitofp i1 [[CMP_I_I]] to float -// CHECK-NEXT: store float [[CONV4_I]], ptr [[REF_TMP1_I]], align 4, {{.*}} -// CHECK-NEXT: [[CALL_I_I9_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP1_ASCAST_I]]) #[[ATTR9]], {{.*}} -// CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <3 x i16> [[TMP0]], i16 [[CALL_I_I9_I]], i32 [[CONV_I]] +// CHECK-NEXT: store float [[CONV4_I]], ptr [[REF_TMP1_I]], align 4, !tbaa [[TBAA66]], !noalias [[META120]] +// CHECK-NEXT: [[CALL_I_I9_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP1_ASCAST_I]]) #[[ATTR9]], !noalias [[META120]] +// CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <3 x i16> [[RET_SROA_0_0_I]], i16 [[CALL_I_I9_I]], i32 [[CONV_I]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP140:![0-9]+]] -// CHECK: for.end.i: -// CHECK-NEXT: store <3 x i16> [[TMP0]], ptr [[RET_I]], align 1, {{.*}} -// CHECK-NEXT: call void @llvm.experimental.noalias.{{.*}} -// CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] -// CHECK: for.cond.i.i.i: -// CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i64 [ 0, [[FOR_END_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] -// CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i64 [[I_0_I_I_I]], 8 -// CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V1NTERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI3EEE_EXIT:%.*]] -// CHECK: for.body.i.i.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[RET_I]], i64 [[I_0_I_I_I]] -// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX_I_I_I]], align 1, {{.*}} -// CHECK-NEXT: [[ARRAYIDX1_I_I_I:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I_I_I]] -// CHECK-NEXT: store i8 [[TMP1]], ptr addrspace(4) [[ARRAYIDX1_I_I_I]], align 1, {{.*}} -// CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i64 [[I_0_I_I_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP97]] +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP133:![0-9]+]] // CHECK: _ZN4sycl3_V1ntERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi3EEE.exit: -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RET_I]]) +// CHECK-NEXT: store <3 x i16> [[RET_SROA_0_0_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META134:![0-9]+]] +// CHECK-NEXT: [[AGG_RESULT_SROA_IDX_I:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[AGG_RESULT]], i64 6 +// CHECK-NEXT: store i16 0, ptr addrspace(4) [[AGG_RESULT_SROA_IDX_I]], align 2, !alias.scope [[META134]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP1_I]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 2, ptr nonnull [[REF_TMP2_I]]) // CHECK-NEXT: ret void @@ -425,14 +359,14 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.22") align 32 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.22") align 32 [[A:%.*]]) {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.22") align 32 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.22") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] !srcloc [[META137:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I:%.*]] = alloca float, align 4 // CHECK-NEXT: [[V_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::bfloat16", align 2 -// CHECK-NEXT: tail call void @llvm.experimental.noalias.{{.*}} +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META138:![0-9]+]]) // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 2, ptr nonnull [[V_I]]) // CHECK-NEXT: [[V_ASCAST_I:%.*]] = addrspacecast ptr [[V_I]] to ptr addrspace(4) -// CHECK-NEXT: tail call void @llvm.memset.p4.i64(ptr addrspace(4) noundef align 32 dereferenceable(32) [[AGG_RESULT]], i8 0, i64 32, i1 false), {{.*}} +// CHECK-NEXT: tail call void @llvm.memset.p4.i64(ptr addrspace(4) noundef align 32 dereferenceable(32) [[AGG_RESULT]], i8 0, i64 32, i1 false), !alias.scope [[META138]] // CHECK-NEXT: [[REF_TMP_ASCAST_I_I:%.*]] = addrspacecast ptr [[REF_TMP_I_I]] to ptr addrspace(4) // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: @@ -440,17 +374,17 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i64 [[I_0_I]], 16 // CHECK-NEXT: call void @llvm.assume(i1 [[CMP_I]]) // CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i16>, ptr [[A]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: [[VECEXT_I:%.*]] = load i16, ptr [[TMP0]], align 2, {{.*}} -// CHECK-NEXT: store i16 [[VECEXT_I]], ptr [[V_I]], align 2, {{.*}} -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I]]), {{.*}} -// CHECK-NEXT: [[CALL_I_I:%.*]] = call spir_func float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[V_ASCAST_I]]) #[[ATTR9]], {{.*}} +// CHECK-NEXT: [[VECEXT_I:%.*]] = load i16, ptr [[TMP0]], align 2, !noalias [[META138]] +// CHECK-NEXT: store i16 [[VECEXT_I]], ptr [[V_I]], align 2, !tbaa [[TBAA141:![0-9]+]], !alias.scope [[META143:![0-9]+]], !noalias [[META138]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I]]), !noalias [[META138]] +// CHECK-NEXT: [[CALL_I_I:%.*]] = call spir_func float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[V_ASCAST_I]]) #[[ATTR9]], !noalias [[META146:![0-9]+]] // CHECK-NEXT: [[FNEG_I_I:%.*]] = fneg float [[CALL_I_I]] -// CHECK-NEXT: store float [[FNEG_I_I]], ptr [[REF_TMP_I_I]], align 4, {{.*}} -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I]]) #[[ATTR9]], {{.*}} -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I]]), {{.*}} +// CHECK-NEXT: store float [[FNEG_I_I]], ptr [[REF_TMP_I_I]], align 4, !tbaa [[TBAA66]], !noalias [[META146]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I]]) #[[ATTR9]], !noalias [[META146]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I]]), !noalias [[META138]] // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i16>, ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: store i16 [[CALL_I_I_I_I]], ptr addrspace(4) [[TMP1]], align 2, {{.*}} +// CHECK-NEXT: store i16 [[CALL_I_I_I_I]], ptr addrspace(4) [[TMP1]], align 2, !alias.scope [[META138]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP157:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP149:![0-9]+]] // SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } From 6aa779ddb7e7bf3f4225782c100a3e4e8e5a152b Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Mon, 13 May 2024 09:35:51 -0700 Subject: [PATCH 2/2] Fix formatting --- sycl/test/.clang-format | 2 +- sycl/test/check_device_code/vector/vector_math_ops.cpp | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/sycl/test/.clang-format b/sycl/test/.clang-format index ec2cbcc32023a..d4017d77aa2bd 100644 --- a/sycl/test/.clang-format +++ b/sycl/test/.clang-format @@ -1,2 +1,2 @@ BasedOnStyle: LLVM -CommentPragmas: "RUN|FAIL|REQUIRES|UNSUPPORTED|CHECK|expected-" +CommentPragmas: "RUN|FAIL|REQUIRES|UNSUPPORTED|CHECK|expected-|update_cc_test_checks.py" diff --git a/sycl/test/check_device_code/vector/vector_math_ops.cpp b/sycl/test/check_device_code/vector/vector_math_ops.cpp index 6e8427d71729c..5d6521d725341 100644 --- a/sycl/test/check_device_code/vector/vector_math_ops.cpp +++ b/sycl/test/check_device_code/vector/vector_math_ops.cpp @@ -1,8 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 -// NOTE: Assertions have been autogenerated by -// llvm/utils/update_cc_test_checks.py script, followed by some manual cleanup. -// ./llvm/utils/update_cc_test_checks.py --clang=./build/bin/clang -// ./sycl/test/check_device_code/vector/vector_math_ops.cpp +// NOTE: ..., followed by some manual cleanup. // RUN: %clangxx -I %sycl_include -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -disable-lifetime-markers -fsycl-device-only %s -o - | FileCheck %s