From eec1da13b727a55299eaf526a54226b300a28a8a Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Thu, 26 May 2022 07:58:00 -0700 Subject: [PATCH 01/15] Revert the logic committed by 16f64b8ff1f5fe44ff13de6c5d8fbd3818f81d43 Author: againull <48139823+againull@users.noreply.github.com> Date: Fri Apr 3 00:59:46 2020 -0700 [SYCL] Share PFWG lambda object through shared memory (#1455) In the current implementation private address of the PFWG lambda object is shared by leader work item through local memory to other work items. This is not correct. That is why perform the copy of the PFWG lambda object to shared memory and make work items work with address of the object in shared memory. I.e. this case should be handled in the similar way as for byval parameters. Signed-off-by: Artur Gainullin --- llvm/lib/SYCLLowerIR/LowerWGScope.cpp | 59 ++++++++------------------- 1 file changed, 16 insertions(+), 43 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp index 1653477b8f375..a5469dede26b0 100644 --- a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp +++ b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp @@ -385,16 +385,8 @@ static void copyBetweenPrivateAndShadow(Value *L, GlobalVariable *Shadow, LocAlign = MaybeAlign(AI->getAlignment()); } else { auto Arg = cast(L); - if (Arg->hasByValAttr()) { - T = Arg->getParamByValType(); - LocAlign = MaybeAlign(Arg->getParamAlignment()); - } else { - Type *Ty = Arg->getType(); - Module &M = *Shadow->getParent(); - LocAlign = M.getDataLayout().getValueOrABITypeAlignment( - MaybeAlign(Arg->getParamAlignment()), Ty); - T = Arg->getType()->getPointerElementType(); - } + T = Arg->getParamByValType(); + LocAlign = MaybeAlign(Arg->getParamAlignment()); } assert(T && "Unexpected type"); @@ -698,16 +690,7 @@ static void fixupPrivateMemoryPFWILambdaCaptures(CallInst *PFWICall) { // Go through "byval" parameters which are passed as AS(0) pointers // and: (1) create local shadows for them (2) and initialize them from the // leader's copy and (3) materialize the value in the local variable before use -// -// Do the same for 'this' pointer which points to PFWG lamda object which is -// allocated in the caller. Caller is a kernel function which is generated by -// SYCL frontend. Kernel function allocates PFWG lambda object and initalizes -// captured objects (like accessors) using arguments of the kernel. After -// intialization kernel calls PFWG function (which is the operator() of the PFWG -// object). PFWG object captures all objects by value and all uses (except -// initialization from kernel arguments) of this values can only be in scope of -// PFWG function that is why copy back of PFWG object is not needed. -static void sharePFWGPrivateObjects(Function &F, const Triple &TT) { +static void shareByValParams(Function &F, const Triple &TT) { // Skip alloca instructions and split. Alloca instructions must be in the // beginning of the function otherwise they are considered as dynamic which // can cause the problems with inlining. @@ -726,29 +709,20 @@ static void sharePFWGPrivateObjects(Function &F, const Triple &TT) { Instruction &At = LeaderBB->back(); for (auto &Arg : F.args()) { - Type *T; - LLVMContext &Ctx = At.getContext(); - IRBuilder<> Builder(Ctx); - Builder.SetInsertPoint(&LeaderBB->front()); + if (!Arg.hasByValAttr()) + continue; + + assert(Arg.getType()->getPointerAddressSpace() == + asUInt(spirv::AddrSpace::Private)); // Create the shared copy - "shadow" - for current arg - GlobalVariable *Shadow = nullptr; - if (Arg.hasByValAttr()) { - assert(Arg.getType()->getPointerAddressSpace() == - asUInt(spirv::AddrSpace::Private)); - T = Arg.getParamByValType(); - Shadow = spirv::createWGLocalVariable(*F.getParent(), T, "ArgShadow"); - } - // Process 'this' pointer which points to PFWG lambda object - else if (Arg.getArgNo() == 0) { - PointerType *PtrT = dyn_cast(Arg.getType()); - assert(PtrT && "Expected this pointer as the first argument"); - T = PtrT->getPointerElementType(); - Shadow = spirv::createWGLocalVariable(*F.getParent(), T, "ArgShadow"); - } + Type *T = Arg.getParamByValType(); + GlobalVariable *Shadow = + spirv::createWGLocalVariable(*F.getParent(), T, "ArgShadow"); - if (!Shadow) - continue; + LLVMContext &Ctx = At.getContext(); + IRBuilder<> Builder(Ctx); + Builder.SetInsertPoint(&LeaderBB->front()); copyBetweenPrivateAndShadow(&Arg, Shadow, Builder, true /*private->shadow*/); @@ -866,9 +840,8 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F, for (auto *PFWICall : PFWICalls) fixupPrivateMemoryPFWILambdaCaptures(PFWICall); - // Finally, create shadows for and replace usages of byval pointer params and - // PFWG lambda object ('this' pointer). - sharePFWGPrivateObjects(F, TT); + // Finally, create shadows for and replace usages of byval pointer params. + shareByValParams(F, TT); #ifndef NDEBUG if (HaveChanges && Debug > 0) From 6c60344530fde1c9d4dfcc5d7a4b24aa9e757443 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Sun, 29 May 2022 07:51:24 -0700 Subject: [PATCH 02/15] Update pass tests. --- .../SYCLLowerIR/addrspacecast_handling.ll | 87 +++++++++---------- llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll | 70 +++++++-------- 2 files changed, 69 insertions(+), 88 deletions(-) diff --git a/llvm/test/SYCLLowerIR/addrspacecast_handling.ll b/llvm/test/SYCLLowerIR/addrspacecast_handling.ll index 6dbdbe301d5b5..73f6db933b3c7 100644 --- a/llvm/test/SYCLLowerIR/addrspacecast_handling.ll +++ b/llvm/test/SYCLLowerIR/addrspacecast_handling.ll @@ -9,73 +9,64 @@ %struct.bar = type { i64 } %struct.spam = type { i64, i64, i64, i64, i32 } -; CHECK: @[[SHADOW4:.*]] = internal unnamed_addr addrspace(3) global %struct.ham addrspace(4)* -; CHECK: @[[SHADOW3:.*]] = internal unnamed_addr addrspace(3) global %struct.spam -; CHECK: @[[SHADOW2:.*]] = internal unnamed_addr addrspace(3) global %struct.ham -; CHECK: @[[SHADOW1:.*]] = internal unnamed_addr addrspace(3) global %struct.bar - define linkonce_odr dso_local spir_func void @foo(%struct.ham addrspace(4)* dereferenceable_or_null(56) %arg, %struct.bar* byval(%struct.bar) align 8 %arg1) !work_group_scope !0 { ; CHECK-LABEL: @foo( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_HAM:%.*]] addrspace(4)*, align 8 -; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4 -; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) [[ATTR0:#.*]] -; CHECK-NEXT: [[CMPZ3:%.*]] = icmp eq i64 [[TMP0]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = alloca [[STRUCT_HAM:%.*]] addrspace(4)*, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4 +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: [[CMPZ3:%.*]] = icmp eq i64 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[CMPZ3]], label [[LEADER:%.*]], label [[MERGE:%.*]] ; CHECK: leader: -; CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.bar* [[ARG1:%.*]] to i8* -; CHECK-NEXT: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 8 bitcast (%struct.bar addrspace(3)* @[[SHADOW1]] to i8 addrspace(3)*), i8* align 8 [[TMP1]], i64 8, i1 false) -; CHECK-NEXT: [[TMP2:%.*]] = bitcast [[STRUCT_HAM]] addrspace(4)* [[ARG:%.*]] to i8 addrspace(4)* -; CHECK-NEXT: call void @llvm.memcpy.p3i8.p4i8.i64(i8 addrspace(3)* align 16 bitcast (%struct.ham addrspace(3)* @[[SHADOW2]] to i8 addrspace(3)*), i8 addrspace(4)* align 8 [[TMP2]], i64 24, i1 false) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.bar* [[ARG1:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 8 bitcast ([[STRUCT_BAR:%.*]] addrspace(3)* @ArgShadow to i8 addrspace(3)*), i8* align 8 [[TMP2]], i64 8, i1 false) ; CHECK-NEXT: br label [[MERGE]] ; CHECK: merge: -; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) [[ATTR0]] +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #[[ATTR0]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast %struct.bar* [[ARG1]] to i8* -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 8 [[TMP3]], i8 addrspace(3)* align 8 bitcast (%struct.bar addrspace(3)* @[[SHADOW1]] to i8 addrspace(3)*), i64 8, i1 false) -; CHECK-NEXT: [[TMP4:%.*]] = bitcast [[STRUCT_HAM]] addrspace(4)* [[ARG]] to i8 addrspace(4)* -; CHECK-NEXT: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 8 [[TMP4]], i8 addrspace(3)* align 16 bitcast (%struct.ham addrspace(3)* @[[SHADOW2]] to i8 addrspace(3)*), i64 24, i1 false) -; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast [[STRUCT_HAM]] addrspace(4)** [[TMP]] to [[STRUCT_HAM]] addrspace(4)* addrspace(4)* -; CHECK-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_SPAM:%.*]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast %struct.spam* [[TMP3]] to [[STRUCT_SPAM]] addrspace(4)* -; CHECK-NEXT: [[TMP5:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4 -; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) [[ATTR0]] -; CHECK-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[TMP5]], 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 8 [[TMP3]], i8 addrspace(3)* align 8 bitcast ([[STRUCT_BAR]] addrspace(3)* @ArgShadow to i8 addrspace(3)*), i64 8, i1 false) +; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast [[STRUCT_HAM]] addrspace(4)** [[TMP0]] to [[STRUCT_HAM]] addrspace(4)* addrspace(4)* +; CHECK-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_SPAM:%.*]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = addrspacecast %struct.spam* [[TMP5]] to [[STRUCT_SPAM]] addrspace(4)* +; CHECK-NEXT: [[TMP7:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4 +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #[[ATTR0]] +; CHECK-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[TMP7]], 0 ; CHECK-NEXT: br i1 [[CMPZ]], label [[WG_LEADER:%.*]], label [[WG_CF:%.*]] ; CHECK: wg_leader: -; CHECK-NEXT: store [[STRUCT_HAM]] addrspace(4)* [[ARG]], [[STRUCT_HAM]] addrspace(4)* addrspace(4)* [[TMP2]], align 8 +; CHECK-NEXT: store [[STRUCT_HAM]] addrspace(4)* [[ARG:%.*]], [[STRUCT_HAM]] addrspace(4)* addrspace(4)* [[TMP4]], align 8 ; CHECK-NEXT: br label [[WG_CF]] ; CHECK: wg_cf: -; CHECK-NEXT: [[TMP6:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4 -; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) [[ATTR0]] -; CHECK-NEXT: [[CMPZ2:%.*]] = icmp eq i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4 +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #[[ATTR0]] +; CHECK-NEXT: [[CMPZ2:%.*]] = icmp eq i64 [[TMP8]], 0 ; CHECK-NEXT: br i1 [[CMPZ2]], label [[TESTMAT:%.*]], label [[LEADERMAT:%.*]] ; CHECK: TestMat: -; CHECK-NEXT: [[TMP7:%.*]] = bitcast %struct.spam* [[TMP3]] to i8* -; CHECK-NEXT: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 16 bitcast (%struct.spam addrspace(3)* @[[SHADOW3]] to i8 addrspace(3)*), i8* align 8 [[TMP7]], i64 36, i1 false) -; CHECK-NEXT: [[MAT_LD:%.*]] = load [[STRUCT_HAM]] addrspace(4)*, [[STRUCT_HAM]] addrspace(4)** [[TMP]], align 8 -; CHECK-NEXT: store [[STRUCT_HAM]] addrspace(4)* [[MAT_LD]], [[STRUCT_HAM]] addrspace(4)* addrspace(3)* @[[SHADOW4]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast %struct.spam* [[TMP5]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 16 bitcast ([[STRUCT_SPAM]] addrspace(3)* @WGCopy.1 to i8 addrspace(3)*), i8* align 8 [[TMP9]], i64 36, i1 false) +; CHECK-NEXT: [[MAT_LD:%.*]] = load [[STRUCT_HAM]] addrspace(4)*, [[STRUCT_HAM]] addrspace(4)** [[TMP0]], align 8 +; CHECK-NEXT: store [[STRUCT_HAM]] addrspace(4)* [[MAT_LD]], [[STRUCT_HAM]] addrspace(4)* addrspace(3)* @WGCopy, align 8 ; CHECK-NEXT: br label [[LEADERMAT]] ; CHECK: LeaderMat: -; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) [[ATTR0]] -; CHECK-NEXT: [[MAT_LD1:%.*]] = load [[STRUCT_HAM]] addrspace(4)*, [[STRUCT_HAM]] addrspace(4)* addrspace(3)* @[[SHADOW4]], align 8 -; CHECK-NEXT: store [[STRUCT_HAM]] addrspace(4)* [[MAT_LD1]], [[STRUCT_HAM]] addrspace(4)** [[TMP]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast %struct.spam* [[TMP3]] to i8* -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 8 [[TMP8]], i8 addrspace(3)* align 16 bitcast (%struct.spam addrspace(3)* @[[SHADOW3]] to i8 addrspace(3)*), i64 36, i1 false) -; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) [[ATTR0]] -; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast %struct.bar* [[ARG1]] to [[STRUCT_BAR:%.*]] addrspace(4)* -; CHECK-NEXT: [[TMP6:%.*]] = addrspacecast [[STRUCT_SPAM]] addrspace(4)* [[TMP4]] to %struct.spam* -; CHECK-NEXT: call spir_func void @widget(%struct.bar addrspace(4)* dereferenceable_or_null(32) [[TMP5]], %struct.spam* byval(%struct.spam) align 8 [[TMP6]]) +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #[[ATTR0]] +; CHECK-NEXT: [[MAT_LD1:%.*]] = load [[STRUCT_HAM]] addrspace(4)*, [[STRUCT_HAM]] addrspace(4)* addrspace(3)* @WGCopy, align 8 +; CHECK-NEXT: store [[STRUCT_HAM]] addrspace(4)* [[MAT_LD1]], [[STRUCT_HAM]] addrspace(4)** [[TMP0]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct.spam* [[TMP5]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 8 [[TMP10]], i8 addrspace(3)* align 16 bitcast ([[STRUCT_SPAM]] addrspace(3)* @WGCopy.1 to i8 addrspace(3)*), i64 36, i1 false) +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #[[ATTR0]] +; CHECK-NEXT: [[TMP11:%.*]] = addrspacecast %struct.bar* [[ARG1]] to [[STRUCT_BAR]] addrspace(4)* +; CHECK-NEXT: [[TMP12:%.*]] = addrspacecast [[STRUCT_SPAM]] addrspace(4)* [[TMP6]] to %struct.spam* +; CHECK-NEXT: call spir_func void @widget([[STRUCT_BAR]] addrspace(4)* dereferenceable_or_null(32) [[TMP11]], %struct.spam* byval([[STRUCT_SPAM]]) align 8 [[TMP12]]) ; CHECK-NEXT: ret void ; bb: - %tmp = alloca %struct.ham addrspace(4)*, align 8 - %tmp2 = addrspacecast %struct.ham addrspace(4)** %tmp to %struct.ham addrspace(4)* addrspace(4)* - %tmp3 = alloca %struct.spam, align 8 - %tmp4 = addrspacecast %struct.spam* %tmp3 to %struct.spam addrspace(4)* - store %struct.ham addrspace(4)* %arg, %struct.ham addrspace(4)* addrspace(4)* %tmp2, align 8 - %tmp5 = addrspacecast %struct.bar* %arg1 to %struct.bar addrspace(4)* - %tmp6 = addrspacecast %struct.spam addrspace(4)* %tmp4 to %struct.spam* - call spir_func void @widget(%struct.bar addrspace(4)* dereferenceable_or_null(32) %tmp5, %struct.spam* byval(%struct.spam) align 8 %tmp6) + %0 = alloca %struct.ham addrspace(4)*, align 8 + %1 = addrspacecast %struct.ham addrspace(4)** %0 to %struct.ham addrspace(4)* addrspace(4)* + %2 = alloca %struct.spam, align 8 + %3 = addrspacecast %struct.spam* %2 to %struct.spam addrspace(4)* + store %struct.ham addrspace(4)* %arg, %struct.ham addrspace(4)* addrspace(4)* %1, align 8 + %4 = addrspacecast %struct.bar* %arg1 to %struct.bar addrspace(4)* + %5 = addrspacecast %struct.spam addrspace(4)* %3 to %struct.spam* + call spir_func void @widget(%struct.bar addrspace(4)* dereferenceable_or_null(32) %4, %struct.spam* byval(%struct.spam) align 8 %5) ret void } diff --git a/llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll b/llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll index 315521bbc4d63..36a618c7def4f 100644 --- a/llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll +++ b/llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll @@ -13,69 +13,59 @@ %struct.foo = type { %struct.barney } %struct.foo.0 = type { i8 } -; CHECK: @[[PFWG_SHADOW_PTR:.*]] = internal unnamed_addr addrspace(3) global %struct.bar addrspace(4)* -; CHECK: @[[PFWI_SHADOW:.*]] = internal unnamed_addr addrspace(3) global %struct.foo.0 -; CHECK: @[[PFWG_SHADOW:.*]] = internal unnamed_addr addrspace(3) global %struct.bar -; CHECK: @[[GROUP_SHADOW:.*]] = internal unnamed_addr addrspace(3) global %struct.zot define internal spir_func void @wibble(%struct.bar addrspace(4)* %arg, %struct.zot* byval(%struct.zot) align 8 %arg1) align 2 !work_group_scope !0 { ; CHECK-LABEL: @wibble( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_BAR:%.*]] addrspace(4)*, align 8 -; CHECK-NEXT: [[TMP_FOO:%.*]] = alloca [[STRUCT_FOO_0:%.*]], align 1 -; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4 -; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) [[ATTR0:#.*]] -; CHECK-NEXT: [[CMPZ3:%.*]] = icmp eq i64 [[TMP0]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = alloca [[STRUCT_BAR:%.*]] addrspace(4)*, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_FOO_0:%.*]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4 +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: [[CMPZ3:%.*]] = icmp eq i64 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[CMPZ3]], label [[LEADER:%.*]], label [[MERGE:%.*]] ; CHECK: leader: -; CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.zot* [[ARG1:%.*]] to i8* -; CHECK-NEXT: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 16 bitcast (%struct.zot addrspace(3)* @[[GROUP_SHADOW]] to i8 addrspace(3)*), i8* align 8 [[TMP1]], i64 96, i1 false) -; CHECK-NEXT: [[TMP2:%.*]] = bitcast [[STRUCT_BAR]] addrspace(4)* [[ARG:%.*]] to i8 addrspace(4)* -; CHECK-NEXT: call void @llvm.memcpy.p3i8.p4i8.i64(i8 addrspace(3)* align 8 getelementptr inbounds (%struct.bar, [[STRUCT_BAR]] addrspace(3)* @[[PFWG_SHADOW]], i32 0, i32 0), i8 addrspace(4)* align 8 [[TMP2]], i64 1, i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast %struct.zot* [[ARG1:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 16 bitcast ([[STRUCT_ZOT:%.*]] addrspace(3)* @ArgShadow to i8 addrspace(3)*), i8* align 8 [[TMP3]], i64 96, i1 false) ; CHECK-NEXT: br label [[MERGE]] ; CHECK: merge: -; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) [[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast %struct.zot* [[ARG1]] to i8* -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 8 [[TMP3]], i8 addrspace(3)* align 16 bitcast (%struct.zot addrspace(3)* @[[GROUP_SHADOW]] to i8 addrspace(3)*), i64 96, i1 false) -; CHECK-NEXT: [[TMP4:%.*]] = bitcast [[STRUCT_BAR]] addrspace(4)* [[ARG]] to i8 addrspace(4)* -; CHECK-NEXT: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 8 [[TMP4]], i8 addrspace(3)* align 8 getelementptr inbounds (%struct.bar, [[STRUCT_BAR]] addrspace(3)* @[[PFWG_SHADOW]], i32 0, i32 0), i64 1, i1 false) +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #[[ATTR0]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast %struct.zot* [[ARG1]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 8 [[TMP4]], i8 addrspace(3)* align 16 bitcast ([[STRUCT_ZOT]] addrspace(3)* @ArgShadow to i8 addrspace(3)*), i64 96, i1 false) ; CHECK-NEXT: [[TMP5:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4 -; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) [[ATTR0]] +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #[[ATTR0]] ; CHECK-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[TMP5]], 0 ; CHECK-NEXT: br i1 [[CMPZ]], label [[WG_LEADER:%.*]], label [[WG_CF:%.*]] ; CHECK: wg_leader: -; CHECK-NEXT: store [[STRUCT_BAR]] addrspace(4)* [[ARG]], [[STRUCT_BAR]] addrspace(4)** [[TMP]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_BAR]] addrspace(4)*, [[STRUCT_BAR]] addrspace(4)** [[TMP]], align 8 +; CHECK-NEXT: store [[STRUCT_BAR]] addrspace(4)* [[ARG:%.*]], [[STRUCT_BAR]] addrspace(4)** [[TMP0]], align 8 ; CHECK-NEXT: br label [[WG_CF]] ; CHECK: wg_cf: ; CHECK-NEXT: [[TMP6:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4 -; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) [[ATTR0]] +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #[[ATTR0]] ; CHECK-NEXT: [[CMPZ2:%.*]] = icmp eq i64 [[TMP6]], 0 ; CHECK-NEXT: br i1 [[CMPZ2]], label [[TESTMAT:%.*]], label [[LEADERMAT:%.*]] ; CHECK: TestMat: -; CHECK-NEXT: [[TMP7:%.*]] = bitcast %struct.foo.0* [[TMP_FOO]] to i8* -; CHECK-NEXT: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 8 getelementptr inbounds (%struct.foo.0, [[STRUCT_FOO_0]] addrspace(3)* @[[PFWI_SHADOW]], i32 0, i32 0), i8* align 1 [[TMP7]], i64 1, i1 false) -; CHECK-NEXT: [[MAT_LD:%.*]] = load [[STRUCT_BAR]] addrspace(4)*, [[STRUCT_BAR]] addrspace(4)** [[TMP]], align 8 -; CHECK-NEXT: store [[STRUCT_BAR]] addrspace(4)* [[MAT_LD]], [[STRUCT_BAR]] addrspace(4)* addrspace(3)* @[[PFWG_SHADOW_PTR]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast %struct.foo.0* [[TMP1]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 8 getelementptr inbounds ([[STRUCT_FOO_0]], [[STRUCT_FOO_0]] addrspace(3)* @WGCopy.1, i32 0, i32 0), i8* align 1 [[TMP7]], i64 1, i1 false) +; CHECK-NEXT: [[MAT_LD:%.*]] = load [[STRUCT_BAR]] addrspace(4)*, [[STRUCT_BAR]] addrspace(4)** [[TMP0]], align 8 +; CHECK-NEXT: store [[STRUCT_BAR]] addrspace(4)* [[MAT_LD]], [[STRUCT_BAR]] addrspace(4)* addrspace(3)* @WGCopy, align 8 ; CHECK-NEXT: br label [[LEADERMAT]] ; CHECK: LeaderMat: -; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) [[ATTR0]] -; CHECK-NEXT: [[MAT_LD1:%.*]] = load [[STRUCT_BAR]] addrspace(4)*, [[STRUCT_BAR]] addrspace(4)* addrspace(3)* @[[PFWG_SHADOW_PTR]], align 8 -; CHECK-NEXT: store [[STRUCT_BAR]] addrspace(4)* [[MAT_LD1]], [[STRUCT_BAR]] addrspace(4)** [[TMP]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast %struct.foo.0* [[TMP_FOO]] to i8* -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 1 [[TMP8]], i8 addrspace(3)* align 8 getelementptr inbounds (%struct.foo.0, [[STRUCT_FOO_0]] addrspace(3)* @[[PFWI_SHADOW]], i32 0, i32 0), i64 1, i1 false) -; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) [[ATTR0]] -; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast %struct.zot* [[ARG1]] to [[STRUCT_ZOT:%.*]] addrspace(4)* -; CHECK-NEXT: call spir_func void @bar(%struct.zot addrspace(4)* [[TMP4]], %struct.foo.0* byval(%struct.foo.0) align 1 [[TMP_FOO]]) +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #[[ATTR0]] +; CHECK-NEXT: [[MAT_LD1:%.*]] = load [[STRUCT_BAR]] addrspace(4)*, [[STRUCT_BAR]] addrspace(4)* addrspace(3)* @WGCopy, align 8 +; CHECK-NEXT: store [[STRUCT_BAR]] addrspace(4)* [[MAT_LD1]], [[STRUCT_BAR]] addrspace(4)** [[TMP0]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast %struct.foo.0* [[TMP1]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 1 [[TMP8]], i8 addrspace(3)* align 8 getelementptr inbounds ([[STRUCT_FOO_0]], [[STRUCT_FOO_0]] addrspace(3)* @WGCopy.1, i32 0, i32 0), i64 1, i1 false) +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #[[ATTR0]] +; CHECK-NEXT: [[TMP9:%.*]] = addrspacecast %struct.zot* [[ARG1]] to [[STRUCT_ZOT]] addrspace(4)* +; CHECK-NEXT: call spir_func void @bar([[STRUCT_ZOT]] addrspace(4)* [[TMP9]], %struct.foo.0* byval([[STRUCT_FOO_0]]) align 1 [[TMP1]]) ; CHECK-NEXT: ret void ; bb: - %tmp = alloca %struct.bar addrspace(4)*, align 8 - %tmp2 = alloca %struct.foo.0, align 1 - store %struct.bar addrspace(4)* %arg, %struct.bar addrspace(4)** %tmp, align 8 - %tmp3 = load %struct.bar addrspace(4)*, %struct.bar addrspace(4)** %tmp, align 8 - %tmp4 = addrspacecast %struct.zot* %arg1 to %struct.zot addrspace(4)* - call spir_func void @bar(%struct.zot addrspace(4)* %tmp4, %struct.foo.0* byval(%struct.foo.0) align 1 %tmp2) + %0 = alloca %struct.bar addrspace(4)*, align 8 + %1 = alloca %struct.foo.0, align 1 + store %struct.bar addrspace(4)* %arg, %struct.bar addrspace(4)** %0, align 8 + %2 = addrspacecast %struct.zot* %arg1 to %struct.zot addrspace(4)* + call spir_func void @bar(%struct.zot addrspace(4)* %2, %struct.foo.0* byval(%struct.foo.0) align 1 %1) ret void } From 3530e1d8f2068b47ef9e936cd88814e70110f23e Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Sun, 29 May 2022 07:52:16 -0700 Subject: [PATCH 03/15] Improve debug infomation for LowerWGScope pass. --- llvm/lib/SYCLLowerIR/LowerWGScope.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp index a5469dede26b0..65984b44bdcda 100644 --- a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp +++ b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp @@ -740,6 +740,7 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F, FunctionAnalysisManager &FAM) { if (!F.getMetadata(WG_SCOPE_MD)) return PreservedAnalyses::all(); + LLVM_DEBUG(llvm::dbgs() << "Function name: " << F.getName() << "\n"); const auto &TT = llvm::Triple(F.getParent()->getTargetTriple()); // Ranges of "side effect" instructions SmallVector Ranges; From 0e13ee972db037ab06624e317fa433e091d063ac Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Sun, 29 May 2022 07:53:21 -0700 Subject: [PATCH 04/15] Mark auto-generated kernel with work-group metadata. --- clang/lib/Sema/SemaSYCL.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 98f90d8c602da..56a3df2081d03 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -980,7 +980,7 @@ static CXXMethodDecl *getMethodByName(const CXXRecordDecl *CRD, } static KernelInvocationKind -getKernelInvocationKind(FunctionDecl *KernelCallerFunc) { +getKernelInvocationKind(const FunctionDecl *KernelCallerFunc) { return llvm::StringSwitch(KernelCallerFunc->getName()) .Case("kernel_single_task", InvokeSingleTask) .Case("kernel_parallel_for", InvokeParallelFor) @@ -2853,6 +2853,10 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { CollectionInitExprs.push_back(createInitListExpr(KernelObj)); markParallelWorkItemCalls(); + if (getKernelInvocationKind(KernelCallerFunc) == InvokeParallelForWorkGroup) + DC.getKernelDecl()->addAttr(SYCLScopeAttr::CreateImplicit( + S.Context, SYCLScopeAttr::Level::WorkGroup)); + Stmt *DS = new (S.Context) DeclStmt(DeclGroupRef(KernelObjClone), KernelCallerSrcLoc, KernelCallerSrcLoc); BodyStmts.push_back(DS); From e1511a7ae83a11e87c077895c216fb1960d118b9 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Sun, 29 May 2022 08:53:39 -0700 Subject: [PATCH 05/15] Fix formatting. --- llvm/lib/SYCLLowerIR/LowerWGScope.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp index 65984b44bdcda..3ef923a780de0 100644 --- a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp +++ b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp @@ -713,12 +713,12 @@ static void shareByValParams(Function &F, const Triple &TT) { continue; assert(Arg.getType()->getPointerAddressSpace() == - asUInt(spirv::AddrSpace::Private)); + asUInt(spirv::AddrSpace::Private)); // Create the shared copy - "shadow" - for current arg Type *T = Arg.getParamByValType(); GlobalVariable *Shadow = - spirv::createWGLocalVariable(*F.getParent(), T, "ArgShadow"); + spirv::createWGLocalVariable(*F.getParent(), T, "ArgShadow"); LLVMContext &Ctx = At.getContext(); IRBuilder<> Builder(Ctx); From eb6d2d7e21b85acc6ccc7045169f85aa90cb30f7 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Tue, 31 May 2022 06:07:03 -0700 Subject: [PATCH 06/15] Add TODO comment for a future improvement. --- llvm/lib/SYCLLowerIR/LowerWGScope.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp index 3ef923a780de0..6c971d7d486ae 100644 --- a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp +++ b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp @@ -65,6 +65,19 @@ // (1) - materialization of a PFWI object // (2) - "fixup" of the private variable address. // +// TODO: add support for the case when there are other functions between +// parallel_for_work_group and parallel_for_work_item in the call stack. +// For example: +// +// void foo(sycl::group<1> group, ...) { +// group.parallel_for_work_item(range<1>(), [&](h_item<1> i) { ... }); +// } +// ... +// cgh.parallel_for_work_group( +// range<1>(...), range<1>(...), [=](group<1> g) { +// foo(g, ...); +// }); +// // TODO The approach employed by this pass generates lots of barriers and data // copying between private and local memory, which might not be efficient. There // are optimization opportunities listed below. Also other approaches can be From 969b468bb85f13c86d2cc2e0795ddbaab8db592f Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Tue, 31 May 2022 06:28:46 -0700 Subject: [PATCH 07/15] Revert "Mark auto-generated kernel with work-group metadata." This reverts commit 0e13ee972db037ab06624e317fa433e091d063ac. --- clang/lib/Sema/SemaSYCL.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 56a3df2081d03..98f90d8c602da 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -980,7 +980,7 @@ static CXXMethodDecl *getMethodByName(const CXXRecordDecl *CRD, } static KernelInvocationKind -getKernelInvocationKind(const FunctionDecl *KernelCallerFunc) { +getKernelInvocationKind(FunctionDecl *KernelCallerFunc) { return llvm::StringSwitch(KernelCallerFunc->getName()) .Case("kernel_single_task", InvokeSingleTask) .Case("kernel_parallel_for", InvokeParallelFor) @@ -2853,10 +2853,6 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { CollectionInitExprs.push_back(createInitListExpr(KernelObj)); markParallelWorkItemCalls(); - if (getKernelInvocationKind(KernelCallerFunc) == InvokeParallelForWorkGroup) - DC.getKernelDecl()->addAttr(SYCLScopeAttr::CreateImplicit( - S.Context, SYCLScopeAttr::Level::WorkGroup)); - Stmt *DS = new (S.Context) DeclStmt(DeclGroupRef(KernelObjClone), KernelCallerSrcLoc, KernelCallerSrcLoc); BodyStmts.push_back(DS); From ca26f572ceb2c74b23cab92d55b172a3a9b56b80 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Tue, 7 Jun 2022 07:51:21 -0700 Subject: [PATCH 08/15] Add work-group scope attribute to the SYCL kernel Kernel objects passed to parallel_for_work_group function must be shared among all work-items withing a work-group. --- clang/lib/Sema/SemaSYCL.cpp | 14 ++++++++++---- clang/test/SemaSYCL/kernel-handler.cpp | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 98f90d8c602da..4bf0274d0584f 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2763,14 +2763,19 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { } static VarDecl *createKernelObjClone(ASTContext &Ctx, DeclContext *DC, - const CXXRecordDecl *KernelObj) { + const CXXRecordDecl *KernelObj, + FunctionDecl *KernelCallerFunc) { TypeSourceInfo *TSInfo = KernelObj->isLambda() ? KernelObj->getLambdaTypeInfo() : nullptr; + auto Type = QualType(KernelObj->getTypeForDecl(), 0); + Type->getAsRecordDecl()->setAnonymousStructOrUnion(true); VarDecl *VD = VarDecl::Create( Ctx, DC, KernelObj->getLocation(), KernelObj->getLocation(), - KernelObj->getIdentifier(), QualType(KernelObj->getTypeForDecl(), 0), + KernelObj->getIdentifier(), Type, TSInfo, SC_None); - + if (getKernelInvocationKind(KernelCallerFunc) == InvokeParallelForWorkGroup) + VD->addAttr( + SYCLScopeAttr::CreateImplicit(Ctx, SYCLScopeAttr::Level::WorkGroup)); return VD; } @@ -2846,7 +2851,8 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { FunctionDecl *KernelCallerFunc) : SyclKernelFieldHandler(S), DeclCreator(DC), KernelObjClone(createKernelObjClone(S.getASTContext(), - DC.getKernelDecl(), KernelObj)), + DC.getKernelDecl(), KernelObj, + KernelCallerFunc)), VarEntity(InitializedEntity::InitializeVariable(KernelObjClone)), KernelObj(KernelObj), KernelCallerFunc(KernelCallerFunc), KernelCallerSrcLoc(KernelCallerFunc->getLocation()) { diff --git a/clang/test/SemaSYCL/kernel-handler.cpp b/clang/test/SemaSYCL/kernel-handler.cpp index 24b7d4af3d2f2..c4b5cfe1b2580 100644 --- a/clang/test/SemaSYCL/kernel-handler.cpp +++ b/clang/test/SemaSYCL/kernel-handler.cpp @@ -75,6 +75,7 @@ int main() { // NONATIVESUPPORT-NEXT: InitListExpr // NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'int' // NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_a' 'int' +// NONATIVESUPPORT-NEXT: SYCLScopeAttr {{.*}} Implicit WorkGroup // Check declaration and initialization of kernel handler local clone using default constructor // NONATIVESUPPORT-NEXT: DeclStmt From 1a61c46b7f9fae7cca40262a5a3fde6947b74dcf Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Tue, 7 Jun 2022 08:01:29 -0700 Subject: [PATCH 09/15] clang-format --- clang/lib/Sema/SemaSYCL.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 4bf0274d0584f..1b7fc8a842cbc 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2771,8 +2771,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { Type->getAsRecordDecl()->setAnonymousStructOrUnion(true); VarDecl *VD = VarDecl::Create( Ctx, DC, KernelObj->getLocation(), KernelObj->getLocation(), - KernelObj->getIdentifier(), Type, - TSInfo, SC_None); + KernelObj->getIdentifier(), Type, TSInfo, SC_None); if (getKernelInvocationKind(KernelCallerFunc) == InvokeParallelForWorkGroup) VD->addAttr( SYCLScopeAttr::CreateImplicit(Ctx, SYCLScopeAttr::Level::WorkGroup)); From b970a77fe10cf6ddc4fc812de202ea6ec61d4c09 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Sun, 12 Jun 2022 07:35:34 -0700 Subject: [PATCH 10/15] Set anonymous struct flag for lambda types only. Small refactoring. --- clang/lib/Sema/SemaSYCL.cpp | 78 +++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 1b7fc8a842cbc..9bbdf075c9f0d 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2505,38 +2505,44 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { return CompoundStmt::Create(SemaRef.getASTContext(), BodyStmts, {}, {}); } - void markParallelWorkItemCalls() { - if (getKernelInvocationKind(KernelCallerFunc) == - InvokeParallelForWorkGroup) { - // Fetch the kernel object and the associated call operator - // (of either the lambda or the function object). - CXXRecordDecl *KernelObj = - GetSYCLKernelObjectType(KernelCallerFunc)->getAsCXXRecordDecl(); - CXXMethodDecl *WGLambdaFn = nullptr; - if (KernelObj->isLambda()) - WGLambdaFn = KernelObj->getLambdaCallOperator(); - else - WGLambdaFn = getOperatorParens(KernelObj); - assert(WGLambdaFn && "non callable object is passed as kernel obj"); - // Mark the function that it "works" in a work group scope: - // NOTE: In case of parallel_for_work_item the marker call itself is - // marked with work item scope attribute, here the '()' operator of the - // object passed as parameter is marked. This is an optimization - - // there are a lot of locals created at parallel_for_work_group - // scope before calling the lambda - it is more efficient to have - // all of them in the private address space rather then sharing via - // the local AS. See parallel_for_work_group implementation in the - // SYCL headers. - if (!WGLambdaFn->hasAttr()) { - WGLambdaFn->addAttr(SYCLScopeAttr::CreateImplicit( - SemaRef.getASTContext(), SYCLScopeAttr::Level::WorkGroup)); - // Search and mark parallel_for_work_item calls: - MarkWIScopeFnVisitor MarkWIScope(SemaRef.getASTContext()); - MarkWIScope.TraverseDecl(WGLambdaFn); - // Now mark local variables declared in the PFWG lambda with work group - // scope attribute - addScopeAttrToLocalVars(*WGLambdaFn); - } + void annotateHierarchicalParallelismAPICalls() { + // Is this a hierarchical parallelism kernel invocation? + if (getKernelInvocationKind(KernelCallerFunc) != InvokeParallelForWorkGroup) + return; + + // Mark kernel object with work-group scope attribute to avoid work-item + // scope memory allocation. + KernelObjClone->addAttr(SYCLScopeAttr::CreateImplicit( + SemaRef.getASTContext(), SYCLScopeAttr::Level::WorkGroup)); + + // Fetch the kernel object and the associated call operator + // (of either the lambda or the function object). + CXXRecordDecl *KernelObj = + GetSYCLKernelObjectType(KernelCallerFunc)->getAsCXXRecordDecl(); + CXXMethodDecl *WGLambdaFn = nullptr; + if (KernelObj->isLambda()) + WGLambdaFn = KernelObj->getLambdaCallOperator(); + else + WGLambdaFn = getOperatorParens(KernelObj); + assert(WGLambdaFn && "non callable object is passed as kernel obj"); + // Mark the function that it "works" in a work group scope: + // NOTE: In case of parallel_for_work_item the marker call itself is + // marked with work item scope attribute, here the '()' operator of the + // object passed as parameter is marked. This is an optimization - + // there are a lot of locals created at parallel_for_work_group + // scope before calling the lambda - it is more efficient to have + // all of them in the private address space rather then sharing via + // the local AS. See parallel_for_work_group implementation in the + // SYCL headers. + if (!WGLambdaFn->hasAttr()) { + WGLambdaFn->addAttr(SYCLScopeAttr::CreateImplicit( + SemaRef.getASTContext(), SYCLScopeAttr::Level::WorkGroup)); + // Search and mark parallel_for_work_item calls: + MarkWIScopeFnVisitor MarkWIScope(SemaRef.getASTContext()); + MarkWIScope.TraverseDecl(WGLambdaFn); + // Now mark local variables declared in the PFWG lambda with work group + // scope attribute + addScopeAttrToLocalVars(*WGLambdaFn); } } @@ -2768,13 +2774,11 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { TypeSourceInfo *TSInfo = KernelObj->isLambda() ? KernelObj->getLambdaTypeInfo() : nullptr; auto Type = QualType(KernelObj->getTypeForDecl(), 0); - Type->getAsRecordDecl()->setAnonymousStructOrUnion(true); + if (KernelObj->isLambda()) + Type->getAsRecordDecl()->setAnonymousStructOrUnion(true); VarDecl *VD = VarDecl::Create( Ctx, DC, KernelObj->getLocation(), KernelObj->getLocation(), KernelObj->getIdentifier(), Type, TSInfo, SC_None); - if (getKernelInvocationKind(KernelCallerFunc) == InvokeParallelForWorkGroup) - VD->addAttr( - SYCLScopeAttr::CreateImplicit(Ctx, SYCLScopeAttr::Level::WorkGroup)); return VD; } @@ -2856,7 +2860,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { KernelObj(KernelObj), KernelCallerFunc(KernelCallerFunc), KernelCallerSrcLoc(KernelCallerFunc->getLocation()) { CollectionInitExprs.push_back(createInitListExpr(KernelObj)); - markParallelWorkItemCalls(); + annotateHierarchicalParallelismAPICalls(); Stmt *DS = new (S.Context) DeclStmt(DeclGroupRef(KernelObjClone), KernelCallerSrcLoc, KernelCallerSrcLoc); From 3886d8d732b2b88595d6ccc26a433a136a2f61fa Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Sun, 12 Jun 2022 07:40:20 -0700 Subject: [PATCH 11/15] Remove unused function parameter. --- clang/lib/Sema/SemaSYCL.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 9bbdf075c9f0d..1126b42b80f8f 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2769,8 +2769,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { } static VarDecl *createKernelObjClone(ASTContext &Ctx, DeclContext *DC, - const CXXRecordDecl *KernelObj, - FunctionDecl *KernelCallerFunc) { + const CXXRecordDecl *KernelObj) { TypeSourceInfo *TSInfo = KernelObj->isLambda() ? KernelObj->getLambdaTypeInfo() : nullptr; auto Type = QualType(KernelObj->getTypeForDecl(), 0); @@ -2854,8 +2853,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { FunctionDecl *KernelCallerFunc) : SyclKernelFieldHandler(S), DeclCreator(DC), KernelObjClone(createKernelObjClone(S.getASTContext(), - DC.getKernelDecl(), KernelObj, - KernelCallerFunc)), + DC.getKernelDecl(), KernelObj)), VarEntity(InitializedEntity::InitializeVariable(KernelObjClone)), KernelObj(KernelObj), KernelCallerFunc(KernelCallerFunc), KernelCallerSrcLoc(KernelCallerFunc->getLocation()) { From 959e1aa925dfd4096c855caa127c2d39b736f1f2 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Mon, 13 Jun 2022 07:45:00 -0700 Subject: [PATCH 12/15] Apply code review suggestion. --- clang/lib/Sema/SemaSYCL.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 1126b42b80f8f..4236db8191b46 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2773,8 +2773,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { TypeSourceInfo *TSInfo = KernelObj->isLambda() ? KernelObj->getLambdaTypeInfo() : nullptr; auto Type = QualType(KernelObj->getTypeForDecl(), 0); - if (KernelObj->isLambda()) - Type->getAsRecordDecl()->setAnonymousStructOrUnion(true); + Type->getAsRecordDecl()->setAnonymousStructOrUnion(KernelObj->isLambda()); VarDecl *VD = VarDecl::Create( Ctx, DC, KernelObj->getLocation(), KernelObj->getLocation(), KernelObj->getIdentifier(), Type, TSInfo, SC_None); From 2d987acaaf50c659899751f1b5484359e308fee5 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Tue, 14 Jun 2022 10:29:38 -0700 Subject: [PATCH 13/15] Give a name to SYCL kernel object to make mangle-able static version of it. --- clang/lib/Sema/SemaSYCL.cpp | 12 ++++++++---- clang/test/CodeGenSYCL/accessor_inheritance.cpp | 2 +- clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp | 4 ++-- clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp | 2 +- clang/test/CodeGenSYCL/kernel-param-acc-array.cpp | 2 +- .../CodeGenSYCL/kernel-param-member-acc-array.cpp | 4 ++-- clang/test/CodeGenSYCL/kernel-param-pod-array.cpp | 12 ++++++------ .../CodeGenSYCL/no_opaque_accessor_inheritance.cpp | 2 +- .../CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp | 4 ++-- .../CodeGenSYCL/no_opaque_kernel-param-acc-array.cpp | 2 +- .../no_opaque_kernel-param-member-acc-array.cpp | 4 ++-- .../CodeGenSYCL/no_opaque_kernel-param-pod-array.cpp | 12 ++++++------ clang/test/CodeGenSYCL/no_opaque_sampler.cpp | 8 ++++---- .../CodeGenSYCL/no_opaque_union-kernel-param.cpp | 2 +- clang/test/CodeGenSYCL/sampler.cpp | 8 ++++---- clang/test/CodeGenSYCL/union-kernel-param.cpp | 6 +----- clang/test/SemaSYCL/accessor_inheritance.cpp | 2 +- clang/test/SemaSYCL/array-kernel-param.cpp | 6 +++--- clang/test/SemaSYCL/basic-kernel-wrapper.cpp | 2 +- clang/test/SemaSYCL/inheritance.cpp | 2 +- 20 files changed, 49 insertions(+), 49 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 4236db8191b46..26b5b71e7379a 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2772,11 +2772,15 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { const CXXRecordDecl *KernelObj) { TypeSourceInfo *TSInfo = KernelObj->isLambda() ? KernelObj->getLambdaTypeInfo() : nullptr; - auto Type = QualType(KernelObj->getTypeForDecl(), 0); - Type->getAsRecordDecl()->setAnonymousStructOrUnion(KernelObj->isLambda()); + // Generate a name kernel object variable. + Twine Name = Twine("__SYCLKernel"); + SmallString<12> Buffer; + StringRef NameRef = Name.toStringRef(Buffer); + auto &Ident = Ctx.Idents.getOwn(NameRef); + VarDecl *VD = VarDecl::Create( - Ctx, DC, KernelObj->getLocation(), KernelObj->getLocation(), - KernelObj->getIdentifier(), Type, TSInfo, SC_None); + Ctx, DC, KernelObj->getLocation(), KernelObj->getLocation(), &Ident, + QualType(KernelObj->getTypeForDecl(), 0), TSInfo, SC_None); return VD; } diff --git a/clang/test/CodeGenSYCL/accessor_inheritance.cpp b/clang/test/CodeGenSYCL/accessor_inheritance.cpp index 32a130e15e869..9f9995f5c21d3 100644 --- a/clang/test/CodeGenSYCL/accessor_inheritance.cpp +++ b/clang/test/CodeGenSYCL/accessor_inheritance.cpp @@ -50,7 +50,7 @@ int main() { // CHECK: [[ARG_C]].addr.ascast = addrspacecast ptr [[ARG_C]].addr to ptr addrspace(4) // // Lambda object alloca -// CHECK: [[KERNEL_OBJ:%[a-zA-Z0-9_]+]] = addrspacecast ptr [[KERNEL]] to ptr addrspace(4) +// CHECK: [[KERNEL_OBJ:%[a-zA-Z0-9_.]+]] = addrspacecast ptr [[KERNEL]] to ptr addrspace(4) // // Kernel argument stores // CHECK: store i32 [[ARG_A]], ptr addrspace(4) [[ARG_A]].addr.ascast diff --git a/clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp b/clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp index 29798dffc28bf..41b9eb43b686f 100644 --- a/clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp +++ b/clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp @@ -27,12 +27,12 @@ int main() { // Check alloca for pointer argument // CHECK: [[MEM_ARG]].addr = alloca ptr addrspace(1) // Check lambda object alloca -// CHECK: [[ANONALLOCA:%[0-9]+]] = alloca %class.anon +// CHECK: [[ANONALLOCA:%[a-zA-Z0-9_]+]] = alloca %class.anon // Check allocas for ranges // CHECK: [[ARANGEA:%agg.tmp.*]] = alloca %"struct.cl::sycl::range" // CHECK: [[MRANGEA:%agg.tmp.*]] = alloca %"struct.cl::sycl::range" // CHECK: [[OIDA:%agg.tmp.*]] = alloca %"struct.cl::sycl::id" -// CHECK: [[ANON:%[0-9]+]] = addrspacecast ptr [[ANONALLOCA]] to ptr addrspace(4) +// CHECK: [[ANON:%[a-zA-Z0-9_.]+]] = addrspacecast ptr [[ANONALLOCA]] to ptr addrspace(4) // CHECK: [[ARANGET:%agg.tmp.*]] = addrspacecast ptr [[ARANGEA]] to ptr addrspace(4) // CHECK: [[MRANGET:%agg.tmp.*]] = addrspacecast ptr [[MRANGEA]] to ptr addrspace(4) // CHECK: [[OIDT:%agg.tmp.*]] = addrspacecast ptr [[OIDA]] to ptr addrspace(4) diff --git a/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp b/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp index 7b61d4bc32250..5ebbdeec9f0b1 100644 --- a/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp +++ b/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp @@ -25,7 +25,7 @@ int main() { } // CHECK: define{{.*}} spir_kernel {{.*}}19use_kernel_for_test({{.*}}){{.*}} !dbg [[KERNEL:![0-9]+]] {{.*}}{ -// CHECK: getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{[0-9]+}}, i32 0, i32 0, !dbg [[LINE_A0:![0-9]+]] +// CHECK: getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{.*}}, i32 0, i32 0, !dbg [[LINE_A0:![0-9]+]] // CHECK: call spir_func void {{.*}}6__init{{.*}} !dbg [[LINE_A0]] // CHECK: call spir_func void @_ZZ4mainENKUlvE_clEv{{.*}} !dbg [[LINE_B0:![0-9]+]] // CHECK: ret void, !dbg [[LINE_C0:![0-9]+]] diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp index efddb3c277d92..06f10212a5d61 100644 --- a/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp @@ -39,7 +39,7 @@ int main() { // CHECK: [[MEM_ARG2:%[a-zA-Z0-9_.]+]] = alloca ptr addrspace(1), align 8 // CHECK lambda object alloca -// CHECK: [[LOCAL_OBJECTA:%0]] = alloca %class.anon, align 4 +// CHECK: [[LOCAL_OBJECTA:%__SYCLKernel]] = alloca %class.anon, align 4 // CHECK allocas for ranges // CHECK: [[ACC_RANGE1A:%[a-zA-Z0-9_.]+]] = alloca %"struct.cl::sycl::range" diff --git a/clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp b/clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp index 6e6a63b0804d7..212f916b26166 100644 --- a/clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp @@ -42,7 +42,7 @@ int main() { // CHECK: [[MEM_ARG1]].addr{{[0-9]*}} = alloca ptr addrspace(1), align 8 // Check lambda object alloca -// CHECK: [[LOCAL_OBJECTA:%0]] = alloca %class{{.*}}.anon, align 4 +// CHECK: [[LOCAL_OBJECTA:%__SYCLKernel]] = alloca %class{{.*}}.anon, align 4 // Check allocas for ranges // CHECK: [[ACC_RANGE1A:%[a-zA-Z0-9_.]+]] = alloca %"struct.cl::sycl::range" @@ -53,7 +53,7 @@ int main() { // CHECK: [[OFFSET2A:%[a-zA-Z0-9_.]+]] = alloca %"struct.cl::sycl::id" // Check lambda object addrspacecast -// CHECK: [[LOCAL_OBJECT:%.*]] = addrspacecast ptr %0 to ptr addrspace(4) +// CHECK: [[LOCAL_OBJECT:%.*]] = addrspacecast ptr %__SYCLKernel to ptr addrspace(4) // Check addrspacecast for ranges // CHECK: [[ACC_RANGE1AS:%.*]] = addrspacecast ptr [[ACC_RANGE1A]] to ptr addrspace(4) diff --git a/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp b/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp index 3a54a5e305c9b..767a5fd7f0198 100644 --- a/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp @@ -49,8 +49,8 @@ int main() { // CHECK-SAME:(ptr noundef byval(%struct{{.*}}.__wrapper_class) align 4 %[[ARR_ARG:.*]]) // Check local lambda object alloca -// CHECK: %[[LOCAL_OBJECTA:[0-9]+]] = alloca %class{{.*}}.anon, align 4 -// CHECK: %[[LOCAL_OBJECT:[0-9]+]] = addrspacecast ptr %[[LOCAL_OBJECTA]] to ptr addrspace(4) +// CHECK: %[[LOCAL_OBJECTA:[a-zA-Z0-9_]+]] = alloca %class{{.*}}.anon, align 4 +// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[LOCAL_OBJECTA]] to ptr addrspace(4) // Check for Array init loop // CHECK: %[[LAMBDA_PTR:.+]] = getelementptr inbounds %class{{.*}}.anon, ptr addrspace(4) %[[LOCAL_OBJECT]], i32 0, i32 0 @@ -74,8 +74,8 @@ int main() { // CHECK-SAME:(ptr noundef byval(%struct{{.*}}.__wrapper_class{{.*}}) align 4 %[[ARR_ARG:.*]]) // Check local lambda object alloca -// CHECK: %[[LOCAL_OBJECTA:[0-9]+]] = alloca %class{{.*}}.anon{{.*}}, align 4 -// CHECK: %[[LOCAL_OBJECT:[0-9]+]] = addrspacecast ptr %[[LOCAL_OBJECTA]] to ptr addrspace(4) +// CHECK: %[[LOCAL_OBJECTA:[a-zA-Z0-9_]+]] = alloca %class{{.*}}.anon{{.*}}, align 4 +// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[LOCAL_OBJECTA]] to ptr addrspace(4) // Check for Array init loop // CHECK: %[[LAMBDA_PTR:.+]] = getelementptr inbounds %class{{.*}}.anon{{.*}}, ptr addrspace(4) %[[LOCAL_OBJECT]], i32 0, i32 0 @@ -98,8 +98,8 @@ int main() { // CHECK-SAME:(ptr noundef byval(%struct{{.*}}.__wrapper_class{{.*}}) align 4 %[[ARR_ARG:.*]]) // Check local lambda object alloca -// CHECK: %[[LOCAL_OBJECTA:[0-9]+]] = alloca %class{{.*}}.anon{{.*}}, align 4 -// CHECK: %[[LOCAL_OBJECT:[0-9]+]] = addrspacecast ptr %[[LOCAL_OBJECTA]] to ptr addrspace(4) +// CHECK: %[[LOCAL_OBJECTA:[a-zA-Z0-9_]+]] = alloca %class{{.*}}.anon{{.*}}, align 4 +// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[LOCAL_OBJECTA]] to ptr addrspace(4) // Check for Array init loop // CHECK: %[[LAMBDA_PTR:.+]] = getelementptr inbounds %class{{.*}}.anon{{.*}}, ptr addrspace(4) %[[LOCAL_OBJECT]], i32 0, i32 0 diff --git a/clang/test/CodeGenSYCL/no_opaque_accessor_inheritance.cpp b/clang/test/CodeGenSYCL/no_opaque_accessor_inheritance.cpp index e7a1f904698aa..e9021767ee715 100644 --- a/clang/test/CodeGenSYCL/no_opaque_accessor_inheritance.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_accessor_inheritance.cpp @@ -50,7 +50,7 @@ int main() { // CHECK: [[ARG_C]].addr.ascast = addrspacecast i32* [[ARG_C]].addr to i32 addrspace(4)* // // Lambda object alloca -// CHECK: [[KERNEL_OBJ:%[a-zA-Z0-9_]+]] = addrspacecast %class{{.*}}.anon* [[KERNEL]] to %class{{.*}}.anon addrspace(4)* +// CHECK: [[KERNEL_OBJ:%[a-zA-Z0-9_.]+]] = addrspacecast %class{{.*}}.anon* [[KERNEL]] to %class{{.*}}.anon addrspace(4)* // // Kernel argument stores // CHECK: store i32 [[ARG_A]], i32 addrspace(4)* [[ARG_A]].addr.ascast diff --git a/clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp b/clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp index 64d1d072e18c1..290c15f2a8887 100644 --- a/clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp @@ -27,12 +27,12 @@ int main() { // Check alloca for pointer argument // CHECK: [[MEM_ARG]].addr = alloca i32 addrspace(1)* // Check lambda object alloca -// CHECK: [[ANONALLOCA:%[0-9]+]] = alloca %class.anon +// CHECK: [[ANONALLOCA:%[a-zA-Z0-9_]+]] = alloca %class.anon // Check allocas for ranges // CHECK: [[ARANGEA:%agg.tmp.*]] = alloca %"struct.cl::sycl::range" // CHECK: [[MRANGEA:%agg.tmp.*]] = alloca %"struct.cl::sycl::range" // CHECK: [[OIDA:%agg.tmp.*]] = alloca %"struct.cl::sycl::id" -// CHECK: [[ANON:%[0-9]+]] = addrspacecast %class.anon* [[ANONALLOCA]] to %class.anon addrspace(4)* +// CHECK: [[ANON:%[a-zA-Z0-9_.]+]] = addrspacecast %class.anon* [[ANONALLOCA]] to %class.anon addrspace(4)* // CHECK: [[ARANGET:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range"* [[ARANGEA]] to %"struct.cl::sycl::range" addrspace(4)* // CHECK: [[MRANGET:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range"* [[MRANGEA]] to %"struct.cl::sycl::range" addrspace(4)* // CHECK: [[OIDT:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::id"* [[OIDA]] to %"struct.cl::sycl::id" addrspace(4)* diff --git a/clang/test/CodeGenSYCL/no_opaque_kernel-param-acc-array.cpp b/clang/test/CodeGenSYCL/no_opaque_kernel-param-acc-array.cpp index 1c176d0b22f29..37f57c2429ed9 100644 --- a/clang/test/CodeGenSYCL/no_opaque_kernel-param-acc-array.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_kernel-param-acc-array.cpp @@ -39,7 +39,7 @@ int main() { // CHECK: [[MEM_ARG2:%[a-zA-Z0-9_.]+]] = alloca i32 addrspace(1)*, align 8 // CHECK lambda object alloca -// CHECK: [[LOCAL_OBJECTA:%0]] = alloca %class.anon, align 4 +// CHECK: [[LOCAL_OBJECTA:%__SYCLKernel]] = alloca %class.anon, align 4 // CHECK allocas for ranges // CHECK: [[ACC_RANGE1A:%[a-zA-Z0-9_.]+]] = alloca %"struct.cl::sycl::range" diff --git a/clang/test/CodeGenSYCL/no_opaque_kernel-param-member-acc-array.cpp b/clang/test/CodeGenSYCL/no_opaque_kernel-param-member-acc-array.cpp index 894f528b31762..39b87d42bd168 100644 --- a/clang/test/CodeGenSYCL/no_opaque_kernel-param-member-acc-array.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_kernel-param-member-acc-array.cpp @@ -42,7 +42,7 @@ int main() { // CHECK: [[MEM_ARG1]].addr{{[0-9]*}} = alloca i32 addrspace(1)*, align 8 // Check lambda object alloca -// CHECK: [[LOCAL_OBJECTA:%0]] = alloca %class{{.*}}.anon, align 4 +// CHECK: [[LOCAL_OBJECTA:%__SYCLKernel]] = alloca %class{{.*}}.anon, align 4 // Check allocas for ranges // CHECK: [[ACC_RANGE1A:%[a-zA-Z0-9_.]+]] = alloca %"struct.cl::sycl::range" @@ -53,7 +53,7 @@ int main() { // CHECK: [[OFFSET2A:%[a-zA-Z0-9_.]+]] = alloca %"struct.cl::sycl::id" // Check lambda object addrspacecast -// CHECK: [[LOCAL_OBJECT:%.*]] = addrspacecast %class{{.*}}.anon* %0 to %class{{.*}}.anon addrspace(4)* +// CHECK: [[LOCAL_OBJECT:%.*]] = addrspacecast %class{{.*}}.anon* %__SYCLKernel to %class{{.*}}.anon addrspace(4)* // Check addrspacecast for ranges // CHECK: [[ACC_RANGE1AS:%.*]] = addrspacecast %"struct.cl::sycl::range"* [[ACC_RANGE1A]] to %"struct.cl::sycl::range" addrspace(4)* diff --git a/clang/test/CodeGenSYCL/no_opaque_kernel-param-pod-array.cpp b/clang/test/CodeGenSYCL/no_opaque_kernel-param-pod-array.cpp index 5136b12691704..a026f813c67a7 100644 --- a/clang/test/CodeGenSYCL/no_opaque_kernel-param-pod-array.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_kernel-param-pod-array.cpp @@ -49,8 +49,8 @@ int main() { // CHECK-SAME:(%struct{{.*}}.__wrapper_class* noundef byval(%struct{{.*}}.__wrapper_class) align 4 %[[ARR_ARG:.*]]) // Check local lambda object alloca -// CHECK: %[[LOCAL_OBJECTA:[0-9]+]] = alloca %class{{.*}}.anon, align 4 -// CHECK: %[[LOCAL_OBJECT:[0-9]+]] = addrspacecast %class{{.*}}.anon* %[[LOCAL_OBJECTA]] to %class{{.*}}.anon addrspace(4)* +// CHECK: %[[LOCAL_OBJECTA:[a-zA-Z0-9_]+]] = alloca %class{{.*}}.anon, align 4 +// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast %class{{.*}}.anon* %[[LOCAL_OBJECTA]] to %class{{.*}}.anon addrspace(4)* // Check for Array init loop // CHECK: %[[LAMBDA_PTR:.+]] = getelementptr inbounds %class{{.*}}.anon, %class{{.*}}.anon addrspace(4)* %[[LOCAL_OBJECT]], i32 0, i32 0 @@ -74,8 +74,8 @@ int main() { // CHECK-SAME:(%struct{{.*}}.__wrapper_class{{.*}}* noundef byval(%struct{{.*}}.__wrapper_class{{.*}}) align 4 %[[ARR_ARG:.*]]) // Check local lambda object alloca -// CHECK: %[[LOCAL_OBJECTA:[0-9]+]] = alloca %class{{.*}}.anon{{.*}}, align 4 -// CHECK: %[[LOCAL_OBJECT:[0-9]+]] = addrspacecast %class{{.*}}.anon{{.*}}* %[[LOCAL_OBJECTA]] to %class{{.*}}.anon{{.*}} addrspace(4)* +// CHECK: %[[LOCAL_OBJECTA:[a-zA-Z0-9_]+]] = alloca %class{{.*}}.anon{{.*}}, align 4 +// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast %class{{.*}}.anon{{.*}}* %[[LOCAL_OBJECTA]] to %class{{.*}}.anon{{.*}} addrspace(4)* // Check for Array init loop // CHECK: %[[LAMBDA_PTR:.+]] = getelementptr inbounds %class{{.*}}.anon{{.*}}, %class{{.*}}.anon{{.*}} addrspace(4)* %[[LOCAL_OBJECT]], i32 0, i32 0 @@ -100,8 +100,8 @@ int main() { // CHECK-SAME:(%struct{{.*}}.__wrapper_class{{.*}}* noundef byval(%struct{{.*}}.__wrapper_class{{.*}}) align 4 %[[ARR_ARG:.*]]) // Check local lambda object alloca -// CHECK: %[[LOCAL_OBJECTA:[0-9]+]] = alloca %class{{.*}}.anon{{.*}}, align 4 -// CHECK: %[[LOCAL_OBJECT:[0-9]+]] = addrspacecast %class{{.*}}.anon{{.*}}* %[[LOCAL_OBJECTA]] to %class{{.*}}.anon{{.*}} addrspace(4)* +// CHECK: %[[LOCAL_OBJECTA:[a-zA-Z0-9_]+]] = alloca %class{{.*}}.anon{{.*}}, align 4 +// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast %class{{.*}}.anon{{.*}}* %[[LOCAL_OBJECTA]] to %class{{.*}}.anon{{.*}} addrspace(4)* // Check for Array init loop // CHECK: %[[LAMBDA_PTR:.+]] = getelementptr inbounds %class{{.*}}.anon{{.*}}, %class{{.*}}.anon{{.*}} addrspace(4)* %[[LOCAL_OBJECT]], i32 0, i32 0 diff --git a/clang/test/CodeGenSYCL/no_opaque_sampler.cpp b/clang/test/CodeGenSYCL/no_opaque_sampler.cpp index 1c879dac2b086..d20a67ef6f831 100644 --- a/clang/test/CodeGenSYCL/no_opaque_sampler.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_sampler.cpp @@ -2,8 +2,8 @@ // CHECK: define {{.*}}spir_kernel void @{{[a-zA-Z0-9_]+}}(%opencl.sampler_t addrspace(2)* [[SAMPLER_ARG:%[a-zA-Z0-9_]+]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[SAMPLER_ARG]].addr = alloca %opencl.sampler_t addrspace(2)*, align 8 -// CHECK: [[ANON:%[0-9]+]] = alloca %class.anon, align 8 -// CHECK: [[ANONCAST:%[0-9]+]] = addrspacecast %class.anon* [[ANON]] to %class.anon addrspace(4)* +// CHECK: [[ANON:%[a-zA-Z0-9_]+]] = alloca %class.anon, align 8 +// CHECK: [[ANONCAST:%[a-zA-Z0-9_.]+]] = addrspacecast %class.anon* [[ANON]] to %class.anon addrspace(4)* // CHECK: store %opencl.sampler_t addrspace(2)* [[SAMPLER_ARG]], %opencl.sampler_t addrspace(2)* addrspace(4)* [[SAMPLER_ARG]].addr.ascast, align 8 // CHECK-NEXT: [[BITCAST:%[0-9]+]] = bitcast %class.anon* [[ANON]] to i8* // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[BITCAST]]) #4 @@ -17,8 +17,8 @@ // Check alloca // CHECK: [[SAMPLER_ARG_WRAPPED]].addr = alloca %opencl.sampler_t addrspace(2)*, align 8 // CHECK: [[ARG_A]].addr = alloca i32, align 4 -// CHECK: [[LAMBDAA:%[0-9]+]] = alloca %class.anon.0, align 8 -// CHECK: [[LAMBDA:%[0-9]+]] = addrspacecast %class.anon.0* [[LAMBDAA]] to %class.anon.0 addrspace(4)* +// CHECK: [[LAMBDAA:%[a-zA-Z0-9_]+]] = alloca %class.anon.0, align 8 +// CHECK: [[LAMBDA:%[a-zA-Z0-9_.]+]] = addrspacecast %class.anon.0* [[LAMBDAA]] to %class.anon.0 addrspace(4)* // Check argument store // CHECK: store %opencl.sampler_t addrspace(2)* [[SAMPLER_ARG_WRAPPED]], %opencl.sampler_t addrspace(2)* addrspace(4)* [[SAMPLER_ARG_WRAPPED]].addr.ascast, align 8 diff --git a/clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp b/clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp index 9906fc4a5ccb3..cc1dac510f27a 100644 --- a/clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp @@ -31,7 +31,7 @@ int main() { // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_A(%union.MyUnion* noundef byval(%union.MyUnion) align 4 [[MEM_ARG:%[a-zA-Z0-9_]+]]) // Check lambda object alloca -// CHECK: [[LOCAL_OBJECT:%0]] = alloca %class.anon, align 4 +// CHECK: [[LOCAL_OBJECT:%__SYCLKernel]] = alloca %class.anon, align 4 // CHECK: [[LOCAL_OBJECTAS:%.*]] = addrspacecast %class.anon* [[LOCAL_OBJECT]] to %class.anon addrspace(4)* // CHECK: [[MEM_ARGAS:%.*]] = addrspacecast %union.MyUnion* [[MEM_ARG]] to %union.MyUnion addrspace(4)* diff --git a/clang/test/CodeGenSYCL/sampler.cpp b/clang/test/CodeGenSYCL/sampler.cpp index 5d4b23990b3e4..7c9b77ddaa962 100644 --- a/clang/test/CodeGenSYCL/sampler.cpp +++ b/clang/test/CodeGenSYCL/sampler.cpp @@ -2,8 +2,8 @@ // CHECK: define {{.*}}spir_kernel void @{{[a-zA-Z0-9_]+}}(ptr addrspace(2) [[SAMPLER_ARG:%[a-zA-Z0-9_]+]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[SAMPLER_ARG]].addr = alloca ptr addrspace(2), align 8 -// CHECK: [[ANON:%[0-9]+]] = alloca %class.anon, align 8 -// CHECK: [[ANONCAST:%[0-9]+]] = addrspacecast ptr [[ANON]] to ptr addrspace(4) +// CHECK: [[ANON:%[a-zA-Z0-9_]+]] = alloca %class.anon, align 8 +// CHECK: [[ANONCAST:%[a-zA-Z0-9_.]+]] = addrspacecast ptr [[ANON]] to ptr addrspace(4) // CHECK: store ptr addrspace(2) [[SAMPLER_ARG]], ptr addrspace(4) [[SAMPLER_ARG]].addr.ascast, align 8 // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[ANON]]) #4 // CHECK-NEXT: [[GEP:%[a-zA-z0-9]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) [[ANONCAST]], i32 0, i32 0 @@ -16,8 +16,8 @@ // Check alloca // CHECK: [[SAMPLER_ARG_WRAPPED]].addr = alloca ptr addrspace(2), align 8 // CHECK: [[ARG_A]].addr = alloca i32, align 4 -// CHECK: [[LAMBDAA:%[0-9]+]] = alloca %class.anon.0, align 8 -// CHECK: [[LAMBDA:%[0-9]+]] = addrspacecast ptr [[LAMBDAA]] to ptr addrspace(4) +// CHECK: [[LAMBDAA:%[a-zA-Z0-9_]+]] = alloca %class.anon.0, align 8 +// CHECK: [[LAMBDA:%[a-zA-Z0-9_.]+]] = addrspacecast ptr [[LAMBDAA]] to ptr addrspace(4) // Check argument store // CHECK: store ptr addrspace(2) [[SAMPLER_ARG_WRAPPED]], ptr addrspace(4) [[SAMPLER_ARG_WRAPPED]].addr.ascast, align 8 diff --git a/clang/test/CodeGenSYCL/union-kernel-param.cpp b/clang/test/CodeGenSYCL/union-kernel-param.cpp index 704d7a256fcc2..d34dec879992c 100644 --- a/clang/test/CodeGenSYCL/union-kernel-param.cpp +++ b/clang/test/CodeGenSYCL/union-kernel-param.cpp @@ -2,10 +2,6 @@ // This test checks a kernel argument that is union with both array and non-array fields. -#include "Inputs/sycl.hpp" - -using namespace cl::sycl; - union MyUnion { int FldInt; char FldChar; @@ -31,7 +27,7 @@ int main() { // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_A(ptr noundef byval(%union.MyUnion) align 4 [[MEM_ARG:%[a-zA-Z0-9_]+]]) // Check lambda object alloca -// CHECK: [[LOCAL_OBJECT:%0]] = alloca %class.anon, align 4 +// CHECK: [[LOCAL_OBJECT:%__SYCLKernel]] = alloca %class.anon, align 4 // CHECK: [[LOCAL_OBJECTAS:%.*]] = addrspacecast ptr [[LOCAL_OBJECT]] to ptr addrspace(4) // CHECK: [[MEM_ARGAS:%.*]] = addrspacecast ptr [[MEM_ARG]] to ptr addrspace(4) diff --git a/clang/test/SemaSYCL/accessor_inheritance.cpp b/clang/test/SemaSYCL/accessor_inheritance.cpp index d73b734cade13..1b4b8b138419f 100644 --- a/clang/test/SemaSYCL/accessor_inheritance.cpp +++ b/clang/test/SemaSYCL/accessor_inheritance.cpp @@ -44,7 +44,7 @@ int main() { // CHECK: ParmVarDecl{{.*}} used _arg_C 'int' // Check lambda initialization -// CHECK: VarDecl {{.*}} used '(lambda at {{.*}}accessor_inheritance.cpp +// CHECK: VarDecl {{.*}} used __SYCLKernel '(lambda at {{.*}}accessor_inheritance.cpp // CHECK-NEXT: InitListExpr {{.*}} // CHECK-NEXT: InitListExpr {{.*}} 'AccessorDerived' // CHECK-NEXT: InitListExpr {{.*}} 'AccessorBase' diff --git a/clang/test/SemaSYCL/array-kernel-param.cpp b/clang/test/SemaSYCL/array-kernel-param.cpp index 6b5c190324793..cfcc379562e64 100644 --- a/clang/test/SemaSYCL/array-kernel-param.cpp +++ b/clang/test/SemaSYCL/array-kernel-param.cpp @@ -178,7 +178,7 @@ int main() { // CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'sycl::id<1>' // CHECK-NEXT: CompoundStmt // CHECK-NEXT: DeclStmt -// CHECK-NEXT: VarDecl {{.*}} used '(lambda at {{.*}}array-kernel-param.cpp{{.*}})' cinit +// CHECK-NEXT: VarDecl {{.*}} used __SYCLKernel '(lambda at {{.*}}array-kernel-param.cpp{{.*}})' cinit // CHECK-NEXT: InitListExpr {{.*}} '(lambda at {{.*}}array-kernel-param.cpp{{.*}})' // CHECK-NEXT: InitListExpr {{.*}} 'StructWithAccessors' // CHECK-NEXT: InitListExpr {{.*}} 'Accessor[2]' @@ -219,7 +219,7 @@ int main() { // CHECK-NEXT: ParmVarDecl {{.*}} used _arg_c 'int' // CHECK-NEXT: CompoundStmt // CHECK-NEXT: DeclStmt -// CHECK-NEXT: VarDecl {{.*}} used '(lambda at {{.*}}array-kernel-param.cpp{{.*}})' cinit +// CHECK-NEXT: VarDecl {{.*}} used __SYCLKernel '(lambda at {{.*}}array-kernel-param.cpp{{.*}})' cinit // CHECK-NEXT: InitListExpr {{.*}} '(lambda at {{.*}}array-kernel-param.cpp{{.*}})' // Initializer for struct array i.e. DecomposedStruct DecompStructArray[2] @@ -330,7 +330,7 @@ int main() { // CHECK-NEXT: ParmVarDecl {{.*}} used _arg_s 'S':'S' // CHECK-NEXT: CompoundStmt // CHECK-NEXT: DeclStmt -// CHECK-NEXT: VarDecl {{.*}} used '(lambda at {{.*}}array-kernel-param.cpp{{.*}})' cinit +// CHECK-NEXT: VarDecl {{.*}} used __SYCLKernel '(lambda at {{.*}}array-kernel-param.cpp{{.*}})' cinit // CHECK-NEXT: InitListExpr {{.*}} '(lambda at {{.*}}array-kernel-param.cpp{{.*}})' // CHECK-NEXT: CXXConstructExpr {{.*}} 'S':'S' 'void (const S &) noexcept' // CHECK-NEXT: ImplicitCastExpr diff --git a/clang/test/SemaSYCL/basic-kernel-wrapper.cpp b/clang/test/SemaSYCL/basic-kernel-wrapper.cpp index 8a70097e09b68..696590de8e5f6 100644 --- a/clang/test/SemaSYCL/basic-kernel-wrapper.cpp +++ b/clang/test/SemaSYCL/basic-kernel-wrapper.cpp @@ -34,7 +34,7 @@ int main() { // Check lambda declaration inside the wrapper // CHECK: DeclStmt -// CHECK-NEXT: VarDecl {{.*}} used '(lambda at {{.*}}basic-kernel-wrapper.cpp{{.*}})' +// CHECK-NEXT: VarDecl {{.*}} used __SYCLKernel '(lambda at {{.*}}basic-kernel-wrapper.cpp{{.*}})' // Check accessor initialization diff --git a/clang/test/SemaSYCL/inheritance.cpp b/clang/test/SemaSYCL/inheritance.cpp index 684fa835e8599..889c2ba35b65c 100644 --- a/clang/test/SemaSYCL/inheritance.cpp +++ b/clang/test/SemaSYCL/inheritance.cpp @@ -50,7 +50,7 @@ int main() { // Check initializers for derived and base classes. // Each class has it's own initializer list // Base classes should be initialized first. -// CHECK: VarDecl {{.*}} derived 'derived' cinit +// CHECK: VarDecl {{.*}} used __SYCLKernel 'derived' cinit // CHECK-NEXT: InitListExpr {{.*}} 'derived' // CHECK-NEXT: CXXConstructExpr {{.*}} 'base' 'void (const base &) noexcept' // CHECK-NEXT: ImplicitCastExpr {{.*}} 'const base' lvalue From fdeaab61a79832638fa15f92217ce794b7d29f8e Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Tue, 14 Jun 2022 10:51:19 -0700 Subject: [PATCH 14/15] Apply code review comments. --- clang/lib/Sema/SemaSYCL.cpp | 10 ++++------ clang/test/SemaSYCL/inheritance.cpp | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 26b5b71e7379a..5170df326dc20 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2772,14 +2772,12 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { const CXXRecordDecl *KernelObj) { TypeSourceInfo *TSInfo = KernelObj->isLambda() ? KernelObj->getLambdaTypeInfo() : nullptr; - // Generate a name kernel object variable. - Twine Name = Twine("__SYCLKernel"); - SmallString<12> Buffer; - StringRef NameRef = Name.toStringRef(Buffer); - auto &Ident = Ctx.Idents.getOwn(NameRef); + IdentifierInfo *Ident = KernelObj->getIdentifier(); + if (!Ident) + Ident = &Ctx.Idents.getOwn("__SYCLKernel"); VarDecl *VD = VarDecl::Create( - Ctx, DC, KernelObj->getLocation(), KernelObj->getLocation(), &Ident, + Ctx, DC, KernelObj->getLocation(), KernelObj->getLocation(), Ident, QualType(KernelObj->getTypeForDecl(), 0), TSInfo, SC_None); return VD; } diff --git a/clang/test/SemaSYCL/inheritance.cpp b/clang/test/SemaSYCL/inheritance.cpp index 889c2ba35b65c..3e341e2629a1c 100644 --- a/clang/test/SemaSYCL/inheritance.cpp +++ b/clang/test/SemaSYCL/inheritance.cpp @@ -50,7 +50,7 @@ int main() { // Check initializers for derived and base classes. // Each class has it's own initializer list // Base classes should be initialized first. -// CHECK: VarDecl {{.*}} used __SYCLKernel 'derived' cinit +// CHECK: VarDecl {{.*}} used derived 'derived' cinit // CHECK-NEXT: InitListExpr {{.*}} 'derived' // CHECK-NEXT: CXXConstructExpr {{.*}} 'base' 'void (const base &) noexcept' // CHECK-NEXT: ImplicitCastExpr {{.*}} 'const base' lvalue From 0a5834b5cac83d03585fbad63194f733364984ac Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Tue, 14 Jun 2022 11:06:10 -0700 Subject: [PATCH 15/15] Change getOwn to get. --- clang/lib/Sema/SemaSYCL.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 5170df326dc20..b70b0089dba75 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2774,7 +2774,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { KernelObj->isLambda() ? KernelObj->getLambdaTypeInfo() : nullptr; IdentifierInfo *Ident = KernelObj->getIdentifier(); if (!Ident) - Ident = &Ctx.Idents.getOwn("__SYCLKernel"); + Ident = &Ctx.Idents.get("__SYCLKernel"); VarDecl *VD = VarDecl::Create( Ctx, DC, KernelObj->getLocation(), KernelObj->getLocation(), Ident,