diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index f90fca9d937fc..5b57f0a25cec8 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -123,6 +123,8 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx) { + assert(ID != Intrinsic::not_intrinsic && "Not an intrinsic!"); + switch (ID) { case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ced081d429164..3049915260646 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11596,10 +11596,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { CallInst *CI = cast(VL0); setInsertPointAfterBundle(E); - Intrinsic::ID IID = Intrinsic::not_intrinsic; - if (Function *FI = CI->getCalledFunction()) - IID = FI->getIntrinsicID(); - Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI); @@ -11610,18 +11606,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { SmallVector OpVecs; SmallVector TysForDecl; // Add return type if intrinsic is overloaded on it. - if (isVectorIntrinsicWithOverloadTypeAtArg(IID, -1)) + if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1)) TysForDecl.push_back( FixedVectorType::get(CI->getType(), E->Scalars.size())); for (unsigned I : seq(0, CI->arg_size())) { ValueList OpVL; // Some intrinsics have scalar arguments. This argument should not be // vectorized. - if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(IID, I)) { + if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) { CallInst *CEI = cast(VL0); ScalarArg = CEI->getArgOperand(I); OpVecs.push_back(CEI->getArgOperand(I)); - if (isVectorIntrinsicWithOverloadTypeAtArg(IID, I)) + if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) TysForDecl.push_back(ScalarArg->getType()); continue; } @@ -11633,7 +11629,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { } LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n"); OpVecs.push_back(OpVec); - if (isVectorIntrinsicWithOverloadTypeAtArg(IID, I)) + if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) TysForDecl.push_back(OpVec->getType()); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 991491de20892..76961629aeceb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -498,16 +498,17 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { "DbgInfoIntrinsic should have been dropped during VPlan construction"); State.setDebugLocFrom(CI.getDebugLoc()); + bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic; FunctionType *VFTy = nullptr; if (Variant) VFTy = Variant->getFunctionType(); for (unsigned Part = 0; Part < State.UF; ++Part) { SmallVector TysForDecl; // Add return type if intrinsic is overloaded on it. - if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) { + if (UseIntrinsic && + isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) TysForDecl.push_back( VectorType::get(CI.getType()->getScalarType(), State.VF)); - } SmallVector Args; for (const auto &I : enumerate(operands())) { // Some intrinsics have a scalar argument - don't replace it with a @@ -516,18 +517,19 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { // e.g. linear parameters for pointers. Value *Arg; if ((VFTy && !VFTy->getParamType(I.index())->isVectorTy()) || - (VectorIntrinsicID != Intrinsic::not_intrinsic && + (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))) Arg = State.get(I.value(), VPIteration(0, 0)); else Arg = State.get(I.value(), Part); - if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index())) + if (UseIntrinsic && + isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index())) TysForDecl.push_back(Arg->getType()); Args.push_back(Arg); } Function *VectorF; - if (VectorIntrinsicID != Intrinsic::not_intrinsic) { + if (UseIntrinsic) { // Use vector version of the intrinsic. Module *M = State.Builder.GetInsertBlock()->getModule(); VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll index 876d58131bd7a..16506b3a57573 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(foo|bar|baz|quux)" --version 2 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(foo|bar|baz|quux|goo)" --version 2 ; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=NEON ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=SVE_OR_NEON ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_TF @@ -15,13 +15,13 @@ define void @test_linear8(ptr noalias %a, ptr readnone %b, i64 %n) { ; ; SVE_OR_NEON-LABEL: define void @test_linear8 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; SVE_OR_NEON: [[TMP13:%.*]] = call @vec_foo_linear8_nomask_sve(ptr [[TMP12:%.*]]) +; SVE_OR_NEON: [[TMP15:%.*]] = call @vec_foo_linear8_nomask_sve(ptr [[TMP14:%.*]]) ; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear8 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; SVE_TF: [[TMP19:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP18:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] +; SVE_TF: [[TMP21:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP20:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] ; entry: br label %for.body @@ -48,12 +48,12 @@ define void @test_vector_linear4(ptr noalias %a, ptr readnone %b, ptr readonly % ; ; SVE_OR_NEON-LABEL: define void @test_vector_linear4 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON: [[TMP15:%.*]] = call @vec_baz_vector_linear4_nomask_sve( [[WIDE_LOAD:%.*]], ptr [[TMP14:%.*]]) +; SVE_OR_NEON: [[TMP17:%.*]] = call @vec_baz_vector_linear4_nomask_sve( [[WIDE_LOAD:%.*]], ptr [[TMP16:%.*]]) ; SVE_OR_NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_vector_linear4 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] +; SVE_TF: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]] ; entry: br label %for.body @@ -85,7 +85,7 @@ define void @test_linear8_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) { ; ; SVE_TF-LABEL: define void @test_linear8_bad_stride ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]] +; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6:[0-9]+]] ; entry: br label %for.body @@ -112,12 +112,12 @@ define void @test_linear16_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n) ; ; SVE_OR_NEON-LABEL: define void @test_linear16_wide_stride ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON: [[TMP14:%.*]] = call @vec_foo_linear16_nomask_sve(ptr [[TMP13:%.*]]) +; SVE_OR_NEON: [[TMP16:%.*]] = call @vec_foo_linear16_nomask_sve(ptr [[TMP15:%.*]]) ; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4]] ; ; SVE_TF-LABEL: define void @test_linear16_wide_stride ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR5]] +; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6]] ; entry: br label %for.body @@ -145,13 +145,13 @@ define void @test_linear4_linear8(ptr noalias %a, ptr readnone %b, ptr readonly ; ; SVE_OR_NEON-LABEL: define void @test_linear4_linear8 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON: [[TMP15:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP13:%.*]], ptr [[TMP14:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE_OR_NEON: [[TMP17:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP15:%.*]], ptr [[TMP16:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; SVE_OR_NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear4_linear8 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[TMP21:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP19:%.*]], ptr [[TMP20:%.*]], [[ACTIVE_LANE_MASK:%.*]]) -; SVE_TF: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR6:[0-9]+]] +; SVE_TF: [[TMP23:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP21:%.*]], ptr [[TMP22:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SVE_TF: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR7:[0-9]+]] ; entry: br label %for.body @@ -179,12 +179,12 @@ define void @test_linear3_non_ptr(ptr noalias %a, i64 %n) { ; ; SVE_OR_NEON-LABEL: define void @test_linear3_non_ptr ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON: [[TMP13:%.*]] = call @vec_bar_linear3_nomask_sve(i32 [[TMP12:%.*]]) +; SVE_OR_NEON: [[TMP15:%.*]] = call @vec_bar_linear3_nomask_sve(i32 [[TMP14:%.*]]) ; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR6:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear3_non_ptr ; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR7:[0-9]+]] +; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR8:[0-9]+]] ; entry: br label %for.body @@ -212,12 +212,12 @@ define void @test_linearn5_non_ptr_neg_stride(ptr noalias %a, i64 %n) { ; ; SVE_OR_NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON: [[TMP13:%.*]] = call @vec_bar_linearn5_nomask_sve(i32 [[TMP12:%.*]]) +; SVE_OR_NEON: [[TMP15:%.*]] = call @vec_bar_linearn5_nomask_sve(i32 [[TMP14:%.*]]) ; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR7:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linearn5_non_ptr_neg_stride ; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR8:[0-9]+]] +; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR9:[0-9]+]] ; entry: br label %for.body @@ -237,10 +237,44 @@ for.cond.cleanup: ret void } +define void @test_linear8_return_void(ptr noalias %in, ptr noalias %out, i64 %n) { +; NEON-LABEL: define void @test_linear8_return_void +; NEON-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) { +; NEON: call void @vec_goo_linear8_nomask_neon(<2 x i64> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) +; NEON: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR6:[0-9]+]] +; +; SVE_OR_NEON-LABEL: define void @test_linear8_return_void +; SVE_OR_NEON-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SVE_OR_NEON: call void @vec_goo_linear8_nomask_sve( [[WIDE_LOAD:%.*]], ptr [[TMP16:%.*]]) +; SVE_OR_NEON: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR8:[0-9]+]] +; +; SVE_TF-LABEL: define void @test_linear8_return_void +; SVE_TF-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SVE_TF: call void @vec_goo_linear8_mask_sve( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SVE_TF: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR10:[0-9]+]] +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %gep.in = getelementptr i64, ptr %in, i64 %indvars.iv + %num = load i64, ptr %gep.in, align 8 + %gep.out = getelementptr i64, ptr %out, i64 %indvars.iv + call void @goo(i64 %num, ptr %gep.out) #6 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + declare i64 @foo(ptr) declare i32 @baz(i32, ptr) declare i32 @quux(ptr, ptr) declare i32 @bar(i32) +declare void @goo(i64, ptr) ; neon vector variants of foo declare <2 x i64> @vec_foo_linear8_nomask_neon(ptr) @@ -249,6 +283,7 @@ declare <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32>, ptr) declare <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr, ptr) declare <4 x i32> @vec_bar_linear3_nomask_neon(i32) declare <4 x i32> @vec_bar_linearn5_nomask_neon(i32) +declare void @vec_goo_linear8_nomask_neon(<2 x i64>, ptr) ; scalable vector variants of foo declare @vec_foo_linear8_mask_sve(ptr, ) @@ -258,6 +293,8 @@ declare @vec_baz_vector_linear4_nomask_sve( declare @vec_quux_linear4_linear8_mask_sve(ptr, ptr, ) declare @vec_bar_linear3_nomask_sve(i32) declare @vec_bar_linearn5_nomask_sve(i32) +declare void @vec_goo_linear8_nomask_sve(, ptr) +declare void @vec_goo_linear8_mask_sve(, ptr, ) attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVsNxl8_foo(vec_foo_linear8_nomask_sve),_ZGVsMxl8_foo(vec_foo_linear8_mask_sve),_ZGVnN2l8_foo(vec_foo_linear8_nomask_neon)" } attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsNxvl4_baz(vec_baz_vector_linear4_nomask_sve),_ZGVnN4vl4_baz(vec_baz_vector_linear4_nomask_neon)" } @@ -265,3 +302,4 @@ attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVsNxl16_foo(vec_foo attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVsMxl4l8_quux(vec_quux_linear4_linear8_mask_sve),_ZGVnN4l4l8_quux(vec_quux_linear4_linear8_nomask_neon)" } attributes #4 = { nounwind "vector-function-abi-variant"="_ZGVsNxl3_bar(vec_bar_linear3_nomask_sve),_ZGVnN4l3_bar(vec_bar_linear3_nomask_neon)" } attributes #5 = { nounwind "vector-function-abi-variant"="_ZGVsNxln5_bar(vec_bar_linearn5_nomask_sve),_ZGVnN4ln5_bar(vec_bar_linearn5_nomask_neon)" } +attributes #6 = { nounwind "vector-function-abi-variant"="_ZGVsNxvl8_goo(vec_goo_linear8_nomask_sve),_ZGVsMxvl8_goo(vec_goo_linear8_mask_sve),_ZGVsN2vl8_goo(vec_goo_linear8_nomask_neon)" }