Skip to content

Commit 4c7f820

Browse files
committed
Update @llvm.powi to handle different int sizes for the exponent
This can be seen as a follow-up to commit 0ee439b, which changed the second argument of __powidf2, __powisf2 and __powitf2 in compiler-rt from si_int to int. That was to align with how those runtimes are defined in libgcc. One thing that seems to have been missing in that patch was to make sure that the rest of LLVM also handles the fact that the argument now depends on the size of int (not using the si_int machine mode for 32-bit). When using __builtin_powi for a target with 16-bit int clang crashed. And when emitting libcalls to those rtlib functions (typically when lowering @llvm.powi), the backend would always prepare the exponent argument as an i32, which caused miscompiles when the rtlib was compiled with 16-bit int. The solution used here is to use an overloaded type for the second argument in @llvm.powi. This way clang can use the "correct" type when lowering __builtin_powi, and then later when emitting the libcall it is assumed that the type used in @llvm.powi matches the rtlib function. One thing that needed some extra attention was that when vectorizing calls several passes did not support that several arguments could be overloaded in the intrinsics. This patch allows overload of a scalar operand by adding hasVectorInstrinsicOverloadedScalarOpd, with an entry for powi. Differential Revision: https://reviews.llvm.org/D99439
1 parent 204014e commit 4c7f820

File tree

74 files changed

+505
-326
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+505
-326
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

+14-3
Original file line numberDiff line numberDiff line change
@@ -2946,10 +2946,21 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
29462946

29472947
case Builtin::BI__builtin_powi:
29482948
case Builtin::BI__builtin_powif:
2949-
case Builtin::BI__builtin_powil:
2950-
return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(
2951-
*this, E, Intrinsic::powi, Intrinsic::experimental_constrained_powi));
2949+
case Builtin::BI__builtin_powil: {
2950+
llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
2951+
llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
29522952

2953+
if (Builder.getIsFPConstrained()) {
2954+
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2955+
Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
2956+
Src0->getType());
2957+
return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
2958+
}
2959+
2960+
Function *F = CGM.getIntrinsic(Intrinsic::powi,
2961+
{ Src0->getType(), Src1->getType() });
2962+
return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
2963+
}
29532964
case Builtin::BI__builtin_isgreater:
29542965
case Builtin::BI__builtin_isgreaterequal:
29552966
case Builtin::BI__builtin_isless:

clang/test/CodeGen/avr-builtins.c

+21
Original file line numberDiff line numberDiff line change
@@ -104,3 +104,24 @@ unsigned long long byteswap64(unsigned long long x) {
104104

105105
// CHECK: define{{.*}} i64 @byteswap64
106106
// CHECK: i64 @llvm.bswap.i64(i64
107+
108+
double powi(double x, int y) {
109+
return __builtin_powi(x, y);
110+
}
111+
112+
// CHECK: define{{.*}} float @powi
113+
// CHECK: float @llvm.powi.f32.i16(float %0, i16 %1)
114+
115+
float powif(float x, int y) {
116+
return __builtin_powif(x, y);
117+
}
118+
119+
// CHECK: define{{.*}} float @powif
120+
// CHECK: float @llvm.powi.f32.i16(float %0, i16 %1)
121+
122+
long double powil(long double x, int y) {
123+
return __builtin_powil(x, y);
124+
}
125+
126+
// CHECK: define{{.*}} float @powil
127+
// CHECK: float @llvm.powi.f32.i16(float %0, i16 %1)

clang/test/CodeGen/math-builtins.c

+6-6
Original file line numberDiff line numberDiff line change
@@ -133,12 +133,12 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
133133

134134
__builtin_powi(f,f); __builtin_powif(f,f); __builtin_powil(f,f);
135135

136-
// NO__ERRNO: declare double @llvm.powi.f64(double, i32) [[READNONE_INTRINSIC]]
137-
// NO__ERRNO: declare float @llvm.powi.f32(float, i32) [[READNONE_INTRINSIC]]
138-
// NO__ERRNO: declare x86_fp80 @llvm.powi.f80(x86_fp80, i32) [[READNONE_INTRINSIC]]
139-
// HAS_ERRNO: declare double @llvm.powi.f64(double, i32) [[READNONE_INTRINSIC]]
140-
// HAS_ERRNO: declare float @llvm.powi.f32(float, i32) [[READNONE_INTRINSIC]]
141-
// HAS_ERRNO: declare x86_fp80 @llvm.powi.f80(x86_fp80, i32) [[READNONE_INTRINSIC]]
136+
// NO__ERRNO: declare double @llvm.powi.f64.i32(double, i32) [[READNONE_INTRINSIC]]
137+
// NO__ERRNO: declare float @llvm.powi.f32.i32(float, i32) [[READNONE_INTRINSIC]]
138+
// NO__ERRNO: declare x86_fp80 @llvm.powi.f80.i32(x86_fp80, i32) [[READNONE_INTRINSIC]]
139+
// HAS_ERRNO: declare double @llvm.powi.f64.i32(double, i32) [[READNONE_INTRINSIC]]
140+
// HAS_ERRNO: declare float @llvm.powi.f32.i32(float, i32) [[READNONE_INTRINSIC]]
141+
// HAS_ERRNO: declare x86_fp80 @llvm.powi.f80.i32(x86_fp80, i32) [[READNONE_INTRINSIC]]
142142

143143
/* math */
144144
__builtin_acos(f); __builtin_acosf(f); __builtin_acosl(f); __builtin_acosf128(f);

clang/test/CodeGen/msp430-builtins.c

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// RUN: %clang_cc1 -triple msp430-unknown-unknown -O3 -emit-llvm -o- %s | FileCheck %s
3+
// REQUIRES: msp430-registered-target
4+
5+
_Static_assert(sizeof(int) == 2, "Assumption failed");
6+
_Static_assert(sizeof(long) == 4, "Assumption failed");
7+
_Static_assert(sizeof(long long) == 8, "Assumption failed");
8+
_Static_assert(sizeof(float) == 4, "Assumption failed");
9+
_Static_assert(sizeof(double) == 8, "Assumption failed");
10+
_Static_assert(sizeof(long double) == 8, "Assumption failed");
11+
12+
// CHECK-LABEL: @powif(
13+
// CHECK-NEXT: entry:
14+
// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.powi.f32.i16(float [[X:%.*]], i16 [[Y:%.*]])
15+
// CHECK-NEXT: ret float [[TMP0]]
16+
//
17+
float powif(float x, int y) {
18+
return __builtin_powif(x, y);
19+
}
20+
21+
// CHECK-LABEL: @powi(
22+
// CHECK-NEXT: entry:
23+
// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.powi.f64.i16(double [[X:%.*]], i16 [[Y:%.*]])
24+
// CHECK-NEXT: ret double [[TMP0]]
25+
//
26+
double powi(double x, int y) {
27+
return __builtin_powi(x, y);
28+
}
29+
30+
// CHECK-LABEL: @powil(
31+
// CHECK-NEXT: entry:
32+
// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.powi.f64.i16(double [[X:%.*]], i16 [[Y:%.*]])
33+
// CHECK-NEXT: ret double [[TMP0]]
34+
//
35+
long double powil(long double x, int y) {
36+
return __builtin_powil(x, y);
37+
}

llvm/docs/LangRef.rst

+8-5
Original file line numberDiff line numberDiff line change
@@ -13647,13 +13647,16 @@ This is an overloaded intrinsic. You can use ``llvm.powi`` on any
1364713647
floating-point or vector of floating-point type. Not all targets support
1364813648
all types however.
1364913649

13650+
Generally, the only supported type for the exponent is the one matching
13651+
the C type ``int``.
13652+
1365013653
::
1365113654

13652-
declare float @llvm.powi.f32(float %Val, i32 %power)
13653-
declare double @llvm.powi.f64(double %Val, i32 %power)
13654-
declare x86_fp80 @llvm.powi.f80(x86_fp80 %Val, i32 %power)
13655-
declare fp128 @llvm.powi.f128(fp128 %Val, i32 %power)
13656-
declare ppc_fp128 @llvm.powi.ppcf128(ppc_fp128 %Val, i32 %power)
13655+
declare float @llvm.powi.f32.i32(float %Val, i32 %power)
13656+
declare double @llvm.powi.f64.i16(double %Val, i16 %power)
13657+
declare x86_fp80 @llvm.powi.f80.i32(x86_fp80 %Val, i32 %power)
13658+
declare fp128 @llvm.powi.f128.i32(fp128 %Val, i32 %power)
13659+
declare ppc_fp128 @llvm.powi.ppcf128.i32(ppc_fp128 %Val, i32 %power)
1365713660

1365813661
Overview:
1365913662
"""""""""

llvm/include/llvm/Analysis/VectorUtils.h

+5
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,11 @@ bool isTriviallyVectorizable(Intrinsic::ID ID);
317317
/// Identifies if the vector form of the intrinsic has a scalar operand.
318318
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);
319319

320+
/// Identifies if the vector form of the intrinsic has a scalar operand that has
321+
/// an overloaded type.
322+
bool hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID,
323+
unsigned ScalarOpdIdx);
324+
320325
/// Returns intrinsic ID for call.
321326
/// For the input call instruction it finds mapping intrinsic and returns
322327
/// its intrinsic ID; if no mapping is found, it returns not_intrinsic.

llvm/include/llvm/CodeGen/ISDOpcodes.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -851,8 +851,8 @@ enum NodeType {
851851
STRICT_FP_TO_FP16,
852852

853853
/// Perform various unary floating-point operations inspired by libm. For
854-
/// FPOWI, the result is undefined if if the integer operand doesn't fit
855-
/// into 32 bits.
854+
/// FPOWI, the result is undefined if the integer operand doesn't fit into
855+
/// sizeof(int).
856856
FNEG,
857857
FABS,
858858
FSQRT,

llvm/include/llvm/IR/Intrinsics.td

+1-1
Original file line numberDiff line numberDiff line change
@@ -652,7 +652,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
652652
// rounding mode. LLVM purposely does not model changes to the FP
653653
// environment so they can be treated as readnone.
654654
def int_sqrt : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
655-
def int_powi : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty]>;
655+
def int_powi : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_anyint_ty]>;
656656
def int_sin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
657657
def int_cos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
658658
def int_pow : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],

llvm/lib/Analysis/VectorUtils.cpp

+10
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,16 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
114114
}
115115
}
116116

117+
bool llvm::hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID,
118+
unsigned ScalarOpdIdx) {
119+
switch (ID) {
120+
case Intrinsic::powi:
121+
return (ScalarOpdIdx == 1);
122+
default:
123+
return false;
124+
}
125+
}
126+
117127
/// Returns intrinsic ID for call.
118128
/// For the input call instruction it finds mapping intrinsic and returns
119129
/// its ID; if no mapping is found, it returns not_intrinsic.

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -4044,6 +4044,17 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
40444044
Exponent));
40454045
break;
40464046
}
4047+
unsigned Offset = Node->isStrictFPOpcode() ? 1 : 0;
4048+
bool ExponentHasSizeOfInt =
4049+
DAG.getLibInfo().getIntSize() ==
4050+
Node->getOperand(1 + Offset).getValueType().getSizeInBits();
4051+
if (!ExponentHasSizeOfInt) {
4052+
// If the exponent does not match with sizeof(int) a libcall to
4053+
// RTLIB::POWI would use the wrong type for the argument.
4054+
DAG.getContext()->emitError("POWI exponent does not match sizeof(int)");
4055+
Results.push_back(DAG.getUNDEF(Node->getValueType(0)));
4056+
break;
4057+
}
40474058
ExpandFPLibCall(Node, LC, Results);
40484059
break;
40494060
}

llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

+11-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
//===----------------------------------------------------------------------===//
2020

2121
#include "LegalizeTypes.h"
22+
#include "llvm/Analysis/TargetLibraryInfo.h"
2223
#include "llvm/Support/ErrorHandling.h"
2324
#include "llvm/Support/raw_ostream.h"
2425
using namespace llvm;
@@ -572,7 +573,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
572573
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
573574
bool IsStrict = N->isStrictFPOpcode();
574575
unsigned Offset = IsStrict ? 1 : 0;
575-
assert(N->getOperand(1 + Offset).getValueType() == MVT::i32 &&
576+
assert((N->getOperand(1 + Offset).getValueType() == MVT::i16 ||
577+
N->getOperand(1 + Offset).getValueType() == MVT::i32) &&
576578
"Unsupported power type!");
577579
RTLIB::Libcall LC = RTLIB::getPOWI(N->getValueType(0));
578580
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
@@ -583,6 +585,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
583585
return DAG.getUNDEF(N->getValueType(0));
584586
}
585587

588+
if (DAG.getLibInfo().getIntSize() !=
589+
N->getOperand(1 + Offset).getValueType().getSizeInBits()) {
590+
// If the exponent does not match with sizeof(int) a libcall to RTLIB::POWI
591+
// would use the wrong type for the argument.
592+
DAG.getContext()->emitError("POWI exponent does not match sizeof(int)");
593+
return DAG.getUNDEF(N->getValueType(0));
594+
}
595+
586596
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
587597
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)),
588598
N->getOperand(1 + Offset) };

llvm/lib/Target/Mips/Mips16HardFloat.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ static const char *const IntrinsicInline[] = {
359359
"llvm.log10.f32", "llvm.log10.f64",
360360
"llvm.nearbyint.f32", "llvm.nearbyint.f64",
361361
"llvm.pow.f32", "llvm.pow.f64",
362-
"llvm.powi.f32", "llvm.powi.f64",
362+
"llvm.powi.f32.i32", "llvm.powi.f64.i32",
363363
"llvm.rint.f32", "llvm.rint.f64",
364364
"llvm.round.f32", "llvm.round.f64",
365365
"llvm.sin.f32", "llvm.sin.f64",

llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -1300,7 +1300,7 @@ static Instruction *foldFDivPowDivisor(BinaryOperator &I,
13001300
Args.push_back(II->getArgOperand(0));
13011301
Args.push_back(Builder.CreateFNegFMF(II->getArgOperand(1), &I));
13021302
break;
1303-
case Intrinsic::powi:
1303+
case Intrinsic::powi: {
13041304
// Require 'ninf' assuming that makes powi(X, -INT_MIN) acceptable.
13051305
// That is, X ** (huge negative number) is 0.0, ~1.0, or INF and so
13061306
// dividing by that is INF, ~1.0, or 0.0. Code that uses powi allows
@@ -1310,7 +1310,10 @@ static Instruction *foldFDivPowDivisor(BinaryOperator &I,
13101310
return nullptr;
13111311
Args.push_back(II->getArgOperand(0));
13121312
Args.push_back(Builder.CreateNeg(II->getArgOperand(1)));
1313-
break;
1313+
Type *Tys[] = {I.getType(), II->getArgOperand(1)->getType()};
1314+
Value *Pow = Builder.CreateIntrinsic(IID, Tys, Args, &I);
1315+
return BinaryOperator::CreateFMulFMF(Op0, Pow, &I);
1316+
}
13141317
case Intrinsic::exp:
13151318
case Intrinsic::exp2:
13161319
Args.push_back(Builder.CreateFNegFMF(II->getArgOperand(0), &I));

llvm/lib/Transforms/Scalar/Scalarizer.cpp

+8-3
Original file line numberDiff line numberDiff line change
@@ -510,8 +510,8 @@ static bool isTriviallyScalariable(Intrinsic::ID ID) {
510510
// All of the current scalarizable intrinsics only have one mangled type.
511511
static Function *getScalarIntrinsicDeclaration(Module *M,
512512
Intrinsic::ID ID,
513-
VectorType *Ty) {
514-
return Intrinsic::getDeclaration(M, ID, { Ty->getScalarType() });
513+
ArrayRef<Type*> Tys) {
514+
return Intrinsic::getDeclaration(M, ID, Tys);
515515
}
516516

517517
/// If a call to a vector typed intrinsic function, split into a scalar call per
@@ -537,6 +537,9 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
537537

538538
Scattered.resize(NumArgs);
539539

540+
SmallVector<llvm::Type *, 3> Tys;
541+
Tys.push_back(VT->getScalarType());
542+
540543
// Assumes that any vector type has the same number of elements as the return
541544
// vector type, which is true for all current intrinsics.
542545
for (unsigned I = 0; I != NumArgs; ++I) {
@@ -546,13 +549,15 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
546549
assert(Scattered[I].size() == NumElems && "mismatched call operands");
547550
} else {
548551
ScalarOperands[I] = OpI;
552+
if (hasVectorInstrinsicOverloadedScalarOpd(ID, I))
553+
Tys.push_back(OpI->getType());
549554
}
550555
}
551556

552557
ValueVector Res(NumElems);
553558
ValueVector ScalarCallOps(NumArgs);
554559

555-
Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, VT);
560+
Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, Tys);
556561
IRBuilder<> Builder(&CI);
557562

558563
// Perform actual scalarization, taking care to preserve any scalar operands.

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+5-9
Original file line numberDiff line numberDiff line change
@@ -1664,7 +1664,8 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) {
16641664
static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
16651665
IRBuilderBase &B) {
16661666
Value *Args[] = {Base, Expo};
1667-
Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType());
1667+
Type *Types[] = {Base->getType(), Expo->getType()};
1668+
Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Types);
16681669
return B.CreateCall(F, Args);
16691670
}
16701671

@@ -1765,24 +1766,19 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
17651766
return FMul;
17661767
}
17671768

1768-
APSInt IntExpo(32, /*isUnsigned=*/false);
1769+
APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false);
17691770
// powf(x, n) -> powi(x, n) if n is a constant signed integer value
17701771
if (ExpoF->isInteger() &&
17711772
ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
17721773
APFloat::opOK) {
17731774
return createPowWithIntegerExponent(
1774-
Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B);
1775+
Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo), M, B);
17751776
}
17761777
}
17771778

17781779
// powf(x, itofp(y)) -> powi(x, y)
17791780
if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
1780-
// FIXME: Currently we always use 32 bits for the exponent in llvm.powi. In
1781-
// the future we want to use the target dependent "size of int", or
1782-
// otherwise we could end up using the wrong type for the exponent when
1783-
// mapping llvm.powi back to an rtlib call. See
1784-
// https://reviews.llvm.org/D99439 for such a fix.
1785-
if (Value *ExpoI = getIntToFPVal(Expo, B, 32))
1781+
if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize()))
17861782
return createPowWithIntegerExponent(Base, ExpoI, M, B);
17871783
}
17881784

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -5098,22 +5098,25 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
50985098
"Either the intrinsic cost or vector call cost must be valid");
50995099

51005100
for (unsigned Part = 0; Part < UF; ++Part) {
5101+
SmallVector<Type *, 2> TysForDecl = {CI->getType()};
51015102
SmallVector<Value *, 4> Args;
51025103
for (auto &I : enumerate(ArgOperands.operands())) {
51035104
// Some intrinsics have a scalar argument - don't replace it with a
51045105
// vector.
51055106
Value *Arg;
51065107
if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, I.index()))
51075108
Arg = State.get(I.value(), Part);
5108-
else
5109+
else {
51095110
Arg = State.get(I.value(), VPIteration(0, 0));
5111+
if (hasVectorInstrinsicOverloadedScalarOpd(ID, I.index()))
5112+
TysForDecl.push_back(Arg->getType());
5113+
}
51105114
Args.push_back(Arg);
51115115
}
51125116

51135117
Function *VectorF;
51145118
if (UseVectorIntrinsic) {
51155119
// Use vector version of the intrinsic.
5116-
Type *TysForDecl[] = {CI->getType()};
51175120
if (VF.isVector())
51185121
TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF);
51195122
VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl);

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -5499,6 +5499,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
54995499

55005500
Value *ScalarArg = nullptr;
55015501
std::vector<Value *> OpVecs;
5502+
SmallVector<Type *, 2> TysForDecl =
5503+
{FixedVectorType::get(CI->getType(), E->Scalars.size())};
55025504
for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
55035505
ValueList OpVL;
55045506
// Some intrinsics have scalar arguments. This argument should not be
@@ -5507,6 +5509,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
55075509
CallInst *CEI = cast<CallInst>(VL0);
55085510
ScalarArg = CEI->getArgOperand(j);
55095511
OpVecs.push_back(CEI->getArgOperand(j));
5512+
if (hasVectorInstrinsicOverloadedScalarOpd(IID, j))
5513+
TysForDecl.push_back(ScalarArg->getType());
55105514
continue;
55115515
}
55125516

@@ -5523,8 +5527,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
55235527
false /*HasGlobalPred*/);
55245528
CF = VFDatabase(*CI).getVectorizedFunction(Shape);
55255529
} else {
5526-
Type *Tys[] = {FixedVectorType::get(CI->getType(), E->Scalars.size())};
5527-
CF = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
5530+
CF = Intrinsic::getDeclaration(F->getParent(), ID, TysForDecl);
55285531
}
55295532

55305533
SmallVector<OperandBundleDef, 1> OpBundles;

0 commit comments

Comments
 (0)