matthiaskrgr
diff --git a/‎clang/lib/CodeGen/CGBuiltin.cpp
+14-3 b/‎clang/lib/CodeGen/CGBuiltin.cpp
+14-3
diff --git a/‎clang/test/CodeGen/avr-builtins.c
+21 b/‎clang/test/CodeGen/avr-builtins.c
+21
diff --git a/‎clang/test/CodeGen/math-builtins.c
+6-6 b/‎clang/test/CodeGen/math-builtins.c
+6-6
diff --git a/‎clang/test/CodeGen/msp430-builtins.c
+37 b/‎clang/test/CodeGen/msp430-builtins.c
+37
diff --git a/‎llvm/docs/LangRef.rst
+8-5 b/‎llvm/docs/LangRef.rst
+8-5
diff --git a/‎llvm/include/llvm/Analysis/VectorUtils.h
+5 b/‎llvm/include/llvm/Analysis/VectorUtils.h
+5
diff --git a/‎llvm/include/llvm/CodeGen/ISDOpcodes.h
+2-2 b/‎llvm/include/llvm/CodeGen/ISDOpcodes.h
+2-2
diff --git a/‎llvm/include/llvm/IR/Intrinsics.td
+1-1 b/‎llvm/include/llvm/IR/Intrinsics.td
+1-1
diff --git a/‎llvm/lib/Analysis/VectorUtils.cpp
+10 b/‎llvm/lib/Analysis/VectorUtils.cpp
+10
diff --git a/‎llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+11 b/‎llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+11
diff --git a/‎llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+11-1 b/‎llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+11-1
diff --git a/‎llvm/lib/Target/Mips/Mips16HardFloat.cpp
+1-1 b/‎llvm/lib/Target/Mips/Mips16HardFloat.cpp
+1-1
diff --git a/‎llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+5-2 b/‎llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+5-2
diff --git a/‎llvm/lib/Transforms/Scalar/Scalarizer.cpp
+8-3 b/‎llvm/lib/Transforms/Scalar/Scalarizer.cpp
+8-3
diff --git a/‎llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+5-9 b/‎llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+5-9
diff --git a/‎llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+5-2 b/‎llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+5-2
diff --git a/‎llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+5-2 b/‎llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+5-2
@@ -2946,10 +2946,21 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
 
   case Builtin::BI__builtin_powi:
   case Builtin::BI__builtin_powif:
-  case Builtin::BI__builtin_powil:
-    return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(
-        *this, E, Intrinsic::powi, Intrinsic::experimental_constrained_powi));
+  case Builtin::BI__builtin_powil: {
+    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
 
+    if (Builder.getIsFPConstrained()) {
+      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
+      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
+                                     Src0->getType());
+      return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
+    }
+
+    Function *F = CGM.getIntrinsic(Intrinsic::powi,
+                                   { Src0->getType(), Src1->getType() });
+    return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
+  }
   case Builtin::BI__builtin_isgreater:
   case Builtin::BI__builtin_isgreaterequal:
   case Builtin::BI__builtin_isless:
 
@@ -104,3 +104,24 @@ unsigned long long byteswap64(unsigned long long x) {
 
 // CHECK: define{{.*}} i64 @byteswap64
 // CHECK: i64 @llvm.bswap.i64(i64
+
+double powi(double x, int y) {
+  return __builtin_powi(x, y);
+}
+
+// CHECK: define{{.*}} float @powi
+// CHECK: float @llvm.powi.f32.i16(float %0, i16 %1)
+
+float powif(float x, int y) {
+    return __builtin_powif(x, y);
+}
+
+// CHECK: define{{.*}} float @powif
+// CHECK: float @llvm.powi.f32.i16(float %0, i16 %1)
+
+long double powil(long double x, int y) {
+    return __builtin_powil(x, y);
+}
+
+// CHECK: define{{.*}} float @powil
+// CHECK: float @llvm.powi.f32.i16(float %0, i16 %1)
@@ -133,12 +133,12 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
 
   __builtin_powi(f,f);        __builtin_powif(f,f);       __builtin_powil(f,f);
 
-// NO__ERRNO: declare double @llvm.powi.f64(double, i32) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare float @llvm.powi.f32(float, i32) [[READNONE_INTRINSIC]]
-// NO__ERRNO: declare x86_fp80 @llvm.powi.f80(x86_fp80, i32) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare double @llvm.powi.f64(double, i32) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare float @llvm.powi.f32(float, i32) [[READNONE_INTRINSIC]]
-// HAS_ERRNO: declare x86_fp80 @llvm.powi.f80(x86_fp80, i32) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare double @llvm.powi.f64.i32(double, i32) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare float @llvm.powi.f32.i32(float, i32) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare x86_fp80 @llvm.powi.f80.i32(x86_fp80, i32) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare double @llvm.powi.f64.i32(double, i32) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare float @llvm.powi.f32.i32(float, i32) [[READNONE_INTRINSIC]]
+// HAS_ERRNO: declare x86_fp80 @llvm.powi.f80.i32(x86_fp80, i32) [[READNONE_INTRINSIC]]
 
   /* math */
   __builtin_acos(f);       __builtin_acosf(f);      __builtin_acosl(f); __builtin_acosf128(f);
 
@@ -0,0 +1,37 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple msp430-unknown-unknown -O3 -emit-llvm -o- %s | FileCheck %s
+// REQUIRES: msp430-registered-target
+
+_Static_assert(sizeof(int) == 2, "Assumption failed");
+_Static_assert(sizeof(long) == 4, "Assumption failed");
+_Static_assert(sizeof(long long) == 8, "Assumption failed");
+_Static_assert(sizeof(float) == 4, "Assumption failed");
+_Static_assert(sizeof(double) == 8, "Assumption failed");
+_Static_assert(sizeof(long double) == 8, "Assumption failed");
+
+// CHECK-LABEL: @powif(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call float @llvm.powi.f32.i16(float [[X:%.*]], i16 [[Y:%.*]])
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+float powif(float x, int y) {
+    return __builtin_powif(x, y);
+}
+
+// CHECK-LABEL: @powi(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call double @llvm.powi.f64.i16(double [[X:%.*]], i16 [[Y:%.*]])
+// CHECK-NEXT:    ret double [[TMP0]]
+//
+double powi(double x, int y) {
+  return __builtin_powi(x, y);
+}
+
+// CHECK-LABEL: @powil(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call double @llvm.powi.f64.i16(double [[X:%.*]], i16 [[Y:%.*]])
+// CHECK-NEXT:    ret double [[TMP0]]
+//
+long double powil(long double x, int y) {
+    return __builtin_powil(x, y);
+}
@@ -13647,13 +13647,16 @@ This is an overloaded intrinsic. You can use ``llvm.powi`` on any
 floating-point or vector of floating-point type. Not all targets support
 all types however.
 
+Generally, the only supported type for the exponent is the one matching
+with the C type ``int``.
+
 ::
 
-      declare float     @llvm.powi.f32(float  %Val, i32 %power)
-      declare double    @llvm.powi.f64(double %Val, i32 %power)
-      declare x86_fp80  @llvm.powi.f80(x86_fp80  %Val, i32 %power)
-      declare fp128     @llvm.powi.f128(fp128 %Val, i32 %power)
-      declare ppc_fp128 @llvm.powi.ppcf128(ppc_fp128  %Val, i32 %power)
+      declare float     @llvm.powi.f32.i32(float  %Val, i32 %power)
+      declare double    @llvm.powi.f64.i16(double %Val, i16 %power)
+      declare x86_fp80  @llvm.powi.f80.i32(x86_fp80  %Val, i32 %power)
+      declare fp128     @llvm.powi.f128.i32(fp128 %Val, i32 %power)
+      declare ppc_fp128 @llvm.powi.ppcf128.i32(ppc_fp128  %Val, i32 %power)
 
 Overview:
 """""""""
 
@@ -317,6 +317,11 @@ bool isTriviallyVectorizable(Intrinsic::ID ID);
 /// Identifies if the vector form of the intrinsic has a scalar operand.
 bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);
 
+/// Identifies if the vector form of the intrinsic has a scalar operand that has
+/// an overloaded type.
+bool hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID,
+                                            unsigned ScalarOpdIdx);
+
 /// Returns intrinsic ID for call.
 /// For the input call instruction it finds mapping intrinsic and returns
 /// its intrinsic ID, in case it does not found it return not_intrinsic.
 
@@ -851,8 +851,8 @@ enum NodeType {
   STRICT_FP_TO_FP16,
 
   /// Perform various unary floating-point operations inspired by libm. For
-  /// FPOWI, the result is undefined if if the integer operand doesn't fit
-  /// into 32 bits.
+  /// FPOWI, the result is undefined if if the integer operand doesn't fit into
+  /// sizeof(int).
   FNEG,
   FABS,
   FSQRT,
 
@@ -652,7 +652,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
   // rounding mode. LLVM purposely does not model changes to the FP
   // environment so they can be treated as readnone.
   def int_sqrt : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
-  def int_powi : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty]>;
+  def int_powi : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_anyint_ty]>;
   def int_sin  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
   def int_cos  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
   def int_pow  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
 
@@ -114,6 +114,16 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
   }
 }
 
+bool llvm::hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID,
+                                                  unsigned ScalarOpdIdx) {
+  switch (ID) {
+  case Intrinsic::powi:
+    return (ScalarOpdIdx == 1);
+  default:
+    return false;
+  }
+}
+
 /// Returns intrinsic ID for call.
 /// For the input call instruction it finds mapping intrinsic and returns
 /// its ID, in case it does not found it return not_intrinsic.
 
@@ -4044,6 +4044,17 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
                                     Exponent));
       break;
     }
+    unsigned Offset = Node->isStrictFPOpcode() ? 1 : 0;
+    bool ExponentHasSizeOfInt =
+        DAG.getLibInfo().getIntSize() ==
+        Node->getOperand(1 + Offset).getValueType().getSizeInBits();
+    if (!ExponentHasSizeOfInt) {
+      // If the exponent does not match with sizeof(int) a libcall to
+      // RTLIB::POWI would use the wrong type for the argument.
+      DAG.getContext()->emitError("POWI exponent does not match sizeof(int)");
+      Results.push_back(DAG.getUNDEF(Node->getValueType(0)));
+      break;
+    }
     ExpandFPLibCall(Node, LC, Results);
     break;
   }
 
@@ -19,6 +19,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "LegalizeTypes.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -572,7 +573,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
 SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
   bool IsStrict = N->isStrictFPOpcode();
   unsigned Offset = IsStrict ? 1 : 0;
-  assert(N->getOperand(1 + Offset).getValueType() == MVT::i32 &&
+  assert((N->getOperand(1 + Offset).getValueType() == MVT::i16 ||
+          N->getOperand(1 + Offset).getValueType() == MVT::i32) &&
          "Unsupported power type!");
   RTLIB::Libcall LC = RTLIB::getPOWI(N->getValueType(0));
   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
@@ -583,6 +585,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
     return DAG.getUNDEF(N->getValueType(0));
   }
 
+  if (DAG.getLibInfo().getIntSize() !=
+      N->getOperand(1 + Offset).getValueType().getSizeInBits()) {
+    // If the exponent does not match with sizeof(int) a libcall to RTLIB::POWI
+    // would use the wrong type for the argument.
+    DAG.getContext()->emitError("POWI exponent does not match sizeof(int)");
+    return DAG.getUNDEF(N->getValueType(0));
+  }
+
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)),
                      N->getOperand(1 + Offset) };
 
@@ -359,7 +359,7 @@ static const char *const IntrinsicInline[] = {
   "llvm.log10.f32", "llvm.log10.f64",
   "llvm.nearbyint.f32", "llvm.nearbyint.f64",
   "llvm.pow.f32", "llvm.pow.f64",
-  "llvm.powi.f32", "llvm.powi.f64",
+  "llvm.powi.f32.i32", "llvm.powi.f64.i32",
   "llvm.rint.f32", "llvm.rint.f64",
   "llvm.round.f32", "llvm.round.f64",
   "llvm.sin.f32", "llvm.sin.f64",
 
@@ -1300,7 +1300,7 @@ static Instruction *foldFDivPowDivisor(BinaryOperator &I,
     Args.push_back(II->getArgOperand(0));
     Args.push_back(Builder.CreateFNegFMF(II->getArgOperand(1), &I));
     break;
-  case Intrinsic::powi:
+  case Intrinsic::powi: {
     // Require 'ninf' assuming that makes powi(X, -INT_MIN) acceptable.
     // That is, X ** (huge negative number) is 0.0, ~1.0, or INF and so
     // dividing by that is INF, ~1.0, or 0.0. Code that uses powi allows
@@ -1310,7 +1310,10 @@ static Instruction *foldFDivPowDivisor(BinaryOperator &I,
       return nullptr;
     Args.push_back(II->getArgOperand(0));
     Args.push_back(Builder.CreateNeg(II->getArgOperand(1)));
-    break;
+    Type *Tys[] = {I.getType(), II->getArgOperand(1)->getType()};
+    Value *Pow = Builder.CreateIntrinsic(IID, Tys, Args, &I);
+    return BinaryOperator::CreateFMulFMF(Op0, Pow, &I);
+  }
   case Intrinsic::exp:
   case Intrinsic::exp2:
     Args.push_back(Builder.CreateFNegFMF(II->getArgOperand(0), &I));
 
@@ -510,8 +510,8 @@ static bool isTriviallyScalariable(Intrinsic::ID ID) {
 // All of the current scalarizable intrinsics only have one mangled type.
 static Function *getScalarIntrinsicDeclaration(Module *M,
                                                Intrinsic::ID ID,
-                                               VectorType *Ty) {
-  return Intrinsic::getDeclaration(M, ID, { Ty->getScalarType() });
+                                               ArrayRef<Type*> Tys) {
+  return Intrinsic::getDeclaration(M, ID, Tys);
 }
 
 /// If a call to a vector typed intrinsic function, split into a scalar call per
@@ -537,6 +537,9 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
 
   Scattered.resize(NumArgs);
 
+  SmallVector<llvm::Type *, 3> Tys;
+  Tys.push_back(VT->getScalarType());
+
   // Assumes that any vector type has the same number of elements as the return
   // vector type, which is true for all current intrinsics.
   for (unsigned I = 0; I != NumArgs; ++I) {
@@ -546,13 +549,15 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
       assert(Scattered[I].size() == NumElems && "mismatched call operands");
     } else {
       ScalarOperands[I] = OpI;
+      if (hasVectorInstrinsicOverloadedScalarOpd(ID, I))
+        Tys.push_back(OpI->getType());
     }
   }
 
   ValueVector Res(NumElems);
   ValueVector ScalarCallOps(NumArgs);
 
-  Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, VT);
+  Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, Tys);
   IRBuilder<> Builder(&CI);
 
   // Perform actual scalarization, taking care to preserve any scalar operands.
 
@@ -1664,7 +1664,8 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) {
 static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
                                            IRBuilderBase &B) {
   Value *Args[] = {Base, Expo};
-  Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType());
+  Type *Types[] = {Base->getType(), Expo->getType()};
+  Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Types);
   return B.CreateCall(F, Args);
 }
 
@@ -1765,24 +1766,19 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
       return FMul;
     }
 
-    APSInt IntExpo(32, /*isUnsigned=*/false);
+    APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false);
     // powf(x, n) -> powi(x, n) if n is a constant signed integer value
     if (ExpoF->isInteger() &&
         ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
             APFloat::opOK) {
       return createPowWithIntegerExponent(
-          Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B);
+          Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo), M, B);
     }
   }
 
   // powf(x, itofp(y)) -> powi(x, y)
   if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
-    // FIXME: Currently we always use 32 bits for the exponent in llvm.powi. In
-    // the future we want to use the target dependent "size of int", or
-    // otherwise we could end up using the wrong type for the exponent when
-    // mapping llvm.powi back to an rtlib call. See
-    // https://reviews.llvm.org/D99439 for such a fix.
-    if (Value *ExpoI = getIntToFPVal(Expo, B, 32))
+    if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize()))
       return createPowWithIntegerExponent(Base, ExpoI, M, B);
   }
 
 
@@ -5098,22 +5098,25 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
          "Either the intrinsic cost or vector call cost must be valid");
 
   for (unsigned Part = 0; Part < UF; ++Part) {
+    SmallVector<Type *, 2> TysForDecl = {CI->getType()};
     SmallVector<Value *, 4> Args;
     for (auto &I : enumerate(ArgOperands.operands())) {
       // Some intrinsics have a scalar argument - don't replace it with a
       // vector.
       Value *Arg;
       if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, I.index()))
         Arg = State.get(I.value(), Part);
-      else
+      else {
         Arg = State.get(I.value(), VPIteration(0, 0));
+        if (hasVectorInstrinsicOverloadedScalarOpd(ID, I.index()))
+          TysForDecl.push_back(Arg->getType());
+      }
       Args.push_back(Arg);
     }
 
     Function *VectorF;
     if (UseVectorIntrinsic) {
       // Use vector version of the intrinsic.
-      Type *TysForDecl[] = {CI->getType()};
       if (VF.isVector())
         TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF);
       VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl);
 
@@ -5499,6 +5499,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
 
       Value *ScalarArg = nullptr;
       std::vector<Value *> OpVecs;
+      SmallVector<Type *, 2> TysForDecl =
+          {FixedVectorType::get(CI->getType(), E->Scalars.size())};
       for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
         ValueList OpVL;
         // Some intrinsics have scalar arguments. This argument should not be
@@ -5507,6 +5509,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
           CallInst *CEI = cast<CallInst>(VL0);
           ScalarArg = CEI->getArgOperand(j);
           OpVecs.push_back(CEI->getArgOperand(j));
+          if (hasVectorInstrinsicOverloadedScalarOpd(IID, j))
+            TysForDecl.push_back(ScalarArg->getType());
           continue;
         }
 
@@ -5523,8 +5527,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
                          false /*HasGlobalPred*/);
         CF = VFDatabase(*CI).getVectorizedFunction(Shape);
       } else {
-        Type *Tys[] = {FixedVectorType::get(CI->getType(), E->Scalars.size())};
-        CF = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
+        CF = Intrinsic::getDeclaration(F->getParent(), ID, TysForDecl);
       }
 
       SmallVector<OperandBundleDef, 1> OpBundles;