diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 03fb7ca9bc3c3..2fc15c81446b7 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12355,7 +12355,8 @@ def err_builtin_invalid_arg_type: Error < "a vector of integers|" "an unsigned integer|" "an 'int'|" - "a vector of floating points}1 (was %2)">; + "a vector of floating points|" + "a vector of arithmetic element type}1 (was %2)">; def err_builtin_matrix_disabled: Error< "matrix types extension is disabled. Pass -fenable-matrix to enable it">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 0d52083b06946..3a9b6dc303dea 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -1756,6 +1756,17 @@ static bool interp__builtin_vector_reduce(InterpState &S, CodePtr OpPC, PrimType ElemT = *S.getContext().classify(ElemType); unsigned NumElems = Arg.getNumElems(); + if (ElemType->isRealFloatingType()) { + if (ID != Builtin::BI__builtin_reduce_add && + ID != Builtin::BI__builtin_reduce_mul) + llvm_unreachable("Only reduce_add and reduce_mul are supported for " + "floating-point types."); + // Floating-point arithmetic is not valid for constant expression + // initialization. Returning false defers checks to integral constant + // expression validation, preventing a bad deref of Floating as an integer. 
+ return false; + } + INT_TYPE_SWITCH_NO_BOOL(ElemT, { T Result = Arg.atIndex(0).deref<T>(); unsigned BitWidth = Result.bitWidth(); diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 5cd893d70695c..a1bf936f31503 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4314,12 +4314,41 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min")); } - case Builtin::BI__builtin_reduce_add: + case Builtin::BI__builtin_reduce_add: { + QualType QT = E->getArg(0)->getType(); + if (QT->hasFloatingRepresentation()) { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + assert(Op0->getType()->isVectorTy()); + unsigned VecSize = QT->getAs<VectorType>()->getNumElements(); + Value *Sum = Builder.CreateExtractElement(Op0, static_cast<uint64_t>(0)); + for (unsigned I = 1; I < VecSize; I++) { + Value *Elt = Builder.CreateExtractElement(Op0, I); + Sum = Builder.CreateFAdd(Sum, Elt); + } + return RValue::get(Sum); + } + assert(QT->hasIntegerRepresentation()); return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add")); - case Builtin::BI__builtin_reduce_mul: + } + case Builtin::BI__builtin_reduce_mul: { + QualType QT = E->getArg(0)->getType(); + if (QT->hasFloatingRepresentation()) { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + assert(Op0->getType()->isVectorTy()); + unsigned VecSize = QT->getAs<VectorType>()->getNumElements(); + Value *Product = + Builder.CreateExtractElement(Op0, static_cast<uint64_t>(0)); + for (unsigned I = 1; I < VecSize; I++) { + Value *Elt = Builder.CreateExtractElement(Op0, I); + Product = Builder.CreateFMul(Product, Elt); + } + return RValue::get(Product); + } + assert(QT->hasIntegerRepresentation()); return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul")); + } case Builtin::BI__builtin_reduce_xor: return 
RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor")); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 28dcfaac2e84f..e89b0652edc2e 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2886,11 +2886,31 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, TheCall->setType(ElTy); break; } + case Builtin::BI__builtin_reduce_add: + case Builtin::BI__builtin_reduce_mul: { + if (PrepareBuiltinReduceMathOneArgCall(TheCall)) + return ExprError(); + + const Expr *Arg = TheCall->getArg(0); + const auto *TyA = Arg->getType()->getAs<VectorType>(); + + QualType ElTy; + if (TyA) + ElTy = TyA->getElementType(); + else if (Arg->getType()->isSizelessVectorType()) + ElTy = Arg->getType()->getSizelessVectorEltType(Context); + + if (ElTy.isNull()) { + Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type) + << 1 << /* vector of integers or floating points */ 10 + << Arg->getType(); + return ExprError(); + } + TheCall->setType(ElTy); + break; + } // These builtins support vectors of integers only. - // TODO: ADD/MUL should support floating-point types. 
- case Builtin::BI__builtin_reduce_add: - case Builtin::BI__builtin_reduce_mul: case Builtin::BI__builtin_reduce_xor: case Builtin::BI__builtin_reduce_or: case Builtin::BI__builtin_reduce_and: { diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp index 723764010d9a3..29ea51d619df3 100644 --- a/clang/test/AST/ByteCode/builtin-functions.cpp +++ b/clang/test/AST/ByteCode/builtin-functions.cpp @@ -1056,6 +1056,14 @@ namespace RecuceAdd { static_assert(__builtin_reduce_add((vector4uint){~0U, 0, 0, 1}) == 0); static_assert(__builtin_reduce_add((vector4ulong){~0ULL, 0, 0, 1}) == 0); + static_assert(__builtin_reduce_add((vector4float){}) == 0.0); + // both-error@-1 {{static assertion expression is not an integral constant expression}} + static_assert(__builtin_reduce_add((vector4float){1.1, 2.2, 3.3, 4.4}) == 11.0); + // both-error@-1 {{static assertion expression is not an integral constant expression}} + static_assert(__builtin_reduce_add((vector4double){100.1, 200.2, 300.3, 400.4}) == 1001.0); + // both-error@-1 {{static assertion expression is not an integral constant expression}} + + #ifdef __SIZEOF_INT128__ typedef __int128 v4i128 __attribute__((__vector_size__(128 * 2))); @@ -1091,6 +1099,13 @@ namespace ReduceMul { (~0U - 1)); #endif static_assert(__builtin_reduce_mul((vector4ulong){~0ULL, 1, 1, 2}) == ~0ULL - 1); + + static_assert(__builtin_reduce_mul((vector4float){}) == 0.0); + // both-error@-1 {{static assertion expression is not an integral constant expression}} + static_assert(__builtin_reduce_mul((vector4float){1.0, 2.0, 3.0, 1.0}) == 6.0); + // both-error@-1 {{static assertion expression is not an integral constant expression}} + static_assert(__builtin_reduce_mul((vector4double){3.0, 4.0, 1.0, 1.0}) == 12.0); + // both-error@-1 {{static assertion expression is not an integral constant expression}} } namespace ReduceAnd { diff --git a/clang/test/CodeGen/builtins-reduction-math.c 
b/clang/test/CodeGen/builtins-reduction-math.c index e12fd729c84c0..d0ab57f41edc2 100644 --- a/clang/test/CodeGen/builtins-reduction-math.c +++ b/clang/test/CodeGen/builtins-reduction-math.c @@ -4,6 +4,7 @@ // RUN: %clang_cc1 -O1 -triple aarch64 -target-feature +sve %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=SVE %s typedef float float4 __attribute__((ext_vector_type(4))); +typedef double double4 __attribute__((ext_vector_type(4))); typedef short int si8 __attribute__((ext_vector_type(8))); typedef unsigned int u4 __attribute__((ext_vector_type(4))); @@ -61,6 +62,28 @@ void test_builtin_reduce_min(float4 vf1, si8 vi1, u4 vu1) { unsigned long long r5 = __builtin_reduce_min(cvi1); } +void test_builtin_reduce_addf(float4 vf4, double4 vd4) { + // CHECK: [[VF4:%.+]] = load <4 x float>, ptr %vf4.addr, align 16 + // CHECK-NEXT: [[ARRF1:%.+]] = extractelement <4 x float> [[VF4]], i64 0 + // CHECK-NEXT: [[ARRF2:%.+]] = extractelement <4 x float> [[VF4]], i64 1 + // CHECK-NEXT: [[ADDF1:%.+]] = fadd float [[ARRF1]], [[ARRF2]] + // CHECK-NEXT: [[ARRF3:%.+]] = extractelement <4 x float> [[VF4]], i64 2 + // CHECK-NEXT: [[ADDF2:%.+]] = fadd float [[ADDF1]], [[ARRF3]] + // CHECK-NEXT: [[ARRF4:%.+]] = extractelement <4 x float> [[VF4]], i64 3 + // CHECK-NEXT: [[ADDF3:%.+]] = fadd float [[ADDF2]], [[ARRF4]] + float r2 = __builtin_reduce_add(vf4); + + // CHECK: [[VD4:%.+]] = load <4 x double>, ptr %vd4.addr, align 16 + // CHECK-NEXT: [[ARR1:%.+]] = extractelement <4 x double> [[VD4]], i64 0 + // CHECK-NEXT: [[ARR2:%.+]] = extractelement <4 x double> [[VD4]], i64 1 + // CHECK-NEXT: [[ADD1:%.+]] = fadd double [[ARR1]], [[ARR2]] + // CHECK-NEXT: [[ARR3:%.+]] = extractelement <4 x double> [[VD4]], i64 2 + // CHECK-NEXT: [[ADD2:%.+]] = fadd double [[ADD1]], [[ARR3]] + // CHECK-NEXT: [[ARR4:%.+]] = extractelement <4 x double> [[VD4]], i64 3 + // CHECK-NEXT: [[ADD3:%.+]] = fadd double [[ADD2]], [[ARR4]] + double r3 = __builtin_reduce_add(vd4); +} + void 
test_builtin_reduce_add(si8 vi1, u4 vu1) { // CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16 // CHECK-NEXT: call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[VI1]]) @@ -83,6 +106,28 @@ void test_builtin_reduce_add(si8 vi1, u4 vu1) { unsigned long long r5 = __builtin_reduce_add(cvu1); } +void test_builtin_reduce_mulf(float4 vf4, double4 vd4) { + // CHECK: [[VF4:%.+]] = load <4 x float>, ptr %vf4.addr, align 16 + // CHECK-NEXT: [[ARRF1:%.+]] = extractelement <4 x float> [[VF4]], i64 0 + // CHECK-NEXT: [[ARRF2:%.+]] = extractelement <4 x float> [[VF4]], i64 1 + // CHECK-NEXT: [[MULF1:%.+]] = fmul float [[ARRF1]], [[ARRF2]] + // CHECK-NEXT: [[ARRF3:%.+]] = extractelement <4 x float> [[VF4]], i64 2 + // CHECK-NEXT: [[MULF2:%.+]] = fmul float [[MULF1]], [[ARRF3]] + // CHECK-NEXT: [[ARRF4:%.+]] = extractelement <4 x float> [[VF4]], i64 3 + // CHECK-NEXT: [[MULF3:%.+]] = fmul float [[MULF2]], [[ARRF4]] + float r2 = __builtin_reduce_mul(vf4); + + // CHECK: [[VD4:%.+]] = load <4 x double>, ptr %vd4.addr, align 16 + // CHECK-NEXT: [[ARR1:%.+]] = extractelement <4 x double> [[VD4]], i64 0 + // CHECK-NEXT: [[ARR2:%.+]] = extractelement <4 x double> [[VD4]], i64 1 + // CHECK-NEXT: [[MUL1:%.+]] = fmul double [[ARR1]], [[ARR2]] + // CHECK-NEXT: [[ARR3:%.+]] = extractelement <4 x double> [[VD4]], i64 2 + // CHECK-NEXT: [[MUL2:%.+]] = fmul double [[MUL1]], [[ARR3]] + // CHECK-NEXT: [[ARR4:%.+]] = extractelement <4 x double> [[VD4]], i64 3 + // CHECK-NEXT: [[MUL3:%.+]] = fmul double [[MUL2]], [[ARR4]] + double r3 = __builtin_reduce_mul(vd4); +} + void test_builtin_reduce_mul(si8 vi1, u4 vu1) { // CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16 // CHECK-NEXT: call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> [[VI1]]) diff --git a/clang/test/Sema/builtins-reduction-math.c b/clang/test/Sema/builtins-reduction-math.c index 9b0d91bfd6e3d..dd164b8e42297 100644 --- a/clang/test/Sema/builtins-reduction-math.c +++ b/clang/test/Sema/builtins-reduction-math.c @@ -36,7 
+36,7 @@ void test_builtin_reduce_min(int i, float4 v, int3 iv) { // expected-error@-1 {{1st argument must be a vector type (was 'int')}} } -void test_builtin_reduce_add(int i, float4 v, int3 iv) { +void test_builtin_reduce_add(int i, float f, int3 iv) { struct Foo s = __builtin_reduce_add(iv); // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}} @@ -47,13 +47,13 @@ void test_builtin_reduce_add(int i, float4 v, int3 iv) { // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} i = __builtin_reduce_add(i); - // expected-error@-1 {{1st argument must be a vector of integers (was 'int')}} + // expected-error@-1 {{1st argument must be a vector of arithmetic element type (was 'int')}} - i = __builtin_reduce_add(v); - // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}} + f = __builtin_reduce_add(f); + // expected-error@-1 {{1st argument must be a vector of arithmetic element type (was 'float')}} } -void test_builtin_reduce_mul(int i, float4 v, int3 iv) { +void test_builtin_reduce_mul(int i, float f, int3 iv) { struct Foo s = __builtin_reduce_mul(iv); // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}} @@ -64,10 +64,10 @@ void test_builtin_reduce_mul(int i, float4 v, int3 iv) { // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} i = __builtin_reduce_mul(i); - // expected-error@-1 {{1st argument must be a vector of integers (was 'int')}} + // expected-error@-1 {{1st argument must be a vector of arithmetic element type (was 'int')}} - i = __builtin_reduce_mul(v); - // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}} + f = __builtin_reduce_mul(f); + // expected-error@-1 {{1st argument must be a vector of arithmetic element type (was 'float')}} } void test_builtin_reduce_xor(int i, float4 v, int3 iv) { diff --git 
a/clang/test/Sema/constant_builtins_vector.cpp b/clang/test/Sema/constant_builtins_vector.cpp index 8659fa9e46612..af35f7f599989 100644 --- a/clang/test/Sema/constant_builtins_vector.cpp +++ b/clang/test/Sema/constant_builtins_vector.cpp @@ -746,6 +746,12 @@ constexpr long long reduceAddLong2 = __builtin_reduce_add((vector4long){(1LL << static_assert(__builtin_reduce_add((vector4uint){~0U, 0, 0, 1}) == 0); static_assert(__builtin_reduce_add((vector4ulong){~0ULL, 0, 0, 1}) == 0); +constexpr float reduceAddFloat = __builtin_reduce_add((vector4float){1.0, 2.0, 3.0, 4.0}); +// expected-error@-1 {{must be initialized by a constant expression}} + +constexpr double reduceAddDouble = __builtin_reduce_add((vector4double){-1.0, 2.0, -3.0, 4.0}); +// expected-error@-1 {{must be initialized by a constant expression}} + static_assert(__builtin_reduce_mul((vector4char){}) == 0); static_assert(__builtin_reduce_mul((vector4char){1, 2, 3, 4}) == 24); static_assert(__builtin_reduce_mul((vector4short){1, 2, 30, 40}) == 2400); @@ -766,6 +772,12 @@ constexpr long long reduceMulLong2 = __builtin_reduce_mul((vector4long){(1LL << static_assert(__builtin_reduce_mul((vector4uint){~0U, 1, 1, 2}) == ~0U - 1); static_assert(__builtin_reduce_mul((vector4ulong){~0ULL, 1, 1, 2}) == ~0ULL - 1); +constexpr float reduceMulFloat = __builtin_reduce_mul((vector4float){1.0, 2.0, 3.0, 1.0}); +// expected-error@-1 {{must be initialized by a constant expression}} + +constexpr double reduceMulDouble = __builtin_reduce_mul((vector4double){3.0, 4.0, 1.0, 1.0}); +// expected-error@-1 {{must be initialized by a constant expression}} + static_assert(__builtin_reduce_and((vector4char){}) == 0); static_assert(__builtin_reduce_and((vector4char){(char)0x11, (char)0x22, (char)0x44, (char)0x88}) == 0); static_assert(__builtin_reduce_and((vector4short){(short)0x1111, (short)0x2222, (short)0x4444, (short)0x8888}) == 0);