Skip to content

[Clang] Add float type support to __builtin_reduce_add and __builtin_reduce_multipy #120367

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -12355,7 +12355,8 @@ def err_builtin_invalid_arg_type: Error <
"a vector of integers|"
"an unsigned integer|"
"an 'int'|"
"a vector of floating points}1 (was %2)">;
"a vector of floating points|"
"a vector of arithmetic element type}1 (was %2)">;

def err_builtin_matrix_disabled: Error<
"matrix types extension is disabled. Pass -fenable-matrix to enable it">;
Expand Down
11 changes: 11 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1756,6 +1756,17 @@ static bool interp__builtin_vector_reduce(InterpState &S, CodePtr OpPC,
PrimType ElemT = *S.getContext().classify(ElemType);
unsigned NumElems = Arg.getNumElems();

if (ElemType->isRealFloatingType()) {
if (ID != Builtin::BI__builtin_reduce_add &&
ID != Builtin::BI__builtin_reduce_mul)
llvm_unreachable("Only reduce_add and reduce_mul are supported for "
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this actually unreachable? You have a mix of both real error handling code and an unreachable. Code after an unreachable (like the return) can be removed by a compiler, which would cause some bad behavior here if this ever got hit.

Copy link
Member Author

@farzonl farzonl Dec 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Context the float case was exposed by the changes in SemaChecking.cpp which allowed reduce_add and reduce_mul to operate on floating point vectors. The remaining reduce builtins Builtin::BI__builtin_reduce_xor, Builtin::BI__builtin_reduce_or, and Builtin::BI__builtin_reduce_and are not reachable here because the integer checks in SemaChecking.cpp still apply to them. So yes this branch should be unreachable for all non reduce_add and reduce_mul reduction cases.

Copy link
Member Author

@farzonl farzonl Jan 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@llvm-beanz I don't understand how this code could be removed by the compiler. The code after the unreachable is in a different block. The unreachable is gated by the builtin check.

"floating-point types.");
// Floating-point arithmetic is not valid for constant expression
// initialization. Returning false defers checks to integral constant
// expression validation, preventing a bad deref of Floating as an integer.
return false;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to do something like this:

if(ElemType->isRealFloatingType()) {
    FPOptions FPO = Call->getFPFeaturesInEffect(S.getASTContext().getLangOpts());
    llvm::RoundingMode RM = getRoundingMode(FPO);
    Floating Result = Arg.atIndex(0).deref<Floating>();
    APFloat currResult = Result.getAPFloat();
    for (unsigned I = 1; I != NumElems; ++I) {
      Floating Elem = Arg.atIndex(I).deref<Floating>();
      if (ID == Builtin::BI__builtin_reduce_add) {
        if(APFloat::opStatus::opOK != currResult.add(Elem.getAPFloat(),RM))
          return false;
      } else if (ID == Builtin::BI__builtin_reduce_mul) {
        if(APFloat::opStatus::opOK != currResult.multiply(Elem.getAPFloat(),RM))
          return false;
      } else
        llvm_unreachable("Only reduce_add and reduce_mul are supported for floating-point types.");
    }
    S.Stk.push<Floating>(Floating(currResult));
    return true;
  }

However there are checks further for isIntegralPointer() that make any code here to compute the reduction not make sense.

}

INT_TYPE_SWITCH_NO_BOOL(ElemT, {
T Result = Arg.atIndex(0).deref<T>();
unsigned BitWidth = Result.bitWidth();
Expand Down
33 changes: 31 additions & 2 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4314,12 +4314,41 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
*this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
}

case Builtin::BI__builtin_reduce_add:
case Builtin::BI__builtin_reduce_add: {
QualType QT = E->getArg(0)->getType();
if (QT->hasFloatingRepresentation()) {
Value *Op0 = EmitScalarExpr(E->getArg(0));
assert(Op0->getType()->isVectorTy());
unsigned VecSize = QT->getAs<VectorType>()->getNumElements();
Value *Sum = Builder.CreateExtractElement(Op0, static_cast<uint64_t>(0));
for (unsigned I = 1; I < VecSize; I++) {
Value *Elt = Builder.CreateExtractElement(Op0, I);
Sum = Builder.CreateFAdd(Sum, Elt);
}
return RValue::get(Sum);
}
assert(QT->hasIntegerRepresentation());
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
case Builtin::BI__builtin_reduce_mul:
}
case Builtin::BI__builtin_reduce_mul: {
QualType QT = E->getArg(0)->getType();
if (QT->hasFloatingRepresentation()) {
Value *Op0 = EmitScalarExpr(E->getArg(0));
assert(Op0->getType()->isVectorTy());
unsigned VecSize = QT->getAs<VectorType>()->getNumElements();
Value *Product =
Builder.CreateExtractElement(Op0, static_cast<uint64_t>(0));
for (unsigned I = 1; I < VecSize; I++) {
Value *Elt = Builder.CreateExtractElement(Op0, I);
Product = Builder.CreateFMul(Product, Elt);
}
return RValue::get(Product);
}
assert(QT->hasIntegerRepresentation());
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
}
case Builtin::BI__builtin_reduce_xor:
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
Expand Down
26 changes: 23 additions & 3 deletions clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2886,11 +2886,31 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
TheCall->setType(ElTy);
break;
}
case Builtin::BI__builtin_reduce_add:
case Builtin::BI__builtin_reduce_mul: {
if (PrepareBuiltinReduceMathOneArgCall(TheCall))
return ExprError();

const Expr *Arg = TheCall->getArg(0);
const auto *TyA = Arg->getType()->getAs<VectorType>();

QualType ElTy;
if (TyA)
ElTy = TyA->getElementType();
else if (Arg->getType()->isSizelessVectorType())
ElTy = Arg->getType()->getSizelessVectorEltType(Context);

if (ElTy.isNull()) {
Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
<< 1 << /* vector of integers or floating points */ 10
<< Arg->getType();
return ExprError();
}
TheCall->setType(ElTy);
break;
}

// These builtins support vectors of integers only.
// TODO: ADD/MUL should support floating-point types.
case Builtin::BI__builtin_reduce_add:
case Builtin::BI__builtin_reduce_mul:
case Builtin::BI__builtin_reduce_xor:
case Builtin::BI__builtin_reduce_or:
case Builtin::BI__builtin_reduce_and: {
Expand Down
15 changes: 15 additions & 0 deletions clang/test/AST/ByteCode/builtin-functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1056,6 +1056,14 @@ namespace RecuceAdd {
static_assert(__builtin_reduce_add((vector4uint){~0U, 0, 0, 1}) == 0);
static_assert(__builtin_reduce_add((vector4ulong){~0ULL, 0, 0, 1}) == 0);

static_assert(__builtin_reduce_add((vector4float){}) == 0.0);
// both-error@-1 {{static assertion expression is not an integral constant expression}}
static_assert(__builtin_reduce_add((vector4float){1.1, 2.2, 3.3, 4.4}) == 11.0);
// both-error@-1 {{static assertion expression is not an integral constant expression}}
static_assert(__builtin_reduce_add((vector4double){100.1, 200.2, 300.3, 400.4}) == 1001.0);
// both-error@-1 {{static assertion expression is not an integral constant expression}}



#ifdef __SIZEOF_INT128__
typedef __int128 v4i128 __attribute__((__vector_size__(128 * 2)));
Expand Down Expand Up @@ -1091,6 +1099,13 @@ namespace ReduceMul {
(~0U - 1));
#endif
static_assert(__builtin_reduce_mul((vector4ulong){~0ULL, 1, 1, 2}) == ~0ULL - 1);

static_assert(__builtin_reduce_mul((vector4float){}) == 0.0);
// both-error@-1 {{static assertion expression is not an integral constant expression}}
static_assert(__builtin_reduce_mul((vector4float){1.0, 2.0, 3.0, 1.0}) == 6.0);
// both-error@-1 {{static assertion expression is not an integral constant expression}}
static_assert(__builtin_reduce_mul((vector4double){3.0, 4.0, 1.0, 1.0}) == 12.0);
// both-error@-1 {{static assertion expression is not an integral constant expression}}
}

namespace ReduceAnd {
Expand Down
45 changes: 45 additions & 0 deletions clang/test/CodeGen/builtins-reduction-math.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
// RUN: %clang_cc1 -O1 -triple aarch64 -target-feature +sve %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=SVE %s

typedef float float4 __attribute__((ext_vector_type(4)));
typedef double double4 __attribute__((ext_vector_type(4)));
typedef short int si8 __attribute__((ext_vector_type(8)));
typedef unsigned int u4 __attribute__((ext_vector_type(4)));

Expand Down Expand Up @@ -61,6 +62,28 @@ void test_builtin_reduce_min(float4 vf1, si8 vi1, u4 vu1) {
unsigned long long r5 = __builtin_reduce_min(cvi1);
}

void test_builtin_reduce_addf(float4 vf4, double4 vd4) {
// CHECK: [[VF4:%.+]] = load <4 x float>, ptr %vf4.addr, align 16
// CHECK-NEXT: [[ARRF1:%.+]] = extractelement <4 x float> [[VF4]], i64 0
// CHECK-NEXT: [[ARRF2:%.+]] = extractelement <4 x float> [[VF4]], i64 1
// CHECK-NEXT: [[ADDF1:%.+]] = fadd float [[ARRF1]], [[ARRF2]]
// CHECK-NEXT: [[ARRF3:%.+]] = extractelement <4 x float> [[VF4]], i64 2
// CHECK-NEXT: [[ADDF2:%.+]] = fadd float [[ADDF1]], [[ARRF3]]
// CHECK-NEXT: [[ARRF4:%.+]] = extractelement <4 x float> [[VF4]], i64 3
// CHECK-NEXT: [[ADDF3:%.+]] = fadd float [[ADDF2]], [[ARRF4]]
float r2 = __builtin_reduce_add(vf4);

// CHECK: [[VD4:%.+]] = load <4 x double>, ptr %vd4.addr, align 16
// CHECK-NEXT: [[ARR1:%.+]] = extractelement <4 x double> [[VD4]], i64 0
// CHECK-NEXT: [[ARR2:%.+]] = extractelement <4 x double> [[VD4]], i64 1
// CHECK-NEXT: [[ADD1:%.+]] = fadd double [[ARR1]], [[ARR2]]
// CHECK-NEXT: [[ARR3:%.+]] = extractelement <4 x double> [[VD4]], i64 2
// CHECK-NEXT: [[ADD2:%.+]] = fadd double [[ADD1]], [[ARR3]]
// CHECK-NEXT: [[ARR4:%.+]] = extractelement <4 x double> [[VD4]], i64 3
// CHECK-NEXT: [[ADD3:%.+]] = fadd double [[ADD2]], [[ARR4]]
double r3 = __builtin_reduce_add(vd4);
}

void test_builtin_reduce_add(si8 vi1, u4 vu1) {
// CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
// CHECK-NEXT: call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[VI1]])
Expand All @@ -83,6 +106,28 @@ void test_builtin_reduce_add(si8 vi1, u4 vu1) {
unsigned long long r5 = __builtin_reduce_add(cvu1);
}

void test_builtin_reduce_mulf(float4 vf4, double4 vd4) {
// CHECK: [[VF4:%.+]] = load <4 x float>, ptr %vf4.addr, align 16
// CHECK-NEXT: [[ARRF1:%.+]] = extractelement <4 x float> [[VF4]], i64 0
// CHECK-NEXT: [[ARRF2:%.+]] = extractelement <4 x float> [[VF4]], i64 1
// CHECK-NEXT: [[MULF1:%.+]] = fmul float [[ARRF1]], [[ARRF2]]
// CHECK-NEXT: [[ARRF3:%.+]] = extractelement <4 x float> [[VF4]], i64 2
// CHECK-NEXT: [[MULF2:%.+]] = fmul float [[MULF1]], [[ARRF3]]
// CHECK-NEXT: [[ARRF4:%.+]] = extractelement <4 x float> [[VF4]], i64 3
// CHECK-NEXT: [[MULF3:%.+]] = fmul float [[MULF2]], [[ARRF4]]
float r2 = __builtin_reduce_mul(vf4);

// CHECK: [[VD4:%.+]] = load <4 x double>, ptr %vd4.addr, align 16
// CHECK-NEXT: [[ARR1:%.+]] = extractelement <4 x double> [[VD4]], i64 0
// CHECK-NEXT: [[ARR2:%.+]] = extractelement <4 x double> [[VD4]], i64 1
// CHECK-NEXT: [[MUL1:%.+]] = fmul double [[ARR1]], [[ARR2]]
// CHECK-NEXT: [[ARR3:%.+]] = extractelement <4 x double> [[VD4]], i64 2
// CHECK-NEXT: [[MUL2:%.+]] = fmul double [[MUL1]], [[ARR3]]
// CHECK-NEXT: [[ARR4:%.+]] = extractelement <4 x double> [[VD4]], i64 3
// CHECK-NEXT: [[MUL3:%.+]] = fmul double [[MUL2]], [[ARR4]]
double r3 = __builtin_reduce_mul(vd4);
}

void test_builtin_reduce_mul(si8 vi1, u4 vu1) {
// CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
// CHECK-NEXT: call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> [[VI1]])
Expand Down
16 changes: 8 additions & 8 deletions clang/test/Sema/builtins-reduction-math.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ void test_builtin_reduce_min(int i, float4 v, int3 iv) {
// expected-error@-1 {{1st argument must be a vector type (was 'int')}}
}

void test_builtin_reduce_add(int i, float4 v, int3 iv) {
void test_builtin_reduce_add(int i, float f, int3 iv) {
struct Foo s = __builtin_reduce_add(iv);
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}

Expand All @@ -47,13 +47,13 @@ void test_builtin_reduce_add(int i, float4 v, int3 iv) {
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}

i = __builtin_reduce_add(i);
// expected-error@-1 {{1st argument must be a vector of integers (was 'int')}}
// expected-error@-1 {{1st argument must be a vector of arithmetic element type (was 'int')}}

i = __builtin_reduce_add(v);
// expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
f = __builtin_reduce_add(f);
// expected-error@-1 {{1st argument must be a vector of arithmetic element type (was 'float')}}
}

void test_builtin_reduce_mul(int i, float4 v, int3 iv) {
void test_builtin_reduce_mul(int i, float f, int3 iv) {
struct Foo s = __builtin_reduce_mul(iv);
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}

Expand All @@ -64,10 +64,10 @@ void test_builtin_reduce_mul(int i, float4 v, int3 iv) {
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}

i = __builtin_reduce_mul(i);
// expected-error@-1 {{1st argument must be a vector of integers (was 'int')}}
// expected-error@-1 {{1st argument must be a vector of arithmetic element type (was 'int')}}

i = __builtin_reduce_mul(v);
// expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
f = __builtin_reduce_mul(f);
// expected-error@-1 {{1st argument must be a vector of arithmetic element type (was 'float')}}
}

void test_builtin_reduce_xor(int i, float4 v, int3 iv) {
Expand Down
12 changes: 12 additions & 0 deletions clang/test/Sema/constant_builtins_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,12 @@ constexpr long long reduceAddLong2 = __builtin_reduce_add((vector4long){(1LL <<
static_assert(__builtin_reduce_add((vector4uint){~0U, 0, 0, 1}) == 0);
static_assert(__builtin_reduce_add((vector4ulong){~0ULL, 0, 0, 1}) == 0);

constexpr float reduceAddFloat = __builtin_reduce_add((vector4float){1.0, 2.0, 3.0, 4.0});
// expected-error@-1 {{must be initialized by a constant expression}}

constexpr double reduceAddDouble = __builtin_reduce_add((vector4double){-1.0, 2.0, -3.0, 4.0});
// expected-error@-1 {{must be initialized by a constant expression}}

static_assert(__builtin_reduce_mul((vector4char){}) == 0);
static_assert(__builtin_reduce_mul((vector4char){1, 2, 3, 4}) == 24);
static_assert(__builtin_reduce_mul((vector4short){1, 2, 30, 40}) == 2400);
Expand All @@ -766,6 +772,12 @@ constexpr long long reduceMulLong2 = __builtin_reduce_mul((vector4long){(1LL <<
static_assert(__builtin_reduce_mul((vector4uint){~0U, 1, 1, 2}) == ~0U - 1);
static_assert(__builtin_reduce_mul((vector4ulong){~0ULL, 1, 1, 2}) == ~0ULL - 1);

constexpr float reduceMulFloat = __builtin_reduce_mul((vector4float){1.0, 2.0, 3.0, 1.0});
// expected-error@-1 {{must be initialized by a constant expression}}

constexpr double reduceMulDouble = __builtin_reduce_mul((vector4double){3.0, 4.0, 1.0, 1.0});
// expected-error@-1 {{must be initialized by a constant expression}}

static_assert(__builtin_reduce_and((vector4char){}) == 0);
static_assert(__builtin_reduce_and((vector4char){(char)0x11, (char)0x22, (char)0x44, (char)0x88}) == 0);
static_assert(__builtin_reduce_and((vector4short){(short)0x1111, (short)0x2222, (short)0x4444, (short)0x8888}) == 0);
Expand Down
Loading