Skip to content

Commit cc9d2d9

Browse files
committed
[Clang] Support generic bit counting builtins on fixed boolean vectors
Summary: Boolean vectors as implemented in Clang can be bit-cast to an integer whose width is rounded up to the next primitive integer size. Users can do this conversion themselves, but since the bit-counting builtins are very likely to be used with bitmasks like this, and the generic forms are expected to be generic, it seems reasonable to handle this case directly.
1 parent e90ce51 commit cc9d2d9

File tree

8 files changed

+346
-209
lines changed

8 files changed

+346
-209
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4182,7 +4182,7 @@ builtin, the mangler emits their usual pattern without any special treatment.
41824182
-----------------------
41834183
41844184
``__builtin_popcountg`` returns the number of 1 bits in the argument. The
4185-
argument can be of any unsigned integer type.
4185+
argument can be of any unsigned integer type or fixed boolean vector.
41864186
41874187
**Syntax**:
41884188
@@ -4214,7 +4214,7 @@ such as ``unsigned __int128`` and C23 ``unsigned _BitInt(N)``.
42144214
42154215
``__builtin_clzg`` (respectively ``__builtin_ctzg``) returns the number of
42164216
leading (respectively trailing) 0 bits in the first argument. The first argument
4217-
can be of any unsigned integer type.
4217+
can be of any unsigned integer type or fixed boolean vector.
42184218
42194219
If the first argument is 0 and an optional second argument of ``int`` type is
42204220
provided, then the second argument is returned. If the first argument is 0, but

clang/docs/ReleaseNotes.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,10 +143,12 @@ Non-comprehensive list of changes in this release
143143
- Added ``__builtin_masked_load`` and ``__builtin_masked_store`` for conditional
144144
memory loads from vectors. Binds to the LLVM intrinsic of the same name.
145145

146+
- The ``__builtin_popcountg``, ``__builtin_ctzg``, and ``__builtin_clzg``
147+
functions now accept fixed-size boolean vectors.
148+
146149
- Use of ``__has_feature`` to detect the ``ptrauth_qualifier`` and ``ptrauth_intrinsics``
147150
features has been deprecated, and is restricted to the arm64e target only. The
148151
correct method to check for these features is to test for the ``__PTRAUTH__``
149-
macro.
150152

151153
- Added a new builtin, ``__builtin_dedup_pack``, to remove duplicate types from a parameter pack.
152154
This feature is particularly useful in template metaprogramming for normalizing type lists.

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,22 @@ static void diagnoseNonConstexprBuiltin(InterpState &S, CodePtr OpPC,
141141
S.CCEDiag(Loc, diag::note_invalid_subexpr_in_const_expr);
142142
}
143143

144+
/// Packs a boolean vector into a single integer that is exactly as many bits
/// wide as the vector has elements: bit I of the result is set exactly when
/// element I of \p Val is true.
///
/// \param Val pointer to the vector; asserted to be a primitive array whose
///        element type is boolean.
/// \returns the packed bitmask.
static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) {
145+
assert(Val.getFieldDesc()->isPrimitiveArray() &&
146+
Val.getFieldDesc()->getElemQualType()->isBooleanType() &&
147+
"Not a boolean vector");
148+
unsigned NumElts = Val.getNumElems();
149+
150+
// Each element is one bit, so create an integer with NumElts bits.
// NOTE(review): llvm::APSInt's two-argument constructor appears to take
// (BitWidth, isUnsigned), so the 0 here would select a signed, zero-valued
// result rather than supply an initial value -- confirm this is intended.
151+
llvm::APSInt Result(NumElts, 0);
152+
// Element index maps directly to bit index (element 0 -> bit 0).
for (unsigned I = 0; I < NumElts; ++I) {
153+
if (Val.elem<bool>(I))
154+
Result.setBit(I);
155+
}
156+
157+
return Result;
158+
}
159+
144160
static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC,
145161
const InterpFrame *Frame,
146162
const CallExpr *Call) {
@@ -638,8 +654,14 @@ static bool interp__builtin_abs(InterpState &S, CodePtr OpPC,
638654
static bool interp__builtin_popcount(InterpState &S, CodePtr OpPC,
639655
const InterpFrame *Frame,
640656
const CallExpr *Call) {
641-
PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType());
642-
APSInt Val = popToAPSInt(S.Stk, ArgT);
657+
APSInt Val;
658+
if (Call->getArg(0)->getType()->isExtVectorBoolType()) {
659+
const Pointer &Arg = S.Stk.pop<Pointer>();
660+
Val = convertBoolVectorToInt(Arg);
661+
} else {
662+
PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType());
663+
Val = popToAPSInt(S.Stk, ArgT);
664+
}
643665
pushInteger(S, Val.popcount(), Call->getType());
644666
return true;
645667
}
@@ -935,8 +957,14 @@ static bool interp__builtin_clz(InterpState &S, CodePtr OpPC,
935957
PrimType FallbackT = *S.getContext().classify(Call->getArg(1));
936958
Fallback = popToAPSInt(S.Stk, FallbackT);
937959
}
938-
PrimType ValT = *S.getContext().classify(Call->getArg(0));
939-
const APSInt &Val = popToAPSInt(S.Stk, ValT);
960+
APSInt Val;
961+
if (Call->getArg(0)->getType()->isExtVectorBoolType()) {
962+
const Pointer &Arg = S.Stk.pop<Pointer>();
963+
Val = convertBoolVectorToInt(Arg);
964+
} else {
965+
PrimType ValT = *S.getContext().classify(Call->getArg(0));
966+
Val = popToAPSInt(S.Stk, ValT);
967+
}
940968

941969
// When the argument is 0, the result of GCC builtins is undefined, whereas
942970
// for Microsoft intrinsics, the result is the bit-width of the argument.
@@ -966,8 +994,14 @@ static bool interp__builtin_ctz(InterpState &S, CodePtr OpPC,
966994
PrimType FallbackT = *S.getContext().classify(Call->getArg(1));
967995
Fallback = popToAPSInt(S.Stk, FallbackT);
968996
}
969-
PrimType ValT = *S.getContext().classify(Call->getArg(0));
970-
const APSInt &Val = popToAPSInt(S.Stk, ValT);
997+
APSInt Val;
998+
if (Call->getArg(0)->getType()->isExtVectorBoolType()) {
999+
const Pointer &Arg = S.Stk.pop<Pointer>();
1000+
Val = convertBoolVectorToInt(Arg);
1001+
} else {
1002+
PrimType ValT = *S.getContext().classify(Call->getArg(0));
1003+
Val = popToAPSInt(S.Stk, ValT);
1004+
}
9711005

9721006
if (Val == 0) {
9731007
if (Fallback) {

clang/lib/AST/ExprConstant.cpp

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11314,6 +11314,24 @@ static bool EvaluateVector(const Expr* E, APValue& Result, EvalInfo &Info) {
1131411314
return VectorExprEvaluator(Info, Result).Visit(E);
1131511315
}
1131611316

11317+
/// Packs an evaluated vector constant into a single integer that is exactly as
/// many bits wide as the vector has elements: bit I of the result is set
/// exactly when element I of \p Val has a nonzero integer value.
///
/// \param Val the evaluated vector; asserted to be a vector APValue whose
///        elements are all integers.
/// \returns the packed bitmask.
static llvm::APInt ConvertBoolVectorToInt(const APValue &Val) {
11318+
assert(Val.isVector() && "expected vector APValue");
11319+
unsigned NumElts = Val.getVectorLength();
11320+
11321+
// Each element is one bit, so create an integer with NumElts bits.
11322+
llvm::APInt Result(NumElts, 0);
11323+
11324+
// Element index maps directly to bit index (element 0 -> bit 0).
for (unsigned I = 0; I < NumElts; ++I) {
11325+
const APValue &Elt = Val.getVectorElt(I);
11326+
assert(Elt.isInt() && "expected integer element in bool vector");
11327+
11328+
if (Elt.getInt().getBoolValue())
11329+
Result.setBit(I);
11330+
}
11331+
11332+
return Result;
11333+
}
11334+
1131711335
bool VectorExprEvaluator::VisitCastExpr(const CastExpr *E) {
1131811336
const VectorType *VTy = E->getType()->castAs<VectorType>();
1131911337
unsigned NElts = VTy->getNumElements();
@@ -13456,8 +13474,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1345613474
case Builtin::BI__lzcnt:
1345713475
case Builtin::BI__lzcnt64: {
1345813476
APSInt Val;
13459-
if (!EvaluateInteger(E->getArg(0), Val, Info))
13477+
if (E->getArg(0)->getType()->isExtVectorBoolType()) {
13478+
APValue Vec;
13479+
if (!EvaluateVector(E->getArg(0), Vec, Info))
13480+
return false;
13481+
Val = ConvertBoolVectorToInt(Vec);
13482+
} else if (!EvaluateInteger(E->getArg(0), Val, Info)) {
1346013483
return false;
13484+
}
1346113485

1346213486
std::optional<APSInt> Fallback;
1346313487
if ((BuiltinOp == Builtin::BI__builtin_clzg ||
@@ -13542,8 +13566,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1354213566
case Builtin::BI__builtin_ctzg:
1354313567
case Builtin::BI__builtin_elementwise_cttz: {
1354413568
APSInt Val;
13545-
if (!EvaluateInteger(E->getArg(0), Val, Info))
13569+
if (E->getArg(0)->getType()->isExtVectorBoolType()) {
13570+
APValue Vec;
13571+
if (!EvaluateVector(E->getArg(0), Vec, Info))
13572+
return false;
13573+
Val = ConvertBoolVectorToInt(Vec);
13574+
} else if (!EvaluateInteger(E->getArg(0), Val, Info)) {
1354613575
return false;
13576+
}
1354713577

1354813578
std::optional<APSInt> Fallback;
1354913579
if ((BuiltinOp == Builtin::BI__builtin_ctzg ||
@@ -13758,8 +13788,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1375813788
case Builtin::BI__popcnt:
1375913789
case Builtin::BI__popcnt64: {
1376013790
APSInt Val;
13761-
if (!EvaluateInteger(E->getArg(0), Val, Info))
13791+
if (E->getArg(0)->getType()->isExtVectorBoolType()) {
13792+
APValue Vec;
13793+
if (!EvaluateVector(E->getArg(0), Vec, Info))
13794+
return false;
13795+
Val = ConvertBoolVectorToInt(Vec);
13796+
} else if (!EvaluateInteger(E->getArg(0), Val, Info)) {
1376213797
return false;
13798+
}
1376313799

1376413800
return Success(Val.popcount(), E);
1376513801
}

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1693,6 +1693,26 @@ getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
16931693
llvm_unreachable("invalid interlocking");
16941694
}
16951695

1696+
/// Emits \p E as a scalar expression for use as the operand of a bit-counting
/// builtin. If the emitted value is a fixed vector of i1, it is converted to a
/// single integer with one bit per vector element; any other value is returned
/// unchanged.
///
/// \param CGF the function being emitted into.
/// \param E the argument expression of the builtin call.
/// \returns the emitted value, as an integer when \p E was a boolean vector.
static llvm::Value *EmitBitCountExpr(CodeGenFunction &CGF, const Expr *E) {
1697+
llvm::Value *ArgValue = CGF.EmitScalarExpr(E);
1698+
llvm::Type *ArgType = ArgValue->getType();
1699+
1700+
// Boolean vectors can be cast directly to their bitfield representation. We
1701+
// intentionally do not round up to the next power of two size and let LLVM
1702+
// handle the trailing bits.
1703+
if (auto *VT = dyn_cast<llvm::FixedVectorType>(ArgType);
1704+
VT && VT->getElementType()->isIntegerTy(1)) {
1705+
// The integer type has exactly one bit per vector element.
llvm::Type *StorageType =
1706+
llvm::Type::getIntNTy(CGF.getLLVMContext(), VT->getNumElements());
1707+
// NOTE(review): presumably this adjusts the vector to the requested number
// of elements before the bitcast; the helper is defined elsewhere -- confirm.
ArgValue = CGF.emitBoolVecConversion(
1708+
ArgValue, StorageType->getPrimitiveSizeInBits(), "insertvec");
1709+
ArgValue = CGF.Builder.CreateBitCast(ArgValue, StorageType);
1710+
// ArgType is refreshed here but is not read again before the return.
ArgType = ArgValue->getType();
1711+
}
1712+
1713+
return ArgValue;
1714+
}
1715+
16961716
/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
16971717
/// bits and a bit position and read and optionally modify the bit at that
16981718
/// position. The position index can be arbitrarily large, i.e. it can be larger
@@ -2020,7 +2040,7 @@ Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
20202040
assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) &&
20212041
"Unsupported builtin check kind");
20222042

2023-
Value *ArgValue = EmitScalarExpr(E);
2043+
Value *ArgValue = EmitBitCountExpr(*this, E);
20242044
if (!SanOpts.has(SanitizerKind::Builtin))
20252045
return ArgValue;
20262046

@@ -3334,7 +3354,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
33343354
E->getNumArgs() > 1;
33353355

33363356
Value *ArgValue =
3337-
HasFallback ? EmitScalarExpr(E->getArg(0))
3357+
HasFallback ? EmitBitCountExpr(*this, E->getArg(0))
33383358
: EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
33393359

33403360
llvm::Type *ArgType = ArgValue->getType();
@@ -3371,7 +3391,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
33713391
E->getNumArgs() > 1;
33723392

33733393
Value *ArgValue =
3374-
HasFallback ? EmitScalarExpr(E->getArg(0))
3394+
HasFallback ? EmitBitCountExpr(*this, E->getArg(0))
33753395
: EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
33763396

33773397
llvm::Type *ArgType = ArgValue->getType();
@@ -3456,7 +3476,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
34563476
case Builtin::BI__builtin_popcountl:
34573477
case Builtin::BI__builtin_popcountll:
34583478
case Builtin::BI__builtin_popcountg: {
3459-
Value *ArgValue = EmitScalarExpr(E->getArg(0));
3479+
Value *ArgValue = EmitBitCountExpr(*this, E->getArg(0));
34603480

34613481
llvm::Type *ArgType = ArgValue->getType();
34623482
Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

clang/lib/Sema/SemaChecking.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2214,7 +2214,7 @@ static bool BuiltinPopcountg(Sema &S, CallExpr *TheCall) {
22142214

22152215
QualType ArgTy = Arg->getType();
22162216

2217-
if (!ArgTy->isUnsignedIntegerType()) {
2217+
if (!ArgTy->isUnsignedIntegerType() && !ArgTy->isExtVectorBoolType()) {
22182218
S.Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
22192219
<< 1 << /* scalar */ 1 << /* unsigned integer ty */ 3 << /* no fp */ 0
22202220
<< ArgTy;
@@ -2239,7 +2239,7 @@ static bool BuiltinCountZeroBitsGeneric(Sema &S, CallExpr *TheCall) {
22392239

22402240
QualType Arg0Ty = Arg0->getType();
22412241

2242-
if (!Arg0Ty->isUnsignedIntegerType()) {
2242+
if (!Arg0Ty->isUnsignedIntegerType() && !Arg0Ty->isExtVectorBoolType()) {
22432243
S.Diag(Arg0->getBeginLoc(), diag::err_builtin_invalid_arg_type)
22442244
<< 1 << /* scalar */ 1 << /* unsigned integer ty */ 3 << /* no fp */ 0
22452245
<< Arg0Ty;

clang/test/AST/ByteCode/builtin-functions.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,7 @@ namespace SourceLocation {
454454
}
455455

456456
#define BITSIZE(x) (sizeof(x) * 8)
457+
constexpr bool __attribute__((ext_vector_type(4))) v4b{};
457458
namespace popcount {
458459
static_assert(__builtin_popcount(~0u) == __CHAR_BIT__ * sizeof(unsigned int), "");
459460
static_assert(__builtin_popcount(0) == 0, "");
@@ -471,6 +472,7 @@ namespace popcount {
471472
static_assert(__builtin_popcountg(0ul) == 0, "");
472473
static_assert(__builtin_popcountg(~0ull) == __CHAR_BIT__ * sizeof(unsigned long long), "");
473474
static_assert(__builtin_popcountg(0ull) == 0, "");
475+
static_assert(__builtin_popcountg(v4b) == 0, "");
474476
#ifdef __SIZEOF_INT128__
475477
static_assert(__builtin_popcountg(~(unsigned __int128)0) == __CHAR_BIT__ * sizeof(unsigned __int128), "");
476478
static_assert(__builtin_popcountg((unsigned __int128)0) == 0, "");
@@ -743,6 +745,7 @@ namespace clz {
743745
char clz62[__builtin_clzg((unsigned _BitInt(128))0xf) == BITSIZE(_BitInt(128)) - 4 ? 1 : -1];
744746
char clz63[__builtin_clzg((unsigned _BitInt(128))0xf, 42) == BITSIZE(_BitInt(128)) - 4 ? 1 : -1];
745747
#endif
748+
char clz64[__builtin_clzg(v4b, 0) == 0 ? 1 : -1];
746749
}
747750

748751
namespace ctz {
@@ -813,6 +816,7 @@ namespace ctz {
813816
char ctz62[__builtin_ctzg((unsigned _BitInt(128))1 << (BITSIZE(_BitInt(128)) - 1)) == BITSIZE(_BitInt(128)) - 1 ? 1 : -1];
814817
char ctz63[__builtin_ctzg((unsigned _BitInt(128))1 << (BITSIZE(_BitInt(128)) - 1), 42) == BITSIZE(_BitInt(128)) - 1 ? 1 : -1];
815818
#endif
819+
char clz64[__builtin_ctzg(v4b, 0) == 0 ? 1 : -1];
816820
}
817821

818822
namespace bswap {

0 commit comments

Comments
 (0)