Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions clang/docs/LanguageExtensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -950,7 +950,8 @@ Each builtin accesses memory according to a provided boolean mask. These are
provided as ``__builtin_masked_load`` and ``__builtin_masked_store``. The first
argument is always a boolean mask vector. The ``__builtin_masked_load`` builtin
takes an optional third vector argument that will be used for the result of the
masked-off lanes. These builtins assume the memory is always aligned.
masked-off lanes. These builtins assume the memory is unaligned; use
``__builtin_assume_aligned`` if alignment is desired.

The ``__builtin_masked_expand_load`` and ``__builtin_masked_compress_store``
builtins have the same interface but store the result in consecutive indices.
Expand All @@ -969,17 +970,17 @@ Example:
using v8b = bool [[clang::ext_vector_type(8)]];
using v8i = int [[clang::ext_vector_type(8)]];

v8i load(v8b mask, v8i *ptr) { return __builtin_masked_load(mask, ptr); }
v8i load(v8b mask, int *ptr) { return __builtin_masked_load(mask, ptr); }

v8i load_expand(v8b mask, v8i *ptr) {
v8i load_expand(v8b mask, int *ptr) {
return __builtin_masked_expand_load(mask, ptr);
}

void store(v8b mask, v8i val, v8i *ptr) {
void store(v8b mask, v8i val, int *ptr) {
__builtin_masked_store(mask, val, ptr);
}

void store_compress(v8b mask, v8i val, v8i *ptr) {
void store_compress(v8b mask, v8i val, int *ptr) {
__builtin_masked_compress_store(mask, val, ptr);
}

Expand Down
13 changes: 8 additions & 5 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4277,14 +4277,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::Value *Ptr = EmitScalarExpr(E->getArg(1));

llvm::Type *RetTy = CGM.getTypes().ConvertType(E->getType());
CharUnits Align = CGM.getNaturalTypeAlignment(E->getType(), nullptr);
llvm::Value *AlignVal =
llvm::ConstantInt::get(Int32Ty, Align.getQuantity());

llvm::Value *PassThru = llvm::PoisonValue::get(RetTy);
if (E->getNumArgs() > 2)
PassThru = EmitScalarExpr(E->getArg(2));

CharUnits Align = CGM.getNaturalTypeAlignment(
E->getType()->getAs<VectorType>()->getElementType(), nullptr);
llvm::Value *AlignVal =
llvm::ConstantInt::get(Int32Ty, Align.getQuantity());

llvm::Value *Result;
if (BuiltinID == Builtin::BI__builtin_masked_load) {
Function *F =
Expand Down Expand Up @@ -4335,7 +4336,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::Type *ValLLTy = CGM.getTypes().ConvertType(ValTy);
llvm::Type *PtrTy = Ptr->getType();

CharUnits Align = CGM.getNaturalTypeAlignment(ValTy, nullptr);
CharUnits Align = CGM.getNaturalTypeAlignment(
E->getArg(1)->getType()->getAs<VectorType>()->getElementType(),
nullptr);
llvm::Value *AlignVal =
llvm::ConstantInt::get(Int32Ty, Align.getQuantity());

Expand Down
42 changes: 13 additions & 29 deletions clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2268,19 +2268,17 @@ static bool BuiltinCountZeroBitsGeneric(Sema &S, CallExpr *TheCall) {
}

static bool CheckMaskedBuiltinArgs(Sema &S, Expr *MaskArg, Expr *PtrArg,
unsigned Pos, bool Vector = true) {
unsigned Pos) {
QualType MaskTy = MaskArg->getType();
if (!MaskTy->isExtVectorBoolType())
return S.Diag(MaskArg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
<< 1 << /* vector of */ 4 << /* booleans */ 6 << /* no fp */ 0
<< MaskTy;

QualType PtrTy = PtrArg->getType();
if (!PtrTy->isPointerType() ||
(Vector && !PtrTy->getPointeeType()->isVectorType()) ||
(!Vector && PtrTy->getPointeeType()->isVectorType()))
if (!PtrTy->isPointerType() || PtrTy->getPointeeType()->isVectorType())
return S.Diag(PtrArg->getExprLoc(), diag::err_vec_masked_load_store_ptr)
<< Pos << (Vector ? "pointer to vector" : "scalar pointer");
<< Pos << "scalar pointer";
return false;
}

Expand All @@ -2297,24 +2295,18 @@ static ExprResult BuiltinMaskedLoad(Sema &S, CallExpr *TheCall) {
QualType PtrTy = PtrArg->getType();
QualType PointeeTy = PtrTy->getPointeeType();
const VectorType *MaskVecTy = MaskTy->getAs<VectorType>();
const VectorType *DataVecTy = PointeeTy->getAs<VectorType>();

QualType RetTy =
S.Context.getExtVectorType(PointeeTy, MaskVecTy->getNumElements());
if (TheCall->getNumArgs() == 3) {
Expr *PassThruArg = TheCall->getArg(2);
QualType PassThruTy = PassThruArg->getType();
if (!S.Context.hasSameType(PassThruTy, PointeeTy))
if (!S.Context.hasSameType(PassThruTy, RetTy))
return S.Diag(PtrArg->getExprLoc(), diag::err_vec_masked_load_store_ptr)
<< /* third argument */ 3 << PointeeTy;
<< /* third argument */ 3 << RetTy;
}

if (MaskVecTy->getNumElements() != DataVecTy->getNumElements())
return ExprError(
S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size)
<< S.getASTContext().BuiltinInfo.getQuotedName(
TheCall->getBuiltinCallee())
<< MaskTy << PointeeTy);

TheCall->setType(PointeeTy);
TheCall->setType(RetTy);
return TheCall;
}

Expand All @@ -2339,18 +2331,10 @@ static ExprResult BuiltinMaskedStore(Sema &S, CallExpr *TheCall) {

QualType PointeeTy = PtrTy->getPointeeType();
const VectorType *MaskVecTy = MaskTy->getAs<VectorType>();
const VectorType *ValVecTy = ValTy->getAs<VectorType>();
const VectorType *PtrVecTy = PointeeTy->getAs<VectorType>();

if (MaskVecTy->getNumElements() != ValVecTy->getNumElements() ||
MaskVecTy->getNumElements() != PtrVecTy->getNumElements())
return ExprError(
S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size)
<< S.getASTContext().BuiltinInfo.getQuotedName(
TheCall->getBuiltinCallee())
<< MaskTy << PointeeTy);
QualType RetTy =
S.Context.getExtVectorType(PointeeTy, MaskVecTy->getNumElements());

if (!S.Context.hasSameType(ValTy, PointeeTy))
if (!S.Context.hasSameType(ValTy, RetTy))
return ExprError(S.Diag(TheCall->getBeginLoc(),
diag::err_vec_builtin_incompatible_vector)
<< TheCall->getDirectCallee() << /*isMorethantwoArgs*/ 2
Expand All @@ -2368,7 +2352,7 @@ static ExprResult BuiltinMaskedGather(Sema &S, CallExpr *TheCall) {
Expr *MaskArg = TheCall->getArg(0);
Expr *IdxArg = TheCall->getArg(1);
Expr *PtrArg = TheCall->getArg(2);
if (CheckMaskedBuiltinArgs(S, MaskArg, PtrArg, 3, /*Vector=*/false))
if (CheckMaskedBuiltinArgs(S, MaskArg, PtrArg, 3))
return ExprError();

QualType IdxTy = IdxArg->getType();
Expand Down Expand Up @@ -2413,7 +2397,7 @@ static ExprResult BuiltinMaskedScatter(Sema &S, CallExpr *TheCall) {
Expr *ValArg = TheCall->getArg(2);
Expr *PtrArg = TheCall->getArg(3);

if (CheckMaskedBuiltinArgs(S, MaskArg, PtrArg, 3, /*Vector=*/false))
if (CheckMaskedBuiltinArgs(S, MaskArg, PtrArg, 3))
return ExprError();

QualType IdxTy = IdxArg->getType();
Expand Down
16 changes: 8 additions & 8 deletions clang/test/CodeGen/builtin-masked.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ typedef _Bool v8b __attribute__((ext_vector_type(8)));
// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[M_ADDR]], align 1
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[P_ADDR]], align 8
// CHECK-NEXT: [[MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP2]], i32 32, <8 x i1> [[TMP1]], <8 x i32> poison)
// CHECK-NEXT: [[MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP2]], i32 4, <8 x i1> [[TMP1]], <8 x i32> poison)
// CHECK-NEXT: ret <8 x i32> [[MASKED_LOAD]]
//
v8i test_load(v8b m, v8i *p) {
v8i test_load(v8b m, int *p) {
return __builtin_masked_load(m, p);
}

Expand All @@ -45,10 +45,10 @@ v8i test_load(v8b m, v8i *p) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[P_ADDR]], align 8
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[T_ADDR]], align 32
// CHECK-NEXT: [[MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP3]], i32 32, <8 x i1> [[TMP2]], <8 x i32> [[TMP4]])
// CHECK-NEXT: [[MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP2]], <8 x i32> [[TMP4]])
// CHECK-NEXT: ret <8 x i32> [[MASKED_LOAD]]
//
v8i test_load_passthru(v8b m, v8i *p, v8i t) {
v8i test_load_passthru(v8b m, int *p, v8i t) {
return __builtin_masked_load(m, p, t);
}

Expand All @@ -74,7 +74,7 @@ v8i test_load_passthru(v8b m, v8i *p, v8i t) {
// CHECK-NEXT: [[MASKED_EXPAND_LOAD:%.*]] = call <8 x i32> @llvm.masked.expandload.v8i32(ptr [[TMP3]], <8 x i1> [[TMP2]], <8 x i32> [[TMP4]])
// CHECK-NEXT: ret <8 x i32> [[MASKED_EXPAND_LOAD]]
//
v8i test_load_expand(v8b m, v8i *p, v8i t) {
v8i test_load_expand(v8b m, int *p, v8i t) {
return __builtin_masked_expand_load(m, p, t);
}

Expand All @@ -97,10 +97,10 @@ v8i test_load_expand(v8b m, v8i *p, v8i t) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[V_ADDR]], align 32
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[P_ADDR]], align 8
// CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP3]], ptr [[TMP4]], i32 32, <8 x i1> [[TMP2]])
// CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP3]], ptr [[TMP4]], i32 4, <8 x i1> [[TMP2]])
// CHECK-NEXT: ret void
//
void test_store(v8b m, v8i v, v8i *p) {
void test_store(v8b m, v8i v, int *p) {
__builtin_masked_store(m, v, p);
}

Expand All @@ -126,7 +126,7 @@ void test_store(v8b m, v8i v, v8i *p) {
// CHECK-NEXT: call void @llvm.masked.compressstore.v8i32(<8 x i32> [[TMP3]], ptr [[TMP4]], <8 x i1> [[TMP2]])
// CHECK-NEXT: ret void
//
void test_compress_store(v8b m, v8i v, v8i *p) {
void test_compress_store(v8b m, v8i v, int *p) {
__builtin_masked_compress_store(m, v, p);
}

Expand Down
30 changes: 10 additions & 20 deletions clang/test/Sema/builtin-masked.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,44 +5,34 @@ typedef _Bool v8b __attribute__((ext_vector_type(8)));
typedef _Bool v2b __attribute__((ext_vector_type(2)));
typedef float v8f __attribute__((ext_vector_type(8)));

void test_masked_load(v8i *pf, v8b mask, v2b mask2, v2b thru) {
void test_masked_load(int *pf, v8b mask, v2b mask2, v2b thru) {
(void)__builtin_masked_load(mask); // expected-error {{too few arguments to function call, expected 2, have 1}}
(void)__builtin_masked_load(mask, pf, pf, pf); // expected-error {{too many arguments to function call, expected at most 3, have 4}}
(void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to '__builtin_masked_load' must have the same number of elements}}
(void)__builtin_masked_load(mask, mask); // expected-error {{2nd argument must be a pointer to vector}}
(void)__builtin_masked_load(mask, (void *)0); // expected-error {{2nd argument must be a pointer to vector}}
(void)__builtin_masked_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'v8i' (vector of 8 'int' values)}}
(void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to '__builtin_masked_load' must have the same number of elements}}
(void)__builtin_masked_load(mask, mask); // expected-error {{2nd argument must be a scalar pointer}}
(void)__builtin_masked_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'int __attribute__((ext_vector_type(2)))' (vector of 2 'int' values)}}
}

void test_masked_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) {
void test_masked_store(int *pf, v8f *pf2, v8b mask, v2b mask2) {
__builtin_masked_store(mask); // expected-error {{too few arguments to function call, expected 3, have 1}}
__builtin_masked_store(mask, 0, 0, 0); // expected-error {{too many arguments to function call, expected 3, have 4}}
__builtin_masked_store(0, 0, pf); // expected-error {{1st argument must be a vector of boolean types (was 'int')}}
__builtin_masked_store(mask, 0, pf); // expected-error {{2nd argument must be a vector}}
__builtin_masked_store(mask, *pf, 0); // expected-error {{3rd argument must be a pointer to vector}}
__builtin_masked_store(mask2, *pf, pf); // expected-error {{all arguments to '__builtin_masked_store' must have the same number of elements}}
__builtin_masked_store(mask, *pf, pf2); // expected-error {{last two arguments to '__builtin_masked_store' must have the same type}}
__builtin_masked_store(mask, *pf, 0); // expected-error {{3rd argument must be a scalar pointer}}
}

void test_masked_expand_load(v8i *pf, v8b mask, v2b mask2, v2b thru) {
void test_masked_expand_load(int *pf, v8b mask, v2b mask2, v2b thru) {
(void)__builtin_masked_expand_load(mask); // expected-error {{too few arguments to function call, expected 2, have 1}}
(void)__builtin_masked_expand_load(mask, pf, pf, pf); // expected-error {{too many arguments to function call, expected at most 3, have 4}}
(void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all arguments to '__builtin_masked_expand_load' must have the same number of elements}}
(void)__builtin_masked_expand_load(mask, mask); // expected-error {{2nd argument must be a pointer to vector}}
(void)__builtin_masked_expand_load(mask, (void *)0); // expected-error {{2nd argument must be a pointer to vector}}
(void)__builtin_masked_expand_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'v8i' (vector of 8 'int' values)}}
(void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all arguments to '__builtin_masked_expand_load' must have the same number of elements}}
(void)__builtin_masked_expand_load(mask, mask); // expected-error {{2nd argument must be a scalar pointer}}
(void)__builtin_masked_expand_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'int __attribute__((ext_vector_type(2)))' (vector of 2 'int' values)}}
}

void test_masked_compress_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) {
void test_masked_compress_store(int *pf, v8f *pf2, v8b mask, v2b mask2) {
__builtin_masked_compress_store(mask); // expected-error {{too few arguments to function call, expected 3, have 1}}
__builtin_masked_compress_store(mask, 0, 0, 0); // expected-error {{too many arguments to function call, expected 3, have 4}}
__builtin_masked_compress_store(0, 0, pf); // expected-error {{1st argument must be a vector of boolean types (was 'int')}}
__builtin_masked_compress_store(mask, 0, pf); // expected-error {{2nd argument must be a vector}}
__builtin_masked_compress_store(mask, *pf, 0); // expected-error {{3rd argument must be a pointer to vector}}
__builtin_masked_compress_store(mask2, *pf, pf); // expected-error {{all arguments to '__builtin_masked_compress_store' must have the same number of elements}}
__builtin_masked_compress_store(mask, *pf, pf2); // expected-error {{last two arguments to '__builtin_masked_compress_store' must have the same type}}
__builtin_masked_compress_store(mask, *pf, 0); // expected-error {{3rd argument must be a scalar pointer}}
}

void test_masked_gather(int *p, v8i idx, v8b mask, v2b mask2, v2b thru) {
Expand Down