Skip to content

Commit c989283

Browse files
authored
[Clang] Change masked load / store builtin interface to take scalar pointer (#156063)
Summary: Right now these enforce alignment, which isn't convenient for the user on platforms that support unaligned accesses. The options are to either permit passing the alignment manually, or just assume it's unaligned unless the user specifies it. I've added #156057 which should make the requested alignment show up on the intrinsic if the user passed `__builtin_assume_aligned`; however, that only happens with optimizations enabled. This shouldn't cause issues unless the backend categorically decides to reject an unaligned access.
1 parent 6f58c16 commit c989283

File tree

5 files changed

+45
-67
lines changed

5 files changed

+45
-67
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -950,7 +950,8 @@ Each builtin accesses memory according to a provided boolean mask. These are
950950
provided as ``__builtin_masked_load`` and ``__builtin_masked_store``. The first
951951
argument is always boolean mask vector. The ``__builtin_masked_load`` builtin
952952
takes an optional third vector argument that will be used for the result of the
953-
masked-off lanes. These builtins assume the memory is always aligned.
953+
masked-off lanes. These builtins assume the memory is unaligned, use
954+
``__builtin_assume_aligned`` if alignment is desired.
954955

955956
The ``__builtin_masked_expand_load`` and ``__builtin_masked_compress_store``
956957
builtins have the same interface but store the result in consecutive indices.
@@ -969,17 +970,17 @@ Example:
969970
using v8b = bool [[clang::ext_vector_type(8)]];
970971
using v8i = int [[clang::ext_vector_type(8)]];
971972

972-
v8i load(v8b mask, v8i *ptr) { return __builtin_masked_load(mask, ptr); }
973+
v8i load(v8b mask, int *ptr) { return __builtin_masked_load(mask, ptr); }
973974
974-
v8i load_expand(v8b mask, v8i *ptr) {
975+
v8i load_expand(v8b mask, int *ptr) {
975976
return __builtin_masked_expand_load(mask, ptr);
976977
}
977978
978-
void store(v8b mask, v8i val, v8i *ptr) {
979+
void store(v8b mask, v8i val, int *ptr) {
979980
__builtin_masked_store(mask, val, ptr);
980981
}
981982
982-
void store_compress(v8b mask, v8i val, v8i *ptr) {
983+
void store_compress(v8b mask, v8i val, int *ptr) {
983984
__builtin_masked_compress_store(mask, val, ptr);
984985
}
985986

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4277,14 +4277,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
42774277
llvm::Value *Ptr = EmitScalarExpr(E->getArg(1));
42784278

42794279
llvm::Type *RetTy = CGM.getTypes().ConvertType(E->getType());
4280-
CharUnits Align = CGM.getNaturalTypeAlignment(E->getType(), nullptr);
4281-
llvm::Value *AlignVal =
4282-
llvm::ConstantInt::get(Int32Ty, Align.getQuantity());
4283-
42844280
llvm::Value *PassThru = llvm::PoisonValue::get(RetTy);
42854281
if (E->getNumArgs() > 2)
42864282
PassThru = EmitScalarExpr(E->getArg(2));
42874283

4284+
CharUnits Align = CGM.getNaturalTypeAlignment(
4285+
E->getType()->getAs<VectorType>()->getElementType(), nullptr);
4286+
llvm::Value *AlignVal =
4287+
llvm::ConstantInt::get(Int32Ty, Align.getQuantity());
4288+
42884289
llvm::Value *Result;
42894290
if (BuiltinID == Builtin::BI__builtin_masked_load) {
42904291
Function *F =
@@ -4335,7 +4336,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
43354336
llvm::Type *ValLLTy = CGM.getTypes().ConvertType(ValTy);
43364337
llvm::Type *PtrTy = Ptr->getType();
43374338

4338-
CharUnits Align = CGM.getNaturalTypeAlignment(ValTy, nullptr);
4339+
CharUnits Align = CGM.getNaturalTypeAlignment(
4340+
E->getArg(1)->getType()->getAs<VectorType>()->getElementType(),
4341+
nullptr);
43394342
llvm::Value *AlignVal =
43404343
llvm::ConstantInt::get(Int32Ty, Align.getQuantity());
43414344

clang/lib/Sema/SemaChecking.cpp

Lines changed: 13 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2268,19 +2268,17 @@ static bool BuiltinCountZeroBitsGeneric(Sema &S, CallExpr *TheCall) {
22682268
}
22692269

22702270
static bool CheckMaskedBuiltinArgs(Sema &S, Expr *MaskArg, Expr *PtrArg,
2271-
unsigned Pos, bool Vector = true) {
2271+
unsigned Pos) {
22722272
QualType MaskTy = MaskArg->getType();
22732273
if (!MaskTy->isExtVectorBoolType())
22742274
return S.Diag(MaskArg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
22752275
<< 1 << /* vector of */ 4 << /* booleans */ 6 << /* no fp */ 0
22762276
<< MaskTy;
22772277

22782278
QualType PtrTy = PtrArg->getType();
2279-
if (!PtrTy->isPointerType() ||
2280-
(Vector && !PtrTy->getPointeeType()->isVectorType()) ||
2281-
(!Vector && PtrTy->getPointeeType()->isVectorType()))
2279+
if (!PtrTy->isPointerType() || PtrTy->getPointeeType()->isVectorType())
22822280
return S.Diag(PtrArg->getExprLoc(), diag::err_vec_masked_load_store_ptr)
2283-
<< Pos << (Vector ? "pointer to vector" : "scalar pointer");
2281+
<< Pos << "scalar pointer";
22842282
return false;
22852283
}
22862284

@@ -2297,24 +2295,18 @@ static ExprResult BuiltinMaskedLoad(Sema &S, CallExpr *TheCall) {
22972295
QualType PtrTy = PtrArg->getType();
22982296
QualType PointeeTy = PtrTy->getPointeeType();
22992297
const VectorType *MaskVecTy = MaskTy->getAs<VectorType>();
2300-
const VectorType *DataVecTy = PointeeTy->getAs<VectorType>();
23012298

2299+
QualType RetTy =
2300+
S.Context.getExtVectorType(PointeeTy, MaskVecTy->getNumElements());
23022301
if (TheCall->getNumArgs() == 3) {
23032302
Expr *PassThruArg = TheCall->getArg(2);
23042303
QualType PassThruTy = PassThruArg->getType();
2305-
if (!S.Context.hasSameType(PassThruTy, PointeeTy))
2304+
if (!S.Context.hasSameType(PassThruTy, RetTy))
23062305
return S.Diag(PtrArg->getExprLoc(), diag::err_vec_masked_load_store_ptr)
2307-
<< /* third argument */ 3 << PointeeTy;
2306+
<< /* third argument */ 3 << RetTy;
23082307
}
23092308

2310-
if (MaskVecTy->getNumElements() != DataVecTy->getNumElements())
2311-
return ExprError(
2312-
S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size)
2313-
<< S.getASTContext().BuiltinInfo.getQuotedName(
2314-
TheCall->getBuiltinCallee())
2315-
<< MaskTy << PointeeTy);
2316-
2317-
TheCall->setType(PointeeTy);
2309+
TheCall->setType(RetTy);
23182310
return TheCall;
23192311
}
23202312

@@ -2339,18 +2331,10 @@ static ExprResult BuiltinMaskedStore(Sema &S, CallExpr *TheCall) {
23392331

23402332
QualType PointeeTy = PtrTy->getPointeeType();
23412333
const VectorType *MaskVecTy = MaskTy->getAs<VectorType>();
2342-
const VectorType *ValVecTy = ValTy->getAs<VectorType>();
2343-
const VectorType *PtrVecTy = PointeeTy->getAs<VectorType>();
2344-
2345-
if (MaskVecTy->getNumElements() != ValVecTy->getNumElements() ||
2346-
MaskVecTy->getNumElements() != PtrVecTy->getNumElements())
2347-
return ExprError(
2348-
S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size)
2349-
<< S.getASTContext().BuiltinInfo.getQuotedName(
2350-
TheCall->getBuiltinCallee())
2351-
<< MaskTy << PointeeTy);
2334+
QualType RetTy =
2335+
S.Context.getExtVectorType(PointeeTy, MaskVecTy->getNumElements());
23522336

2353-
if (!S.Context.hasSameType(ValTy, PointeeTy))
2337+
if (!S.Context.hasSameType(ValTy, RetTy))
23542338
return ExprError(S.Diag(TheCall->getBeginLoc(),
23552339
diag::err_vec_builtin_incompatible_vector)
23562340
<< TheCall->getDirectCallee() << /*isMorethantwoArgs*/ 2
@@ -2368,7 +2352,7 @@ static ExprResult BuiltinMaskedGather(Sema &S, CallExpr *TheCall) {
23682352
Expr *MaskArg = TheCall->getArg(0);
23692353
Expr *IdxArg = TheCall->getArg(1);
23702354
Expr *PtrArg = TheCall->getArg(2);
2371-
if (CheckMaskedBuiltinArgs(S, MaskArg, PtrArg, 3, /*Vector=*/false))
2355+
if (CheckMaskedBuiltinArgs(S, MaskArg, PtrArg, 3))
23722356
return ExprError();
23732357

23742358
QualType IdxTy = IdxArg->getType();
@@ -2413,7 +2397,7 @@ static ExprResult BuiltinMaskedScatter(Sema &S, CallExpr *TheCall) {
24132397
Expr *ValArg = TheCall->getArg(2);
24142398
Expr *PtrArg = TheCall->getArg(3);
24152399

2416-
if (CheckMaskedBuiltinArgs(S, MaskArg, PtrArg, 3, /*Vector=*/false))
2400+
if (CheckMaskedBuiltinArgs(S, MaskArg, PtrArg, 3))
24172401
return ExprError();
24182402

24192403
QualType IdxTy = IdxArg->getType();

clang/test/CodeGen/builtin-masked.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@ typedef _Bool v8b __attribute__((ext_vector_type(8)));
1919
// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[M_ADDR]], align 1
2020
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
2121
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[P_ADDR]], align 8
22-
// CHECK-NEXT: [[MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP2]], i32 32, <8 x i1> [[TMP1]], <8 x i32> poison)
22+
// CHECK-NEXT: [[MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP2]], i32 4, <8 x i1> [[TMP1]], <8 x i32> poison)
2323
// CHECK-NEXT: ret <8 x i32> [[MASKED_LOAD]]
2424
//
25-
v8i test_load(v8b m, v8i *p) {
25+
v8i test_load(v8b m, int *p) {
2626
return __builtin_masked_load(m, p);
2727
}
2828

@@ -45,10 +45,10 @@ v8i test_load(v8b m, v8i *p) {
4545
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
4646
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[P_ADDR]], align 8
4747
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[T_ADDR]], align 32
48-
// CHECK-NEXT: [[MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP3]], i32 32, <8 x i1> [[TMP2]], <8 x i32> [[TMP4]])
48+
// CHECK-NEXT: [[MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP2]], <8 x i32> [[TMP4]])
4949
// CHECK-NEXT: ret <8 x i32> [[MASKED_LOAD]]
5050
//
51-
v8i test_load_passthru(v8b m, v8i *p, v8i t) {
51+
v8i test_load_passthru(v8b m, int *p, v8i t) {
5252
return __builtin_masked_load(m, p, t);
5353
}
5454

@@ -74,7 +74,7 @@ v8i test_load_passthru(v8b m, v8i *p, v8i t) {
7474
// CHECK-NEXT: [[MASKED_EXPAND_LOAD:%.*]] = call <8 x i32> @llvm.masked.expandload.v8i32(ptr [[TMP3]], <8 x i1> [[TMP2]], <8 x i32> [[TMP4]])
7575
// CHECK-NEXT: ret <8 x i32> [[MASKED_EXPAND_LOAD]]
7676
//
77-
v8i test_load_expand(v8b m, v8i *p, v8i t) {
77+
v8i test_load_expand(v8b m, int *p, v8i t) {
7878
return __builtin_masked_expand_load(m, p, t);
7979
}
8080

@@ -97,10 +97,10 @@ v8i test_load_expand(v8b m, v8i *p, v8i t) {
9797
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
9898
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[V_ADDR]], align 32
9999
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[P_ADDR]], align 8
100-
// CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP3]], ptr [[TMP4]], i32 32, <8 x i1> [[TMP2]])
100+
// CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP3]], ptr [[TMP4]], i32 4, <8 x i1> [[TMP2]])
101101
// CHECK-NEXT: ret void
102102
//
103-
void test_store(v8b m, v8i v, v8i *p) {
103+
void test_store(v8b m, v8i v, int *p) {
104104
__builtin_masked_store(m, v, p);
105105
}
106106

@@ -126,7 +126,7 @@ void test_store(v8b m, v8i v, v8i *p) {
126126
// CHECK-NEXT: call void @llvm.masked.compressstore.v8i32(<8 x i32> [[TMP3]], ptr [[TMP4]], <8 x i1> [[TMP2]])
127127
// CHECK-NEXT: ret void
128128
//
129-
void test_compress_store(v8b m, v8i v, v8i *p) {
129+
void test_compress_store(v8b m, v8i v, int *p) {
130130
__builtin_masked_compress_store(m, v, p);
131131
}
132132

clang/test/Sema/builtin-masked.c

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,44 +5,34 @@ typedef _Bool v8b __attribute__((ext_vector_type(8)));
55
typedef _Bool v2b __attribute__((ext_vector_type(2)));
66
typedef float v8f __attribute__((ext_vector_type(8)));
77

8-
void test_masked_load(v8i *pf, v8b mask, v2b mask2, v2b thru) {
8+
void test_masked_load(int *pf, v8b mask, v2b mask2, v2b thru) {
99
(void)__builtin_masked_load(mask); // expected-error {{too few arguments to function call, expected 2, have 1}}
1010
(void)__builtin_masked_load(mask, pf, pf, pf); // expected-error {{too many arguments to function call, expected at most 3, have 4}}
11-
(void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to '__builtin_masked_load' must have the same number of elements}}
12-
(void)__builtin_masked_load(mask, mask); // expected-error {{2nd argument must be a pointer to vector}}
13-
(void)__builtin_masked_load(mask, (void *)0); // expected-error {{2nd argument must be a pointer to vector}}
14-
(void)__builtin_masked_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'v8i' (vector of 8 'int' values)}}
15-
(void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to '__builtin_masked_load' must have the same number of elements}}
11+
(void)__builtin_masked_load(mask, mask); // expected-error {{2nd argument must be a scalar pointer}}
12+
(void)__builtin_masked_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'int __attribute__((ext_vector_type(2)))' (vector of 2 'int' values)}}
1613
}
1714

18-
void test_masked_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) {
15+
void test_masked_store(int *pf, v8f *pf2, v8b mask, v2b mask2) {
1916
__builtin_masked_store(mask); // expected-error {{too few arguments to function call, expected 3, have 1}}
2017
__builtin_masked_store(mask, 0, 0, 0); // expected-error {{too many arguments to function call, expected 3, have 4}}
2118
__builtin_masked_store(0, 0, pf); // expected-error {{1st argument must be a vector of boolean types (was 'int')}}
2219
__builtin_masked_store(mask, 0, pf); // expected-error {{2nd argument must be a vector}}
23-
__builtin_masked_store(mask, *pf, 0); // expected-error {{3rd argument must be a pointer to vector}}
24-
__builtin_masked_store(mask2, *pf, pf); // expected-error {{all arguments to '__builtin_masked_store' must have the same number of elements}}
25-
__builtin_masked_store(mask, *pf, pf2); // expected-error {{last two arguments to '__builtin_masked_store' must have the same type}}
20+
__builtin_masked_store(mask, *pf, 0); // expected-error {{3rd argument must be a scalar pointer}}
2621
}
2722

28-
void test_masked_expand_load(v8i *pf, v8b mask, v2b mask2, v2b thru) {
23+
void test_masked_expand_load(int *pf, v8b mask, v2b mask2, v2b thru) {
2924
(void)__builtin_masked_expand_load(mask); // expected-error {{too few arguments to function call, expected 2, have 1}}
3025
(void)__builtin_masked_expand_load(mask, pf, pf, pf); // expected-error {{too many arguments to function call, expected at most 3, have 4}}
31-
(void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all arguments to '__builtin_masked_expand_load' must have the same number of elements}}
32-
(void)__builtin_masked_expand_load(mask, mask); // expected-error {{2nd argument must be a pointer to vector}}
33-
(void)__builtin_masked_expand_load(mask, (void *)0); // expected-error {{2nd argument must be a pointer to vector}}
34-
(void)__builtin_masked_expand_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'v8i' (vector of 8 'int' values)}}
35-
(void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all arguments to '__builtin_masked_expand_load' must have the same number of elements}}
26+
(void)__builtin_masked_expand_load(mask, mask); // expected-error {{2nd argument must be a scalar pointer}}
27+
(void)__builtin_masked_expand_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'int __attribute__((ext_vector_type(2)))' (vector of 2 'int' values)}}
3628
}
3729

38-
void test_masked_compress_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) {
30+
void test_masked_compress_store(int *pf, v8f *pf2, v8b mask, v2b mask2) {
3931
__builtin_masked_compress_store(mask); // expected-error {{too few arguments to function call, expected 3, have 1}}
4032
__builtin_masked_compress_store(mask, 0, 0, 0); // expected-error {{too many arguments to function call, expected 3, have 4}}
4133
__builtin_masked_compress_store(0, 0, pf); // expected-error {{1st argument must be a vector of boolean types (was 'int')}}
4234
__builtin_masked_compress_store(mask, 0, pf); // expected-error {{2nd argument must be a vector}}
43-
__builtin_masked_compress_store(mask, *pf, 0); // expected-error {{3rd argument must be a pointer to vector}}
44-
__builtin_masked_compress_store(mask2, *pf, pf); // expected-error {{all arguments to '__builtin_masked_compress_store' must have the same number of elements}}
45-
__builtin_masked_compress_store(mask, *pf, pf2); // expected-error {{last two arguments to '__builtin_masked_compress_store' must have the same type}}
35+
__builtin_masked_compress_store(mask, *pf, 0); // expected-error {{3rd argument must be a scalar pointer}}
4636
}
4737

4838
void test_masked_gather(int *p, v8i idx, v8b mask, v2b mask2, v2b thru) {

0 commit comments

Comments
 (0)