diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 25f4e3b3fbd26..b503283559db4 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -950,7 +950,8 @@ Each builtin accesses memory according to a provided boolean mask. These are
 provided as ``__builtin_masked_load`` and ``__builtin_masked_store``. The
 first argument is always boolean mask vector. The ``__builtin_masked_load``
 builtin takes an optional third vector argument that will be used for the result of the
-masked-off lanes. These builtins assume the memory is always aligned.
+masked-off lanes. These builtins assume the memory is unaligned; use
+``__builtin_assume_aligned`` if alignment is desired.
 
 The ``__builtin_masked_expand_load`` and ``__builtin_masked_compress_store``
 builtins have the same interface but store the result in consecutive indices.
@@ -969,17 +970,17 @@ Example:
     using v8b = bool [[clang::ext_vector_type(8)]];
     using v8i = int [[clang::ext_vector_type(8)]];
 
-    v8i load(v8b mask, v8i *ptr) { return __builtin_masked_load(mask, ptr); }
+    v8i load(v8b mask, int *ptr) { return __builtin_masked_load(mask, ptr); }
 
-    v8i load_expand(v8b mask, v8i *ptr) {
+    v8i load_expand(v8b mask, int *ptr) {
       return __builtin_masked_expand_load(mask, ptr);
     }
 
-    void store(v8b mask, v8i val, v8i *ptr) {
+    void store(v8b mask, v8i val, int *ptr) {
      __builtin_masked_store(mask, val, ptr);
     }
 
-    void store_compress(v8b mask, v8i val, v8i *ptr) {
+    void store_compress(v8b mask, v8i val, int *ptr) {
      __builtin_masked_compress_store(mask, val, ptr);
     }
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f7c3dea257d50..1cff1e0de71b7 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4277,14 +4277,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     llvm::Value *Ptr = EmitScalarExpr(E->getArg(1));
 
     llvm::Type *RetTy = CGM.getTypes().ConvertType(E->getType());
-    CharUnits Align = CGM.getNaturalTypeAlignment(E->getType(), nullptr);
-    llvm::Value *AlignVal =
-        llvm::ConstantInt::get(Int32Ty, Align.getQuantity());
-
     llvm::Value *PassThru = llvm::PoisonValue::get(RetTy);
     if (E->getNumArgs() > 2)
       PassThru = EmitScalarExpr(E->getArg(2));
 
+    CharUnits Align = CGM.getNaturalTypeAlignment(
+        E->getType()->getAs<VectorType>()->getElementType(), nullptr);
+    llvm::Value *AlignVal =
+        llvm::ConstantInt::get(Int32Ty, Align.getQuantity());
+
     llvm::Value *Result;
     if (BuiltinID == Builtin::BI__builtin_masked_load) {
       Function *F =
@@ -4335,7 +4336,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     llvm::Type *ValLLTy = CGM.getTypes().ConvertType(ValTy);
     llvm::Type *PtrTy = Ptr->getType();
 
-    CharUnits Align = CGM.getNaturalTypeAlignment(ValTy, nullptr);
+    CharUnits Align = CGM.getNaturalTypeAlignment(
+        E->getArg(1)->getType()->getAs<VectorType>()->getElementType(),
+        nullptr);
     llvm::Value *AlignVal =
         llvm::ConstantInt::get(Int32Ty, Align.getQuantity());
 
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index b3b67230f7687..2520f06ef550e 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2268,7 +2268,7 @@ static bool BuiltinCountZeroBitsGeneric(Sema &S, CallExpr *TheCall) {
 }
 
 static bool CheckMaskedBuiltinArgs(Sema &S, Expr *MaskArg, Expr *PtrArg,
-                                   unsigned Pos, bool Vector = true) {
+                                   unsigned Pos) {
   QualType MaskTy = MaskArg->getType();
   if (!MaskTy->isExtVectorBoolType())
     return S.Diag(MaskArg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
@@ -2276,11 +2276,9 @@ static bool CheckMaskedBuiltinArgs(Sema &S, Expr *MaskArg, Expr *PtrArg,
            << MaskTy;
 
   QualType PtrTy = PtrArg->getType();
-  if (!PtrTy->isPointerType() ||
-      (Vector && !PtrTy->getPointeeType()->isVectorType()) ||
-      (!Vector && PtrTy->getPointeeType()->isVectorType()))
+  if (!PtrTy->isPointerType() || PtrTy->getPointeeType()->isVectorType())
     return S.Diag(PtrArg->getExprLoc(), diag::err_vec_masked_load_store_ptr)
-           << Pos << (Vector ? "pointer to vector" : "scalar pointer");
+           << Pos << "scalar pointer";
 
   return false;
 }
@@ -2297,24 +2295,18 @@ static ExprResult BuiltinMaskedLoad(Sema &S, CallExpr *TheCall) {
   QualType PtrTy = PtrArg->getType();
   QualType PointeeTy = PtrTy->getPointeeType();
   const VectorType *MaskVecTy = MaskTy->getAs<VectorType>();
-  const VectorType *DataVecTy = PointeeTy->getAs<VectorType>();
+  QualType RetTy =
+      S.Context.getExtVectorType(PointeeTy, MaskVecTy->getNumElements());
 
   if (TheCall->getNumArgs() == 3) {
     Expr *PassThruArg = TheCall->getArg(2);
     QualType PassThruTy = PassThruArg->getType();
-    if (!S.Context.hasSameType(PassThruTy, PointeeTy))
+    if (!S.Context.hasSameType(PassThruTy, RetTy))
       return S.Diag(PtrArg->getExprLoc(), diag::err_vec_masked_load_store_ptr)
-             << /* third argument */ 3 << PointeeTy;
+             << /* third argument */ 3 << RetTy;
   }
 
-  if (MaskVecTy->getNumElements() != DataVecTy->getNumElements())
-    return ExprError(
-        S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size)
-        << S.getASTContext().BuiltinInfo.getQuotedName(
-               TheCall->getBuiltinCallee())
-        << MaskTy << PointeeTy);
-
-  TheCall->setType(PointeeTy);
+  TheCall->setType(RetTy);
 
   return TheCall;
 }
@@ -2339,18 +2331,10 @@ static ExprResult BuiltinMaskedStore(Sema &S, CallExpr *TheCall) {
   QualType PointeeTy = PtrTy->getPointeeType();
 
   const VectorType *MaskVecTy = MaskTy->getAs<VectorType>();
-  const VectorType *ValVecTy = ValTy->getAs<VectorType>();
-  const VectorType *PtrVecTy = PointeeTy->getAs<VectorType>();
-
-  if (MaskVecTy->getNumElements() != ValVecTy->getNumElements() ||
-      MaskVecTy->getNumElements() != PtrVecTy->getNumElements())
-    return ExprError(
-        S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size)
-        << S.getASTContext().BuiltinInfo.getQuotedName(
-               TheCall->getBuiltinCallee())
-        << MaskTy << PointeeTy);
+  QualType RetTy =
+      S.Context.getExtVectorType(PointeeTy, MaskVecTy->getNumElements());
 
-  if (!S.Context.hasSameType(ValTy, PointeeTy))
+  if (!S.Context.hasSameType(ValTy, RetTy))
     return ExprError(S.Diag(TheCall->getBeginLoc(),
                             diag::err_vec_builtin_incompatible_vector)
                      << TheCall->getDirectCallee() << /*isMorethantwoArgs*/ 2
@@ -2368,7 +2352,7 @@ static ExprResult BuiltinMaskedGather(Sema &S, CallExpr *TheCall) {
   Expr *MaskArg = TheCall->getArg(0);
   Expr *IdxArg = TheCall->getArg(1);
   Expr *PtrArg = TheCall->getArg(2);
-  if (CheckMaskedBuiltinArgs(S, MaskArg, PtrArg, 3, /*Vector=*/false))
+  if (CheckMaskedBuiltinArgs(S, MaskArg, PtrArg, 3))
     return ExprError();
 
   QualType IdxTy = IdxArg->getType();
@@ -2413,7 +2397,7 @@ static ExprResult BuiltinMaskedScatter(Sema &S, CallExpr *TheCall) {
   Expr *ValArg = TheCall->getArg(2);
   Expr *PtrArg = TheCall->getArg(3);
 
-  if (CheckMaskedBuiltinArgs(S, MaskArg, PtrArg, 3, /*Vector=*/false))
+  if (CheckMaskedBuiltinArgs(S, MaskArg, PtrArg, 3))
     return ExprError();
 
   QualType IdxTy = IdxArg->getType();
diff --git a/clang/test/CodeGen/builtin-masked.c b/clang/test/CodeGen/builtin-masked.c
index adb1ad4b698ac..2ac05fac219fe 100644
--- a/clang/test/CodeGen/builtin-masked.c
+++ b/clang/test/CodeGen/builtin-masked.c
@@ -19,10 +19,10 @@ typedef _Bool v8b __attribute__((ext_vector_type(8)));
 // CHECK-NEXT:    [[LOAD_BITS2:%.*]] = load i8, ptr [[M_ADDR]], align 1
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
 // CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[P_ADDR]], align 8
-// CHECK-NEXT:    [[MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP2]], i32 32, <8 x i1> [[TMP1]], <8 x i32> poison)
+// CHECK-NEXT:    [[MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP2]], i32 4, <8 x i1> [[TMP1]], <8 x i32> poison)
 // CHECK-NEXT:    ret <8 x i32> [[MASKED_LOAD]]
 //
-v8i test_load(v8b m, v8i *p) {
+v8i test_load(v8b m, int *p) {
   return __builtin_masked_load(m, p);
 }
 
@@ -45,10 +45,10 @@ v8i test_load(v8b m, v8i *p) {
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
 // CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[P_ADDR]], align 8
 // CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[T_ADDR]], align 32
-// CHECK-NEXT:    [[MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP3]], i32 32, <8 x i1> [[TMP2]], <8 x i32> [[TMP4]])
+// CHECK-NEXT:    [[MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP2]], <8 x i32> [[TMP4]])
 // CHECK-NEXT:    ret <8 x i32> [[MASKED_LOAD]]
 //
-v8i test_load_passthru(v8b m, v8i *p, v8i t) {
+v8i test_load_passthru(v8b m, int *p, v8i t) {
   return __builtin_masked_load(m, p, t);
 }
 
@@ -74,7 +74,7 @@ v8i test_load_passthru(v8b m, v8i *p, v8i t) {
 // CHECK-NEXT:    [[MASKED_EXPAND_LOAD:%.*]] = call <8 x i32> @llvm.masked.expandload.v8i32(ptr [[TMP3]], <8 x i1> [[TMP2]], <8 x i32> [[TMP4]])
 // CHECK-NEXT:    ret <8 x i32> [[MASKED_EXPAND_LOAD]]
 //
-v8i test_load_expand(v8b m, v8i *p, v8i t) {
+v8i test_load_expand(v8b m, int *p, v8i t) {
   return __builtin_masked_expand_load(m, p, t);
 }
 
@@ -97,10 +97,10 @@ v8i test_load_expand(v8b m, v8i *p, v8i t) {
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
 // CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[V_ADDR]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[P_ADDR]], align 8
-// CHECK-NEXT:    call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP3]], ptr [[TMP4]], i32 32, <8 x i1> [[TMP2]])
+// CHECK-NEXT:    call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP3]], ptr [[TMP4]], i32 4, <8 x i1> [[TMP2]])
 // CHECK-NEXT:    ret void
 //
-void test_store(v8b m, v8i v, v8i *p) {
+void test_store(v8b m, v8i v, int *p) {
   __builtin_masked_store(m, v, p);
 }
 
@@ -126,7 +126,7 @@ void test_store(v8b m, v8i v, v8i *p) {
 // CHECK-NEXT:    call void @llvm.masked.compressstore.v8i32(<8 x i32> [[TMP3]], ptr [[TMP4]], <8 x i1> [[TMP2]])
 // CHECK-NEXT:    ret void
 //
-void test_compress_store(v8b m, v8i v, v8i *p) {
+void test_compress_store(v8b m, v8i v, int *p) {
   __builtin_masked_compress_store(m, v, p);
 }
diff --git a/clang/test/Sema/builtin-masked.c b/clang/test/Sema/builtin-masked.c
index eb0070b0276af..e24648da57fb1 100644
--- a/clang/test/Sema/builtin-masked.c
+++ b/clang/test/Sema/builtin-masked.c
@@ -5,44 +5,34 @@ typedef _Bool v8b __attribute__((ext_vector_type(8)));
 typedef _Bool v2b __attribute__((ext_vector_type(2)));
 typedef float v8f __attribute__((ext_vector_type(8)));
 
-void test_masked_load(v8i *pf, v8b mask, v2b mask2, v2b thru) {
+void test_masked_load(int *pf, v8b mask, v2b mask2, v2b thru) {
   (void)__builtin_masked_load(mask); // expected-error {{too few arguments to function call, expected 2, have 1}}
   (void)__builtin_masked_load(mask, pf, pf, pf); // expected-error {{too many arguments to function call, expected at most 3, have 4}}
-  (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to '__builtin_masked_load' must have the same number of elements}}
-  (void)__builtin_masked_load(mask, mask); // expected-error {{2nd argument must be a pointer to vector}}
-  (void)__builtin_masked_load(mask, (void *)0); // expected-error {{2nd argument must be a pointer to vector}}
-  (void)__builtin_masked_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'v8i' (vector of 8 'int' values)}}
-  (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to '__builtin_masked_load' must have the same number of elements}}
+  (void)__builtin_masked_load(mask, mask); // expected-error {{2nd argument must be a scalar pointer}}
+  (void)__builtin_masked_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'int __attribute__((ext_vector_type(2)))' (vector of 2 'int' values)}}
 }
 
-void test_masked_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) {
+void test_masked_store(int *pf, v8f *pf2, v8b mask, v2b mask2) {
   __builtin_masked_store(mask); // expected-error {{too few arguments to function call, expected 3, have 1}}
   __builtin_masked_store(mask, 0, 0, 0); // expected-error {{too many arguments to function call, expected 3, have 4}}
   __builtin_masked_store(0, 0, pf); // expected-error {{1st argument must be a vector of boolean types (was 'int')}}
   __builtin_masked_store(mask, 0, pf); // expected-error {{2nd argument must be a vector}}
-  __builtin_masked_store(mask, *pf, 0); // expected-error {{3rd argument must be a pointer to vector}}
-  __builtin_masked_store(mask2, *pf, pf); // expected-error {{all arguments to '__builtin_masked_store' must have the same number of elements}}
-  __builtin_masked_store(mask, *pf, pf2); // expected-error {{last two arguments to '__builtin_masked_store' must have the same type}}
+  __builtin_masked_store(mask, *pf, 0); // expected-error {{3rd argument must be a scalar pointer}}
 }
 
-void test_masked_expand_load(v8i *pf, v8b mask, v2b mask2, v2b thru) {
+void test_masked_expand_load(int *pf, v8b mask, v2b mask2, v2b thru) {
   (void)__builtin_masked_expand_load(mask); // expected-error {{too few arguments to function call, expected 2, have 1}}
   (void)__builtin_masked_expand_load(mask, pf, pf, pf); // expected-error {{too many arguments to function call, expected at most 3, have 4}}
-  (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all arguments to '__builtin_masked_expand_load' must have the same number of elements}}
-  (void)__builtin_masked_expand_load(mask, mask); // expected-error {{2nd argument must be a pointer to vector}}
-  (void)__builtin_masked_expand_load(mask, (void *)0); // expected-error {{2nd argument must be a pointer to vector}}
-  (void)__builtin_masked_expand_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'v8i' (vector of 8 'int' values)}}
-  (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all arguments to '__builtin_masked_expand_load' must have the same number of elements}}
+  (void)__builtin_masked_expand_load(mask, mask); // expected-error {{2nd argument must be a scalar pointer}}
+  (void)__builtin_masked_expand_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'int __attribute__((ext_vector_type(2)))' (vector of 2 'int' values)}}
 }
 
-void test_masked_compress_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) {
+void test_masked_compress_store(int *pf, v8f *pf2, v8b mask, v2b mask2) {
   __builtin_masked_compress_store(mask); // expected-error {{too few arguments to function call, expected 3, have 1}}
   __builtin_masked_compress_store(mask, 0, 0, 0); // expected-error {{too many arguments to function call, expected 3, have 4}}
   __builtin_masked_compress_store(0, 0, pf); // expected-error {{1st argument must be a vector of boolean types (was 'int')}}
   __builtin_masked_compress_store(mask, 0, pf); // expected-error {{2nd argument must be a vector}}
-  __builtin_masked_compress_store(mask, *pf, 0); // expected-error {{3rd argument must be a pointer to vector}}
-  __builtin_masked_compress_store(mask2, *pf, pf); // expected-error {{all arguments to '__builtin_masked_compress_store' must have the same number of elements}}
-  __builtin_masked_compress_store(mask, *pf, pf2); // expected-error {{last two arguments to '__builtin_masked_compress_store' must have the same type}}
+  __builtin_masked_compress_store(mask, *pf, 0); // expected-error {{3rd argument must be a scalar pointer}}
 }
 
 void test_masked_gather(int *p, v8i idx, v8b mask, v2b mask2, v2b thru) {
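
Usage sketch (not part of the patch above): under the new unaligned-by-default semantics, the builtins only assume the natural alignment of the element type (i32 4 in the CHECK lines above), so a caller that knows its buffer is vector-aligned can route the pointer through ``__builtin_assume_aligned`` to let later passes use the stronger alignment. The ``load_aligned`` helper below is hypothetical; the builtin signatures follow the documentation hunk.

  typedef _Bool v8b __attribute__((ext_vector_type(8)));
  typedef int v8i __attribute__((ext_vector_type(8)));

  v8i load_aligned(v8b mask, int *p) {
    /* Promise the optimizer 32-byte alignment; the masked load itself is
       emitted with only 4-byte (element) alignment, and the assumption
       allows optimizations to raise it. */
    int *ap = (int *)__builtin_assume_aligned(p, 32);
    return __builtin_masked_load(mask, ap);
  }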