-
Notifications
You must be signed in to change notification settings - Fork 15.6k
[X86] Add constexpr handling for XOP/AVX512 rotate by immediate intrinsics #156047
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-clang Author: Simon Pilgrim (RKSimon) ChangesPatch is 28.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156047.diff 6 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index dfff92839da4e..3cac91cf80f5d 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -934,10 +934,6 @@ let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vphsubwd : X86Builtin<"_Vector<4, int>(_Vector<8, short>)">;
def vphsubdq : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>)">;
def vpperm : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
- def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">;
- def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">;
- def vprotdi : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant char)">;
- def vprotqi : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant char)">;
def vpshlb : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
def vpshlw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def vpshld : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
@@ -962,6 +958,13 @@ let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vfrczpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
}
+let Features = "xop", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">;
+ def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">;
+ def vprotdi : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant char)">;
+ def vprotqi : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant char)">;
+}
+
let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def vpermil2pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, long long int>, _Constant char)">;
def vpermil2ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, int>, _Constant char)">;
@@ -2045,21 +2048,21 @@ let Features = "avx512dq,evex512", Attributes = [NoThrow, Const, RequiredVectorW
def reduceps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
}
-let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def prold512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
def prord512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
def prolq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
def prorq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def prold128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
def prord128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
def prolq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
def prorq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def prold256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
def prord256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
def prolq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 1e7736955c065..c34d768f96211 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3325,6 +3325,32 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return LHS.lshr(RHS.getZExtValue());
});
+ case clang::X86::BI__builtin_ia32_vprotbi:
+ case clang::X86::BI__builtin_ia32_vprotdi:
+ case clang::X86::BI__builtin_ia32_vprotqi:
+ case clang::X86::BI__builtin_ia32_vprotwi:
+ case clang::X86::BI__builtin_ia32_prold128:
+ case clang::X86::BI__builtin_ia32_prold256:
+ case clang::X86::BI__builtin_ia32_prold512:
+ case clang::X86::BI__builtin_ia32_prolq128:
+ case clang::X86::BI__builtin_ia32_prolq256:
+ case clang::X86::BI__builtin_ia32_prolq512:
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, BuiltinID, [](const APSInt &LHS, const APSInt &RHS) {
+ return LHS.rotl(RHS);
+ });
+
+ case clang::X86::BI__builtin_ia32_prord128:
+ case clang::X86::BI__builtin_ia32_prord256:
+ case clang::X86::BI__builtin_ia32_prord512:
+ case clang::X86::BI__builtin_ia32_prorq128:
+ case clang::X86::BI__builtin_ia32_prorq256:
+ case clang::X86::BI__builtin_ia32_prorq512:
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, BuiltinID, [](const APSInt &LHS, const APSInt &RHS) {
+ return LHS.rotr(RHS);
+ });
+
case Builtin::BI__builtin_elementwise_max:
case Builtin::BI__builtin_elementwise_min:
return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3ca4ac4b92ba7..b4f1e76187e25 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11835,6 +11835,62 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case clang::X86::BI__builtin_ia32_vprotbi:
+ case clang::X86::BI__builtin_ia32_vprotdi:
+ case clang::X86::BI__builtin_ia32_vprotqi:
+ case clang::X86::BI__builtin_ia32_vprotwi:
+ case clang::X86::BI__builtin_ia32_prold128:
+ case clang::X86::BI__builtin_ia32_prold256:
+ case clang::X86::BI__builtin_ia32_prold512:
+ case clang::X86::BI__builtin_ia32_prolq128:
+ case clang::X86::BI__builtin_ia32_prolq256:
+ case clang::X86::BI__builtin_ia32_prolq512: {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(SourceLen);
+
+ APSInt RHS = SourceRHS.getInt();
+
+ for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+ const APSInt &LHS = SourceLHS.getVectorElt(EltNum).getInt();
+ ResultElements.push_back(APValue(APSInt(LHS.rotl(RHS), DestUnsigned)));
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+ case clang::X86::BI__builtin_ia32_prord128:
+ case clang::X86::BI__builtin_ia32_prord256:
+ case clang::X86::BI__builtin_ia32_prord512:
+ case clang::X86::BI__builtin_ia32_prorq128:
+ case clang::X86::BI__builtin_ia32_prorq256:
+ case clang::X86::BI__builtin_ia32_prorq512: {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(SourceLen);
+
+ APSInt RHS = SourceRHS.getInt();
+
+ for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+ const APSInt &LHS = SourceLHS.getVectorElt(EltNum).getInt();
+ ResultElements.push_back(APValue(APSInt(LHS.rotr(RHS), DestUnsigned)));
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
case Builtin::BI__builtin_elementwise_max:
case Builtin::BI__builtin_elementwise_min: {
APValue SourceLHS, SourceRHS;
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index bddc5d11818ef..a109dad4ebdcc 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -4004,7 +4004,6 @@ __m512i test_mm512_cvtepu32_epi64(__m256i __X) {
// CHECK: zext <8 x i32> %{{.*}} to <8 x i64>
return _mm512_cvtepu32_epi64(__X);
}
-
TEST_CONSTEXPR(match_v8di(_mm512_cvtepu32_epi64(_mm256_setr_epi32(-70000, 2, -1, 0, 1, -2, 3, -4)), 4294897296, 2, 4294967295, 0, 1, 4294967294, 3, 4294967292));
__m512i test_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
@@ -4026,7 +4025,6 @@ __m512i test_mm512_cvtepu16_epi32(__m256i __A) {
// CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
return _mm512_cvtepu16_epi32(__A);
}
-
TEST_CONSTEXPR(match_v16si(_mm512_cvtepu16_epi32(_mm256_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 65236, 2, 65535, 0, 1, 65534, 3, 65532, 5, 65530, 7, 65528, 9, 65526, 11, 65524));
__m512i test_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
@@ -4048,7 +4046,6 @@ __m512i test_mm512_cvtepu16_epi64(__m128i __A) {
// CHECK: zext <8 x i16> %{{.*}} to <8 x i64>
return _mm512_cvtepu16_epi64(__A);
}
-
TEST_CONSTEXPR(match_v8di(_mm512_cvtepu16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0, 1, 65534, 3, 65532));
__m512i test_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
@@ -4065,12 +4062,12 @@ __m512i test_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) {
return _mm512_maskz_cvtepu16_epi64(__U, __A);
}
-
__m512i test_mm512_rol_epi32(__m512i __A) {
// CHECK-LABEL: test_mm512_rol_epi32
// CHECK: @llvm.fshl.v16i32
return _mm512_rol_epi32(__A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_rol_epi32(((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 32, -33, 96, -97, -129, 192, -193, 256, 288, -289, 352, -353, 416, -417, 480, -481));
__m512i test_mm512_mask_rol_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_rol_epi32
@@ -4078,6 +4075,7 @@ __m512i test_mm512_mask_rol_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_rol_epi32(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mask_rol_epi32(((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}), 0xC873, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 32, -33, 99, 99, -129, 192, -193, 99, 99, 99, 99, -353, 99, 99, 480, -481));
__m512i test_mm512_maskz_rol_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_rol_epi32
@@ -4085,12 +4083,14 @@ __m512i test_mm512_maskz_rol_epi32(__mmask16 __U, __m512i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_rol_epi32(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_rol_epi32(0x378C, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 0, 0, 96, -97, 0, 0, 0, 256, 288, -289, 352, 0, 416, -417, 0, 0));
__m512i test_mm512_rol_epi64(__m512i __A) {
// CHECK-LABEL: test_mm512_rol_epi64
// CHECK: @llvm.fshl.v8i64
return _mm512_rol_epi64(__A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_rol_epi64(((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, -97, -129, 192, -193, 256));
__m512i test_mm512_mask_rol_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_rol_epi64
@@ -4098,6 +4098,7 @@ __m512i test_mm512_mask_rol_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_rol_epi64(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_rol_epi64(((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 99, 99, -129, 192, -193, 99));
__m512i test_mm512_maskz_rol_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_rol_epi64
@@ -4105,6 +4106,7 @@ __m512i test_mm512_maskz_rol_epi64(__mmask8 __U, __m512i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_rol_epi64(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_rol_epi64(0x37, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, 0, -129, 192, 0, 0));
__m512i test_mm512_rolv_epi32(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_rolv_epi32
@@ -4151,6 +4153,7 @@ __m512i test_mm512_ror_epi32(__m512i __A) {
// CHECK: @llvm.fshr.v16i32
return _mm512_ror_epi32(__A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_ror_epi32(((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 134217728, -134217729, 402653184, -402653185, -536870913, 805306368, -805306369, 1073741824, 1207959552, -1207959553, 1476395008, -1476395009, 1744830464, -1744830465, 2013265920, -2013265921));
__m512i test_mm512_mask_ror_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_ror_epi32
@@ -4158,6 +4161,7 @@ __m512i test_mm512_mask_ror_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_ror_epi32(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mask_ror_epi32(((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}), 0xC873, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 134217728, -134217729, 99, 99, -536870913, 805306368, -805306369, 99, 99, 99, 99, -1476395009, 99, 99, 2013265920, -2013265921));
__m512i test_mm512_maskz_ror_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_ror_epi32
@@ -4165,12 +4169,14 @@ __m512i test_mm512_maskz_ror_epi32(__mmask16 __U, __m512i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_ror_epi32(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_ror_epi32(0x378C, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 0, 0, 402653184, -402653185, 0, 0, 0, 1073741824, 1207959552, -1207959553, 1476395008, 0, 1744830464, -1744830465, 0, 0));
__m512i test_mm512_ror_epi64(__m512i __A) {
// CHECK-LABEL: test_mm512_ror_epi64
// CHECK: @llvm.fshr.v8i64
return _mm512_ror_epi64(__A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_ror_epi64(((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 1729382256910270464LL, -1729382256910270465LL, -2305843009213693953LL, 3458764513820540928LL, -3458764513820540929LL, 4611686018427387904LL));
__m512i test_mm512_mask_ror_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_ror_epi64
@@ -4178,6 +4184,7 @@ __m512i test_mm512_mask_ror_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_ror_epi64(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_ror_epi64(((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 99, 99, -2305843009213693953LL, 3458764513820540928LL, -3458764513820540929LL, 99));
__m512i test_mm512_maskz_ror_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_ror_epi64
@@ -4185,7 +4192,7 @@ __m512i test_mm512_maskz_ror_epi64(__mmask8 __U, __m512i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_ror_epi64(__U, __A, 5);
}
-
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_ror_epi64(0x37, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 1729382256910270464LL, 0, -2305843009213693953LL, 3458764513820540928LL, 0, 0));
__m512i test_mm512_rorv_epi32(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_rorv_epi32
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 477326f35518a..78e01b944ec5d 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -5666,6 +5666,7 @@ __m128i test_mm_rol_epi32(__m128i __A) {
// CHECK: @llvm.fshl.v4i32
return _mm_rol_epi32(__A, 5);
}
+TEST_CONSTEXPR(match_v4si(_mm_rol_epi32(((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, -33, 96, -97));
__m128i test_mm_mask_rol_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_rol_epi32
@@ -5673,6 +5674,7 @@ __m128i test_mm_mask_rol_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_rol_epi32(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v4si(_mm_mask_rol_epi32(((__m128i)(__v4si){99, 99, 99, 99}), 0x3, ((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, -33, 99, 99));
__m128i test_mm_maskz_rol_epi32(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_rol_epi32
@@ -5680,12 +5682,14 @@ __m128i test_mm_maskz_rol_epi32(__mmask8 __U, __m128i __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_rol_epi32(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v4si(_mm_maskz_rol_epi32(0x9, ((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, 0, 0, -97));
__m256i test_mm256_rol_epi32(__m256i __A) {
// CHECK-LABEL: test_mm256_rol_epi32
// CHECK: @llvm.fshl.v8i32
return _mm256_rol_epi32(__A, 5);
}
+TEST_CONSTEXPR(match_v8si(_mm256_rol_epi32(((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, -97, -129, 192, -193, 256));
__m256i test_mm256_mask_rol_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_mask_rol_epi32
@@ -5693,6 +5697,7 @@ __m256i test_mm256_mask_rol_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_rol_epi32(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v8si(_mm256_mask_rol_epi32(((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 99, 99, -129, 192, -193, 99));
__m256i test_mm256_maskz_rol_epi32(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_maskz_rol_epi32
@@ -5700,12 +5705,14 @@ __m256i test_mm256_maskz_rol_epi32(__mmask8 __U, __m256i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_rol_epi32(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_rol_epi32(0x37, ((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, 0, -129, 192, 0, 0));
__m128i test_mm_rol_epi64(__m128i __A) {
// CHECK-LABEL: test_mm_rol_epi64
// CHECK: @llvm.fshl.v2i64
return _mm_rol_epi64(__A, 5);
}
+TEST_CONSTEXPR(match_v2di(_mm_rol_epi64(((__m128i)(__v2di){10, -11}), 19), 5242880, -5242881));
__m128i test_mm_mask_rol_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_rol_epi64
@@ -5713,6 +5720,7 @@ __m128i test_mm_mask_rol_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_rol_epi64(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v2di(_mm_mask_rol_epi64(((__m128i)(__v2di){99, 99}), 0x1, ((__m128i)(__v2di){10, -11}), 19), 5242880, 99));
__m128i test_mm_maskz_rol_epi64(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_rol_epi64
@@ -5720,12 +5728,14 @@ __m128i test_mm_maskz_rol_epi64(__mmask8 __U, __m128i __A) {
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_maskz_rol_epi64(__U, __...
[truncated]
|
|
@llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) ChangesPatch is 28.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156047.diff 6 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index dfff92839da4e..3cac91cf80f5d 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -934,10 +934,6 @@ let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vphsubwd : X86Builtin<"_Vector<4, int>(_Vector<8, short>)">;
def vphsubdq : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>)">;
def vpperm : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
- def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">;
- def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">;
- def vprotdi : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant char)">;
- def vprotqi : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant char)">;
def vpshlb : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
def vpshlw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def vpshld : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
@@ -962,6 +958,13 @@ let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vfrczpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
}
+let Features = "xop", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">;
+ def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">;
+ def vprotdi : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant char)">;
+ def vprotqi : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant char)">;
+}
+
let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def vpermil2pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, long long int>, _Constant char)">;
def vpermil2ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, int>, _Constant char)">;
@@ -2045,21 +2048,21 @@ let Features = "avx512dq,evex512", Attributes = [NoThrow, Const, RequiredVectorW
def reduceps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
}
-let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def prold512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
def prord512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
def prolq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
def prorq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def prold128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
def prord128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
def prolq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
def prorq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def prold256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
def prord256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
def prolq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 1e7736955c065..c34d768f96211 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3325,6 +3325,32 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return LHS.lshr(RHS.getZExtValue());
});
+ case clang::X86::BI__builtin_ia32_vprotbi:
+ case clang::X86::BI__builtin_ia32_vprotdi:
+ case clang::X86::BI__builtin_ia32_vprotqi:
+ case clang::X86::BI__builtin_ia32_vprotwi:
+ case clang::X86::BI__builtin_ia32_prold128:
+ case clang::X86::BI__builtin_ia32_prold256:
+ case clang::X86::BI__builtin_ia32_prold512:
+ case clang::X86::BI__builtin_ia32_prolq128:
+ case clang::X86::BI__builtin_ia32_prolq256:
+ case clang::X86::BI__builtin_ia32_prolq512:
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, BuiltinID, [](const APSInt &LHS, const APSInt &RHS) {
+ return LHS.rotl(RHS);
+ });
+
+ case clang::X86::BI__builtin_ia32_prord128:
+ case clang::X86::BI__builtin_ia32_prord256:
+ case clang::X86::BI__builtin_ia32_prord512:
+ case clang::X86::BI__builtin_ia32_prorq128:
+ case clang::X86::BI__builtin_ia32_prorq256:
+ case clang::X86::BI__builtin_ia32_prorq512:
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, BuiltinID, [](const APSInt &LHS, const APSInt &RHS) {
+ return LHS.rotr(RHS);
+ });
+
case Builtin::BI__builtin_elementwise_max:
case Builtin::BI__builtin_elementwise_min:
return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3ca4ac4b92ba7..b4f1e76187e25 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11835,6 +11835,62 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case clang::X86::BI__builtin_ia32_vprotbi:
+ case clang::X86::BI__builtin_ia32_vprotdi:
+ case clang::X86::BI__builtin_ia32_vprotqi:
+ case clang::X86::BI__builtin_ia32_vprotwi:
+ case clang::X86::BI__builtin_ia32_prold128:
+ case clang::X86::BI__builtin_ia32_prold256:
+ case clang::X86::BI__builtin_ia32_prold512:
+ case clang::X86::BI__builtin_ia32_prolq128:
+ case clang::X86::BI__builtin_ia32_prolq256:
+ case clang::X86::BI__builtin_ia32_prolq512: {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(SourceLen);
+
+ APSInt RHS = SourceRHS.getInt();
+
+ for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+ const APSInt &LHS = SourceLHS.getVectorElt(EltNum).getInt();
+ ResultElements.push_back(APValue(APSInt(LHS.rotl(RHS), DestUnsigned)));
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+ case clang::X86::BI__builtin_ia32_prord128:
+ case clang::X86::BI__builtin_ia32_prord256:
+ case clang::X86::BI__builtin_ia32_prord512:
+ case clang::X86::BI__builtin_ia32_prorq128:
+ case clang::X86::BI__builtin_ia32_prorq256:
+ case clang::X86::BI__builtin_ia32_prorq512: {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(SourceLen);
+
+ APSInt RHS = SourceRHS.getInt();
+
+ for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+ const APSInt &LHS = SourceLHS.getVectorElt(EltNum).getInt();
+ ResultElements.push_back(APValue(APSInt(LHS.rotr(RHS), DestUnsigned)));
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
case Builtin::BI__builtin_elementwise_max:
case Builtin::BI__builtin_elementwise_min: {
APValue SourceLHS, SourceRHS;
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index bddc5d11818ef..a109dad4ebdcc 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -4004,7 +4004,6 @@ __m512i test_mm512_cvtepu32_epi64(__m256i __X) {
// CHECK: zext <8 x i32> %{{.*}} to <8 x i64>
return _mm512_cvtepu32_epi64(__X);
}
-
TEST_CONSTEXPR(match_v8di(_mm512_cvtepu32_epi64(_mm256_setr_epi32(-70000, 2, -1, 0, 1, -2, 3, -4)), 4294897296, 2, 4294967295, 0, 1, 4294967294, 3, 4294967292));
__m512i test_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
@@ -4026,7 +4025,6 @@ __m512i test_mm512_cvtepu16_epi32(__m256i __A) {
// CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
return _mm512_cvtepu16_epi32(__A);
}
-
TEST_CONSTEXPR(match_v16si(_mm512_cvtepu16_epi32(_mm256_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 65236, 2, 65535, 0, 1, 65534, 3, 65532, 5, 65530, 7, 65528, 9, 65526, 11, 65524));
__m512i test_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
@@ -4048,7 +4046,6 @@ __m512i test_mm512_cvtepu16_epi64(__m128i __A) {
// CHECK: zext <8 x i16> %{{.*}} to <8 x i64>
return _mm512_cvtepu16_epi64(__A);
}
-
TEST_CONSTEXPR(match_v8di(_mm512_cvtepu16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0, 1, 65534, 3, 65532));
__m512i test_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
@@ -4065,12 +4062,12 @@ __m512i test_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) {
return _mm512_maskz_cvtepu16_epi64(__U, __A);
}
-
__m512i test_mm512_rol_epi32(__m512i __A) {
// CHECK-LABEL: test_mm512_rol_epi32
// CHECK: @llvm.fshl.v16i32
return _mm512_rol_epi32(__A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_rol_epi32(((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 32, -33, 96, -97, -129, 192, -193, 256, 288, -289, 352, -353, 416, -417, 480, -481));
__m512i test_mm512_mask_rol_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_rol_epi32
@@ -4078,6 +4075,7 @@ __m512i test_mm512_mask_rol_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_rol_epi32(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mask_rol_epi32(((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}), 0xC873, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 32, -33, 99, 99, -129, 192, -193, 99, 99, 99, 99, -353, 99, 99, 480, -481));
__m512i test_mm512_maskz_rol_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_rol_epi32
@@ -4085,12 +4083,14 @@ __m512i test_mm512_maskz_rol_epi32(__mmask16 __U, __m512i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_rol_epi32(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_rol_epi32(0x378C, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 0, 0, 96, -97, 0, 0, 0, 256, 288, -289, 352, 0, 416, -417, 0, 0));
__m512i test_mm512_rol_epi64(__m512i __A) {
// CHECK-LABEL: test_mm512_rol_epi64
// CHECK: @llvm.fshl.v8i64
return _mm512_rol_epi64(__A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_rol_epi64(((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, -97, -129, 192, -193, 256));
__m512i test_mm512_mask_rol_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_rol_epi64
@@ -4098,6 +4098,7 @@ __m512i test_mm512_mask_rol_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_rol_epi64(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_rol_epi64(((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 99, 99, -129, 192, -193, 99));
__m512i test_mm512_maskz_rol_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_rol_epi64
@@ -4105,6 +4106,7 @@ __m512i test_mm512_maskz_rol_epi64(__mmask8 __U, __m512i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_rol_epi64(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_rol_epi64(0x37, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, 0, -129, 192, 0, 0));
__m512i test_mm512_rolv_epi32(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_rolv_epi32
@@ -4151,6 +4153,7 @@ __m512i test_mm512_ror_epi32(__m512i __A) {
// CHECK: @llvm.fshr.v16i32
return _mm512_ror_epi32(__A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_ror_epi32(((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 134217728, -134217729, 402653184, -402653185, -536870913, 805306368, -805306369, 1073741824, 1207959552, -1207959553, 1476395008, -1476395009, 1744830464, -1744830465, 2013265920, -2013265921));
__m512i test_mm512_mask_ror_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_ror_epi32
@@ -4158,6 +4161,7 @@ __m512i test_mm512_mask_ror_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_ror_epi32(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mask_ror_epi32(((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}), 0xC873, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 134217728, -134217729, 99, 99, -536870913, 805306368, -805306369, 99, 99, 99, 99, -1476395009, 99, 99, 2013265920, -2013265921));
__m512i test_mm512_maskz_ror_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_ror_epi32
@@ -4165,12 +4169,14 @@ __m512i test_mm512_maskz_ror_epi32(__mmask16 __U, __m512i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_ror_epi32(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_ror_epi32(0x378C, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 0, 0, 402653184, -402653185, 0, 0, 0, 1073741824, 1207959552, -1207959553, 1476395008, 0, 1744830464, -1744830465, 0, 0));
__m512i test_mm512_ror_epi64(__m512i __A) {
// CHECK-LABEL: test_mm512_ror_epi64
// CHECK: @llvm.fshr.v8i64
return _mm512_ror_epi64(__A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_ror_epi64(((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 1729382256910270464LL, -1729382256910270465LL, -2305843009213693953LL, 3458764513820540928LL, -3458764513820540929LL, 4611686018427387904LL));
__m512i test_mm512_mask_ror_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_ror_epi64
@@ -4178,6 +4184,7 @@ __m512i test_mm512_mask_ror_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_ror_epi64(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_ror_epi64(((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 99, 99, -2305843009213693953LL, 3458764513820540928LL, -3458764513820540929LL, 99));
__m512i test_mm512_maskz_ror_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_ror_epi64
@@ -4185,7 +4192,7 @@ __m512i test_mm512_maskz_ror_epi64(__mmask8 __U, __m512i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_ror_epi64(__U, __A, 5);
}
-
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_ror_epi64(0x37, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 1729382256910270464LL, 0, -2305843009213693953LL, 3458764513820540928LL, 0, 0));
__m512i test_mm512_rorv_epi32(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_rorv_epi32
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 477326f35518a..78e01b944ec5d 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -5666,6 +5666,7 @@ __m128i test_mm_rol_epi32(__m128i __A) {
// CHECK: @llvm.fshl.v4i32
return _mm_rol_epi32(__A, 5);
}
+TEST_CONSTEXPR(match_v4si(_mm_rol_epi32(((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, -33, 96, -97));
__m128i test_mm_mask_rol_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_rol_epi32
@@ -5673,6 +5674,7 @@ __m128i test_mm_mask_rol_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_rol_epi32(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v4si(_mm_mask_rol_epi32(((__m128i)(__v4si){99, 99, 99, 99}), 0x3, ((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, -33, 99, 99));
__m128i test_mm_maskz_rol_epi32(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_rol_epi32
@@ -5680,12 +5682,14 @@ __m128i test_mm_maskz_rol_epi32(__mmask8 __U, __m128i __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_rol_epi32(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v4si(_mm_maskz_rol_epi32(0x9, ((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, 0, 0, -97));
__m256i test_mm256_rol_epi32(__m256i __A) {
// CHECK-LABEL: test_mm256_rol_epi32
// CHECK: @llvm.fshl.v8i32
return _mm256_rol_epi32(__A, 5);
}
+TEST_CONSTEXPR(match_v8si(_mm256_rol_epi32(((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, -97, -129, 192, -193, 256));
__m256i test_mm256_mask_rol_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_mask_rol_epi32
@@ -5693,6 +5697,7 @@ __m256i test_mm256_mask_rol_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_rol_epi32(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v8si(_mm256_mask_rol_epi32(((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 99, 99, -129, 192, -193, 99));
__m256i test_mm256_maskz_rol_epi32(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_maskz_rol_epi32
@@ -5700,12 +5705,14 @@ __m256i test_mm256_maskz_rol_epi32(__mmask8 __U, __m256i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_rol_epi32(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_rol_epi32(0x37, ((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, 0, -129, 192, 0, 0));
__m128i test_mm_rol_epi64(__m128i __A) {
// CHECK-LABEL: test_mm_rol_epi64
// CHECK: @llvm.fshl.v2i64
return _mm_rol_epi64(__A, 5);
}
+TEST_CONSTEXPR(match_v2di(_mm_rol_epi64(((__m128i)(__v2di){10, -11}), 19), 5242880, -5242881));
__m128i test_mm_mask_rol_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_rol_epi64
@@ -5713,6 +5720,7 @@ __m128i test_mm_mask_rol_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_rol_epi64(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v2di(_mm_mask_rol_epi64(((__m128i)(__v2di){99, 99}), 0x1, ((__m128i)(__v2di){10, -11}), 19), 5242880, 99));
__m128i test_mm_maskz_rol_epi64(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_rol_epi64
@@ -5720,12 +5728,14 @@ __m128i test_mm_maskz_rol_epi64(__mmask8 __U, __m128i __A) {
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_maskz_rol_epi64(__U, __...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
phoebewang
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
No description provided.