Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -878,10 +878,6 @@ let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vphsubwd : X86Builtin<"_Vector<4, int>(_Vector<8, short>)">;
def vphsubdq : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>)">;
def vpperm : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">;
def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">;
def vprotdi : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant char)">;
def vprotqi : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant char)">;
def vpshlb : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
def vpshlw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def vpshld : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
Expand All @@ -906,6 +902,13 @@ let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vfrczpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
}

let Features = "xop", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">;
def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">;
def vprotdi : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant char)">;
def vprotqi : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant char)">;
}

let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def vpermil2pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, long long int>, _Constant char)">;
def vpermil2ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, int>, _Constant char)">;
Expand Down Expand Up @@ -1989,21 +1992,21 @@ let Features = "avx512dq,evex512", Attributes = [NoThrow, Const, RequiredVectorW
def reduceps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
}

let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def prold512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
def prord512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
def prolq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
def prorq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def prold128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
def prord128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
def prolq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
def prorq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def prold256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
def prord256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
def prolq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
Expand Down
24 changes: 24 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3325,6 +3325,30 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return LHS.lshr(RHS.getZExtValue());
});

case clang::X86::BI__builtin_ia32_vprotbi:
case clang::X86::BI__builtin_ia32_vprotdi:
case clang::X86::BI__builtin_ia32_vprotqi:
case clang::X86::BI__builtin_ia32_vprotwi:
case clang::X86::BI__builtin_ia32_prold128:
case clang::X86::BI__builtin_ia32_prold256:
case clang::X86::BI__builtin_ia32_prold512:
case clang::X86::BI__builtin_ia32_prolq128:
case clang::X86::BI__builtin_ia32_prolq256:
case clang::X86::BI__builtin_ia32_prolq512:
return interp__builtin_elementwise_int_binop(
S, OpPC, Call, BuiltinID,
[](const APSInt &LHS, const APSInt &RHS) { return LHS.rotl(RHS); });

case clang::X86::BI__builtin_ia32_prord128:
case clang::X86::BI__builtin_ia32_prord256:
case clang::X86::BI__builtin_ia32_prord512:
case clang::X86::BI__builtin_ia32_prorq128:
case clang::X86::BI__builtin_ia32_prorq256:
case clang::X86::BI__builtin_ia32_prorq512:
return interp__builtin_elementwise_int_binop(
S, OpPC, Call, BuiltinID,
[](const APSInt &LHS, const APSInt &RHS) { return LHS.rotr(RHS); });

case Builtin::BI__builtin_elementwise_max:
case Builtin::BI__builtin_elementwise_min:
return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID);
Expand Down
56 changes: 56 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11835,6 +11835,62 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {

return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
case clang::X86::BI__builtin_ia32_vprotbi:
case clang::X86::BI__builtin_ia32_vprotdi:
case clang::X86::BI__builtin_ia32_vprotqi:
case clang::X86::BI__builtin_ia32_vprotwi:
case clang::X86::BI__builtin_ia32_prold128:
case clang::X86::BI__builtin_ia32_prold256:
case clang::X86::BI__builtin_ia32_prold512:
case clang::X86::BI__builtin_ia32_prolq128:
case clang::X86::BI__builtin_ia32_prolq256:
case clang::X86::BI__builtin_ia32_prolq512: {
APValue SourceLHS, SourceRHS;
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
return false;

QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
unsigned SourceLen = SourceLHS.getVectorLength();
SmallVector<APValue, 4> ResultElements;
ResultElements.reserve(SourceLen);

APSInt RHS = SourceRHS.getInt();

for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
const APSInt &LHS = SourceLHS.getVectorElt(EltNum).getInt();
ResultElements.push_back(APValue(APSInt(LHS.rotl(RHS), DestUnsigned)));
}

return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
case clang::X86::BI__builtin_ia32_prord128:
case clang::X86::BI__builtin_ia32_prord256:
case clang::X86::BI__builtin_ia32_prord512:
case clang::X86::BI__builtin_ia32_prorq128:
case clang::X86::BI__builtin_ia32_prorq256:
case clang::X86::BI__builtin_ia32_prorq512: {
APValue SourceLHS, SourceRHS;
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
return false;

QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
unsigned SourceLen = SourceLHS.getVectorLength();
SmallVector<APValue, 4> ResultElements;
ResultElements.reserve(SourceLen);

APSInt RHS = SourceRHS.getInt();

for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
const APSInt &LHS = SourceLHS.getVectorElt(EltNum).getInt();
ResultElements.push_back(APValue(APSInt(LHS.rotr(RHS), DestUnsigned)));
}

return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
case Builtin::BI__builtin_elementwise_max:
case Builtin::BI__builtin_elementwise_min: {
APValue SourceLHS, SourceRHS;
Expand Down
17 changes: 12 additions & 5 deletions clang/test/CodeGen/X86/avx512f-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -4004,7 +4004,6 @@ __m512i test_mm512_cvtepu32_epi64(__m256i __X) {
// CHECK: zext <8 x i32> %{{.*}} to <8 x i64>
return _mm512_cvtepu32_epi64(__X);
}

TEST_CONSTEXPR(match_v8di(_mm512_cvtepu32_epi64(_mm256_setr_epi32(-70000, 2, -1, 0, 1, -2, 3, -4)), 4294897296, 2, 4294967295, 0, 1, 4294967294, 3, 4294967292));

__m512i test_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
Expand All @@ -4026,7 +4025,6 @@ __m512i test_mm512_cvtepu16_epi32(__m256i __A) {
// CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
return _mm512_cvtepu16_epi32(__A);
}

TEST_CONSTEXPR(match_v16si(_mm512_cvtepu16_epi32(_mm256_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 65236, 2, 65535, 0, 1, 65534, 3, 65532, 5, 65530, 7, 65528, 9, 65526, 11, 65524));

__m512i test_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
Expand All @@ -4048,7 +4046,6 @@ __m512i test_mm512_cvtepu16_epi64(__m128i __A) {
// CHECK: zext <8 x i16> %{{.*}} to <8 x i64>
return _mm512_cvtepu16_epi64(__A);
}

TEST_CONSTEXPR(match_v8di(_mm512_cvtepu16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0, 1, 65534, 3, 65532));

__m512i test_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
Expand All @@ -4065,46 +4062,51 @@ __m512i test_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) {
return _mm512_maskz_cvtepu16_epi64(__U, __A);
}


__m512i test_mm512_rol_epi32(__m512i __A) {
// CHECK-LABEL: test_mm512_rol_epi32
// CHECK: @llvm.fshl.v16i32
return _mm512_rol_epi32(__A, 5);
}
TEST_CONSTEXPR(match_v16si(_mm512_rol_epi32(((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 32, -33, 96, -97, -129, 192, -193, 256, 288, -289, 352, -353, 416, -417, 480, -481));

__m512i test_mm512_mask_rol_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_rol_epi32
// CHECK: @llvm.fshl.v16i32
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_rol_epi32(__W, __U, __A, 5);
}
TEST_CONSTEXPR(match_v16si(_mm512_mask_rol_epi32(((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}), 0xC873, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 32, -33, 99, 99, -129, 192, -193, 99, 99, 99, 99, -353, 99, 99, 480, -481));

__m512i test_mm512_maskz_rol_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_rol_epi32
// CHECK: @llvm.fshl.v16i32
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_rol_epi32(__U, __A, 5);
}
TEST_CONSTEXPR(match_v16si(_mm512_maskz_rol_epi32(0x378C, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 0, 0, 96, -97, 0, 0, 0, 256, 288, -289, 352, 0, 416, -417, 0, 0));

__m512i test_mm512_rol_epi64(__m512i __A) {
// CHECK-LABEL: test_mm512_rol_epi64
// CHECK: @llvm.fshl.v8i64
return _mm512_rol_epi64(__A, 5);
}
TEST_CONSTEXPR(match_v8di(_mm512_rol_epi64(((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, -97, -129, 192, -193, 256));

__m512i test_mm512_mask_rol_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_rol_epi64
// CHECK: @llvm.fshl.v8i64
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_rol_epi64(__W, __U, __A, 5);
}
TEST_CONSTEXPR(match_v8di(_mm512_mask_rol_epi64(((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 99, 99, -129, 192, -193, 99));

__m512i test_mm512_maskz_rol_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_rol_epi64
// CHECK: @llvm.fshl.v8i64
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_rol_epi64(__U, __A, 5);
}
TEST_CONSTEXPR(match_v8di(_mm512_maskz_rol_epi64(0x37, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, 0, -129, 192, 0, 0));

__m512i test_mm512_rolv_epi32(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_rolv_epi32
Expand Down Expand Up @@ -4151,41 +4153,46 @@ __m512i test_mm512_ror_epi32(__m512i __A) {
// CHECK: @llvm.fshr.v16i32
return _mm512_ror_epi32(__A, 5);
}
TEST_CONSTEXPR(match_v16si(_mm512_ror_epi32(((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 134217728, -134217729, 402653184, -402653185, -536870913, 805306368, -805306369, 1073741824, 1207959552, -1207959553, 1476395008, -1476395009, 1744830464, -1744830465, 2013265920, -2013265921));

__m512i test_mm512_mask_ror_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_ror_epi32
// CHECK: @llvm.fshr.v16i32
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_ror_epi32(__W, __U, __A, 5);
}
TEST_CONSTEXPR(match_v16si(_mm512_mask_ror_epi32(((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}), 0xC873, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 134217728, -134217729, 99, 99, -536870913, 805306368, -805306369, 99, 99, 99, 99, -1476395009, 99, 99, 2013265920, -2013265921));

__m512i test_mm512_maskz_ror_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_ror_epi32
// CHECK: @llvm.fshr.v16i32
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_ror_epi32(__U, __A, 5);
}
TEST_CONSTEXPR(match_v16si(_mm512_maskz_ror_epi32(0x378C, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 0, 0, 402653184, -402653185, 0, 0, 0, 1073741824, 1207959552, -1207959553, 1476395008, 0, 1744830464, -1744830465, 0, 0));

__m512i test_mm512_ror_epi64(__m512i __A) {
// CHECK-LABEL: test_mm512_ror_epi64
// CHECK: @llvm.fshr.v8i64
return _mm512_ror_epi64(__A, 5);
}
TEST_CONSTEXPR(match_v8di(_mm512_ror_epi64(((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 1729382256910270464LL, -1729382256910270465LL, -2305843009213693953LL, 3458764513820540928LL, -3458764513820540929LL, 4611686018427387904LL));

__m512i test_mm512_mask_ror_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_ror_epi64
// CHECK: @llvm.fshr.v8i64
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_ror_epi64(__W, __U, __A, 5);
}
TEST_CONSTEXPR(match_v8di(_mm512_mask_ror_epi64(((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 99, 99, -2305843009213693953LL, 3458764513820540928LL, -3458764513820540929LL, 99));

__m512i test_mm512_maskz_ror_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_ror_epi64
// CHECK: @llvm.fshr.v8i64
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_ror_epi64(__U, __A, 5);
}

TEST_CONSTEXPR(match_v8di(_mm512_maskz_ror_epi64(0x37, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 1729382256910270464LL, 0, -2305843009213693953LL, 3458764513820540928LL, 0, 0));

__m512i test_mm512_rorv_epi32(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_rorv_epi32
Expand Down
Loading