Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,17 @@ computeFullLaneShuffleMask(CIRGenFunction &cgf, const mlir::Value vec,

outIndices.resize(numElts);
}
static mlir::Value emitX86CompressExpand(CIRGenBuilderTy &builder,
mlir::Location loc, mlir::Value source,
mlir::Value mask,
mlir::Value inputVector,
const std::string &id) {
auto resultTy = cast<cir::VectorType>(mask.getType());
mlir::Value maskValue = getMaskVecValue(
builder, loc, inputVector, cast<cir::VectorType>(resultTy).getSize());
return emitIntrinsicCallOp(builder, loc, id, resultTy,
mlir::ValueRange{source, mask, maskValue});
}

static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
mlir::Location loc,
Expand Down Expand Up @@ -712,6 +723,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_compressstoreqi128_mask:
case X86::BI__builtin_ia32_compressstoreqi256_mask:
case X86::BI__builtin_ia32_compressstoreqi512_mask:
cgm.errorNYI(expr->getSourceRange(),
std::string("unimplemented X86 builtin call: ") +
getContext().BuiltinInfo.getName(builtinID));
return {};
case X86::BI__builtin_ia32_expanddf128_mask:
case X86::BI__builtin_ia32_expanddf256_mask:
case X86::BI__builtin_ia32_expanddf512_mask:
Expand All @@ -729,7 +744,11 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_expandhi512_mask:
case X86::BI__builtin_ia32_expandqi128_mask:
case X86::BI__builtin_ia32_expandqi256_mask:
case X86::BI__builtin_ia32_expandqi512_mask:
case X86::BI__builtin_ia32_expandqi512_mask: {
mlir::Location loc = getLoc(expr->getExprLoc());
return emitX86CompressExpand(builder, loc, ops[0], ops[1], ops[2],
"x86.avx512.mask.expand");
}
case X86::BI__builtin_ia32_compressdf128_mask:
case X86::BI__builtin_ia32_compressdf256_mask:
case X86::BI__builtin_ia32_compressdf512_mask:
Expand All @@ -747,11 +766,11 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_compresshi512_mask:
case X86::BI__builtin_ia32_compressqi128_mask:
case X86::BI__builtin_ia32_compressqi256_mask:
case X86::BI__builtin_ia32_compressqi512_mask:
cgm.errorNYI(expr->getSourceRange(),
std::string("unimplemented X86 builtin call: ") +
getContext().BuiltinInfo.getName(builtinID));
return {};
case X86::BI__builtin_ia32_compressqi512_mask: {
mlir::Location loc = getLoc(expr->getExprLoc());
return emitX86CompressExpand(builder, loc, ops[0], ops[1], ops[2],
"x86.avx512.mask.compress");
}
case X86::BI__builtin_ia32_gather3div2df:
case X86::BI__builtin_ia32_gather3div2di:
case X86::BI__builtin_ia32_gather3div4df:
Expand Down
33 changes: 33 additions & 0 deletions clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,3 +199,36 @@ __m256i test_mm256_mask_i32gather_epi32(__m256i __v1_old, __mmask8 __mask, __m25
// OGCG: @llvm.x86.avx512.mask.gather3siv8.si
return _mm256_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2);
}

__m128d test_mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A) {
// CIR-LABEL: _mm_mask_expand_pd
// CIR: %[[MASK:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
// CIR: %[[SHUF:.*]] = cir.vec.shuffle(%[[MASK]], %[[MASK]] : !cir.vector<8 x !cir.int<u, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x !cir.int<u, 1>>

// LLVM-LABEL: test_mm_mask_expand_pd
// LLVM: %[[BC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
// LLVM: %[[SHUF:.*]] = shufflevector <8 x i1> %[[BC]], <8 x i1> %[[BC]], <2 x i32> <i32 0, i32 1>

// OGCG-LABEL: test_mm_mask_expand_pd
// OGCG: %[[BC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
// OGCG: %[[SHUF:.*]] = shufflevector <8 x i1> %[[BC]], <8 x i1> %[[BC]], <2 x i32> <i32 0, i32 1>

return _mm_mask_expand_pd(__W,__U,__A);
}

__m128d test_mm_maskz_expand_pd(__mmask8 __U, __m128d __A) {
// CIR-LABEL: _mm_maskz_expand_pd
// CIR: %[[MASK:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
// CIR: %[[SHUF:.*]] = cir.vec.shuffle(%[[MASK]], %[[MASK]] : !cir.vector<8 x !cir.int<u, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x !cir.int<u, 1>>

// LLVM-LABEL: test_mm_maskz_expand_pd
// LLVM: %[[BC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
// LLVM: %[[SHUF:.*]] = shufflevector <8 x i1> %[[BC]], <8 x i1> %[[BC]], <2 x i32> <i32 0, i32 1>

// OGCG-LABEL: test_mm_maskz_expand_pd
// OGCG: %[[BC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
// OGCG: %[[SHUF:.*]] = shufflevector <8 x i1> %[[BC]], <8 x i1> %[[BC]], <2 x i32> <i32 0, i32 1>

return _mm_maskz_expand_pd(__U,__A);
}

171 changes: 171 additions & 0 deletions clang/test/CIR/CodeGenBuiltins/X86/avx512vlvbmi2-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@

// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

#include <immintrin.h>


__m128i test_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) {
// CIR-LABEL: test_mm_mask_compress_epi16
// %[[MASK8:.+]] = cir.cast bitcast %{{.+}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
// %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.compress" %{{.+}}, %{{.+}}, %[[MASK8]]: (!cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>, !cir.vector<8 x !cir.int<u, 1>>) -> !cir.vector<8 x !s16i>
// %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>

// LLVM-LABEL: test_mm_mask_compress_epi16
// %[[MASK8:.+]] = bitcast i8 %{{.+}} to <8 x i1>
// %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK8]])
// %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>

// OGCG-LABEL: test_mm_mask_compress_epi16
// %[[MASK8:.+]] = bitcast i8 %{{.+}} to <8 x i1>
// %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK8]])
// %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>

return _mm_mask_compress_epi16(__S, __U, __D);
}

__m128i test_mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) {
// CIR-LABEL: test_mm_maskz_compress_epi16
// %[[MASK8:.+]] = cir.cast bitcast %{{.+}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
// %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.compress" %{{.+}}, %{{.+}}, %[[MASK8]]: (!cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>, !cir.vector<8 x !cir.int<u, 1>>) -> !cir.vector<8 x !s16i>
// %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>

// LLVM-LABEL: test_mm_maskz_compress_epi16
// %[[MASK8:.+]] = bitcast i8 %{{.+}} to <8 x i1>
// %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK8]])
// %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>

// OGCG-LABEL: test_mm_maskz_compress_epi16
// %[[MASK8:.+]] = bitcast i8 %{{.+}} to <8 x i1>
// %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK8]])
// %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>

return _mm_maskz_compress_epi16(__U, __D);
}

__m128i test_mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) {
// CIR-LABEL: test_mm_mask_compress_epi8
// %[[MASK16:.+]] = cir.cast bitcast %{{.+}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
// %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.compress" %{{.+}}, %{{.+}}, %[[MASK16]]: (!cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>, !cir.vector<16 x !cir.int<u, 1>>) -> !cir.vector<16 x !s8i>
// %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>

// LLVM-LABEL: test_mm_mask_compress_epi8
// %[[MASK16:.+]] = bitcast i16 %{{.+}} to <16 x i1>
// %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i1> %[[MASK16]])
// %[[CAST:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>

// OGCG-LABEL: test_mm_mask_compress_epi8
// %[[MASK16:.+]] = bitcast i16 %{{.+}} to <16 x i1>
// %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i1> %[[MASK16]])
// %[[CAST:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>

return _mm_mask_compress_epi8(__S, __U, __D);
}

__m128i test_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) {
// CIR-LABEL: test_mm_maskz_compress_epi8
// %[[ZERO:.+]] = cir.call @_mm_setzero_si128() : () -> !cir.vector<2 x !s64i>
// %[[CAST1:.+]] = cir.cast bitcast %[[ZERO]] : !cir.vector<2 x !s64i> -> !cir.vector<16 x !s8i>
// %[[MASK16:.+]] = cir.cast bitcast %{{.+}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
// %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.compress" %{{.+}}, %[[CAST1]], %[[MASK16]]: (!cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>, !cir.vector<16 x !cir.int<u, 1>>) -> !cir.vector<16 x !s8i>
// %[[CAST2:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>

// LLVM-LABEL: test_mm_maskz_compress_epi8
// store <2 x i64> zeroinitializer, ptr %{{.+}}, align 16
// %[[CAST1:.+]] = bitcast <2 x i64> %{{.+}} to <16 x i8>
// %[[MASK16:.+]] = bitcast i16 %{{.+}} to <16 x i1>
// %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %{{.+}}, <16 x i8> %[[CAST1]], <16 x i1> %[[MASK16]])
// %[[CAST2:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>

// OGCG-LABEL: test_mm_maskz_compress_epi8
// store <2 x i64> zeroinitializer, ptr %{{.+}}, align 16
// %[[CAST1:.+]] = bitcast <2 x i64> %{{.+}} to <16 x i8>
// %[[MASK16:.+]] = bitcast i16 %{{.+}} to <16 x i1>
// %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %{{.+}}, <16 x i8> %[[CAST1]], <16 x i1> %[[MASK16]])
// %[[CAST2:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>

return _mm_maskz_compress_epi8(__U, __D);
}

__m128i test_mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) {
// CIR-LABEL: test_mm_mask_expand_epi16
// %[[MASK16:.+]] = cir.cast bitcast %{{.+}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
// %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.expand" %{{.+}}, %{{.+}}, %[[MASK16]]: (!cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>, !cir.vector<8 x !cir.int<u, 1>>) -> !cir.vector<8 x !s16i>
// %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>

// LLVM-LABEL: test_mm_mask_expand_epi16
// %[[MASK16:.+]] = bitcast i8 %{{.+}} to <8 x i1>
// %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK16]])
// %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>

// OGCG-LABEL: test_mm_mask_expand_epi16
// %[[MASK16:.+]] = bitcast i8 %{{.+}} to <8 x i1>
// %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK16]])
// %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>

return _mm_mask_expand_epi16(__S, __U, __D);
}

__m128i test_mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) {
// CIR-LABEL: test_mm_maskz_expand_epi16
// %[[MASK:.+]] = cir.cast bitcast %{{.+}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
// %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.expand" %{{.+}}, %{{.+}}, %[[MASK]]: (!cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>, !cir.vector<8 x !cir.int<u, 1>>) -> !cir.vector<8 x !s16i>
// %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>

// LLVM-LABEL: test_mm_maskz_expand_epi16
// %[[MASK:.+]] = bitcast i8 %{{.+}} to <8 x i1>
// %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK]])
// %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>

// OGCG-LABEL: test_mm_maskz_expand_epi16
// %[[MASK:.+]] = bitcast i8 %{{.+}} to <8 x i1>
// %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK]])
// %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>

return _mm_maskz_expand_epi16(__U, __D);
}

__m128i test_mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) {
// CIR-LABEL: test_mm_mask_expand_epi8
// %[[MASK:.+]] = cir.cast bitcast %{{.+}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
// %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.expand" %{{.+}}, %{{.+}}, %[[MASK]]: (!cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>, !cir.vector<16 x !cir.int<u, 1>>) -> !cir.vector<16 x !s8i>
// %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>

// LLVM-LABEL: test_mm_mask_expand_epi8
// %[[MASK:.+]] = bitcast i16 %{{.+}} to <16 x i1>
// %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i1> %[[MASK]])
// %[[CAST:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>

// OGCG-LABEL: test_mm_mask_expand_epi8
// %[[MASK:.+]] = bitcast i16 %{{.+}} to <16 x i1>
// %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i1> %[[MASK]])
// %[[CAST:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>

return _mm_mask_expand_epi8(__S, __U, __D);
}

__m128i test_mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) {
// CIR-LABEL: test_mm_maskz_expand_epi8
// %[[MASK:.+]] = cir.cast bitcast %{{.+}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
// %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.expand" %{{.+}}, %{{.+}}, %[[MASK]]: (!cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>, !cir.vector<16 x !cir.int<u, 1>>) -> !cir.vector<16 x !s8i>
// %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>

// LLVM-LABEL: test_mm_maskz_expand_epi8
// %[[MASK:.+]] = bitcast i16 %{{.+}} to <16 x i1>
// %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i1> %[[MASK]])
// %[[CAST:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>

// OGCG-LABEL: test_mm_maskz_expand_epi8
// %[[MASK:.+]] = bitcast i16 %{{.+}} to <16 x i1>
// %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i1> %[[MASK]])
// %[[CAST:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>

return _mm_maskz_expand_epi8(__U, __D);
}
Loading