Skip to content

Commit 8d120da

Browse files
committed
[CIR][CIRGen][Builtin][X86] Masked compress Intrinsics
This PR is related to issue #167765. It adds support for the masked compress builtins in CIR CodeGen.
1 parent 961bd15 commit 8d120da

File tree

3 files changed

+227
-10
lines changed

3 files changed

+227
-10
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,16 @@ static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, mlir::Location loc,
8888
}
8989
return maskVec;
9090
}
91-
static mlir::Value emitX86CompressExpand(CIRGenFunction &cgf, const CallExpr *expr,ArrayRef<mlir::Value> ops, bool IsCompress, const std::string &ID){
92-
auto ResultTy = cast<cir::VectorType>(ops[1].getType());
93-
mlir::Value MaskValue = getMaskVecValue(cgf, expr, ops[2], cast<cir::VectorType>(ResultTy).getSize());
94-
llvm::SmallVector<mlir::Value, 4> op{ops[0], ops[1], MaskValue};
95-
96-
return emitIntrinsicCallOp(cgf,expr, ID, ResultTy, op);
97-
91+
/// Emit the intrinsic call shared by the masked compress and expand builtins.
///
/// `inputVector` is passed through getMaskVecValue together with the element
/// count of `mask`'s vector type; the resulting value becomes the third
/// operand of the call. The intrinsic named by `id` is then invoked with
/// {source, mask, <converted value>} and returns the vector type of `mask`.
///
/// NOTE(review): the parameter names look shifted relative to how they are
/// used -- `mask` supplies the result type, while `inputVector` is the value
/// converted into the mask vector. Confirm against the call sites before
/// renaming.
static mlir::Value emitX86CompressExpand(CIRGenBuilderTy &builder,
                                         mlir::Location loc, mlir::Value source,
                                         mlir::Value mask,
                                         mlir::Value inputVector,
                                         const std::string &id) {
  auto vecTy = cast<cir::VectorType>(mask.getType());
  mlir::Value maskVec =
      getMaskVecValue(builder, loc, inputVector, vecTy.getSize());
  return emitIntrinsicCallOp(builder, loc, id, vecTy,
                             mlir::ValueRange{source, mask, maskVec});
}
99102

100103
static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
@@ -657,6 +660,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
657660
case X86::BI__builtin_ia32_compressstoreqi128_mask:
658661
case X86::BI__builtin_ia32_compressstoreqi256_mask:
659662
case X86::BI__builtin_ia32_compressstoreqi512_mask:
663+
cgm.errorNYI(expr->getSourceRange(),
664+
std::string("unimplemented X86 builtin call: ") +
665+
getContext().BuiltinInfo.getName(builtinID));
666+
return {};
660667
case X86::BI__builtin_ia32_expanddf128_mask:
661668
case X86::BI__builtin_ia32_expanddf256_mask:
662669
case X86::BI__builtin_ia32_expanddf512_mask:
@@ -674,7 +681,11 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
674681
case X86::BI__builtin_ia32_expandhi512_mask:
675682
case X86::BI__builtin_ia32_expandqi128_mask:
676683
case X86::BI__builtin_ia32_expandqi256_mask:
677-
case X86::BI__builtin_ia32_expandqi512_mask:
684+
case X86::BI__builtin_ia32_expandqi512_mask:{
685+
mlir::Location loc = getLoc(expr->getExprLoc());
686+
return emitX86CompressExpand(builder, loc, ops[0], ops[1], ops[2],
687+
"x86.avx512.mask.expand");
688+
}
678689
case X86::BI__builtin_ia32_compressdf128_mask:
679690
case X86::BI__builtin_ia32_compressdf256_mask:
680691
case X86::BI__builtin_ia32_compressdf512_mask:
@@ -693,8 +704,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
693704
case X86::BI__builtin_ia32_compressqi128_mask:
694705
case X86::BI__builtin_ia32_compressqi256_mask:
695706
case X86::BI__builtin_ia32_compressqi512_mask:{
696-
return emitX86CompressExpand(*this, expr, ops, true, "x86_avx512_mask_compress");
697-
}
707+
mlir::Location loc = getLoc(expr->getExprLoc());
708+
return emitX86CompressExpand(builder, loc, ops[0], ops[1], ops[2],
709+
"x86.avx512.mask.compress");
710+
}
698711
case X86::BI__builtin_ia32_gather3div2df:
699712
case X86::BI__builtin_ia32_gather3div2di:
700713
case X86::BI__builtin_ia32_gather3div4df:

clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,3 +199,36 @@ __m256i test_mm256_mask_i32gather_epi32(__m256i __v1_old, __mmask8 __mask, __m25
199199
// OGCG: @llvm.x86.avx512.mask.gather3siv8.si
200200
return _mm256_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2);
201201
}
202+
203+
// Exercises _mm_mask_expand_pd. The patterns below match only the bitcast of
// the 8-bit mask to an 8 x i1 vector and the shuffle that keeps lanes 0 and 1;
// no pattern in this function matches the expand intrinsic call itself.
__m128d test_mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A) {
204+
// CIR-LABEL: _mm_mask_expand_pd
205+
// CIR: %[[MASK:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
206+
// CIR: %[[SHUF:.*]] = cir.vec.shuffle(%[[MASK]], %[[MASK]] : !cir.vector<8 x !cir.int<u, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x !cir.int<u, 1>>
207+
208+
// LLVM-LABEL: test_mm_mask_expand_pd
209+
// LLVM: %[[BC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
210+
// LLVM: %[[SHUF:.*]] = shufflevector <8 x i1> %[[BC]], <8 x i1> %[[BC]], <2 x i32> <i32 0, i32 1>
211+
212+
// OGCG-LABEL: test_mm_mask_expand_pd
213+
// OGCG: %[[BC2:.*]] = bitcast i8 %{{.*}} to <8 x i1>
214+
// OGCG: %[[SHUF2:.*]] = shufflevector <8 x i1> %[[BC2]], <8 x i1> %[[BC2]], <2 x i32> <i32 0, i32 1>
215+
216+
return _mm_mask_expand_pd(__W,__U,__A);
217+
}
218+
219+
// Exercises _mm_maskz_expand_pd. The patterns below match only the bitcast of
// the 8-bit mask to an 8 x i1 vector and the shuffle that keeps lanes 0 and 1;
// no pattern in this function matches the expand intrinsic call itself.
__m128d test_mm_maskz_expand_pd(__mmask8 __U, __m128d __A) {
220+
// CIR-LABEL: _mm_maskz_expand_pd
221+
// CIR: %[[MASK:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
222+
// CIR: %[[SHUF:.*]] = cir.vec.shuffle(%[[MASK]], %[[MASK]] : !cir.vector<8 x !cir.int<u, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x !cir.int<u, 1>>
223+
224+
// LLVM-LABEL: test_mm_maskz_expand_pd
225+
// LLVM: %[[BC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
226+
// LLVM: %[[SHUF:.*]] = shufflevector <8 x i1> %[[BC]], <8 x i1> %[[BC]], <2 x i32> <i32 0, i32 1>
227+
228+
// OGCG-LABEL: test_mm_maskz_expand_pd
229+
// OGCG: %[[BC2:.*]] = bitcast i8 %{{.*}} to <8 x i1>
230+
// OGCG: %[[SHUF2:.*]] = shufflevector <8 x i1> %[[BC2]], <8 x i1> %[[BC2]], <2 x i32> <i32 0, i32 1>
231+
232+
return _mm_maskz_expand_pd(__U,__A);
233+
}
234+
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
2+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
3+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
4+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
5+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
6+
7+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
8+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
9+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
10+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
11+
12+
#include <immintrin.h>
13+
14+
15+
// Exercises _mm_mask_compress_epi16 with a __mmask8 mask.
// NOTE(review): apart from the *-LABEL lines, the pattern comments in this
// function have no FileCheck prefix, so they are presumably never matched --
// confirm whether the prefixes were dropped by accident. The OGCG patterns
// also have no corresponding RUN line among those visible in this file.
__m128i test_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) {
16+
// CIR-LABEL: test_mm_mask_compress_epi16
17+
// %[[MASK8:.+]] = cir.cast bitcast %{{.+}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
18+
// %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.compress" %{{.+}}, %{{.+}}, %[[MASK8]]: (!cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>, !cir.vector<8 x !cir.int<u, 1>>) -> !cir.vector<8 x !s16i>
19+
// %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>
20+
21+
// LLVM-LABEL: test_mm_mask_compress_epi16
22+
// %[[MASK8:.+]] = bitcast i8 %{{.+}} to <8 x i1>
23+
// %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK8]])
24+
// %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>
25+
26+
// OGCG-LABEL: test_mm_mask_compress_epi16
27+
// %[[MASK8:.+]] = bitcast i8 %{{.+}} to <8 x i1>
28+
// %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK8]])
29+
// %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>
30+
31+
return _mm_mask_compress_epi16(__S, __U, __D);
32+
}
33+
34+
// Exercises _mm_maskz_compress_epi16 with a __mmask8 mask.
// NOTE(review): apart from the *-LABEL lines, the pattern comments in this
// function have no FileCheck prefix, so they are presumably never matched --
// confirm whether the prefixes were dropped by accident. The OGCG patterns
// also have no corresponding RUN line among those visible in this file.
__m128i test_mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) {
35+
// CIR-LABEL: test_mm_maskz_compress_epi16
36+
// %[[MASK8:.+]] = cir.cast bitcast %{{.+}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
37+
// %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.compress" %{{.+}}, %{{.+}}, %[[MASK8]]: (!cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>, !cir.vector<8 x !cir.int<u, 1>>) -> !cir.vector<8 x !s16i>
38+
// %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>
39+
40+
// LLVM-LABEL: test_mm_maskz_compress_epi16
41+
// %[[MASK8:.+]] = bitcast i8 %{{.+}} to <8 x i1>
42+
// %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK8]])
43+
// %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>
44+
45+
// OGCG-LABEL: test_mm_maskz_compress_epi16
46+
// %[[MASK8:.+]] = bitcast i8 %{{.+}} to <8 x i1>
47+
// %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK8]])
48+
// %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>
49+
50+
return _mm_maskz_compress_epi16(__U, __D);
51+
}
52+
53+
// Exercises _mm_mask_compress_epi8 with a __mmask16 mask.
// NOTE(review): apart from the *-LABEL lines, the pattern comments in this
// function have no FileCheck prefix, so they are presumably never matched --
// confirm whether the prefixes were dropped by accident. The OGCG patterns
// also have no corresponding RUN line among those visible in this file.
__m128i test_mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) {
54+
// CIR-LABEL: test_mm_mask_compress_epi8
55+
// %[[MASK16:.+]] = cir.cast bitcast %{{.+}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
56+
// %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.compress" %{{.+}}, %{{.+}}, %[[MASK16]]: (!cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>, !cir.vector<16 x !cir.int<u, 1>>) -> !cir.vector<16 x !s8i>
57+
// %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>
58+
59+
// LLVM-LABEL: test_mm_mask_compress_epi8
60+
// %[[MASK16:.+]] = bitcast i16 %{{.+}} to <16 x i1>
61+
// %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i1> %[[MASK16]])
62+
// %[[CAST:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>
63+
64+
// OGCG-LABEL: test_mm_mask_compress_epi8
65+
// %[[MASK16:.+]] = bitcast i16 %{{.+}} to <16 x i1>
66+
// %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i1> %[[MASK16]])
67+
// %[[CAST:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>
68+
69+
return _mm_mask_compress_epi8(__S, __U, __D);
70+
}
71+
72+
// Exercises _mm_maskz_compress_epi8 with a __mmask16 mask.
// NOTE(review): apart from the *-LABEL lines, the pattern comments in this
// function have no FileCheck prefix, so they are presumably never matched --
// confirm whether the prefixes were dropped by accident. The OGCG patterns
// also have no corresponding RUN line among those visible in this file.
__m128i test_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) {
73+
// CIR-LABEL: test_mm_maskz_compress_epi8
74+
// %[[ZERO:.+]] = cir.call @_mm_setzero_si128() : () -> !cir.vector<2 x !s64i>
75+
// %[[CAST1:.+]] = cir.cast bitcast %[[ZERO]] : !cir.vector<2 x !s64i> -> !cir.vector<16 x !s8i>
76+
// %[[MASK16:.+]] = cir.cast bitcast %{{.+}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
77+
// %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.compress" %{{.+}}, %[[CAST1]], %[[MASK16]]: (!cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>, !cir.vector<16 x !cir.int<u, 1>>) -> !cir.vector<16 x !s8i>
78+
// %[[CAST2:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>
79+
80+
// LLVM-LABEL: test_mm_maskz_compress_epi8
81+
// store <2 x i64> zeroinitializer, ptr %{{.+}}, align 16
82+
// %[[CAST1:.+]] = bitcast <2 x i64> %{{.+}} to <16 x i8>
83+
// %[[MASK16:.+]] = bitcast i16 %{{.+}} to <16 x i1>
84+
// %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %{{.+}}, <16 x i8> %[[CAST1]], <16 x i1> %[[MASK16]])
85+
// %[[CAST2:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>
86+
87+
// OGCG-LABEL: test_mm_maskz_compress_epi8
88+
// store <2 x i64> zeroinitializer, ptr %{{.+}}, align 16
89+
// %[[CAST1:.+]] = bitcast <2 x i64> %{{.+}} to <16 x i8>
90+
// %[[MASK16:.+]] = bitcast i16 %{{.+}} to <16 x i1>
91+
// %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %{{.+}}, <16 x i8> %[[CAST1]], <16 x i1> %[[MASK16]])
92+
// %[[CAST2:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>
93+
94+
return _mm_maskz_compress_epi8(__U, __D);
95+
}
96+
97+
// Exercises _mm_mask_expand_epi16 with a __mmask8 mask.
// NOTE(review): apart from the *-LABEL lines, the pattern comments in this
// function have no FileCheck prefix, so they are presumably never matched --
// confirm whether the prefixes were dropped by accident. The OGCG patterns
// also have no corresponding RUN line among those visible in this file.
__m128i test_mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) {
98+
// CIR-LABEL: test_mm_mask_expand_epi16
99+
// %[[MASK16:.+]] = cir.cast bitcast %{{.+}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
100+
// %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.expand" %{{.+}}, %{{.+}}, %[[MASK16]]: (!cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>, !cir.vector<8 x !cir.int<u, 1>>) -> !cir.vector<8 x !s16i>
101+
// %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>
102+
103+
// LLVM-LABEL: test_mm_mask_expand_epi16
104+
// %[[MASK16:.+]] = bitcast i8 %{{.+}} to <8 x i1>
105+
// %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK16]])
106+
// %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>
107+
108+
// OGCG-LABEL: test_mm_mask_expand_epi16
109+
// %[[MASK16:.+]] = bitcast i8 %{{.+}} to <8 x i1>
110+
// %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK16]])
111+
// %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>
112+
113+
return _mm_mask_expand_epi16(__S, __U, __D);
114+
}
115+
116+
// Exercises _mm_maskz_expand_epi16 with a __mmask8 mask.
// NOTE(review): apart from the *-LABEL lines, the pattern comments in this
// function have no FileCheck prefix, so they are presumably never matched --
// confirm whether the prefixes were dropped by accident. The OGCG patterns
// also have no corresponding RUN line among those visible in this file.
__m128i test_mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) {
117+
// CIR-LABEL: test_mm_maskz_expand_epi16
118+
// %[[MASK:.+]] = cir.cast bitcast %{{.+}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
119+
// %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.expand" %{{.+}}, %{{.+}}, %[[MASK]]: (!cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>, !cir.vector<8 x !cir.int<u, 1>>) -> !cir.vector<8 x !s16i>
120+
// %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>
121+
122+
// LLVM-LABEL: test_mm_maskz_expand_epi16
123+
// %[[MASK:.+]] = bitcast i8 %{{.+}} to <8 x i1>
124+
// %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK]])
125+
// %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>
126+
127+
// OGCG-LABEL: test_mm_maskz_expand_epi16
128+
// %[[MASK:.+]] = bitcast i8 %{{.+}} to <8 x i1>
129+
// %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK]])
130+
// %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>
131+
132+
return _mm_maskz_expand_epi16(__U, __D);
133+
}
134+
135+
// Exercises _mm_mask_expand_epi8 with a __mmask16 mask.
// NOTE(review): apart from the *-LABEL lines, the pattern comments in this
// function have no FileCheck prefix, so they are presumably never matched --
// confirm whether the prefixes were dropped by accident. The OGCG patterns
// also have no corresponding RUN line among those visible in this file.
__m128i test_mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) {
136+
// CIR-LABEL: test_mm_mask_expand_epi8
137+
// %[[MASK:.+]] = cir.cast bitcast %{{.+}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
138+
// %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.expand" %{{.+}}, %{{.+}}, %[[MASK]]: (!cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>, !cir.vector<16 x !cir.int<u, 1>>) -> !cir.vector<16 x !s8i>
139+
// %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>
140+
141+
// LLVM-LABEL: test_mm_mask_expand_epi8
142+
// %[[MASK:.+]] = bitcast i16 %{{.+}} to <16 x i1>
143+
// %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i1> %[[MASK]])
144+
// %[[CAST:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>
145+
146+
// OGCG-LABEL: test_mm_mask_expand_epi8
147+
// %[[MASK:.+]] = bitcast i16 %{{.+}} to <16 x i1>
148+
// %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i1> %[[MASK]])
149+
// %[[CAST:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>
150+
151+
return _mm_mask_expand_epi8(__S, __U, __D);
152+
}
153+
154+
// Exercises _mm_maskz_expand_epi8 with a __mmask16 mask.
// NOTE(review): apart from the *-LABEL lines, the pattern comments in this
// function have no FileCheck prefix, so they are presumably never matched --
// confirm whether the prefixes were dropped by accident. The OGCG patterns
// also have no corresponding RUN line among those visible in this file.
__m128i test_mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) {
155+
// CIR-LABEL: test_mm_maskz_expand_epi8
156+
// %[[MASK:.+]] = cir.cast bitcast %{{.+}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
157+
// %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.expand" %{{.+}}, %{{.+}}, %[[MASK]]: (!cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>, !cir.vector<16 x !cir.int<u, 1>>) -> !cir.vector<16 x !s8i>
158+
// %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>
159+
160+
// LLVM-LABEL: test_mm_maskz_expand_epi8
161+
// %[[MASK:.+]] = bitcast i16 %{{.+}} to <16 x i1>
162+
// %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i1> %[[MASK]])
163+
// %[[CAST:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>
164+
165+
// OGCG-LABEL: test_mm_maskz_expand_epi8
166+
// %[[MASK:.+]] = bitcast i16 %{{.+}} to <16 x i1>
167+
// %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i1> %[[MASK]])
168+
// %[[CAST:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>
169+
170+
return _mm_maskz_expand_epi8(__U, __D);
171+
}

0 commit comments

Comments
 (0)