Skip to content

Commit 047cd91

Browse files
phoebewang authored and tstellar committed
[X86][EVEX512] Add HasEVEX512 when NoVLX used for 512-bit patterns (#91106)
With KNL/KNC being deprecated, we no longer need to care about such NoVLX cases. We may remove these patterns in the future. Fixes #90844. (cherry picked from commit 7963d9a)
1 parent 58e44d3 commit 047cd91

File tree

3 files changed

+43
-22
lines changed

3 files changed

+43
-22
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+3-1
Original file line number | Diff line number | Diff line change
@@ -29841,7 +29841,9 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
2984129841
return R;
2984229842

2984329843
// AVX512 implicitly uses modulo rotation amounts.
29844-
if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) {
29844+
if ((Subtarget.hasVLX() ||
29845+
(Subtarget.hasAVX512() && Subtarget.hasEVEX512())) &&
29846+
32 <= EltSizeInBits) {
2984529847
// Attempt to rotate by immediate.
2984629848
if (IsCstSplat) {
2984729849
unsigned RotOpc = IsROTL ? X86ISD::VROTLI : X86ISD::VROTRI;

llvm/lib/Target/X86/X86InstrAVX512.td

+21-21
Original file line number | Diff line number | Diff line change
@@ -814,7 +814,7 @@ defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
814814

815815
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
816816
// smaller extract to enable EVEX->VEX.
817-
let Predicates = [NoVLX] in {
817+
let Predicates = [NoVLX, HasEVEX512] in {
818818
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
819819
(v2i64 (VEXTRACTI128rr
820820
(v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
@@ -3068,7 +3068,7 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
30683068
addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
30693069
}
30703070

3071-
let Predicates = [HasAVX512, NoVLX] in {
3071+
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
30723072
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
30733073
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
30743074

@@ -3099,7 +3099,7 @@ let Predicates = [HasAVX512, NoVLX] in {
30993099
defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
31003100
}
31013101

3102-
let Predicates = [HasBWI, NoVLX] in {
3102+
let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
31033103
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
31043104
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
31053105

@@ -3493,7 +3493,7 @@ multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
34933493

34943494
// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
34953495
// available. Use a 512-bit operation and extract.
3496-
let Predicates = [HasAVX512, NoVLX] in {
3496+
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
34973497
defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
34983498
defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
34993499
defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
@@ -3505,7 +3505,7 @@ let Predicates = [HasAVX512, NoVLX] in {
35053505
defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
35063506
}
35073507

3508-
let Predicates = [HasBWI, NoVLX] in {
3508+
let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
35093509
defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
35103510
defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
35113511

@@ -4998,8 +4998,8 @@ defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
49984998
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
49994999
SchedWriteVecALU, HasAVX512, 1>, T8;
50005000

5001-
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
5002-
let Predicates = [HasDQI, NoVLX] in {
5001+
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX, HasEVEX512.
5002+
let Predicates = [HasDQI, NoVLX, HasEVEX512] in {
50035003
def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
50045004
(EXTRACT_SUBREG
50055005
(VPMULLQZrr
@@ -5055,7 +5055,7 @@ multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
50555055
sub_xmm)>;
50565056
}
50575057

5058-
let Predicates = [HasAVX512, NoVLX] in {
5058+
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
50595059
defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
50605060
defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
50615061
defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
@@ -6032,7 +6032,7 @@ defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
60326032
SchedWriteVecShift>;
60336033

60346034
// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
6035-
let Predicates = [HasAVX512, NoVLX] in {
6035+
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
60366036
def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
60376037
(EXTRACT_SUBREG (v8i64
60386038
(VPSRAQZrr
@@ -6161,14 +6161,14 @@ defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecS
61616161
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
61626162
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
61636163

6164-
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6165-
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6166-
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6167-
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6164+
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX, HasEVEX512]>;
6165+
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX, HasEVEX512]>;
6166+
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX, HasEVEX512]>;
6167+
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX, HasEVEX512]>;
61686168

61696169

61706170
// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
6171-
let Predicates = [HasAVX512, NoVLX] in {
6171+
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
61726172
def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
61736173
(EXTRACT_SUBREG (v8i64
61746174
(VPROLVQZrr
@@ -6219,7 +6219,7 @@ let Predicates = [HasAVX512, NoVLX] in {
62196219
}
62206220

62216221
// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
6222-
let Predicates = [HasAVX512, NoVLX] in {
6222+
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
62236223
def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
62246224
(EXTRACT_SUBREG (v8i64
62256225
(VPRORVQZrr
@@ -9816,7 +9816,7 @@ defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
98169816
truncstore_us_vi8, masked_truncstore_us_vi8,
98179817
X86vtruncus, X86vmtruncus>;
98189818

9819-
let Predicates = [HasAVX512, NoVLX] in {
9819+
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
98209820
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
98219821
(v8i16 (EXTRACT_SUBREG
98229822
(v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
@@ -9827,7 +9827,7 @@ def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
98279827
VR256X:$src, sub_ymm)))), sub_xmm))>;
98289828
}
98299829

9830-
let Predicates = [HasBWI, NoVLX] in {
9830+
let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
98319831
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
98329832
(v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
98339833
VR256X:$src, sub_ymm))), sub_xmm))>;
@@ -10370,7 +10370,7 @@ multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
1037010370
defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
1037110371
EVEX_V128;
1037210372
}
10373-
let Predicates = [prd, NoVLX] in {
10373+
let Predicates = [prd, NoVLX, HasEVEX512] in {
1037410374
defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
1037510375
defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
1037610376
}
@@ -11157,7 +11157,7 @@ defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
1115711157
SchedWriteVecALU>;
1115811158

1115911159
// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
11160-
let Predicates = [HasAVX512, NoVLX] in {
11160+
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
1116111161
def : Pat<(v4i64 (abs VR256X:$src)),
1116211162
(EXTRACT_SUBREG
1116311163
(VPABSQZrr
@@ -11173,7 +11173,7 @@ let Predicates = [HasAVX512, NoVLX] in {
1117311173
// Use 512bit version to implement 128/256 bit.
1117411174
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
1117511175
AVX512VLVectorVTInfo _, Predicate prd> {
11176-
let Predicates = [prd, NoVLX] in {
11176+
let Predicates = [prd, NoVLX, HasEVEX512] in {
1117711177
def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
1117811178
(EXTRACT_SUBREG
1117911179
(!cast<Instruction>(InstrStr # "Zrr")
@@ -11792,7 +11792,7 @@ let Predicates = [HasAVX512] in {
1179211792
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
1179311793
}
1179411794

11795-
let Predicates = [HasAVX512, NoVLX] in {
11795+
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
1179611796
def : Pat<(v16i8 (vnot VR128X:$src)),
1179711797
(EXTRACT_SUBREG
1179811798
(VPTERNLOGQZrri

llvm/test/CodeGen/X86/pr90844.ll

+19
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,19 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-evex512 < %s | FileCheck %s
3+
4+
define void @PR90844() {
5+
; CHECK-LABEL: PR90844:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
8+
; CHECK-NEXT: vmovaps %xmm0, (%rax)
9+
; CHECK-NEXT: retq
10+
entry:
11+
%0 = tail call <2 x i32> @llvm.fshl.v2i32(<2 x i32> poison, <2 x i32> poison, <2 x i32> <i32 8, i32 24>)
12+
%1 = and <2 x i32> %0, <i32 16711935, i32 -134152448>
13+
%2 = or disjoint <2 x i32> zeroinitializer, %1
14+
%3 = zext <2 x i32> %2 to <2 x i64>
15+
%4 = shl nuw <2 x i64> %3, <i64 32, i64 32>
16+
%5 = or disjoint <2 x i64> %4, zeroinitializer
17+
store <2 x i64> %5, ptr poison, align 16
18+
ret void
19+
}

0 commit comments

Comments
 (0)