Skip to content

Commit

Permalink
[X86] Support EGPR for KMOV
Browse files Browse the repository at this point in the history
KMOV is essential for copies between k-registers and GPRs.
After EGPR was introduced in llvm#70958, we should extend KMOV to support
these new registers.

TAG: CPU2017 builds successfully with the egpr feature enabled.
  • Loading branch information
KanRobert committed Nov 29, 2023
1 parent d0c8d41 commit 5adafcb
Show file tree
Hide file tree
Showing 14 changed files with 589 additions and 50 deletions.
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,8 @@ void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O,
O << "\t{vex2}";
else if (Flags & X86::IP_USE_VEX3)
O << "\t{vex3}";
else if (Flags & X86::IP_USE_EVEX)
else if ((Flags & X86::IP_USE_EVEX) ||
(TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitEVEXPrefix)
O << "\t{evex}";

if (Flags & X86::IP_USE_DISP8)
Expand Down
32 changes: 19 additions & 13 deletions llvm/lib/Target/X86/X86DomainReassignment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -619,16 +619,22 @@ void X86DomainReassignment::initConverters() {
std::make_unique<InstrReplacerDstCOPY>(From, To);
};

createReplacerDstCOPY(X86::MOVZX32rm16, X86::KMOVWkm);
createReplacerDstCOPY(X86::MOVZX64rm16, X86::KMOVWkm);
bool HasEGPR = STI->hasEGPR();
createReplacerDstCOPY(X86::MOVZX32rm16,
HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
createReplacerDstCOPY(X86::MOVZX64rm16,
HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);

createReplacerDstCOPY(X86::MOVZX32rr16, X86::KMOVWkk);
createReplacerDstCOPY(X86::MOVZX64rr16, X86::KMOVWkk);

if (STI->hasDQI()) {
createReplacerDstCOPY(X86::MOVZX16rm8, X86::KMOVBkm);
createReplacerDstCOPY(X86::MOVZX32rm8, X86::KMOVBkm);
createReplacerDstCOPY(X86::MOVZX64rm8, X86::KMOVBkm);
createReplacerDstCOPY(X86::MOVZX16rm8,
HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
createReplacerDstCOPY(X86::MOVZX32rm8,
HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
createReplacerDstCOPY(X86::MOVZX64rm8,
HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);

createReplacerDstCOPY(X86::MOVZX16rr8, X86::KMOVBkk);
createReplacerDstCOPY(X86::MOVZX32rr8, X86::KMOVBkk);
Expand All @@ -639,8 +645,8 @@ void X86DomainReassignment::initConverters() {
Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(From, To);
};

createReplacer(X86::MOV16rm, X86::KMOVWkm);
createReplacer(X86::MOV16mr, X86::KMOVWmk);
createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
createReplacer(X86::MOV16rr, X86::KMOVWkk);
createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
Expand All @@ -650,11 +656,11 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::XOR16rr, X86::KXORWrr);

if (STI->hasBWI()) {
createReplacer(X86::MOV32rm, X86::KMOVDkm);
createReplacer(X86::MOV64rm, X86::KMOVQkm);
createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm);
createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm);

createReplacer(X86::MOV32mr, X86::KMOVDmk);
createReplacer(X86::MOV64mr, X86::KMOVQmk);
createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);

createReplacer(X86::MOV32rr, X86::KMOVDkk);
createReplacer(X86::MOV64rr, X86::KMOVQkk);
Expand Down Expand Up @@ -695,8 +701,8 @@ void X86DomainReassignment::initConverters() {

createReplacer(X86::AND8rr, X86::KANDBrr);

createReplacer(X86::MOV8rm, X86::KMOVBkm);
createReplacer(X86::MOV8mr, X86::KMOVBmk);
createReplacer(X86::MOV8rm, HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk);
createReplacer(X86::MOV8rr, X86::KMOVBkk);

createReplacer(X86::NOT8r, X86::KNOTBrr);
Expand Down
13 changes: 9 additions & 4 deletions llvm/lib/Target/X86/X86ExpandPseudo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -466,10 +466,15 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
.addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
.addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));
bool HasEGPR = STI->hasEGPR();
auto MIBLo =
BuildMI(MBB, MBBI, DL,
TII->get(HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm))
.addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
auto MIBHi =
BuildMI(MBB, MBBI, DL,
TII->get(HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm))
.addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));

for (int i = 0; i < X86::AddrNumOperands; ++i) {
MIBLo.add(MBBI->getOperand(1 + i));
Expand Down
70 changes: 45 additions & 25 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Original file line number Diff line number Diff line change
Expand Up @@ -2853,46 +2853,56 @@ defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, E
// - copy from GPR to mask register and vice versa
//
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
string OpcodeStr, RegisterClass KRC,
ValueType vvt, X86MemOperand x86memop> {
string OpcodeStr, RegisterClass KRC, ValueType vvt,
X86MemOperand x86memop, string Suffix = ""> {
let explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in {
let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
Sched<[WriteMove]>;
def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set KRC:$dst, (vvt (load addr:$src)))]>,
Sched<[WriteLoad]>;
def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(store KRC:$src, addr:$dst)]>,
Sched<[WriteStore]>;
def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
Sched<[WriteMove]>;
def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set KRC:$dst, (vvt (load addr:$src)))]>,
Sched<[WriteLoad]>;
def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(store KRC:$src, addr:$dst)]>,
Sched<[WriteStore]>;
}
}

multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
string OpcodeStr,
RegisterClass KRC, RegisterClass GRC> {
let hasSideEffects = 0 in {
def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
Sched<[WriteMove]>;
def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
Sched<[WriteMove]>;
string OpcodeStr, RegisterClass KRC,
RegisterClass GRC, string Suffix = ""> {
let hasSideEffects = 0, explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in {
def kr#Suffix : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
Sched<[WriteMove]>;
def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
Sched<[WriteMove]>;
}
}

let Predicates = [HasDQI] in
let Predicates = [HasDQI, NoEGPR] in
defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
VEX, PD;
let Predicates = [HasDQI, HasEGPR, In64BitMode] in
defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">,
avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
EVEX, PD;

let Predicates = [HasAVX512] in
let Predicates = [HasAVX512, NoEGPR] in
defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
VEX, PS;
let Predicates = [HasAVX512, HasEGPR, In64BitMode] in
defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">,
avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
EVEX, PS;

let Predicates = [HasBWI] in {
let Predicates = [HasBWI, NoEGPR] in {
defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
VEX, PD, REX_W;
defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
Expand All @@ -2902,6 +2912,16 @@ let Predicates = [HasBWI] in {
defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
VEX, XD, REX_W;
}
let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">,
EVEX, PD, REX_W;
defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
EVEX, XD;
defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">,
EVEX, PS, REX_W;
defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
EVEX, XD, REX_W;
}

// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
Expand Down
33 changes: 26 additions & 7 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -495,10 +495,12 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
return false;
case X86::MOV8rm:
case X86::KMOVBkm:
case X86::KMOVBkm_EVEX:
MemBytes = 1;
return true;
case X86::MOV16rm:
case X86::KMOVWkm:
case X86::KMOVWkm_EVEX:
case X86::VMOVSHZrm:
case X86::VMOVSHZrm_alt:
MemBytes = 2;
Expand All @@ -511,6 +513,7 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
case X86::VMOVSSZrm:
case X86::VMOVSSZrm_alt:
case X86::KMOVDkm:
case X86::KMOVDkm_EVEX:
MemBytes = 4;
return true;
case X86::MOV64rm:
Expand All @@ -524,6 +527,7 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
case X86::KMOVQkm:
case X86::KMOVQkm_EVEX:
MemBytes = 8;
return true;
case X86::MOVAPSrm:
Expand Down Expand Up @@ -593,10 +597,12 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
return false;
case X86::MOV8mr:
case X86::KMOVBmk:
case X86::KMOVBmk_EVEX:
MemBytes = 1;
return true;
case X86::MOV16mr:
case X86::KMOVWmk:
case X86::KMOVWmk_EVEX:
case X86::VMOVSHZmr:
MemBytes = 2;
return true;
Expand All @@ -605,6 +611,7 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
case X86::VMOVSSmr:
case X86::VMOVSSZmr:
case X86::KMOVDmk:
case X86::KMOVDmk_EVEX:
MemBytes = 4;
return true;
case X86::MOV64mr:
Expand All @@ -616,6 +623,7 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
case X86::MMX_MOVQ64mr:
case X86::MMX_MOVNTQmr:
case X86::KMOVQmk:
case X86::KMOVQmk_EVEX:
MemBytes = 8;
return true;
case X86::MOVAPSmr:
Expand Down Expand Up @@ -3519,6 +3527,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
const X86Subtarget &Subtarget) {
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
bool HasEGPR = Subtarget.hasEGPR();

// SrcReg(MaskReg) -> DestReg(GR64)
// SrcReg(MaskReg) -> DestReg(GR32)
Expand All @@ -3527,10 +3536,11 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
if (X86::VK16RegClass.contains(SrcReg)) {
if (X86::GR64RegClass.contains(DestReg)) {
assert(Subtarget.hasBWI());
return X86::KMOVQrk;
return HasEGPR ? X86::KMOVQrk_EVEX : X86::KMOVQrk;
}
if (X86::GR32RegClass.contains(DestReg))
return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk;
return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDrk_EVEX : X86::KMOVDrk)
: (HasEGPR ? X86::KMOVWrk_EVEX : X86::KMOVWrk);
}

// SrcReg(GR64) -> DestReg(MaskReg)
Expand All @@ -3540,10 +3550,11 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
if (X86::VK16RegClass.contains(DestReg)) {
if (X86::GR64RegClass.contains(SrcReg)) {
assert(Subtarget.hasBWI());
return X86::KMOVQkr;
return HasEGPR ? X86::KMOVQkr_EVEX : X86::KMOVQkr;
}
if (X86::GR32RegClass.contains(SrcReg))
return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr;
return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDkr_EVEX : X86::KMOVDkr)
: (HasEGPR ? X86::KMOVWkr_EVEX : X86::KMOVWkr);
}


Expand Down Expand Up @@ -3710,6 +3721,7 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
bool HasAVX = STI.hasAVX();
bool HasAVX512 = STI.hasAVX512();
bool HasVLX = STI.hasVLX();
bool HasEGPR = STI.hasEGPR();

assert(RC != nullptr && "Invalid target register class");
switch (STI.getRegisterInfo()->getSpillSize(*RC)) {
Expand All @@ -3725,7 +3737,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
return Load ? X86::MOV8rm : X86::MOV8mr;
case 2:
if (X86::VK16RegClass.hasSubClassEq(RC))
return Load ? X86::KMOVWkm : X86::KMOVWmk;
return Load ? (HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm)
: (HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
return Load ? X86::MOV16rm : X86::MOV16mr;
case 4:
Expand All @@ -3743,7 +3756,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
return Load ? X86::LD_Fp32m : X86::ST_Fp32m;
if (X86::VK32RegClass.hasSubClassEq(RC)) {
assert(STI.hasBWI() && "KMOVD requires BWI");
return Load ? X86::KMOVDkm : X86::KMOVDmk;
return Load ? (HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm)
: (HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
}
// All of these mask pair classes have the same spill size, the same kind
// of kmov instructions can be used with all of them.
Expand Down Expand Up @@ -3774,7 +3788,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
return Load ? X86::LD_Fp64m : X86::ST_Fp64m;
if (X86::VK64RegClass.hasSubClassEq(RC)) {
assert(STI.hasBWI() && "KMOVQ requires BWI");
return Load ? X86::KMOVQkm : X86::KMOVQmk;
return Load ? (HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm)
: (HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
}
llvm_unreachable("Unknown 8-byte regclass");
case 10:
Expand Down Expand Up @@ -7717,9 +7732,13 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::VMOVDQA64Zrm:
case X86::VMOVDQU64Zrm:
case X86::KMOVBkm:
case X86::KMOVBkm_EVEX:
case X86::KMOVWkm:
case X86::KMOVWkm_EVEX:
case X86::KMOVDkm:
case X86::KMOVDkm_EVEX:
case X86::KMOVQkm:
case X86::KMOVQkm_EVEX:
return true;
}
};
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,8 @@ def relocImm : ComplexPattern<iAny, 1, "selectRelocImm",
// X86 Instruction Predicate Definitions.
def TruePredicate : Predicate<"true">;

def HasEGPR : Predicate<"Subtarget->hasEGPR()">;
def NoEGPR : Predicate<"!Subtarget->hasEGPR()">;
def HasCMOV : Predicate<"Subtarget->canUseCMOV()">;
def NoCMOV : Predicate<"!Subtarget->canUseCMOV()">;

Expand Down
14 changes: 14 additions & 0 deletions llvm/test/CodeGen/X86/apx/kmov-copy-to-from-asymmetric-reg.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f -show-mc-encoding | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+egpr -show-mc-encoding | FileCheck --check-prefix=EGPR %s

; Verify instruction selection of a GPR-to-mask-register copy (kmovw) both
; without and with the EGPR feature. With +egpr the EVEX form of KMOV is
; selected, but since no extended GPR is referenced here the encoder applies
; EVEX-to-VEX compression, so the final encoding bytes are identical.
define void @kmov(i1 %cmp23.not) {
; CHECK-LABEL: kmov:
; CHECK: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
;
; EGPR-LABEL: kmov:
; EGPR: kmovw %edi, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x92,0xcf]
entry:
; The select on the i1 argument forces a mask-register set-up via kmovw.
%0 = select i1 %cmp23.not, double 1.000000e+00, double 0.000000e+00
store double %0, ptr null, align 8
ret void
}
Loading

0 comments on commit 5adafcb

Please sign in to comment.