Skip to content

Commit f5f66e2

Browse files
authored
[X86] Support lowering for APX Promoted SHA/MOVDIR/CRC32/INVPCID/CET instructions (#76786)
R16-R31 were added to the GPRs in #70958. This patch supports lowering for the promoted SHA/MOVDIR/CRC32/INVPCID/CET instructions. RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4
1 parent 59af659 commit f5f66e2

12 files changed

+478
-175
lines changed

llvm/lib/Target/X86/X86DomainReassignment.cpp

Lines changed: 29 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -619,40 +619,30 @@ void X86DomainReassignment::initConverters() {
619619
std::make_unique<InstrReplacerDstCOPY>(From, To);
620620
};
621621

622-
bool HasEGPR = STI->hasEGPR();
623-
createReplacerDstCOPY(X86::MOVZX32rm16,
624-
HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
625-
createReplacerDstCOPY(X86::MOVZX64rm16,
626-
HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
622+
// Selects the `_EVEX`-suffixed variant of opcode OPC when the subtarget
// reports EGPR support via STI->hasEGPR(), and plain OPC otherwise. The
// expansion is parenthesized so the conditional stays a single operand when
// the macro is used inside a larger expression.
#define GET_EGPR_IF_ENABLED(OPC) (STI->hasEGPR() ? OPC##_EVEX : OPC)
623+
createReplacerDstCOPY(X86::MOVZX32rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm));
624+
createReplacerDstCOPY(X86::MOVZX64rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm));
627625

628-
createReplacerDstCOPY(X86::MOVZX32rr16,
629-
HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
630-
createReplacerDstCOPY(X86::MOVZX64rr16,
631-
HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
626+
createReplacerDstCOPY(X86::MOVZX32rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk));
627+
createReplacerDstCOPY(X86::MOVZX64rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk));
632628

633629
if (STI->hasDQI()) {
634-
createReplacerDstCOPY(X86::MOVZX16rm8,
635-
HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
636-
createReplacerDstCOPY(X86::MOVZX32rm8,
637-
HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
638-
createReplacerDstCOPY(X86::MOVZX64rm8,
639-
HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
640-
641-
createReplacerDstCOPY(X86::MOVZX16rr8,
642-
HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
643-
createReplacerDstCOPY(X86::MOVZX32rr8,
644-
HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
645-
createReplacerDstCOPY(X86::MOVZX64rr8,
646-
HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
630+
createReplacerDstCOPY(X86::MOVZX16rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm));
631+
createReplacerDstCOPY(X86::MOVZX32rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm));
632+
createReplacerDstCOPY(X86::MOVZX64rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm));
633+
634+
createReplacerDstCOPY(X86::MOVZX16rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk));
635+
createReplacerDstCOPY(X86::MOVZX32rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk));
636+
createReplacerDstCOPY(X86::MOVZX64rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk));
647637
}
648638

649639
auto createReplacer = [&](unsigned From, unsigned To) {
650640
Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(From, To);
651641
};
652642

653-
createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
654-
createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
655-
createReplacer(X86::MOV16rr, HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
643+
createReplacer(X86::MOV16rm, GET_EGPR_IF_ENABLED(X86::KMOVWkm));
644+
createReplacer(X86::MOV16mr, GET_EGPR_IF_ENABLED(X86::KMOVWmk));
645+
createReplacer(X86::MOV16rr, GET_EGPR_IF_ENABLED(X86::KMOVWkk));
656646
createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
657647
createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
658648
createReplacer(X86::NOT16r, X86::KNOTWrr);
@@ -661,14 +651,14 @@ void X86DomainReassignment::initConverters() {
661651
createReplacer(X86::XOR16rr, X86::KXORWrr);
662652

663653
if (STI->hasBWI()) {
664-
createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm);
665-
createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm);
654+
createReplacer(X86::MOV32rm, GET_EGPR_IF_ENABLED(X86::KMOVDkm));
655+
createReplacer(X86::MOV64rm, GET_EGPR_IF_ENABLED(X86::KMOVQkm));
666656

667-
createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
668-
createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
657+
createReplacer(X86::MOV32mr, GET_EGPR_IF_ENABLED(X86::KMOVDmk));
658+
createReplacer(X86::MOV64mr, GET_EGPR_IF_ENABLED(X86::KMOVQmk));
669659

670-
createReplacer(X86::MOV32rr, HasEGPR ? X86::KMOVDkk_EVEX : X86::KMOVDkk);
671-
createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk);
660+
createReplacer(X86::MOV32rr, GET_EGPR_IF_ENABLED(X86::KMOVDkk));
661+
createReplacer(X86::MOV64rr, GET_EGPR_IF_ENABLED(X86::KMOVQkk));
672662

673663
createReplacer(X86::SHR32ri, X86::KSHIFTRDri);
674664
createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
@@ -696,8 +686,8 @@ void X86DomainReassignment::initConverters() {
696686

697687
// TODO: KTEST is not a replacement for TEST due to flag differences. Need
698688
// to prove only Z flag is used.
699-
//createReplacer(X86::TEST32rr, X86::KTESTDrr);
700-
//createReplacer(X86::TEST64rr, X86::KTESTQrr);
689+
// createReplacer(X86::TEST32rr, X86::KTESTDrr);
690+
// createReplacer(X86::TEST64rr, X86::KTESTQrr);
701691
}
702692

703693
if (STI->hasDQI()) {
@@ -706,9 +696,9 @@ void X86DomainReassignment::initConverters() {
706696

707697
createReplacer(X86::AND8rr, X86::KANDBrr);
708698

709-
createReplacer(X86::MOV8rm, HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
710-
createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk);
711-
createReplacer(X86::MOV8rr, HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
699+
createReplacer(X86::MOV8rm, GET_EGPR_IF_ENABLED(X86::KMOVBkm));
700+
createReplacer(X86::MOV8mr, GET_EGPR_IF_ENABLED(X86::KMOVBmk));
701+
createReplacer(X86::MOV8rr, GET_EGPR_IF_ENABLED(X86::KMOVBkk));
712702

713703
createReplacer(X86::NOT8r, X86::KNOTBrr);
714704

@@ -719,11 +709,12 @@ void X86DomainReassignment::initConverters() {
719709

720710
// TODO: KTEST is not a replacement for TEST due to flag differences. Need
721711
// to prove only Z flag is used.
722-
//createReplacer(X86::TEST8rr, X86::KTESTBrr);
723-
//createReplacer(X86::TEST16rr, X86::KTESTWrr);
712+
// createReplacer(X86::TEST8rr, X86::KTESTBrr);
713+
// createReplacer(X86::TEST16rr, X86::KTESTWrr);
724714

725715
createReplacer(X86::XOR8rr, X86::KXORBrr);
726716
}
717+
#undef GET_EGPR_IF_ENABLED
727718
}
728719

729720
bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {

llvm/lib/Target/X86/X86FastISel.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3046,22 +3046,24 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
30463046
switch (II->getIntrinsicID()) {
30473047
default:
30483048
llvm_unreachable("Unexpected intrinsic.");
3049+
// Selects the `_EVEX`-suffixed variant of opcode OPC when
// Subtarget->hasEGPR() is true, and plain OPC otherwise. The expansion is
// parenthesized so the conditional stays a single operand when the macro is
// used inside a larger expression.
#define GET_EGPR_IF_ENABLED(OPC) (Subtarget->hasEGPR() ? OPC##_EVEX : OPC)
30493050
case Intrinsic::x86_sse42_crc32_32_8:
3050-
Opc = X86::CRC32r32r8;
3051+
Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r8);
30513052
RC = &X86::GR32RegClass;
30523053
break;
30533054
case Intrinsic::x86_sse42_crc32_32_16:
3054-
Opc = X86::CRC32r32r16;
3055+
Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r16);
30553056
RC = &X86::GR32RegClass;
30563057
break;
30573058
case Intrinsic::x86_sse42_crc32_32_32:
3058-
Opc = X86::CRC32r32r32;
3059+
Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r32);
30593060
RC = &X86::GR32RegClass;
30603061
break;
30613062
case Intrinsic::x86_sse42_crc32_64_64:
3062-
Opc = X86::CRC32r64r64;
3063+
Opc = GET_EGPR_IF_ENABLED(X86::CRC32r64r64);
30633064
RC = &X86::GR64RegClass;
30643065
break;
3066+
#undef GET_EGPR_IF_ENABLED
30653067
}
30663068

30673069
const Value *LHS = II->getArgOperand(0);

llvm/lib/Target/X86/X86InstrSystem.td

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -695,14 +695,14 @@ def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
695695
Requires<[Not64BitMode, HasINVPCID]>;
696696
def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
697697
"invpcid\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
698-
Requires<[In64BitMode, HasINVPCID]>;
698+
Requires<[In64BitMode]>;
699699

700700
def INVPCID64_EVEX : I<0xF2, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
701701
"invpcid\t{$src2, $src1|$src1, $src2}", []>,
702-
EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID]>;
702+
EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>;
703703
} // SchedRW
704704

705-
let Predicates = [In64BitMode, HasINVPCID] in {
705+
let Predicates = [HasINVPCID, NoEGPR] in {
706706
// The instruction can only use a 64 bit register as the register argument
707707
// in 64 bit mode, while the intrinsic only accepts a 32 bit argument
708708
// corresponding to it.
@@ -714,6 +714,13 @@ let Predicates = [In64BitMode, HasINVPCID] in {
714714
addr:$src2)>;
715715
}
716716

717+
let Predicates = [HasINVPCID, HasEGPR] in {
718+
def : Pat<(int_x86_invpcid GR32:$src1, addr:$src2),
719+
(INVPCID64_EVEX
720+
(SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src1), sub_32bit),
721+
addr:$src2)>;
722+
}
723+
717724

718725
//===----------------------------------------------------------------------===//
719726
// SMAP Instruction

llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@
33
; RUN: llc < %s -fast-isel -pass-remarks-missed=sdagisel -mtriple=i686-unknown-unknown -mattr=+crc32 2>&1 >/dev/null | FileCheck %s -check-prefix=STDERR-X86 -allow-empty
44
; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -check-prefix=X86
55
; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X86
6-
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -check-prefix=X64
7-
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X64
6+
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 --show-mc-encoding | FileCheck %s -check-prefix=X64
7+
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 --show-mc-encoding | FileCheck %s -check-prefix=X64
8+
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR
89

910
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c
1011

@@ -21,9 +22,15 @@ define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind {
2122
;
2223
; X64-LABEL: test_mm_crc32_u8:
2324
; X64: # %bb.0:
24-
; X64-NEXT: movl %edi, %eax
25-
; X64-NEXT: crc32b %sil, %eax
26-
; X64-NEXT: retq
25+
; X64-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
26+
; X64-NEXT: crc32b %sil, %eax # encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
27+
; X64-NEXT: retq # encoding: [0xc3]
28+
;
29+
; EGPR-LABEL: test_mm_crc32_u8:
30+
; EGPR: # %bb.0:
31+
; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
32+
; EGPR-NEXT: crc32b %sil, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
33+
; EGPR-NEXT: retq # encoding: [0xc3]
2734
%trunc = trunc i32 %a1 to i8
2835
%res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc)
2936
ret i32 %res
@@ -41,9 +48,15 @@ define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind {
4148
;
4249
; X64-LABEL: test_mm_crc32_u16:
4350
; X64: # %bb.0:
44-
; X64-NEXT: movl %edi, %eax
45-
; X64-NEXT: crc32w %si, %eax
46-
; X64-NEXT: retq
51+
; X64-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
52+
; X64-NEXT: crc32w %si, %eax # encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
53+
; X64-NEXT: retq # encoding: [0xc3]
54+
;
55+
; EGPR-LABEL: test_mm_crc32_u16:
56+
; EGPR: # %bb.0:
57+
; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
58+
; EGPR-NEXT: crc32w %si, %eax # encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
59+
; EGPR-NEXT: retq # encoding: [0xc3]
4760
%trunc = trunc i32 %a1 to i16
4861
%res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc)
4962
ret i32 %res
@@ -59,9 +72,15 @@ define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind {
5972
;
6073
; X64-LABEL: test_mm_crc32_u32:
6174
; X64: # %bb.0:
62-
; X64-NEXT: movl %edi, %eax
63-
; X64-NEXT: crc32l %esi, %eax
64-
; X64-NEXT: retq
75+
; X64-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
76+
; X64-NEXT: crc32l %esi, %eax # encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
77+
; X64-NEXT: retq # encoding: [0xc3]
78+
;
79+
; EGPR-LABEL: test_mm_crc32_u32:
80+
; EGPR: # %bb.0:
81+
; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
82+
; EGPR-NEXT: crc32l %esi, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
83+
; EGPR-NEXT: retq # encoding: [0xc3]
6584
%res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
6685
ret i32 %res
6786
}

llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,23 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s
3-
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s
2+
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 --show-mc-encoding | FileCheck %s
3+
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 --show-mc-encoding | FileCheck %s
4+
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR
45

56
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c
67

78
; Note: %a1 is i32 as FastISel can't handle i8/i16 arguments.
89
define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{
910
; CHECK-LABEL: test_mm_crc64_u8:
1011
; CHECK: # %bb.0:
11-
; CHECK-NEXT: crc32b %sil, %edi
12-
; CHECK-NEXT: movl %edi, %eax
13-
; CHECK-NEXT: retq
12+
; CHECK-NEXT: crc32b %sil, %edi # encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xfe]
13+
; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
14+
; CHECK-NEXT: retq # encoding: [0xc3]
15+
;
16+
; EGPR-LABEL: test_mm_crc64_u8:
17+
; EGPR: # %bb.0:
18+
; EGPR-NEXT: crc32b %sil, %edi # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xfe]
19+
; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
20+
; EGPR-NEXT: retq # encoding: [0xc3]
1421
%trunc = trunc i32 %a1 to i8
1522
%res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %trunc)
1623
ret i64 %res
@@ -20,9 +27,15 @@ declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind readnone
2027
define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{
2128
; CHECK-LABEL: test_mm_crc64_u64:
2229
; CHECK: # %bb.0:
23-
; CHECK-NEXT: movq %rdi, %rax
24-
; CHECK-NEXT: crc32q %rsi, %rax
25-
; CHECK-NEXT: retq
30+
; CHECK-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
31+
; CHECK-NEXT: crc32q %rsi, %rax # encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6]
32+
; CHECK-NEXT: retq # encoding: [0xc3]
33+
;
34+
; EGPR-LABEL: test_mm_crc64_u64:
35+
; EGPR: # %bb.0:
36+
; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
37+
; EGPR-NEXT: crc32q %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
38+
; EGPR-NEXT: retq # encoding: [0xc3]
2639
%res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
2740
ret i64 %res
2841
}

llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X86
33
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X64
4+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s --check-prefixes=EGPR
45

56
define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
67
; X86-LABEL: crc32_32_8:
@@ -14,6 +15,12 @@ define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
1415
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
1516
; X64-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
1617
; X64-NEXT: retq ## encoding: [0xc3]
18+
;
19+
; EGPR-LABEL: crc32_32_8:
20+
; EGPR: ## %bb.0:
21+
; EGPR-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
22+
; EGPR-NEXT: crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
23+
; EGPR-NEXT: retq ## encoding: [0xc3]
1724
%tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
1825
ret i32 %tmp
1926
}
@@ -31,6 +38,12 @@ define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
3138
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
3239
; X64-NEXT: crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
3340
; X64-NEXT: retq ## encoding: [0xc3]
41+
;
42+
; EGPR-LABEL: crc32_32_16:
43+
; EGPR: ## %bb.0:
44+
; EGPR-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
45+
; EGPR-NEXT: crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
46+
; EGPR-NEXT: retq ## encoding: [0xc3]
3447
%tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
3548
ret i32 %tmp
3649
}
@@ -48,6 +61,12 @@ define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
4861
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
4962
; X64-NEXT: crc32l %esi, %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
5063
; X64-NEXT: retq ## encoding: [0xc3]
64+
;
65+
; EGPR-LABEL: crc32_32_32:
66+
; EGPR: ## %bb.0:
67+
; EGPR-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
68+
; EGPR-NEXT: crc32l %esi, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
69+
; EGPR-NEXT: retq ## encoding: [0xc3]
5170
%tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
5271
ret i32 %tmp
5372
}

llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s
3+
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s --check-prefixes=EGPR
34

45
declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind
56
declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind
@@ -10,6 +11,12 @@ define i64 @crc32_64_8(i64 %a, i8 %b) nounwind {
1011
; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
1112
; CHECK-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
1213
; CHECK-NEXT: retq ## encoding: [0xc3]
14+
;
15+
; EGPR-LABEL: crc32_64_8:
16+
; EGPR: ## %bb.0:
17+
; EGPR-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
18+
; EGPR-NEXT: crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
19+
; EGPR-NEXT: retq ## encoding: [0xc3]
1320
%tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b)
1421
ret i64 %tmp
1522
}
@@ -20,6 +27,12 @@ define i64 @crc32_64_64(i64 %a, i64 %b) nounwind {
2027
; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
2128
; CHECK-NEXT: crc32q %rsi, %rax ## encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6]
2229
; CHECK-NEXT: retq ## encoding: [0xc3]
30+
;
31+
; EGPR-LABEL: crc32_64_64:
32+
; EGPR: ## %bb.0:
33+
; EGPR-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
34+
; EGPR-NEXT: crc32q %rsi, %rax ## encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
35+
; EGPR-NEXT: retq ## encoding: [0xc3]
2336
%tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b)
2437
ret i64 %tmp
2538
}

0 commit comments

Comments
 (0)