Skip to content

Commit e043128

Browse files
authored
x64: use new versions of SSE add and sub (#10718)
* x64: use new `addss` and `addsd` instructions
* Bless Cranelift tests for `adds*`
* winch: add more conversions for pairing XMM registers
* winch: use the new `addss` and `addsd` instructions
* x64: use new `subss` and `subsd` instructions
* winch: use the new `subss` and `subsd` instructions
1 parent 19ad7b3 commit e043128

File tree

17 files changed

+177
-213
lines changed

17 files changed

+177
-213
lines changed

cranelift/assembler-x64/meta/src/instructions/add.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ pub fn list() -> Vec<Inst> {
6262
inst("lock_adcl", fmt("MR", [rw(m32), r(r32)]), rex([0xf0, 0x11]).r(), _64b | compat),
6363
inst("lock_adcq", fmt("MR", [rw(m64), r(r64)]), rex([0xf0, 0x11]).w().r(), _64b),
6464
// Vector instructions.
65+
inst("addss", fmt("A", [rw(xmm), r(xmm_m32)]), rex([0xF3, 0x0F, 0x58]).r(), _64b | compat | sse),
66+
inst("addsd", fmt("A", [rw(xmm), r(xmm_m64)]), rex([0xF2, 0x0F, 0x58]).r(), _64b | compat | sse2),
6567
inst("addps", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x0F, 0x58]).r(), _64b | compat | sse),
6668
inst("addpd", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x58]).r(), _64b | compat | sse2),
6769
]

cranelift/assembler-x64/meta/src/instructions/sub.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ pub fn list() -> Vec<Inst> {
6262
inst("lock_sbbl", fmt("MR", [rw(m32), r(r32)]), rex([0xf0, 0x19]).r(), _64b | compat),
6363
inst("lock_sbbq", fmt("MR", [rw(m64), r(r64)]), rex([0xf0, 0x19]).w().r(), _64b),
6464
// Vector instructions.
65+
inst("subss", fmt("A", [rw(xmm), r(xmm_m32)]), rex([0xF3, 0x0F, 0x5C]).r(), _64b | compat | sse),
66+
inst("subsd", fmt("A", [rw(xmm), r(xmm_m64)]), rex([0xF2, 0x0F, 0x5C]).r(), _64b | compat | sse2),
6567
inst("subps", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x0F, 0x5C]).r(), _64b | compat | sse),
6668
inst("subpd", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x5C]).r(), _64b | compat | sse2),
6769
]

cranelift/codegen/src/isa/x64/inst.isle

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -876,11 +876,7 @@
876876
(enum Rorx))
877877

878878
(type SseOpcode extern
879-
(enum Addps
880-
Addpd
881-
Addss
882-
Addsd
883-
Andps
879+
(enum Andps
884880
Andpd
885881
Andnps
886882
Andnpd
@@ -1044,10 +1040,6 @@
10441040
Sqrtpd
10451041
Sqrtss
10461042
Sqrtsd
1047-
Subps
1048-
Subpd
1049-
Subss
1050-
Subsd
10511043
Ucomiss
10521044
Ucomisd
10531045
Unpcklps
@@ -3836,19 +3828,17 @@
38363828

38373829
;; Helper for creating `addss` instructions.
38383830
(decl x64_addss (Xmm XmmMem) Xmm)
3839-
(rule (x64_addss src1 src2)
3840-
(xmm_rm_r_unaligned (SseOpcode.Addss) src1 src2))
38413831
(rule 1 (x64_addss src1 src2)
38423832
(if-let true (use_avx))
38433833
(xmm_rmir_vex (AvxOpcode.Vaddss) src1 src2))
3834+
(rule (x64_addss src1 src2) (x64_addss_a src1 src2))
38443835

38453836
;; Helper for creating `addsd` instructions.
38463837
(decl x64_addsd (Xmm XmmMem) Xmm)
3847-
(rule (x64_addsd src1 src2)
3848-
(xmm_rm_r_unaligned (SseOpcode.Addsd) src1 src2))
38493838
(rule 1 (x64_addsd src1 src2)
38503839
(if-let true (use_avx))
38513840
(xmm_rmir_vex (AvxOpcode.Vaddsd) src1 src2))
3841+
(rule (x64_addsd src1 src2) (x64_addsd_a src1 src2))
38523842

38533843
;; Helper for creating `addps` instructions.
38543844
(decl x64_addps (Xmm XmmMem) Xmm)
@@ -3866,19 +3856,17 @@
38663856

38673857
;; Helper for creating `subss` instructions.
38683858
(decl x64_subss (Xmm XmmMem) Xmm)
3869-
(rule (x64_subss src1 src2)
3870-
(xmm_rm_r_unaligned (SseOpcode.Subss) src1 src2))
38713859
(rule 1 (x64_subss src1 src2)
38723860
(if-let true (use_avx))
38733861
(xmm_rmir_vex (AvxOpcode.Vsubss) src1 src2))
3862+
(rule (x64_subss src1 src2) (x64_subss_a src1 src2))
38743863

38753864
;; Helper for creating `subsd` instructions.
38763865
(decl x64_subsd (Xmm XmmMem) Xmm)
3877-
(rule (x64_subsd src1 src2)
3878-
(xmm_rm_r_unaligned (SseOpcode.Subsd) src1 src2))
38793866
(rule 1 (x64_subsd src1 src2)
38803867
(if-let true (use_avx))
38813868
(xmm_rmir_vex (AvxOpcode.Vsubsd) src1 src2))
3869+
(rule (x64_subsd src1 src2) (x64_subsd_a src1 src2))
38823870

38833871
;; Helper for creating `subps` instructions.
38843872
(decl x64_subps (Xmm XmmMem) Xmm)

cranelift/codegen/src/isa/x64/inst/args.rs

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -941,10 +941,6 @@ pub(crate) enum InstructionSet {
941941
#[allow(dead_code)] // some variants here aren't used just yet
942942
#[allow(missing_docs)]
943943
pub enum SseOpcode {
944-
Addps,
945-
Addpd,
946-
Addss,
947-
Addsd,
948944
Andps,
949945
Andpd,
950946
Andnps,
@@ -1109,10 +1105,6 @@ pub enum SseOpcode {
11091105
Sqrtpd,
11101106
Sqrtss,
11111107
Sqrtsd,
1112-
Subps,
1113-
Subpd,
1114-
Subss,
1115-
Subsd,
11161108
Ucomiss,
11171109
Ucomisd,
11181110
Unpcklps,
@@ -1137,9 +1129,7 @@ impl SseOpcode {
11371129
pub(crate) fn available_from(&self) -> InstructionSet {
11381130
use InstructionSet::*;
11391131
match self {
1140-
SseOpcode::Addps
1141-
| SseOpcode::Addss
1142-
| SseOpcode::Andps
1132+
SseOpcode::Andps
11431133
| SseOpcode::Andnps
11441134
| SseOpcode::Comiss
11451135
| SseOpcode::Cmpps
@@ -1166,16 +1156,12 @@ impl SseOpcode {
11661156
| SseOpcode::Shufps
11671157
| SseOpcode::Sqrtps
11681158
| SseOpcode::Sqrtss
1169-
| SseOpcode::Subps
1170-
| SseOpcode::Subss
11711159
| SseOpcode::Ucomiss
11721160
| SseOpcode::Unpcklps
11731161
| SseOpcode::Unpckhps
11741162
| SseOpcode::Xorps => SSE,
11751163

1176-
SseOpcode::Addpd
1177-
| SseOpcode::Addsd
1178-
| SseOpcode::Andpd
1164+
SseOpcode::Andpd
11791165
| SseOpcode::Andnpd
11801166
| SseOpcode::Cmppd
11811167
| SseOpcode::Cmpsd
@@ -1266,8 +1252,6 @@ impl SseOpcode {
12661252
| SseOpcode::Pxor
12671253
| SseOpcode::Sqrtpd
12681254
| SseOpcode::Sqrtsd
1269-
| SseOpcode::Subpd
1270-
| SseOpcode::Subsd
12711255
| SseOpcode::Ucomisd
12721256
| SseOpcode::Xorpd
12731257
| SseOpcode::Punpckldq
@@ -1365,10 +1349,6 @@ impl SseOpcode {
13651349
impl fmt::Debug for SseOpcode {
13661350
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
13671351
let name = match self {
1368-
SseOpcode::Addps => "addps",
1369-
SseOpcode::Addpd => "addpd",
1370-
SseOpcode::Addss => "addss",
1371-
SseOpcode::Addsd => "addsd",
13721352
SseOpcode::Andpd => "andpd",
13731353
SseOpcode::Andps => "andps",
13741354
SseOpcode::Andnps => "andnps",
@@ -1533,10 +1513,6 @@ impl fmt::Debug for SseOpcode {
15331513
SseOpcode::Sqrtpd => "sqrtpd",
15341514
SseOpcode::Sqrtss => "sqrtss",
15351515
SseOpcode::Sqrtsd => "sqrtsd",
1536-
SseOpcode::Subps => "subps",
1537-
SseOpcode::Subpd => "subpd",
1538-
SseOpcode::Subss => "subss",
1539-
SseOpcode::Subsd => "subsd",
15401516
SseOpcode::Ucomiss => "ucomiss",
15411517
SseOpcode::Ucomisd => "ucomisd",
15421518
SseOpcode::Unpcklps => "unpcklps",

cranelift/codegen/src/isa/x64/inst/emit.rs

Lines changed: 11 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2205,10 +2205,6 @@ pub(crate) fn emit(
22052205

22062206
let rex = RexFlags::clear_w();
22072207
let (prefix, opcode, length) = match op {
2208-
SseOpcode::Addps => (LegacyPrefixes::None, 0x0F58, 2),
2209-
SseOpcode::Addpd => (LegacyPrefixes::_66, 0x0F58, 2),
2210-
SseOpcode::Addss => (LegacyPrefixes::_F3, 0x0F58, 2),
2211-
SseOpcode::Addsd => (LegacyPrefixes::_F2, 0x0F58, 2),
22122208
SseOpcode::Andps => (LegacyPrefixes::None, 0x0F54, 2),
22132209
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
22142210
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
@@ -2297,10 +2293,6 @@ pub(crate) fn emit(
22972293
SseOpcode::Punpckhdq => (LegacyPrefixes::_66, 0x0F6A, 2),
22982294
SseOpcode::Punpckhqdq => (LegacyPrefixes::_66, 0x0F6D, 2),
22992295
SseOpcode::Pxor => (LegacyPrefixes::_66, 0x0FEF, 2),
2300-
SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2),
2301-
SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
2302-
SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2),
2303-
SseOpcode::Subsd => (LegacyPrefixes::_F2, 0x0F5C, 2),
23042296
SseOpcode::Unpcklps => (LegacyPrefixes::None, 0x0F14, 2),
23052297
SseOpcode::Unpckhps => (LegacyPrefixes::None, 0x0F15, 2),
23062298
SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2),
@@ -3110,7 +3102,7 @@ pub(crate) fn emit(
31103102

31113103
let (add_op, cmp_op, and_op, or_op, min_max_op) = match size {
31123104
OperandSize::Size32 => (
3113-
SseOpcode::Addss,
3105+
asm::inst::addss_a::new(dst, lhs).into(),
31143106
SseOpcode::Ucomiss,
31153107
SseOpcode::Andps,
31163108
SseOpcode::Orps,
@@ -3121,7 +3113,7 @@ pub(crate) fn emit(
31213113
},
31223114
),
31233115
OperandSize::Size64 => (
3124-
SseOpcode::Addsd,
3116+
asm::inst::addsd_a::new(dst, lhs).into(),
31253117
SseOpcode::Ucomisd,
31263118
SseOpcode::Andpd,
31273119
SseOpcode::Orpd,
@@ -3154,8 +3146,7 @@ pub(crate) fn emit(
31543146
// read-only operand: perform an addition between the two operands, which has the
31553147
// desired NaN propagation effects.
31563148
sink.bind_label(propagate_nan, state.ctrl_plane_mut());
3157-
let inst = Inst::xmm_rm_r(add_op, RegMem::reg(lhs), dst);
3158-
inst.emit(sink, info, state);
3149+
Inst::External { inst: add_op }.emit(sink, info, state);
31593150

31603151
one_way_jmp(sink, CC::P, done);
31613152

@@ -3538,13 +3529,12 @@ pub(crate) fn emit(
35383529
*dst_size == OperandSize::Size64,
35393530
);
35403531

3541-
let add_op = if *dst_size == OperandSize::Size64 {
3542-
SseOpcode::Addsd
3543-
} else {
3544-
SseOpcode::Addss
3532+
let inst = match *dst_size {
3533+
OperandSize::Size64 => asm::inst::addsd_a::new(dst, dst.to_reg()).into(),
3534+
OperandSize::Size32 => asm::inst::addss_a::new(dst, dst.to_reg()).into(),
3535+
_ => unreachable!(),
35453536
};
3546-
let inst = Inst::xmm_rm_r(add_op, RegMem::reg(dst.to_reg()), dst);
3547-
inst.emit(sink, info, state);
3537+
Inst::External { inst }.emit(sink, info, state);
35483538

35493539
sink.bind_label(done, state.ctrl_plane_mut());
35503540
}
@@ -3784,13 +3774,13 @@ pub(crate) fn emit(
37843774

37853775
let (sub_op, cast_op, cmp_op, trunc_op) = match src_size {
37863776
OperandSize::Size32 => (
3787-
SseOpcode::Subss,
3777+
asm::inst::subss_a::new(tmp_xmm2, tmp_xmm.to_reg()).into(),
37883778
SseOpcode::Movd,
37893779
SseOpcode::Ucomiss,
37903780
SseOpcode::Cvttss2si,
37913781
),
37923782
OperandSize::Size64 => (
3793-
SseOpcode::Subsd,
3783+
asm::inst::subsd_a::new(tmp_xmm2, tmp_xmm.to_reg()).into(),
37943784
SseOpcode::Movq,
37953785
SseOpcode::Ucomisd,
37963786
SseOpcode::Cvttsd2si,
@@ -3875,8 +3865,7 @@ pub(crate) fn emit(
38753865
let inst = Inst::gen_move(tmp_xmm2, src, types::F64);
38763866
inst.emit(sink, info, state);
38773867

3878-
let inst = Inst::xmm_rm_r(sub_op, RegMem::reg(tmp_xmm.to_reg()), tmp_xmm2);
3879-
inst.emit(sink, info, state);
3868+
Inst::External { inst: sub_op }.emit(sink, info, state);
38803869

38813870
let inst = Inst::xmm_to_gpr(trunc_op, tmp_xmm2.to_reg(), dst, *dst_size);
38823871
inst.emit(sink, info, state);

cranelift/codegen/src/isa/x64/inst/emit_tests.rs

Lines changed: 0 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -3219,66 +3219,6 @@ fn test_x64_emit() {
32193219
// ========================================================
32203220
// XMM_RM_R: float binary ops
32213221

3222-
insns.push((
3223-
Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm1), w_xmm0),
3224-
"F30F58C1",
3225-
"addss %xmm0, %xmm1, %xmm0",
3226-
));
3227-
insns.push((
3228-
Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm11), w_xmm13),
3229-
"F3450F58EB",
3230-
"addss %xmm13, %xmm11, %xmm13",
3231-
));
3232-
insns.push((
3233-
Inst::xmm_rm_r(
3234-
SseOpcode::Addss,
3235-
RegMem::mem(Amode::imm_reg_reg_shift(
3236-
123,
3237-
Gpr::unwrap_new(r10),
3238-
Gpr::unwrap_new(rdx),
3239-
2,
3240-
)),
3241-
w_xmm0,
3242-
),
3243-
"F3410F5844927B",
3244-
"addss %xmm0, 123(%r10,%rdx,4), %xmm0",
3245-
));
3246-
insns.push((
3247-
Inst::xmm_rm_r(SseOpcode::Addsd, RegMem::reg(xmm15), w_xmm4),
3248-
"F2410F58E7",
3249-
"addsd %xmm4, %xmm15, %xmm4",
3250-
));
3251-
3252-
insns.push((
3253-
Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm0), w_xmm1),
3254-
"F30F5CC8",
3255-
"subss %xmm1, %xmm0, %xmm1",
3256-
));
3257-
insns.push((
3258-
Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm12), w_xmm1),
3259-
"F3410F5CCC",
3260-
"subss %xmm1, %xmm12, %xmm1",
3261-
));
3262-
insns.push((
3263-
Inst::xmm_rm_r(
3264-
SseOpcode::Subss,
3265-
RegMem::mem(Amode::imm_reg_reg_shift(
3266-
321,
3267-
Gpr::unwrap_new(r10),
3268-
Gpr::unwrap_new(rax),
3269-
3,
3270-
)),
3271-
w_xmm10,
3272-
),
3273-
"F3450F5C94C241010000",
3274-
"subss %xmm10, 321(%r10,%rax,8), %xmm10",
3275-
));
3276-
insns.push((
3277-
Inst::xmm_rm_r(SseOpcode::Subsd, RegMem::reg(xmm5), w_xmm14),
3278-
"F2440F5CF5",
3279-
"subsd %xmm14, %xmm5, %xmm14",
3280-
));
3281-
32823222
insns.push((
32833223
Inst::xmm_rm_r(SseOpcode::Mulss, RegMem::reg(xmm5), w_xmm4),
32843224
"F30F59E5",

0 commit comments

Comments
 (0)