From a66e5882a837f8a64b3cd2ca613408921d39905b Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Fri, 4 Dec 2020 14:51:55 -0800 Subject: [PATCH] [machinst x64]: implement dot product --- cranelift/codegen/src/isa/x64/inst/args.rs | 3 +++ cranelift/codegen/src/isa/x64/inst/emit.rs | 1 + cranelift/codegen/src/isa/x64/inst/emit_tests.rs | 6 ++++++ cranelift/codegen/src/isa/x64/lower.rs | 14 ++++++++++++++ 4 files changed, 24 insertions(+) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 7e3b3f22a23e..a0b7dab87b04 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -498,6 +498,7 @@ pub enum SseOpcode { Pinsrb, Pinsrw, Pinsrd, + Pmaddwd, Pmaxsb, Pmaxsw, Pmaxsd, @@ -598,6 +599,7 @@ impl SseOpcode { | SseOpcode::Mulps | SseOpcode::Mulss | SseOpcode::Orps + | SseOpcode::Pmaddwd | SseOpcode::Rcpss | SseOpcode::Rsqrtss | SseOpcode::Sqrtps @@ -842,6 +844,7 @@ impl fmt::Debug for SseOpcode { SseOpcode::Pinsrb => "pinsrb", SseOpcode::Pinsrw => "pinsrw", SseOpcode::Pinsrd => "pinsrd", + SseOpcode::Pmaddwd => "pmaddwd", SseOpcode::Pmaxsb => "pmaxsb", SseOpcode::Pmaxsw => "pmaxsw", SseOpcode::Pmaxsd => "pmaxsd", diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index c0d94d2ab63b..f8a7c7246dae 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1805,6 +1805,7 @@ pub(crate) fn emit( SseOpcode::Pcmpgtw => (LegacyPrefixes::_66, 0x0F65, 2), SseOpcode::Pcmpgtd => (LegacyPrefixes::_66, 0x0F66, 2), SseOpcode::Pcmpgtq => (LegacyPrefixes::_66, 0x0F3837, 3), + SseOpcode::Pmaddwd => (LegacyPrefixes::_66, 0x0FF5, 2), SseOpcode::Pmovsxbd => (LegacyPrefixes::_66, 0x0F3821, 3), SseOpcode::Pmovsxbw => (LegacyPrefixes::_66, 0x0F3820, 3), SseOpcode::Pmovsxbq => (LegacyPrefixes::_66, 0x0F3822, 3), diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index bda26e3f27dc..5faa37d2d25d 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -3067,6 +3067,12 @@ fn test_x64_emit() { "pmuludq %xmm8, %xmm9", )); + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmaddwd, RegMem::reg(xmm8), w_xmm9), + "66450FF5C8", + "pmaddwd %xmm8, %xmm9", + )); + insns.push(( Inst::xmm_rm_r(SseOpcode::Pmaxsb, RegMem::reg(xmm15), w_xmm6), "66410F383CF7", diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 4b09681548d3..7c49ca00284f 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -4144,6 +4144,20 @@ fn lower_insn_to_regs>( } } + Opcode::WideningPairwiseDotProductS => { + let lhs = put_input_in_reg(ctx, inputs[0]); + let lhs_ty = ctx.input_ty(insn, 0); + let rhs = input_to_reg_mem(ctx, inputs[1]); + let dst = get_output_reg(ctx, outputs[0]); + let dst_ty = ty.unwrap(); + assert!( + dst_ty == types::I32X4 && lhs_ty == types::I16X8, + "dot product only expands two I16x8 vectors into an I32x4 vector" + ); + ctx.emit(Inst::gen_move(dst, lhs, lhs_ty)); + ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddwd, rhs, dst)) + } + Opcode::IaddImm | Opcode::ImulImm | Opcode::UdivImm