diff --git a/build.rs b/build.rs index f049208a6066..278646aa9236 100644 --- a/build.rs +++ b/build.rs @@ -190,12 +190,9 @@ fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool { match (testsuite, testname) { ("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true, - ("simd", "simd_i16x8_extmul_i8x16") => return true, ("simd", "simd_i16x8_q15mulr_sat_s") => return true, ("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true, - ("simd", "simd_i32x4_extmul_i16x8") => return true, ("simd", "simd_i32x4_trunc_sat_f64x2") => return true, - ("simd", "simd_i64x2_extmul_i32x4") => return true, ("simd", "simd_int_to_int_extend") => return true, ("simd", _) => return false, _ => {} @@ -229,10 +226,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { // These are new instructions that are not really implemented in any backend. ("simd", "simd_conversions") | ("simd", "simd_i16x8_extadd_pairwise_i8x16") - | ("simd", "simd_i16x8_extmul_i8x16") - | ("simd", "simd_i32x4_extadd_pairwise_i16x8") - | ("simd", "simd_i32x4_extmul_i16x8") - | ("simd", "simd_i64x2_extmul_i32x4") => return true, + | ("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true, _ => {} }, diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 13ad1ca8364f..2c7ce30090d0 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -593,6 +593,9 @@ pub enum SseOpcode { Pmovzxwd, Pmovzxwq, Pmovzxdq, + Pmuldq, + Pmulhw, + Pmulhuw, Pmulld, Pmullw, Pmuludq, @@ -617,7 +620,9 @@ pub enum SseOpcode { Psubusw, Ptest, Punpckhbw, + Punpckhwd, Punpcklbw, + Punpcklwd, Pxor, Rcpss, Roundps, @@ -742,6 +747,8 @@ impl SseOpcode { | SseOpcode::Pminsw | SseOpcode::Pminub | SseOpcode::Pmovmskb + | SseOpcode::Pmulhw + | SseOpcode::Pmulhuw | SseOpcode::Pmullw | SseOpcode::Pmuludq | SseOpcode::Por @@ -763,7 +770,9 @@ impl SseOpcode { | SseOpcode::Psubusb | SseOpcode::Psubusw | 
SseOpcode::Punpckhbw + | SseOpcode::Punpckhwd | SseOpcode::Punpcklbw + | SseOpcode::Punpcklwd | SseOpcode::Pxor | SseOpcode::Sqrtpd | SseOpcode::Sqrtsd @@ -808,6 +817,7 @@ impl SseOpcode { | SseOpcode::Pmovzxwd | SseOpcode::Pmovzxwq | SseOpcode::Pmovzxdq + | SseOpcode::Pmuldq | SseOpcode::Pmulld | SseOpcode::Ptest | SseOpcode::Roundps @@ -953,6 +963,9 @@ impl fmt::Debug for SseOpcode { SseOpcode::Pmovzxwd => "pmovzxwd", SseOpcode::Pmovzxwq => "pmovzxwq", SseOpcode::Pmovzxdq => "pmovzxdq", + SseOpcode::Pmuldq => "pmuldq", + SseOpcode::Pmulhw => "pmulhw", + SseOpcode::Pmulhuw => "pmulhuw", SseOpcode::Pmulld => "pmulld", SseOpcode::Pmullw => "pmullw", SseOpcode::Pmuludq => "pmuludq", @@ -977,7 +990,9 @@ impl fmt::Debug for SseOpcode { SseOpcode::Psubusw => "psubusw", SseOpcode::Ptest => "ptest", SseOpcode::Punpckhbw => "punpckhbw", + SseOpcode::Punpckhwd => "punpckhwd", SseOpcode::Punpcklbw => "punpcklbw", + SseOpcode::Punpcklwd => "punpcklwd", SseOpcode::Pxor => "pxor", SseOpcode::Rcpss => "rcpss", SseOpcode::Roundps => "roundps", diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 534b6be168b6..47ae56fb2277 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1509,6 +1509,9 @@ pub(crate) fn emit( SseOpcode::Pminub => (LegacyPrefixes::_66, 0x0FDA, 2), SseOpcode::Pminuw => (LegacyPrefixes::_66, 0x0F383A, 3), SseOpcode::Pminud => (LegacyPrefixes::_66, 0x0F383B, 3), + SseOpcode::Pmuldq => (LegacyPrefixes::_66, 0x0F3828, 3), + SseOpcode::Pmulhw => (LegacyPrefixes::_66, 0x0FE5, 2), + SseOpcode::Pmulhuw => (LegacyPrefixes::_66, 0x0FE4, 2), SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3), SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2), SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2), @@ -1523,7 +1526,9 @@ pub(crate) fn emit( SseOpcode::Psubusb => (LegacyPrefixes::_66, 0x0FD8, 2), SseOpcode::Psubusw => (LegacyPrefixes::_66, 0x0FD9, 2), 
SseOpcode::Punpckhbw => (LegacyPrefixes::_66, 0x0F68, 2), + SseOpcode::Punpckhwd => (LegacyPrefixes::_66, 0x0F69, 2), SseOpcode::Punpcklbw => (LegacyPrefixes::_66, 0x0F60, 2), + SseOpcode::Punpcklwd => (LegacyPrefixes::_66, 0x0F61, 2), SseOpcode::Pxor => (LegacyPrefixes::_66, 0x0FEF, 2), SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2), SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2), diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 295b5daa32fc..e19c67ada3ca 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -1662,7 +1662,407 @@ fn lower_insn_to_regs>( Opcode::Imul => { let ty = ty.unwrap(); - if ty == types::I64X2 { + if let Some(swiden0_high) = matches_input(ctx, inputs[0], Opcode::SwidenHigh) { + if let Some(swiden1_high) = matches_input(ctx, inputs[1], Opcode::SwidenHigh) { + let swiden_input = &[ + InsnInput { + insn: swiden0_high, + input: 0, + }, + InsnInput { + insn: swiden1_high, + input: 0, + }, + ]; + let input0_ty = ctx.input_ty(swiden0_high, 0); + let input1_ty = ctx.input_ty(swiden1_high, 0); + let output_ty = ctx.output_ty(insn, 0); + let lhs = put_input_in_reg(ctx, swiden_input[0]); + let rhs = put_input_in_reg(ctx, swiden_input[1]); + let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + + match (input0_ty, input1_ty, output_ty) { + (types::I8X16, types::I8X16, types::I16X8) => { + // i16x8.extmul_high_i8x16_s: PALIGNR by 8 rotates the high lanes into the low half; widen the rotated register, not the original input + let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap(); + ctx.emit(Inst::gen_move(tmp_reg, lhs, output_ty)); + ctx.emit(Inst::xmm_rm_r_imm( + SseOpcode::Palignr, + RegMem::reg(lhs), + tmp_reg, + 8, + OperandSize::Size32, + )); + ctx.emit(Inst::xmm_mov( + SseOpcode::Pmovsxbw, + RegMem::from(tmp_reg), + tmp_reg, + )); + + ctx.emit(Inst::gen_move(dst, rhs, output_ty)); + ctx.emit(Inst::xmm_rm_r_imm( + SseOpcode::Palignr, + RegMem::reg(rhs), + dst, + 8, + OperandSize::Size32, + )); +
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::from(dst), dst)); + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pmullw, + RegMem::reg(tmp_reg.to_reg()), + dst, + )); + } + (types::I16X8, types::I16X8, types::I32X4) => { + // i32x4.extmul_high_i16x8_s + // MOVDQA xmm_y, xmm_a + ctx.emit(Inst::gen_move(dst, lhs, input0_ty)); + + // MOVDQA xmm_tmp, xmm_a + let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap(); + ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty)); + + // PMULLW xmm_y, xmm_b + ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst)); + + // PMULHW xmm_tmp, xmm_b + ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmulhw, RegMem::reg(rhs), tmp_reg)); + + // PUNPCKHWD xmm_y, xmm_tmp + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Punpckhwd, + RegMem::from(tmp_reg), + dst, + )); + //panic!("Make sure we enter this extmul path signed high {:?} -> {:?}", input0_ty, output_ty); + } + (types::I32X4, types::I32X4, types::I64X2) => { + // i64x2.extmul_high_i32x4_s + // PSHUFD xmm_tmp, xmm_a, 0xFA + let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap(); + ctx.emit(Inst::xmm_rm_r_imm( + SseOpcode::Pshufd, + RegMem::reg(lhs), + tmp_reg, + 0xFA, + OperandSize::Size32, + )); + + // PSHUFD xmm_y, xmm_b, 0xFA + ctx.emit(Inst::xmm_rm_r_imm( + SseOpcode::Pshufd, + RegMem::reg(rhs), + dst, + 0xFA, + OperandSize::Size32, + )); + + // PMULDQ xmm_y, xmm_tmp + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pmuldq, + RegMem::reg(tmp_reg.to_reg()), + dst, + )); + } + _ => panic!("Unsupported extmul_high_signed type"), + } + } + } else if let Some(swiden0_low) = matches_input(ctx, inputs[0], Opcode::SwidenLow) { + if let Some(swiden1_low) = matches_input(ctx, inputs[1], Opcode::SwidenLow) { + let swiden_input = &[ + InsnInput { + insn: swiden0_low, + input: 0, + }, + InsnInput { + insn: swiden1_low, + input: 0, + }, + ]; + let input0_ty = ctx.input_ty(swiden0_low, 0); + let input1_ty = ctx.input_ty(swiden1_low, 0); + let output_ty = ctx.output_ty(insn, 0); + let lhs = put_input_in_reg(ctx,
swiden_input[0]); + let rhs = put_input_in_reg(ctx, swiden_input[1]); + let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + + match (input0_ty, input1_ty, output_ty) { + (types::I8X16, types::I8X16, types::I16X8) => { + // i16x8.extmul_low_i8x16_s + let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap(); + ctx.emit(Inst::xmm_mov( + SseOpcode::Pmovsxbw, + RegMem::reg(lhs), + tmp_reg, + )); + ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::reg(rhs), dst)); + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pmullw, + RegMem::reg(tmp_reg.to_reg()), + dst, + )); + } + (types::I16X8, types::I16X8, types::I32X4) => { + // i32x4.extmul_low_i16x8_s + // MOVDQA xmm_y, xmm_a + ctx.emit(Inst::gen_move(dst, lhs, input0_ty)); + + // MOVDQA xmm_tmp, xmm_a + let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap(); + ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty)); + + // PMULLW xmm_y, xmm_b + ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst)); + + // PMULHW xmm_tmp, xmm_b + ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmulhw, RegMem::reg(rhs), tmp_reg)); + + // PUNPCKLWD xmm_y, xmm_tmp + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Punpcklwd, + RegMem::from(tmp_reg), + dst, + )); + //panic!("Make sure we enter this extmul path signed low {:?} -> {:?}", input0_ty, output_ty); + } + (types::I32X4, types::I32X4, types::I64X2) => { + // i64x2.extmul_low_i32x4_s + // PSHUFD xmm_tmp, xmm_a, 0x50 + let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap(); + ctx.emit(Inst::xmm_rm_r_imm( + SseOpcode::Pshufd, + RegMem::reg(lhs), + tmp_reg, + 0x50, + OperandSize::Size32, + )); + + // PSHUFD xmm_y, xmm_b, 0x50 + ctx.emit(Inst::xmm_rm_r_imm( + SseOpcode::Pshufd, + RegMem::reg(rhs), + dst, + 0x50, + OperandSize::Size32, + )); + + // PMULDQ xmm_y, xmm_tmp + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pmuldq, + RegMem::reg(tmp_reg.to_reg()), + dst, + )); + } + _ => panic!("Unsupported extmul_low_signed type"), + } + } + } else if let Some(uwiden0_high) = matches_input(ctx,
inputs[0], Opcode::UwidenHigh) { + if let Some(uwiden1_high) = matches_input(ctx, inputs[1], Opcode::UwidenHigh) { + let uwiden_input = &[ + InsnInput { + insn: uwiden0_high, + input: 0, + }, + InsnInput { + insn: uwiden1_high, + input: 0, + }, + ]; + let input0_ty = ctx.input_ty(uwiden0_high, 0); + let input1_ty = ctx.input_ty(uwiden1_high, 0); + let output_ty = ctx.output_ty(insn, 0); + let lhs = put_input_in_reg(ctx, uwiden_input[0]); + let rhs = put_input_in_reg(ctx, uwiden_input[1]); + let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + + match (input0_ty, input1_ty, output_ty) { + (types::I8X16, types::I8X16, types::I16X8) => { + // i16x8.extmul_high_i8x16_u: PALIGNR by 8 rotates the high lanes into the low half; widen the rotated register, not the original input + let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap(); + ctx.emit(Inst::gen_move(tmp_reg, lhs, output_ty)); + ctx.emit(Inst::xmm_rm_r_imm( + SseOpcode::Palignr, + RegMem::reg(lhs), + tmp_reg, + 8, + OperandSize::Size32, + )); + ctx.emit(Inst::xmm_mov( + SseOpcode::Pmovzxbw, + RegMem::from(tmp_reg), + tmp_reg, + )); + + ctx.emit(Inst::gen_move(dst, rhs, output_ty)); + ctx.emit(Inst::xmm_rm_r_imm( + SseOpcode::Palignr, + RegMem::reg(rhs), + dst, + 8, + OperandSize::Size32, + )); + ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::from(dst), dst)); + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pmullw, + RegMem::reg(tmp_reg.to_reg()), + dst, + )); + } + (types::I16X8, types::I16X8, types::I32X4) => { + // i32x4.extmul_high_i16x8_u + // MOVDQA xmm_y, xmm_a + ctx.emit(Inst::gen_move(dst, lhs, input0_ty)); + + // MOVDQA xmm_tmp, xmm_a + let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap(); + ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty)); + + // PMULLW xmm_y, xmm_b + ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst)); + + // PMULHUW xmm_tmp, xmm_b + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pmulhuw, + RegMem::reg(rhs), + tmp_reg, + )); + + // PUNPCKHWD xmm_y, xmm_tmp + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Punpckhwd, + RegMem::from(tmp_reg), + dst, + )); + //panic!("Make sure
we enter this extmul path unsigned high {:?} -> {:?}", input0_ty, output_ty); + } + (types::I32X4, types::I32X4, types::I64X2) => { + // i64x2.extmul_high_i32x4_u + // PSHUFD xmm_tmp, xmm_a, 0xFA + let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap(); + ctx.emit(Inst::xmm_rm_r_imm( + SseOpcode::Pshufd, + RegMem::reg(lhs), + tmp_reg, + 0xFA, + OperandSize::Size32, + )); + + // PSHUFD xmm_y, xmm_b, 0xFA + ctx.emit(Inst::xmm_rm_r_imm( + SseOpcode::Pshufd, + RegMem::reg(rhs), + dst, + 0xFA, + OperandSize::Size32, + )); + + // PMULUDQ xmm_y, xmm_tmp + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pmuludq, + RegMem::reg(tmp_reg.to_reg()), + dst, + )); + } + _ => panic!("Unsupported extmul_high_unsigned type"), + } + } + } else if let Some(uwiden0_low) = matches_input(ctx, inputs[0], Opcode::UwidenLow) { + if let Some(uwiden1_low) = matches_input(ctx, inputs[1], Opcode::UwidenLow) { + let uwiden_input = &[ + InsnInput { + insn: uwiden0_low, + input: 0, + }, + InsnInput { + insn: uwiden1_low, + input: 0, + }, + ]; + + let input0_ty = ctx.input_ty(uwiden0_low, 0); + let input1_ty = ctx.input_ty(uwiden1_low, 0); + let output_ty = ctx.output_ty(insn, 0); + let lhs = put_input_in_reg(ctx, uwiden_input[0]); + let rhs = put_input_in_reg(ctx, uwiden_input[1]); + let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + + match (input0_ty, input1_ty, output_ty) { + (types::I8X16, types::I8X16, types::I16X8) => { + // i16x8.extmul_low_i8x16_u + let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap(); + ctx.emit(Inst::xmm_mov( + SseOpcode::Pmovzxbw, + RegMem::reg(lhs), + tmp_reg, + )); + ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::reg(rhs), dst)); + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pmullw, + RegMem::reg(tmp_reg.to_reg()), + dst, + )); + } + (types::I16X8, types::I16X8, types::I32X4) => { + // i32x4.extmul_low_i16x8_u + // MOVDQA xmm_y, xmm_a + ctx.emit(Inst::gen_move(dst, lhs, input0_ty)); + + // MOVDQA xmm_tmp, xmm_a + let tmp_reg =
ctx.alloc_tmp(types::I16X8).only_reg().unwrap(); + ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty)); + + // PMULLW xmm_y, xmm_b + ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst)); + + // PMULHUW xmm_tmp, xmm_b + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pmulhuw, + RegMem::reg(rhs), + tmp_reg, + )); + + // PUNPCKLWD xmm_y, xmm_tmp + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Punpcklwd, + RegMem::from(tmp_reg), + dst, + )); + //panic!("Make sure we enter this extmul path unsigned low {:?} -> {:?}", input0_ty, output_ty); + } + (types::I32X4, types::I32X4, types::I64X2) => { + // y = i64x2.extmul_low_i32x4_u(a, b) is lowered to PSHUFD xmm_tmp, xmm_a, 0x50 + PSHUFD xmm_y, xmm_b, 0x50 + PMULUDQ xmm_y, xmm_tmp + // i64x2.extmul_low_i32x4_u + // PSHUFD xmm_tmp, xmm_a, 0x50 + let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap(); + ctx.emit(Inst::xmm_rm_r_imm( + SseOpcode::Pshufd, + RegMem::reg(lhs), + tmp_reg, + 0x50, + OperandSize::Size32, + )); + + // PSHUFD xmm_y, xmm_b, 0x50 + ctx.emit(Inst::xmm_rm_r_imm( + SseOpcode::Pshufd, + RegMem::reg(rhs), + dst, + 0x50, + OperandSize::Size32, + )); + + // PMULUDQ xmm_y, xmm_tmp + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pmuludq, + RegMem::reg(tmp_reg.to_reg()), + dst, + )); + } + _ => panic!("Unsupported extmul_low_unsigned type"), + } + } + } else if ty == types::I64X2 { // Eventually one of these should be `input_to_reg_mem` (TODO).
let lhs = put_input_in_reg(ctx, inputs[0]); let rhs = put_input_in_reg(ctx, inputs[1]); diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index 3b03a193d709..8be5b24d8ec9 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -1911,19 +1911,79 @@ pub fn translate_operator( state.push1(builder.ins().sqmul_round_sat(a, b)) } - Operator::I16x8ExtMulLowI8x16S - | Operator::I16x8ExtMulHighI8x16S - | Operator::I16x8ExtMulLowI8x16U - | Operator::I16x8ExtMulHighI8x16U - | Operator::I32x4ExtMulLowI16x8S - | Operator::I32x4ExtMulHighI16x8S - | Operator::I32x4ExtMulLowI16x8U - | Operator::I32x4ExtMulHighI16x8U - | Operator::I64x2ExtMulLowI32x4S - | Operator::I64x2ExtMulHighI32x4S - | Operator::I64x2ExtMulLowI32x4U - | Operator::I64x2ExtMulHighI32x4U - | Operator::I16x8ExtAddPairwiseI8x16S + Operator::I16x8ExtMulLowI8x16S => { + let (a, b) = pop2_with_bitcast(state, I8X16, builder); + let a_low = builder.ins().swiden_low(a); + let b_low = builder.ins().swiden_low(b); + state.push1(builder.ins().imul(a_low, b_low)); + } + Operator::I16x8ExtMulHighI8x16S => { + let (a, b) = pop2_with_bitcast(state, I8X16, builder); + let a_high = builder.ins().swiden_high(a); + let b_high = builder.ins().swiden_high(b); + state.push1(builder.ins().imul(a_high, b_high)); + } + Operator::I16x8ExtMulLowI8x16U => { + let (a, b) = pop2_with_bitcast(state, I8X16, builder); + let a_low = builder.ins().uwiden_low(a); + let b_low = builder.ins().uwiden_low(b); + state.push1(builder.ins().imul(a_low, b_low)); + } + Operator::I16x8ExtMulHighI8x16U => { + let (a, b) = pop2_with_bitcast(state, I8X16, builder); + let a_high = builder.ins().uwiden_high(a); + let b_high = builder.ins().uwiden_high(b); + state.push1(builder.ins().imul(a_high, b_high)); + } + Operator::I32x4ExtMulLowI16x8S => { + let (a, b) = pop2_with_bitcast(state, I16X8, builder); + let a_low = builder.ins().swiden_low(a); + let b_low = 
builder.ins().swiden_low(b); + state.push1(builder.ins().imul(a_low, b_low)); + } + Operator::I32x4ExtMulHighI16x8S => { + let (a, b) = pop2_with_bitcast(state, I16X8, builder); + let a_high = builder.ins().swiden_high(a); + let b_high = builder.ins().swiden_high(b); + state.push1(builder.ins().imul(a_high, b_high)); + } + Operator::I32x4ExtMulLowI16x8U => { + let (a, b) = pop2_with_bitcast(state, I16X8, builder); + let a_low = builder.ins().uwiden_low(a); + let b_low = builder.ins().uwiden_low(b); + state.push1(builder.ins().imul(a_low, b_low)); + } + Operator::I32x4ExtMulHighI16x8U => { + let (a, b) = pop2_with_bitcast(state, I16X8, builder); + let a_high = builder.ins().uwiden_high(a); + let b_high = builder.ins().uwiden_high(b); + state.push1(builder.ins().imul(a_high, b_high)); + } + Operator::I64x2ExtMulLowI32x4S => { + let (a, b) = pop2_with_bitcast(state, I32X4, builder); + let a_low = builder.ins().swiden_low(a); + let b_low = builder.ins().swiden_low(b); + state.push1(builder.ins().imul(a_low, b_low)); + } + Operator::I64x2ExtMulHighI32x4S => { + let (a, b) = pop2_with_bitcast(state, I32X4, builder); + let a_high = builder.ins().swiden_high(a); + let b_high = builder.ins().swiden_high(b); + state.push1(builder.ins().imul(a_high, b_high)); + } + Operator::I64x2ExtMulLowI32x4U => { + let (a, b) = pop2_with_bitcast(state, I32X4, builder); + let a_low = builder.ins().uwiden_low(a); + let b_low = builder.ins().uwiden_low(b); + state.push1(builder.ins().imul(a_low, b_low)); + } + Operator::I64x2ExtMulHighI32x4U => { + let (a, b) = pop2_with_bitcast(state, I32X4, builder); + let a_high = builder.ins().uwiden_high(a); + let b_high = builder.ins().uwiden_high(b); + state.push1(builder.ins().imul(a_high, b_high)); + } + Operator::I16x8ExtAddPairwiseI8x16S | Operator::I16x8ExtAddPairwiseI8x16U | Operator::I32x4ExtAddPairwiseI16x8S | Operator::I32x4ExtAddPairwiseI16x8U => {