bytecodealliance · cfallin · Aug 1, 2021 · Jun 15, 2021 · Jul 30, 2021
@@ -156,10 +156,8 @@ fn write_testsuite_tests(
     let testname = extract_name(path);
 
     writeln!(out, "#[test]")?;
-    if x64_should_panic(testsuite, &testname, strategy) {
-        writeln!(out, r#"#[should_panic]"#)?;
     // Ignore when using QEMU for running tests (limited memory).
-    } else if ignore(testsuite, &testname, strategy) || (pooling && platform_is_emulated()) {
+    if ignore(testsuite, &testname, strategy) || (pooling && platform_is_emulated()) {
         writeln!(out, "#[ignore]")?;
     }
 
@@ -182,22 +180,6 @@ fn write_testsuite_tests(
     Ok(())
 }
 
-/// For x64 backend features that are not supported yet, mark tests as panicking, so
-/// they stop "passing" once the features are properly implemented.
-fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
-    if !platform_is_x64() || strategy != "Cranelift" {
-        return false;
-    }
-
-    match (testsuite, testname) {
-        ("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
-        ("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
-        ("simd", _) => return false,
-        _ => {}
-    }
-    false
-}
-
 /// Ignore tests that aren't supported yet.
 fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
     match strategy {
@@ -220,11 +202,13 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
             ("simd", _) if cfg!(feature = "old-x86-backend") => return true,
             // No simd support yet for s390x.
             ("simd", _) if platform_is_s390x() => return true,
-
-            // These are new instructions that are not really implemented in any backend.
+            // These are new instructions that are only known to be supported for x64.
             ("simd", "simd_i16x8_extadd_pairwise_i8x16")
-            | ("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
-
+            | ("simd", "simd_i32x4_extadd_pairwise_i16x8")
+                if !platform_is_x64() =>
+            {
+                return true
+            }
             _ => {}
         },
         _ => panic!("unrecognized strategy"),

@@ -4114,16 +4114,37 @@ pub(crate) fn define(
         Inst::new(
             "uwiden_high",
             r#"
-        Widen the high lanes of `x` using unsigned extension.
+            Widen the high lanes of `x` using unsigned extension.
 
-        This will double the lane width and halve the number of lanes.
+            This will double the lane width and halve the number of lanes.
             "#,
             &formats.unary,
         )
         .operands_in(vec![x])
         .operands_out(vec![a]),
     );
 
+    let x = &Operand::new("x", I8or16or32xN);
+    let y = &Operand::new("y", I8or16or32xN);
+    let a = &Operand::new("a", I8or16or32xN);
+
+    ig.push(
+        Inst::new(
+            "iadd_pairwise",
+            r#"
+        Does lane-wise integer pairwise addition on two operands, putting the
+        combined results into a single vector result. Here a pair refers to adjacent
+        lanes in a vector, i.e. i*2 + (i*2+1) for i in 0..num_lanes/2. The first operand
+        pairwise add results will make up the low half of the resulting vector while
+        the second operand pairwise add results will make up the upper half of the
+        resulting vector.
+            "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
     let I16x8 = &TypeVar::new(
         "I16x8",
         "A SIMD vector type containing 8 integer lanes each 16 bits wide.",

@@ -3519,7 +3519,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             });
         }
 
-        Opcode::ConstAddr | Opcode::Vconcat | Opcode::Vsplit => unimplemented!("lowering {}", op),
+        Opcode::IaddPairwise | Opcode::ConstAddr | Opcode::Vconcat | Opcode::Vsplit => {
+            unimplemented!("lowering {}", op)
+        }
     }
 
     Ok(())

@@ -2868,7 +2868,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         | Opcode::WideningPairwiseDotProductS
         | Opcode::SqmulRoundSat
         | Opcode::FvpromoteLow
-        | Opcode::Fvdemote => {
+        | Opcode::Fvdemote
+        | Opcode::IaddPairwise => {
             // TODO
             unimplemented!("Vector ops not implemented.");
         }

@@ -568,6 +568,7 @@ pub enum SseOpcode {
     Pinsrb,
     Pinsrw,
     Pinsrd,
+    Pmaddubsw,
     Pmaddwd,
     Pmaxsb,
     Pmaxsw,
@@ -746,6 +747,7 @@ impl SseOpcode {
             | SseOpcode::Pcmpgtd
             | SseOpcode::Pextrw
             | SseOpcode::Pinsrw
+            | SseOpcode::Pmaddubsw
             | SseOpcode::Pmaddwd
             | SseOpcode::Pmaxsw
             | SseOpcode::Pmaxub
@@ -944,6 +946,7 @@ impl fmt::Debug for SseOpcode {
             SseOpcode::Pinsrb => "pinsrb",
             SseOpcode::Pinsrw => "pinsrw",
             SseOpcode::Pinsrd => "pinsrd",
+            SseOpcode::Pmaddubsw => "pmaddubsw",
             SseOpcode::Pmaddwd => "pmaddwd",
             SseOpcode::Pmaxsb => "pmaxsb",
             SseOpcode::Pmaxsw => "pmaxsw",

@@ -1483,6 +1483,7 @@ pub(crate) fn emit(
                 SseOpcode::Paddsw => (LegacyPrefixes::_66, 0x0FED, 2),
                 SseOpcode::Paddusb => (LegacyPrefixes::_66, 0x0FDC, 2),
                 SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2),
+                SseOpcode::Pmaddubsw => (LegacyPrefixes::_66, 0x0F3804, 3),
                 SseOpcode::Pand => (LegacyPrefixes::_66, 0x0FDB, 2),
                 SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
                 SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),

@@ -4927,6 +4927,165 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 }
             }
         }
+        Opcode::IaddPairwise => {
+            if let (Some(swiden_low), Some(swiden_high)) = (
+                matches_input(ctx, inputs[0], Opcode::SwidenLow),
+                matches_input(ctx, inputs[1], Opcode::SwidenHigh),
+            ) {
+                let swiden_input = &[
+                    InsnInput {
+                        insn: swiden_low,
+                        input: 0,
+                    },
+                    InsnInput {
+                        insn: swiden_high,
+                        input: 0,
+                    },
+                ];
+
+                let input_ty = ctx.input_ty(swiden_low, 0);
+                let output_ty = ctx.output_ty(insn, 0);
+                let src0 = put_input_in_reg(ctx, swiden_input[0]);
+                let src1 = put_input_in_reg(ctx, swiden_input[1]);
+                let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+                if src0 != src1 {
+                    unimplemented!(
+                        "iadd_pairwise not implemented for general case with different inputs"
+                    );
+                }
+                match (input_ty, output_ty) {
+                    (types::I8X16, types::I16X8) => {
+                        static MUL_CONST: [u8; 16] = [0x01; 16];
+                        let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
+                        let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
+                        ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
+                        ctx.emit(Inst::xmm_mov(
+                            SseOpcode::Movdqa,
+                            RegMem::reg(mul_const_reg.to_reg()),
+                            dst,
+                        ));
+                        ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src0), dst));
+                    }
+                    (types::I16X8, types::I32X4) => {
+                        static MUL_CONST: [u8; 16] = [
+                            0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
+                            0x01, 0x00, 0x01, 0x00,
+                        ];
+                        let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
+                        let mul_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
+                        ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I16X8));
+                        ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
+                        ctx.emit(Inst::xmm_rm_r(
+                            SseOpcode::Pmaddwd,
+                            RegMem::reg(mul_const_reg.to_reg()),
+                            dst,
+                        ));
+                    }
+                    _ => {
+                        unimplemented!("Type not supported for {:?}", op);
+                    }
+                }
+            } else if let (Some(uwiden_low), Some(uwiden_high)) = (
+                matches_input(ctx, inputs[0], Opcode::UwidenLow),
+                matches_input(ctx, inputs[1], Opcode::UwidenHigh),
+            ) {
+                let uwiden_input = &[
+                    InsnInput {
+                        insn: uwiden_low,
+                        input: 0,
+                    },
+                    InsnInput {
+                        insn: uwiden_high,
+                        input: 0,
+                    },
+                ];
+
+                let input_ty = ctx.input_ty(uwiden_low, 0);
+                let output_ty = ctx.output_ty(insn, 0);
+                let src0 = put_input_in_reg(ctx, uwiden_input[0]);
+                let src1 = put_input_in_reg(ctx, uwiden_input[1]);
+                let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+                if src0 != src1 {
+                    unimplemented!(
+                        "iadd_pairwise not implemented for general case with different inputs"
+                    );
+                }
+                match (input_ty, output_ty) {
+                    (types::I8X16, types::I16X8) => {
+                        static MUL_CONST: [u8; 16] = [0x01; 16];
+                        let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
+                        let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
+                        ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
+                        ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
+                        ctx.emit(Inst::xmm_rm_r(
+                            SseOpcode::Pmaddubsw,
+                            RegMem::reg(mul_const_reg.to_reg()),
+                            dst,
+                        ));
+                    }
+                    (types::I16X8, types::I32X4) => {
+                        static PXOR_CONST: [u8; 16] = [
+                            0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+                            0x00, 0x80, 0x00, 0x80,
+                        ];
+                        let pxor_const =
+                            ctx.use_constant(VCodeConstantData::WellKnown(&PXOR_CONST));
+                        let pxor_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
+                        ctx.emit(Inst::xmm_load_const(
+                            pxor_const,
+                            pxor_const_reg,
+                            types::I16X8,
+                        ));
+                        ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
+                        ctx.emit(Inst::xmm_rm_r(
+                            SseOpcode::Pxor,
+                            RegMem::reg(pxor_const_reg.to_reg()),
+                            dst,
+                        ));
+
+                        static MADD_CONST: [u8; 16] = [
+                            0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
+                            0x01, 0x00, 0x01, 0x00,
+                        ];
+                        let madd_const =
+                            ctx.use_constant(VCodeConstantData::WellKnown(&MADD_CONST));
+                        let madd_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
+                        ctx.emit(Inst::xmm_load_const(
+                            madd_const,
+                            madd_const_reg,
+                            types::I16X8,
+                        ));
+                        ctx.emit(Inst::xmm_rm_r(
+                            SseOpcode::Pmaddwd,
+                            RegMem::reg(madd_const_reg.to_reg()),
+                            dst,
+                        ));
+                        static ADDD_CONST2: [u8; 16] = [
+                            0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
+                            0x00, 0x00, 0x01, 0x00,
+                        ];
+                        let addd_const2 =
+                            ctx.use_constant(VCodeConstantData::WellKnown(&ADDD_CONST2));
+                        let addd_const2_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
+                        ctx.emit(Inst::xmm_load_const(
+                            addd_const2,
+                            addd_const2_reg,
+                            types::I16X8,
+                        ));
+                        ctx.emit(Inst::xmm_rm_r(
+                            SseOpcode::Paddd,
+                            RegMem::reg(addd_const2_reg.to_reg()),
+                            dst,
+                        ));
+                    }
+                    _ => {
+                        unimplemented!("Type not supported for {:?}", op);
+                    }
+                }
+            } else {
+                unimplemented!("Operands not supported for {:?}", op);
+            }
+        }
         Opcode::UwidenHigh | Opcode::UwidenLow | Opcode::SwidenHigh | Opcode::SwidenLow => {
             let input_ty = ctx.input_ty(insn, 0);
             let output_ty = ctx.output_ty(insn, 0);

@@ -630,6 +630,7 @@ where
         Opcode::Fence => unimplemented!("Fence"),
         Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
         Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),
+        Opcode::IaddPairwise => unimplemented!("IaddPairwise"),
 
         // TODO: these instructions should be removed once the new backend makes these obsolete
         // (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the

@@ -1879,6 +1879,30 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
             let a = pop1_with_bitcast(state, I32X4, builder);
             state.push1(builder.ins().uwiden_high(a))
         }
+        Operator::I16x8ExtAddPairwiseI8x16S => {
+            let a = pop1_with_bitcast(state, I8X16, builder);
+            let widen_low = builder.ins().swiden_low(a);
+            let widen_high = builder.ins().swiden_high(a);
+            state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
+        }
+        Operator::I32x4ExtAddPairwiseI16x8S => {
+            let a = pop1_with_bitcast(state, I16X8, builder);
+            let widen_low = builder.ins().swiden_low(a);
+            let widen_high = builder.ins().swiden_high(a);
+            state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
+        }
+        Operator::I16x8ExtAddPairwiseI8x16U => {
+            let a = pop1_with_bitcast(state, I8X16, builder);
+            let widen_low = builder.ins().uwiden_low(a);
+            let widen_high = builder.ins().uwiden_high(a);
+            state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
+        }
+        Operator::I32x4ExtAddPairwiseI16x8U => {
+            let a = pop1_with_bitcast(state, I16X8, builder);
+            let widen_low = builder.ins().uwiden_low(a);
+            let widen_high = builder.ins().uwiden_high(a);
+            state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
+        }
         Operator::F32x4Ceil | Operator::F64x2Ceil => {
             // This is something of a misuse of `type_of`, because that produces the return type
             // of `op`.  In this case we want the arg type, but we know it's the same as the
@@ -1982,12 +2006,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
             let b_high = builder.ins().uwiden_high(b);
             state.push1(builder.ins().imul(a_high, b_high));
         }
-        Operator::I16x8ExtAddPairwiseI8x16S
-        | Operator::I16x8ExtAddPairwiseI8x16U
-        | Operator::I32x4ExtAddPairwiseI16x8S
-        | Operator::I32x4ExtAddPairwiseI16x8U => {
-            return Err(wasm_unsupported!("proposed simd operator {:?}", op));
-        }
         Operator::ReturnCall { .. } | Operator::ReturnCallIndirect { .. } => {
             return Err(wasm_unsupported!("proposed tail-call operator {:?}", op));
         }
-Original file line number
+Diff line change
@@ Expand Up / @@ -3519,7 +3519,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( @@
                 });
             }
-            Opcode::ConstAddr | Opcode::Vconcat | Opcode::Vsplit => unimplemented!("lowering {}", op),
+            Opcode::IaddPairwise | Opcode::ConstAddr | Opcode::Vconcat | Opcode::Vsplit => {
+                unimplemented!("lowering {}", op)
+            }
         }
         Ok(())
@@ Expand Down @@