Enable the simd_i16x8_q15mulr_sat_s test on AArch64

bytecodealliance · Jun 25, 2021 · bd879ce · bd879ce
1 parent 1047c4e
commit bd879ce
Show file tree

Hide file tree

Showing 9 changed files with 96 additions and 176 deletions.
diff --git a/build.rs b/build.rs
@@ -231,7 +231,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
             ("simd", "simd_conversions")
             | ("simd", "simd_i16x8_extadd_pairwise_i8x16")
             | ("simd", "simd_i16x8_extmul_i8x16")
-            | ("simd", "simd_i16x8_q15mulr_sat_s")
             | ("simd", "simd_i32x4_extadd_pairwise_i16x8")
             | ("simd", "simd_i32x4_extmul_i16x8")
             | ("simd", "simd_i32x4_trunc_sat_f64x2")

diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -2479,6 +2479,33 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
+    let I16or32 = &TypeVar::new(
+        "I16or32",
+        "A scalar or vector integer type with 16- or 32-bit numbers",
+        TypeSetBuilder::new().ints(16..32).simd_lanes(4..8).build(),
+    );
+
+    let qx = &Operand::new("x", I16or32);
+    let qy = &Operand::new("y", I16or32);
+    let qa = &Operand::new("a", I16or32);
+
+    ig.push(
+        Inst::new(
+            "sqmul_round_sat",
+            r#"
+        Fixed-point multiplication of numbers in the QN format, where N + 1
+        is the number bitwidth:
+        `a := signed_saturate((x * y + (1 << (N - 1))) >> N)`
+
+        Polymorphic over all integer types (scalar and vector) with 16- or
+        32-bit numbers.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![qx, qy])
+        .operands_out(vec![qa]),
+    );
+
     ig.push(
         Inst::new(
             "udiv",

diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -2161,6 +2161,14 @@ impl MachInstEmit for Inst {
                     VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
                     VecALUOp::Smull => (0b000_01110_00_1 | enc_size << 1, 0b110000),
                     VecALUOp::Smull2 => (0b010_01110_00_1 | enc_size << 1, 0b110000),
+                    VecALUOp::Sqrdmulh => {
+                        debug_assert!(
+                            size.lane_size() == ScalarSize::Size16
+                                || size.lane_size() == ScalarSize::Size32
+                        );
+
+                        (0b001_01110_00_1 | enc_size << 1, 0b101101)
+                    }
                 };
                 let top11 = match alu_op {
                     VecALUOp::Smull | VecALUOp::Smull2 => top11,

diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -3610,6 +3610,30 @@ fn test_aarch64_binemit() {
         "smull2 v8.2d, v12.4s, v14.4s",
     ));
 
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Sqrdmulh,
+            rd: writable_vreg(31),
+            rn: vreg(0),
+            rm: vreg(31),
+            size: VectorSize::Size16x8,
+        },
+        "1FB47F6E",
+        "sqrdmulh v31.8h, v0.8h, v31.8h",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Sqrdmulh,
+            rd: writable_vreg(7),
+            rn: vreg(7),
+            rm: vreg(23),
+            size: VectorSize::Size32x2,
+        },
+        "E7B4B72E",
+        "sqrdmulh v7.2s, v7.2s, v23.2s",
+    ));
+
     insns.push((
         Inst::VecMisc {
             op: VecMisc2::Not,

diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -311,6 +311,8 @@ pub enum VecALUOp {
     Smull,
     /// Signed multiply long (high halves)
     Smull2,
+    /// Signed saturating rounding doubling multiply returning high half
+    Sqrdmulh,
 }
 
 /// A Vector miscellaneous operation with two registers.
@@ -3980,6 +3982,7 @@ impl Inst {
                     VecALUOp::Zip1 => ("zip1", size),
                     VecALUOp::Smull => ("smull", size),
                     VecALUOp::Smull2 => ("smull2", size),
+                    VecALUOp::Sqrdmulh => ("sqrdmulh", size),
                 };
                 let rd_size = match alu_op {
                     VecALUOp::Umlal | VecALUOp::Smull | VecALUOp::Smull2 => size.widen(),

diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1650,8 +1650,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             panic!("table_addr should have been removed by legalization!");
         }
 
-        Opcode::ConstAddr => unimplemented!(),
-
         Opcode::Nop => {
             // Nothing.
         }
@@ -2684,11 +2682,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             });
         }
 
-        Opcode::Vsplit | Opcode::Vconcat => {
-            // TODO
-            panic!("Vector ops not implemented.");
-        }
-
         Opcode::Isplit => {
             assert_eq!(
                 ctx.input_ty(insn, 0),
@@ -3524,9 +3517,30 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             }
         },
 
-        Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
-        Opcode::FvpromoteLow => unimplemented!("FvpromoteLow"),
-        Opcode::Fvdemote => unimplemented!("Fvdemote"),
+        Opcode::SqmulRoundSat => {
+            let ty = ty.unwrap();
+
+            if !ty.is_vector() || (ty.lane_type() != I16 && ty.lane_type() != I32) {
+                return Err(CodegenError::Unsupported(format!(
+                    "Unsupported type: {:?}",
+                    ty
+                )));
+            }
+
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+
+            ctx.emit(Inst::VecRRR {
+                alu_op: VecALUOp::Sqrdmulh,
+                rd,
+                rn,
+                rm,
+                size: VectorSize::from_ty(ty),
+            });
+        }
+
+        _ => unimplemented!("lowering {}", op),
     }
 
     Ok(())

diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs
@@ -976,9 +976,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             ctx.emit(Inst::AluRUImm32 { alu_op, rd, imm });
         }
 
-        Opcode::UaddSat | Opcode::SaddSat => unimplemented!(),
-        Opcode::UsubSat | Opcode::SsubSat => unimplemented!(),
-
         Opcode::Iabs => {
             let ty = ty.unwrap();
             let op = choose_32_64(ty, UnaryOp::Abs32, UnaryOp::Abs64);
@@ -1891,8 +1888,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             });
         }
 
-        Opcode::Bitrev => unimplemented!(),
-
         Opcode::Popcnt => {
             let ty = ty.unwrap();
             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
@@ -2151,8 +2146,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             }
         }
 
-        Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
-
         Opcode::Bitcast => {
             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
@@ -2399,8 +2392,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             ctx.emit(inst);
         }
 
-        Opcode::ConstAddr => unimplemented!(),
-
         Opcode::FuncAddr => {
             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
             let (extname, dist) = ctx.call_target(insn).unwrap();
@@ -2457,14 +2448,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             panic!("global_value should have been removed by legalization!");
         }
 
-        Opcode::TlsValue => {
-            panic!("Thread-local storage support not implemented!");
-        }
-
-        Opcode::GetPinnedReg | Opcode::SetPinnedReg => {
-            panic!("Pinned register support not implemented!");
-        }
-
         Opcode::Icmp => {
             let condcode = ctx.data(insn).cond_code().unwrap();
             let cond = Cond::from_intcc(condcode);
@@ -2825,54 +2808,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             ctx.emit(Inst::Fence);
         }
 
-        Opcode::RawBitcast
-        | Opcode::Splat
-        | Opcode::Swizzle
-        | Opcode::Insertlane
-        | Opcode::Extractlane
-        | Opcode::Imin
-        | Opcode::Umin
-        | Opcode::Imax
-        | Opcode::Umax
-        | Opcode::AvgRound
-        | Opcode::FminPseudo
-        | Opcode::FmaxPseudo
-        | Opcode::Uload8x8
-        | Opcode::Uload8x8Complex
-        | Opcode::Sload8x8
-        | Opcode::Sload8x8Complex
-        | Opcode::Uload16x4
-        | Opcode::Uload16x4Complex
-        | Opcode::Sload16x4
-        | Opcode::Sload16x4Complex
-        | Opcode::Uload32x2
-        | Opcode::Uload32x2Complex
-        | Opcode::Sload32x2
-        | Opcode::Sload32x2Complex
-        | Opcode::Vconst
-        | Opcode::Shuffle
-        | Opcode::Vsplit
-        | Opcode::Vconcat
-        | Opcode::Vselect
-        | Opcode::VanyTrue
-        | Opcode::VallTrue
-        | Opcode::VhighBits
-        | Opcode::ScalarToVector
-        | Opcode::Snarrow
-        | Opcode::Unarrow
-        | Opcode::SwidenLow
-        | Opcode::SwidenHigh
-        | Opcode::UwidenLow
-        | Opcode::UwidenHigh
-        | Opcode::WideningPairwiseDotProductS
-        | Opcode::FvpromoteLow
-        | Opcode::Fvdemote => {
-            // TODO
-            panic!("Vector ops not implemented.");
-        }
-
-        Opcode::Isplit | Opcode::Iconcat => panic!("Wide integer ops not implemented."),
-
         Opcode::Spill
         | Opcode::Fill
         | Opcode::FillNop
@@ -2988,6 +2923,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         | Opcode::X86MachoTlsGetAddr => {
             panic!("x86-specific opcode in supposedly arch-neutral IR!");
         }
+
+        _ => unimplemented!("lowering {}", op),
     }
 
     Ok(())