Enable simd_extmul_* for AArch64

bytecodealliance · Jul 8, 2021 · 00b264c · 00b264c
1 parent 89ed663
commit 00b264c
Show file tree

Hide file tree

Showing 8 changed files with 790 additions and 259 deletions.
diff --git a/build.rs b/build.rs
@@ -191,10 +191,8 @@ fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
     match (testsuite, testname) {
         ("simd", "simd_conversions") => return true, // unknown operator or unexpected token: tests/spec_testsuite/proposals/simd/simd_conversions.wast:724:6
         ("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
-        ("simd", "simd_i16x8_extmul_i8x16") => return true,
         ("simd", "simd_i16x8_q15mulr_sat_s") => return true,
         ("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
-        ("simd", "simd_i32x4_extmul_i16x8") => return true,
         ("simd", "simd_i32x4_trunc_sat_f64x2") => return true,
         ("simd", "simd_i64x2_extmul_i32x4") => return true,
         ("simd", "simd_int_to_int_extend") => return true,
@@ -230,11 +228,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
             // These are new instructions that are not really implemented in any backend.
             ("simd", "simd_conversions")
             | ("simd", "simd_i16x8_extadd_pairwise_i8x16")
-            | ("simd", "simd_i16x8_extmul_i8x16")
-            | ("simd", "simd_i32x4_extadd_pairwise_i16x8")
-            | ("simd", "simd_i32x4_extmul_i16x8")
-            | ("simd", "simd_i64x2_extmul_i32x4") => return true,
-
+            | ("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
             _ => {}
         },
         _ => panic!("unrecognized strategy"),

diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -288,6 +288,22 @@ fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -
         | machreg_to_vec(rd.to_reg())
 }
 
+fn enc_vec_rrr_long(q: u32, u: u32, size: u32, bit14: u32, rm: Reg, rn: Reg, rd: Writable<Reg>) -> u32 { // Packs the given fields and three vector registers into one 32-bit instruction word.
+  debug_assert_eq!(q & 0b1, q); // q must fit in one bit
+  debug_assert_eq!(u & 0b1, u); // u must fit in one bit
+  debug_assert_eq!(size & 0b11, size); // size must fit in two bits
+  debug_assert_eq!(bit14 & 0b1, bit14); // bit14 must fit in one bit
+
+  0b0_0_0_01110_00_1_00000_100000_00000_00000 // fixed bits of the word; the variable fields below are OR-ed in
+        | q << 30 // the VecRRRLong emitter passes `high_half as u32` here
+        | u << 29 // emitter passes 0 for Smull*, 1 for Umull* and Umlal*
+        | size << 22 // emitter passes 0b00 / 0b01 / 0b10 for the 8 / 16 / 32 op variants
+        | bit14 << 14 // emitter passes 1 for Smull*/Umull*, 0 for Umlal*
+        | (machreg_to_vec(rm) << 16) // rm register number placed starting at bit 16
+        | (machreg_to_vec(rn) << 5) // rn register number placed starting at bit 5
+        | machreg_to_vec(rd.to_reg()) // rd register number in the lowest bits
+}
+
 fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
     (0b01011010110 << 21)
         | size << 31
@@ -2172,6 +2188,26 @@ impl MachInstEmit for Inst {
 
                 sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn));
             }
+            &Inst::VecRRRLong {
+                rd,
+                rn,
+                rm,
+                alu_op,
+                high_half,
+            } => {
+                let (u, size, bit14) = match alu_op {
+                    VecRRRLongOp::Smull8 => (0b0, 0b00, 0b1),
+                    VecRRRLongOp::Smull16 => (0b0, 0b01, 0b1),
+                    VecRRRLongOp::Smull32 => (0b0, 0b10, 0b1),
+                    VecRRRLongOp::Umull8 => (0b1, 0b00, 0b1),
+                    VecRRRLongOp::Umull16 => (0b1, 0b01, 0b1),
+                    VecRRRLongOp::Umull32 => (0b1, 0b10, 0b1),
+                    VecRRRLongOp::Umlal8 => (0b1, 0b00, 0b0),
+                    VecRRRLongOp::Umlal16 => (0b1, 0b01, 0b0),
+                    VecRRRLongOp::Umlal32 => (0b1, 0b10, 0b0),
+                };
+                sink.put4(enc_vec_rrr_long(high_half as u32, u, size, bit14, rm, rn, rd));
+            }
             &Inst::VecRRR {
                 rd,
                 rn,
@@ -2241,13 +2277,7 @@ impl MachInstEmit for Inst {
                     VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
                     VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
                     VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
-                    VecALUOp::Umlal => {
-                        debug_assert!(!size.is_128bits());
-                        (0b001_01110_00_1 | enc_size << 1, 0b100000)
-                    }
                     VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
-                    VecALUOp::Smull => (0b000_01110_00_1 | enc_size << 1, 0b110000),
-                    VecALUOp::Smull2 => (0b010_01110_00_1 | enc_size << 1, 0b110000),
                     VecALUOp::Sqrdmulh => {
                         debug_assert!(
                             size.lane_size() == ScalarSize::Size16
@@ -2257,12 +2287,12 @@ impl MachInstEmit for Inst {
                         (0b001_01110_00_1 | enc_size << 1, 0b101101)
                     }
                 };
-                let top11 = match alu_op {
-                    VecALUOp::Smull | VecALUOp::Smull2 => top11,
-                    _ if is_float => top11 | (q << 9) | enc_float_size << 1,
-                    _ => top11 | (q << 9),
+                let top11 = if is_float {
+                  top11 | enc_float_size << 1
+                } else {
+                  top11
                 };
-                sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
+                sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
             }
             &Inst::VecLoadReplicate { rd, rn, size } => {
                 let (q, size) = size.enc_size();

diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -3629,18 +3629,6 @@ fn test_aarch64_binemit() {
         "addp v8.4s, v12.4s, v14.4s",
     ));
 
-    insns.push((
-        Inst::VecRRR {
-            alu_op: VecALUOp::Umlal,
-            rd: writable_vreg(9),
-            rn: vreg(20),
-            rm: vreg(17),
-            size: VectorSize::Size32x2,
-        },
-        "8982B12E",
-        "umlal v9.2d, v20.2s, v17.2s",
-    ));
-
     insns.push((
         Inst::VecRRR {
             alu_op: VecALUOp::Zip1,
@@ -3690,77 +3678,221 @@ fn test_aarch64_binemit() {
     ));
 
     insns.push((
-        Inst::VecRRR {
-            alu_op: VecALUOp::Smull,
+        Inst::VecRRRLong {
+            alu_op: VecRRRLongOp::Smull8,
             rd: writable_vreg(16),
             rn: vreg(12),
             rm: vreg(1),
-            size: VectorSize::Size8x16,
+            high_half: false
         },
         "90C1210E",
         "smull v16.8h, v12.8b, v1.8b",
     ));
 
     insns.push((
-        Inst::VecRRR {
-            alu_op: VecALUOp::Smull,
+        Inst::VecRRRLong {
+            alu_op: VecRRRLongOp::Umull8,
+            rd: writable_vreg(15),
+            rn: vreg(11),
+            rm: vreg(2),
+            high_half: false
+        },
+        "6FC1222E",
+        "umull v15.8h, v11.8b, v2.8b",
+    ));
+
+    insns.push((
+        Inst::VecRRRLong {
+            alu_op: VecRRRLongOp::Umlal8,
+            rd: writable_vreg(4),
+            rn: vreg(8),
+            rm: vreg(16),
+            high_half: false
+        },
+        "0481302E",
+        "umlal v4.8h, v8.8b, v16.8b",
+    ));
+
+    insns.push((
+        Inst::VecRRRLong {
+            alu_op: VecRRRLongOp::Smull16,
             rd: writable_vreg(2),
             rn: vreg(13),
             rm: vreg(6),
-            size: VectorSize::Size16x8,
+            high_half: false,
         },
         "A2C1660E",
         "smull v2.4s, v13.4h, v6.4h",
     ));
 
     insns.push((
-        Inst::VecRRR {
-            alu_op: VecALUOp::Smull,
+        Inst::VecRRRLong {
+            alu_op: VecRRRLongOp::Umull16,
+            rd: writable_vreg(3),
+            rn: vreg(14),
+            rm: vreg(7),
+            high_half: false,
+        },
+        "C3C1672E",
+        "umull v3.4s, v14.4h, v7.4h",
+    ));
+
+    insns.push((
+        Inst::VecRRRLong {
+            alu_op: VecRRRLongOp::Umlal16,
+            rd: writable_vreg(7),
+            rn: vreg(14),
+            rm: vreg(21),
+            high_half: false,
+        },
+        "C781752E",
+        "umlal v7.4s, v14.4h, v21.4h",
+    ));
+
+    insns.push((
+        Inst::VecRRRLong {
+            alu_op: VecRRRLongOp::Smull32,
             rd: writable_vreg(8),
             rn: vreg(12),
             rm: vreg(14),
-            size: VectorSize::Size32x4,
+            high_half: false,
         },
         "88C1AE0E",
         "smull v8.2d, v12.2s, v14.2s",
     ));
 
     insns.push((
-        Inst::VecRRR {
-            alu_op: VecALUOp::Smull2,
+        Inst::VecRRRLong {
+            alu_op: VecRRRLongOp::Umull32,
+            rd: writable_vreg(9),
+            rn: vreg(5),
+            rm: vreg(6),
+            high_half: false,
+        },
+        "A9C0A62E",
+        "umull v9.2d, v5.2s, v6.2s",
+    ));
+
+    insns.push((
+        Inst::VecRRRLong {
+            alu_op: VecRRRLongOp::Umlal32,
+            rd: writable_vreg(9),
+            rn: vreg(20),
+            rm: vreg(17),
+            high_half: false,
+        },
+        "8982B12E",
+        "umlal v9.2d, v20.2s, v17.2s",
+    ));
+
+    insns.push((
+        Inst::VecRRRLong {
+            alu_op: VecRRRLongOp::Smull8,
             rd: writable_vreg(16),
             rn: vreg(12),
             rm: vreg(1),
-            size: VectorSize::Size8x16,
+            high_half: true,
         },
         "90C1214E",
         "smull2 v16.8h, v12.16b, v1.16b",
     ));
 
     insns.push((
-        Inst::VecRRR {
-            alu_op: VecALUOp::Smull2,
+        Inst::VecRRRLong {
+            alu_op: VecRRRLongOp::Umull8,
+            rd: writable_vreg(29),
+            rn: vreg(22),
+            rm: vreg(10),
+            high_half: true,
+        },
+        "DDC22A6E",
+        "umull2 v29.8h, v22.16b, v10.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRRLong {
+            alu_op: VecRRRLongOp::Umlal8,
+            rd: writable_vreg(1),
+            rn: vreg(5),
+            rm: vreg(15),
+            high_half: true,
+        },
+        "A1802F6E",
+        "umlal2 v1.8h, v5.16b, v15.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRRLong {
+            alu_op: VecRRRLongOp::Smull16,
             rd: writable_vreg(2),
             rn: vreg(13),
             rm: vreg(6),
-            size: VectorSize::Size16x8,
+            high_half: true,
         },
         "A2C1664E",
         "smull2 v2.4s, v13.8h, v6.8h",
     ));
 
     insns.push((
-        Inst::VecRRR {
-            alu_op: VecALUOp::Smull2,
+        Inst::VecRRRLong {
+            alu_op: VecRRRLongOp::Umull16,
+            rd: writable_vreg(19),
+            rn: vreg(18),
+            rm: vreg(17),
+            high_half: true,
+        },
+        "53C2716E",
+        "umull2 v19.4s, v18.8h, v17.8h",
+    ));
+
+    insns.push((
+        Inst::VecRRRLong {
+            alu_op: VecRRRLongOp::Umlal16,
+            rd: writable_vreg(11),
+            rn: vreg(10),
+            rm: vreg(12),
+            high_half: true,
+        },
+        "4B816C6E",
+        "umlal2 v11.4s, v10.8h, v12.8h",
+    ));
+
+    insns.push((
+        Inst::VecRRRLong {
+            alu_op: VecRRRLongOp::Smull32,
             rd: writable_vreg(8),
             rn: vreg(12),
             rm: vreg(14),
-            size: VectorSize::Size32x4,
+            high_half: true,
         },
         "88C1AE4E",
         "smull2 v8.2d, v12.4s, v14.4s",
     ));
 
+    insns.push((
+        Inst::VecRRRLong {
+            alu_op: VecRRRLongOp::Umull32,
+            rd: writable_vreg(4),
+            rn: vreg(12),
+            rm: vreg(16),
+            high_half: true,
+        },
+        "84C1B06E",
+        "umull2 v4.2d, v12.4s, v16.4s",
+    ));
+
+    insns.push((
+        Inst::VecRRRLong {
+            alu_op: VecRRRLongOp::Umlal32,
+            rd: writable_vreg(10),
+            rn: vreg(29),
+            rm: vreg(2),
+            high_half: true,
+        },
+        "AA83A26E",
+        "umlal2 v10.2d, v29.4s, v2.4s",
+    ));
+
     insns.push((
         Inst::VecRRR {
             alu_op: VecALUOp::Sqrdmulh,