bytecodealliance · cfallin · Aug 5, 2022 · Jul 13, 2022 · Aug 5, 2022
@@ -335,8 +335,10 @@
  (rn Reg))
 
  ;; 3-op FPU instruction.
+ ;; 16-bit scalars require half-precision floating-point support (FEAT_FP16).
  (FpuRRRR
  (fpu_op FPUOp3)
+ (size ScalarSize)
  (rd WritableReg)
  (rn Reg)
  (rm Reg)
@@ -478,7 +480,7 @@
  (rd WritableReg)
  (rn Reg)
  (idx u8)
- (size VectorSize))
+ (size ScalarSize))
 
  ;; Signed move from a vector element to a GPR.
  (MovFromVecSigned
@@ -1011,8 +1013,7 @@
 ;; A floating-point unit (FPU) operation with three args.
 (type FPUOp3
  (enum
- (MAdd32)
- (MAdd64)
+ (MAdd)
 ))
 
 ;; A conversion from an FP to an integer value.
@@ -1108,6 +1109,8 @@
  ;; Bitwise exclusive or
  (Eor)
  ;; Bitwise select
+ ;; This opcode should only be used with the `vec_rrr_inplace`
+ ;; constructor.
  (Bsl)
  ;; Unsigned maximum pairwise
  (Umaxp)
@@ -1143,6 +1146,10 @@
  (Fmin)
  ;; Floating-point multiply
  (Fmul)
+ ;; Floating-point fused multiply-add to accumulator (vector)
+ ;; This opcode should only be used with the `vec_rrr_inplace`
+ ;; constructor.
+ (Fmla)
  ;; Add pairwise
  (Addp)
  ;; Zip vectors (primary) [meaning, high halves]
@@ -1364,6 +1371,9 @@
 (decl imm12_from_negated_u64 (Imm12) u64)
 (extern extractor imm12_from_negated_u64 imm12_from_negated_u64)
 
+;; Builds a `ShiftOpAndAmt` from a type and a `u64` amount. Implemented
+;; externally; `cmp_rr_shift` uses it (via `if-let`) to form its shifted operand.
+;; NOTE(review): presumably a logical shift right that fails to match when the
+;; amount is out of range for the type -- confirm against the Rust side.
+(decl pure lshr_from_u64 (Type u64) ShiftOpAndAmt)
+(extern constructor lshr_from_u64 lshr_from_u64)
+
 (decl pure lshl_from_imm64 (Type Imm64) ShiftOpAndAmt)
 (extern constructor lshl_from_imm64 lshl_from_imm64)
 
@@ -1491,6 +1501,15 @@
 (rule (fpu_rr op src size)
  (let ((dst WritableReg (temp_writable_reg $F64))
  (_ Unit (emit (MInst.FpuRR op size dst src))))
+ dst))
+
+;; Helper for emitting `MInst.VecRRR` instructions which use three registers,
+;; one of which is both source and output.
+;;
+;; `src1` is first copied into a fresh temporary with `MInst.FpuMove128`; the
+;; `VecRRR` is then emitted with that temporary as its destination and
+;; `src2`/`src3` as its operands, and the temporary is returned. `src1` itself
+;; is never passed to the `VecRRR`.
+(decl vec_rrr_inplace (VecALUOp Reg Reg Reg VectorSize) Reg)
+(rule (vec_rrr_inplace op src1 src2 src3 size)
+ (let ((dst WritableReg (temp_writable_reg $I8X16))
+ (_1 Unit (emit (MInst.FpuMove128 dst src1)))
+ (_2 Unit (emit (MInst.VecRRR op dst src2 src3 size))))
  dst))
 
 ;; Helper for emitting `MInst.FpuRRR` instructions.
@@ -1500,6 +1519,13 @@
  (_ Unit (emit (MInst.FpuRRR op size dst src1 src2))))
  dst))
 
+;; Helper for emitting `MInst.FpuRRRR` instructions.
+;;
+;; Arguments follow the declaration order: the `FPUOp3` opcode, the
+;; `ScalarSize` of the operation, then the three source registers. The result
+;; is written to a fresh temporary register, which is returned.
+(decl fpu_rrrr (FPUOp3 ScalarSize Reg Reg Reg) Reg)
+(rule (fpu_rrrr op size src1 src2 src3)
+ (let ((dst WritableReg (temp_writable_reg $F64))
+ (_ Unit (emit (MInst.FpuRRRR op size dst src1 src2 src3))))
+ dst))
+
 ;; Helper for emitting `MInst.FpuCmp` instructions.
 (decl fpu_cmp (ScalarSize Reg Reg) ProducesFlags)
 (rule (fpu_cmp size rn rm)
@@ -1541,6 +1567,15 @@
  (_ Unit (emit (MInst.AluRRRShift op (operand_size ty) dst src1 src2 shift))))
  dst))
 
+;; Helper that builds a flag-setting `cmp` whose second register operand is
+;; shifted right by an immediate amount. It is expressed as a `SubS` into the
+;; zero register, so only the flags are produced.
+(decl cmp_rr_shift (OperandSize Reg Reg u64) ProducesFlags)
+(rule (cmp_rr_shift size lhs rhs amount)
+ (if-let shift_op (lshr_from_u64 $I64 amount))
+ (ProducesFlags.ProducesFlagsSideEffect
+ (MInst.AluRRRShift (ALUOp.SubS) size (writable_zero_reg) lhs rhs shift_op)))
+
+
 ;; Helper for emitting `MInst.AluRRRExtend` instructions.
 (decl alu_rrr_extend (ALUOp Type Reg Reg ExtendOp) Reg)
 (rule (alu_rrr_extend op ty src1 src2 extend)
@@ -1741,7 +1776,7 @@
  dst))
 
 ;; Helper for emitting `MInst.MovFromVec` instructions.
-(decl mov_from_vec (Reg u8 VectorSize) Reg)
+(decl mov_from_vec (Reg u8 ScalarSize) Reg)
 (rule (mov_from_vec rn idx size)
  (let ((dst WritableReg (temp_writable_reg $I64))
  (_ Unit (emit (MInst.MovFromVec dst rn idx size))))
@@ -1817,6 +1852,22 @@
  (MInst.CSNeg dst cond if_true if_false)
  dst)))
 
+;; Helper for generating `MInst.CCmpImm` instructions.
+;;
+;; Emits a conditional compare of `rn` against `imm`, then materializes `cond`
+;; into a fresh temporary that is returned as the value. The `u8` argument
+;; selects how the condition is materialized (it is named `_ty_bits` below;
+;; presumably the bit width of the result type -- confirm against callers).
+(decl ccmp_imm (OperandSize u8 Reg UImm5 NZCV Cond) ConsumesFlags)
+
+;; Width 1: materialize with `CSet`. This rule overlaps the catch-all rule
+;; below, so it is given an explicit higher priority rather than relying on
+;; implicit specificity ordering.
+(rule 1 (ccmp_imm size 1 rn imm nzcv cond)
+ (let ((dst WritableReg (temp_writable_reg $I64)))
+ (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
+ (MInst.CCmpImm size rn imm nzcv cond)
+ (MInst.CSet dst cond)
+ (value_reg dst))))
+
+;; Any other width: materialize with `CSetm`.
+(rule (ccmp_imm size _ty_bits rn imm nzcv cond)
+ (let ((dst WritableReg (temp_writable_reg $I64)))
+ (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
+ (MInst.CCmpImm size rn imm nzcv cond)
+ (MInst.CSetm dst cond)
+ (value_reg dst))))
+
 ;; Helpers for generating `add` instructions.
 
 (decl add (Type Reg Reg) Reg)

@@ -620,7 +620,7 @@ impl ScalarSize {
  /// Convert to an integer operand size.
  pub fn operand_size(&self) -> OperandSize {
  match self {
- ScalarSize::Size32 => OperandSize::Size32,
+ ScalarSize::Size8 | ScalarSize::Size16 | ScalarSize::Size32 => OperandSize::Size32,
  ScalarSize::Size64 => OperandSize::Size64,
  _ => panic!("Unexpected operand_size request for: {:?}", self),
  }
@@ -687,8 +687,11 @@ impl VectorSize {
  debug_assert!(ty.is_vector());
 
  match ty {
+ B8X8 => VectorSize::Size8x8,
  B8X16 => VectorSize::Size8x16,
+ B16X4 => VectorSize::Size16x4,
  B16X8 => VectorSize::Size16x8,
+ B32X2 => VectorSize::Size32x2,
  B32X4 => VectorSize::Size32x4,
  B64X2 => VectorSize::Size64x2,
  F32X2 => VectorSize::Size32x2,

@@ -1790,6 +1790,7 @@ impl MachInstEmit for Inst {
  }
  &Inst::FpuRRRR {
  fpu_op,
+ size,
  rd,
  rn,
  rm,
@@ -1800,9 +1801,9 @@ impl MachInstEmit for Inst {
  let rm = allocs.next(rm);
  let ra = allocs.next(ra);
  let top17 = match fpu_op {
- FPUOp3::MAdd32 => 0b000_11111_00_0_00000_0,
- FPUOp3::MAdd64 => 0b000_11111_01_0_00000_0,
+ FPUOp3::MAdd => 0b000_11111_00_0_00000_0,
  };
+ let top17 = top17 | size.ftype() << 7;
  sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
  }
  &Inst::VecMisc { op, rd, rn, size } => {
@@ -2209,11 +2210,11 @@ impl MachInstEmit for Inst {
  let rd = allocs.next_writable(rd);
  let rn = allocs.next(rn);
  let (q, imm5, shift, mask) = match size {
- VectorSize::Size8x16 => (0b0, 0b00001, 1, 0b1111),
- VectorSize::Size16x8 => (0b0, 0b00010, 2, 0b0111),
- VectorSize::Size32x4 => (0b0, 0b00100, 3, 0b0011),
- VectorSize::Size64x2 => (0b1, 0b01000, 4, 0b0001),
- _ => unreachable!(),
+ ScalarSize::Size8 => (0b0, 0b00001, 1, 0b1111),
+ ScalarSize::Size16 => (0b0, 0b00010, 2, 0b0111),
+ ScalarSize::Size32 => (0b0, 0b00100, 3, 0b0011),
+ ScalarSize::Size64 => (0b1, 0b01000, 4, 0b0001),
+ _ => panic!("Unexpected scalar FP operand size: {:?}", size),
  };
  debug_assert_eq!(idx & mask, idx);
  let imm5 = imm5 | ((idx as u32) << shift);
@@ -2542,7 +2543,8 @@ impl MachInstEmit for Inst {
  | VecALUOp::Fdiv
  | VecALUOp::Fmax
  | VecALUOp::Fmin
- | VecALUOp::Fmul => true,
+ | VecALUOp::Fmul
+ | VecALUOp::Fmla => true,
  _ => false,
  };
  let enc_float_size = match (is_float, size) {
@@ -2617,6 +2619,7 @@ impl MachInstEmit for Inst {
  VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
  VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
  VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
+ VecALUOp::Fmla => (0b000_01110_00_1, 0b110011),
  VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
  VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
  VecALUOp::Sqrdmulh => {

@@ -2266,7 +2266,7 @@ fn test_aarch64_binemit() {
  rd: writable_xreg(3),
  rn: vreg(27),
  idx: 14,
- size: VectorSize::Size8x16,
+ size: ScalarSize::Size8,
  },
  "633F1D0E",
  "umov w3, v27.b[14]",
@@ -2276,7 +2276,7 @@ fn test_aarch64_binemit() {
  rd: writable_xreg(24),
  rn: vreg(5),
  idx: 3,
- size: VectorSize::Size16x8,
+ size: ScalarSize::Size16,
  },
  "B83C0E0E",
  "umov w24, v5.h[3]",
@@ -2286,7 +2286,7 @@ fn test_aarch64_binemit() {
  rd: writable_xreg(12),
  rn: vreg(17),
  idx: 1,
- size: VectorSize::Size32x4,
+ size: ScalarSize::Size32,
  },
  "2C3E0C0E",
  "mov w12, v17.s[1]",
@@ -2296,7 +2296,7 @@ fn test_aarch64_binemit() {
  rd: writable_xreg(21),
  rn: vreg(20),
  idx: 0,
- size: VectorSize::Size64x2,
+ size: ScalarSize::Size64,
  },
  "953E084E",
  "mov x21, v20.d[0]",
@@ -4054,6 +4054,42 @@ fn test_aarch64_binemit() {
  "fmul v2.2d, v0.2d, v5.2d",
  ));
 
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fmla,
+ rd: writable_vreg(2),
+ rn: vreg(0),
+ rm: vreg(5),
+ size: VectorSize::Size32x2,
+ },
+ "02CC250E",
+ "fmla v2.2s, v0.2s, v5.2s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fmla,
+ rd: writable_vreg(2),
+ rn: vreg(0),
+ rm: vreg(5),
+ size: VectorSize::Size32x4,
+ },
+ "02CC254E",
+ "fmla v2.4s, v0.4s, v5.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fmla,
+ rd: writable_vreg(2),
+ rn: vreg(0),
+ rm: vreg(5),
+ size: VectorSize::Size64x2,
+ },
+ "02CC654E",
+ "fmla v2.2d, v0.2d, v5.2d",
+ ));
+
  insns.push((
  Inst::VecRRR {
  alu_op: VecALUOp::Addp,
@@ -5911,7 +5947,8 @@ fn test_aarch64_binemit() {
 
  insns.push((
  Inst::FpuRRRR {
- fpu_op: FPUOp3::MAdd32,
+ fpu_op: FPUOp3::MAdd,
+ size: ScalarSize::Size32,
  rd: writable_vreg(15),
  rn: vreg(30),
  rm: vreg(31),
@@ -5923,7 +5960,8 @@ fn test_aarch64_binemit() {
 
  insns.push((
  Inst::FpuRRRR {
- fpu_op: FPUOp3::MAdd64,
+ fpu_op: FPUOp3::MAdd,
+ size: ScalarSize::Size64,
  rd: writable_vreg(15),
  rn: vreg(30),
  rm: vreg(31),

@@ -292,14 +292,6 @@ impl Imm12 {
  }
  }
 
- /// Create a zero immediate of this format.
- pub fn zero() -> Self {
- Imm12 {
- bits: 0,
- shift12: false,
- }
- }
-
  /// Bits for 2-bit "shift" field in e.g. AddI.
  pub fn shift_bits(&self) -> u32 {
  if self.shift12 {