Skip to content

Commit

Permalink
Split Fmla and Bsl out into new VecRRRMod op (#4638)
Browse files Browse the repository at this point in the history
Separates the following opcodes for AArch64 into a separate `VecALUModOp` enum,
which is emitted via the `VecRRRMod` instruction. This separates vector ALU
instructions which modify a register from instructions which write to a new register:
- `Bsl`
- `Fmla`

Addresses [a discussion](#4608 (comment)) in #4608.

Copyright (c) 2022 Arm Limited
  • Loading branch information
dheaton-arm authored Aug 8, 2022
1 parent 866ec46 commit 47a67d7
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 49 deletions.
36 changes: 21 additions & 15 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,14 @@
(rm Reg)
(size VectorSize))

;; A vector ALU op modifying a source register.
(VecRRRMod
(alu_op VecALUModOp)
(rd WritableReg)
(rn Reg)
(rm Reg)
(size VectorSize))

;; Vector two register miscellaneous instruction.
(VecMisc
(op VecMisc2)
Expand Down Expand Up @@ -1108,10 +1116,6 @@
(Orr)
;; Bitwise exclusive or
(Eor)
;; Bitwise select
;; This opcode should only be used with the `vec_rrr_inplace`
;; constructor.
(Bsl)
;; Unsigned maximum pairwise
(Umaxp)
;; Add
Expand Down Expand Up @@ -1146,10 +1150,6 @@
(Fmin)
;; Floating-point multiply
(Fmul)
;; Floating-point fused multiply-add vectors
;; This opcode should only be used with the `vec_rrr_inplace`
;; constructor.
(Fmla)
;; Add pairwise
(Addp)
;; Zip vectors (primary) [meaning, high halves]
Expand All @@ -1158,6 +1158,15 @@
(Sqrdmulh)
))

;; A Vector ALU operation which modifies a source register.
(type VecALUModOp
(enum
;; Bitwise select
(Bsl)
;; Floating-point fused multiply-add vectors
(Fmla)
))

;; A Vector miscellaneous operation with two registers.
(type VecMisc2
(enum
Expand Down Expand Up @@ -1508,11 +1517,11 @@

;; Helper for emitting `MInst.VecRRR` instructions which use three registers,
;; one of which is both source and output.
(decl vec_rrr_inplace (VecALUOp Reg Reg Reg VectorSize) Reg)
(rule (vec_rrr_inplace op src1 src2 src3 size)
(decl vec_rrr_mod (VecALUModOp Reg Reg Reg VectorSize) Reg)
(rule (vec_rrr_mod op src1 src2 src3 size)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_1 Unit (emit (MInst.FpuMove128 dst src1)))
(_2 Unit (emit (MInst.VecRRR op dst src2 src3 size))))
(_2 Unit (emit (MInst.VecRRRMod op dst src2 src3 size))))
dst))

;; Helper for emitting `MInst.FpuRRR` instructions.
Expand Down Expand Up @@ -2198,10 +2207,7 @@

(decl bsl (Type Reg Reg Reg) Reg)
(rule (bsl ty c x y)
(let ((dst WritableReg (temp_writable_reg ty))
(_ Unit (emit (MInst.FpuMove128 dst c)))
(_ Unit (emit (MInst.VecRRR (VecALUOp.Bsl) dst x y (vector_size ty)))))
dst))
(vec_rrr_mod (VecALUModOp.Bsl) c x y (vector_size ty)))

;; Helper for generating a `udf` instruction.

Expand Down
10 changes: 10 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,16 @@ impl VectorSize {

(q, size)
}

/// Return the encoding bit that is used by some floating-point SIMD
/// instructions for a particular operand size.
pub fn enc_float_size(&self) -> u32 {
match self.lane_size() {
ScalarSize::Size32 => 0b0,
ScalarSize::Size64 => 0b1,
size => panic!("Unsupported floating-point size for vector op: {:?}", size),
}
}
}

pub(crate) fn dynamic_to_fixed(ty: Type) -> Type {
Expand Down
34 changes: 22 additions & 12 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2543,17 +2543,9 @@ impl MachInstEmit for Inst {
| VecALUOp::Fdiv
| VecALUOp::Fmax
| VecALUOp::Fmin
| VecALUOp::Fmul
| VecALUOp::Fmla => true,
| VecALUOp::Fmul => true,
_ => false,
};
let enc_float_size = match (is_float, size) {
(true, VectorSize::Size32x2) => 0b0,
(true, VectorSize::Size32x4) => 0b0,
(true, VectorSize::Size64x2) => 0b1,
(true, _) => unimplemented!(),
_ => 0,
};

let (top11, bit15_10) = match alu_op {
VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),
Expand All @@ -2574,7 +2566,6 @@ impl MachInstEmit for Inst {
VecALUOp::Bic => (0b000_01110_01_1, 0b000111),
VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
VecALUOp::Bsl => (0b001_01110_01_1, 0b000111),
VecALUOp::Umaxp => {
debug_assert_ne!(size, VectorSize::Size64x2);

Expand Down Expand Up @@ -2619,7 +2610,6 @@ impl MachInstEmit for Inst {
VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
VecALUOp::Fmla => (0b000_01110_00_1, 0b110011),
VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
VecALUOp::Sqrdmulh => {
Expand All @@ -2632,12 +2622,32 @@ impl MachInstEmit for Inst {
}
};
let top11 = if is_float {
top11 | enc_float_size << 1
top11 | size.enc_float_size() << 1
} else {
top11
};
sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
}
&Inst::VecRRRMod {
rd,
rn,
rm,
alu_op,
size,
} => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let rm = allocs.next(rm);
let (q, _enc_size) = size.enc_size();

let (top11, bit15_10) = match alu_op {
VecALUModOp::Bsl => (0b001_01110_01_1, 0b000111),
VecALUModOp::Fmla => {
(0b000_01110_00_1 | (size.enc_float_size() << 1), 0b110011)
}
};
sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
}
&Inst::VecLoadReplicate {
rd,
rn,
Expand Down
16 changes: 8 additions & 8 deletions cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3383,8 +3383,8 @@ fn test_aarch64_binemit() {
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Bsl,
Inst::VecRRRMod {
alu_op: VecALUModOp::Bsl,
rd: writable_vreg(8),
rn: vreg(9),
rm: vreg(1),
Expand Down Expand Up @@ -4055,8 +4055,8 @@ fn test_aarch64_binemit() {
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Fmla,
Inst::VecRRRMod {
alu_op: VecALUModOp::Fmla,
rd: writable_vreg(2),
rn: vreg(0),
rm: vreg(5),
Expand All @@ -4067,8 +4067,8 @@ fn test_aarch64_binemit() {
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Fmla,
Inst::VecRRRMod {
alu_op: VecALUModOp::Fmla,
rd: writable_vreg(2),
rn: vreg(0),
rm: vreg(5),
Expand All @@ -4079,8 +4079,8 @@ fn test_aarch64_binemit() {
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Fmla,
Inst::VecRRRMod {
alu_op: VecALUModOp::Fmla,
rd: writable_vreg(2),
rn: vreg(0),
rm: vreg(5),
Expand Down
39 changes: 26 additions & 13 deletions cranelift/codegen/src/isa/aarch64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ mod emit_tests;

pub use crate::isa::aarch64::lower::isle::generated_code::{
ALUOp, ALUOp3, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3,
FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUOp, VecExtendOp,
VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, VecRRRLongOp,
VecShiftImmOp,
FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, VecALUOp,
VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp,
VecRRRLongOp, VecShiftImmOp,
};

/// A floating-point unit (FPU) operation with two args, a register and an immediate.
Expand Down Expand Up @@ -957,14 +957,13 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
collector.reg_def(rd);
collector.reg_use(rn);
}
&Inst::VecRRR {
alu_op, rd, rn, rm, ..
} => {
if alu_op == VecALUOp::Bsl || alu_op == VecALUOp::Fmla {
collector.reg_mod(rd);
} else {
collector.reg_def(rd);
}
&Inst::VecRRR { rd, rn, rm, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
collector.reg_use(rm);
}
&Inst::VecRRRMod { rd, rn, rm, .. } => {
collector.reg_mod(rd);
collector.reg_use(rn);
collector.reg_use(rm);
}
Expand Down Expand Up @@ -2208,7 +2207,6 @@ impl Inst {
VecALUOp::Bic => ("bic", VectorSize::Size8x16),
VecALUOp::Orr => ("orr", VectorSize::Size8x16),
VecALUOp::Eor => ("eor", VectorSize::Size8x16),
VecALUOp::Bsl => ("bsl", VectorSize::Size8x16),
VecALUOp::Umaxp => ("umaxp", size),
VecALUOp::Add => ("add", size),
VecALUOp::Sub => ("sub", size),
Expand All @@ -2226,7 +2224,6 @@ impl Inst {
VecALUOp::Fmax => ("fmax", size),
VecALUOp::Fmin => ("fmin", size),
VecALUOp::Fmul => ("fmul", size),
VecALUOp::Fmla => ("fmla", size),
VecALUOp::Addp => ("addp", size),
VecALUOp::Zip1 => ("zip1", size),
VecALUOp::Sqrdmulh => ("sqrdmulh", size),
Expand All @@ -2236,6 +2233,22 @@ impl Inst {
let rm = pretty_print_vreg_vector(rm, size, allocs);
format!("{} {}, {}, {}", op, rd, rn, rm)
}
&Inst::VecRRRMod {
rd,
rn,
rm,
alu_op,
size,
} => {
let (op, size) = match alu_op {
VecALUModOp::Bsl => ("bsl", VectorSize::Size8x16),
VecALUModOp::Fmla => ("fmla", size),
};
let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs);
let rn = pretty_print_vreg_vector(rn, size, allocs);
let rm = pretty_print_vreg_vector(rm, size, allocs);
format!("{} {}, {}, {}", op, rd, rn, rm)
}
&Inst::VecRRRLong {
rd,
rn,
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@
;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty @ (multi_lane _ _) (fma x y z)))
(vec_rrr_inplace (VecALUOp.Fmla) z x y (vector_size ty)))
(vec_rrr_mod (VecALUModOp.Fmla) z x y (vector_size ty)))

(rule (lower (has_type (ty_scalar_float ty) (fma x y z)))
(fpu_rrrr (FPUOp3.MAdd) (scalar_size ty) x y z))
Expand Down

0 comments on commit 47a67d7

Please sign in to comment.