Skip to content

Commit

Permalink
aarch64: Migrate uextend/sextend to ISLE
Browse files Browse the repository at this point in the history
This commit migrates the sign/zero extension instructions from
`lower_inst.rs` to ISLE. There's actually a fair amount going on in this
migration since a few other pieces needed touching up along the way as
well:

* First is the actual migration of `uextend` and `sextend`. These
  instructions are relatively simple but end up having a number of special
  cases. I've attempted to replicate all the cases here but
  double-checks would be good.

* This commit also fixes a few issues where sign/zero-extending the result
  of a vector extraction into i128 results in panics in the current
  backend.

* This commit adds exhaustive testing that the extension of a vector
  extraction is a noop with respect to the extraction.

* A bugfix around ISLE glue was required to get this commit working,
  notably the case where the `RegMapper` implementation was trying to
  map an input to an output (meaning ISLE was passing through an input
  unmodified to the output) wasn't working. This requires a `mov`
  instruction to be generated and this commit updates the glue to do
  this. At the same time this commit updates the ISLE glue to share more
  infrastructure between x64 and aarch64 so both backends get this fix
  instead of just aarch64.

Overall I think that the translation to ISLE was a net benefit for these
instructions. It's relatively obvious what all the cases are now unlike
before where it took a few reads of the code and some boolean switches
to figure out which path was taken for each flavor of input. I think
there are still possible improvements here. For example, the
`put_in_reg_{s,z}ext64` helpers don't use this logic, so technically
those helpers could also pattern match the "well, atomic loads and vector
extractions automatically do this for us" cases, but that's a possible
future improvement (and shouldn't be too hard with some ISLE
refactoring).
  • Loading branch information
alexcrichton committed Dec 14, 2021
1 parent 20e090b commit d89410e
Show file tree
Hide file tree
Showing 11 changed files with 936 additions and 390 deletions.
53 changes: 45 additions & 8 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1515,6 +1515,34 @@
(_ Unit (emit (MInst.VecRRLong op dst src high_half))))
(writable_reg_to_reg dst)))

;; Emits an `MInst.MovFromVec` that reads lane `lane` of the vector register
;; `vec` (whose lane layout is `lanes`) into a freshly allocated temporary
;; register, and returns that temporary.
(decl mov_from_vec (Reg u8 VectorSize) Reg)
(rule (mov_from_vec vec lane lanes)
      (let ((tmp WritableReg (temp_writable_reg $I64))
            (_ Unit (emit (MInst.MovFromVec tmp vec lane lanes))))
        (writable_reg_to_reg tmp)))

;; Emits an `MInst.MovFromVecSigned` that reads lane `lane` of the vector
;; register `vec` (lane layout `lanes`) into a freshly allocated temporary
;; register, using `dst_size` as the scalar operand size, and returns that
;; temporary.
(decl mov_from_vec_signed (Reg u8 VectorSize OperandSize) Reg)
(rule (mov_from_vec_signed vec lane lanes dst_size)
      (let ((tmp WritableReg (temp_writable_reg $I64))
            (_ Unit (emit (MInst.MovFromVecSigned tmp vec lane lanes dst_size))))
        (writable_reg_to_reg tmp)))

;; Emits an `MInst.Extend` of `src` from `src_bits` to `dst_bits` into a
;; freshly allocated temporary register and returns that temporary.
;; `is_signed` selects sign-extension (`$true`) or zero-extension (`$false`).
(decl extend (Reg bool u8 u8) Reg)
(rule (extend src is_signed src_bits dst_bits)
      (let ((tmp WritableReg (temp_writable_reg $I64))
            (_ Unit (emit (MInst.Extend tmp src is_signed src_bits dst_bits))))
        (writable_reg_to_reg tmp)))

;; Emits an `MInst.LoadAcquire` with access type `access_ty` from the address
;; held in `address`, placing the loaded value in a freshly allocated
;; temporary register that is then returned.
(decl load_acquire (Type Reg) Reg)
(rule (load_acquire access_ty address)
      (let ((tmp WritableReg (temp_writable_reg $I64))
            (_ Unit (emit (MInst.LoadAcquire access_ty tmp address))))
        (writable_reg_to_reg tmp)))

;; Immediate value helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(decl imm (Type u64) Reg)
Expand Down Expand Up @@ -1543,21 +1571,15 @@
;; Place a `Value` into a register, sign extending it to 64-bits
(decl put_in_reg_sext64 (Value) Reg)
(rule (put_in_reg_sext64 val @ (value_type (fits_in_32 ty)))
(let ((dst WritableReg (temp_writable_reg $I32))
(src Reg (put_in_reg val))
(_ Unit (emit (MInst.Extend dst src $true (ty_bits ty) 64))))
(writable_reg_to_reg dst)))
(extend (put_in_reg val) $true (ty_bits ty) 64))

;; 64-bit passthrough.
(rule (put_in_reg_sext64 val @ (value_type $I64)) (put_in_reg val))

;; Place a `Value` into a register, zero extending it to 64-bits
(decl put_in_reg_zext64 (Value) Reg)
(rule (put_in_reg_zext64 val @ (value_type (fits_in_32 ty)))
(let ((dst WritableReg (temp_writable_reg $I32))
(src Reg (put_in_reg val))
(_ Unit (emit (MInst.Extend dst src $false (ty_bits ty) 64))))
(writable_reg_to_reg dst)))
(extend (put_in_reg val) $false (ty_bits ty) 64))

;; 64-bit passthrough.
(rule (put_in_reg_zext64 val @ (value_type $I64)) (put_in_reg val))
Expand Down Expand Up @@ -1599,3 +1621,18 @@
(rule (adds_op (fits_in_32 _ty)) (ALUOp.AddS32))
(rule (adds_op $I64) (ALUOp.AddS64))

;; An atomic load that can be sunk into another operation.
;;
;; Declared `extern` with no variants: ISLE treats it as an opaque type whose
;; definition is supplied by the Rust glue code.
(type SinkableAtomicLoad extern (enum))

;; Extract a `SinkableAtomicLoad` from a value operand. The address of the
;; matched load can later be placed in a `Reg` with `sink_atomic_load`.
;;
;; The extractor only matches when the value is reported as being produced by
;; result 0 of an `atomic_load` instruction; otherwise the pattern does not
;; match.
(decl sinkable_atomic_load (SinkableAtomicLoad) Value)
(extern extractor sinkable_atomic_load sinkable_atomic_load)

;; Sink a `SinkableAtomicLoad` into a `Reg`.
;;
;; This is a side-effectful operation that notifies the context that the
;; instruction that produced the `SinkableAtomicLoad` has been sunk into another
;; instruction, and no longer needs to be lowered.
;;
;; The returned `Reg` holds the *address* operand of the sunk load, not the
;; loaded value; callers are expected to emit the load themselves (e.g. with
;; `load_acquire`).
(decl sink_atomic_load (SinkableAtomicLoad) Reg)
(extern constructor sink_atomic_load sink_atomic_load)
86 changes: 86 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -502,3 +502,89 @@
(result Reg (alu_rrrr (ALUOp3.MSub64) div y64 x64))
)
(value_reg result)))

;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; General rule for extending input to an output which fits in a single
;; register: emit an explicit zero-extension (`signed` is `$false`) from the
;; bit width of the input type to the bit width of the output type.
(rule (lower (has_type (fits_in_64 out) (uextend x @ (value_type in))))
(value_reg (extend (put_in_reg x) $false (ty_bits in) (ty_bits out))))

;; Extraction of a vector lane automatically extends as necessary, so we can
;; skip an explicit extending instruction.
;;
;; Note that `in` binds the type of the vector operand `vec` here (not the
;; lane type), which is what `vector_size` is applied to.
(rule (lower (has_type (fits_in_64 out)
(uextend (def_inst (extractlane vec @ (value_type in)
(u8_from_uimm8 lane))))))
(value_reg (mov_from_vec (put_in_reg vec) lane (vector_size in))))

;; Atomic loads will also automatically zero their upper bits so the `uextend`
;; instruction can effectively get skipped here.
;;
;; Despite its name, `addr` binds the `SinkableAtomicLoad` itself;
;; `sink_atomic_load` marks the load as sunk and yields the register holding
;; its address, which is then used by a `load_acquire` of the load's type `in`.
(rule (lower (has_type (fits_in_64 out)
(uextend (and (value_type in) (sinkable_atomic_load addr)))))
(value_reg (load_acquire in (sink_atomic_load addr))))

;; Conversion to 128-bit needs a zero-extension of the lower bits to 64 bits,
;; and the upper 64 bits are all zero.
(rule (lower (has_type $I128 (uextend x)))
(value_regs (put_in_reg_zext64 x) (imm $I64 0)))

;; Like above: since vector extraction automatically zero-extends, extending
;; to i128 only requires generating a 0 constant for the upper bits.
(rule (lower (has_type $I128
(uextend (def_inst (extractlane vec @ (value_type in)
(u8_from_uimm8 lane))))))
(value_regs (mov_from_vec (put_in_reg vec) lane (vector_size in)) (imm $I64 0)))

;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; General rule for extending input to an output which fits in a single
;; register: emit an explicit sign-extension (`signed` is `$true`) from the
;; bit width of the input type to the bit width of the output type.
(rule (lower (has_type (fits_in_64 out) (sextend x @ (value_type in))))
(value_reg (extend (put_in_reg x) $true (ty_bits in) (ty_bits out))))

;; Extraction of a vector lane automatically extends as necessary, so we can
;; skip an explicit extending instruction.
;;
;; The signed lane move is given the operand size derived from the output
;; type `out`; `in` binds the type of the vector operand `vec`.
(rule (lower (has_type (fits_in_64 out)
(sextend (def_inst (extractlane vec @ (value_type in)
(u8_from_uimm8 lane))))))
(value_reg (mov_from_vec_signed (put_in_reg vec)
lane
(vector_size in)
(size_from_ty out))))

;; Conversion to 128-bit first sign-extends the input to 64 bits for the
;; lower half; the upper half is that value arithmetically shifted right by
;; 63, i.e. 64 copies of its sign bit.
(rule (lower (has_type $I128 (sextend x)))
(let (
(lo Reg (put_in_reg_sext64 x))
(hi Reg (alu_rr_imm_shift (ALUOp.Asr64) lo (imm_shift_from_u8 63)))
)
(value_regs lo hi)))

;; Like above, but the lower half comes straight from a signed vector lane
;; extraction (which sign-extends to 64 bits on its own), so no separate
;; extend instruction is needed. The upper half is still computed as an
;; arithmetic shift right of the lower half by 63.
;;
;; Note that `mov_from_vec_signed` doesn't exist for i64x2, so that's
;; specifically excluded here.
(rule (lower (has_type $I128
(sextend (def_inst (extractlane vec @ (value_type in @ (not_i64x2))
(u8_from_uimm8 lane))))))
(let (
(lo Reg (mov_from_vec_signed (put_in_reg vec)
lane
(vector_size in)
(size_from_ty $I64)))
(hi Reg (alu_rr_imm_shift (ALUOp.Asr64) lo (imm_shift_from_u8 63)))
)
(value_regs lo hi)))

;; Extension from an extraction of i64x2 into i128.
;;
;; The lane is moved out with a plain (unsigned) `mov_from_vec` using the
;; 64x2 vector size, and the upper half is derived from it with an arithmetic
;; shift right by 63.
(rule (lower (has_type $I128
(sextend (def_inst (extractlane vec @ (value_type $I64X2)
(u8_from_uimm8 lane))))))
(let (
(lo Reg (mov_from_vec (put_in_reg vec)
lane
(VectorSize.Size64x2)))
(hi Reg (alu_rr_imm_shift (ALUOp.Asr64) lo (imm_shift_from_u8 63)))
)
(value_regs lo hi)))
4 changes: 2 additions & 2 deletions cranelift/codegen/src/isa/aarch64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1629,7 +1629,7 @@ pub(crate) fn emit_atomic_load<C: LowerCtx<I = Inst>>(
ctx: &mut C,
rt: Writable<Reg>,
insn: IRInst,
) {
) -> Inst {
assert!(ctx.data(insn).opcode() == Opcode::AtomicLoad);
let inputs = insn_inputs(ctx, insn);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
Expand All @@ -1638,7 +1638,7 @@ pub(crate) fn emit_atomic_load<C: LowerCtx<I = Inst>>(
// We're ignoring the result type of the load because the LoadAcquire will
// explicitly zero extend to the nearest word, and also zero the high half
// of an X register.
ctx.emit(Inst::LoadAcquire { access_ty, rt, rn });
Inst::LoadAcquire { access_ty, rt, rn }
}

fn load_op_to_ty(op: Opcode) -> Option<Type> {
Expand Down
100 changes: 34 additions & 66 deletions cranelift/codegen/src/isa/aarch64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use super::{
Inst as MInst, JTSequenceInfo, MachLabel, MoveWideConst, NarrowValueMode, Opcode, OperandSize,
PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VectorSize, NZCV,
};
use crate::isa::aarch64::settings as aarch64_settings;
use crate::isa::aarch64::settings::Flags;
use crate::machinst::isle::*;
use crate::{
binemit::CodeOffset,
Expand All @@ -21,9 +21,8 @@ use crate::{
isa::aarch64::inst::aarch64_map_regs,
isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm},
isa::unwind::UnwindInst,
machinst::{get_output_reg, ty_bits, InsnOutput, LowerCtx, RegRenamer},
machinst::{ty_bits, InsnOutput, LowerCtx},
};
use smallvec::SmallVec;
use std::boxed::Box;
use std::vec::Vec;

Expand All @@ -36,84 +35,34 @@ type BoxExternalName = Box<ExternalName>;
/// The main entry point for lowering with ISLE.
pub(crate) fn lower<C>(
lower_ctx: &mut C,
isa_flags: &aarch64_settings::Flags,
isa_flags: &Flags,
outputs: &[InsnOutput],
inst: Inst,
) -> Result<(), ()>
where
C: LowerCtx<I = MInst>,
{
// TODO: reuse the ISLE context across lowerings so we can reuse its
// internal heap allocations.
let mut isle_ctx = IsleContext::new(lower_ctx, isa_flags);

let temp_regs = generated_code::constructor_lower(&mut isle_ctx, inst).ok_or(())?;
let mut temp_regs = temp_regs.regs().iter();

#[cfg(debug_assertions)]
{
let all_dsts_len = outputs
.iter()
.map(|out| get_output_reg(isle_ctx.lower_ctx, *out).len())
.sum();
debug_assert_eq!(
temp_regs.len(),
all_dsts_len,
"the number of temporary registers and destination registers do \
not match ({} != {}); ensure the correct registers are being \
returned.",
temp_regs.len(),
all_dsts_len,
);
}

// The ISLE generated code emits its own registers to define the
// instruction's lowered values in. We rename those registers to the
// registers they were assigned when their value was used as an operand in
// earlier lowerings.
let mut renamer = RegRenamer::default();
for output in outputs {
let dsts = get_output_reg(isle_ctx.lower_ctx, *output);
for (temp, dst) in temp_regs.by_ref().zip(dsts.regs()) {
renamer.add_rename(*temp, dst.to_reg());
}
}

for mut inst in isle_ctx.into_emitted_insts() {
aarch64_map_regs(&mut inst, &renamer);
lower_ctx.emit(inst);
}

Ok(())
}

pub struct IsleContext<'a, C> {
lower_ctx: &'a mut C,
#[allow(dead_code)] // dead for now, but probably not for long
isa_flags: &'a aarch64_settings::Flags,
emitted_insts: SmallVec<[MInst; 6]>,
lower_common(
lower_ctx,
isa_flags,
outputs,
inst,
|cx, insn| generated_code::constructor_lower(cx, insn),
aarch64_map_regs,
)
}

/// A `Value` paired with the `ExtendOp` associated with it.
// NOTE(review): the code that constructs and consumes this type is outside
// this view; presumably `extend` is the extension to apply to `val` when it
// is used as an operand — confirm against the extractor/constructor impls.
pub struct ExtendedValue {
val: Value,
extend: ExtendOp,
}

impl<'a, C> IsleContext<'a, C> {
pub fn new(lower_ctx: &'a mut C, isa_flags: &'a aarch64_settings::Flags) -> Self {
IsleContext {
lower_ctx,
isa_flags,
emitted_insts: SmallVec::new(),
}
}

pub fn into_emitted_insts(self) -> SmallVec<[MInst; 6]> {
self.emitted_insts
}
/// An `AtomicLoad` instruction identified as the producer of a value,
/// recorded so that it can later be sunk into the instruction consuming it.
pub struct SinkableAtomicLoad {
/// The `AtomicLoad` instruction itself; handed to `sink_inst` when sunk.
atomic_load: Inst,
/// Input 0 of `atomic_load`, i.e. the address operand of the load.
atomic_addr: Value,
}

impl<'a, C> generated_code::Context for IsleContext<'a, C>
impl<C> generated_code::Context for IsleContext<'_, C, Flags, 6>
where
C: LowerCtx<I = MInst>,
{
Expand Down Expand Up @@ -275,4 +224,23 @@ where
n => Some(n as u64),
}
}

/// Recognizes `val` as the result of an atomic load.
///
/// Returns `Some` only when the lowering context reports `val` as result 0
/// of an instruction whose opcode is `Opcode::AtomicLoad`; the returned
/// record carries that instruction together with its input 0 (the address).
/// Nothing is sunk here.
fn sinkable_atomic_load(&mut self, val: Value) -> Option<SinkableAtomicLoad> {
    let source = self.lower_ctx.get_value_as_source_or_const(val);
    match source.inst {
        Some((atomic_load, 0))
            if self.lower_ctx.data(atomic_load).opcode() == Opcode::AtomicLoad =>
        {
            let atomic_addr = self.lower_ctx.input_as_value(atomic_load, 0);
            Some(SinkableAtomicLoad {
                atomic_load,
                atomic_addr,
            })
        }
        _ => None,
    }
}

fn sink_atomic_load(&mut self, load: &SinkableAtomicLoad) -> Reg {
self.lower_ctx.sink_inst(load.atomic_load);
self.put_in_reg(load.atomic_addr)
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
src/clif.isle be1359b4b6b153f378517c1dd95cd80f4a6bed0c7b86eaba11c088fd71b7bfe80a3c868ace245b2da0bfbbd6ded262ea9576c8e0eeacbf89d03c34a17a709602
src/prelude.isle d3d2a6a42fb778231a4cdca30995324e1293a9ca8073c5a27a501535759eb51f84a6718322a93dfba4b66ee4f0c9afce7dcec0428516ef0c5bc96e8c8b76925d
src/isa/aarch64/inst.isle cec03d88680e8da01424eecc05ef73a48e4055d29fe841fceaa3e6ea4e7cb9abb887401bb5acb2e058c9fc993188640990b699e88272d62e243781b231cdfb0d
src/isa/aarch64/lower.isle e1ae53adc953ad395feeecd8edc8bcfd288491a4e4a71510e5f06e221f767518c6e060ff0d795c7c2510b7d898cc8b9bc0313906412e0176605c33427926f828
src/isa/aarch64/inst.isle 70d7b319ba0b28173d2ef1820bd0e9c4b8cf7a5ab34475a43f03bdc5a6b945a7faf40d7b539a12050ddd8ebc4c6b0fe82df5940eaf966420bb4d58e7420d4206
src/isa/aarch64/lower.isle dfc622b2fecea98079fff182ce3443ada5448256662f598ea009caed3d9bcf6b4816f736a8c7f70142467febf8fc97230c57287f06e80e6101f3b401208c599c
Loading

0 comments on commit d89410e

Please sign in to comment.