Skip to content

Commit

Permalink
Merge pull request #3600 from alexcrichton/isle-5-uextend-sextend
Browse files Browse the repository at this point in the history
aarch64: Migrate `uextend`/`sextend` to ISLE
  • Loading branch information
fitzgen authored Dec 14, 2021
2 parents 20e090b + d89410e commit 8ac4d87
Show file tree
Hide file tree
Showing 11 changed files with 936 additions and 390 deletions.
53 changes: 45 additions & 8 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1515,6 +1515,34 @@
(_ Unit (emit (MInst.VecRRLong op dst src high_half))))
(writable_reg_to_reg dst)))

;; Helper for emitting `MInst.MovFromVec` instructions.
(decl mov_from_vec (Reg u8 VectorSize) Reg)
(rule (mov_from_vec rn idx size)
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.MovFromVec dst rn idx size))))
(writable_reg_to_reg dst)))

;; Helper for emitting `MInst.MovFromVecSigned` instructions.
(decl mov_from_vec_signed (Reg u8 VectorSize OperandSize) Reg)
(rule (mov_from_vec_signed rn idx size scalar_size)
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.MovFromVecSigned dst rn idx size scalar_size))))
(writable_reg_to_reg dst)))

;; Helper for emitting `MInst.Extend` instructions.
(decl extend (Reg bool u8 u8) Reg)
(rule (extend rn signed from_bits to_bits)
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.Extend dst rn signed from_bits to_bits))))
(writable_reg_to_reg dst)))

;; Helper for emitting `MInst.LoadAcquire` instructions.
(decl load_acquire (Type Reg) Reg)
(rule (load_acquire ty addr)
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.LoadAcquire ty dst addr))))
(writable_reg_to_reg dst)))

;; Immediate value helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(decl imm (Type u64) Reg)
Expand Down Expand Up @@ -1543,21 +1571,15 @@
;; Place a `Value` into a register, sign extending it to 64-bits
(decl put_in_reg_sext64 (Value) Reg)
(rule (put_in_reg_sext64 val @ (value_type (fits_in_32 ty)))
(let ((dst WritableReg (temp_writable_reg $I32))
(src Reg (put_in_reg val))
(_ Unit (emit (MInst.Extend dst src $true (ty_bits ty) 64))))
(writable_reg_to_reg dst)))
(extend (put_in_reg val) $true (ty_bits ty) 64))

;; 64-bit passthrough.
(rule (put_in_reg_sext64 val @ (value_type $I64)) (put_in_reg val))

;; Place a `Value` into a register, zero extending it to 64-bits
(decl put_in_reg_zext64 (Value) Reg)
(rule (put_in_reg_zext64 val @ (value_type (fits_in_32 ty)))
(let ((dst WritableReg (temp_writable_reg $I32))
(src Reg (put_in_reg val))
(_ Unit (emit (MInst.Extend dst src $false (ty_bits ty) 64))))
(writable_reg_to_reg dst)))
(extend (put_in_reg val) $false (ty_bits ty) 64))

;; 64-bit passthrough.
(rule (put_in_reg_zext64 val @ (value_type $I64)) (put_in_reg val))
Expand Down Expand Up @@ -1599,3 +1621,18 @@
(rule (adds_op (fits_in_32 _ty)) (ALUOp.AddS32))
(rule (adds_op $I64) (ALUOp.AddS64))

;; An atomic load that can be sunk into another operation.
(type SinkableAtomicLoad extern (enum))

;; Extract a `SinkableAtomicLoad` that works with `Reg` from a value
;; operand.
(decl sinkable_atomic_load (SinkableAtomicLoad) Value)
(extern extractor sinkable_atomic_load sinkable_atomic_load)

;; Sink a `SinkableLoad` into a `Reg`.
;;
;; This is a side-effectful operation that notifies the context that the
;; instruction that produced the `SinkableAtomicLoad` has been sunk into another
;; instruction, and no longer needs to be lowered.
(decl sink_atomic_load (SinkableAtomicLoad) Reg)
(extern constructor sink_atomic_load sink_atomic_load)
86 changes: 86 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -502,3 +502,89 @@
(result Reg (alu_rrrr (ALUOp3.MSub64) div y64 x64))
)
(value_reg result)))

;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; General rule for extending input to an output which fits in a single
;; register.
(rule (lower (has_type (fits_in_64 out) (uextend x @ (value_type in))))
(value_reg (extend (put_in_reg x) $false (ty_bits in) (ty_bits out))))

;; Extraction of a vector lane automatically extends as necessary, so we can
;; skip an explicit extending instruction.
(rule (lower (has_type (fits_in_64 out)
(uextend (def_inst (extractlane vec @ (value_type in)
(u8_from_uimm8 lane))))))
(value_reg (mov_from_vec (put_in_reg vec) lane (vector_size in))))

;; Atomic loads will also automatically zero their upper bits so the `uextend`
;; instruction can effectively get skipped here.
(rule (lower (has_type (fits_in_64 out)
(uextend (and (value_type in) (sinkable_atomic_load addr)))))
(value_reg (load_acquire in (sink_atomic_load addr))))

;; Conversion to 128-bit needs a zero-extension of the lower bits and the upper
;; bits are all zero.
(rule (lower (has_type $I128 (uextend x)))
(value_regs (put_in_reg_zext64 x) (imm $I64 0)))

;; Like above where vector extraction automatically zero-extends extending to
;; i128 only requires generating a 0 constant for the upper bits.
(rule (lower (has_type $I128
(uextend (def_inst (extractlane vec @ (value_type in)
(u8_from_uimm8 lane))))))
(value_regs (mov_from_vec (put_in_reg vec) lane (vector_size in)) (imm $I64 0)))

;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; General rule for extending input to an output which fits in a single
;; register.
(rule (lower (has_type (fits_in_64 out) (sextend x @ (value_type in))))
(value_reg (extend (put_in_reg x) $true (ty_bits in) (ty_bits out))))

;; Extraction of a vector lane automatically extends as necessary, so we can
;; skip an explicit extending instruction.
(rule (lower (has_type (fits_in_64 out)
(sextend (def_inst (extractlane vec @ (value_type in)
(u8_from_uimm8 lane))))))
(value_reg (mov_from_vec_signed (put_in_reg vec)
lane
(vector_size in)
(size_from_ty out))))

;; 64-bit to 128-bit only needs to sign-extend the input to the upper bits.
(rule (lower (has_type $I128 (sextend x)))
(let (
(lo Reg (put_in_reg_sext64 x))
(hi Reg (alu_rr_imm_shift (ALUOp.Asr64) lo (imm_shift_from_u8 63)))
)
(value_regs lo hi)))

;; Like above where vector extraction automatically zero-extends extending to
;; i128 only requires generating a 0 constant for the upper bits.
;;
;; Note that `mov_from_vec_signed` doesn't exist for i64x2, so that's
;; specifically excluded here.
(rule (lower (has_type $I128
(sextend (def_inst (extractlane vec @ (value_type in @ (not_i64x2))
(u8_from_uimm8 lane))))))
(let (
(lo Reg (mov_from_vec_signed (put_in_reg vec)
lane
(vector_size in)
(size_from_ty $I64)))
(hi Reg (alu_rr_imm_shift (ALUOp.Asr64) lo (imm_shift_from_u8 63)))
)
(value_regs lo hi)))

;; Extension from an extraction of i64x2 into i128.
(rule (lower (has_type $I128
(sextend (def_inst (extractlane vec @ (value_type $I64X2)
(u8_from_uimm8 lane))))))
(let (
(lo Reg (mov_from_vec (put_in_reg vec)
lane
(VectorSize.Size64x2)))
(hi Reg (alu_rr_imm_shift (ALUOp.Asr64) lo (imm_shift_from_u8 63)))
)
(value_regs lo hi)))
4 changes: 2 additions & 2 deletions cranelift/codegen/src/isa/aarch64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1629,7 +1629,7 @@ pub(crate) fn emit_atomic_load<C: LowerCtx<I = Inst>>(
ctx: &mut C,
rt: Writable<Reg>,
insn: IRInst,
) {
) -> Inst {
assert!(ctx.data(insn).opcode() == Opcode::AtomicLoad);
let inputs = insn_inputs(ctx, insn);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
Expand All @@ -1638,7 +1638,7 @@ pub(crate) fn emit_atomic_load<C: LowerCtx<I = Inst>>(
// We're ignoring the result type of the load because the LoadAcquire will
// explicitly zero extend to the nearest word, and also zero the high half
// of an X register.
ctx.emit(Inst::LoadAcquire { access_ty, rt, rn });
Inst::LoadAcquire { access_ty, rt, rn }
}

fn load_op_to_ty(op: Opcode) -> Option<Type> {
Expand Down
100 changes: 34 additions & 66 deletions cranelift/codegen/src/isa/aarch64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use super::{
Inst as MInst, JTSequenceInfo, MachLabel, MoveWideConst, NarrowValueMode, Opcode, OperandSize,
PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VectorSize, NZCV,
};
use crate::isa::aarch64::settings as aarch64_settings;
use crate::isa::aarch64::settings::Flags;
use crate::machinst::isle::*;
use crate::{
binemit::CodeOffset,
Expand All @@ -21,9 +21,8 @@ use crate::{
isa::aarch64::inst::aarch64_map_regs,
isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm},
isa::unwind::UnwindInst,
machinst::{get_output_reg, ty_bits, InsnOutput, LowerCtx, RegRenamer},
machinst::{ty_bits, InsnOutput, LowerCtx},
};
use smallvec::SmallVec;
use std::boxed::Box;
use std::vec::Vec;

Expand All @@ -36,84 +35,34 @@ type BoxExternalName = Box<ExternalName>;
/// The main entry point for lowering with ISLE.
pub(crate) fn lower<C>(
lower_ctx: &mut C,
isa_flags: &aarch64_settings::Flags,
isa_flags: &Flags,
outputs: &[InsnOutput],
inst: Inst,
) -> Result<(), ()>
where
C: LowerCtx<I = MInst>,
{
// TODO: reuse the ISLE context across lowerings so we can reuse its
// internal heap allocations.
let mut isle_ctx = IsleContext::new(lower_ctx, isa_flags);

let temp_regs = generated_code::constructor_lower(&mut isle_ctx, inst).ok_or(())?;
let mut temp_regs = temp_regs.regs().iter();

#[cfg(debug_assertions)]
{
let all_dsts_len = outputs
.iter()
.map(|out| get_output_reg(isle_ctx.lower_ctx, *out).len())
.sum();
debug_assert_eq!(
temp_regs.len(),
all_dsts_len,
"the number of temporary registers and destination registers do \
not match ({} != {}); ensure the correct registers are being \
returned.",
temp_regs.len(),
all_dsts_len,
);
}

// The ISLE generated code emits its own registers to define the
// instruction's lowered values in. We rename those registers to the
// registers they were assigned when their value was used as an operand in
// earlier lowerings.
let mut renamer = RegRenamer::default();
for output in outputs {
let dsts = get_output_reg(isle_ctx.lower_ctx, *output);
for (temp, dst) in temp_regs.by_ref().zip(dsts.regs()) {
renamer.add_rename(*temp, dst.to_reg());
}
}

for mut inst in isle_ctx.into_emitted_insts() {
aarch64_map_regs(&mut inst, &renamer);
lower_ctx.emit(inst);
}

Ok(())
}

pub struct IsleContext<'a, C> {
lower_ctx: &'a mut C,
#[allow(dead_code)] // dead for now, but probably not for long
isa_flags: &'a aarch64_settings::Flags,
emitted_insts: SmallVec<[MInst; 6]>,
lower_common(
lower_ctx,
isa_flags,
outputs,
inst,
|cx, insn| generated_code::constructor_lower(cx, insn),
aarch64_map_regs,
)
}

pub struct ExtendedValue {
val: Value,
extend: ExtendOp,
}

impl<'a, C> IsleContext<'a, C> {
pub fn new(lower_ctx: &'a mut C, isa_flags: &'a aarch64_settings::Flags) -> Self {
IsleContext {
lower_ctx,
isa_flags,
emitted_insts: SmallVec::new(),
}
}

pub fn into_emitted_insts(self) -> SmallVec<[MInst; 6]> {
self.emitted_insts
}
pub struct SinkableAtomicLoad {
atomic_load: Inst,
atomic_addr: Value,
}

impl<'a, C> generated_code::Context for IsleContext<'a, C>
impl<C> generated_code::Context for IsleContext<'_, C, Flags, 6>
where
C: LowerCtx<I = MInst>,
{
Expand Down Expand Up @@ -275,4 +224,23 @@ where
n => Some(n as u64),
}
}

fn sinkable_atomic_load(&mut self, val: Value) -> Option<SinkableAtomicLoad> {
let input = self.lower_ctx.get_value_as_source_or_const(val);
if let Some((atomic_load, 0)) = input.inst {
if self.lower_ctx.data(atomic_load).opcode() == Opcode::AtomicLoad {
let atomic_addr = self.lower_ctx.input_as_value(atomic_load, 0);
return Some(SinkableAtomicLoad {
atomic_load,
atomic_addr,
});
}
}
None
}

fn sink_atomic_load(&mut self, load: &SinkableAtomicLoad) -> Reg {
self.lower_ctx.sink_inst(load.atomic_load);
self.put_in_reg(load.atomic_addr)
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
src/clif.isle be1359b4b6b153f378517c1dd95cd80f4a6bed0c7b86eaba11c088fd71b7bfe80a3c868ace245b2da0bfbbd6ded262ea9576c8e0eeacbf89d03c34a17a709602
src/prelude.isle d3d2a6a42fb778231a4cdca30995324e1293a9ca8073c5a27a501535759eb51f84a6718322a93dfba4b66ee4f0c9afce7dcec0428516ef0c5bc96e8c8b76925d
src/isa/aarch64/inst.isle cec03d88680e8da01424eecc05ef73a48e4055d29fe841fceaa3e6ea4e7cb9abb887401bb5acb2e058c9fc993188640990b699e88272d62e243781b231cdfb0d
src/isa/aarch64/lower.isle e1ae53adc953ad395feeecd8edc8bcfd288491a4e4a71510e5f06e221f767518c6e060ff0d795c7c2510b7d898cc8b9bc0313906412e0176605c33427926f828
src/isa/aarch64/inst.isle 70d7b319ba0b28173d2ef1820bd0e9c4b8cf7a5ab34475a43f03bdc5a6b945a7faf40d7b539a12050ddd8ebc4c6b0fe82df5940eaf966420bb4d58e7420d4206
src/isa/aarch64/lower.isle dfc622b2fecea98079fff182ce3443ada5448256662f598ea009caed3d9bcf6b4816f736a8c7f70142467febf8fc97230c57287f06e80e6101f3b401208c599c
Loading

0 comments on commit 8ac4d87

Please sign in to comment.