From 0df12d649a206efd8e06d44e5b1a68e424ccd789 Mon Sep 17 00:00:00 2001
From: Chris Fallin <chris@cfallin.org>
Date: Fri, 26 Aug 2022 16:51:28 -0700
Subject: [PATCH] AArch64: port load and store operations to ISLE.

This retains `lower_amode` in the handwritten code (@akirilov-arm
reports that there is an upcoming patch to port this), but tweaks it
slightly to take a `Value` rather than an `Inst`.
---
 cranelift/codegen/src/ir/dynamic_type.rs      |  17 ++
 cranelift/codegen/src/ir/mod.rs               |   2 +-
 cranelift/codegen/src/isa/aarch64/abi.rs      |   2 +-
 cranelift/codegen/src/isa/aarch64/inst.isle   |  91 ++++++++-
 .../codegen/src/isa/aarch64/inst/args.rs      |  15 --
 cranelift/codegen/src/isa/aarch64/lower.isle  | 172 +++++++++++++++++-
 cranelift/codegen/src/isa/aarch64/lower.rs    |  87 +++------
 .../codegen/src/isa/aarch64/lower/isle.rs     |  25 ++-
 .../codegen/src/isa/aarch64/lower_inst.rs     | 127 +------------
 cranelift/codegen/src/machinst/isle.rs        |  31 +++-
 cranelift/codegen/src/prelude.isle            |  15 +-
 .../filetests/isa/aarch64/amodes.clif         | 130 ++++++-------
 .../filetests/isa/aarch64/heap_addr.clif      |  10 +-
 .../isa/aarch64/simd-lane-access-compile.clif |   4 +-
 .../filetests/filetests/isa/aarch64/simd.clif |  10 +-
 15 files changed, 442 insertions(+), 296 deletions(-)

diff --git a/cranelift/codegen/src/ir/dynamic_type.rs b/cranelift/codegen/src/ir/dynamic_type.rs
index 91b13af98b34..f1ae30982114 100644
--- a/cranelift/codegen/src/ir/dynamic_type.rs
+++ b/cranelift/codegen/src/ir/dynamic_type.rs
@@ -1,6 +1,7 @@
 //! Dynamic IR types
 
 use crate::ir::entities::DynamicType;
+use crate::ir::types::*;
 use crate::ir::GlobalValue;
 use crate::ir::PrimaryMap;
 use crate::ir::Type;
@@ -36,3 +37,19 @@ impl DynamicTypeData {
 
 /// All allocated dynamic types.
 pub type DynamicTypes = PrimaryMap<DynamicType, DynamicTypeData>;
+
+/// Convert a dynamic-vector type to a fixed-vector type.
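+/// For example, `I32X4XN` maps to `I32X4`.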
+pub fn dynamic_to_fixed(ty: Type) -> Type { + match ty { + I8X8XN => I8X8, + I8X16XN => I8X16, + I16X4XN => I16X4, + I16X8XN => I16X8, + I32X2XN => I32X2, + I32X4XN => I32X4, + I64X2XN => I64X2, + F32X4XN => F32X4, + F64X2XN => F64X2, + _ => unreachable!("unhandled type: {}", ty), + } +} diff --git a/cranelift/codegen/src/ir/mod.rs b/cranelift/codegen/src/ir/mod.rs index 5dc5ad612da2..8ba18987daed 100644 --- a/cranelift/codegen/src/ir/mod.rs +++ b/cranelift/codegen/src/ir/mod.rs @@ -35,7 +35,7 @@ pub use crate::ir::builder::{ }; pub use crate::ir::constant::{ConstantData, ConstantPool}; pub use crate::ir::dfg::{DataFlowGraph, ValueDef}; -pub use crate::ir::dynamic_type::{DynamicTypeData, DynamicTypes}; +pub use crate::ir::dynamic_type::{dynamic_to_fixed, DynamicTypeData, DynamicTypes}; pub use crate::ir::entities::{ Block, Constant, DynamicStackSlot, DynamicType, FuncRef, GlobalValue, Heap, Immediate, Inst, JumpTable, SigRef, StackSlot, Table, UserExternalNameRef, Value, diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index fb11d31d60bd..5eb355338237 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -5,7 +5,7 @@ use crate::ir::types; use crate::ir::types::*; use crate::ir::MemFlags; use crate::ir::Opcode; -use crate::ir::{ExternalName, LibCall, Signature}; +use crate::ir::{dynamic_to_fixed, ExternalName, LibCall, Signature}; use crate::isa; use crate::isa::aarch64::{inst::EmitState, inst::*, settings as aarch64_settings}; use crate::isa::unwind::UnwindInst; diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index 856516ef2bd9..d580846863db 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -1502,10 +1502,13 @@ (extern constructor cond_br_cond cond_br_cond) ;; Lower the address of a load or a store. -(decl amode (Type Inst u32) AMode) +(decl amode (Type Value u32) AMode) ;; TODO: Port lower_address() to ISLE. (extern constructor amode amode) +(decl pair_amode (Value u32) PairAMode) +(extern constructor pair_amode pair_amode) + ;; Matches an `AMode` that is just a register. (decl pure amode_is_reg (AMode) Reg) ;; TODO: Implement in ISLE. @@ -2337,6 +2340,92 @@ (rule (udf trap_code) (SideEffectNoResult.Inst (MInst.Udf trap_code))) +;; Helpers for generating various load instructions, with varying +;; widths and sign/zero-extending properties. 
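+;; Each helper allocates a temporary destination register of the
+;; appropriate class, emits the corresponding `MInst`, and returns the
+;; register holding the loaded value.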
+(decl aarch64_uload8 (AMode MemFlags) Reg) +(rule (aarch64_uload8 amode flags) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.ULoad8 dst amode flags)))) + dst)) +(decl aarch64_sload8 (AMode MemFlags) Reg) +(rule (aarch64_sload8 amode flags) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.SLoad8 dst amode flags)))) + dst)) +(decl aarch64_uload16 (AMode MemFlags) Reg) +(rule (aarch64_uload16 amode flags) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.ULoad16 dst amode flags)))) + dst)) +(decl aarch64_sload16 (AMode MemFlags) Reg) +(rule (aarch64_sload16 amode flags) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.SLoad16 dst amode flags)))) + dst)) +(decl aarch64_uload32 (AMode MemFlags) Reg) +(rule (aarch64_uload32 amode flags) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.ULoad32 dst amode flags)))) + dst)) +(decl aarch64_sload32 (AMode MemFlags) Reg) +(rule (aarch64_sload32 amode flags) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.SLoad32 dst amode flags)))) + dst)) +(decl aarch64_uload64 (AMode MemFlags) Reg) +(rule (aarch64_uload64 amode flags) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.ULoad64 dst amode flags)))) + dst)) +(decl aarch64_fpuload32 (AMode MemFlags) Reg) +(rule (aarch64_fpuload32 amode flags) + (let ((dst WritableReg (temp_writable_reg $F64)) + (_ Unit (emit (MInst.FpuLoad32 dst amode flags)))) + dst)) +(decl aarch64_fpuload64 (AMode MemFlags) Reg) +(rule (aarch64_fpuload64 amode flags) + (let ((dst WritableReg (temp_writable_reg $F64)) + (_ Unit (emit (MInst.FpuLoad64 dst amode flags)))) + dst)) +(decl aarch64_fpuload128 (AMode MemFlags) Reg) +(rule (aarch64_fpuload128 amode flags) + (let ((dst WritableReg (temp_writable_reg $F64X2)) + (_ Unit (emit (MInst.FpuLoad128 dst amode flags)))) + dst)) +(decl aarch64_loadp64 (PairAMode MemFlags) ValueRegs) +(rule (aarch64_loadp64 amode flags) + (let ((dst1 WritableReg (temp_writable_reg $I64)) + (dst2 WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.LoadP64 dst1 dst2 amode flags)))) + (value_regs dst1 dst2))) + +;; Helpers for generating various store instructions with varying +;; widths. 
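+;; These construct a `SideEffectNoResult` rather than returning a
+;; register, since a store produces no value; the lowering rules wrap
+;; them with `side_effect`.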
+(decl aarch64_store8 (AMode MemFlags Reg) SideEffectNoResult)
+(rule (aarch64_store8 amode flags val)
+      (SideEffectNoResult.Inst (MInst.Store8 val amode flags)))
+(decl aarch64_store16 (AMode MemFlags Reg) SideEffectNoResult)
+(rule (aarch64_store16 amode flags val)
+      (SideEffectNoResult.Inst (MInst.Store16 val amode flags)))
+(decl aarch64_store32 (AMode MemFlags Reg) SideEffectNoResult)
+(rule (aarch64_store32 amode flags val)
+      (SideEffectNoResult.Inst (MInst.Store32 val amode flags)))
+(decl aarch64_store64 (AMode MemFlags Reg) SideEffectNoResult)
+(rule (aarch64_store64 amode flags val)
+      (SideEffectNoResult.Inst (MInst.Store64 val amode flags)))
+(decl aarch64_fpustore32 (AMode MemFlags Reg) SideEffectNoResult)
+(rule (aarch64_fpustore32 amode flags val)
+      (SideEffectNoResult.Inst (MInst.FpuStore32 val amode flags)))
+(decl aarch64_fpustore64 (AMode MemFlags Reg) SideEffectNoResult)
+(rule (aarch64_fpustore64 amode flags val)
+      (SideEffectNoResult.Inst (MInst.FpuStore64 val amode flags)))
+(decl aarch64_fpustore128 (AMode MemFlags Reg) SideEffectNoResult)
+(rule (aarch64_fpustore128 amode flags val)
+      (SideEffectNoResult.Inst (MInst.FpuStore128 val amode flags)))
+(decl aarch64_storep64 (PairAMode MemFlags Reg Reg) SideEffectNoResult)
+(rule (aarch64_storep64 amode flags val1 val2)
+      (SideEffectNoResult.Inst (MInst.StoreP64 val1 val2 amode flags)))
+
 ;; Immediate value helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; Type of extension performed by an immediate helper
diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs
index ce2d70c0925e..4428be2a8370 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/args.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs
@@ -773,18 +773,3 @@ impl VectorSize {
         }
     }
 }
-
-pub(crate) fn dynamic_to_fixed(ty: Type) -> Type {
-    match ty {
-        I8X8XN => I8X8,
-        I8X16XN => I8X16,
-        I16X4XN => I16X4,
-        I16X8XN => I16X8,
-        I32X2XN => I32X2,
-        I32X4XN => I32X4,
-        I64X2XN => I64X2,
-        F32X4XN => F32X4,
-        F64X2XN => F64X2,
-        _ => unreachable!("unhandled type: {}", ty),
-    }
-}
diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle
index d86de45a68ad..2571b877719c 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -1777,10 +1777,10 @@
 (rule (lower (has_type ty (splat (ireduce (iconst (u64_from_imm64 n))))))
       (splat_const n (vector_size ty)))
 
-(rule (lower (has_type ty (splat x @ (load flags _addr offset))))
+(rule (lower (has_type ty (splat x @ (load flags addr offset))))
       (if-let mem_op (is_sinkable_inst x))
       (let ((_ Unit (sink_inst mem_op))
-            (addr AMode (amode (lane_type ty) mem_op offset))
+            (addr AMode (amode (lane_type ty) addr offset))
            (address Reg (load_addr addr)))
        (ld1r address (vector_size ty) flags)))
@@ -2031,6 +2031,174 @@
 (rule (lower (return args))
       (lower_return (range 0 (value_slice_len args)) args))
 
+;;;; Rules for loads ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower
+       (has_type $I8 (load flags address offset)))
+      (aarch64_uload8 (amode $I8 address offset) flags))
+(rule (lower
+       (has_type $I16 (load flags address offset)))
+      (aarch64_uload16 (amode $I16 address offset) flags))
+(rule (lower
+       (has_type $I32 (load flags address offset)))
+      (aarch64_uload32 (amode $I32 address offset) flags))
+(rule (lower
+       (has_type $I64 (load flags address offset)))
+      (aarch64_uload64 (amode $I64 address offset) flags))
+(rule (lower
+       (has_type $R64 (load flags address offset)))
+      (aarch64_uload64 (amode $I64 address offset) flags))
+(rule (lower
+       (has_type $F32 (load flags address offset)))
+      (aarch64_fpuload32 (amode $F32 address offset) flags))
+(rule (lower
+       (has_type $F64 (load flags address offset)))
+      (aarch64_fpuload64 (amode $F64 address offset) flags))
+(rule (lower
+       (has_type $I128 (load flags address offset)))
+      (aarch64_loadp64 (pair_amode address offset) flags))
+(rule (lower
+       (has_type (ty_vec64 _)
+                 (load flags address offset)))
+      (aarch64_fpuload64 (amode $F64 address offset) flags))
+(rule (lower
+       (has_type (ty_vec128 _)
+                 (load flags address offset)))
+      (aarch64_fpuload128 (amode $I8X16 address offset) flags))
+(rule (lower
+       (has_type (ty_dyn_vec64 _)
+                 (load flags address offset)))
+      (aarch64_fpuload64 (amode $F64 address offset) flags))
+(rule (lower
+       (has_type (ty_dyn_vec128 _)
+                 (load flags address offset)))
+      (aarch64_fpuload128 (amode $I8X16 address offset) flags))
+
+(rule (lower
+       (uload8 flags address offset))
+      (aarch64_uload8 (amode $I8 address offset) flags))
+(rule (lower
+       (sload8 flags address offset))
+      (aarch64_sload8 (amode $I8 address offset) flags))
+(rule (lower
+       (uload16 flags address offset))
+      (aarch64_uload16 (amode $I16 address offset) flags))
+(rule (lower
+       (sload16 flags address offset))
+      (aarch64_sload16 (amode $I16 address offset) flags))
+(rule (lower
+       (uload32 flags address offset))
+      (aarch64_uload32 (amode $I32 address offset) flags))
+(rule (lower
+       (sload32 flags address offset))
+      (aarch64_sload32 (amode $I32 address offset) flags))
+
+(rule (lower
+       (sload8x8 flags address offset))
+      (vec_extend (VecExtendOp.Sxtl)
+                  (aarch64_fpuload64 (amode $F64 address offset) flags)
+                  $false
+                  (ScalarSize.Size16)))
+(rule (lower
+       (uload8x8 flags address offset))
+      (vec_extend (VecExtendOp.Uxtl)
+                  (aarch64_fpuload64 (amode $F64 address offset) flags)
+                  $false
+                  (ScalarSize.Size16)))
+(rule (lower
+       (sload16x4 flags address offset))
+      (vec_extend (VecExtendOp.Sxtl)
+                  (aarch64_fpuload64 (amode $F64 address offset) flags)
+                  $false
+                  (ScalarSize.Size32)))
+(rule (lower
+       (uload16x4 flags address offset))
+      (vec_extend (VecExtendOp.Uxtl)
+                  (aarch64_fpuload64 (amode $F64 address offset) flags)
+                  $false
+                  (ScalarSize.Size32)))
+(rule (lower
+       (sload32x2 flags address offset))
+      (vec_extend (VecExtendOp.Sxtl)
+                  (aarch64_fpuload64 (amode $F64 address offset) flags)
+                  $false
+                  (ScalarSize.Size64)))
+(rule (lower
+       (uload32x2 flags address offset))
+      (vec_extend (VecExtendOp.Uxtl)
+                  (aarch64_fpuload64 (amode $F64 address offset) flags)
+                  $false
+                  (ScalarSize.Size64)))
+
+;;;; Rules for stores ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower
+       (store flags value @ (value_type $I8) address offset))
+      (side_effect
+       (aarch64_store8 (amode $I8 address offset) flags value)))
+(rule (lower
+       (store flags value @ (value_type $I16) address offset))
+      (side_effect
+       (aarch64_store16 (amode $I16 address offset) flags value)))
+(rule (lower
+       (store flags value @ (value_type $I32) address offset))
+      (side_effect
+       (aarch64_store32 (amode $I32 address offset) flags value)))
+(rule (lower
+       (store flags value @ (value_type $I64) address offset))
+      (side_effect
+       (aarch64_store64 (amode $I64 address offset) flags value)))
+(rule (lower
+       (store flags value @ (value_type $R64) address offset))
+      (side_effect
+       (aarch64_store64 (amode $I64 address offset) flags value)))
+
+(rule (lower
+       (istore8 flags value address offset))
+      (side_effect
+       (aarch64_store8 (amode $I8 address offset) flags value)))
+(rule (lower
+       (istore16 flags value address offset))
+      (side_effect
+       (aarch64_store16 (amode $I16 address offset) flags value)))
+(rule (lower
+       (istore32 flags value address offset))
+      (side_effect
+       (aarch64_store32 (amode $I32 address offset) flags value)))
+
+(rule (lower
+       (store flags value @ (value_type $F32) address offset))
+      (side_effect
+       (aarch64_fpustore32 (amode $F32 address offset) flags value)))
+(rule (lower
+       (store flags value @ (value_type $F64) address offset))
+      (side_effect
+       (aarch64_fpustore64 (amode $F64 address offset) flags value)))
+
+(rule (lower
+       (store flags value @ (value_type $I128) address offset))
+      (side_effect
+       (aarch64_storep64 (pair_amode address offset) flags
+                         (value_regs_get value 0)
+                         (value_regs_get value 1))))
+
+(rule (lower
+       (store flags value @ (value_type (ty_vec64 _)) address offset))
+      (side_effect
+       (aarch64_fpustore64 (amode $F64 address offset) flags value)))
+(rule (lower
+       (store flags value @ (value_type (ty_vec128 _)) address offset))
+      (side_effect
+       (aarch64_fpustore128 (amode $I8X16 address offset) flags value)))
+(rule (lower
+       (store flags value @ (value_type (ty_dyn_vec64 _)) address offset))
+      (side_effect
+       (aarch64_fpustore64 (amode $F64 address offset) flags value)))
+(rule (lower
+       (store flags value @ (value_type (ty_dyn_vec128 _)) address offset))
+      (side_effect
+       (aarch64_fpustore128 (amode $I8X16 address offset) flags value)))
+
 ;;; Rules for `{get,set}_pinned_reg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (get_pinned_reg))
diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs
index 3ec6bf3bbe99..7921290a1d34 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -18,7 +18,7 @@ use crate::machinst::lower::*;
 use crate::machinst::{Reg, Writable};
 use crate::{machinst::*, trace};
 use crate::{CodegenError, CodegenResult};
-use smallvec::SmallVec;
+use smallvec::{smallvec, SmallVec};
 use std::cmp;
 
 pub mod isle;
@@ -507,19 +507,19 @@ type AddressAddend64List = SmallVec<[Reg; 4]>;
 /// then possibly support extensions at these leaves.
 fn collect_address_addends(
     ctx: &mut Lower<Inst>,
-    roots: &[InsnInput],
+    root: Value,
 ) -> (AddressAddend64List, AddressAddend32List, i64) {
     let mut result32: AddressAddend32List = SmallVec::new();
     let mut result64: AddressAddend64List = SmallVec::new();
     let mut offset: i64 = 0;
 
-    let mut workqueue: SmallVec<[InsnInput; 4]> = roots.iter().cloned().collect();
+    let mut workqueue: SmallVec<[Value; 4]> = smallvec![root];
 
-    while let Some(input) = workqueue.pop() {
-        debug_assert!(ty_bits(ctx.input_ty(input.insn, input.input)) == 64);
-        if let Some((op, insn)) = maybe_input_insn_multi(
+    while let Some(value) = workqueue.pop() {
+        debug_assert_eq!(ty_bits(ctx.value_ty(value)), 64);
+        if let Some((op, insn)) = maybe_value_multi(
             ctx,
-            input,
+            value,
             &[
                 Opcode::Uextend,
                 Opcode::Sextend,
@@ -551,12 +551,12 @@ fn collect_address_addends(
                 }
             }
             Opcode::Uextend | Opcode::Sextend => {
-                let reg = put_input_in_reg(ctx, input, NarrowValueMode::None);
+                let reg = put_value_in_reg(ctx, value, NarrowValueMode::None);
                 result64.push(reg);
             }
             Opcode::Iadd => {
                 for input in 0..ctx.num_inputs(insn) {
-                    let addend = InsnInput { insn, input };
+                    let addend = ctx.input_as_value(insn, input);
                     workqueue.push(addend);
                 }
             }
@@ -567,7 +567,7 @@ fn collect_address_addends(
                 _ => panic!("Unexpected opcode from maybe_input_insn_multi"),
             }
         } else {
-            let reg = put_input_in_reg(ctx, input, NarrowValueMode::ZeroExtend64);
+            let reg = put_value_in_reg(ctx, value, NarrowValueMode::ZeroExtend64);
             result64.push(reg);
         }
     }
@@ -576,15 +576,11 @@ fn collect_address_addends(
 }
 
 /// Lower the address of a pair load or store.
-pub(crate) fn lower_pair_address(
-    ctx: &mut Lower<Inst>,
-    roots: &[InsnInput],
-    offset: i32,
-) -> PairAMode {
+pub(crate) fn lower_pair_address(ctx: &mut Lower<Inst>, addr: Value, offset: i32) -> PairAMode {
     // Collect addends through an arbitrary tree of 32-to-64-bit sign/zero
     // extends and addition ops. We update these as we consume address
     // components, so they represent the remaining addends not yet handled.
-    let (mut addends64, mut addends32, args_offset) = collect_address_addends(ctx, roots);
+    let (mut addends64, mut addends32, args_offset) = collect_address_addends(ctx, addr);
     let offset = args_offset + (offset as i64);
 
     trace!(
@@ -636,7 +632,7 @@ pub(crate) fn lower_pair_address(
 pub(crate) fn lower_address(
     ctx: &mut Lower<Inst>,
     elem_ty: Type,
-    roots: &[InsnInput],
+    addr: Value,
     offset: i32,
 ) -> AMode {
     // TODO: support base_reg + scale * index_reg. For this, we would need to
@@ -645,7 +641,7 @@ pub(crate) fn lower_address(
     // Collect addends through an arbitrary tree of 32-to-64-bit sign/zero
     // extends and addition ops. We update these as we consume address
     // components, so they represent the remaining addends not yet handled.
-    let (mut addends64, mut addends32, args_offset) = collect_address_addends(ctx, roots);
+    let (mut addends64, mut addends32, args_offset) = collect_address_addends(ctx, addr);
     let mut offset = args_offset + (offset as i64);
 
     trace!(
@@ -1088,14 +1084,26 @@ pub(crate) fn maybe_input_insn(
     None
 }
 
-/// Checks for an instance of any one of `ops` feeding the given input.
-pub(crate) fn maybe_input_insn_multi(
+/// Checks for an instance of `op` defining the given value.
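+/// Returns `None` if the value is not defined by an instruction.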
+pub(crate) fn maybe_value(c: &mut Lower<Inst>, value: Value, op: Opcode) -> Option<IRInst> {
+    let inputs = c.get_value_as_source_or_const(value);
+    if let Some((src_inst, _)) = inputs.inst.as_inst() {
+        let data = c.data(src_inst);
+        if data.opcode() == op {
+            return Some(src_inst);
+        }
+    }
+    None
+}
+
+/// Checks for an instance of any one of `ops` defining the given value.
+pub(crate) fn maybe_value_multi(
     c: &mut Lower<Inst>,
-    input: InsnInput,
+    value: Value,
     ops: &[Opcode],
 ) -> Option<(Opcode, IRInst)> {
     for &op in ops {
-        if let Some(inst) = maybe_input_insn(c, input, op) {
+        if let Some(inst) = maybe_value(c, value, op) {
             return Some((op, inst));
         }
     }
@@ -1452,41 +1460,6 @@ pub(crate) fn materialize_bool_result(
     }
 }
 
-fn load_op_to_ty(op: Opcode) -> Option<Type> {
-    match op {
-        Opcode::Sload8 | Opcode::Uload8 => Some(I8),
-        Opcode::Sload16 | Opcode::Uload16 => Some(I16),
-        Opcode::Sload32 | Opcode::Uload32 => Some(I32),
-        Opcode::Load => None,
-        Opcode::Sload8x8 | Opcode::Uload8x8 => Some(I8X8),
-        Opcode::Sload16x4 | Opcode::Uload16x4 => Some(I16X4),
-        Opcode::Sload32x2 | Opcode::Uload32x2 => Some(I32X2),
-        _ => None,
-    }
-}
-
-/// Helper to lower a load instruction; this is used in several places, because
-/// a load can sometimes be merged into another operation.
-pub(crate) fn lower_load<
-    F: FnMut(&mut Lower<Inst>, ValueRegs<Writable<Reg>>, Type, AMode) -> CodegenResult<()>,
->(
-    ctx: &mut Lower<Inst>,
-    ir_inst: IRInst,
-    inputs: &[InsnInput],
-    output: InsnOutput,
-    mut f: F,
-) -> CodegenResult<()> {
-    let op = ctx.data(ir_inst).opcode();
-
-    let elem_ty = load_op_to_ty(op).unwrap_or_else(|| ctx.output_ty(ir_inst, 0));
-
-    let off = ctx.data(ir_inst).load_store_offset().unwrap();
-    let mem = lower_address(ctx, elem_ty, &inputs[..], off);
-    let rd = get_output_reg(ctx, output);
-
-    f(ctx, rd, elem_ty, mem)
-}
-
 //=============================================================================
 // Lowering-backend trait implementation.
 
diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
index 8c54d9a3d9da..db1a59e5530f 100644
--- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
@@ -6,14 +6,14 @@ use generated_code::Context;
 
 // Types that the generated ISLE code uses via `use super::*`.
 use super::{
-    insn_inputs, lower_constant_f128, lower_constant_f32, lower_constant_f64, writable_zero_reg,
-    zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond,
-    CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC,
-    JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize,
-    PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
+    lower_constant_f128, lower_constant_f32, lower_constant_f64, writable_zero_reg, zero_reg,
+    AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind,
+    ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo,
+    MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize, PairAMode, Reg,
+    ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
 };
 use crate::isa::aarch64::inst::{FPULeftShiftImm, FPURightShiftImm};
-use crate::isa::aarch64::lower::{lower_address, lower_splat_const};
+use crate::isa::aarch64::lower::{lower_address, lower_pair_address, lower_splat_const};
 use crate::isa::aarch64::settings::Flags as IsaFlags;
 use crate::machinst::valueregs;
 use crate::machinst::{isle::*, InputSourceInst};
@@ -484,13 +484,12 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
         }
     }
 
-    fn amode(&mut self, ty: Type, mem_op: Inst, offset: u32) -> AMode {
-        lower_address(
-            self.lower_ctx,
-            ty,
-            &insn_inputs(self.lower_ctx, mem_op)[..],
-            offset as i32,
-        )
+    fn amode(&mut self, ty: Type, addr: Value, offset: u32) -> AMode {
+        lower_address(self.lower_ctx, ty, addr, offset as i32)
+    }
+
+    fn pair_amode(&mut self, addr: Value, offset: u32) -> PairAMode {
+        lower_pair_address(self.lower_ctx, addr, offset as i32)
     }
 
     fn amode_is_reg(&mut self, address: &AMode) -> Option<Reg> {
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index c72ddea6ef3f..1202e8351087 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -101,133 +101,10 @@ pub(crate) fn lower_insn_to_regs(
         | Opcode::Sload16x4
         | Opcode::Uload16x4
         | Opcode::Sload32x2
-        | Opcode::Uload32x2 => {
-            let sign_extend = match op {
-                Opcode::Sload8 | Opcode::Sload16 | Opcode::Sload32 => true,
-                _ => false,
-            };
-            let flags = ctx
-                .memflags(insn)
-                .expect("Load instruction should have memflags");
-
-            let out_ty = ctx.output_ty(insn, 0);
-            if out_ty == I128 {
-                let off = ctx.data(insn).load_store_offset().unwrap();
-                let mem = lower_pair_address(ctx, &inputs[..], off);
-                let dst = get_output_reg(ctx, outputs[0]);
-                ctx.emit(Inst::LoadP64 {
-                    rt: dst.regs()[0],
-                    rt2: dst.regs()[1],
-                    mem,
-                    flags,
-                });
-            } else {
-                lower_load(
-                    ctx,
-                    insn,
-                    &inputs[..],
-                    outputs[0],
-                    |ctx, dst, mut elem_ty, mem| {
-                        if elem_ty.is_dynamic_vector() {
-                            elem_ty = dynamic_to_fixed(elem_ty);
-                        }
-                        let rd = dst.only_reg().unwrap();
-                        let is_float = ty_has_float_or_vec_representation(elem_ty);
-                        ctx.emit(match (ty_bits(elem_ty), sign_extend, is_float) {
-                            (1, _, _) => Inst::ULoad8 { rd, mem, flags },
-                            (8, false, _) => Inst::ULoad8 { rd, mem, flags },
-                            (8, true, _) => Inst::SLoad8 { rd, mem, flags },
-                            (16, false, _) => Inst::ULoad16 { rd, mem, flags },
-                            (16, true, _) => Inst::SLoad16 { rd, mem, flags },
-                            (32, false, false) => Inst::ULoad32 { rd, mem, flags },
-                            (32, true, false) => Inst::SLoad32 { rd, mem, flags },
-                            (32, _, true) => Inst::FpuLoad32 { rd, mem, flags },
-                            (64, _, false) => Inst::ULoad64 { rd, mem, flags },
-                            // Note that we treat some of the vector loads as scalar floating-point loads,
-                            // which is correct in a little endian environment.
-                            (64, _, true) => Inst::FpuLoad64 { rd, mem, flags },
-                            (128, _, true) => Inst::FpuLoad128 { rd, mem, flags },
-                            _ => {
-                                return Err(CodegenError::Unsupported(format!(
-                                    "Unsupported type in load: {:?}",
-                                    elem_ty
-                                )))
-                            }
-                        });
-
-                        let vec_extend = match op {
-                            Opcode::Sload8x8 => Some((VecExtendOp::Sxtl, ScalarSize::Size16)),
-                            Opcode::Uload8x8 => Some((VecExtendOp::Uxtl, ScalarSize::Size16)),
-                            Opcode::Sload16x4 => Some((VecExtendOp::Sxtl, ScalarSize::Size32)),
-                            Opcode::Uload16x4 => Some((VecExtendOp::Uxtl, ScalarSize::Size32)),
-                            Opcode::Sload32x2 => Some((VecExtendOp::Sxtl, ScalarSize::Size64)),
-                            Opcode::Uload32x2 => Some((VecExtendOp::Uxtl, ScalarSize::Size64)),
-                            _ => None,
-                        };
-
-                        if let Some((t, lane_size)) = vec_extend {
-                            let rd = dst.only_reg().unwrap();
-                            ctx.emit(Inst::VecExtend {
-                                t,
-                                rd,
-                                rn: rd.to_reg(),
-                                high_half: false,
-                                lane_size,
-                            });
-                        }
-
-                        Ok(())
-                    },
-                )?;
-            }
-        }
+        | Opcode::Uload32x2 => implemented_in_isle(ctx),
 
         Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
-            let off = ctx.data(insn).load_store_offset().unwrap();
-            let mut elem_ty = match op {
-                Opcode::Istore8 => I8,
-                Opcode::Istore16 => I16,
-                Opcode::Istore32 => I32,
-                Opcode::Store => ctx.input_ty(insn, 0),
-                _ => unreachable!(),
-            };
-            let is_float = ty_has_float_or_vec_representation(elem_ty);
-            let flags = ctx
-                .memflags(insn)
-                .expect("Store instruction should have memflags");
-
-            let dst = put_input_in_regs(ctx, inputs[0]);
-
-            if elem_ty == I128 {
-                let mem = lower_pair_address(ctx, &inputs[1..], off);
-                ctx.emit(Inst::StoreP64 {
-                    rt: dst.regs()[0],
-                    rt2: dst.regs()[1],
-                    mem,
-                    flags,
-                });
-            } else {
-                if elem_ty.is_dynamic_vector() {
-                    elem_ty = dynamic_to_fixed(elem_ty);
-                }
-                let rd = dst.only_reg().unwrap();
-                let mem = lower_address(ctx, elem_ty, &inputs[1..], off);
-                ctx.emit(match (ty_bits(elem_ty), is_float) {
-                    (1, _) | (8, _) => Inst::Store8 { rd, mem, flags },
-                    (16, _) => Inst::Store16 { rd, mem, flags },
-                    (32, false) => Inst::Store32 { rd, mem, flags },
-                    (32, true) => Inst::FpuStore32 { rd, mem, flags },
-                    (64, false) => Inst::Store64 { rd, mem, flags },
-                    (64, true) => Inst::FpuStore64 { rd, mem, flags },
-                    (128, _) => Inst::FpuStore128 { rd, mem, flags },
-                    _ => {
-                        return Err(CodegenError::Unsupported(format!(
-                            "Unsupported type in store: {:?}",
-                            elem_ty
-                        )))
-                    }
-                });
-            }
+            implemented_in_isle(ctx)
         }
 
         Opcode::StackAddr => implemented_in_isle(ctx),
diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs
index d62b2f831e71..9532526f8b75 100644
--- a/cranelift/codegen/src/machinst/isle.rs
+++ b/cranelift/codegen/src/machinst/isle.rs
@@ -9,8 +9,8 @@ use target_lexicon::Triple;
 pub use super::MachLabel;
 pub use crate::data_value::DataValue;
 pub use crate::ir::{
-    ArgumentExtension, Constant, DynamicStackSlot, ExternalName, FuncRef, GlobalValue, Immediate,
-    SigRef, StackSlot,
+    dynamic_to_fixed, ArgumentExtension, Constant, DynamicStackSlot, ExternalName, FuncRef,
+    GlobalValue, Immediate, SigRef, StackSlot,
 };
 pub use crate::isa::unwind::UnwindInst;
 pub use crate::machinst::{
@@ -397,6 +397,15 @@ macro_rules! isle_prelude_methods {
             }
         }
 
+        #[inline]
+        fn ty_vec64_ctor(&mut self, ty: Type) -> Option<Type> {
+            if ty.is_vector() && ty.bits() == 64 {
+                Some(ty)
+            } else {
+                None
+            }
+        }
+
         #[inline]
         fn ty_vec64(&mut self, ty: Type) -> Option<Type> {
             if ty.is_vector() && ty.bits() == 64 {
@@ -415,6 +424,24 @@ macro_rules! isle_prelude_methods {
             }
         }
 
+        #[inline]
+        fn ty_dyn_vec64(&mut self, ty: Type) -> Option<Type> {
+            if ty.is_dynamic_vector() && dynamic_to_fixed(ty).bits() == 64 {
+                Some(ty)
+            } else {
+                None
+            }
+        }
+
+        #[inline]
+        fn ty_dyn_vec128(&mut self, ty: Type) -> Option<Type> {
+            if ty.is_dynamic_vector() && dynamic_to_fixed(ty).bits() == 128 {
+                Some(ty)
+            } else {
+                None
+            }
+        }
+
         #[inline]
         fn ty_vec64_int(&mut self, ty: Type) -> Option<Type> {
             if ty.is_vector() && ty.bits() == 64 && ty.lane_type().is_int() {
diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle
index 7119a0cb87be..98df224bcf23 100644
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -381,14 +381,25 @@
 (decl ty_float_or_vec (Type) Type)
 (extern extractor ty_float_or_vec ty_float_or_vec)
 
-;; A pure constructor that only matches 64-bit vector types.
+;; A pure constructor/extractor that only matches 64-bit vector types.
 (decl pure ty_vec64 (Type) Type)
-(extern constructor ty_vec64 ty_vec64)
+(extern constructor ty_vec64 ty_vec64_ctor)
+(extern extractor ty_vec64 ty_vec64)
 
 ;; An extractor that only matches 128-bit vector types.
 (decl ty_vec128 (Type) Type)
 (extern extractor ty_vec128 ty_vec128)
 
+;; An extractor that only matches dynamic vector types with a 64-bit
+;; base type.
+(decl ty_dyn_vec64 (Type) Type)
+(extern extractor ty_dyn_vec64 ty_dyn_vec64)
+
+;; An extractor that only matches dynamic vector types with a 128-bit
+;; base type.
+(decl ty_dyn_vec128 (Type) Type)
+(extern extractor ty_dyn_vec128 ty_dyn_vec128)
+
 ;; An extractor that only matches 64-bit vector types with integer
 ;; lanes (I8X8, I16X4, I32X2)
 (decl ty_vec64_int (Type) Type)
diff --git a/cranelift/filetests/filetests/isa/aarch64/amodes.clif b/cranelift/filetests/filetests/isa/aarch64/amodes.clif
index c3254cc9469a..83b7b96bc206 100644
--- a/cranelift/filetests/filetests/isa/aarch64/amodes.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/amodes.clif
@@ -36,8 +36,8 @@ block0(v0: i32, v1: i32):
 }
 
 ; block0:
-; mov w6, w0
-; ldr w0, [x6, w1, UXTW]
+; mov w5, w0
+; ldr w0, [x5, w1, UXTW]
 ; ret
 
 function %f8(i64, i32) -> i32 {
@@ -52,10 +52,10 @@ block0(v0: i64, v1: i32):
 }
 
 ; block0:
-; add x6, x0, #68
-; add x6, x6, x0
-; add x6, x6, x1, SXTW
-; ldr w0, [x6, w1, SXTW]
+; add x5, x0, #68
+; add x5, x5, x0
+; add x5, x5, x1, SXTW
+; ldr w0, [x5, w1, SXTW]
 ; ret
 
 function %f9(i64, i64, i64) -> i32 {
@@ -85,10 +85,10 @@ block0(v0: i64, v1: i64, v2: i64):
 }
 
 ; block0:
-; movz x8, #4100
-; add x8, x8, x1
-; add x8, x8, x2
-; ldr w0, [x8, x0]
+; movz x7, #4100
+; add x7, x7, x1
+; add x7, x7, x2
+; ldr w0, [x7, x0]
 ; ret
 
 function %f10() -> i32 {
@@ -99,8 +99,8 @@ block0:
 }
 
 ; block0:
-; movz x2, #1234
-; ldr w0, [x2]
+; movz x1, #1234
+; ldr w0, [x1]
 ; ret
 
 function %f11(i64) -> i32 {
@@ -112,8 +112,8 @@ block0(v0: i64):
 }
 
 ; block0:
-; add x4, x0, #8388608
-; ldr w0, [x4]
+; add x3, x0, #8388608
+; ldr w0, [x3]
 ; ret
 
 function %f12(i64) -> i32 {
@@ -125,8 +125,8 @@ block0(v0: i64):
 }
 
 ; block0:
-; sub x4, x0, #4
-; ldr w0, [x4]
+; sub x3, x0, #4
+; ldr w0, [x3]
 ; ret
 
 function %f13(i64) -> i32 {
@@ -138,10 +138,10 @@ block0(v0: i64):
 }
 
 ; block0:
-; movz w4, #51712
-; movk w4, #15258, LSL #16
-; add x4, x4, x0
-; ldr w0, [x4]
+; movz w3, #51712
+; movk w3, #15258, LSL #16
+; add x3, x3, x0
+; ldr w0, [x3]
 ; ret
 
 function %f14(i32) -> i32 {
@@ -152,8 +152,8 @@ block0(v0: i32):
 }
 
 ; block0:
-; sxtw x4, w0
-; ldr w0, [x4]
+; sxtw x3, w0
+; ldr w0, [x3]
 ; ret
 
 function %f15(i32, i32) -> i32 {
@@ -166,8 +166,8 @@ block0(v0: i32, v1: i32):
 }
 
 ; block0:
-; sxtw x6, w0
-; ldr w0, [x6, w1, SXTW]
+; sxtw x5, w0
+; ldr w0, [x5, w1, SXTW]
 ; ret
 
 function %f18(i64, i64, i64) -> i32 {
@@ -179,8 +179,8 @@ block0(v0: i64, v1: i64, v2: i64):
 }
 
 ; block0:
-; movn w8, #4097
-; ldrsh x0, [x8]
+; movn w7, #4097
+; ldrsh x0, [x7]
 ; ret
 
 function %f19(i64, i64, i64) -> i32 {
@@ -192,8 +192,8 @@ block0(v0: i64, v1: i64, v2: i64):
 }
 
 ; block0:
-; movz x8, #4098
-; ldrsh x0, [x8]
+; movz x7, #4098
+; ldrsh x0, [x7]
 ; ret
 
 function %f20(i64, i64, i64) -> i32 {
@@ -205,9 +205,9 @@ block0(v0: i64, v1: i64, v2: i64):
 }
 
 ; block0:
-; movn w8, #4097
-; sxtw x10, w8
-; ldrsh x0, [x10]
+; movn w7, #4097
+; sxtw x9, w7
+; ldrsh x0, [x9]
 ; ret
 
 function %f21(i64, i64, i64) -> i32 {
@@ -219,9 +219,9 @@ block0(v0: i64, v1: i64, v2: i64):
 }
 
 ; block0:
-; movz x8, #4098
-; sxtw x10, w8
-; ldrsh x0, [x10]
+; movz x7, #4098
+; sxtw x9, w7
+; ldrsh x0, [x9]
 ; ret
 
 function %i128(i64) -> i128 {
@@ -232,11 +232,11 @@ block0(v0: i64):
 }
 
 ; block0:
-; mov x8, x0
-; ldp x3, x1, [x8]
-; mov x11, x3
+; mov x6, x0
+; ldp x7, x1, [x6]
+; mov x11, x7
 ; stp x11, x1, [x0]
-; mov x0, x3
+; mov x0, x7
 ; ret
 
 function %i128_imm_offset(i64) -> i128 {
@@ -247,11 +247,11 @@ block0(v0: i64):
 }
 
 ; block0:
-; mov x8, x0
-; ldp x3, x1, [x8, #16]
-; mov x11, x3
+; mov x6, x0
+; ldp x7, x1, [x6, #16]
+; mov x11, x7
 ; stp x11, x1, [x0, #16]
-; mov x0, x3
+; mov x0, x7
 ; ret
 
 function %i128_imm_offset_large(i64) -> i128 {
@@ -262,11 +262,11 @@ block0(v0: i64):
 }
 
 ; block0:
-; mov x8, x0
-; ldp x3, x1, [x8, #504]
-; mov x11, x3
+; mov x6, x0
+; ldp x7, x1, [x6, #504]
+; mov x11, x7
 ; stp x11, x1, [x0, #504]
-; mov x0, x3
+; mov x0, x7
 ; ret
 
 function %i128_imm_offset_negative_large(i64) -> i128 {
@@ -277,11 +277,11 @@ block0(v0: i64):
 }
 
 ; block0:
-; mov x8, x0
-; ldp x3, x1, [x8, #-512]
-; mov x11, x3
+; mov x6, x0
+; ldp x7, x1, [x6, #-512]
+; mov x11, x7
 ; stp x11, x1, [x0, #-512]
-; mov x0, x3
+; mov x0, x7
 ; ret
 
 function %i128_add_offset(i64) -> i128 {
@@ -293,11 +293,11 @@ block0(v0: i64):
 }
 
 ; block0:
-; mov x8, x0
-; ldp x3, x1, [x8, #32]
-; mov x11, x3
+; mov x6, x0
+; ldp x7, x1, [x6, #32]
+; mov x11, x7
 ; stp x11, x1, [x0, #32]
-; mov x0, x3
+; mov x0, x7
 ; ret
 
 function %i128_32bit_sextend_simple(i32) -> i128 {
@@ -309,11 +309,11 @@ block0(v0: i32):
 }
 
 ; block0:
-; sxtw x8, w0
-; ldp x4, x1, [x8]
-; sxtw x9, w0
-; mov x0, x4
-; stp x0, x1, [x9]
+; sxtw x6, w0
+; ldp x10, x1, [x6]
+; sxtw x7, w0
+; mov x0, x10
+; stp x0, x1, [x7]
 ; ret
 
 function %i128_32bit_sextend(i64, i32) -> i128 {
@@ -327,13 +327,13 @@ block0(v0: i64, v1: i32):
 }
 
 ; block0:
-; mov x10, x0
-; add x10, x10, x1, SXTW
-; ldp x6, x7, [x10, #24]
+; mov x8, x0
+; add x8, x8, x1, SXTW
+; ldp x10, x11, [x8, #24]
 ; add x0, x0, x1, SXTW
-; mov x15, x6
-; mov x1, x7
+; mov x15, x10
+; mov x1, x11
 ; stp x15, x1, [x0, #24]
-; mov x0, x6
+; mov x0, x10
 ; ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif
index c8056c3d9e0b..74080e6d4126 100644
--- a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif
@@ -15,15 +15,15 @@ block0(v0: i64, v1: i32):
 
 ; block0:
 ; mov w10, w1
-; ldr x5, [x0]
-; mov x11, x5
+; ldr x11, [x0]
+; mov x11, x11
 ; subs xzr, x10, x11
 ; b.ls label1 ; b label2
 ; block1:
-; add x13, x0, x1, UXTW
+; add x12, x0, x1, UXTW
 ; subs xzr, x10, x11
-; movz x14, #0
-; csel x0, x14, x13, hi
+; movz x13, #0
+; csel x0, x13, x12, hi
 ; csdb
 ; ret
 ; block2:
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/aarch64/simd-lane-access-compile.clif
index 76eb32cb8e8f..2f4f35f574f9 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd-lane-access-compile.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-lane-access-compile.clif
@@ -98,8 +98,8 @@ block0(v0: i64):
 }
 
 ; block0:
-; ldr w2, [x0]
-; fmov s0, w2
+; ldr w4, [x0]
+; fmov s0, w4
 ; ret
 
 function %load32_zero_int(i32) -> i32x4 {
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd.clif b/cranelift/filetests/filetests/isa/aarch64/simd.clif
index 166d27b80b08..b26811e6fad8 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd.clif
@@ -86,9 +86,9 @@ block0(v0: i64, v1: i64):
 }
 
 ; block0:
-; ldrb w4, [x0]
+; ldrb w8, [x0]
 ; ld1r { v0.16b }, [x1]
-; dup v1.16b, w4
+; dup v1.16b, w8
 ; ret
 
 function %f8(i64, i64) -> i8x16, i8x16 {
@@ -100,9 +100,9 @@ block0(v0: i64, v1: i64):
 }
 
 ; block0:
-; ldrb w4, [x0]
-; dup v0.16b, w4
-; dup v1.16b, w4
+; ldrb w8, [x0]
+; dup v0.16b, w8
+; dup v1.16b, w8
 ; ret
 
 function %f9() -> i32x2 {