diff --git a/crates/core_arch/src/aarch64/mod.rs b/crates/core_arch/src/aarch64/mod.rs index d573e2c0b8..4821438e9f 100644 --- a/crates/core_arch/src/aarch64/mod.rs +++ b/crates/core_arch/src/aarch64/mod.rs @@ -18,6 +18,8 @@ pub use self::crypto::*; mod crc; pub use self::crc::*; +pub use super::acle::*; + #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/crates/core_arch/src/acle/barrier/common.rs b/crates/core_arch/src/acle/barrier/common.rs new file mode 100644 index 0000000000..0fb35534d1 --- /dev/null +++ b/crates/core_arch/src/acle/barrier/common.rs @@ -0,0 +1,14 @@ +//! Access types available on all architectures + +/// Full system is the required shareability domain, reads and writes are the +/// required access types +pub struct SY; + +dmb_dsb!(SY); + +impl super::super::sealed::Isb for SY { + #[inline(always)] + unsafe fn __isb(&self) { + super::isb(super::arg::SY) + } +} diff --git a/crates/core_arch/src/acle/barrier/cp15.rs b/crates/core_arch/src/acle/barrier/cp15.rs new file mode 100644 index 0000000000..7938acbbb4 --- /dev/null +++ b/crates/core_arch/src/acle/barrier/cp15.rs @@ -0,0 +1,27 @@ +// Reference: ARM11 MPCore Processor Technical Reference Manual (ARM DDI 0360E) Section 3.5 "Summary +// of CP15 instructions" + +/// Full system is the required shareability domain, reads and writes are the +/// required access types +pub struct SY; + +impl super::super::sealed::Dmb for SY { + #[inline(always)] + unsafe fn __dmb(&self) { + asm!("mcr p15, 0, r0, c7, c10, 5" : : : "memory" : "volatile") + } +} + +impl super::super::sealed::Dsb for SY { + #[inline(always)] + unsafe fn __dsb(&self) { + asm!("mcr p15, 0, r0, c7, c10, 4" : : : "memory" : "volatile") + } +} + +impl super::super::sealed::Isb for SY { + #[inline(always)] + unsafe fn __isb(&self) { + asm!("mcr p15, 0, r0, c7, c5, 4" : : : "memory" : "volatile") + } +} diff --git a/crates/core_arch/src/acle/barrier/mod.rs b/crates/core_arch/src/acle/barrier/mod.rs new file mode 100644 index 0000000000..b3cbf44d27 --- /dev/null +++ b/crates/core_arch/src/acle/barrier/mod.rs @@ -0,0 +1,154 @@ +// Reference: Section 7.4 "Hints" of ACLE + +// CP15 instruction +#[cfg(not(any( + // v8 + target_arch = "aarch64", + // v7 + target_feature = "v7", + // v6-M + target_feature = "mclass" +)))] +mod cp15; + +#[cfg(not(any( + target_arch = "aarch64", + target_feature = "v7", + target_feature = "mclass" +)))] +pub use self::cp15::*; + +// Dedicated instructions +#[cfg(any( + target_arch = "aarch64", + target_feature = "v7", + target_feature = "mclass" +))] +macro_rules! dmb_dsb { + ($A:ident) => { + impl super::super::sealed::Dmb for $A { + #[inline(always)] + unsafe fn __dmb(&self) { + super::dmb(super::arg::$A) + } + } + + impl super::super::sealed::Dsb for $A { + #[inline(always)] + unsafe fn __dsb(&self) { + super::dsb(super::arg::$A) + } + } + }; +} + +#[cfg(any( + target_arch = "aarch64", + target_feature = "v7", + target_feature = "mclass" +))] +mod common; + +#[cfg(any( + target_arch = "aarch64", + target_feature = "v7", + target_feature = "mclass" +))] +pub use self::common::*; + +#[cfg(any(target_arch = "aarch64", target_feature = "v7",))] +mod not_mclass; + +#[cfg(any(target_arch = "aarch64", target_feature = "v7",))] +pub use self::not_mclass::*; + +#[cfg(target_arch = "aarch64")] +mod v8; + +#[cfg(target_arch = "aarch64")] +pub use self::v8::*; + +/// Generates a DMB (data memory barrier) instruction or equivalent CP15 instruction. +/// +/// DMB ensures the observed ordering of memory accesses. 
Memory accesses of the specified type +/// issued before the DMB are guaranteed to be observed (in the specified scope) before memory +/// accesses issued after the DMB. +/// +/// For example, DMB should be used between storing data, and updating a flag variable that makes +/// that data available to another core. +/// +/// The __dmb() intrinsic also acts as a compiler memory barrier of the appropriate type. +#[inline(always)] +pub unsafe fn __dmb(arg: A) +where + A: super::sealed::Dmb, +{ + arg.__dmb() +} + +/// Generates a DSB (data synchronization barrier) instruction or equivalent CP15 instruction. +/// +/// DSB ensures the completion of memory accesses. A DSB behaves as the equivalent DMB and has +/// additional properties. After a DSB instruction completes, all memory accesses of the specified +/// type issued before the DSB are guaranteed to have completed. +/// +/// The __dsb() intrinsic also acts as a compiler memory barrier of the appropriate type. +#[inline(always)] +pub unsafe fn __dsb(arg: A) +where + A: super::sealed::Dsb, +{ + arg.__dsb() +} + +/// Generates an ISB (instruction synchronization barrier) instruction or equivalent CP15 +/// instruction. +/// +/// This instruction flushes the processor pipeline fetch buffers, so that following instructions +/// are fetched from cache or memory. +/// +/// An ISB is needed after some system maintenance operations. An ISB is also needed before +/// transferring control to code that has been loaded or modified in memory, for example by an +/// overlay mechanism or just-in-time code generator. (Note that if instruction and data caches are +/// separate, privileged cache maintenance operations would be needed in order to unify the caches.) +/// +/// The only supported argument for the __isb() intrinsic is 15, corresponding to the SY (full +/// system) scope of the ISB instruction. +#[inline(always)] +pub unsafe fn __isb(arg: A) +where + A: super::sealed::Isb, +{ + arg.__isb() +} + +extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.dmb")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dmb")] + fn dmb(_: i32); + + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.dsb")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dsb")] + fn dsb(_: i32); + + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.isb")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.isb")] + fn isb(_: i32); +} + +// we put these in a module to prevent weirdness with glob re-exports +mod arg { + // See Section 7.3 Memory barriers of ACLE + pub const SY: i32 = 15; + pub const ST: i32 = 14; + pub const LD: i32 = 13; + pub const ISH: i32 = 11; + pub const ISHST: i32 = 10; + pub const ISHLD: i32 = 9; + pub const NSH: i32 = 7; + pub const NSHST: i32 = 6; + pub const NSHLD: i32 = 5; + pub const OSH: i32 = 3; + pub const OSHST: i32 = 2; + pub const OSHLD: i32 = 1; +} diff --git a/crates/core_arch/src/acle/barrier/not_mclass.rs b/crates/core_arch/src/acle/barrier/not_mclass.rs new file mode 100644 index 0000000000..385e1d5289 --- /dev/null +++ b/crates/core_arch/src/acle/barrier/not_mclass.rs @@ -0,0 +1,43 @@ +//! 
Access types available on v7 and v8 but not on v7(E)-M or v8-M + +/// Full system is the required shareability domain, writes are the required +/// access type +pub struct ST; + +dmb_dsb!(ST); + +/// Inner Shareable is the required shareability domain, reads and writes are +/// the required access types +pub struct ISH; + +dmb_dsb!(ISH); + +/// Inner Shareable is the required shareability domain, writes are the required +/// access type +pub struct ISHST; + +dmb_dsb!(ISHST); + +/// Non-shareable is the required shareability domain, reads and writes are the +/// required access types +pub struct NSH; + +dmb_dsb!(NSH); + +/// Non-shareable is the required shareability domain, writes are the required +/// access type +pub struct NSHST; + +dmb_dsb!(NSHST); + +/// Outer Shareable is the required shareability domain, reads and writes are +/// the required access types +pub struct OSH; + +dmb_dsb!(OSH); + +/// Outer Shareable is the required shareability domain, writes are the required +/// access type +pub struct OSHST; + +dmb_dsb!(OSHST); diff --git a/crates/core_arch/src/acle/barrier/v8.rs b/crates/core_arch/src/acle/barrier/v8.rs new file mode 100644 index 0000000000..2951a5a670 --- /dev/null +++ b/crates/core_arch/src/acle/barrier/v8.rs @@ -0,0 +1,23 @@ +/// Full system is the required shareability domain, reads are the required +/// access type +pub struct LD; + +dmb_dsb!(LD); + +/// Inner Shareable is the required shareability domain, reads are the required +/// access type +pub struct ISHLD; + +dmb_dsb!(ISHLD); + +/// Non-shareable is the required shareability domain, reads are the required +/// access type +pub struct NSHLD; + +dmb_dsb!(NSHLD); + +/// Outer Shareable is the required shareability domain, reads are the required +/// access type +pub struct OSHLD; + +dmb_dsb!(OSHLD); diff --git a/crates/core_arch/src/acle/dsp.rs b/crates/core_arch/src/acle/dsp.rs new file mode 100644 index 0000000000..e929e98e40 --- /dev/null +++ b/crates/core_arch/src/acle/dsp.rs @@ -0,0 +1,76 @@ +//! # References: +//! +//! - Section 8.3 "16-bit multiplications" +//! +//! Intrinsics that could live here: +//! +//! - [ ] __smulbb +//! - [ ] __smulbt +//! - [ ] __smultb +//! - [ ] __smultt +//! - [ ] __smulwb +//! - [ ] __smulwt +//! - [x] __qadd +//! - [x] __qsub +//! - [ ] __qdbl +//! - [ ] __smlabb +//! - [ ] __smlabt +//! - [ ] __smlatb +//! - [ ] __smlatt +//! - [ ] __smlawb +//! - [ ] __smlawt + +#[cfg(test)] +use stdsimd_test::assert_instr; + +extern "C" { + #[link_name = "llvm.arm.qadd"] + fn arm_qadd(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.qsub"] + fn arm_qsub(a: i32, b: i32) -> i32; + +} + +/// Signed saturating addition +/// +/// Returns the 32-bit saturating signed equivalent of a + b. +#[inline] +#[cfg_attr(test, assert_instr(qadd))] +pub unsafe fn __qadd(a: i32, b: i32) -> i32 { + arm_qadd(a, b) +} + +/// Signed saturating subtraction +/// +/// Returns the 32-bit saturating signed equivalent of a - b. 
+#[inline] +#[cfg_attr(test, assert_instr(qsub))] +pub unsafe fn __qsub(a: i32, b: i32) -> i32 { + arm_qsub(a, b) +} + +#[cfg(test)] +mod tests { + use core_arch::arm::*; + use std::mem; + use stdsimd_test::simd_test; + + #[test] + fn qadd() { + unsafe { + assert_eq!(super::__qadd(-10, 60), 50); + assert_eq!(super::__qadd(::std::i32::MAX, 10), ::std::i32::MAX); + assert_eq!(super::__qadd(::std::i32::MIN, -10), ::std::i32::MIN); + } + } + + #[test] + fn qsub() { + unsafe { + assert_eq!(super::__qsub(10, 60), -50); + assert_eq!(super::__qsub(::std::i32::MAX, -10), ::std::i32::MAX); + assert_eq!(super::__qsub(::std::i32::MIN, 10), ::std::i32::MIN); + } + } +} diff --git a/crates/core_arch/src/acle/ex.rs b/crates/core_arch/src/acle/ex.rs new file mode 100644 index 0000000000..0426c65186 --- /dev/null +++ b/crates/core_arch/src/acle/ex.rs @@ -0,0 +1,117 @@ +// Reference: Section 5.4.4 "LDREX / STREX" of ACLE + +/// Removes the exclusive lock created by LDREX +// Supported: v6, v6K, v7-M, v7-A, v7-R +// Not supported: v5, v6-M +// NOTE: there's no dedicated CLREX instruction in v6 (<v6k); to clear the exclusive monitor users +// have to do a dummy STREX operation +#[cfg(any( + all(target_feature = "v6k", not(target_feature = "mclass")), // excludes v6-M + all(target_feature = "v7", target_feature = "mclass"), // v7-M +))] +pub unsafe fn __clrex() { + extern "C" { + #[link_name = "llvm.arm.clrex"] + fn clrex(); + } + + clrex() +} + +/// Executes an exclusive LDR instruction for 8 bit value. +// Supported: v6K, v7-M, v7-A, v7-R, v8 +// Not supported: v5, v6, v6-M +#[cfg( + target_feature = "v6k", // includes v7-M but excludes v6-M +)] +pub unsafe fn __ldrexb(p: *const u8) -> u8 { + extern "C" { + #[link_name = "llvm.arm.ldrex.p0i8"] + fn ldrex8(p: *const u8) -> u32; + } + + ldrex8(p) as u8 +} + +/// Executes an exclusive LDR instruction for 16 bit value. +// Supported: v6K, v7-M, v7-A, v7-R, v8 +// Not supported: v5, v6, v6-M +#[cfg( + target_feature = "v6k", // includes v7-M but excludes v6-M +)] +pub unsafe fn __ldrexh(p: *const u16) -> u16 { + extern "C" { + #[link_name = "llvm.arm.ldrex.p0i16"] + fn ldrex16(p: *const u16) -> u32; + } + + ldrex16(p) as u16 +} + +/// Executes an exclusive LDR instruction for 32 bit value. +// Supported: v6, v7-M, v6K, v7-A, v7-R, v8 +// Not supported: v5, v6-M +#[cfg(any( + all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M + all(target_feature = "v7", target_feature = "mclass"), // v7-M +))] +pub unsafe fn __ldrex(p: *const u32) -> u32 { + extern "C" { + #[link_name = "llvm.arm.ldrex.p0i32"] + fn ldrex32(p: *const u32) -> u32; + } + + ldrex32(p) +} + +/// Executes an exclusive STR instruction for 8 bit values +/// +/// Returns `0` if the operation succeeded, or `1` if it failed +// Supported: v6K, v7-M, v7-A, v7-R +// Not supported: v5, v6, v6-M +#[cfg( + target_feature = "v6k", // includes v7-M but excludes v6-M +)] +pub unsafe fn __strexb(value: u32, addr: *mut u8) -> u32 { + extern "C" { + #[link_name = "llvm.arm.strex.p0i8"] + fn strex8(value: u32, addr: *mut u8) -> u32; + } + + strex8(value, addr) +} + +/// Executes an exclusive STR instruction for 16 bit values +/// +/// Returns `0` if the operation succeeded, or `1` if it failed +// Supported: v6K, v7-M, v7-A, v7-R, v8 +// Not supported: v5, v6, v6-M +#[cfg( + target_feature = "v6k", // includes v7-M but excludes v6-M +)] +pub unsafe fn __strexh(value: u16, addr: *mut u16) -> u32 { + extern "C" { + #[link_name = "llvm.arm.strex.p0i16"] + fn strex16(value: u32, addr: *mut u16) -> u32; + } + + strex16(value as u32, addr) +} + +/// Executes an exclusive STR instruction for 32 bit values +/// +/// Returns `0` if the operation succeeded, or `1` if it failed +// Supported: v6, v7-M, v6K, v7-A, v7-R, v8 +// Not supported: v5, v6-M +#[cfg(any( + all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M + all(target_feature = "v7", target_feature = "mclass"), // v7-M +))] +pub unsafe fn __strex(value: u32, addr: *mut u32) -> u32 { + extern "C" { + #[link_name = "llvm.arm.strex.p0i32"] + fn strex32(value: u32, addr: *mut u32) -> u32; + } + + strex32(value, addr) +} 
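// A minimal usage sketch (not part of this patch) of how the LDREX/STREX intrinsics defined in
// ex.rs above are typically combined: a retry loop in which the `0`/`1` return value of `__strex`
// indicates whether the exclusive store succeeded. The `atomic_add` helper name is an
// illustrative assumption, and real code would need the same `target_feature` gates as
// `__ldrex`/`__strex` themselves.
//
// unsafe fn atomic_add(p: *mut u32, n: u32) -> u32 {
//     loop {
//         let old = __ldrex(p); // opens an exclusive monitor on `p`
//         let new = old.wrapping_add(n);
//         if __strex(new, p) == 0 {
//             // the store-exclusive succeeded; no other observer touched `p`
//             return new;
//         }
//         // the exclusive monitor was lost (another core or an interrupt wrote `p`); retry
//     }
// }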
diff --git a/crates/core_arch/src/acle/hints.rs b/crates/core_arch/src/acle/hints.rs new file mode 100644 index 0000000000..20faed69cb --- /dev/null +++ b/crates/core_arch/src/acle/hints.rs @@ -0,0 +1,135 @@ +// # References +// +// - Section 7.4 "Hints" of ACLE +// - Section 7.7 "NOP" of ACLE + +/// Generates a WFI (wait for interrupt) hint instruction, or nothing. +/// +/// The WFI instruction allows (but does not require) the processor to enter a +/// low-power state until one of a number of asynchronous events occurs. +// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M +// LLVM says "instruction requires: armv6k" +#[cfg(any(target_feature = "v6", target_arch = "aarch64"))] +#[inline(always)] +pub unsafe fn __wfi() { + hint(HINT_WFI); +} + +/// Generates a WFE (wait for event) hint instruction, or nothing. +/// +/// The WFE instruction allows (but does not require) the processor to enter a +/// low-power state until some event occurs such as a SEV being issued by +/// another processor. +// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M +// LLVM says "instruction requires: armv6k" +#[cfg(any(target_feature = "v6", target_arch = "aarch64"))] +#[inline(always)] +pub unsafe fn __wfe() { + hint(HINT_WFE); +} + +/// Generates a SEV (send a global event) hint instruction. +/// +/// This causes an event to be signaled to all processors in a multiprocessor +/// system. It is a NOP on a uniprocessor system. +// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M, 7-M +// LLVM says "instruction requires: armv6k" +#[cfg(any(target_feature = "v6", target_arch = "aarch64"))] +#[inline(always)] +pub unsafe fn __sev() { + hint(HINT_SEV); +} + +/// Generates a SEVL (send a local event) hint instruction. +/// +/// This causes an event to be signaled to only the processor executing this +/// instruction. In a multiprocessor system, it is not required to affect the +/// other processors. +// LLVM says "instruction requires: armv8" +#[cfg(any( + target_feature = "v8", // 32-bit ARMv8 + target_arch = "aarch64", // AArch64 +))] +#[inline(always)] +pub unsafe fn __sevl() { + hint(HINT_SEVL); +} + +/// Generates a YIELD hint instruction. +/// +/// This enables multithreading software to indicate to the hardware that it is +/// performing a task, for example a spin-lock, that could be swapped out to +/// improve overall system performance. +// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M +// LLVM says "instruction requires: armv6k" +#[cfg(any(target_feature = "v6", target_arch = "aarch64"))] +#[inline(always)] +pub unsafe fn __yield() { + hint(HINT_YIELD); +} + +/// Generates a DBG instruction. +/// +/// This provides a hint to debugging and related systems. The argument must be +/// a constant integer from 0 to 15 inclusive. See implementation documentation +/// for the effect (if any) of this instruction and the meaning of the +/// argument. This is available only when compiling for AArch32. +// Section 10.1 of ACLE says that the supported arches are: 7, 7-M +// "The DBG hint instruction is added in ARMv7. It is UNDEFINED in the ARMv6 base architecture, and +// executes as a NOP instruction in ARMv6K and ARMv6T2." - ARM Architecture Reference Manual ARMv7-A +// and ARMv7-R edition (ARM DDI 0406C.c) sections D12.4.1 "ARM instruction set support" and D12.4.2 +// "Thumb instruction set support" +#[cfg(target_feature = "v7")] +#[inline(always)] +#[rustc_args_required_const(0)] +pub unsafe fn __dbg(imm4: u32) { + macro_rules! 
call { + ($imm4:expr) => { + asm!(concat!("DBG ", stringify!($imm4)) : : : : "volatile") + } + } + + match imm4 & 0b1111 { + 0 => call!(0), + 1 => call!(1), + 2 => call!(2), + 3 => call!(3), + 4 => call!(4), + 5 => call!(5), + 6 => call!(6), + 7 => call!(7), + 8 => call!(8), + 9 => call!(9), + 10 => call!(10), + 11 => call!(11), + 12 => call!(12), + 13 => call!(13), + 14 => call!(14), + _ => call!(15), + } +} + +/// Generates an unspecified no-op instruction. +/// +/// Note that not all architectures provide a distinguished NOP instruction. On +/// those that do, it is unspecified whether this intrinsic generates it or +/// another instruction. It is not guaranteed that inserting this instruction +/// will increase execution time. +#[inline(always)] +pub unsafe fn __nop() { + asm!("NOP" : : : : "volatile") +} + +extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.hint")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.hint")] + fn hint(_: i32); +} + +// from LLVM 7.0.1's lib/Target/ARM/{ARMInstrThumb,ARMInstrInfo,ARMInstrThumb2}.td +const HINT_NOP: i32 = 0; +const HINT_YIELD: i32 = 1; +const HINT_WFE: i32 = 2; +const HINT_WFI: i32 = 3; +const HINT_SEV: i32 = 4; +const HINT_SEVL: i32 = 5; diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs new file mode 100644 index 0000000000..5f29decf5a --- /dev/null +++ b/crates/core_arch/src/acle/mod.rs @@ -0,0 +1,158 @@ +//! ARM C Language Extensions (ACLE) +//! +//! # Developer notes +//! +//! Below is a list of built-in targets that are representative of the different ARM +//! architectures; the list includes the `target_feature`s they possess. +//! +//! - `armv4t-unknown-linux-gnueabi` - **ARMv4** - `+v4t` +//! - `armv5te-unknown-linux-gnueabi` - **ARMv5TE** - `+v4t +v5te` +//! - `arm-unknown-linux-gnueabi` - **ARMv6** - `+v4t +v5te +v6` +//! - `thumbv6m-none-eabi` - **ARMv6-M** - `+v4t +v5te +v6 +thumb-mode +mclass` +//! - `armv7-unknown-linux-gnueabihf` - **ARMv7-A** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +aclass` +//! - `armv7r-none-eabi` - **ARMv7-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +rclass` +//! - `thumbv7m-none-eabi` - **ARMv7-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass` +//! - `thumbv7em-none-eabi` - **ARMv7E-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +thumb-mode +mclass` +//! - `thumbv8m.main-none-eabi` - **ARMv8-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass` +//! - `armv8r-none-eabi` - **ARMv8-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +v8 +thumb2 +rclass` +//! - `aarch64-unknown-linux-gnu` - **ARMv8-A (AArch64)** - `+fp +neon` +//! +//! Section 10.1 of ACLE says: +//! +//! - "In the sequence of Arm architectures { v5, v5TE, v6, v6T2, v7 } each architecture includes +//! its predecessor instruction set." +//! +//! - "In the sequence of Thumb-only architectures { v6-M, v7-M, v7E-M } each architecture includes +//! its predecessor instruction set." +//! +//! From that info and from looking at how LLVM features work (using custom targets) we can identify +//! features that are subsets of others: +//! +//! Legend: `a < b` reads as "`a` is a subset of `b`"; this means that if `b` is enabled then `a` is +//! enabled as well. +//! +//! - `v4t < v5te < v6 < v6k < v6t2 < v7 < v8` +//! - `v6 < v8m < v6t2` +//! - `v7 < v8m.main` +//! +//! *NOTE*: Section 5.4.7 of ACLE says: +//! +//! - "__ARM_FEATURE_DSP is defined to 1 if the DSP (v5E) instructions are supported and the +//! 
intrinsics defined in Saturating intrinsics are available." +//! +//! This does *not* match how LLVM uses the '+dsp' feature; this feature is not set for v5te +//! targets so we have to work around this difference. +//! +//! # References +//! +//! - [ACLE Q2 2018](https://developer.arm.com/docs/101028/latest) + +// 8, 7 and 6-M are supported via dedicated instructions like DMB. All other arches are supported +// via CP15 instructions. See Section 10.1 of ACLE +mod barrier; + +pub use self::barrier::*; + +mod hints; + +pub use self::hints::*; + +mod registers; + +pub use self::registers::*; + +mod ex; + +pub use self::ex::*; + +// Supported arches: 5TE, 7E-M. See Section 10.1 of ACLE (e.g. QADD) +// We also include the A profile even though DSP is deprecated on that profile as of ACLE 2.0 (see +// section 5.4.7) +// Here we workaround the difference between LLVM's +dsp and ACLE's __ARM_FEATURE_DSP by gating on +// '+v5te' rather than on '+dsp' +#[cfg(all( + not(target_arch = "aarch64"), + any( + // >= v5TE but excludes v7-M + all(target_feature = "v5te", not(target_feature = "mclass")), + // v7E-M + all(target_feature = "mclass", target_feature = "dsp"), + ) +))] +mod dsp; + +#[cfg(all( + not(target_arch = "aarch64"), + any( + all(target_feature = "v5te", not(target_feature = "mclass")), + all(target_feature = "mclass", target_feature = "dsp"), + ) +))] +pub use self::dsp::*; + +// Supported arches: 6, 7-M. See Section 10.1 of ACLE (e.g. SSAT) +#[cfg(all(not(target_arch = "aarch64"), target_feature = "v6",))] +mod sat; + +#[cfg(all(not(target_arch = "aarch64"), target_feature = "v6",))] +pub use self::sat::*; + +// Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says +// Section 5.4.9 of ACLE. We'll expose these for the A profile even if deprecated +#[cfg(all( + not(target_arch = "aarch64"), + any( + // v7-A, v7-R + all(target_feature = "v6", not(target_feature = "mclass")), + // v7E-M + all(target_feature = "mclass", target_feature = "dsp") + ) +))] +mod simd32; + +#[cfg(all( + not(target_arch = "aarch64"), + any( + all(target_feature = "v6", not(target_feature = "mclass")), + all(target_feature = "mclass", target_feature = "dsp") + ) +))] +pub use self::simd32::*; + +mod sealed { + pub trait Dmb { + unsafe fn __dmb(&self); + } + + pub trait Dsb { + unsafe fn __dsb(&self); + } + + pub trait Isb { + unsafe fn __isb(&self); + } + + pub trait Rsr { + unsafe fn __rsr(&self) -> u32; + } + + pub trait Rsr64 { + unsafe fn __rsr64(&self) -> u64; + } + + pub trait Rsrp { + unsafe fn __rsrp(&self) -> *const u8; + } + + pub trait Wsr { + unsafe fn __wsr(&self, value: u32); + } + + pub trait Wsr64 { + unsafe fn __wsr64(&self, value: u64); + } + + pub trait Wsrp { + unsafe fn __wsrp(&self, value: *const u8); + } +} diff --git a/crates/core_arch/src/acle/registers/aarch32.rs b/crates/core_arch/src/acle/registers/aarch32.rs new file mode 100644 index 0000000000..f59af5d3ae --- /dev/null +++ b/crates/core_arch/src/acle/registers/aarch32.rs @@ -0,0 +1,4 @@ +/// Application Program Status Register +pub struct APSR; + +rsr!(APSR); diff --git a/crates/core_arch/src/acle/registers/mod.rs b/crates/core_arch/src/acle/registers/mod.rs new file mode 100644 index 0000000000..73fcc2c7b0 --- /dev/null +++ b/crates/core_arch/src/acle/registers/mod.rs @@ -0,0 +1,121 @@ +#[allow(unused_macros)] +macro_rules! 
rsr { + ($R:ident) => { + impl super::super::sealed::Rsr for $R { + unsafe fn __rsr(&self) -> u32 { + let r: u32; + asm!(concat!("mrs $0,", stringify!($R)) : "=r"(r) : : : "volatile"); + r + } + } + }; +} + +#[allow(unused_macros)] +macro_rules! rsrp { + ($R:ident) => { + impl super::super::sealed::Rsrp for $R { + unsafe fn __rsrp(&self) -> *const u8 { + let r: *const u8; + asm!(concat!("mrs $0,", stringify!($R)) : "=r"(r) : : : "volatile"); + r + } + } + }; +} + +#[allow(unused_macros)] +macro_rules! wsr { + ($R:ident) => { + impl super::super::sealed::Wsr for $R { + unsafe fn __wsr(&self, value: u32) { + asm!(concat!("msr ", stringify!($R), ",$0") : : "r"(value) : : "volatile"); + } + } + }; +} + +#[allow(unused_macros)] +macro_rules! wsrp { + ($R:ident) => { + impl super::super::sealed::Wsrp for $R { + unsafe fn __wsrp(&self, value: *const u8) { + asm!(concat!("msr ", stringify!($R), ",$0") : : "r"(value) : : "volatile"); + } + } + }; +} + +#[cfg(target_feature = "mclass")] +mod v6m; + +#[cfg(target_feature = "mclass")] +pub use self::v6m::*; + +#[cfg(all(target_feature = "v7", target_feature = "mclass"))] +mod v7m; + +#[cfg(all(target_feature = "v7", target_feature = "mclass"))] +pub use self::v7m::*; + +#[cfg(not(target_arch = "aarch64"))] +mod aarch32; + +#[cfg(not(target_arch = "aarch64"))] +pub use self::aarch32::*; + +/// Reads a 32-bit system register +#[inline(always)] +pub unsafe fn __rsr(reg: R) -> u32 +where + R: super::sealed::Rsr, +{ + reg.__rsr() +} + +/// Reads a 64-bit system register +#[cfg(target_arch = "aarch64")] +#[inline(always)] +pub unsafe fn __rsr64(reg: R) -> u64 +where + R: super::sealed::Rsr64, +{ + reg.__rsr64() +} + +/// Reads a system register containing an address +#[inline(always)] +pub unsafe fn __rsrp(reg: R) -> *const u8 +where + R: super::sealed::Rsrp, +{ + reg.__rsrp() +} + +/// Writes a 32-bit system register +#[inline(always)] +pub unsafe fn __wsr(reg: R, value: u32) +where + R: super::sealed::Wsr, +{ + reg.__wsr(value) +} + +/// Writes a 64-bit system register +#[cfg(target_arch = "aarch64")] +#[inline(always)] +pub unsafe fn __wsr64(reg: R, value: u64) +where + R: super::sealed::Wsr64, +{ + reg.__wsr64(value) +} + +/// Writes a system register containing an address +#[inline(always)] +pub unsafe fn __wsrp(reg: R, value: *const u8) +where + R: super::sealed::Wsrp, +{ + reg.__wsrp(value) +} diff --git a/crates/core_arch/src/acle/registers/v6m.rs b/crates/core_arch/src/acle/registers/v6m.rs new file mode 100644 index 0000000000..7acc63b6d1 --- /dev/null +++ b/crates/core_arch/src/acle/registers/v6m.rs @@ -0,0 +1,39 @@ +/// CONTROL register +pub struct CONTROL; + +rsr!(CONTROL); +wsr!(CONTROL); + +/// Execution Program Status Register +pub struct EPSR; + +rsr!(EPSR); + +/// Interrupt Program Status Register +pub struct IPSR; + +rsr!(IPSR); + +/// Main Stack Pointer +pub struct MSP; + +rsrp!(MSP); +wsrp!(MSP); + +/// Priority Mask Register +pub struct PRIMASK; + +rsr!(PRIMASK); +wsr!(PRIMASK); + +/// Process Stack Pointer +pub struct PSP; + +rsrp!(PSP); +wsrp!(PSP); + +/// Program Status Register +#[allow(non_camel_case_types)] +pub struct xPSR; + +rsr!(xPSR); diff --git a/crates/core_arch/src/acle/registers/v7m.rs b/crates/core_arch/src/acle/registers/v7m.rs new file mode 100644 index 0000000000..d1b1d474f1 --- /dev/null +++ b/crates/core_arch/src/acle/registers/v7m.rs @@ -0,0 +1,17 @@ +/// Base Priority Mask Register +pub struct BASEPRI; + +rsr!(BASEPRI); +wsr!(BASEPRI); + +/// Base Priority Mask Register (conditional write) 
+#[allow(non_camel_case_types)] +pub struct BASEPRI_MAX; + +wsr!(BASEPRI_MAX); + +/// Fault Mask Register +pub struct FAULTMASK; + +rsr!(FAULTMASK); +wsr!(FAULTMASK); diff --git a/crates/core_arch/src/acle/sat.rs b/crates/core_arch/src/acle/sat.rs new file mode 100644 index 0000000000..38c98d7342 --- /dev/null +++ b/crates/core_arch/src/acle/sat.rs @@ -0,0 +1,8 @@ +//! # References: +//! +//! - Section 8.4 "Saturating intrinsics" +//! +//! Intrinsics that could live here: +//! +//! - __ssat +//! - __usat diff --git a/crates/core_arch/src/arm/dsp.rs b/crates/core_arch/src/acle/simd32.rs similarity index 75% rename from crates/core_arch/src/arm/dsp.rs rename to crates/core_arch/src/acle/simd32.rs index 8385e7ed21..a259f90d2c 100644 --- a/crates/core_arch/src/arm/dsp.rs +++ b/crates/core_arch/src/acle/simd32.rs @@ -1,8 +1,66 @@ -//! ARM DSP Intrinsics. +//! # References //! -//! Based on "Arm C Language Extensions (ACLE) Version Q2 2018" +//! - Section 8.5 "32-bit SIMD intrinsics" of ACLE //! -//! https://developer.arm.com/products/software-development-tools/compilers/arm-compiler-5/docs/101028/0006 +//! Intrinsics that could live here +//! +//! - [x] __sel +//! - [ ] __ssat16 +//! - [ ] __usat16 +//! - [ ] __sxtab16 +//! - [ ] __sxtb16 +//! - [ ] __uxtab16 +//! - [ ] __uxtb16 +//! - [x] __qadd8 +//! - [x] __qsub8 +//! - [x] __sadd8 +//! - [x] __shadd8 +//! - [x] __shsub8 +//! - [ ] __ssub8 +//! - [ ] __uadd8 +//! - [ ] __uhadd8 +//! - [ ] __uhsub8 +//! - [ ] __uqadd8 +//! - [ ] __uqsub8 +//! - [ ] __usub8 +//! - [x] __usad8 +//! - [x] __usada8 +//! - [x] __qadd16 +//! - [x] __qasx +//! - [x] __qsax +//! - [x] __qsub16 +//! - [x] __sadd16 +//! - [x] __sasx +//! - [x] __shadd16 +//! - [ ] __shasx +//! - [ ] __shsax +//! - [x] __shsub16 +//! - [ ] __ssax +//! - [ ] __ssub16 +//! - [ ] __uadd16 +//! - [ ] __uasx +//! - [ ] __uhadd16 +//! - [ ] __uhasx +//! - [ ] __uhsax +//! - [ ] __uhsub16 +//! - [ ] __uqadd16 +//! - [ ] __uqasx +//! - [x] __uqsax +//! - [ ] __uqsub16 +//! - [ ] __usax +//! - [ ] __usub16 +//! - [x] __smlad +//! - [ ] __smladx +//! - [ ] __smlald +//! - [ ] __smlaldx +//! - [x] __smlsd +//! - [ ] __smlsdx +//! - [ ] __smlsld +//! - [ ] __smlsldx +//! - [x] __smuad +//! - [x] __smuadx +//! - [x] __smusd +//! - [x] __smusdx #[cfg(test)] use stdsimd_test::assert_instr; @@ -25,45 +83,39 @@ macro_rules! 
dsp_call { } extern "C" { - #[link_name = "llvm.arm.qadd"] - fn arm_qadd(a: i32, b: i32) -> i32; + #[link_name = "llvm.arm.qadd8"] + fn arm_qadd8(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.qsub8"] + fn arm_qsub8(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.qsub16"] + fn arm_qsub16(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.qadd16"] fn arm_qadd16(a: i32, b: i32) -> i32; - #[link_name = "llvm.arm.qadd8"] - fn arm_qadd8(a: i32, b: i32) -> i32; - #[link_name = "llvm.arm.qasx"] fn arm_qasx(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.qsax"] fn arm_qsax(a: i32, b: i32) -> i32; - #[link_name = "llvm.arm.qsub"] - fn arm_qsub(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.qsub8"] - fn arm_qsub8(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.qsub16"] - fn arm_qsub16(a: i32, b: i32) -> i32; - #[link_name = "llvm.arm.sadd16"] fn arm_sadd16(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.sadd8"] fn arm_sadd8(a: i32, b: i32) -> i32; - #[link_name = "llvm.arm.sasx"] - fn arm_sasx(a: i32, b: i32) -> i32; - #[link_name = "llvm.arm.smlad"] fn arm_smlad(a: i32, b: i32, c: i32) -> i32; #[link_name = "llvm.arm.smlsd"] fn arm_smlsd(a: i32, b: i32, c: i32) -> i32; + #[link_name = "llvm.arm.sasx"] + fn arm_sasx(a: i32, b: i32) -> i32; + #[link_name = "llvm.arm.sel"] fn arm_sel(a: i32, b: i32) -> i32; @@ -95,24 +147,6 @@ extern "C" { fn arm_usad8(a: i32, b: i32) -> u32; } -/// Signed saturating addition -/// -/// Returns the 32-bit saturating signed equivalent of a + b. -#[inline] -#[cfg_attr(test, assert_instr(qadd))] -pub unsafe fn qadd(a: i32, b: i32) -> i32 { - arm_qadd(a, b) -} - -/// Signed saturating subtraction -/// -/// Returns the 32-bit saturating signed equivalent of a - b. -#[inline] -#[cfg_attr(test, assert_instr(qsub))] -pub unsafe fn qsub(a: i32, b: i32) -> i32 { - arm_qsub(a, b) -} - /// Saturating four 8-bit integer additions /// /// Returns the 8-bit signed equivalent of @@ -123,7 +157,7 @@ pub unsafe fn qsub(a: i32, b: i32) -> i32 { /// res\[3\] = a\[3\] + b\[3\] #[inline] #[cfg_attr(test, assert_instr(qadd8))] -pub unsafe fn qadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { +pub unsafe fn __qadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_qadd8, a, b) } @@ -137,7 +171,7 @@ pub unsafe fn qadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { /// res\[3\] = a\[3\] - b\[3\] #[inline] #[cfg_attr(test, assert_instr(qsub8))] -pub unsafe fn qsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { +pub unsafe fn __qsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_qsub8, a, b) } @@ -149,7 +183,7 @@ pub unsafe fn qsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { /// res\[1\] = a\[1\] - b\[1\] #[inline] #[cfg_attr(test, assert_instr(qsub16))] -pub unsafe fn qsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t { +pub unsafe fn __qsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_qsub16, a, b) } @@ -161,7 +195,7 @@ pub unsafe fn qsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t { /// res\[1\] = a\[1\] + b\[1\] #[inline] #[cfg_attr(test, assert_instr(qadd16))] -pub unsafe fn qadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { +pub unsafe fn __qadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_qadd16, a, b) } @@ -171,7 +205,7 @@ pub unsafe fn qadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { /// res\[1\] = a\[1\] + b\[0\] #[inline] #[cfg_attr(test, assert_instr(qasx))] -pub unsafe fn qasx(a: int16x2_t, b: int16x2_t) -> int16x2_t { +pub unsafe fn __qasx(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_qasx, a, b) } @@ -181,7 +215,7 @@ pub unsafe fn qasx(a: 
int16x2_t, b: int16x2_t) -> int16x2_t { /// res\[1\] = a\[1\] - b\[0\] #[inline] #[cfg_attr(test, assert_instr(qsax))] -pub unsafe fn qsax(a: int16x2_t, b: int16x2_t) -> int16x2_t { +pub unsafe fn __qsax(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_qsax, a, b) } @@ -193,7 +227,7 @@ pub unsafe fn qsax(a: int16x2_t, b: int16x2_t) -> int16x2_t { /// and the GE bits of the APSR are set. #[inline] #[cfg_attr(test, assert_instr(sadd16))] -pub unsafe fn sadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { +pub unsafe fn __sadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_sadd16, a, b) } @@ -207,7 +241,7 @@ pub unsafe fn sadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { /// and the GE bits of the APSR are set. #[inline] #[cfg_attr(test, assert_instr(sadd8))] -pub unsafe fn sadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { +pub unsafe fn __sadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_sadd8, a, b) } @@ -218,7 +252,7 @@ pub unsafe fn sadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { /// res = a\[0\] * b\[0\] + a\[1\] * b\[1\] + c #[inline] #[cfg_attr(test, assert_instr(smlad))] -pub unsafe fn smlad(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { +pub unsafe fn __smlad(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { arm_smlad(::mem::transmute(a), ::mem::transmute(b), c) } @@ -229,7 +263,7 @@ pub unsafe fn smlad(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { /// res = a\[0\] * b\[0\] - a\[1\] * b\[1\] + c #[inline] #[cfg_attr(test, assert_instr(smlsd))] -pub unsafe fn smlsd(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { +pub unsafe fn __smlsd(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { arm_smlsd(::mem::transmute(a), ::mem::transmute(b), c) } @@ -241,7 +275,7 @@ pub unsafe fn smlsd(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { /// and the GE bits of the APSR are set. 
#[inline] #[cfg_attr(test, assert_instr(sasx))] -pub unsafe fn sasx(a: int16x2_t, b: int16x2_t) -> int16x2_t { +pub unsafe fn __sasx(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_sasx, a, b) } @@ -257,8 +291,7 @@ pub unsafe fn sasx(a: int16x2_t, b: int16x2_t) -> int16x2_t { /// where GE are bits of APSR #[inline] #[cfg_attr(test, assert_instr(sel))] -#[cfg(all(not(target_feature = "mclass")))] -pub unsafe fn sel(a: int8x4_t, b: int8x4_t) -> int8x4_t { +pub unsafe fn __sel(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_sel, a, b) } @@ -272,7 +305,7 @@ pub unsafe fn sel(a: int8x4_t, b: int8x4_t) -> int8x4_t { /// res\[3\] = (a\[3\] + b\[3\]) / 2 #[inline] #[cfg_attr(test, assert_instr(shadd8))] -pub unsafe fn shadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { +pub unsafe fn __shadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_shadd8, a, b) } @@ -284,7 +317,7 @@ pub unsafe fn shadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { /// res\[1\] = (a\[1\] + b\[1\]) / 2 #[inline] #[cfg_attr(test, assert_instr(shadd16))] -pub unsafe fn shadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { +pub unsafe fn __shadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_shadd16, a, b) } @@ -298,7 +331,7 @@ pub unsafe fn shadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { /// res\[3\] = (a\[3\] - b\[3\]) / 2 #[inline] #[cfg_attr(test, assert_instr(shsub8))] -pub unsafe fn shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { +pub unsafe fn __shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_shsub8, a, b) } @@ -310,7 +343,7 @@ pub unsafe fn shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { /// res\[1\] = (a\[1\] - b\[1\]) / 2 #[inline] #[cfg_attr(test, assert_instr(shsub16))] -pub unsafe fn shsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t { +pub unsafe fn __shsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_shsub16, a, b) } @@ -323,7 +356,7 @@ pub unsafe fn shsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t { /// and sets the Q flag if overflow occurs on the addition. #[inline] #[cfg_attr(test, assert_instr(smuad))] -pub unsafe fn smuad(a: int16x2_t, b: int16x2_t) -> i32 { +pub unsafe fn __smuad(a: int16x2_t, b: int16x2_t) -> i32 { arm_smuad(::mem::transmute(a), ::mem::transmute(b)) } @@ -336,7 +369,7 @@ pub unsafe fn smuad(a: int16x2_t, b: int16x2_t) -> i32 { /// and sets the Q flag if overflow occurs on the addition. #[inline] #[cfg_attr(test, assert_instr(smuadx))] -pub unsafe fn smuadx(a: int16x2_t, b: int16x2_t) -> i32 { +pub unsafe fn __smuadx(a: int16x2_t, b: int16x2_t) -> i32 { arm_smuadx(::mem::transmute(a), ::mem::transmute(b)) } @@ -349,7 +382,7 @@ pub unsafe fn smuadx(a: int16x2_t, b: int16x2_t) -> i32 { /// and sets the Q flag if overflow occurs on the addition. #[inline] #[cfg_attr(test, assert_instr(smusd))] -pub unsafe fn smusd(a: int16x2_t, b: int16x2_t) -> i32 { +pub unsafe fn __smusd(a: int16x2_t, b: int16x2_t) -> i32 { arm_smusd(::mem::transmute(a), ::mem::transmute(b)) } @@ -362,7 +395,7 @@ pub unsafe fn smusd(a: int16x2_t, b: int16x2_t) -> i32 { /// and sets the Q flag if overflow occurs on the addition. 
#[inline] #[cfg_attr(test, assert_instr(smusdx))] -pub unsafe fn smusdx(a: int16x2_t, b: int16x2_t) -> i32 { +pub unsafe fn __smusdx(a: int16x2_t, b: int16x2_t) -> i32 { arm_smusdx(::mem::transmute(a), ::mem::transmute(b)) } @@ -374,7 +407,7 @@ pub unsafe fn smusdx(a: int16x2_t, b: int16x2_t) -> i32 { /// (a\[2\] - b\[2\]) + (a\[3\] - b\[3\]) #[inline] #[cfg_attr(test, assert_instr(usad8))] -pub unsafe fn usad8(a: int8x4_t, b: int8x4_t) -> u32 { +pub unsafe fn __usad8(a: int8x4_t, b: int8x4_t) -> u32 { arm_usad8(::mem::transmute(a), ::mem::transmute(b)) } @@ -386,42 +419,23 @@ pub unsafe fn usad8(a: int8x4_t, b: int8x4_t) -> u32 { /// (a\[2\] - b\[2\]) + (a\[3\] - b\[3\]) + c #[inline] #[cfg_attr(test, assert_instr(usad8))] -pub unsafe fn usad8a(a: int8x4_t, b: int8x4_t, c: u32) -> u32 { - usad8(a, b) + c +pub unsafe fn __usada8(a: int8x4_t, b: int8x4_t, c: u32) -> u32 { + __usad8(a, b) + c } #[cfg(test)] mod tests { - use core_arch::arm::*; - use core_arch::simd::*; + use core_arch::simd::{i16x2, i8x4}; use std::mem; use stdsimd_test::simd_test; - #[test] - fn qadd() { - unsafe { - assert_eq!(dsp::qadd(-10, 60), 50); - assert_eq!(dsp::qadd(::std::i32::MAX, 10), ::std::i32::MAX); - assert_eq!(dsp::qadd(::std::i32::MIN, -10), ::std::i32::MIN); - } - } - - #[test] - fn qsub() { - unsafe { - assert_eq!(dsp::qsub(10, 60), -50); - assert_eq!(dsp::qsub(::std::i32::MAX, -10), ::std::i32::MAX); - assert_eq!(dsp::qsub(::std::i32::MIN, 10), ::std::i32::MIN); - } - } - #[test] fn qadd8() { unsafe { let a = i8x4::new(1, 2, 3, ::std::i8::MAX); let b = i8x4::new(2, -1, 0, 1); let c = i8x4::new(3, 1, 3, ::std::i8::MAX); - let r: i8x4 = dsp_call!(dsp::qadd8, a, b); + let r: i8x4 = dsp_call!(super::__qadd8, a, b); assert_eq!(r, c); } } @@ -432,7 +446,7 @@ mod tests { let a = i8x4::new(1, 2, 3, ::std::i8::MIN); let b = i8x4::new(2, -1, 0, 1); let c = i8x4::new(-1, 3, 3, ::std::i8::MIN); - let r: i8x4 = dsp_call!(dsp::qsub8, a, b); + let r: i8x4 = dsp_call!(super::__qsub8, a, b); assert_eq!(r, c); } } @@ -443,7 +457,7 @@ mod tests { let a = i16x2::new(1, 2); let b = i16x2::new(2, -1); let c = i16x2::new(3, 1); - let r: i16x2 = dsp_call!(dsp::qadd16, a, b); + let r: i16x2 = dsp_call!(super::__qadd16, a, b); assert_eq!(r, c); } } @@ -454,7 +468,7 @@ mod tests { let a = i16x2::new(10, 20); let b = i16x2::new(20, -10); let c = i16x2::new(-10, 30); - let r: i16x2 = dsp_call!(dsp::qsub16, a, b); + let r: i16x2 = dsp_call!(super::__qsub16, a, b); assert_eq!(r, c); } } @@ -465,7 +479,7 @@ mod tests { let a = i16x2::new(1, ::std::i16::MAX); let b = i16x2::new(2, 2); let c = i16x2::new(-1, ::std::i16::MAX); - let r: i16x2 = dsp_call!(dsp::qasx, a, b); + let r: i16x2 = dsp_call!(super::__qasx, a, b); assert_eq!(r, c); } } @@ -476,7 +490,7 @@ mod tests { let a = i16x2::new(1, ::std::i16::MAX); let b = i16x2::new(2, 2); let c = i16x2::new(3, ::std::i16::MAX - 2); - let r: i16x2 = dsp_call!(dsp::qsax, a, b); + let r: i16x2 = dsp_call!(super::__qsax, a, b); assert_eq!(r, c); } } @@ -487,7 +501,7 @@ mod tests { let a = i16x2::new(1, ::std::i16::MAX); let b = i16x2::new(2, 2); let c = i16x2::new(3, -::std::i16::MAX); - let r: i16x2 = dsp_call!(dsp::sadd16, a, b); + let r: i16x2 = dsp_call!(super::__sadd16, a, b); assert_eq!(r, c); } } @@ -498,7 +512,7 @@ mod tests { let a = i8x4::new(1, 2, 3, ::std::i8::MAX); let b = i8x4::new(4, 3, 2, 2); let c = i8x4::new(5, 5, 5, -::std::i8::MAX); - let r: i8x4 = dsp_call!(dsp::sadd8, a, b); + let r: i8x4 = dsp_call!(super::__sadd8, a, b); assert_eq!(r, c); } } @@ -509,7 +523,7 @@ mod 
tests { let a = i16x2::new(1, 2); let b = i16x2::new(2, 1); let c = i16x2::new(0, 4); - let r: i16x2 = dsp_call!(dsp::sasx, a, b); + let r: i16x2 = dsp_call!(super::__sasx, a, b); assert_eq!(r, c); } } @@ -519,7 +533,7 @@ mod tests { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(3, 4); - let r = dsp::smlad(::mem::transmute(a), ::mem::transmute(b), 10); + let r = super::__smlad(::mem::transmute(a), ::mem::transmute(b), 10); assert_eq!(r, (1 * 3) + (2 * 4) + 10); } } @@ -529,7 +543,7 @@ mod tests { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(3, 4); - let r = dsp::smlsd(::mem::transmute(a), ::mem::transmute(b), 10); + let r = super::__smlsd(::mem::transmute(a), ::mem::transmute(b), 10); assert_eq!(r, ((1 * 3) - (2 * 4)) + 10); } } @@ -540,9 +554,9 @@ mod tests { let a = i8x4::new(1, 2, 3, ::std::i8::MAX); let b = i8x4::new(4, 3, 2, 2); // call sadd8() to set GE bits - dsp::sadd8(::mem::transmute(a), ::mem::transmute(b)); + super::__sadd8(::mem::transmute(a), ::mem::transmute(b)); let c = i8x4::new(1, 2, 3, ::std::i8::MAX); - let r: i8x4 = dsp_call!(dsp::sel, a, b); + let r: i8x4 = dsp_call!(super::__sel, a, b); assert_eq!(r, c); } } @@ -553,7 +567,7 @@ mod tests { let a = i8x4::new(1, 2, 3, 4); let b = i8x4::new(5, 4, 3, 2); let c = i8x4::new(3, 3, 3, 3); - let r: i8x4 = dsp_call!(dsp::shadd8, a, b); + let r: i8x4 = dsp_call!(super::__shadd8, a, b); assert_eq!(r, c); } } @@ -564,7 +578,7 @@ mod tests { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); let c = i16x2::new(3, 3); - let r: i16x2 = dsp_call!(dsp::shadd16, a, b); + let r: i16x2 = dsp_call!(super::__shadd16, a, b); assert_eq!(r, c); } } @@ -575,7 +589,7 @@ mod tests { let a = i8x4::new(1, 2, 3, 4); let b = i8x4::new(5, 4, 3, 2); let c = i8x4::new(-2, -1, 0, 1); - let r: i8x4 = dsp_call!(dsp::shsub8, a, b); + let r: i8x4 = dsp_call!(super::__shsub8, a, b); assert_eq!(r, c); } } @@ -586,7 +600,7 @@ mod tests { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); let c = i16x2::new(-2, -1); - let r: i16x2 = dsp_call!(dsp::shsub16, a, b); + let r: i16x2 = dsp_call!(super::__shsub16, a, b); assert_eq!(r, c); } } @@ -596,7 +610,7 @@ mod tests { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); - let r = dsp::smuad(::mem::transmute(a), ::mem::transmute(b)); + let r = super::__smuad(::mem::transmute(a), ::mem::transmute(b)); assert_eq!(r, 13); } } @@ -606,7 +620,7 @@ mod tests { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); - let r = dsp::smuadx(::mem::transmute(a), ::mem::transmute(b)); + let r = super::__smuadx(::mem::transmute(a), ::mem::transmute(b)); assert_eq!(r, 14); } } @@ -616,7 +630,7 @@ mod tests { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); - let r = dsp::smusd(::mem::transmute(a), ::mem::transmute(b)); + let r = super::__smusd(::mem::transmute(a), ::mem::transmute(b)); assert_eq!(r, -3); } } @@ -626,7 +640,7 @@ mod tests { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); - let r = dsp::smusdx(::mem::transmute(a), ::mem::transmute(b)); + let r = super::__smusdx(::mem::transmute(a), ::mem::transmute(b)); assert_eq!(r, -6); } } @@ -636,7 +650,7 @@ mod tests { unsafe { let a = i8x4::new(1, 2, 3, 4); let b = i8x4::new(4, 3, 2, 1); - let r = dsp::usad8(::mem::transmute(a), ::mem::transmute(b)); + let r = super::__usad8(::mem::transmute(a), ::mem::transmute(b)); assert_eq!(r, 8); } } @@ -647,7 +661,7 @@ mod tests { let a = i8x4::new(1, 2, 3, 4); let b = i8x4::new(4, 3, 2, 1); let c = 10; - let r = dsp::usad8a(::mem::transmute(a), ::mem::transmute(b), c); + let r 
= super::__usada8(::mem::transmute(a), ::mem::transmute(b), c); assert_eq!(r, 8 + c); } } diff --git a/crates/core_arch/src/arm/cmsis.rs b/crates/core_arch/src/arm/cmsis.rs deleted file mode 100644 index bc8509d3e8..0000000000 --- a/crates/core_arch/src/arm/cmsis.rs +++ /dev/null @@ -1,330 +0,0 @@ -//! CMSIS: Cortex Microcontroller Software Interface Standard -//! -//! The version 5 of the standard can be found at: -//! -//! http://arm-software.github.io/CMSIS_5/Core/html/index.html -//! -//! The API reference of the standard can be found at: -//! -//! - Core function access -- http://arm-software.github.io/CMSIS_5/Core/html/group__Core__Register__gr.html -//! - Intrinsic functions for CPU instructions -- http://arm-software.github.io/CMSIS_5/Core/html/group__intrinsic__CPU__gr.html -//! -//! The reference C implementation used as the base of this Rust port can be -//! found at -//! -//! https://github.com/ARM-software/CMSIS_5/blob/5.3.0/CMSIS/Core/Include/cmsis_gcc.h - -#![allow(non_snake_case)] - -/* Core function access */ - -/// Enable IRQ Interrupts -/// -/// Enables IRQ interrupts by clearing the I-bit in the CPSR. Can only be -/// executed in Privileged modes. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(cpsie))] -pub unsafe fn __enable_irq() { - asm!("cpsie i" : : : "memory" : "volatile"); -} - -/// Disable IRQ Interrupts -/// -/// Disables IRQ interrupts by setting the I-bit in the CPSR. Can only be -/// executed in Privileged modes. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(cpsid))] -pub unsafe fn __disable_irq() { - asm!("cpsid i" : : : "memory" : "volatile"); -} - -/// Get Control Register -/// -/// Returns the content of the Control Register. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(mrs))] -pub unsafe fn __get_CONTROL() -> u32 { - let result: u32; - asm!("mrs $0, CONTROL" : "=r"(result) : : : "volatile"); - result -} - -/// Set Control Register -/// -/// Writes the given value to the Control Register. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(msr))] -pub unsafe fn __set_CONTROL(control: u32) { - asm!("msr CONTROL, $0" : : "r"(control) : "memory" : "volatile"); -} - -/// Get IPSR Register -/// -/// Returns the content of the IPSR Register. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(mrs))] -pub unsafe fn __get_IPSR() -> u32 { - let result: u32; - asm!("mrs $0, IPSR" : "=r"(result) : : : "volatile"); - result -} - -/// Get APSR Register -/// -/// Returns the content of the APSR Register. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(mrs))] -pub unsafe fn __get_APSR() -> u32 { - let result: u32; - asm!("mrs $0, APSR" : "=r"(result) : : : "volatile"); - result -} - -/// Get xPSR Register -/// -/// Returns the content of the xPSR Register. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(mrs))] -pub unsafe fn __get_xPSR() -> u32 { - let result: u32; - asm!("mrs $0, XPSR" : "=r"(result) : : : "volatile"); - result -} - -/// Get Process Stack Pointer -/// -/// Returns the current value of the Process Stack Pointer (PSP). -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(mrs))] -pub unsafe fn __get_PSP() -> u32 { - let result: u32; - asm!("mrs $0, PSP" : "=r"(result) : : : "volatile"); - result -} - -/// Set Process Stack Pointer -/// -/// Assigns the given value to the Process Stack Pointer (PSP). 
-#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(msr))] -pub unsafe fn __set_PSP(top_of_proc_stack: u32) { - asm!("msr PSP, $0" : : "r"(top_of_proc_stack) : : "volatile"); -} - -/// Get Main Stack Pointer -/// -/// Returns the current value of the Main Stack Pointer (MSP). -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(mrs))] -pub unsafe fn __get_MSP() -> u32 { - let result: u32; - asm!("mrs $0, MSP" : "=r"(result) : : : "volatile"); - result -} - -/// Set Main Stack Pointer -/// -/// Assigns the given value to the Main Stack Pointer (MSP). -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(msr))] -pub unsafe fn __set_MSP(top_of_main_stack: u32) { - asm!("msr MSP, $0" : : "r"(top_of_main_stack) : : "volatile"); -} - -/// Get Priority Mask -/// -/// Returns the current state of the priority mask bit from the Priority Mask -/// Register. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(mrs))] -pub unsafe fn __get_PRIMASK() -> u32 { - let result: u32; - asm!("mrs $0, PRIMASK" : "=r"(result) : : "memory" : "volatile"); - result -} - -/// Set Priority Mask -/// -/// Assigns the given value to the Priority Mask Register. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(msr))] -pub unsafe fn __set_PRIMASK(pri_mask: u32) { - asm!("msr PRIMASK, $0" : : "r"(pri_mask) : : "volatile"); -} - -#[cfg(any(target_feature = "v7", dox))] -mod v7 { - /// Enable FIQ - /// - /// Enables FIQ interrupts by clearing the F-bit in the CPSR. Can only be - /// executed in Privileged modes. - #[inline] - #[target_feature(enable = "mclass")] - #[cfg_attr(test, assert_instr(cpsie))] - pub unsafe fn __enable_fault_irq() { - asm!("cpsie f" : : : "memory" : "volatile"); - } - - /// Disable FIQ - /// - /// Disables FIQ interrupts by setting the F-bit in the CPSR. Can only be - /// executed in Privileged modes. - #[inline] - #[target_feature(enable = "mclass")] - #[cfg_attr(test, assert_instr(cpsid))] - pub unsafe fn __disable_fault_irq() { - asm!("cpsid f" : : : "memory" : "volatile"); - } - - /// Get Base Priority - /// - /// Returns the current value of the Base Priority register. - #[inline] - #[target_feature(enable = "mclass")] - #[cfg_attr(test, assert_instr(mrs))] - pub unsafe fn __get_BASEPRI() -> u32 { - let result: u32; - asm!("mrs $0, BASEPRI" : "=r"(result) : : : "volatile"); - result - } - - /// Set Base Priority - /// - /// Assigns the given value to the Base Priority register. - #[inline] - #[target_feature(enable = "mclass")] - #[cfg_attr(test, assert_instr(msr))] - pub unsafe fn __set_BASEPRI(base_pri: u32) { - asm!("msr BASEPRI, $0" : : "r"(base_pri) : "memory" : "volatile"); - } - - /// Set Base Priority with condition - /// - /// Assigns the given value to the Base Priority register only if BASEPRI - /// masking is disabled, or the new value increases the BASEPRI - /// priority level. - #[inline] - #[target_feature(enable = "mclass")] - #[cfg_attr(test, assert_instr(mrs))] - pub unsafe fn __set_BASEPRI_MAX(base_pri: u32) { - asm!("msr BASEPRI_MAX, $0" : : "r"(base_pri) : "memory" : "volatile"); - } - - /// Get Fault Mask - /// - /// Returns the current value of the Fault Mask register. 
- #[inline] - #[target_feature(enable = "mclass")] - #[cfg_attr(test, assert_instr(mrs))] - pub unsafe fn __get_FAULTMASK() -> u32 { - let result: u32; - asm!("mrs $0, FAULTMASK" : "=r"(result) : : : "volatile"); - result - } - - /// Set Fault Mask - /// - /// Assigns the given value to the Fault Mask register. - #[inline] - #[target_feature(enable = "mclass")] - #[cfg_attr(test, assert_instr(msr))] - pub unsafe fn __set_FAULTMASK(fault_mask: u32) { - asm!("msr FAULTMASK, $0" : : "r"(fault_mask) : "memory" : "volatile"); - } -} - -#[cfg(any(target_feature = "v7", dox))] -pub use self::v7::*; - -/* Core instruction access */ - -/// No Operation -/// -/// No Operation does nothing. This instruction can be used for code alignment -/// purposes. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn __NOP() { - asm!("nop" : : : : "volatile"); -} - -/// Wait For Interrupt -/// -/// Wait For Interrupt is a hint instruction that suspends execution until one -/// of a number of events occurs. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(wfi))] -pub unsafe fn __WFI() { - asm!("wfi" : : : : "volatile"); -} - -/// Wait For Event -/// -/// Wait For Event is a hint instruction that permits the processor to enter a -/// low-power state until one of a number of events occurs. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(wfe))] -pub unsafe fn __WFE() { - asm!("wfe" : : : : "volatile"); -} - -/// Send Event -/// -/// Send Event is a hint instruction. It causes an event to be signaled to the -/// CPU. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(sev))] -pub unsafe fn __SEV() { - asm!("sev" : : : : "volatile"); -} - -/// Instruction Synchronization Barrier -/// -/// Instruction Synchronization Barrier flushes the pipeline in the processor, -/// so that all instructions following the ISB are fetched from cache or -/// memory, after the instruction has been completed. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(isb))] -pub unsafe fn __ISB() { - asm!("isb 0xF" : : : "memory" : "volatile"); -} - -/// Data Synchronization Barrier -/// -/// Acts as a special kind of Data Memory Barrier. It completes when all -/// explicit memory accesses before this instruction complete. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(dsb))] -pub unsafe fn __DSB() { - asm!("dsb 0xF" : : : "memory" : "volatile"); -} - -/// Data Memory Barrier -/// -/// Ensures the apparent order of the explicit memory operations before and -/// after the instruction, without ensuring their completion. 
-#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(dmb))] -pub unsafe fn __DMB() { - asm!("dmb 0xF" : : : "memory" : "volatile"); -} diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index 30ff991f8d..e5b40c9bc7 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -11,11 +11,6 @@ mod armclang; pub use self::armclang::*; -#[cfg(any(target_feature = "mclass", dox))] -mod cmsis; -#[cfg(any(target_feature = "mclass", dox))] -pub use self::cmsis::*; - mod v6; pub use self::v6::*; @@ -24,11 +19,6 @@ mod v7; #[cfg(any(target_arch = "aarch64", target_feature = "v7"))] pub use self::v7::*; -#[cfg(any(all(target_feature = "v7", not(target_feature = "mclass")), dox))] -mod dsp; -#[cfg(any(all(target_feature = "v7", not(target_feature = "mclass")), dox))] -pub use self::dsp::*; - // NEON is supported on AArch64, and on ARM when built with the v7 and neon // features. Building ARM without neon produces incorrect codegen. #[cfg(any( @@ -44,6 +34,8 @@ mod neon; ))] pub use self::neon::*; +pub use super::acle::*; + #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/crates/core_arch/src/mod.rs b/crates/core_arch/src/mod.rs index 9705e091ca..1ca811ee75 100644 --- a/crates/core_arch/src/mod.rs +++ b/crates/core_arch/src/mod.rs @@ -3,6 +3,9 @@ #[macro_use] mod macros; +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +mod acle; + mod simd; #[cfg_attr( diff --git a/crates/stdsimd-test/src/lib.rs b/crates/stdsimd-test/src/lib.rs index dec44401d9..66ee9dd894 100644 --- a/crates/stdsimd-test/src/lib.rs +++ b/crates/stdsimd-test/src/lib.rs @@ -153,6 +153,10 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { // in some cases exceed the limit. "cvtpi2ps" => 25, + // core_arch/src/acle/simd32 + "usad8" => 27, + "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" => 29, + // Original limit was 20 instructions, but ARM DSP Intrinsics // are exactly 20 instructions long. So bump // the limit to 22 instead of adding here a