From c197560b7417280b06b74e6344231bffd3c658f2 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Mon, 13 Nov 2017 17:48:40 +0100 Subject: [PATCH 1/9] [stdsimd-test] testing conditional on more than one feature --- stdsimd-test/simd-test-macro/src/lib.rs | 28 ++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/stdsimd-test/simd-test-macro/src/lib.rs b/stdsimd-test/simd-test-macro/src/lib.rs index 3777feae79..9bf073d62f 100644 --- a/stdsimd-test/simd-test-macro/src/lib.rs +++ b/stdsimd-test/simd-test-macro/src/lib.rs @@ -33,7 +33,17 @@ pub fn simd_test( TokenNode::Op('=', _) => {} _ => panic!("expected #[simd_test = \"feature\"]"), } - let target_feature = &tokens[1]; + let target_features = match tokens[1].kind { + TokenNode::Literal(ref l) => l.to_string(), + _ => panic!("expected #[simd_test = \"feature\"]"), + }; + let target_features: Vec = target_features + .replace('"', "") + .replace('+', "") + .split(',') + .map(|v| String::from(v)) + .collect(); + let enable_feature = match tokens[1].kind { TokenNode::Literal(ref l) => l.to_string(), _ => panic!("expected #[simd_test = \"feature\"]"), @@ -41,17 +51,29 @@ pub fn simd_test( let enable_feature = enable_feature .trim_left_matches('"') .trim_right_matches('"'); - let enable_feature = string(&format!("+{}", enable_feature)); + let enable_feature = + string(&(format!("+{}", enable_feature).replace(',', ",+"))); let item = TokenStream::from(item); let name = find_name(item.clone()); let name: TokenStream = name.as_str().parse().unwrap(); + let mut cfg_target_features = quote::Tokens::new(); + use quote::ToTokens; + for feature in target_features { + let q = quote! { + cfg_feature_enabled!(#feature) && + }; + q.to_tokens(&mut cfg_target_features); + } + let q = quote!{ true }; + q.to_tokens(&mut cfg_target_features); + let ret: TokenStream = quote! { #[allow(non_snake_case)] #[test] fn #name() { - if cfg_feature_enabled!(#target_feature) { + if #cfg_target_features { return unsafe { #name() }; } else { ::stdsimd_test::assert_skip_test_ok(stringify!(#name)); From 400c50ebe2e036aa6bc73909016a5042b4b98664 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sun, 5 Nov 2017 18:44:14 +0100 Subject: [PATCH 2/9] [x86] implement xsave intrinsics --- src/lib.rs | 2 +- src/x86/mod.rs | 4 + src/x86/xsave.rs | 404 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 409 insertions(+), 1 deletion(-) create mode 100644 src/x86/xsave.rs diff --git a/src/lib.rs b/src/lib.rs index ed18b00d3f..05df07fa21 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -121,7 +121,7 @@ #![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd, simd_ffi, target_feature, cfg_target_feature, i128_type, asm, const_atomic_usize_new, stmt_expr_attributes)] -#![cfg_attr(test, feature(proc_macro, test))] +#![cfg_attr(test, feature(proc_macro, test, repr_align, attr_literals))] #![cfg_attr(feature = "cargo-clippy", allow(inline_always, too_many_arguments, cast_sign_loss, cast_lossless, cast_possible_wrap, diff --git a/src/x86/mod.rs b/src/x86/mod.rs index 96011521ab..6fa8f97d26 100644 --- a/src/x86/mod.rs +++ b/src/x86/mod.rs @@ -1,5 +1,7 @@ //! `x86` and `x86_64` intrinsics. +pub use self::xsave::*; + pub use self::sse::*; pub use self::sse2::*; pub use self::sse3::*; @@ -28,6 +30,8 @@ mod macros; #[macro_use] mod runtime; +mod xsave; + mod sse; mod sse2; mod sse3; diff --git a/src/x86/xsave.rs b/src/x86/xsave.rs new file mode 100644 index 0000000000..0007399db3 --- /dev/null +++ b/src/x86/xsave.rs @@ -0,0 +1,404 @@ +//! `xsave` and `xsaveopt` target feature intrinsics + +#![cfg_attr(feature = "cargo-clippy", allow(stutter))] + +#[cfg(test)] +use stdsimd_test::assert_instr; + +use x86::c_void; + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.x86.xsave"] + fn xsave(p: *mut i8, hi: i32, lo: i32) -> (); + #[link_name = "llvm.x86.xrstor"] + fn xrstor(p: *const c_void, hi: i32, lo: i32) -> (); + #[link_name = "llvm.x86.xsetbv"] + fn xsetbv(v: i32, hi: i32, lo: i32) -> (); + #[link_name = "llvm.x86.xgetbv"] + fn xgetbv(x: i32) -> i64; + #[link_name = "llvm.x86.xsave64"] + fn xsave64(p: *mut i8, hi: i32, lo: i32) -> (); + #[link_name = "llvm.x86.xrstor64"] + fn xrstor64(p: *const c_void, hi: i32, lo: i32) -> (); + #[link_name = "llvm.x86.xsaveopt"] + fn xsaveopt(p: *mut i8, hi: i32, lo: i32) -> (); + #[link_name = "llvm.x86.xsaveopt64"] + fn xsaveopt64(p: *mut i8, hi: i32, lo: i32) -> (); + #[link_name = "llvm.x86.xsavec"] + fn xsavec(p: *mut i8, hi: i32, lo: i32) -> (); + #[link_name = "llvm.x86.xsavec64"] + fn xsavec64(p: *mut i8, hi: i32, lo: i32) -> (); + #[link_name = "llvm.x86.xsaves"] + fn xsaves(p: *mut i8, hi: i32, lo: i32) -> (); + #[link_name = "llvm.x86.xsaves64"] + fn xsaves64(p: *mut i8, hi: i32, lo: i32) -> (); + #[link_name = "llvm.x86.xrstors"] + fn xrstors(p: *const c_void, hi: i32, lo: i32) -> (); + #[link_name = "llvm.x86.xrstors64"] + fn xrstors64(p: *const c_void, hi: i32, lo: i32) -> (); +} + +/// Perform a full or partial save of the enabled processor states to memory at +/// `mem_addr`. +/// +/// State is saved based on bits [62:0] in `save_mask` and XCR0. +/// `mem_addr` must be aligned on a 64-byte boundary. +/// +/// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of +/// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1. +#[inline(always)] +#[target_feature = "+xsave"] +#[cfg_attr(test, assert_instr(xsave))] +pub unsafe fn _xsave(mem_addr: *mut c_void, save_mask: u64) -> () { + xsave(mem_addr as *mut i8, (save_mask >> 32) as i32, save_mask as i32); +} + +/// Perform a full or partial restore of the enabled processor states using +/// the state information stored in memory at `mem_addr`. +/// +/// State is restored based on bits [62:0] in `rs_mask`, `XCR0`, and +/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte +/// boundary. +#[inline(always)] +#[target_feature = "+xsave"] +#[cfg_attr(test, assert_instr(xrstor))] +pub unsafe fn _xrstor(mem_addr: *const c_void, rs_mask: u64) -> () { + xrstor(mem_addr, (rs_mask >> 32) as i32, rs_mask as i32); +} + +/// `XFEATURE_ENABLED_MASK` for `XCR` +/// +/// This intrinsic maps to `XSETBV` instruction. +const _XCR_XFEATURE_ENABLED_MASK: u32 = 0; + +/// Copy 64-bits from `val` to the extended control register (`XCR`) specified +/// by `a`. +/// +/// Currently only `XFEATURE_ENABLED_MASK` `XCR` is supported. +#[inline(always)] +#[target_feature = "+xsave"] +#[cfg_attr(test, assert_instr(xsetbv))] +pub unsafe fn _xsetbv(a: u32, val: u64) -> () { + xsetbv(a as i32, (val >> 32) as i32, val as i32); +} + +/// Reads the contents of the extended control register `XCR` +/// specified in `xcr_no`. +#[inline(always)] +#[target_feature = "+xsave"] +#[cfg_attr(test, assert_instr(xgetbv))] +pub unsafe fn _xgetbv(xcr_no: u32) -> u64 { + xgetbv(xcr_no as i32) as u64 +} + +/// Perform a full or partial save of the enabled processor states to memory at +/// `mem_addr`. +/// +/// State is saved based on bits [62:0] in `save_mask` and XCR0. +/// `mem_addr` must be aligned on a 64-byte boundary. +/// +/// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of +/// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1. +#[inline(always)] +#[target_feature = "+xsave"] +#[cfg_attr(test, assert_instr(xsave64))] +#[cfg(not(target_arch = "x86"))] +pub unsafe fn _xsave64(mem_addr: *mut c_void, save_mask: u64) -> () { + xsave64(mem_addr as *mut i8, (save_mask >> 32) as i32, save_mask as i32); +} + +/// Perform a full or partial restore of the enabled processor states using +/// the state information stored in memory at `mem_addr`. +/// +/// State is restored based on bits [62:0] in `rs_mask`, `XCR0`, and +/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte +/// boundary. +#[inline(always)] +#[target_feature = "+xsave"] +#[cfg_attr(test, assert_instr(xrstor64))] +#[cfg(not(target_arch = "x86"))] +pub unsafe fn _xrstor64(mem_addr: *const c_void, rs_mask: u64) -> () { + xrstor64(mem_addr, (rs_mask >> 32) as i32, rs_mask as i32); +} + +/// Perform a full or partial save of the enabled processor states to memory at +/// `mem_addr`. +/// +/// State is saved based on bits [62:0] in `save_mask` and `XCR0`. +/// `mem_addr` must be aligned on a 64-byte boundary. The hardware may optimize +/// the manner in which data is saved. The performance of this instruction will +/// be equal to or better than using the `XSAVE` instruction. +#[inline(always)] +#[target_feature = "+xsave,+xsaveopt"] +#[cfg_attr(test, assert_instr(xsaveopt))] +pub unsafe fn _xsaveopt(mem_addr: *mut c_void, save_mask: u64) -> () { + xsaveopt(mem_addr as *mut i8, (save_mask >> 32) as i32, save_mask as i32); +} + +/// Perform a full or partial save of the enabled processor states to memory at +/// `mem_addr`. +/// +/// State is saved based on bits [62:0] in `save_mask` and `XCR0`. +/// `mem_addr` must be aligned on a 64-byte boundary. The hardware may optimize +/// the manner in which data is saved. The performance of this instruction will +/// be equal to or better than using the `XSAVE64` instruction. +#[inline(always)] +#[target_feature = "+xsave,+xsaveopt"] +#[cfg_attr(test, assert_instr(xsaveopt64))] +#[cfg(not(target_arch = "x86"))] +pub unsafe fn _xsaveopt64(mem_addr: *mut c_void, save_mask: u64) -> () { + xsaveopt64( + mem_addr as *mut i8, + (save_mask >> 32) as i32, + save_mask as i32, + ); +} + +/// Perform a full or partial save of the enabled processor states to memory +/// at `mem_addr`. +/// +/// `xsavec` differs from `xsave` in that it uses compaction and that it may +/// use init optimization. State is saved based on bits [62:0] in `save_mask` +/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary. +#[inline(always)] +#[target_feature = "+xsave,+xsavec"] +#[cfg_attr(test, assert_instr(xsavec))] +pub unsafe fn _xsavec(mem_addr: *mut c_void, save_mask: u64) -> () { + xsavec(mem_addr as *mut i8, (save_mask >> 32) as i32, save_mask as i32); +} + +/// Perform a full or partial save of the enabled processor states to memory +/// at `mem_addr`. +/// +/// `xsavec` differs from `xsave` in that it uses compaction and that it may +/// use init optimization. State is saved based on bits [62:0] in `save_mask` +/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary. +#[inline(always)] +#[target_feature = "+xsave,+xsavec"] +#[cfg_attr(test, assert_instr(xsavec64))] +#[cfg(not(target_arch = "x86"))] +pub unsafe fn _xsavec64(mem_addr: *mut c_void, save_mask: u64) -> () { + xsavec64(mem_addr as *mut i8, (save_mask >> 32) as i32, save_mask as i32); +} + +/// Perform a full or partial save of the enabled processor states to memory at +/// `mem_addr` +/// +/// `xsaves` differs from xsave in that it can save state components +/// corresponding to bits set in `IA32_XSS` `MSR` and that it may use the +/// modified optimization. State is saved based on bits [62:0] in `save_mask` +/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary. +#[inline(always)] +#[target_feature = "+xsave,+xsaves"] +#[cfg_attr(test, assert_instr(xsaves))] +pub unsafe fn _xsaves(mem_addr: *mut c_void, save_mask: u64) -> () { + xsaves(mem_addr as *mut i8, (save_mask >> 32) as i32, save_mask as i32); +} + +/// Perform a full or partial save of the enabled processor states to memory at +/// `mem_addr` +/// +/// `xsaves` differs from xsave in that it can save state components +/// corresponding to bits set in `IA32_XSS` `MSR` and that it may use the +/// modified optimization. State is saved based on bits [62:0] in `save_mask` +/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary. +#[inline(always)] +#[target_feature = "+xsave,+xsaves"] +#[cfg_attr(test, assert_instr(xsaves64))] +#[cfg(not(target_arch = "x86"))] +pub unsafe fn _xsaves64(mem_addr: *mut c_void, save_mask: u64) -> () { + xsaves64(mem_addr as *mut i8, (save_mask >> 32) as i32, save_mask as i32); +} + +/// Perform a full or partial restore of the enabled processor states using the +/// state information stored in memory at `mem_addr`. +/// +/// `xrstors` differs from `xrstor` in that it can restore state components +/// corresponding to bits set in the `IA32_XSS` `MSR`; `xrstors` cannot restore +/// from an `xsave` area in which the extended region is in the standard form. +/// State is restored based on bits [62:0] in `rs_mask`, `XCR0`, and +/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte +/// boundary. +#[inline(always)] +#[target_feature = "+xsave,+xsaves"] +#[cfg_attr(test, assert_instr(xrstors))] +pub unsafe fn _xrstors(mem_addr: *const c_void, rs_mask: u64) -> () { + xrstors(mem_addr, (rs_mask >> 32) as i32, rs_mask as i32); +} +/// Perform a full or partial restore of the enabled processor states using the +/// state information stored in memory at `mem_addr`. +/// +/// `xrstors` differs from `xrstor` in that it can restore state components +/// corresponding to bits set in the `IA32_XSS` `MSR`; `xrstors` cannot restore +/// from an `xsave` area in which the extended region is in the standard form. +/// State is restored based on bits [62:0] in `rs_mask`, `XCR0`, and +/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte +/// boundary. +#[inline(always)] +#[target_feature = "+xsave,+xsaves"] +#[cfg_attr(test, assert_instr(xrstors64))] +#[cfg(not(target_arch = "x86"))] +pub unsafe fn _xrstors64(mem_addr: *const c_void, rs_mask: u64) -> () { + xrstors64(mem_addr, (rs_mask >> 32) as i32, rs_mask as i32); +} + + +#[cfg(test)] +mod tests { + use super::*; + use stdsimd_test::simd_test; + use std::fmt; + + #[repr(align(64))] + struct Buffer { + data: [u64; 1024], // 8192 bytes + } + + impl Buffer { + fn new() -> Buffer { + Buffer { data: [0; 1024] } + } + fn ptr(&mut self) -> *mut c_void { + &mut self.data[0] as *mut _ as *mut c_void + } + } + + impl PartialEq for Buffer { + fn eq(&self, other: &Buffer) -> bool { + for i in 0..1024 { + if self.data[i] != other.data[i] { + return false; + } + } + true + } + } + + impl fmt::Debug for Buffer { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "[")?; + for i in 0..1024 { + write!(f, "{}", self.data[i])?; + if i != 1023 { + write!(f, ", ")?; + } + } + write!(f, "]") + } + } + + #[simd_test = "xsave"] + unsafe fn xsave() { + let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers + let mut a = Buffer::new(); + let mut b = Buffer::new(); + + _xsave(a.ptr(), m); + _xrstor(a.ptr(), m); + _xsave(b.ptr(), m); + assert_eq!(a, b); + } + + #[cfg(not(target_arch = "x86"))] + #[simd_test = "xsave"] + unsafe fn xsave64() { + let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers + let mut a = Buffer::new(); + let mut b = Buffer::new(); + + _xsave64(a.ptr(), m); + _xrstor64(a.ptr(), m); + _xsave64(b.ptr(), m); + assert_eq!(a, b); + } + + #[simd_test = "xsave"] + unsafe fn xgetbv_xsetbv() { + let xcr_n: u32 = _XCR_XFEATURE_ENABLED_MASK; + + let xcr: u64 = _xgetbv(xcr_n); + // FIXME: XSETBV is a privileged instruction we should only test this + // when running in privileged mode: + // + // _xsetbv(xcr_n, xcr); + let xcr_cpy: u64 = _xgetbv(xcr_n); + assert_eq!(xcr, xcr_cpy); + } + + #[simd_test = "xsave,xsaveopt"] + unsafe fn xsaveopt() { + let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers + let mut a = Buffer::new(); + let mut b = Buffer::new(); + + _xsaveopt(a.ptr(), m); + _xrstor(a.ptr(), m); + _xsaveopt(b.ptr(), m); + assert_eq!(a, b); + } + + #[cfg(not(target_arch = "x86"))] + #[simd_test = "xsave,xsaveopt"] + unsafe fn xsaveopt64() { + let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers + let mut a = Buffer::new(); + let mut b = Buffer::new(); + + _xsaveopt64(a.ptr(), m); + _xrstor64(a.ptr(), m); + _xsaveopt64(b.ptr(), m); + assert_eq!(a, b); + } + + + #[simd_test = "xsave,xsavec"] + unsafe fn xsavec() { + let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers + let mut a = Buffer::new(); + let mut b = Buffer::new(); + + _xsavec(a.ptr(), m); + _xrstor(a.ptr(), m); + _xsavec(b.ptr(), m); + assert_eq!(a, b); + } + + #[cfg(not(target_arch = "x86"))] + #[simd_test = "xsave,xsavec"] + unsafe fn xsavec64() { + let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers + let mut a = Buffer::new(); + let mut b = Buffer::new(); + + _xsavec64(a.ptr(), m); + _xrstor64(a.ptr(), m); + _xsavec64(b.ptr(), m); + assert_eq!(a, b); + } + + #[simd_test = "xsaves"] + unsafe fn xsaves() { + let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers + let mut a = Buffer::new(); + let mut b = Buffer::new(); + + _xsaves(a.ptr(), m); + _xrstors(a.ptr(), m); + _xsaves(b.ptr(), m); + assert_eq!(a, b); + } + + #[cfg(not(target_arch = "x86"))] + #[simd_test = "xsaves"] + unsafe fn xsaves64() { + let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers + let mut a = Buffer::new(); + let mut b = Buffer::new(); + + _xsaves64(a.ptr(), m); + _xrstors64(a.ptr(), m); + _xsaves64(b.ptr(), m); + assert_eq!(a, b); + } +} From 8503aca1438bdf4120346d79ad4a4eb9bd156cff Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sun, 5 Nov 2017 18:45:27 +0100 Subject: [PATCH 3/9] [x86] implement __read/write eflags --- src/x86/ia32.rs | 50 +++++++++++++++++++++++++++++++++++++++++++++++++ src/x86/mod.rs | 2 ++ 2 files changed, 52 insertions(+) create mode 100644 src/x86/ia32.rs diff --git a/src/x86/ia32.rs b/src/x86/ia32.rs new file mode 100644 index 0000000000..ac7ab8b080 --- /dev/null +++ b/src/x86/ia32.rs @@ -0,0 +1,50 @@ +//! `i386/ia32` intrinsics + +/// Reads EFLAGS. +#[cfg(target_arch = "x86")] +#[inline(always)] +pub unsafe fn __readeflags() -> u32 { + let eflags: u32; + asm!("pushfd; popl $0" : "=r"(eflags) : : : "volatile"); + eflags +} + +/// Reads EFLAGS. +#[cfg(target_arch = "x86_64")] +#[inline(always)] +pub unsafe fn __readeflags() -> u64 { + let eflags: u64; + asm!("pushfq; popq $0" : "=r"(eflags) : : : "volatile"); + eflags +} + +/// Write EFLAGS. +#[cfg(target_arch = "x86")] +#[inline(always)] +pub unsafe fn __writeeflags(eflags: u32) { + asm!("pushl $0; popfd" : : "r"(eflags) : "cc", "flags" : "volatile"); +} + +/// Write EFLAGS. +#[cfg(target_arch = "x86_64")] +#[inline(always)] +pub unsafe fn __writeeflags(eflags: u64) { + asm!("pushq $0; popfq" : : "r"(eflags) : "cc", "flags" : "volatile"); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_eflags() { + unsafe { + // reads eflags, writes them back, reads them again, + // and compare for equality: + let v = __readeflags(); + __writeeflags(v); + let u = __readeflags(); + assert_eq!(v, u); + } + } +} diff --git a/src/x86/mod.rs b/src/x86/mod.rs index 6fa8f97d26..79d1e06feb 100644 --- a/src/x86/mod.rs +++ b/src/x86/mod.rs @@ -1,5 +1,6 @@ //! `x86` and `x86_64` intrinsics. +pub use self::ia32::*; pub use self::xsave::*; pub use self::sse::*; @@ -31,6 +32,7 @@ mod macros; mod runtime; mod xsave; +mod ia32; mod sse; mod sse2; From 111b36744fe039ba46f1bccac502c2d22617364b Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sun, 5 Nov 2017 18:46:49 +0100 Subject: [PATCH 4/9] [x86] implement cpuid intrinsics --- src/x86/cpuid.rs | 119 +++++++++++++++++++++++++++++++++++++++++++++++ src/x86/mod.rs | 4 +- 2 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 src/x86/cpuid.rs diff --git a/src/x86/cpuid.rs b/src/x86/cpuid.rs new file mode 100644 index 0000000000..d19f485e45 --- /dev/null +++ b/src/x86/cpuid.rs @@ -0,0 +1,119 @@ +//! `cpuid` intrinsics + +#![cfg_attr(feature = "cargo-clippy", allow(stutter))] + +#[cfg(test)] +use stdsimd_test::assert_instr; + +/// Result of the `cpuid` instruction. +#[derive(Copy, Clone, Eq, Ord, PartialEq, PartialOrd)] +#[cfg_attr(feature = "cargo-clippy", allow(stutter))] +pub struct CpuidResult { + /// EAX register. + pub eax: u32, + /// EBX register. + pub ebx: u32, + /// ECX register. + pub ecx: u32, + /// EDX register. + pub edx: u32, +} + +/// `cpuid` instruction. +/// +/// The [CPUID Wikipedia page][wiki_cpuid] contains how to query which +/// information using the `eax` and `ecx` registers, and the format in +/// which this information is returned in `eax...edx`. +/// +/// The `has_cpuid()` intrinsics can be used to query whether the `cpuid` +/// instruction is available. +/// +/// The definitive references are: +/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: +/// Instruction Set Reference, A-Z][intel64_ref]. +/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and +/// System Instructions][amd64_ref]. +/// +/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID +/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +#[inline(always)] +#[cfg_attr(test, assert_instr(cpuid))] +pub unsafe fn __cpuid_count(eax: u32, ecx: u32) -> CpuidResult { + let mut r = ::std::mem::uninitialized::(); + asm!("cpuid" + : "={eax}"(r.eax), "={ebx}"(r.ebx), "={ecx}"(r.ecx), "={edx}"(r.edx) + : "{eax}"(eax), "{ecx}"(ecx) + : :); + r +} + +/// `cpuid` instruction. +/// +/// See `__cpuid_count`. +#[inline(always)] +#[cfg_attr(test, assert_instr(cpuid))] +pub unsafe fn __cpuid(eax: u32) -> CpuidResult { + __cpuid_count(eax, 0) +} + +/// Does the host support the `cpuid` instruction? +#[inline(always)] +pub fn has_cpuid() -> bool { + #[cfg(target_arch = "x86_64")] + { + true + } + #[cfg(target_arch = "x86")] + { + use super::ia32::{__readeflags, __writeeflags}; + + // On `x86` the `cpuid` instruction is not always available. + // This follows the approach indicated in: + // http://wiki.osdev.org/CPUID#Checking_CPUID_availability + unsafe { + // Read EFLAGS: + let eflags: u32 = __readeflags(); + + // Invert the ID bit in EFLAGS: + let eflags_mod: u32 = eflags | 0x0020_0000; + + // Store the modified EFLAGS (ID bit may or may not be inverted) + __writeeflags(eflags_mod); + + // Read EFLAGS again: + let eflags_after: u32 = __readeflags(); + + // Check if the ID bit changed: + eflags_after != eflags + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_always_has_cpuid() { + // all currently-tested targets have the instruction + // FIXME: add targets without `cpuid` to CI + assert!(has_cpuid()); + } + + #[cfg(target_arch = "x86")] + #[test] + fn test_has_cpuid() { + use vendor::__readeflags; + unsafe { + let before = __readeflags(); + + if has_cpuid() { + assert!(before != __readeflags()); + } else { + assert!(before == __readeflags()); + } + } + } + +} diff --git a/src/x86/mod.rs b/src/x86/mod.rs index 79d1e06feb..081d1635d3 100644 --- a/src/x86/mod.rs +++ b/src/x86/mod.rs @@ -1,6 +1,7 @@ //! `x86` and `x86_64` intrinsics. pub use self::ia32::*; +pub use self::cpuid::*; pub use self::xsave::*; pub use self::sse::*; @@ -31,8 +32,9 @@ mod macros; #[macro_use] mod runtime; -mod xsave; mod ia32; +mod cpuid; +mod xsave; mod sse; mod sse2; From 10f430ff20af004f2b4d311999a3a3c3b93c6196 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sun, 5 Nov 2017 18:47:46 +0100 Subject: [PATCH 5/9] [x86] cleanup run-time; add SSE4a, AVX-512, and xsave --- Cargo.toml | 2 +- src/x86/runtime.rs | 358 +++++++++++++++++++++++++++++++---------- tests/cpu-detection.rs | 20 ++- 3 files changed, 292 insertions(+), 88 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 874f9d4f08..697ebf8d74 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,7 +28,7 @@ opt-level = 3 [dev-dependencies] stdsimd-test = { version = "0.*", path = "stdsimd-test" } -cupid = "0.3" +cupid = "0.4.0" [features] strict = [] diff --git a/src/x86/runtime.rs b/src/x86/runtime.rs index 1549c4f7a3..9cbe42808c 100644 --- a/src/x86/runtime.rs +++ b/src/x86/runtime.rs @@ -49,6 +49,10 @@ macro_rules! __unstable_detect_feature { $crate::vendor::__unstable_detect_feature( $crate::vendor::__Feature::sse4_2{}) }; + ("sse4a") => { + $crate::vendor::__unstable_detect_feature( + $crate::vendor::__Feature::sse4a{}) + }; ("avx") => { $crate::vendor::__unstable_detect_feature( $crate::vendor::__Feature::avx{}) @@ -57,6 +61,46 @@ macro_rules! __unstable_detect_feature { $crate::vendor::__unstable_detect_feature( $crate::vendor::__Feature::avx2{}) }; + ("avx512f") => { + $crate::vendor::__unstable_detect_feature( + $crate::vendor::__Feature::avx512f{}) + }; + ("avx512cd") => { + $crate::vendor::__unstable_detect_feature( + $crate::vendor::__Feature::avx512cd{}) + }; + ("avx512er") => { + $crate::vendor::__unstable_detect_feature( + $crate::vendor::__Feature::avx512er{}) + }; + ("avx512pf") => { + $crate::vendor::__unstable_detect_feature( + $crate::vendor::__Feature::avx512pf{}) + }; + ("avx512bw") => { + $crate::vendor::__unstable_detect_feature( + $crate::vendor::__Feature::avx512bw{}) + }; + ("avx512dq") => { + $crate::vendor::__unstable_detect_feature( + $crate::vendor::__Feature::avx512dq{}) + }; + ("avx512vl") => { + $crate::vendor::__unstable_detect_feature( + $crate::vendor::__Feature::avx512vl{}) + }; + ("avx512ifma") => { + $crate::vendor::__unstable_detect_feature( + $crate::vendor::__Feature::avx512_ifma{}) + }; + ("avx512vbmi") => { + $crate::vendor::__unstable_detect_feature( + $crate::vendor::__Feature::avx512_vbmi{}) + }; + ("avx512vpopcntdq") => { + $crate::vendor::__unstable_detect_feature( + $crate::vendor::__Feature::avx512_vpopcntdq{}) + }; ("fma") => { $crate::vendor::__unstable_detect_feature( $crate::vendor::__Feature::fma{}) @@ -85,6 +129,30 @@ macro_rules! __unstable_detect_feature { $crate::vendor::__unstable_detect_feature( $crate::vendor::__Feature::popcnt{}) }; + ("xsave") => { + $crate::vendor::__unstable_detect_feature( + $crate::vendor::__Feature::xsave{}) + }; + ("xsaveopt") => { + $crate::vendor::__unstable_detect_feature( + $crate::vendor::__Feature::xsaveopt{}) + }; + ("xsave") => { + $crate::vendor::__unstable_detect_feature( + $crate::vendor::__Feature::xsave{}) + }; + ("xsaveopt") => { + $crate::vendor::__unstable_detect_feature( + $crate::vendor::__Feature::xsaveopt{}) + }; + ("xsaves") => { + $crate::vendor::__unstable_detect_feature( + $crate::vendor::__Feature::xsaves{}) + }; + ("xsavec") => { + $crate::vendor::__unstable_detect_feature( + $crate::vendor::__Feature::xsavec{}) + }; ($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) }; @@ -110,10 +178,32 @@ pub enum __Feature { sse4_1, /// SSE4.2 (Streaming SIMD Extensions 4.2) sse4_2, + /// SSE4a (Streaming SIMD Extensions 4a) + sse4a, /// AVX (Advanced Vector Extensions) avx, /// AVX2 (Advanced Vector Extensions 2) avx2, + /// AVX-512 F (Foundation) + avx512f, + /// AVX-512 CD (Conflict Detection Instructions) + avx512cd, + /// AVX-512 ER (Exponential and Reciprocal Instructions) + avx512er, + /// AVX-512 PF (Prefetch Instructions) + avx512pf, + /// AVX-512 BW (Byte and Word Instructions) + avx512bw, + /// AVX-512 DQ (Doubleword and Quadword) + avx512dq, + /// AVX-512 VL (Vector Length Extensions) + avx512vl, + /// AVX-512 IFMA (Integer Fused Multiply Add) + avx512_ifma, + /// AVX-512 VBMI (Vector Byte Manipulation Instructions) + avx512_vbmi, + /// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and Quadword) + avx512_vpopcntdq, /// FMA (Fused Multiply Add) fma, /// BMI1 (Bit Manipulation Instructions 1) @@ -127,7 +217,14 @@ pub enum __Feature { tbm, /// POPCNT (Population Count) popcnt, - + /// XSAVE (Save Processor Extended States) + xsave, + /// XSAVEOPT (Save Processor Extended States Optimized) + xsaveopt, + /// XSAVES (Save Processor Extended States Supervisor) + xsaves, + /// XSAVEC (Save Processor Extended States Compacted) + xsavec, #[doc(hidden)] __NonExhaustive, } @@ -159,102 +256,176 @@ fn test_bit(x: usize, bit: u32) -> bool { /// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf /// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf fn detect_features() -> usize { - let extended_features_ebx; - let proc_info_ecx; - let proc_info_edx; + use super::cpuid::{__cpuid, __cpuid_count, has_cpuid, CpuidResult}; + use super::xsave::_xgetbv; + let mut value: usize = 0; - unsafe { - /// To obtain all feature flags we need two CPUID queries: + // If the x86 CPU does not support the CPUID instruction then it is too + // old to support any of the currently-detectable features. + if !has_cpuid() { + return value; + } - /// 1. EAX=1, ECX=0: Queries "Processor Info and Feature Bits" - /// This gives us most of the CPU features in ECX and EDX (see - /// below). - asm!("cpuid" - : "={ecx}"(proc_info_ecx), "={edx}"(proc_info_edx) - : "{eax}"(0x0000_0001_u32), "{ecx}"(0 as u32) - : :); + // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU + // has `cpuid` support. + + // 0. EAX = 0: Basic Information: + // - EAX returns the "Highest Function Parameter", that is, the maximum + // leaf value for subsequent calls of `cpuinfo` in range [0, + // 0x8000_0000]. - The vendor ID is stored in 12 u8 ascii chars, + // returned in EBX, EDX, and ECX (in that order): + let (max_leaf, vendor_id) = unsafe { + let CpuidResult { + eax: max_leaf, + ebx, + ecx, + edx, + } = __cpuid(0); + let vendor_id: [[u8; 4]; 3] = [ + ::std::mem::transmute(ebx), + ::std::mem::transmute(edx), + ::std::mem::transmute(ecx), + ]; + let vendor_id: [u8; 12] = ::std::mem::transmute(vendor_id); + (max_leaf, vendor_id) + }; - /// 2. EAX=7, ECX=0: Queries "Extended Features" - /// This gives us information about bmi,bmi2, and avx2 support - /// (see below); the result in ECX is not currently needed. - asm!("cpuid" - : "={ebx}"(extended_features_ebx) - : "{eax}"(0x0000_0007_u32), "{ecx}"(0 as u32) - : :); + if max_leaf < 1 { + // Earlier Intel 486, CPUID not implemented + return value; } - let mut value: usize = 0; + // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits"; + // Contains information about most x86 features. + let CpuidResult { + ecx: proc_info_ecx, + edx: proc_info_edx, + .. + } = unsafe { __cpuid(0x0000_0001_u32) }; - if test_bit(extended_features_ebx, 3) { - value = set_bit(value, __Feature::bmi as u32); - } - if test_bit(extended_features_ebx, 8) { - value = set_bit(value, __Feature::bmi2 as u32); - } + // EAX = 7, ECX = 0: Queries "Extended Features"; + // Contains information about bmi,bmi2, and avx2 support. + let (extended_features_ebx, extended_features_ecx) = if max_leaf >= 7 { + let CpuidResult { ebx, ecx, .. } = unsafe { __cpuid(0x0000_0007_u32) }; + (ebx, ecx) + } else { + (0, 0) // CPUID does not support "Extended Features" + }; - if test_bit(proc_info_ecx, 0) { - value = set_bit(value, __Feature::sse3 as u32); - } - if test_bit(proc_info_ecx, 5) { - value = set_bit(value, __Feature::abm as u32); - } - if test_bit(proc_info_ecx, 9) { - value = set_bit(value, __Feature::ssse3 as u32); - } - if test_bit(proc_info_ecx, 12) { - value = set_bit(value, __Feature::fma as u32); - } - if test_bit(proc_info_ecx, 19) { - value = set_bit(value, __Feature::sse4_1 as u32); - } - if test_bit(proc_info_ecx, 20) { - value = set_bit(value, __Feature::sse4_2 as u32); - } - if test_bit(proc_info_ecx, 21) { - value = set_bit(value, __Feature::tbm as u32); - } - if test_bit(proc_info_ecx, 23) { - value = set_bit(value, __Feature::popcnt as u32); - } + // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported + // - EAX returns the max leaf value for extended information, that is, + // `cpuid` calls in range [0x8000_0000; u32::MAX]: + let CpuidResult { + eax: extended_max_leaf, + .. + } = unsafe { __cpuid(0x8000_0000_u32) }; - if test_bit(proc_info_edx, 25) { - value = set_bit(value, __Feature::sse as u32); - } - if test_bit(proc_info_edx, 26) { - value = set_bit(value, __Feature::sse2 as u32); - } + // EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature + // Bits" + let extended_proc_info_ecx = if extended_max_leaf >= 1 { + let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) }; + ecx + } else { + 0 + }; - // ECX[26] detects XSAVE and ECX[27] detects OSXSAVE, that is, whether the - // OS is AVX enabled and supports saving the state of the AVX/AVX2 vector - // registers on context-switches, see: - // - // - https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled - // - https://hg.mozilla. - // org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190 - // - if test_bit(proc_info_ecx, 26) && test_bit(proc_info_ecx, 27) { - /// XGETBV: reads the contents of the extended control - /// register (XCR). - unsafe fn xgetbv(xcr_no: u32) -> u64 { - let eax: u32; - let edx: u32; - // xgetbv - asm!("xgetbv" - : "={eax}"(eax), "={edx}"(edx) - : "{ecx}"(xcr_no) - : :); - ((edx as u64) << 32) | (eax as u64) - } + { + // borrows value till the end of this scope: + let mut enable = |r, rb, f| if test_bit(r as usize, rb) { + value = set_bit(value, f as u32); + }; + + enable(proc_info_ecx, 0, __Feature::sse3); + enable(proc_info_ecx, 9, __Feature::ssse3); + enable(proc_info_ecx, 12, __Feature::fma); + enable(proc_info_ecx, 19, __Feature::sse4_1); + enable(proc_info_ecx, 20, __Feature::sse4_2); + enable(proc_info_ecx, 23, __Feature::popcnt); + enable(proc_info_edx, 25, __Feature::sse); + enable(proc_info_edx, 26, __Feature::sse2); + + enable(extended_features_ebx, 3, __Feature::bmi); + enable(extended_features_ebx, 8, __Feature::bmi2); + + // `XSAVE` and `AVX` support: + if test_bit(proc_info_ecx as usize, 26) { + // 0. Here the CPU supports `XSAVE`. + + // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and + // supports saving the state of the AVX/AVX2 vector registers on + // context-switches, see: + // + // - https://software.intel. + // com/en-us/blogs/2011/04/14/is-avx-enabled + // - https://hg.mozilla. + // org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190 + let cpu_osxsave = test_bit(proc_info_ecx as usize, 27); - // This is safe because on x86 `xgetbv` is always available. - if unsafe { xgetbv(0) } & 6 == 6 { - if test_bit(proc_info_ecx, 28) { - value = set_bit(value, __Feature::avx as u32); + // 2. The OS must have signaled the CPU that it supports saving and + // restoring the SSE and AVX registers by setting `XCR0.SSE[1]` and + // `XCR0.AVX[2]` to `1`. + // + // This is safe because the CPU supports `xsave` + let xcr0 = unsafe { _xgetbv(0) }; + let os_avx_support = xcr0 & 6 == 6; + let os_avx512_support = xcr0 & 224 == 224; + + if cpu_osxsave && os_avx_support { + // Only if the OS and the CPU support saving/restoring the AVX + // registers we enable `xsave` support: + enable(proc_info_ecx, 26, __Feature::xsave); + + // And AVX/AVX2: + enable(proc_info_ecx, 28, __Feature::avx); + enable(extended_features_ebx, 5, __Feature::avx2); + + // For AVX-512 the OS also needs to support saving/restoring + // the + // extended state, only then we enable AVX-512 support: + if os_avx512_support { + enable(extended_features_ebx, 16, __Feature::avx512f); + enable(extended_features_ebx, 17, __Feature::avx512dq); + enable(extended_features_ebx, 21, __Feature::avx512_ifma); + enable(extended_features_ebx, 26, __Feature::avx512pf); + enable(extended_features_ebx, 27, __Feature::avx512er); + enable(extended_features_ebx, 28, __Feature::avx512cd); + enable(extended_features_ebx, 30, __Feature::avx512bw); + enable(extended_features_ebx, 31, __Feature::avx512vl); + enable(extended_features_ecx, 1, __Feature::avx512_vbmi); + enable( + extended_features_ecx, + 14, + __Feature::avx512_vpopcntdq, + ); + } } - if test_bit(extended_features_ebx, 5) { - value = set_bit(value, __Feature::avx2 as u32); + + // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = + // 1) + if max_leaf >= 0xd { + let CpuidResult { + eax: proc_extended_state1_eax, + .. + } = unsafe { __cpuid_count(0xd_u32, 1) }; + enable(proc_extended_state1_eax, 0, __Feature::xsaveopt); + enable(proc_extended_state1_eax, 1, __Feature::xsavec); + enable(proc_extended_state1_eax, 3, __Feature::xsaves); } } + + // This detects ABM on AMD CPUs and LZCNT on Intel CPUs. + // On intel CPUs with popcnt, lzcnt implements the + // "missing part" of ABM, so we map both to the same + // internal feature. + // + // The `cfg_feature_enabled!("lzcnt")` macro then + // internally maps to __Feature::abm. + enable(extended_proc_info_ecx, 5, __Feature::abm); + if vendor_id == *b"AuthenticAMD" { + // These features are only available on AMD CPUs: + enable(extended_proc_info_ecx, 6, __Feature::sse4a); + enable(extended_proc_info_ecx, 21, __Feature::tbm); + } } value @@ -294,12 +465,29 @@ mod tests { println!("sse4.2: {:?}", cfg_feature_enabled!("sse4.2")); println!("avx: {:?}", cfg_feature_enabled!("avx")); println!("avx2: {:?}", cfg_feature_enabled!("avx2")); + println!("avx512f {:?}", cfg_feature_enabled!("avx512f")); + println!("avx512cd {:?}", cfg_feature_enabled!("avx512cd")); + println!("avx512er {:?}", cfg_feature_enabled!("avx512er")); + println!("avx512pf {:?}", cfg_feature_enabled!("avx512pf")); + println!("avx512bw {:?}", cfg_feature_enabled!("avx512bw")); + println!("avx512dq {:?}", cfg_feature_enabled!("avx512dq")); + println!("avx512vl {:?}", cfg_feature_enabled!("avx512vl")); + println!("avx512ifma {:?}", cfg_feature_enabled!("avx512ifma")); + println!("avx512vbmi {:?}", cfg_feature_enabled!("avx512vbmi")); + println!( + "avx512vpopcntdq {:?}", + cfg_feature_enabled!("avx512vpopcntdq") + ); + println!("fma: {:?}", cfg_feature_enabled!("fma")); println!("abm: {:?}", cfg_feature_enabled!("abm")); println!("bmi: {:?}", cfg_feature_enabled!("bmi")); println!("bmi2: {:?}", cfg_feature_enabled!("bmi2")); println!("tbm: {:?}", cfg_feature_enabled!("tbm")); println!("popcnt: {:?}", cfg_feature_enabled!("popcnt")); println!("lzcnt: {:?}", cfg_feature_enabled!("lzcnt")); - println!("fma: {:?}", cfg_feature_enabled!("fma")); + println!("xsave {:?}", cfg_feature_enabled!("xsave")); + println!("xsaveopt {:?}", cfg_feature_enabled!("xsaveopt")); + println!("xsaves {:?}", cfg_feature_enabled!("xsaves")); + println!("xsavec {:?}", cfg_feature_enabled!("xsavec")); } } diff --git a/tests/cpu-detection.rs b/tests/cpu-detection.rs index 294fd8ca7c..4207824c0e 100644 --- a/tests/cpu-detection.rs +++ b/tests/cpu-detection.rs @@ -20,10 +20,26 @@ fn works() { assert_eq!(cfg_feature_enabled!("sse4.2"), information.sse4_2()); assert_eq!(cfg_feature_enabled!("avx"), information.avx()); assert_eq!(cfg_feature_enabled!("avx2"), information.avx2()); + // assert_eq!(cfg_feature_enabled!("avx512f"), information.avx512f()); + // assert_eq!(cfg_feature_enabled!("avx512cd"), information.avx512cd()); + // assert_eq!(cfg_feature_enabled!("avx512er"), information.avx512er()); + // assert_eq!(cfg_feature_enabled!("avx512pf"), information.avx512pf()); + // assert_eq!(cfg_feature_enabled!("avx512bw"), information.avx512bw()); + // assert_eq!(cfg_feature_enabled!("avx512dq"), information.avx512dq()); + // assert_eq!(cfg_feature_enabled!("avx512vl"), information.avx512vl()); + // assert_eq!(cfg_feature_enabled!("avx512ifma"), information.avx512ifma()); + // assert_eq!(cfg_feature_enabled!("avx512vbmi"), information.avx512vbmi()); + // assert_eq!(cfg_feature_enabled!("avx512vpopcntdq"), information.avx512vpopcntdq()); assert_eq!(cfg_feature_enabled!("fma"), information.fma()); assert_eq!(cfg_feature_enabled!("bmi"), information.bmi1()); assert_eq!(cfg_feature_enabled!("bmi2"), information.bmi2()); assert_eq!(cfg_feature_enabled!("popcnt"), information.popcnt()); - - // TODO: tbm, abm, lzcnt + // assert_eq!(cfg_feature_enabled!("sse4a"), information.sse4a()); + assert_eq!(cfg_feature_enabled!("abm"), information.lzcnt()); + assert_eq!(cfg_feature_enabled!("tbm"), information.tbm()); + assert_eq!(cfg_feature_enabled!("lzcnt"), information.lzcnt()); + assert_eq!(cfg_feature_enabled!("xsave"), information.xsave()); + assert_eq!(cfg_feature_enabled!("xsaveopt"), information.xsaveopt()); + assert_eq!(cfg_feature_enabled!("xsavec"), information.xsavec_and_xrstor()); + assert_eq!(cfg_feature_enabled!("xsavec"), information.xsaves_xrstors_and_ia32_xss()); } From 65d597b0c31d8eaeec2b8fcb07781ff252c18dc5 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 7 Nov 2017 11:41:08 +0100 Subject: [PATCH 6/9] [cpuid] Improve docs, implement __get_cpuid_max Closes #174 . --- src/x86/cpuid.rs | 62 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/src/x86/cpuid.rs b/src/x86/cpuid.rs index d19f485e45..9820f23f6e 100644 --- a/src/x86/cpuid.rs +++ b/src/x86/cpuid.rs @@ -19,16 +19,21 @@ pub struct CpuidResult { pub edx: u32, } -/// `cpuid` instruction. +/// Returns the result of the `cpuid` instruction for a given `leaf` (`EAX`) +/// and +/// `sub_leaf` (`ECX`). /// -/// The [CPUID Wikipedia page][wiki_cpuid] contains how to query which -/// information using the `eax` and `ecx` registers, and the format in -/// which this information is returned in `eax...edx`. +/// The highest-supported leaf value is returned by the first tuple argument of +/// [`__get_cpuid_max(0)`](fn.__get_cpuid_max.html). For leaves containung +/// sub-leaves, the second tuple argument returns the highest-supported +/// sub-leaf +/// value. /// -/// The `has_cpuid()` intrinsics can be used to query whether the `cpuid` -/// instruction is available. +/// The [CPUID Wikipedia page][wiki_cpuid] contains how to query which +/// information using the `EAX` and `ECX` registers, and the interpretation of +/// the results returned in `EAX`, `EBX`, `ECX`, and `EDX`. /// -/// The definitive references are: +/// The references are: /// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: /// Instruction Set Reference, A-Z][intel64_ref]. /// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and @@ -39,22 +44,28 @@ pub struct CpuidResult { /// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf #[inline(always)] #[cfg_attr(test, assert_instr(cpuid))] -pub unsafe fn __cpuid_count(eax: u32, ecx: u32) -> CpuidResult { +pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult { let mut r = ::std::mem::uninitialized::(); - asm!("cpuid" - : "={eax}"(r.eax), "={ebx}"(r.ebx), "={ecx}"(r.ecx), "={edx}"(r.edx) - : "{eax}"(eax), "{ecx}"(ecx) - : :); + if cfg!(target_arch = "x86") { + asm!("cpuid" + : "={eax}"(r.eax), "={ebx}"(r.ebx), "={ecx}"(r.ecx), "={edx}"(r.edx) + : "{eax}"(leaf), "{ecx}"(sub_leaf) + : :); + } else { + // x86-64 uses %rbx as the base register, so preserve it. + asm!("cpuid\n" + : "={eax}"(r.eax), "={ebx}"(r.ebx), "={ecx}"(r.ecx), "={edx}"(r.edx) + : "{eax}"(leaf), "{ecx}"(sub_leaf) + : "rbx" :); + } r } -/// `cpuid` instruction. -/// -/// See `__cpuid_count`. +/// See [`__cpuid_count`](fn.__cpuid_count.html). #[inline(always)] #[cfg_attr(test, assert_instr(cpuid))] -pub unsafe fn __cpuid(eax: u32) -> CpuidResult { - __cpuid_count(eax, 0) +pub unsafe fn __cpuid(leaf: u32) -> CpuidResult { + __cpuid_count(leaf, 0) } /// Does the host support the `cpuid` instruction? @@ -90,6 +101,22 @@ pub fn has_cpuid() -> bool { } } +/// Returns the highest-supported `leaf` (`EAX`) and sub-leaf (`ECX`) `cpuid` +/// values. +/// +/// If `cpuid` is supported, and `leaf` is zero, then the first tuple argument +/// contains the highest `leaf` value that `cpuid` supports. For `leaf`s +/// containing sub-leafs, the second tuple argument contains the +/// highest-supported sub-leaf value. +/// +/// See also [`__cpuid`](fn.__cpuid.html) and +/// [`__cpuid_count`](fn.__cpuid_count.html). +#[inline(always)] +pub unsafe fn __get_cpuid_max(leaf: u32) -> (u32, u32) { + let CpuidResult { eax, ebx, .. } = __cpuid(leaf); + (eax, ebx) +} + #[cfg(test)] mod tests { use super::*; @@ -115,5 +142,4 @@ mod tests { } } } - } From 582758a6615929c67c797a19ed67e0afb8705d54 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 9 Nov 2017 18:24:40 +0100 Subject: [PATCH 7/9] [ci] add intel_sde feature --- .travis.yml | 4 ++-- Cargo.toml | 3 ++- ci/run-docker.sh | 3 ++- ci/run.sh | 12 ++++++++---- src/x86/mod.rs | 4 ++++ src/x86/xsave.rs | 3 ++- 6 files changed, 20 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index c91e4f34de..f97609447c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ matrix: - env: TARGET=i586-unknown-linux-gnu - env: TARGET=i686-unknown-linux-gnu - env: TARGET=x86_64-unknown-linux-gnu NO_ADD=1 - - env: TARGET=x86_64-unknown-linux-gnu-emulated NO_ADD=1 STDSIMD_TEST_EVERYTHING=1 + - env: TARGET=x86_64-unknown-linux-gnu-emulated NO_ADD=1 STDSIMD_TEST_EVERYTHING=1 FEATURES="intel_sde" - env: TARGET=arm-unknown-linux-gnueabihf - env: TARGET=armv7-unknown-linux-gnueabihf - env: TARGET=aarch64-unknown-linux-gnu @@ -33,7 +33,7 @@ install: script: - cargo generate-lockfile - - ci/run-docker.sh $TARGET + - ci/run-docker.sh $TARGET $FEATURES notifications: email: diff --git a/Cargo.toml b/Cargo.toml index 697ebf8d74..16c995e7ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,4 +32,5 @@ cupid = "0.4.0" [features] strict = [] -std = [] \ No newline at end of file +std = [] +intel_sde = [] \ No newline at end of file diff --git a/ci/run-docker.sh b/ci/run-docker.sh index d5ea59e4cd..fe9abac13e 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -16,6 +16,7 @@ run() { --env CARGO_HOME=/cargo \ --volume `rustc --print sysroot`:/rust:ro \ --env TARGET=$target \ + --env FEATURES=$2 \ --env STDSIMD_TEST_EVERYTHING \ --volume `pwd`:/checkout:ro \ --volume `pwd`/target:/checkout/target \ @@ -31,5 +32,5 @@ if [ -z "$1" ]; then run $d done else - run $1 + run $1 $2 fi diff --git a/ci/run.sh b/ci/run.sh index f4d3382cb0..1d4be7e09f 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -15,10 +15,14 @@ case ${TARGET} in ;; esac +FEATURES="strict,$FEATURES" +FEATURES_STD="${FEATURES},std" + echo "RUSTFLAGS=${RUSTFLAGS}" +echo "FEATURES=${FEATURES}" -cargo test --target $TARGET --features "strict" -cargo test --release --target $TARGET --features "strict" +cargo test --target $TARGET --features $FEATURES +cargo test --release --target $TARGET --features $FEATURES -cargo test --target $TARGET --features "strict,std" -cargo test --release --target $TARGET --features "strict,std" +cargo test --target $TARGET --features $FEATURES_STD +cargo test --release --target $TARGET --features $FEATURES_STD diff --git a/src/x86/mod.rs b/src/x86/mod.rs index 081d1635d3..990db824d4 100644 --- a/src/x86/mod.rs +++ b/src/x86/mod.rs @@ -16,6 +16,8 @@ pub use self::avx2::*; pub use self::abm::*; pub use self::bmi::*; pub use self::bmi2::*; + +#[cfg(not(feature = "intel_sde"))] pub use self::tbm::*; pub use self::runtime::{__unstable_detect_feature, __Feature}; @@ -48,6 +50,8 @@ mod avx2; mod abm; mod bmi; mod bmi2; + +#[cfg(not(feature = "intel_sde"))] mod tbm; #[allow(non_camel_case_types)] diff --git a/src/x86/xsave.rs b/src/x86/xsave.rs index 0007399db3..4b9f5b8f2b 100644 --- a/src/x86/xsave.rs +++ b/src/x86/xsave.rs @@ -377,6 +377,7 @@ mod tests { assert_eq!(a, b); } + #[cfg(not(feature = "intel_sde"))] #[simd_test = "xsaves"] unsafe fn xsaves() { let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers @@ -389,7 +390,7 @@ mod tests { assert_eq!(a, b); } - #[cfg(not(target_arch = "x86"))] + #[cfg(not(any(target_arch = "x86", feature = "intel_sde")))] #[simd_test = "xsaves"] unsafe fn xsaves64() { let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers From 2aa0e810b3c9b49e22a3c771d8f9b841c4bb01c2 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 26 Oct 2017 22:00:03 +0200 Subject: [PATCH 8/9] [arm] runtime-detection support --- .../aarch64-unknown-linux-gnu/Dockerfile | 1 + ci/run-docker.sh | 3 +- ci/run.sh | 9 +- src/lib.rs | 22 +- src/macros.rs | 50 ----- src/runtime/aarch64.rs | 56 +++++ src/runtime/arm.rs | 66 ++++++ src/runtime/bit.rs | 11 + src/runtime/cache.rs | 29 +++ src/runtime/linux/cpuinfo.rs | 211 ++++++++++++++++++ src/runtime/linux/mod.rs | 31 +++ src/runtime/macros.rs | 39 ++++ src/runtime/mod.rs | 40 ++++ src/{x86/runtime.rs => runtime/x86.rs} | 50 +---- src/x86/mod.rs | 4 - tests/cpu-detection.rs | 19 +- 16 files changed, 535 insertions(+), 106 deletions(-) create mode 100644 src/runtime/aarch64.rs create mode 100644 src/runtime/arm.rs create mode 100644 src/runtime/bit.rs create mode 100644 src/runtime/cache.rs create mode 100644 src/runtime/linux/cpuinfo.rs create mode 100644 src/runtime/linux/mod.rs create mode 100644 src/runtime/macros.rs create mode 100644 src/runtime/mod.rs rename src/{x86/runtime.rs => runtime/x86.rs} (91%) diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile index 4e3bff0ac0..68261a2f03 100644 --- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -8,6 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ qemu-user \ make \ file + ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \ CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -L /usr/aarch64-linux-gnu" \ OBJDUMP=aarch64-linux-gnu-objdump diff --git a/ci/run-docker.sh b/ci/run-docker.sh index fe9abac13e..d08a164be3 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -4,10 +4,11 @@ set -ex run() { - echo $1 + echo "Building docker container for TARGET=${1}" docker build -t stdsimd ci/docker/$1 mkdir -p target target=$(echo $1 | sed 's/-emulated//') + echo "Running docker" docker run \ --user `id -u`:`id -g` \ --rm \ diff --git a/ci/run.sh b/ci/run.sh index 1d4be7e09f..8c8f18c37d 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -20,9 +20,10 @@ FEATURES_STD="${FEATURES},std" echo "RUSTFLAGS=${RUSTFLAGS}" echo "FEATURES=${FEATURES}" +echo "OBJDUMP=${OBJDUMP}" -cargo test --target $TARGET --features $FEATURES -cargo test --release --target $TARGET --features $FEATURES +cargo test --target $TARGET --features $FEATURES --verbose -- --nocapture +cargo test --release --target $TARGET --features $FEATURES --verbose -- --nocapture -cargo test --target $TARGET --features $FEATURES_STD -cargo test --release --target $TARGET --features $FEATURES_STD +cargo test --target $TARGET --features $FEATURES_STD --verbose -- --nocapture +cargo test --release --target $TARGET --features $FEATURES_STD --verbose -- --nocapture diff --git a/src/lib.rs b/src/lib.rs index 05df07fa21..add34c2742 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -159,8 +159,29 @@ pub mod vendor { pub use aarch64::*; pub use nvptx::*; + + #[cfg(any( + // x86/x86_64: + any(target_arch = "x86", target_arch = "x86_64"), + // linux + std + (arm|aarch64): + all(target_os = "linux", + feature = "std", + any(target_arch = "arm", target_arch = "aarch64")) + ))] + pub use runtime::{__unstable_detect_feature, __Feature}; } +#[cfg(any( + // x86/x86_64: + any(target_arch = "x86", target_arch = "x86_64"), + // linux + std + (arm|aarch64): + all(target_os = "linux", + feature = "std", + any(target_arch = "arm", target_arch = "aarch64")) +))] +#[macro_use] +mod runtime; + #[macro_use] mod macros; mod simd_llvm; @@ -204,7 +225,6 @@ mod v16 { } #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -#[macro_use] mod x86; #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] diff --git a/src/macros.rs b/src/macros.rs index c2009fa939..563e196b64 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -373,56 +373,6 @@ macro_rules! define_casts { } } -/// Is a feature supported by the host CPU? -/// -/// This macro performs run-time feature detection. It returns true if the host -/// CPU in which the binary is running on supports a particular feature. -#[macro_export] -macro_rules! cfg_feature_enabled { - ($name:tt) => ( - { - #[cfg(target_feature = $name)] - { - true - } - #[cfg(not(target_feature = $name))] - { - __unstable_detect_feature!($name) - } - } - ) -} - -/// On ARM features are only detected at compile-time using -/// cfg(target_feature), so if this macro is executed the -/// feature is not supported. -#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] -#[macro_export] -#[doc(hidden)] -macro_rules! __unstable_detect_feature { - ("neon") => { false }; - ($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) }; -} - -/// In all unsupported architectures using the macro is an error -#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", - target_arch = "arm", target_arch = "aarch64")))] -#[macro_export] -#[doc(hidden)] -macro_rules! __unstable_detect_feature { - ($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) }; -} - -#[cfg(test)] -mod tests { - #[cfg(target_arch = "x86_64")] - #[test] - fn test_macros() { - assert!(cfg_feature_enabled!("sse")); - } -} - - #[cfg(test)] #[macro_export] macro_rules! test_arithmetic_ { diff --git a/src/runtime/aarch64.rs b/src/runtime/aarch64.rs new file mode 100644 index 0000000000..5d10fc06e3 --- /dev/null +++ b/src/runtime/aarch64.rs @@ -0,0 +1,56 @@ +//! Run-time feature detection on ARM Aarch64. +use super::{bit, linux}; + +#[macro_export] +#[doc(hidden)] +macro_rules! __unstable_detect_feature { + ("neon") => { + // FIXME: this should be removed once we rename Aarch64 neon to asimd + $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::asimd{}) + }; + ("asimd") => { + $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::asimd{}) + }; + ("pmull") => { + $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::pmull{}) + }; + ($t:tt) => { compile_error!(concat!("unknown arm target feature: ", $t)) }; +} + +/// ARM Aarch64 CPU Feature enum. Each variant denotes a position in a bitset +/// for a particular feature. +/// +/// PLEASE: do not use this, it is an implementation detail subject to change. +#[doc(hidden)] +#[allow(non_camel_case_types)] +#[repr(u8)] +pub enum __Feature { + /// ARM Advanced SIMD (ASIMD) - Aarch64 + asimd, + /// Polynomial Multiply + pmull, +} + +pub fn detect_features(mut x: T) -> usize { + let value: usize = 0; + { + let mut enable_feature = |f| { + if x.has_feature(&f) { + bit::set(value, f as u32); + } + }; + enable_feature(__Feature::asimd); + enable_feature(__Feature::pmull); + } + value +} + +impl linux::FeatureQuery for linux::CpuInfo { + fn has_feature(&mut self, x: &__Feature) -> bool { + use self::__Feature::*; + match *x { + asimd => self.field("Features").has("asimd"), + pmull => self.field("Features").has("pmull"), + } + } +} diff --git a/src/runtime/arm.rs b/src/runtime/arm.rs new file mode 100644 index 0000000000..60ef909fca --- /dev/null +++ b/src/runtime/arm.rs @@ -0,0 +1,66 @@ +//! Run-time feature detection on ARM Aarch32. + +use super::{bit, linux}; + +#[macro_export] +#[doc(hidden)] +macro_rules! __unstable_detect_feature { + ("neon") => { + $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::neon{}) + }; + ("pmull") => { + $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::pmull{}) + }; + ($t:tt) => { compile_error!(concat!("unknown arm target feature: ", $t)) }; +} + +/// ARM CPU Feature enum. Each variant denotes a position in a bitset for a +/// particular feature. +/// +/// PLEASE: do not use this, it is an implementation detail subject to change. +#[doc(hidden)] +#[allow(non_camel_case_types)] +#[repr(u8)] +pub enum __Feature { + /// ARM Advanced SIMD (NEON) - Aarch32 + neon, + /// Polynomial Multiply + pmull, +} + +pub fn detect_features(mut x: T) -> usize { + let value: usize = 0; + { + let mut enable_feature = |f| { + if x.has_feature(&f) { + bit::set(value, f as u32); + } + }; + enable_feature(__Feature::neon); + enable_feature(__Feature::pmull); + } + value +} + +/// Is the CPU known to have a broken NEON unit? +/// +/// See https://crbug.com/341598. +fn has_broken_neon(cpuinfo: &linux::CpuInfo) -> bool { + cpuinfo.field("CPU implementer") == "0x51" + && cpuinfo.field("CPU architecture") == "7" + && cpuinfo.field("CPU variant") == "0x1" + && cpuinfo.field("CPU part") == "0x04d" + && cpuinfo.field("CPU revision") == "0" +} + +impl linux::FeatureQuery for linux::CpuInfo { + fn has_feature(&mut self, x: &__Feature) -> bool { + use self::__Feature::*; + match *x { + neon => { + self.field("Features").has("neon") && !has_broken_neon(self) + } + pmull => self.field("Features").has("pmull"), + } + } +} diff --git a/src/runtime/bit.rs b/src/runtime/bit.rs new file mode 100644 index 0000000000..42483e5225 --- /dev/null +++ b/src/runtime/bit.rs @@ -0,0 +1,11 @@ +//! Bit manipulation utilities + +/// Sets the `bit` of `x`. +pub const fn set(x: usize, bit: u32) -> usize { + x | 1 << bit +} + +/// Tests the `bit` of `x`. +pub const fn test(x: usize, bit: u32) -> bool { + x & (1 << bit) != 0 +} diff --git a/src/runtime/cache.rs b/src/runtime/cache.rs new file mode 100644 index 0000000000..6ec39e98e8 --- /dev/null +++ b/src/runtime/cache.rs @@ -0,0 +1,29 @@ +//! Cache of run-time feature detection + +use super::bit; +use std::sync::atomic::{AtomicUsize, Ordering}; + +/// This global variable is a bitset used to cache the features supported by +/// the +/// CPU. +static CACHE: AtomicUsize = AtomicUsize::new(::std::usize::MAX); + +/// Test the `bit` of the storage. If the storage has not been initialized, +/// initializes it with the result of `f()`. +/// +/// On its first invocation, it detects the CPU features and caches them in the +/// `FEATURES` global variable as an `AtomicUsize`. +/// +/// It uses the `__Feature` variant to index into this variable as a bitset. If +/// the bit is set, the feature is enabled, and otherwise it is disabled. +/// +/// PLEASE: do not use this, it is an implementation detail subject to change. +pub fn test(bit: u32, f: F) -> bool +where + F: FnOnce() -> usize, +{ + if CACHE.load(Ordering::Relaxed) == ::std::usize::MAX { + CACHE.store(f(), Ordering::Relaxed); + } + bit::test(CACHE.load(Ordering::Relaxed), bit) +} diff --git a/src/runtime/linux/cpuinfo.rs b/src/runtime/linux/cpuinfo.rs new file mode 100644 index 0000000000..0b18c41cef --- /dev/null +++ b/src/runtime/linux/cpuinfo.rs @@ -0,0 +1,211 @@ +//! Reads /proc/cpuinfo on Linux systems + +/// cpuinfo +pub struct CpuInfo { + raw: String, +} + +/// Field of cpuinfo +#[derive(Debug)] +pub struct CpuInfoField<'a>(Option<&'a str>); + +impl<'a> PartialEq<&'a str> for CpuInfoField<'a> { + fn eq(&self, other: &&'a str) -> bool { + match self.0 { + None => other.len() == 0, + Some(f) => f == other.trim(), + } + } +} + +impl<'a> CpuInfoField<'a> { + pub fn new<'b>(v: Option<&'b str>) -> CpuInfoField<'b> { + match v { + None => CpuInfoField::<'b>(None), + Some(f) => CpuInfoField::<'b>(Some(f.trim())), + } + } + /// Does the field exist? + pub fn exists(&self) -> bool { + self.0.is_some() + } + /// Does the field contain `other`? + pub fn has(&self, other: &str) -> bool { + match self.0 { + None => other.len() == 0, + Some(f) => { + let other = other.trim(); + for v in f.split(" ") { + if v == other { + return true; + } + } + false + } + } + } +} + +impl CpuInfo { + /// Reads /proc/cpuinfo into CpuInfo. + pub fn new() -> Result { + use std::io::Read; + let mut file = ::std::fs::File::open("/proc/cpuinfo")?; + let mut cpui = CpuInfo { raw: String::new() }; + file.read_to_string(&mut cpui.raw)?; + Ok(cpui) + } + /// Returns the value of the cpuinfo `field`. + pub fn field(&self, field: &str) -> CpuInfoField { + for l in self.raw.lines() { + if l.trim().starts_with(field) { + return CpuInfoField(l.split(": ").skip(1).next()); + } + } + CpuInfoField(None) + } + + /// Returns the `raw` contents of `/proc/cpuinfo` + fn raw(&self) -> &String { + &self.raw + } + + fn from_str(other: &str) -> Result { + Ok(CpuInfo { + raw: String::from(other), + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[cfg(target_os = "linux")] + #[test] + fn test_cpuinfo_linux() { + let cpuinfo = CpuInfo::new().unwrap(); + if cpuinfo.field("vendor_id") == "GenuineIntel" { + assert!(cpuinfo.field("flags").exists()); + assert!(!cpuinfo.field("vendor33_id").exists()); + assert!(cpuinfo.field("flags").has("sse")); + assert!(!cpuinfo.field("flags").has("avx314")); + } + println!("{}", cpuinfo.raw()); + } + + + const CORE_DUO_T6500: &str = r"processor : 0 +vendor_id : GenuineIntel +cpu family : 6 +model : 23 +model name : Intel(R) Core(TM)2 Duo CPU T6500 @ 2.10GHz +stepping : 10 +microcode : 0xa0b +cpu MHz : 1600.000 +cache size : 2048 KB +physical id : 0 +siblings : 2 +core id : 0 +cpu cores : 2 +apicid : 0 +initial apicid : 0 +fdiv_bug : no +hlt_bug : no +f00f_bug : no +coma_bug : no +fpu : yes +fpu_exception : yes +cpuid level : 13 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx lm constant_tsc arch_perfmon pebs bts aperfmperf pni dtes64 monitor ds_cpl est tm2 ssse3 cx16 xtpr pdcm sse4_1 xsave lahf_lm dtherm +bogomips : 4190.43 +clflush size : 64 +cache_alignment : 64 +address sizes : 36 bits physical, 48 bits virtual +power management: +"; + + #[test] + fn test_cpuinfo_linux_core_duo_t6500() { + let cpuinfo = CpuInfo::from_str(CORE_DUO_T6500).unwrap(); + assert_eq!(cpuinfo.field("vendor_id"), "GenuineIntel"); + assert_eq!(cpuinfo.field("cpu family"), "6"); + assert_eq!(cpuinfo.field("model"), "23"); + assert_eq!( + cpuinfo.field("model name"), + "Intel(R) Core(TM)2 Duo CPU T6500 @ 2.10GHz" + ); + assert_eq!( + cpuinfo.field("flags"), + "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx lm constant_tsc arch_perfmon pebs bts aperfmperf pni dtes64 monitor ds_cpl est tm2 ssse3 cx16 xtpr pdcm sse4_1 xsave lahf_lm dtherm" + ); + assert!(cpuinfo.field("flags").has("fpu")); + assert!(cpuinfo.field("flags").has("dtherm")); + assert!(cpuinfo.field("flags").has("sse2")); + assert!(!cpuinfo.field("flags").has("avx")); + } + + const ARM_CORTEX_A53: &str = r"Processor : AArch64 Processor rev 3 (aarch64) + processor : 0 + processor : 1 + processor : 2 + processor : 3 + processor : 4 + processor : 5 + processor : 6 + processor : 7 + Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 + CPU implementer : 0x41 + CPU architecture: AArch64 + CPU variant : 0x0 + CPU part : 0xd03 + CPU revision : 3 + + Hardware : HiKey Development Board + "; + + #[test] + fn test_cpuinfo_linux_arm_cortex_a53() { + let cpuinfo = CpuInfo::from_str(ARM_CORTEX_A53).unwrap(); + assert_eq!( + cpuinfo.field("Processor"), + "AArch64 Processor rev 3 (aarch64)" + ); + assert_eq!( + cpuinfo.field("Features"), + "fp asimd evtstrm aes pmull sha1 sha2 crc32" + ); + assert!(cpuinfo.field("Features").has("pmull")); + assert!(!cpuinfo.field("Features").has("neon")); + assert!(cpuinfo.field("Features").has("asimd")); + } + + const ARM_CORTEX_A57: &str = r"Processor : Cortex A57 Processor rev 1 (aarch64) +processor : 0 +processor : 1 +processor : 2 +processor : 3 +Features : fp asimd aes pmull sha1 sha2 crc32 wp half thumb fastmult vfp edsp neon vfpv3 tlsi vfpv4 idiva idivt +CPU implementer : 0x41 +CPU architecture: 8 +CPU variant : 0x1 +CPU part : 0xd07 +CPU revision : 1"; + + #[test] + fn test_cpuinfo_linux_arm_cortex_a57() { + let cpuinfo = CpuInfo::from_str(ARM_CORTEX_A57).unwrap(); + assert_eq!( + cpuinfo.field("Processor"), + "Cortex A57 Processor rev 1 (aarch64)" + ); + assert_eq!( + cpuinfo.field("Features"), + "fp asimd aes pmull sha1 sha2 crc32 wp half thumb fastmult vfp edsp neon vfpv3 tlsi vfpv4 idiva idivt" + ); + assert!(cpuinfo.field("Features").has("pmull")); + assert!(cpuinfo.field("Features").has("neon")); + assert!(cpuinfo.field("Features").has("asimd")); + } +} diff --git a/src/runtime/linux/mod.rs b/src/runtime/linux/mod.rs new file mode 100644 index 0000000000..6625152baf --- /dev/null +++ b/src/runtime/linux/mod.rs @@ -0,0 +1,31 @@ +//! Run-time feature detection for ARM on linux +mod cpuinfo; +pub use self::cpuinfo::CpuInfo; + +use super::__Feature; + +pub trait FeatureQuery { + fn has_feature(&mut self, x: &__Feature) -> bool; +} + +fn detect_features_impl(x: T) -> usize { + #[cfg(target_arch = "arm")] + { + super::arm::detect_features(x) + } + #[cfg(target_arch = "aarch64")] + { + super::aarch64::detect_features(x) + } +} + +/// Detects ARM features: +pub fn detect_features() -> usize { + // FIXME: use libc::getauxval, and if that fails /proc/auxv + // Try to read /proc/cpuinfo + if let Ok(v) = cpuinfo::CpuInfo::new() { + return detect_features_impl(v); + } + // Otherwise all features are disabled + 0 +} diff --git a/src/runtime/macros.rs b/src/runtime/macros.rs new file mode 100644 index 0000000000..e8278bb295 --- /dev/null +++ b/src/runtime/macros.rs @@ -0,0 +1,39 @@ +//! Run-time feature detection macros. + +/// Is a feature supported by the host CPU? +/// +/// This macro performs run-time feature detection. It returns true if the host +/// CPU in which the binary is running on supports a particular feature. +#[macro_export] +macro_rules! cfg_feature_enabled { + ($name:tt) => ( + { + #[cfg(target_feature = $name)] + { + true + } + #[cfg(not(target_feature = $name))] + { + __unstable_detect_feature!($name) + } + } + ) +} + +/// In all unsupported architectures using the macro is an error +#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", + target_arch = "arm", target_arch = "aarch64")))] +#[macro_export] +#[doc(hidden)] +macro_rules! __unstable_detect_feature { + ($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) }; +} + +#[cfg(test)] +mod tests { + #[cfg(target_arch = "x86_64")] + #[test] + fn test_macros() { + assert!(cfg_feature_enabled!("sse")); + } +} diff --git a/src/runtime/mod.rs b/src/runtime/mod.rs new file mode 100644 index 0000000000..a48b7b20ce --- /dev/null +++ b/src/runtime/mod.rs @@ -0,0 +1,40 @@ +//! Run-time feature detection +mod cache; +mod bit; + +#[macro_use] +mod macros; + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[macro_use] +mod x86; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +pub use self::x86::__Feature; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use self::x86::detect_features; + +#[cfg(all(target_arch = "arm", target_os = "linux", feature = "std"))] +#[macro_use] +mod arm; +#[cfg(all(target_arch = "arm", target_os = "linux", feature = "std"))] +pub use self::arm::__Feature; + +#[cfg(all(target_arch = "aarch64", target_os = "linux", feature = "std"))] +#[macro_use] +mod aarch64; +#[cfg(all(target_arch = "aarch64", target_os = "linux", feature = "std"))] +pub use self::aarch64::__Feature; + +#[cfg(all(feature = "std", target_os = "linux", + any(target_arch = "arm", target_arch = "aarch64")))] +mod linux; + +#[cfg(all(feature = "std", target_os = "linux", + any(target_arch = "arm", target_arch = "aarch64")))] +pub use self::linux::detect_features; + +/// Performs run-time feature detection. +#[doc(hidden)] +pub fn __unstable_detect_feature(x: __Feature) -> bool { + cache::test(x as u32, detect_features) +} diff --git a/src/x86/runtime.rs b/src/runtime/x86.rs similarity index 91% rename from src/x86/runtime.rs rename to src/runtime/x86.rs index 9cbe42808c..6d16a5398f 100644 --- a/src/x86/runtime.rs +++ b/src/runtime/x86.rs @@ -15,7 +15,8 @@ //! this functions queries the CPU for the available features and stores them //! in a global `AtomicUsize` variable. The query is performed by just checking //! whether the feature bit in this global variable is set or cleared. -use std::sync::atomic::{AtomicUsize, Ordering}; + +use super::bit; /// This macro maps the string-literal feature names to values of the /// `__Feature` enum at compile-time. The feature names used are the same as @@ -228,18 +229,6 @@ pub enum __Feature { #[doc(hidden)] __NonExhaustive, } -/// Sets the `bit`-th bit of `x`. -fn set_bit(x: usize, bit: u32) -> usize { - debug_assert!(32 > bit); - x | 1 << bit -} - -/// Tests the `bit`-th bit of `x`. -fn test_bit(x: usize, bit: u32) -> bool { - debug_assert!(32 > bit); - x & (1 << bit) != 0 -} - /// Run-time feature detection on x86 works by using the CPUID instruction. /// /// The [CPUID Wikipedia page][wiki_cpuid] contains @@ -255,9 +244,9 @@ fn test_bit(x: usize, bit: u32) -> bool { /// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID /// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf /// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf -fn detect_features() -> usize { - use super::cpuid::{__cpuid, __cpuid_count, has_cpuid, CpuidResult}; - use super::xsave::_xgetbv; +pub fn detect_features() -> usize { + use vendor::{__cpuid, __cpuid_count, has_cpuid, CpuidResult}; + use vendor::_xgetbv; let mut value: usize = 0; // If the x86 CPU does not support the CPUID instruction then it is too @@ -331,8 +320,8 @@ fn detect_features() -> usize { { // borrows value till the end of this scope: - let mut enable = |r, rb, f| if test_bit(r as usize, rb) { - value = set_bit(value, f as u32); + let mut enable = |r, rb, f| if bit::test(r as usize, rb) { + value = bit::set(value, f as u32); }; enable(proc_info_ecx, 0, __Feature::sse3); @@ -348,7 +337,7 @@ fn detect_features() -> usize { enable(extended_features_ebx, 8, __Feature::bmi2); // `XSAVE` and `AVX` support: - if test_bit(proc_info_ecx as usize, 26) { + if bit::test(proc_info_ecx as usize, 26) { // 0. Here the CPU supports `XSAVE`. // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and @@ -359,7 +348,7 @@ fn detect_features() -> usize { // com/en-us/blogs/2011/04/14/is-avx-enabled // - https://hg.mozilla. // org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190 - let cpu_osxsave = test_bit(proc_info_ecx as usize, 27); + let cpu_osxsave = bit::test(proc_info_ecx as usize, 27); // 2. The OS must have signaled the CPU that it supports saving and // restoring the SSE and AVX registers by setting `XCR0.SSE[1]` and @@ -431,27 +420,6 @@ fn detect_features() -> usize { value } -/// This global variable is a bitset used to cache the features supported by -/// the CPU. -static FEATURES: AtomicUsize = AtomicUsize::new(::std::usize::MAX); - -/// Performs run-time feature detection. -/// -/// On its first invocation, it detects the CPU features and caches them -/// in the `FEATURES` global variable as an `AtomicUsize`. -/// -/// It uses the `__Feature` variant to index into this variable as a bitset. If -/// the bit is set, the feature is enabled, and otherwise it is disabled. -/// -/// PLEASE: do not use this, it is an implementation detail subject to change. -#[doc(hidden)] -pub fn __unstable_detect_feature(x: __Feature) -> bool { - if FEATURES.load(Ordering::Relaxed) == ::std::usize::MAX { - FEATURES.store(detect_features(), Ordering::Relaxed); - } - test_bit(FEATURES.load(Ordering::Relaxed), x as u32) -} - #[cfg(test)] mod tests { #[cfg(feature = "std")] diff --git a/src/x86/mod.rs b/src/x86/mod.rs index 990db824d4..8d57ebd2d9 100644 --- a/src/x86/mod.rs +++ b/src/x86/mod.rs @@ -20,8 +20,6 @@ pub use self::bmi2::*; #[cfg(not(feature = "intel_sde"))] pub use self::tbm::*; -pub use self::runtime::{__unstable_detect_feature, __Feature}; - /// 128-bit wide signed integer vector type #[allow(non_camel_case_types)] pub type __m128i = ::v128::i8x16; @@ -31,8 +29,6 @@ pub type __m256i = ::v256::i8x32; #[macro_use] mod macros; -#[macro_use] -mod runtime; mod ia32; mod cpuid; diff --git a/tests/cpu-detection.rs b/tests/cpu-detection.rs index 4207824c0e..c4b4c9627f 100644 --- a/tests/cpu-detection.rs +++ b/tests/cpu-detection.rs @@ -27,9 +27,12 @@ fn works() { // assert_eq!(cfg_feature_enabled!("avx512bw"), information.avx512bw()); // assert_eq!(cfg_feature_enabled!("avx512dq"), information.avx512dq()); // assert_eq!(cfg_feature_enabled!("avx512vl"), information.avx512vl()); - // assert_eq!(cfg_feature_enabled!("avx512ifma"), information.avx512ifma()); - // assert_eq!(cfg_feature_enabled!("avx512vbmi"), information.avx512vbmi()); - // assert_eq!(cfg_feature_enabled!("avx512vpopcntdq"), information.avx512vpopcntdq()); + // assert_eq!(cfg_feature_enabled!("avx512ifma"), + // information.avx512_ifma()); + // assert_eq!(cfg_feature_enabled!("avx512vbmi"), + // information.avx512_vbmi()); + // assert_eq!(cfg_feature_enabled!("avx512vpopcntdq"), + // information.avx512_vpopcntdq()); assert_eq!(cfg_feature_enabled!("fma"), information.fma()); assert_eq!(cfg_feature_enabled!("bmi"), information.bmi1()); assert_eq!(cfg_feature_enabled!("bmi2"), information.bmi2()); @@ -40,6 +43,12 @@ fn works() { assert_eq!(cfg_feature_enabled!("lzcnt"), information.lzcnt()); assert_eq!(cfg_feature_enabled!("xsave"), information.xsave()); assert_eq!(cfg_feature_enabled!("xsaveopt"), information.xsaveopt()); - assert_eq!(cfg_feature_enabled!("xsavec"), information.xsavec_and_xrstor()); - assert_eq!(cfg_feature_enabled!("xsavec"), information.xsaves_xrstors_and_ia32_xss()); + assert_eq!( + cfg_feature_enabled!("xsavec"), + information.xsavec_and_xrstor() + ); + assert_eq!( + cfg_feature_enabled!("xsavec"), + information.xsaves_xrstors_and_ia32_xss() + ); } From 8d92d15be163ec60db9506535238ae52aa7f0b01 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Fri, 17 Nov 2017 17:26:44 +0100 Subject: [PATCH 9/9] [clippy] fix missing doc on pub item --- src/x86/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/x86/mod.rs b/src/x86/mod.rs index 8d57ebd2d9..3e740a8642 100644 --- a/src/x86/mod.rs +++ b/src/x86/mod.rs @@ -50,8 +50,9 @@ mod bmi2; #[cfg(not(feature = "intel_sde"))] mod tbm; -#[allow(non_camel_case_types)] +/// `C`'s `void` type. #[cfg(not(feature = "std"))] +#[allow(non_camel_case_types)] #[repr(u8)] pub enum c_void { #[doc(hidden)] __variant1,