Skip to content

Commit be24581

Browse files
committed
[arm] runtime-detection support
1 parent d11a2f1 commit be24581

File tree

16 files changed

+517
-104
lines changed

16 files changed

+517
-104
lines changed

ci/docker/aarch64-unknown-linux-gnu/Dockerfile

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
88
qemu-user \
99
make \
1010
file
11+
1112
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
1213
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -L /usr/aarch64-linux-gnu" \
1314
OBJDUMP=aarch64-linux-gnu-objdump

ci/run-docker.sh

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44
set -ex
55

66
run() {
7-
echo $1
7+
echo "Building docker container for TARGET=${1}"
88
docker build -t stdsimd ci/docker/$1
99
mkdir -p target
1010
target=$(echo $1 | sed 's/-emulated//')
11+
echo "Running docker"
1112
docker run \
1213
--user `id -u`:`id -g` \
1314
--rm \

ci/run.sh

+3-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ FEATURES="strict,$FEATURES"
1919

2020
echo "RUSTFLAGS=${RUSTFLAGS}"
2121
echo "FEATURES=${FEATURES}"
22+
echo "OBJDUMP=${OBJDUMP}"
2223

23-
cargo test --target $TARGET --features $FEATURES
24-
cargo test --release --target $TARGET --features $FEATURES
24+
cargo test --target $TARGET --features $FEATURES --verbose -- --nocapture
25+
cargo test --release --target $TARGET --features $FEATURES --verbose -- --nocapture

src/lib.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,13 @@ pub mod vendor {
153153

154154
#[cfg(target_arch = "aarch64")]
155155
pub use aarch64::*;
156+
157+
pub use runtime::{__unstable_detect_feature, __Feature};
156158
}
157159

160+
#[macro_use]
161+
mod runtime;
162+
158163
#[macro_use]
159164
mod macros;
160165
mod simd_llvm;
@@ -187,7 +192,6 @@ mod v16 {
187192
}
188193

189194
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
190-
#[macro_use]
191195
mod x86;
192196

193197
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]

src/macros.rs

-50
Original file line numberDiff line numberDiff line change
@@ -373,56 +373,6 @@ macro_rules! define_casts {
373373
}
374374
}
375375

376-
/// Is a feature supported by the host CPU?
377-
///
378-
/// This macro performs run-time feature detection. It returns true if the host
379-
/// CPU in which the binary is running on supports a particular feature.
380-
#[macro_export]
381-
macro_rules! cfg_feature_enabled {
382-
($name:tt) => (
383-
{
384-
#[cfg(target_feature = $name)]
385-
{
386-
true
387-
}
388-
#[cfg(not(target_feature = $name))]
389-
{
390-
__unstable_detect_feature!($name)
391-
}
392-
}
393-
)
394-
}
395-
396-
/// On ARM features are only detected at compile-time using
397-
/// cfg(target_feature), so if this macro is executed the
398-
/// feature is not supported.
399-
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
400-
#[macro_export]
401-
#[doc(hidden)]
402-
macro_rules! __unstable_detect_feature {
403-
("neon") => { false };
404-
($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) };
405-
}
406-
407-
/// In all unsupported architectures using the macro is an error
408-
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
409-
target_arch = "arm", target_arch = "aarch64")))]
410-
#[macro_export]
411-
#[doc(hidden)]
412-
macro_rules! __unstable_detect_feature {
413-
($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) };
414-
}
415-
416-
#[cfg(test)]
417-
mod tests {
418-
#[cfg(target_arch = "x86_64")]
419-
#[test]
420-
fn test_macros() {
421-
assert!(cfg_feature_enabled!("sse"));
422-
}
423-
}
424-
425-
426376
#[cfg(test)]
427377
#[macro_export]
428378
macro_rules! test_arithmetic_ {

src/runtime/aarch64.rs

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
//! Run-time feature detection on ARM Aarch64.
2+
use super::{bit, linux};
3+
4+
/// Expands to a run-time query for the AArch64 CPU feature named by a string
/// literal.
///
/// Every recognized name is forwarded to
/// `$crate::vendor::__unstable_detect_feature` together with the matching
/// `$crate::vendor::__Feature` variant. Any other token aborts compilation
/// through `compile_error!`.
#[macro_export]
#[doc(hidden)]
macro_rules! __unstable_detect_feature {
    ("neon") => (
        // FIXME: this should be removed once we rename Aarch64 neon to asimd
        $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::asimd{})
    );
    ("asimd") => (
        $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::asimd{})
    );
    ("pmull") => (
        $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::pmull{})
    );
    ($unknown:tt) => (
        compile_error!(concat!("unknown arm target feature: ", $unknown))
    );
}
19+
20+
/// CPU features tracked by run-time detection on ARM Aarch64.
///
/// The discriminant of each variant is the index of the bit that represents
/// the feature inside the detection bitset.
///
/// PLEASE: do not use this, it is an implementation detail subject to change.
#[doc(hidden)]
#[allow(non_camel_case_types)]
#[repr(u8)]
pub enum __Feature {
    /// ARM Advanced SIMD (ASIMD) - Aarch64
    asimd = 0,
    /// Polynomial Multiply
    pmull = 1,
}
33+
34+
pub fn detect_features<T: linux::FeatureQuery>(mut x: T) -> usize {
35+
let value: usize = 0;
36+
{
37+
let mut enable_feature = |f| {
38+
if x.has_feature(&f) {
39+
bit::set(value, f as u32);
40+
}
41+
};
42+
enable_feature(__Feature::asimd);
43+
enable_feature(__Feature::pmull);
44+
}
45+
value
46+
}
47+
48+
impl linux::FeatureQuery for linux::CpuInfo {
49+
fn has_feature(&mut self, x: &__Feature) -> bool {
50+
use self::__Feature::*;
51+
match *x {
52+
asimd => self.field("Features").has("asimd"),
53+
pmull => self.field("Features").has("pmull"),
54+
}
55+
}
56+
}

src/runtime/arm.rs

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
//! Run-time feature detection on ARM Aarch32.
2+
3+
use super::{bit, linux};
4+
5+
/// Expands to a run-time query for the ARM (Aarch32) CPU feature named by a
/// string literal.
///
/// Every recognized name is forwarded to
/// `$crate::vendor::__unstable_detect_feature` together with the matching
/// `$crate::vendor::__Feature` variant. Any other token aborts compilation
/// through `compile_error!`.
#[macro_export]
#[doc(hidden)]
macro_rules! __unstable_detect_feature {
    ("neon") => (
        $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::neon{})
    );
    ("pmull") => (
        $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::pmull{})
    );
    ($unknown:tt) => (
        compile_error!(concat!("unknown arm target feature: ", $unknown))
    );
}
16+
17+
/// CPU features tracked by run-time detection on ARM Aarch32.
///
/// The discriminant of each variant is the index of the bit that represents
/// the feature inside the detection bitset.
///
/// PLEASE: do not use this, it is an implementation detail subject to change.
#[doc(hidden)]
#[allow(non_camel_case_types)]
#[repr(u8)]
pub enum __Feature {
    /// ARM Advanced SIMD (NEON) - Aarch32
    neon = 0,
    /// Polynomial Multiply
    pmull = 1,
}
30+
31+
pub fn detect_features<T: linux::FeatureQuery>(mut x: T) -> usize {
32+
let value: usize = 0;
33+
{
34+
let mut enable_feature = |f| {
35+
if x.has_feature(&f) {
36+
bit::set(value, f as u32);
37+
}
38+
};
39+
enable_feature(__Feature::neon);
40+
enable_feature(__Feature::pmull);
41+
}
42+
value
43+
}
44+
45+
/// Is the CPU known to have a broken NEON unit?
46+
///
47+
/// See https://crbug.com/341598.
48+
fn has_broken_neon(cpuinfo: &linux::CpuInfo) -> bool {
49+
cpuinfo.field("CPU implementer") == "0x51"
50+
&& cpuinfo.field("CPU architecture") == "7"
51+
&& cpuinfo.field("CPU variant") == "0x1"
52+
&& cpuinfo.field("CPU part") == "0x04d"
53+
&& cpuinfo.field("CPU revision") == "0"
54+
}
55+
56+
impl linux::FeatureQuery for linux::CpuInfo {
57+
fn has_feature(&mut self, x: &__Feature) -> bool {
58+
use self::__Feature::*;
59+
match *x {
60+
neon => {
61+
self.field("Features").has("neon") && !has_broken_neon(self)
62+
}
63+
pmull => self.field("Features").has("pmull"),
64+
}
65+
}
66+
}

src/runtime/bit.rs

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
//! Bit manipulation utilities
2+
3+
/// Returns a copy of `x` in which bit number `bit` is switched on.
///
/// All other bits are passed through unchanged.
pub const fn set(x: usize, bit: u32) -> usize {
    let mask: usize = 1 << bit;
    x | mask
}
7+
8+
/// Returns `true` when bit number `bit` of `x` is switched on.
pub const fn test(x: usize, bit: u32) -> bool {
    let mask: usize = 1 << bit;
    (x & mask) != 0
}

src/runtime/cache.rs

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//! Cache of run-time feature detection
2+
3+
use super::bit;
4+
use std::sync::atomic::{AtomicUsize, Ordering};
5+
6+
/// This global variable is a bitset used to cache the features supported by
7+
/// the
8+
/// CPU.
9+
static CACHE: AtomicUsize = AtomicUsize::new(::std::usize::MAX);
10+
11+
/// Test the `bit` of the storage. If the storage has not been initialized,
12+
/// initializes it with the result of `f()`.
13+
///
14+
/// On its first invocation, it detects the CPU features and caches them in the
15+
/// `FEATURES` global variable as an `AtomicUsize`.
16+
///
17+
/// It uses the `__Feature` variant to index into this variable as a bitset. If
18+
/// the bit is set, the feature is enabled, and otherwise it is disabled.
19+
///
20+
/// PLEASE: do not use this, it is an implementation detail subject to change.
21+
pub fn test<F>(bit: u32, f: F) -> bool
22+
where
23+
F: FnOnce() -> usize,
24+
{
25+
if CACHE.load(Ordering::Relaxed) == ::std::usize::MAX {
26+
CACHE.store(f(), Ordering::Relaxed);
27+
}
28+
bit::test(CACHE.load(Ordering::Relaxed), bit)
29+
}

0 commit comments

Comments
 (0)