Skip to content

Commit aa803f4

Browse files
authored
Merge pull request rust-lang#650 from bjorn3/simd_emulation
Simd emulation
2 parents 3f76607 + 1028fbb commit aa803f4

19 files changed

+772
-82
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ perf.data.old
88
/build_sysroot/sysroot_src
99
/build_sysroot/Cargo.lock
1010
/rust
11+
/regex

Readme.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ $ RUSTFLAGS="-Cpanic=abort -Zcodegen-backend=$cg_clif_dir/target/debug/librustc_
3232
* Good non-rust abi support ([vectors are passed by-ref](https://github.com/bjorn3/rustc_codegen_cranelift/issues/10))
3333
* Checked binops ([some missing instructions in cranelift](https://github.com/CraneStation/cranelift/issues/460))
3434
* Inline assembly ([no cranelift support](https://github.com/CraneStation/cranelift/issues/444))
35-
* SIMD ([tracked here](https://github.com/bjorn3/rustc_codegen_cranelift/issues/171))
35+
* SIMD ([tracked here](https://github.com/bjorn3/rustc_codegen_cranelift/issues/171), some basic things work)
3636

3737
## Troubleshooting
3838

build_sysroot/build_sysroot.sh

+11-15
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,30 @@
11
#!/bin/bash
2+
3+
# Requires the CHANNEL env var to be set to `debug` or `release.`
4+
25
set -e
36
cd $(dirname "$0")
47

8+
pushd ../ >/dev/null
9+
source ./config.sh
10+
popd >/dev/null
11+
512
# Cleanup for previous run
613
# v Clean target dir except for build scripts and incremental cache
714
rm -r target/*/{debug,release}/{build,deps,examples,libsysroot*,native} || true
815
rm Cargo.lock 2>/dev/null || true
916
rm -r sysroot 2>/dev/null || true
1017

11-
# FIXME find a better way to get the target triple
12-
unamestr=`uname`
13-
if [[ "$unamestr" == 'Linux' ]]; then
14-
TARGET_TRIPLE='x86_64-unknown-linux-gnu'
15-
elif [[ "$unamestr" == 'Darwin' ]]; then
16-
TARGET_TRIPLE='x86_64-apple-darwin'
17-
else
18-
echo "Unsupported os"
19-
exit 1
20-
fi
21-
2218
# Build libs
23-
mkdir -p sysroot/lib/rustlib/$TARGET_TRIPLE/lib/
2419
export RUSTFLAGS="$RUSTFLAGS -Z force-unstable-if-unmarked"
2520
if [[ "$1" == "--release" ]]; then
26-
channel='release'
21+
sysroot_channel='release'
2722
RUSTFLAGS="$RUSTFLAGS -Zmir-opt-level=3" cargo build --target $TARGET_TRIPLE --release
2823
else
29-
channel='debug'
24+
sysroot_channel='debug'
3025
cargo build --target $TARGET_TRIPLE
3126
fi
3227

3328
# Copy files to sysroot
34-
cp target/$TARGET_TRIPLE/$channel/deps/*.rlib sysroot/lib/rustlib/$TARGET_TRIPLE/lib/
29+
mkdir -p sysroot/lib/rustlib/$TARGET_TRIPLE/lib/
30+
cp target/$TARGET_TRIPLE/$sysroot_channel/deps/*.rlib sysroot/lib/rustlib/$TARGET_TRIPLE/lib/

cargo.sh

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#!/bin/bash
2+
3+
if [ -z $CHANNEL ]; then
4+
export CHANNEL='debug'
5+
fi
6+
7+
pushd $(dirname "$0") >/dev/null
8+
source config.sh
9+
popd >/dev/null
10+
11+
cmd=$1
12+
shift
13+
14+
cargo $cmd --target $TARGET_TRIPLE $@

clean_all.sh

+1
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
set -e
33

44
rm -rf target/ build_sysroot/{sysroot/,sysroot_src/,target/,Cargo.lock} perf.data{,.old}
5+
rm -rf regex/

config.sh

+2-8
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,8 @@ else
1010
exit 1
1111
fi
1212

13-
if [[ "$1" == "--release" ]]; then
14-
channel='release'
15-
cargo build --release
16-
else
17-
channel='debug'
18-
cargo build
19-
fi
13+
TARGET_TRIPLE=$(rustc -vV | grep host | cut -d: -f2 | tr -d " ")
2014

21-
export RUSTFLAGS='-Zalways-encode-mir -Cpanic=abort -Cdebuginfo=2 -Zcodegen-backend='$(pwd)'/target/'$channel'/librustc_codegen_cranelift.'$dylib_ext' --sysroot '$(pwd)'/build_sysroot/sysroot'
15+
export RUSTFLAGS='-Zalways-encode-mir -Cpanic=abort -Cdebuginfo=2 -Zcodegen-backend='$(pwd)'/target/'$CHANNEL'/librustc_codegen_cranelift.'$dylib_ext' --sysroot '$(pwd)'/build_sysroot/sysroot'
2216
RUSTC="rustc $RUSTFLAGS -L crate=target/out --out-dir target/out"
2317
export RUSTC_LOG=warn # display metadata load errors

crate_patches/regex.patch

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
From febff2a8c639efb5de1e1b4758cdb473847d80ce Mon Sep 17 00:00:00 2001
2+
From: bjorn3 <bjorn3@users.noreply.github.com>
3+
Date: Tue, 30 Jul 2019 12:12:37 +0200
4+
Subject: [PATCH] Disable threads in shootout-regex-dna example
5+
6+
---
7+
examples/shootout-regex-dna.rs | 4 ++--
8+
1 file changed, 2 insertions(+), 2 deletions(-)
9+
10+
diff --git a/examples/shootout-regex-dna.rs b/examples/shootout-regex-dna.rs
11+
index 2171bb3..37382f8 100644
12+
--- a/examples/shootout-regex-dna.rs
13+
+++ b/examples/shootout-regex-dna.rs
14+
@@ -37,7 +37,7 @@ fn main() {
15+
for variant in variants {
16+
let seq = seq_arc.clone();
17+
let restr = variant.to_string();
18+
- let future = thread::spawn(move || variant.find_iter(&seq).count());
19+
+ let future = variant.find_iter(&seq).count();
20+
counts.push((restr, future));
21+
}
22+
23+
@@ -60,7 +60,7 @@ fn main() {
24+
}
25+
26+
for (variant, count) in counts {
27+
- println!("{} {}", variant, count.join().unwrap());
28+
+ println!("{} {}", variant, count);
29+
}
30+
println!("\n{}\n{}\n{}", ilen, clen, seq.len());
31+
}
32+
--
33+
2.11.0
34+

example/mini_core_hello_world.rs

+10
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,23 @@ impl<T: ?Sized, U: ?Sized> CoerceUnsized<Unique<U>> for Unique<T> where T: Unsiz
117117
fn take_f32(_f: f32) {}
118118
fn take_unique(_u: Unique<()>) {}
119119

120+
fn return_u128_pair() -> (u128, u128) {
121+
(0, 0)
122+
}
123+
124+
fn call_return_u128_pair() {
125+
return_u128_pair();
126+
}
127+
120128
fn main() {
121129
take_unique(Unique {
122130
pointer: 0 as *const (),
123131
_marker: PhantomData,
124132
});
125133
take_f32(0.1);
126134

135+
call_return_u128_pair();
136+
127137
//return;
128138

129139
unsafe {

example/std_example.rs

+125
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
#![feature(core_intrinsics)]
22

3+
use std::arch::x86_64::*;
34
use std::io::Write;
45
use std::intrinsics;
56

7+
68
fn main() {
79
let _ = ::std::iter::repeat('a' as u8).take(10).collect::<Vec<_>>();
810
let stderr = ::std::io::stderr();
@@ -43,6 +45,129 @@ fn main() {
4345
assert_eq!(0xFEDCBA987654321123456789ABCDEFu128 >> 64, 0xFEDCBA98765432u128);
4446
assert_eq!(0xFEDCBA987654321123456789ABCDEFu128 as i128 >> 64, 0xFEDCBA98765432i128);
4547
assert_eq!(353985398u128 * 932490u128, 330087843781020u128);
48+
49+
unsafe {
50+
test_simd();
51+
}
52+
}
53+
54+
#[target_feature(enable = "sse2")]
55+
unsafe fn test_simd() {
56+
let x = _mm_setzero_si128();
57+
let y = _mm_set1_epi16(7);
58+
let or = _mm_or_si128(x, y);
59+
let cmp_eq = _mm_cmpeq_epi8(y, y);
60+
let cmp_lt = _mm_cmplt_epi8(y, y);
61+
62+
assert_eq!(std::mem::transmute::<_, [u16; 8]>(or), [7, 7, 7, 7, 7, 7, 7, 7]);
63+
assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_eq), [0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff]);
64+
assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_lt), [0, 0, 0, 0, 0, 0, 0, 0]);
65+
66+
test_mm_slli_si128();
67+
test_mm_movemask_epi8();
68+
test_mm256_movemask_epi8();
69+
test_mm_add_epi8();
70+
test_mm_add_pd();
71+
72+
let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)));
73+
assert_eq!(mask1, 1);
74+
}
75+
76+
#[target_feature(enable = "sse2")]
77+
unsafe fn test_mm_slli_si128() {
78+
#[rustfmt::skip]
79+
let a = _mm_setr_epi8(
80+
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
81+
);
82+
let r = _mm_slli_si128(a, 1);
83+
let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
84+
assert_eq_m128i(r, e);
85+
86+
#[rustfmt::skip]
87+
let a = _mm_setr_epi8(
88+
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
89+
);
90+
let r = _mm_slli_si128(a, 15);
91+
let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
92+
assert_eq_m128i(r, e);
93+
94+
#[rustfmt::skip]
95+
let a = _mm_setr_epi8(
96+
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
97+
);
98+
let r = _mm_slli_si128(a, 16);
99+
assert_eq_m128i(r, _mm_set1_epi8(0));
100+
101+
#[rustfmt::skip]
102+
let a = _mm_setr_epi8(
103+
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
104+
);
105+
let r = _mm_slli_si128(a, -1);
106+
assert_eq_m128i(_mm_set1_epi8(0), r);
107+
108+
#[rustfmt::skip]
109+
let a = _mm_setr_epi8(
110+
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
111+
);
112+
let r = _mm_slli_si128(a, -0x80000000);
113+
assert_eq_m128i(r, _mm_set1_epi8(0));
114+
}
115+
116+
#[target_feature(enable = "sse2")]
117+
unsafe fn test_mm_movemask_epi8() {
118+
#[rustfmt::skip]
119+
let a = _mm_setr_epi8(
120+
0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
121+
0b0101, 0b1111_0000u8 as i8, 0, 0,
122+
0, 0, 0b1111_0000u8 as i8, 0b0101,
123+
0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
124+
);
125+
let r = _mm_movemask_epi8(a);
126+
assert_eq!(r, 0b10100100_00100101);
127+
}
128+
129+
#[target_feature(enable = "avx2")]
130+
unsafe fn test_mm256_movemask_epi8() {
131+
let a = _mm256_set1_epi8(-1);
132+
let r = _mm256_movemask_epi8(a);
133+
let e = -1;
134+
assert_eq!(r, e);
135+
}
136+
137+
#[target_feature(enable = "sse2")]
138+
unsafe fn test_mm_add_epi8() {
139+
let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
140+
#[rustfmt::skip]
141+
let b = _mm_setr_epi8(
142+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
143+
);
144+
let r = _mm_add_epi8(a, b);
145+
#[rustfmt::skip]
146+
let e = _mm_setr_epi8(
147+
16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
148+
);
149+
assert_eq_m128i(r, e);
150+
}
151+
152+
#[target_feature(enable = "sse2")]
153+
unsafe fn test_mm_add_pd() {
154+
let a = _mm_setr_pd(1.0, 2.0);
155+
let b = _mm_setr_pd(5.0, 10.0);
156+
let r = _mm_add_pd(a, b);
157+
assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
158+
}
159+
160+
fn assert_eq_m128i(x: std::arch::x86_64::__m128i, y: std::arch::x86_64::__m128i) {
161+
unsafe {
162+
assert_eq!(std::mem::transmute::<_, [u8; 16]>(x), std::mem::transmute::<_, [u8; 16]>(x));
163+
}
164+
}
165+
166+
#[target_feature(enable = "sse2")]
167+
pub unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) {
168+
if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 {
169+
panic!("{:?} != {:?}", a, b);
170+
}
46171
}
47172

48173
#[derive(PartialEq)]

patches/0015-Remove-usage-of-unsized-locals.patch

+13
Original file line numberDiff line numberDiff line change
@@ -94,5 +94,18 @@ index b2142e7..718bb1c 100644
9494
}
9595

9696
pub fn min_stack() -> usize {
97+
diff --git a/src/libstd/sys/unix/thread.rs b/src/libstd/sys/unix/thread.rs
98+
index f4a1783..362b537 100644
99+
--- a/src/libstd/sys/unix/thread.rs
100+
+++ b/src/libstd/sys/unix/thread.rs
101+
@@ -40,6 +40,8 @@ impl Thread {
102+
// unsafe: see thread::Builder::spawn_unchecked for safety requirements
103+
pub unsafe fn new(stack: usize, p: Box<dyn FnOnce()>)
104+
-> io::Result<Thread> {
105+
+ panic!("Threads are not yet supported, because cranelift doesn't support atomics.");
106+
+
107+
let p = box p;
108+
let mut native: libc::pthread_t = mem::zeroed();
109+
let mut attr: libc::pthread_attr_t = mem::zeroed();
97110
--
98111
2.20.1 (Apple Git-117)
+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
From 7403e2998345ef0650fd50628d7098d4d1e88e5c Mon Sep 17 00:00:00 2001
2+
From: bjorn3 <bjorn3@users.noreply.github.com>
3+
Date: Sat, 6 Apr 2019 12:16:21 +0200
4+
Subject: [PATCH] Remove usage of unsized locals
5+
6+
---
7+
src/stdarch/crates/core_arch/src/x86/cpuid.rs | 2 ++
8+
1 files changed, 2 insertions(+), 0 deletions(-)
9+
10+
diff --git a/src/stdarch/crates/core_arch/src/x86/cpuid.rs b/src/stdarch/crates/core_arch/src/x86/cpuid.rs
11+
index f313c42..ff952bc 100644
12+
--- a/src/stdarch/crates/core_arch/src/x86/cpuid.rs
13+
+++ b/src/stdarch/crates/core_arch/src/x86/cpuid.rs
14+
@@ -84,6 +84,9 @@ pub unsafe fn __cpuid(leaf: u32) -> CpuidResult {
15+
/// Does the host support the `cpuid` instruction?
16+
#[inline]
17+
pub fn has_cpuid() -> bool {
18+
+ // __cpuid intrinsic is not yet implemented
19+
+ return false;
20+
+
21+
#[cfg(target_env = "sgx")]
22+
{
23+
false
24+
--
25+
2.20.1 (Apple Git-117)

prepare.sh

+7
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,10 @@ set -e
44
rustup component add rust-src
55
./build_sysroot/prepare_sysroot_src.sh
66
cargo install hyperfine || echo "Skipping hyperfine install"
7+
8+
git clone https://github.com/rust-lang/regex.git || echo "rust-lang/regex has already been cloned"
9+
pushd regex
10+
git checkout -- .
11+
git checkout 341f207c1071f7290e3f228c710817c280c8dca1
12+
git apply ../crate_patches/regex.patch
13+
popd

0 commit comments

Comments
 (0)