Skip to content

Commit 36708b2

Browse files
committed
Use inline assembly for VEXT on NEON
The compiler doesn't seem to be able to convert 8-bit rotating shuffles of 64-bit elements into the VEXT instruction. Unfortunately, this code crashes the LLVM compiler used by rustc.
1 parent 153a329 commit 36708b2

File tree

1 file changed

+41
-1
lines changed

1 file changed

+41
-1
lines changed

src/simd.rs

+41-1
Original file line numberDiff line numberDiff line change
@@ -218,16 +218,56 @@ fn u64x4_rotate_right_16(vec: u64x4) -> u64x4 {
218218
}
219219
}
220220

221+
#[cfg(feature = "simd_asm")]
222+
#[cfg(target_arch = "arm")]
223+
use simdty::u64x2;
224+
225+
/// Rotates each 64-bit lane of `a` right by `b` bytes using the NEON `vext.8`
/// instruction, emitted through inline assembly.
///
/// Two `vext.8` instructions are issued, one per half of the 128-bit value.
/// Each one passes the same source register as both inputs, so the byte
/// extraction wraps around and acts as a byte-wise rotation of that 64-bit lane.
///
/// Only compiled for 32-bit ARM with the `simd_asm` feature enabled.
///
/// NOTE(review): the `"n"` constraint asks for an immediate operand, while `b`
/// is an ordinary function parameter; this presumably relies on
/// `#[inline(always)]` plus constant arguments at every call site -- confirm.
#[cfg(feature = "simd_asm")]
226+
#[cfg(target_arch = "arm")]
227+
#[inline(always)]
228+
fn vext_u64_u8(a: u64x2, b: u8) -> u64x2 {
229+
unsafe {
230+
let result: u64x2;
231+
// `$0` is the output, `$1` the input vector, `$2` the byte count.
// The `:e` / `:f` operand modifiers are understood to select the low / high
// 64-bit half of the 128-bit register (see LLVM's ARM inline-asm modifiers).
asm!("vext.8 ${0:e}, ${1:e}, ${1:e}, $2\nvext.8 ${0:f}, ${1:f}, ${1:f}, $2"
232+
: "=w" (result)
233+
: "w" (a), "n" (b));
234+
result
235+
}
236+
}
237+
238+
/// Applies `vext_u64_u8` with byte count `n` to all four 64-bit lanes of `vec`.
///
/// The four lanes are split into two `u64x2` halves (lanes 0-1 and lanes 2-3),
/// each half is passed through `vext_u64_u8` separately, and the results are
/// reassembled into a `u64x4` in the original lane order.
///
/// Only compiled for 32-bit ARM with the `simd_asm` feature enabled.
#[cfg(feature = "simd_asm")]
239+
#[cfg(target_arch = "arm")]
240+
#[inline(always)]
241+
fn u64x4_rotate_right_u8(vec: u64x4, n: u8) -> u64x4 {
242+
// Lower half: lanes 0 and 1.
let tmp0 = vext_u64_u8(u64x2(vec.0, vec.1), n);
243+
// Upper half: lanes 2 and 3.
let tmp1 = vext_u64_u8(u64x2(vec.2, vec.3), n);
244+
u64x4(tmp0.0, tmp0.1, tmp1.0, tmp1.1)
245+
}
246+
221247
impl Vector for u64x4 {
222248
impl_vector_common!(u64x4, u64, 64);
223249

224250
// `rotate_right` variant without inline assembly.
//
// With the added `cfg` below, this variant is selected under the `simd`
// feature on AArch64, and on 32-bit ARM only when `simd_asm` is NOT enabled.
// A rotation by 32 goes to `u64x4_rotate_right_32`; every other amount falls
// back to `rotate_right_any`.
#[cfg(feature = "simd")]
225-
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
251+
#[cfg(any(all(target_arch = "arm", not(feature = "simd_asm")),
252+
target_arch = "aarch64"))]
253+
#[inline(always)]
254+
fn rotate_right(self, n: u32) -> Self
255+
{
256+
match n {
257+
32 => u64x4_rotate_right_32(self),
258+
_ => self.rotate_right_any(n),
259+
}
260+
}
261+
262+
// `rotate_right` variant for 32-bit ARM with the `simd_asm` feature enabled.
//
// Rotations by 32 go to `u64x4_rotate_right_32`. Rotations by 24 and 16 bits
// are whole-byte rotations (3 and 2 bytes respectively) and are routed to
// `u64x4_rotate_right_u8`. Every other amount falls back to `rotate_right_any`.
#[cfg(feature = "simd_asm")]
263+
#[cfg(target_arch = "arm")]
226264
#[inline(always)]
227265
fn rotate_right(self, n: u32) -> Self
228266
{
229267
match n {
230268
32 => u64x4_rotate_right_32(self),
269+
// 24 bits == 3 bytes
24 => u64x4_rotate_right_u8(self, 3),
270+
// 16 bits == 2 bytes
16 => u64x4_rotate_right_u8(self, 2),
231271
_ => self.rotate_right_any(n),
232272
}
233273
}

0 commit comments

Comments
 (0)