Skip to content

Commit 62da00d

Browse files
committed
Split out a separate implementation for aarch64
1 parent 8da5575 commit 62da00d

File tree

2 files changed

+62
-26
lines changed

2 files changed

+62
-26
lines changed

crates/core_arch/src/aarch64/neon/mod.rs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,11 @@ extern "C" {
285285
b3: int8x16_t,
286286
c: uint8x16_t,
287287
) -> int8x16_t;
288+
289+
#[link_name = "llvm.aarch64.neon.fcvtzu.v4i32.v4f32"]
290+
fn vcvtq_u32_f32_(a: float32x4_t) -> uint32x4_t;
291+
#[link_name = "llvm.aarch64.neon.fcvtzs.v4i32.v4f32"]
292+
fn vcvtq_s32_f32_(a: float32x4_t) -> int32x4_t;
288293
}
289294

290295
/// Absolute Value (wrapping).
@@ -1838,6 +1843,21 @@ pub unsafe fn vld1q_u32(addr: *const u32) -> uint32x4_t {
18381843
))
18391844
}
18401845

1846+
#[inline]
1847+
#[target_feature(enable = "neon")]
1848+
#[cfg_attr(test, assert_instr(fcvtzu))]
1849+
pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
1850+
vctq_s32_f32_(a)
1851+
}
1852+
1853+
/// Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector)
1854+
#[inline]
1855+
#[target_feature(enable = "neon")]
1856+
#[cfg_attr(test, assert_instr(fcvtzu))]
1857+
pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
1858+
vctq_u32_f32_(a)
1859+
}
1860+
18411861
#[cfg(test)]
18421862
mod tests {
18431863
use crate::core_arch::aarch64::test_support::*;
@@ -1846,6 +1866,42 @@ mod tests {
18461866
use std::mem::transmute;
18471867
use stdarch_test::simd_test;
18481868

1869+
#[simd_test(enable = "neon")]
1870+
unsafe fn test_vcvtq_s32_f32() {
1871+
let f = f32x4::new(-1., 2., 3., 4.);
1872+
let e = i32x4::new(-1, 2, 3, 4);
1873+
let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f)));
1874+
assert_eq!(r, e);
1875+
1876+
let f = f32x4::new(10e37, 2., 3., 4.);
1877+
let e = i32x4::new(0x7fffffff, 2, 3, 4);
1878+
let r: i32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1879+
assert_eq!(r, e);
1880+
1881+
let f = f32x4::new(-10e37, 2., 3., 4.);
1882+
let e = i32x4::new(-0x80000000, 2, 3, 4);
1883+
let r: i32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1884+
assert_eq!(r, e);
1885+
}
1886+
1887+
#[simd_test(enable = "neon")]
1888+
unsafe fn test_vcvtq_u32_f32() {
1889+
let f = f32x4::new(1., 2., 3., 4.);
1890+
let e = u32x4::new(1, 2, 3, 4);
1891+
let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1892+
assert_eq!(r, e);
1893+
1894+
let f = f32x4::new(-1., 2., 3., 4.);
1895+
let e = u32x4::new(0, 2, 3, 4);
1896+
let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1897+
assert_eq!(r, e);
1898+
1899+
let f = f32x4::new(10e37, 2., 3., 4.);
1900+
let e = u32x4::new(0xffffffff, 2, 3, 4);
1901+
let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1902+
assert_eq!(r, e);
1903+
}
1904+
18491905
#[simd_test(enable = "neon")]
18501906
unsafe fn test_vld1q_f32() {
18511907
let e = f32x4::new(1., 2., 3., 4.);

crates/core_arch/src/arm/neon/mod.rs

Lines changed: 6 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1815,21 +1815,21 @@ pub unsafe fn vld1q_dup_f32(addr: *const f32) -> float32x4_t {
18151815

18161816
/// Floating-point Convert to Signed fixed-point, rounding toward Zero (vector)
18171817
#[inline]
1818+
#[cfg(target_arch = "arm")]
18181819
#[target_feature(enable = "neon")]
1819-
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1820-
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt.s32.f32"))]
1821-
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))]
1820+
#[target_feature(enable = "v7")]
1821+
#[cfg_attr(test, assert_instr("vcvt.s32.f32"))]
18221822
pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
18231823
use crate::core_arch::simd::{f32x4, i32x4};
18241824
transmute(simd_cast::<_, i32x4>(transmute::<_, f32x4>(a)))
18251825
}
18261826

18271827
/// Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector)
18281828
#[inline]
1829+
#[cfg(target_arch = "arm")]
18291830
#[target_feature(enable = "neon")]
1830-
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1831-
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt.u32.f32"))]
1832-
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))]
1831+
#[target_feature(enable = "v7")]
1832+
#[cfg_attr(test, assert_instr("vcvt.u32.f32"))]
18331833
pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
18341834
use crate::core_arch::simd::{f32x4, u32x4};
18351835
transmute(simd_cast::<_, u32x4>(transmute::<_, f32x4>(a)))
@@ -1906,16 +1906,6 @@ mod tests {
19061906
let e = i32x4::new(-1, 2, 3, 4);
19071907
let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f)));
19081908
assert_eq!(r, e);
1909-
1910-
let f = f32x4::new(10e37, 2., 3., 4.);
1911-
let e = i32x4::new(0x7fffffff, 2, 3, 4);
1912-
let r: i32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1913-
assert_eq!(r, e);
1914-
1915-
let f = f32x4::new(-10e37, 2., 3., 4.);
1916-
let e = i32x4::new(-0x80000000, 2, 3, 4);
1917-
let r: i32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1918-
assert_eq!(r, e);
19191909
}
19201910

19211911
#[simd_test(enable = "neon")]
@@ -1924,16 +1914,6 @@ mod tests {
19241914
let e = u32x4::new(1, 2, 3, 4);
19251915
let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
19261916
assert_eq!(r, e);
1927-
1928-
let f = f32x4::new(-1., 2., 3., 4.);
1929-
let e = u32x4::new(0, 2, 3, 4);
1930-
let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1931-
assert_eq!(r, e);
1932-
1933-
let f = f32x4::new(10e37, 2., 3., 4.);
1934-
let e = u32x4::new(0xffffffff, 2, 3, 4);
1935-
let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1936-
assert_eq!(r, e);
19371917
}
19381918

19391919
#[simd_test(enable = "neon")]

0 commit comments

Comments
 (0)