Skip to content

Commit abd53c9

Browse files
authored
Complete vst1 neon instructions (#1221)
1 parent 5fdbc47 commit abd53c9

File tree

8 files changed

+1200
-168
lines changed

8 files changed

+1200
-168
lines changed

Diff for: crates/core_arch/src/aarch64/neon/generated.rs

+6-6
Original file line number | Diff line number | Diff line change
@@ -13066,7 +13066,7 @@ mod test {
1306613066
let a: [f64; 3] = [0., 1., 2.];
1306713067
let e: [f64; 2] = [1., 2.];
1306813068
let mut r: [f64; 2] = [0f64; 2];
13069-
vst1_f64_x2(r.as_mut_ptr(), vld1_f64_x2(a[1..].as_ptr()));
13069+
vst1_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
1307013070
assert_eq!(r, e);
1307113071
}
1307213072

@@ -13075,7 +13075,7 @@ mod test {
1307513075
let a: [f64; 5] = [0., 1., 2., 3., 4.];
1307613076
let e: [f64; 4] = [1., 2., 3., 4.];
1307713077
let mut r: [f64; 4] = [0f64; 4];
13078-
vst1q_f64_x2(r.as_mut_ptr(), vld1q_f64_x2(a[1..].as_ptr()));
13078+
vst1q_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
1307913079
assert_eq!(r, e);
1308013080
}
1308113081

@@ -13084,7 +13084,7 @@ mod test {
1308413084
let a: [f64; 4] = [0., 1., 2., 3.];
1308513085
let e: [f64; 3] = [1., 2., 3.];
1308613086
let mut r: [f64; 3] = [0f64; 3];
13087-
vst1_f64_x3(r.as_mut_ptr(), vld1_f64_x3(a[1..].as_ptr()));
13087+
vst1_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
1308813088
assert_eq!(r, e);
1308913089
}
1309013090

@@ -13093,7 +13093,7 @@ mod test {
1309313093
let a: [f64; 7] = [0., 1., 2., 3., 4., 5., 6.];
1309413094
let e: [f64; 6] = [1., 2., 3., 4., 5., 6.];
1309513095
let mut r: [f64; 6] = [0f64; 6];
13096-
vst1q_f64_x3(r.as_mut_ptr(), vld1q_f64_x3(a[1..].as_ptr()));
13096+
vst1q_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
1309713097
assert_eq!(r, e);
1309813098
}
1309913099

@@ -13102,7 +13102,7 @@ mod test {
1310213102
let a: [f64; 5] = [0., 1., 2., 3., 4.];
1310313103
let e: [f64; 4] = [1., 2., 3., 4.];
1310413104
let mut r: [f64; 4] = [0f64; 4];
13105-
vst1_f64_x4(r.as_mut_ptr(), vld1_f64_x4(a[1..].as_ptr()));
13105+
vst1_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
1310613106
assert_eq!(r, e);
1310713107
}
1310813108

@@ -13111,7 +13111,7 @@ mod test {
1311113111
let a: [f64; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
1311213112
let e: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
1311313113
let mut r: [f64; 8] = [0f64; 8];
13114-
vst1q_f64_x4(r.as_mut_ptr(), vld1q_f64_x4(a[1..].as_ptr()));
13114+
vst1q_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
1311513115
assert_eq!(r, e);
1311613116
}
1311713117

Diff for: crates/core_arch/src/aarch64/neon/mod.rs

+4-27
Original file line number | Diff line number | Diff line change
@@ -678,7 +678,7 @@ pub unsafe fn vld1_dup_f64(ptr: *const f64) -> float64x1_t {
678678
/// Load multiple single-element structures to one, two, three, or four registers
679679
#[inline]
680680
#[target_feature(enable = "neon")]
681-
#[cfg_attr(test, assert_instr(ldr))]
681+
#[cfg_attr(test, assert_instr(ld1r))]
682682
pub unsafe fn vld1q_dup_f64(ptr: *const f64) -> float64x2_t {
683683
let x = vld1q_lane_f64::<0>(ptr, transmute(f64x2::splat(0.)));
684684
simd_shuffle2!(x, x, [0, 0])
@@ -698,7 +698,7 @@ pub unsafe fn vld1_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x1_t)
698698
#[inline]
699699
#[target_feature(enable = "neon")]
700700
#[rustc_legacy_const_generics(2)]
701-
#[cfg_attr(test, assert_instr(ldr, LANE = 1))]
701+
#[cfg_attr(test, assert_instr(ld1, LANE = 1))]
702702
pub unsafe fn vld1q_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x2_t) -> float64x2_t {
703703
static_assert_imm1!(LANE);
704704
simd_insert(src, LANE as u32, *ptr)
@@ -886,7 +886,7 @@ pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
886886

887887
// Store multiple single-element structures from one, two, three, or four registers.
888888
#[inline]
889-
#[target_feature(enable = "neon")]
889+
#[target_feature(enable = "neon,aes")]
890890
#[cfg_attr(test, assert_instr(str))]
891891
#[allow(clippy::cast_ptr_alignment)]
892892
pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
@@ -895,7 +895,7 @@ pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
895895

896896
// Store multiple single-element structures from one, two, three, or four registers.
897897
#[inline]
898-
#[target_feature(enable = "neon")]
898+
#[target_feature(enable = "neon,aes")]
899899
#[cfg_attr(test, assert_instr(str))]
900900
#[allow(clippy::cast_ptr_alignment)]
901901
pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
@@ -4803,29 +4803,6 @@ mod tests {
48034803
assert_eq!(r, e)
48044804
}
48054805

4806-
#[simd_test(enable = "neon")]
4807-
unsafe fn test_vst1_p64() {
4808-
let mut vals = [0_u64; 2];
4809-
let a = u64x1::new(1);
4810-
4811-
vst1_p64(vals[1..].as_mut_ptr(), transmute(a));
4812-
4813-
assert_eq!(vals[0], 0);
4814-
assert_eq!(vals[1], 1);
4815-
}
4816-
4817-
#[simd_test(enable = "neon")]
4818-
unsafe fn test_vst1q_p64() {
4819-
let mut vals = [0_u64; 3];
4820-
let a = u64x2::new(1, 2);
4821-
4822-
vst1q_p64(vals[1..].as_mut_ptr(), transmute(a));
4823-
4824-
assert_eq!(vals[0], 0);
4825-
assert_eq!(vals[1], 1);
4826-
assert_eq!(vals[2], 2);
4827-
}
4828-
48294806
#[simd_test(enable = "neon")]
48304807
unsafe fn test_vst1_f64() {
48314808
let mut vals = [0_f64; 2];

Diff for: crates/core_arch/src/arm/neon.rs

+16
Original file line number | Diff line number | Diff line change
@@ -480,6 +480,22 @@ pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
480480
vst1q_v8i16(ptr as *const i8, transmute(a), align_of::<p8>() as i32)
481481
}
482482

483+
/// Store multiple single-element structures from one, two, three, or four registers.
484+
#[inline]
485+
#[target_feature(enable = "neon,aes,v8")]
486+
#[cfg_attr(test, assert_instr("vst1.64"))]
487+
pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
488+
vst1_v1i64(ptr as *const i8, transmute(a), align_of::<p64>() as i32)
489+
}
490+
491+
/// Store multiple single-element structures from one, two, three, or four registers.
492+
#[inline]
493+
#[target_feature(enable = "neon,aes,v8")]
494+
#[cfg_attr(test, assert_instr("vst1.64"))]
495+
pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
496+
vst1q_v2i64(ptr as *const i8, transmute(a), align_of::<p64>() as i32)
497+
}
498+
483499
// Store multiple single-element structures from one, two, three, or four registers.
484500
#[inline]
485501
#[target_feature(enable = "neon,v7")]

0 commit comments

Comments
 (0)