diff --git a/ci/run.sh b/ci/run.sh index a13e5963f8..467898b388 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -85,8 +85,9 @@ cargo_test() { cmd="$cmd --skip test_vec_lde_u16 --skip test_vec_lde_u32 --skip test_vec_expte" ;; # Miscompilation: https://github.com/rust-lang/rust/issues/112460 + # Also LLVM bug: https://github.com/rust-lang/stdarch/issues/1217 arm*) - cmd="$cmd --skip vld2q_dup_f32" + cmd="$cmd --skip vld" ;; esac diff --git a/crates/intrinsic-test/missing_arm.txt b/crates/intrinsic-test/missing_arm.txt index 7439cd6e66..844c5a1bb8 100644 --- a/crates/intrinsic-test/missing_arm.txt +++ b/crates/intrinsic-test/missing_arm.txt @@ -213,3 +213,23 @@ vrndxq_f32 #vrshrn_n_u64 #vshrq_n_u64 #vshr_n_u64 + +# Seems to be miscompiled. +vtbl2_p8 +vtbl2_s8 +vtbl2_u8 +vtbl3_p8 +vtbl3_s8 +vtbl3_u8 +vtbl4_p8 +vtbl4_s8 +vtbl4_u8 +vtbx2_p8 +vtbx2_s8 +vtbx2_u8 +vtbx3_p8 +vtbx3_s8 +vtbx3_u8 +vtbx4_p8 +vtbx4_s8 +vtbx4_u8 diff --git a/crates/stdarch-test/src/lib.rs b/crates/stdarch-test/src/lib.rs index 7ea189ff50..05298d8c64 100644 --- a/crates/stdarch-test/src/lib.rs +++ b/crates/stdarch-test/src/lib.rs @@ -124,29 +124,14 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) { // Intrinsics using `cvtpi2ps` are typically "composites" and // in some cases exceed the limit. "cvtpi2ps" => 25, - // core_arch/src/arm_shared/simd32 // vfmaq_n_f32_vfma : #instructions = 26 >= 22 (limit) - "usad8" | "vfma" | "vfms" => 27, - "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29, - // core_arch/src/arm_shared/simd32 - // vst1q_s64_x4_vst1 : #instructions = 27 >= 22 (limit) - "vld3" => 28, - // core_arch/src/arm_shared/simd32 - // vld4q_lane_u32_vld4 : #instructions = 36 >= 22 (limit) - "vld4" => 37, - // core_arch/src/arm_shared/simd32 - // vst1q_s64_x4_vst1 : #instructions = 40 >= 22 (limit) - "vst1" => 41, - // core_arch/src/arm_shared/simd32 - // vst3q_u32_vst3 : #instructions = 25 >= 22 (limit) - "vst3" => 26, - // core_arch/src/arm_shared/simd32 - // vst4q_u32_vst4 : #instructions = 33 >= 22 (limit) - "vst4" => 34, - + "vfma" | "vfms" => 27, // core_arch/src/arm_shared/simd32 - // vst1q_p64_x4_nop : #instructions = 33 >= 22 (limit) - "nop" if fnname.contains("vst1q_p64") => 34, + "usad8" | "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" + | "ssub8" => 29, + // core_arch/src/arm_shared/neon + _ if fnname.contains("_vld") => 50, + _ if fnname.contains("_vst") => 50, // Original limit was 20 instructions, but ARM DSP Intrinsics // are exactly 20 instructions long. So, bump the limit to 22