From 4a86ebf82a0aa3cebd6b5bca16f4fff30f0d7186 Mon Sep 17 00:00:00 2001
From: Mathieu Poumeyrol
Date: Mon, 17 Apr 2023 22:58:50 +0200
Subject: [PATCH] wip, broken max const

---
Review notes (ignored by git-am, not part of the commit message):

* arm64simd_act_f32_32n: every outer-loop iteration handles one tile of
  32 f32 lanes (128 bytes) held in v0-v7. Both loads post-increment x2 by
  64 bytes; x2 is rewound by 128 before the two post-incrementing stores,
  so the tile is written back where it was read and x2 ends on the next
  tile. After the last tile the code branches to .ok instead of running
  into .max_const.
* Ops are fetched with a 32-bit load: only bits 0..15 (opcode) and 16..23
  (constant index) are decoded, and the "pc" advances by 4 bytes per op.
* NOTE(review): `mod [] {`, `max_const(c: T)` and the bare `-> Program`
  return types look like they lost bracketed text (paste-style module name,
  generic parameters) before reaching this copy -- confirm against the
  original commit before applying.
* NOTE(review): the two `$($param in ...),*)` replacements are
  whitespace-only; the exact whitespace is not recoverable from this copy.

 .../arm64simd/arm64simd_act_f32_32n.tmpl | 58 ++++++++++++++++++-
 linalg/src/frame/activations.rs          |  8 ++-
 linalg/src/frame/activations/tests.rs    | 16 +++--
 3 files changed, 73 insertions(+), 9 deletions(-)

diff --git a/linalg/arm64/arm64simd/arm64simd_act_f32_32n.tmpl b/linalg/arm64/arm64simd/arm64simd_act_f32_32n.tmpl
index 2462e373b7..3160c16a03 100644
--- a/linalg/arm64/arm64simd/arm64simd_act_f32_32n.tmpl
+++ b/linalg/arm64/arm64simd/arm64simd_act_f32_32n.tmpl
@@ -8,6 +8,10 @@
 .text
 .align 4
 
+// fn(ops: *const Op, constants: *const $ti, xs: *mut $ti, len: usize) -> usize
+
+// x0 <- ops, x1 <- constant, x2 <- xs, x3 <- len(xs)
+
 .cpu generic+fp+simd
 .global {{G}}arm64simd_act_f32_32n_{{suffix}}
 {{G}}arm64simd_act_f32_32n_{{suffix}}:
@@ -16,9 +20,61 @@
     stp d10, d11, [sp, #-16]!
     stp d12, d13, [sp, #-16]!
     stp d14, d15, [sp, #-16]!
+
+    cmp x3, 0
+    beq .ok // empty buffer: nothing to process
+
+.outer_loop:
+    mov x5, x0 // x5 is "pc"
+    ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], 64 // lanes 0..15 of the tile, x2 += 64
+    ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], 64 // lanes 16..31 of the tile, x2 += 64
+
+.inner_loop:
+    ldr w6, [x5] // w6 is fetched instruction at x5 (32-bit fetch, upper half of x6 is zeroed)
+    and x7, x6, 0xffff // opcode lives in bits 0..15
+
+    cmp x7, 0 // opcode 0: end of program for this tile
+    beq .end_of_inner_loop
+    cmp x7, 10 // opcode 10: max with a constant
+    beq .max_const
+
+    b .unsupported
+
+.inner_loop_payload_done:
+    add x5, x5, 4 // advance "pc" to the next op
+    b .inner_loop
+.end_of_inner_loop:
+    sub x2, x2, 128 // rewind to the start of the tile that was loaded
+    st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], 64 // write back lanes 0..15, x2 += 64
+    st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], 64 // write back lanes 16..31, x2 is now the next tile
+
+    subs x3, x3, 32 // one tile is 32 elements
+    bne .outer_loop
+    b .ok // all tiles done: do not run into the op handlers below
+
+.max_const:
+    lsr x7, x6, 16
+    and x7, x7, 0xff // constant index lives in bits 16..23
+    lsl x7, x7, 2 // index to byte offset (4 bytes per f32)
+    add x7, x7, x1 // address of the constant in the table at x1
+    ld1 { v24.s }[0], [x7]
+    dup v24.4s, v24.s[0] // broadcast the constant to all four lanes
+    fmax v0.4s, v0.4s, v24.4s
+    fmax v1.4s, v1.4s, v24.4s
+    fmax v2.4s, v2.4s, v24.4s
+    fmax v3.4s, v3.4s, v24.4s
+    fmax v4.4s, v4.4s, v24.4s
+    fmax v5.4s, v5.4s, v24.4s
+    fmax v6.4s, v6.4s, v24.4s
+    fmax v7.4s, v7.4s, v24.4s
+    b .inner_loop_payload_done
+
+.unsupported:
+    mov x0, 1 // non-zero return: an opcode was not recognised
+    b .return
+.ok:
 
     mov x0, 0
-//  b .return
 
 .return:
     ldp d14, d15, [sp], #16
diff --git a/linalg/src/frame/activations.rs b/linalg/src/frame/activations.rs
index 5586c16973..580731bd83 100644
--- a/linalg/src/frame/activations.rs
+++ b/linalg/src/frame/activations.rs
@@ -133,8 +133,12 @@ macro_rules! act_impl {
                 }
             }
 
-            #[cfg(test)]
-            act_tests!($cond, $func, $ti);
+            mod [] {
+                use super::*;
+
+                #[cfg(test)]
+                act_tests!($cond, $func, $ti);
+            }
         }
     };
 }
diff --git a/linalg/src/frame/activations/tests.rs b/linalg/src/frame/activations/tests.rs
index fb916e856c..22756d4dbb 100644
--- a/linalg/src/frame/activations/tests.rs
+++ b/linalg/src/frame/activations/tests.rs
@@ -1,12 +1,16 @@
 use crate::LADatum;
 
-use super::{Program, Op};
+use super::{Op, Program};
 use Op::*;
 
 pub fn noop() -> Program {
     Program { ops: vec![Done], csts: vec![] }
 }
 
+pub fn max_const(c: T) -> Program {
+    Program { ops: vec![MaxConst(0)], csts: vec![c] }
+}
+
 macro_rules! prop_act_e2e {
     ($cond:expr, $ti: ty, $ker: ty, $name: ident ( $($param:ident),* )) => {
         proptest::proptest! {
@@ -14,7 +18,7 @@ macro_rules! prop_act_e2e {
             fn $name(
                 x in proptest::prelude::any::<$ti>(),
                 repeat in 1usize..4,
-                $($param in proptest::prelude::any::<$ti>()),*)
+                $($param in proptest::prelude::any::<$ti>()),*)
             {
                 use crate::frame::activations::ActivationKer;
                 if $cond {
@@ -39,14 +43,14 @@ macro_rules! prop_act_unit {
             fn $name(
                 x in proptest::prelude::any::<$ti>(),
                 repeat in 1usize..4,
-                $($param in proptest::prelude::any::<$ti>()),*)
+                $($param in proptest::prelude::any::<$ti>()),*)
             {
                 use crate::frame::activations::ActivationKer;
                 if $cond {
                     let mut input = tract_data::prelude::Tensor::zero_aligned::<$ti>(&[<$ker>::nr() * repeat], <$ker>::alignment_bytes()).unwrap();
                     input.fill_t::<$ti>(x).unwrap();
-                    let refer2: fn($ti) -> $ti = $refer;
-                    let expected:Vec<$ti> = input.as_slice::<$ti>().unwrap().iter().cloned().map(refer2).collect();
+//                    let refer2: fn($ti, $($param),*) -> $ti = $refer;
+                    let expected:Vec<$ti> = input.as_slice::<$ti>().unwrap().iter().cloned().map(|x| $refer(x, $($param),*)).collect();
                     let prog = crate::frame::activations::tests::$name($($param),*);
                     <$ker>::run(&prog.ops, &prog.csts, &mut input.as_slice_mut::<$ti>().unwrap());
 
@@ -62,6 +66,7 @@ macro_rules! prop_act_unit {
 macro_rules! act_tests {
     ($cond:expr, $ker:ty, $ti:ty) => {
         prop_act_unit!($cond, $ti, $ker, noop(), |x| x);
+        prop_act_unit!($cond, $ti, $ker, max_const(alpha), |x: $ti, alpha| x.max(alpha));
 
         prop_act_e2e!($cond, $ti, $ker, relu());
         prop_act_e2e!($cond, $ti, $ker, affine(alpha, beta));
@@ -75,4 +80,3 @@ macro_rules! act_tests {
     */
     };
 }
-