Skip to content

Commit

Permalink
wip, broken max const
Browse files Browse the repository at this point in the history
  • Loading branch information
kali committed Apr 17, 2023
1 parent 2eeecb3 commit 4a86ebf
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 9 deletions.
58 changes: 57 additions & 1 deletion linalg/arm64/arm64simd/arm64simd_act_f32_32n.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
.text
.align 4

// fn(ops: *const Op, constants: *const $ti, xs: *mut $ti, len: usize) -> usize

// x0 <- ops, x1 <- constant, x2 <- xs, x3 <- len(xs)

.cpu generic+fp+simd
.global {{G}}arm64simd_act_f32_32n_{{suffix}}
{{G}}arm64simd_act_f32_32n_{{suffix}}:
Expand All @@ -16,9 +20,61 @@
stp d10, d11, [sp, #-16]!
stp d12, d13, [sp, #-16]!
stp d14, d15, [sp, #-16]!

cmp x3, 0
beq .ok // empty buffer: nothing to process

// Each outer iteration processes one tile of 32 f32 values (128 bytes) from xs.
// The tile lives in v0-v7 while the op program at x0 is interpreted over it.
// len is decremented by 32 and compared against zero, so it is expected to be a
// non-zero multiple of 32 here (NOTE(review): confirm the caller guarantees it).
.outer_loop:
mov x5, x0 // x5 is "pc": restart the program for every tile
// Load the 128-byte tile: the first ld1 post-increments x2 by 64 so the second
// one reads the upper half; x2 is then rewound to the start of the tile.
ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], 64
ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2]
sub x2, x2, 64

.inner_loop:
// NOTE(review): this fetches 8 bytes while the pc advances by 4 per op; only the
// low 24 bits are decoded below. Confirm the encoded op width.
ldr x6, [x5] // x6 is fetched instruction at x5
and x7, x6, 0xffff // x7 is the opcode (low 16 bits)

cmp x7, 0 // opcode 0 ends the program for this tile
beq .end_of_inner_loop
cmp x7, 10 // opcode 10 is handled by .max_const
beq .max_const

b .unsupported

// Handlers branch back here once they have updated v0-v7.
.inner_loop_payload_done:
add x5, x5, 4
b .inner_loop
.end_of_inner_loop:
// Write the tile back in place; the two post-increments advance x2 by 128 bytes
// in total, leaving it on the next tile.
st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], 64
st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], 64

subs x3, x3, 32
bne .outer_loop
b .ok // all tiles done: do not fall through into the op handlers

// Handler for opcode 10: replaces every lane of the tile (v0-v7) with
// max(lane, c), where c is a 4-byte constant read from the table at x1.
.max_const:
ubfx x7, x6, #16, #8 // constant index = bits 16..23 of the fetched op
add x7, x1, x7, lsl #2 // x7 = x1 + 4 * index
ld1r {v24.4s}, [x7] // load the constant and broadcast it to all four lanes
fmax v0.4s, v0.4s, v24.4s
fmax v1.4s, v1.4s, v24.4s
fmax v2.4s, v2.4s, v24.4s
fmax v3.4s, v3.4s, v24.4s
fmax v4.4s, v4.4s, v24.4s
fmax v5.4s, v5.4s, v24.4s
fmax v6.4s, v6.4s, v24.4s
fmax v7.4s, v7.4s, v24.4s
b .inner_loop_payload_done


// Exit path taken when the fetched opcode matches none of the handled values.
// Puts 1 in x0 (the function's usize result) and jumps to the epilogue.
.unsupported:
mov x0, 1
b .return

// Exit path with x0 = 0. Entered from the `len == 0` check at function entry.
.ok:
mov x0, 0
// b .return  (not needed: execution falls through into .return just below)

.return:
ldp d14, d15, [sp], #16
Expand Down
8 changes: 6 additions & 2 deletions linalg/src/frame/activations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,12 @@ macro_rules! act_impl {
}
}

#[cfg(test)]
act_tests!($cond, $func, $ti);
mod [<test_ $func>] {
use super::*;

#[cfg(test)]
act_tests!($cond, $func, $ti);
}
}
};
}
Expand Down
16 changes: 10 additions & 6 deletions linalg/src/frame/activations/tests.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
use crate::LADatum;

use super::{Program, Op};
use super::{Op, Program};
use Op::*;

/// Builds the empty activation program: a single `Done` op and no constants.
///
/// The tests in this module use it with the identity function as reference.
pub fn noop<T: LADatum>() -> Program<T> {
    let ops = vec![Done];
    let csts = Vec::new();
    Program { ops, csts }
}

pub fn max_const<T: LADatum>(c: T) -> Program<T> {
Program { ops: vec![MaxConst(0)], csts: vec![c] }
}

macro_rules! prop_act_e2e {
($cond:expr, $ti: ty, $ker: ty, $name: ident ( $($param:ident),* )) => {
proptest::proptest! {
#[test]
fn $name(
x in proptest::prelude::any::<$ti>(),
repeat in 1usize..4,
$($param in proptest::prelude::any::<$ti>()),*)
$($param in proptest::prelude::any::<$ti>()),*)
{
use crate::frame::activations::ActivationKer;
if $cond {
Expand All @@ -39,14 +43,14 @@ macro_rules! prop_act_unit {
fn $name(
x in proptest::prelude::any::<$ti>(),
repeat in 1usize..4,
$($param in proptest::prelude::any::<$ti>()),*)
$($param in proptest::prelude::any::<$ti>()),*)
{
use crate::frame::activations::ActivationKer;
if $cond {
let mut input = tract_data::prelude::Tensor::zero_aligned::<$ti>(&[<$ker>::nr() * repeat], <$ker>::alignment_bytes()).unwrap();
input.fill_t::<$ti>(x).unwrap();
let refer2: fn($ti) -> $ti = $refer;
let expected:Vec<$ti> = input.as_slice::<$ti>().unwrap().iter().cloned().map(refer2).collect();
// let refer2: fn($ti, $($param),*) -> $ti = $refer;
let expected:Vec<$ti> = input.as_slice::<$ti>().unwrap().iter().cloned().map(|x| $refer(x, $($param),*)).collect();
let prog = crate::frame::activations::tests::$name($($param),*);
<$ker>::run(&prog.ops, &prog.csts, &mut input.as_slice_mut::<$ti>().unwrap());

Expand All @@ -62,6 +66,7 @@ macro_rules! prop_act_unit {
macro_rules! act_tests {
($cond:expr, $ker:ty, $ti:ty) => {
prop_act_unit!($cond, $ti, $ker, noop(), |x| x);
prop_act_unit!($cond, $ti, $ker, max_const(alpha), |x: $ti, alpha| x.max(alpha));

prop_act_e2e!($cond, $ti, $ker, relu());
prop_act_e2e!($cond, $ti, $ker, affine(alpha, beta));
Expand All @@ -75,4 +80,3 @@ macro_rules! act_tests {
*/
};
}

0 comments on commit 4a86ebf

Please sign in to comment.