Skip to content

Commit

Permalink
plug in mat*vec one + cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
mathieupoumeyrolsonos authored and kali committed Jun 8, 2024
1 parent 5cded57 commit c54c235
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 47 deletions.
53 changes: 6 additions & 47 deletions linalg/arm64/apple_amx/apple_amx_mmm_f16_64x32.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -580,23 +580,21 @@
ldp x5, x6, [x0, #8] // c base ptr, rsc
ldp x7, x8, [x0, #24] // csc, item_size

/*
cmp x7, 4
cmp x7, 2
bne .store_generic
ands x8, x5, 0x7f
bne .store_generic
ands x8, x6, 0x7f
bne .store_generic

orr x5, x5, {{ 0|setting:62 }} // pair
lsl x8, x6, 4
add x8, x8, x5 // x8 = 16*rsc
orr x8, x8, {{ 0|setting:57 }} // first to x8 is z2
lsl x8, x6, 32
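add x8, x8, x5 // x8 = c + 32*rsc intended -- NOTE(review): the lsl just above shifts rsc by 32 (rsc * 2^32); a shift of 5 would give 32*rsc, confirm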
orr x8, x8, {{ 0|setting:56 }} // first to x8 is z1

mov x4, {{0|setting:58}} // Zreg += 4
mov x4, {{0|setting:57}} // Zreg += 2
add x4, x4, x6 // +rsc

mov x3, 16
mov x3, 32
.loop_store_direct:
{% amx stz x5 %}
{% amx stz x8 %}
Expand All @@ -606,7 +604,6 @@
bne .loop_store_direct

b .non_linear_loop
*/

.store_generic:

Expand Down Expand Up @@ -637,44 +634,6 @@
bne .loop_store
b .non_linear_loop

/*
add x8, x1, 64

mov x3, 0 // row id
.loop_store:
and x9, x3, 0xf // x9 = row % 16
lsl x9, x9, 2 // x9 = (row % 16) * 4
lsr x10, x3, 4 // x10 = row / 16
lsl x10, x10, 1 // x10 = (row / 16) * 2
add x9, x9, x10 // x9 = x9 + x10

lsl x2, x9, 56
orr x2, x2, {{ 0|setting:62 }}
orr x2, x2, x1
{% amx stz x2 %}
ld1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x1]

mov x4, x5
{% for neon in (0..3) %}
{% for lane in (0..3) %}
st1 { v{{neon}}.s }[{{lane}}], [x4], x7
{% endfor %}
{% endfor %}
ld1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x8]
{% for neon in (0..3) %}
{% for lane in (0..3) %}
st1 { v{{neon}}.s }[{{lane}}], [x4], x7
{% endfor %}
{% endfor %}
add x5, x5, x6

add x3, x3, 1
cmp x3, 64
bne .loop_store

b .non_linear_loop
*/

.return:
{{ AMX_CLR }}
ret
1 change: 1 addition & 0 deletions linalg/src/arm64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ pub fn plug(ops: &mut Ops) {
log::info!("AMX optimisation activated");
ops.mmm_f16 = Box::new(|_, _, _| apple_amx::apple_amx_mmm_f16_64x32.mmm());
ops.mmm_f32 = Box::new(|_, _, _| apple_amx::apple_amx_mmm_f32_32x32.mmm());
ops.mmv_f16 = Box::new(|_, _| apple_amx::apple_amx_mmm_f16_64x1.mmm());
ops.mmv_f32 = Box::new(|_, _| apple_amx::apple_amx_mmm_f32_32x1.mmm());
} else {
log::info!("No AMX optimisation");
Expand Down

0 comments on commit c54c235

Please sign in to comment.