Skip to content

Commit

Permalink
mask_arm64.s: Cleanup
Browse files: browse the repository at this point in the history
  • Loading branch information
nhooyr committed Oct 20, 2023
1 parent 32d0aa1 commit 4cc3d0e
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 15 deletions.
4 changes: 2 additions & 2 deletions mask_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,10 @@ less_than_4:

less_than_2:
TESTQ $1, CX
JZ done
JZ end
XORB SI, (AX)
ROLL $24, SI

done:
end:
MOVL SI, ret+24(FP)
RET
24 changes: 11 additions & 13 deletions mask_arm64.s
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ TEXT ·maskAsm(SB), NOSPLIT, $0-28
CMP $64, R1
BLT less_than_64

// TODO: allign memory like amd64
// TODO: align memory like amd64

loop_64:
VLD1 (R0), [V1.B16, V2.B16, V3.B16, V4.B16]
Expand All @@ -29,41 +29,39 @@ loop_64:
BGE loop_64

less_than_64:
// quick end
CBZ R1, end
TBZ $5, R1, less_than32
TBZ $5, R1, less_than_32
VLD1 (R0), [V1.B16, V2.B16]
VEOR V1.B16, V0.B16, V1.B16
VEOR V2.B16, V0.B16, V2.B16
VST1.P [V1.B16, V2.B16], 32(R0)

less_than32:
TBZ $4, R1, less_than16
less_than_32:
TBZ $4, R1, less_than_16
LDP (R0), (R11, R12)
EOR R11, R2, R11
EOR R12, R2, R12
STP.P (R11, R12), 16(R0)

less_than16:
TBZ $3, R1, less_than8
less_than_16:
TBZ $3, R1, less_than_8
MOVD (R0), R11
EOR R2, R11, R11
MOVD.P R11, 8(R0)

less_than8:
TBZ $2, R1, less_than4
less_than_8:
TBZ $2, R1, less_than_4
MOVWU (R0), R11
EORW R2, R11, R11
MOVWU.P R11, 4(R0)

less_than4:
TBZ $1, R1, less_than2
less_than_4:
TBZ $1, R1, less_than_2
MOVHU (R0), R11
EORW R3, R11, R11
MOVHU.P R11, 2(R0)
RORW $16, R3

less_than2:
less_than_2:
TBZ $0, R1, end
MOVBU (R0), R11
EORW R3, R11, R11
Expand Down

0 comments on commit 4cc3d0e

Please sign in to comment.