Skip to content

Commit

Permalink
Merge pull request #172 from greatroar/decoder-fixes
Browse files Browse the repository at this point in the history
Decoder improvements
  • Loading branch information
pierrec authored Mar 22, 2022
2 parents bc1239b + db11e26 commit b8cae7c
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 43 deletions.
57 changes: 32 additions & 25 deletions internal/lz4block/decode_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

// AX scratch
// BX scratch
// CX scratch
// DX token
// CX literal and match lengths
// DX token, match offset
//
// DI &dst
// SI &src
Expand Down Expand Up @@ -44,11 +44,9 @@ TEXT ·decodeBlock(SB), NOSPLIT, $48-80
MOVQ R9, R13
SUBQ $16, R13

loop:
// for si < len(src)
CMPQ SI, R9
JAE end
XORL CX, CX

loop:
// token := uint32(src[si])
MOVBLZX (SI), DX
INCQ SI
Expand All @@ -70,7 +68,7 @@ loop:
// copy shortcut

// A two-stage shortcut for the most common case:
// 1) If the literal length is 1..14, and there is enough space,
// 1) If the literal length is 0..14, and there is enough space,
// enter the shortcut and copy 16 bytes on behalf of the literals
// (in the fast mode, only 8 bytes can be safely copied this way).
// 2) Further if the match length is 4..18, copy 18 bytes in a similar
Expand All @@ -91,6 +89,8 @@ loop:
// If it doesn't work out, the info won't be wasted.
// offset := uint16(data[:2])
MOVWLZX (SI), DX
TESTL DX, DX
JE err_corrupt
ADDQ $2, SI
JC err_short_buf

Expand Down Expand Up @@ -122,7 +122,7 @@ loop:
LEAQ const_minMatch(DI)(CX*1), DI

// shortcut complete, load next token
JMP loop
JMP loopcheck

// Read the rest of the literal length:
// do { BX = src[si++]; lit_len += BX } while (BX == 0xFF).
Expand Down Expand Up @@ -213,13 +213,13 @@ memmove_lit:
SUBQ $16, R13

finish_lit_copy:
CMPQ SI, R9
JAE end

offset:
// CX := mLen
// free up DX to use for offset
MOVQ DX, CX
MOVL DX, CX
ANDL $0xF, CX

CMPQ SI, R9
JAE end

// offset
// si += 2
Expand All @@ -231,10 +231,8 @@ offset:
MOVWQZX -2(SI), DX

// 0 offset is invalid
CMPQ DX, $0
JEQ err_corrupt

ANDB $0xF, CX
TESTL DX, DX
JEQ err_corrupt

match_len_loop_pre:
// if mlen != 0xF
Expand Down Expand Up @@ -301,7 +299,7 @@ copy_match_loop:
DECQ CX
JNZ copy_match_loop

JMP loop
JMP loopcheck

copy_interior_match:
CMPQ CX, $16
Expand All @@ -317,7 +315,8 @@ copy_interior_match:
MOVOU X0, (DI)

ADDQ CX, DI
JMP loop
XORL CX, CX
JMP loopcheck

copy_match_from_dict:
// CX = match_len
Expand Down Expand Up @@ -420,8 +419,21 @@ memmove_match:
SUBQ $16, R13
MOVQ dict_base+48(FP), R14
MOVQ dict_len+56(FP), R15
XORL CX, CX

JMP loop
loopcheck:
// for si < len(src)
CMPQ SI, R9
JB loop

end:
// Remaining length must be zero.
TESTQ CX, CX
JNE err_corrupt

SUBQ R11, DI
MOVQ DI, ret+72(FP)
RET

err_corrupt:
MOVQ $-1, ret+72(FP)
Expand All @@ -434,8 +446,3 @@ err_short_buf:
err_short_dict:
MOVQ $-3, ret+72(FP)
RET

end:
SUBQ R11, DI
MOVQ DI, ret+72(FP)
RET
8 changes: 5 additions & 3 deletions internal/lz4block/decode_arm.s
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,13 @@ copyLiteralFinish:
MOVB.NE tmp2, -1(dst)

copyLiteralDone:
CMP src, srcend
BEQ end

// Initial part of match length.
// This frees up the token register for reuse as offset.
AND $15, token, len

CMP src, srcend
BEQ end

// Read offset.
ADD.S $2, src
BCS shortSrc
Expand Down Expand Up @@ -213,6 +213,8 @@ copyMatchDone:
BNE loop

end:
CMP $0, len
BNE corrupt
SUB dstorig, dst, tmp1
MOVW tmp1, ret+36(FP)
RET
Expand Down
25 changes: 13 additions & 12 deletions internal/lz4block/decode_arm64.s
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@ copyLiteralShortEnd:
MOVB.P tmp4, 1(dst)

copyLiteralDone:
// Initial part of match length.
AND $15, token, len

CMP src, srcend
BEQ end

Expand All @@ -129,8 +132,7 @@ copyLiteralDone:
MOVHU -2(src), offset
CBZ offset, corrupt

// Read match length.
AND $15, token, len
// Read rest of match length.
CMP $15, len
BNE readMatchlenDone

Expand Down Expand Up @@ -174,11 +176,9 @@ copyDict:
CBZ len, copyMatchDone

// If the match extends beyond the dictionary, the rest is at dstorig.
// Recompute the offset for the next check.
MOVD dstorig, match

// The code up to copyMatchLoop1 assumes len >= minMatch.
CMP $const_minMatch, len
BLO copyMatchLoop1
SUB dstorig, dst, offset

copyMatchTry8:
// Copy doublewords if both len and offset are at least eight.
Expand All @@ -190,29 +190,30 @@ copyMatchTry8:
AND $7, len, lenRem
SUB $8, len
copyMatchLoop8:
SUBS $8, len
MOVD.P 8(match), tmp1
MOVD.P tmp1, 8(dst)
SUBS $8, len
BPL copyMatchLoop8

ADD lenRem, match
MOVD (match)(len), tmp2 // match+len == match+lenRem-8.
ADD lenRem, dst
MOVD -8(match), tmp2
MOVD $0, len
MOVD tmp2, -8(dst)
B copyMatchDone

copyMatchLoop1:
// Finish with a byte-at-a-time copy.
SUB $1, len
// Byte-at-a-time copy for small offsets.
MOVBU.P 1(match), tmp2
MOVB.P tmp2, 1(dst)
CBNZ len, copyMatchLoop1
SUBS $1, len
BNE copyMatchLoop1

copyMatchDone:
CMP src, srcend
BNE loop

end:
CBNZ len, corrupt
SUB dstorig, dst, tmp1
MOVD tmp1, ret+72(FP)
RET
Expand Down
1 change: 1 addition & 0 deletions internal/lz4block/decode_asm.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//go:build (amd64 || arm || arm64) && !appengine && gc && !noasm
// +build amd64 arm arm64
// +build !appengine
// +build gc
Expand Down
8 changes: 5 additions & 3 deletions internal/lz4block/decode_other.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,11 @@ func decodeBlock(dst, src, dict []byte) (ret int) {
di += lLen
}
}
if si == uint(len(src)) {

mLen := b & 0xF
if si == uint(len(src)) && mLen == 0 {
break
} else if si > uint(len(src)) {
} else if si >= uint(len(src)) {
return hasError
}

Expand All @@ -86,7 +88,7 @@ func decodeBlock(dst, src, dict []byte) (ret int) {
si += 2

// Match.
mLen := minMatch + b&0xF
mLen += minMatch
if mLen == minMatch+0xF {
for {
x := uint(src[si])
Expand Down
23 changes: 23 additions & 0 deletions internal/lz4block/decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ func TestBlockDecode(t *testing.T) {
emitSeq(strings.Repeat("A", 15), 0, 0),
bytes.Repeat([]byte("A"), 15),
},
{
"literal_only_long_2",
emitSeq(strings.Repeat("A", 16), 0, 0),
bytes.Repeat([]byte("A"), 16),
},
{
"repeat_match_len",
emitSeq("a", 1, 4),
Expand All @@ -115,6 +120,13 @@ func TestBlockDecode(t *testing.T) {
emitSeq("A", 1, 16),
bytes.Repeat([]byte("A"), 17),
},
{
// Triggers a case in the amd64 decoder where its last action
// is a call to runtime.memmove.
"memmove_last",
emitSeq(strings.Repeat("ABCD", 20), 80, 60),
bytes.Repeat([]byte("ABCD"), 36)[:140],
},
{
"repeat_match_log_len_2_seq",
concat(emitSeq("a", 1, 15), emitSeq("B", 1, 15), emitSeq("end", 0, 0)),
Expand Down Expand Up @@ -171,6 +183,11 @@ func TestDecodeBlockInvalid(t *testing.T) {
"\x20a", // litlen = 2 but only a single-byte literal
100,
},
{
"no_space_for_offset",
"\x01", // mLen > 0 but no following offset.
10,
},
{
"write_beyond_len_dst",
"\x1b0\x01\x00000000000000",
Expand All @@ -181,6 +198,12 @@ func TestDecodeBlockInvalid(t *testing.T) {
"\x000000",
10,
},
{
// Zero offset in a short literal.
"zero_offset",
"\xe1abcdefghijklmn\x00\x00\xe0abcdefghijklmn",
40,
},
} {
t.Run(test.name, func(t *testing.T) {
dst := make([]byte, test.size+8)
Expand Down

0 comments on commit b8cae7c

Please sign in to comment.