s2: Add Intel LZ4s converter #766

Merged: 6 commits, Mar 13, 2023
224 changes: 224 additions & 0 deletions internal/lz4ref/block.go
@@ -86,6 +86,13 @@ func CompressBlock(src, dst []byte) (int, error) {
return n, err
}

func CompressBlockLZ4s(src, dst []byte) (int, error) {
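// Reuse a pooled Compressor so its match table is not allocated fresh on every call.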
c := compressorPool.Get().(*Compressor)
n, err := c.CompressBlockLZ4s(src, dst)
compressorPool.Put(c)
return n, err
}

func (c *Compressor) CompressBlock(src, dst []byte) (int, error) {
// Zero out reused table to avoid non-deterministic output (issue #65).
c.reset()
@@ -290,6 +297,223 @@ lastLiterals:
return di, nil
}

func (c *Compressor) CompressBlockLZ4s(src, dst []byte) (int, error) {
// Zero out reused table to avoid non-deterministic output (issue #65).
c.reset()

const debug = false
const minMatch = 3
const addExtraLits = 32 // Suboptimal, but tests emitting literals without matches. Set to 0 to disable.

if debug {
fmt.Printf("lz4 block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
}

// Return 0, nil only if the destination buffer size is < CompressBlockBound.
isNotCompressible := len(dst) < CompressBlockBound(len(src))

// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
// This significantly speeds up incompressible data and usually has very small impact on compression.
// bytes to skip = 1 + (bytes since last match >> adaptSkipLog)
const adaptSkipLog = 7
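// E.g. 512 bytes past the last match, with adaptSkipLog = 7 the search
// advances 1 + (512>>7) = 5 bytes per probe instead of 1.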

// si: Current position of the search.
// anchor: Position of the current literals.
var si, di, anchor int
sn := len(src) - mfLimit
if sn <= 0 {
goto lastLiterals
}

// Fast scan strategy: the hash table only stores the last five-byte sequences.
for si < sn {
// Hash the next five bytes (sequence)...
match := binary.LittleEndian.Uint64(src[si:])
h := blockHash(match)
h2 := blockHash(match >> 8)

// We check a match at si, si+1 and si+2 and pick the first one we get.
// Checking all 3 only requires us to load the source once.
ref := c.get(h, si)
ref2 := c.get(h2, si+1)
c.put(h, si)
c.put(h2, si+1)

offset := si - ref

if offset <= 0 || offset >= winSize || uint32(match) != binary.LittleEndian.Uint32(src[ref:]) {
// No match. Start calculating another hash.
// The processor can usually do this out-of-order.
h = blockHash(match >> 16)
ref3 := c.get(h, si+2)

// Check the second match at si+1
si += 1
offset = si - ref2

if offset <= 0 || offset >= winSize || uint32(match>>8) != binary.LittleEndian.Uint32(src[ref2:]) {
// No match. Check the third match at si+2
si += 1
offset = si - ref3
c.put(h, si)

if offset <= 0 || offset >= winSize || uint32(match>>16) != binary.LittleEndian.Uint32(src[ref3:]) {
// Skip one extra byte (at si+3) before we check 3 matches again.
si += 2 + (si-anchor)>>adaptSkipLog
continue
}
}
}

// Match found.
lLen := si - anchor // Literal length.
// We already matched 4 bytes.
mLen := 4

// Extend backwards if we can, reducing literals.
tOff := si - offset - 1
for lLen > 0 && tOff >= 0 && src[si-1] == src[tOff] {
si--
tOff--
lLen--
mLen++
}

// Add the match length, so we continue the search at its end.
// Use mLen to store the offset base.
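// (mLen temporarily holds si+minMatch, so after the forward extension
// below, mLen = si - mLen becomes the match length minus minMatch,
// which is exactly the value the token encodes.)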
si, mLen = si+mLen, si+minMatch

// Find the longest match by searching in batches of 8 bytes.
for si+8 <= sn {
x := binary.LittleEndian.Uint64(src[si:]) ^ binary.LittleEndian.Uint64(src[si-offset:])
if x == 0 {
si += 8
} else {
// Stop at the first non-zero byte.
si += bits.TrailingZeros64(x) >> 3
break
}
}
if addExtraLits > 15 {
// Add X lits.
if lLen > addExtraLits {
dst[di] = 0xf0
dst[di+1] = byte(int(addExtraLits-15) & 0xff) // hack to compile
di += 2
copy(dst[di:di+addExtraLits], src[anchor:anchor+lLen])
di += addExtraLits
lLen -= addExtraLits
anchor += addExtraLits
}
}
mLen = si - mLen
if di >= len(dst) {
return 0, ErrInvalidSourceShortBuffer
}
if mLen < 0xF {
dst[di] = byte(mLen)
} else {
dst[di] = 0xF
}

// Encode literals length.
if debug {
fmt.Printf("emit %d literals\n", lLen)
}
if lLen < 0xF {
dst[di] |= byte(lLen << 4)
} else {
dst[di] |= 0xF0
di++
l := lLen - 0xF
for ; l >= 0xFF && di < len(dst); l -= 0xFF {
dst[di] = 0xFF
di++
}
if di >= len(dst) {
return 0, ErrInvalidSourceShortBuffer
}
dst[di] = byte(l)
}
di++

// Literals.
if di+lLen > len(dst) {
return 0, ErrInvalidSourceShortBuffer
}
copy(dst[di:di+lLen], src[anchor:anchor+lLen])
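// Advance 2 extra bytes to reserve room for the offset written below.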
di += lLen + 2
anchor = si

// Encode offset.
if debug {
fmt.Printf("emit copy, length: %d, offset: %d\n", mLen+minMatch, offset)
}
if di > len(dst) {
return 0, ErrInvalidSourceShortBuffer
}
dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)

// Encode match length part 2.
if mLen >= 0xF {
for mLen -= 0xF; mLen >= 0xFF && di < len(dst); mLen -= 0xFF {
dst[di] = 0xFF
di++
}
if di >= len(dst) {
return 0, ErrInvalidSourceShortBuffer
}
dst[di] = byte(mLen)
di++
}
// Check if we can load next values.
if si >= sn {
break
}
// Hash match end-2
h = blockHash(binary.LittleEndian.Uint64(src[si-2:]))
c.put(h, si-2)
}

lastLiterals:
if isNotCompressible && anchor == 0 {
// Incompressible.
return 0, nil
}

// Last literals.
if di >= len(dst) {
return 0, ErrInvalidSourceShortBuffer
}
lLen := len(src) - anchor
if lLen < 0xF {
dst[di] = byte(lLen << 4)
} else {
dst[di] = 0xF0
di++
for lLen -= 0xF; lLen >= 0xFF && di < len(dst); lLen -= 0xFF {
dst[di] = 0xFF
di++
}
if di >= len(dst) {
return 0, ErrInvalidSourceShortBuffer
}
dst[di] = byte(lLen)
}
di++

// Write the last literals.
if isNotCompressible && di >= anchor {
// Incompressible.
return 0, nil
}
if di+len(src)-anchor > len(dst) {
return 0, ErrInvalidSourceShortBuffer
}
di += copy(dst[di:di+len(src)-anchor], src[anchor:])
return di, nil
}

func UncompressBlock(dst, src []byte) (ret int) {
// Restrict capacities so we don't read or write out of bounds.
dst = dst[:len(dst):len(dst)]
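Since lz4ref is an internal package, the new CompressBlockLZ4s entry point is only reachable from code inside this repository. A minimal sketch of a test exercising it (hypothetical test, not part of the PR; it sizes the destination with the package's CompressBlockBound, since the compressor treats a smaller buffer as a signal to give up on incompressible input):

package lz4ref_test

import (
	"bytes"
	"testing"

	"github.com/klauspost/compress/internal/lz4ref"
)

func TestCompressBlockLZ4s(t *testing.T) {
	src := bytes.Repeat([]byte("compressible data "), 100)
	// dst should be at least CompressBlockBound(len(src)) bytes; an
	// undersized buffer makes the function report the input as
	// incompressible (n == 0) or return an error.
	dst := make([]byte, lz4ref.CompressBlockBound(len(src)))
	n, err := lz4ref.CompressBlockLZ4s(src, dst)
	if err != nil {
		t.Fatal(err)
	}
	if n == 0 {
		t.Fatal("input unexpectedly reported incompressible")
	}
	t.Logf("compressed %d -> %d bytes", len(src), n)
}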
28 changes: 22 additions & 6 deletions s2/_generate/gen.go
@@ -104,9 +104,11 @@ func main() {
o.genEmitCopyNoRepeat()
o.snappy = false
o.genMatchLen()
o.cvtLZ4BlockAsm()
o.cvtLZ4BlockAsm(false)
o.cvtLZ4BlockAsm(true)
o.snappy = true
o.cvtLZ4BlockAsm()
o.cvtLZ4BlockAsm(false)
o.cvtLZ4BlockAsm(true)

Generate()
}
@@ -2862,15 +2864,22 @@ func (o options) matchLenAlt(name string, a, b, len reg.GPVirtual, end LabelRef)
return matched
}

func (o options) cvtLZ4BlockAsm() {
func (o options) cvtLZ4BlockAsm(lz4s bool) {
snap := "Asm"
name := "lz4_s2_"
srcAlgo := "LZ4"
dstAlgo := "S2"
if o.snappy {
snap = "SnappyAsm"
name = "lz4_snappy_"
dstAlgo = "Snappy"
}
TEXT("cvtLZ4Block"+snap, NOSPLIT, "func(dst, src []byte) (uncompressed int, dstUsed int)")
Doc("cvtLZ4Block converts an LZ4 block to S2", "")
if lz4s {
name = strings.ReplaceAll(name, "lz4", "lz4s")
srcAlgo = "LZ4s"
}
TEXT("cvt"+srcAlgo+"Block"+snap, NOSPLIT, "func(dst, src []byte) (uncompressed int, dstUsed int)")
Doc("cvt"+srcAlgo+"Block converts an "+srcAlgo+" block to "+dstAlgo, "")
Pragma("noescape")
o.outputMargin = 10
o.maxOffset = math.MaxUint16
@@ -2914,7 +2923,10 @@ func (o options) cvtLZ4BlockAsm() {
JAE(LabelRef(name + "dstfull"))
}

const lz4MinMatch = 4
var lz4MinMatch = 4
if lz4s {
lz4MinMatch = 3
}

Label(name + "loop")
checkSrc(src)
@@ -2971,6 +2983,10 @@ func (o options) cvtLZ4BlockAsm() {
JMP(LabelRef(name + "corrupt"))

Label(name + "match")
if lz4s {
CMPQ(ml, U8(lz4MinMatch))
JEQ(LabelRef(name + "loop"))
}
// if s >= len(src)-2 {
end := GP64()
LEAQ(Mem{Base: src, Disp: 2}, end)
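The lz4s branch above captures the one structural difference between the two formats: LZ4s uses a 3-byte minimum match, and a token whose match-length nibble is zero marks a literals-only sequence with no offset bytes, which is why the generated code compares ml against lz4MinMatch and jumps straight back to the loop before reading an offset. A rough pure-Go rendering of that sequence parsing (illustrative names, not the generated assembly; it assumes LZ4s keeps LZ4's 0xFF length-extension scheme, as the reference encoder above does):

package main

import (
	"encoding/binary"
	"fmt"
)

const lz4sMinMatch = 3

// readLZ4sSequence decodes one LZ4s sequence header starting at src[s] and
// returns the literal count, match length (0 for a literals-only sequence),
// offset, and the position of the next sequence. Bounds checks omitted.
func readLZ4sSequence(src []byte, s int) (lits, mLen, offset, next int) {
	token := src[s]
	s++
	lits = int(token >> 4)
	if lits == 15 { // extended literal length
		for src[s] == 0xFF {
			lits += 255
			s++
		}
		lits += int(src[s])
		s++
	}
	s += lits // skip over the literal bytes

	mLen = int(token & 0xF)
	if mLen == 0 {
		// LZ4s: a zero match-length nibble means literals only and no
		// offset bytes follow. This is the CMPQ/JEQ special case above.
		return lits, 0, 0, s
	}
	offset = int(binary.LittleEndian.Uint16(src[s:]))
	s += 2
	if mLen == 15 { // extended match length
		for src[s] == 0xFF {
			mLen += 255
			s++
		}
		mLen += int(src[s])
		s++
	}
	return lits, mLen + lz4sMinMatch, offset, s
}

func main() {
	// Token 0x20: two literals, match nibble 0 -> literals-only sequence.
	lits, mLen, off, next := readLZ4sSequence([]byte{0x20, 'h', 'i'}, 0)
	fmt.Println(lits, mLen, off, next) // 2 0 0 3
}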
8 changes: 8 additions & 0 deletions s2/encode_go.go
@@ -717,3 +717,11 @@ func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
panic("cvtLZ4BlockSnappyAsm should be unreachable")
}

func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
panic("cvtLZ4sBlockAsm should be unreachable")
}

func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
panic("cvtLZ4sBlockSnappyAsm should be unreachable")
}
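These pure-Go stubs only compile when the amd64 assembly is excluded, and the asm names are never invoked in that configuration, so the panics are unreachable; they exist to satisfy the compiler. Presumably the file keeps its existing build constraint, along the lines of:

//go:build !amd64 || appengine || !gc || noasm

package s2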
12 changes: 11 additions & 1 deletion s2/encodeblock_amd64.go

(Generated file; diff not rendered by default.)