diff --git a/s2/encode_best.go b/s2/encode_best.go index e7cf9d3dec..4bc80bc6a7 100644 --- a/s2/encode_best.go +++ b/s2/encode_best.go @@ -370,7 +370,7 @@ func encodeBlockBestSnappy(dst, src []byte) (d int) { } offset := m.s - m.offset - return score - emitCopySize(offset, m.length) + return score - emitCopyNoRepeatSize(offset, m.length) } matchAt := func(offset, s int, first uint32) match { @@ -581,6 +581,28 @@ func emitCopySize(offset, length int) int { return 2 } +// emitCopyNoRepeatSize returns the byte size charged for a copy with the given offset and length; encodeBlockBestSnappy subtracts it from a match score. +// +// It assumes that: +// 1 <= offset && offset <= math.MaxUint32 +// 4 <= length && length <= 1 << 24 +func emitCopyNoRepeatSize(offset, length int) int { + if offset >= 65536 { + return 5 + 5*(length/64) + } + + // Offset is below 65536 from here on, so it fits in 2 bytes. + if length > 64 { + // Length above 64: 3 bytes, plus 3 more for every full 60 bytes of length. + return 3 + 3*(length/60) + } + if length >= 12 || offset >= 2048 { + return 3 + } + // Length below 12 and offset below 2048: 2 bytes. + return 2 +} + // emitRepeatSize returns the number of bytes required to encode a repeat. // Length must be at least 4 and < 1<<24 func emitRepeatSize(offset, length int) int {