Skip to content

Commit

Permalink
go/mysql: performance optimizations in protocol encoding
Browse files Browse the repository at this point in the history
This employs a couple of tricks that, combined, seemed fruitful:

* Swapping to binary.LittleEndian.Put* on the basic calls gets us a free
  boost while removing code. The main win from this swap is the slice
  boundary check, resulting in a massive boost. I kept it inlined, but
  added my own boundary checking in `writeLenEncInt` since swapping it
  out here resulted in a very minor performance regression from the
  current results. I assume this comes from the extra coercion needed to the
  uint* type, and another reslice.
* Reslicing the byte slice early so all future operations work on
  0-index rather than pos+ indexing. This seemed to be a pretty sizeable
  win without needing to do more addition on every operation later to
  determine the index, they get swapped out for constants.
* Read path employs the same early reslicing, but already has explicit
  bounds checks.
* Rewrite `writeZeroes` to utilize the Go memclr optimization.

```
$ benchstat {old,new}.txt
goos: darwin
goarch: arm64
pkg: vitess.io/vitess/go/mysql
                                 │    old.txt     │               new.txt                │
                                 │     sec/op     │    sec/op     vs base                │
EncWriteInt/16-bit-10               0.4685n ±  0%   0.3516n ± 0%  -24.94% (p=0.000 n=10)
EncWriteInt/16-bit-lenencoded-10     2.049n ±  0%    2.049n ± 0%        ~ (p=0.972 n=10)
EncWriteInt/24-bit-lenencoded-10     1.987n ±  0%    2.056n ± 0%   +3.45% (p=0.000 n=10)
EncWriteInt/32-bit-10               0.7819n ±  0%   0.3906n ± 0%  -50.05% (p=0.000 n=10)
EncWriteInt/64-bit-10               1.4080n ±  0%   0.4684n ± 0%  -66.73% (p=0.000 n=10)
EncWriteInt/64-bit-lenencoded-10     3.126n ±  0%    2.051n ± 0%  -34.40% (p=0.000 n=10)
EncWriteZeroes/4-bytes-10           2.5030n ±  0%   0.3123n ± 0%  -87.52% (p=0.000 n=10)
EncWriteZeroes/10-bytes-10          4.3815n ±  0%   0.3120n ± 0%  -92.88% (p=0.000 n=10)
EncWriteZeroes/23-bytes-10          8.4575n ±  0%   0.3124n ± 0%  -96.31% (p=0.000 n=10)
EncWriteZeroes/55-bytes-10         20.8750n ± 10%   0.6245n ± 0%  -97.01%
EncReadInt/16-bit-10                 2.050n ±  0%    2.068n ± 1%   +0.90% (p=0.001 n=10)
EncReadInt/24-bit-10                 2.034n ±  0%    2.050n ± 0%   +0.76% (p=0.000 n=10)
EncReadInt/64-bit-10                 2.819n ±  1%    2.187n ± 0%  -22.41% (p=0.000 n=10)
geomean                              2.500n         0.8363n       -66.55%
```

Signed-off-by: Matt Robenolt <matt@ydekproductions.com>
  • Loading branch information
mattrobenolt committed Jul 5, 2024
1 parent cb2d0df commit 2a6a739
Show file tree
Hide file tree
Showing 2 changed files with 197 additions and 56 deletions.
111 changes: 59 additions & 52 deletions go/mysql/encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,31 +47,37 @@ func lenEncIntSize(i uint64) int {
}

// writeLenEncInt stores i into data, starting at pos, as a length-encoded
// integer and returns the position immediately after the last byte written.
//
// The encoding is chosen by magnitude:
//   - i < 251:   a single byte holding the value itself
//   - i < 1<<16: marker 0xfc followed by 2 little-endian bytes
//   - i < 1<<24: marker 0xfd followed by 3 little-endian bytes
//   - otherwise: marker 0xfe followed by 8 little-endian bytes
//
// The function panics if data is too short for the selected encoding.
func writeLenEncInt(data []byte, pos int, i uint64) int {
	// Reslice at pos once so every store below uses a constant index
	// instead of recomputing pos+k.
	data = data[pos:]

	switch {
	case i < 251:
		data[0] = byte(i)
		return pos + 1
	case i < 1<<16:
		_ = data[2] // early bounds check
		data[0] = 0xfc
		data[1] = byte(i)
		data[2] = byte(i >> 8)
		return pos + 3
	case i < 1<<24:
		_ = data[3] // early bounds check
		data[0] = 0xfd
		data[1] = byte(i)
		data[2] = byte(i >> 8)
		data[3] = byte(i >> 16)
		return pos + 4
	default:
		_ = data[8] // early bounds check
		data[0] = 0xfe
		data[1] = byte(i)
		data[2] = byte(i >> 8)
		data[3] = byte(i >> 16)
		data[4] = byte(i >> 24)
		data[5] = byte(i >> 32)
		data[6] = byte(i >> 40)
		data[7] = byte(i >> 48)
		data[8] = byte(i >> 56)
		return pos + 9
	}
}
Expand Down Expand Up @@ -101,28 +107,17 @@ func writeByte(data []byte, pos int, value byte) int {
}

// writeUint16 stores value into data at pos in little-endian byte order and
// returns the position just past the two bytes written. It panics if fewer
// than two bytes are available at pos.
func writeUint16(data []byte, pos int, value uint16) int {
	binary.LittleEndian.PutUint16(data[pos:], value)
	return pos + 2
}

// writeUint32 stores value into data at pos in little-endian byte order and
// returns the position just past the four bytes written. It panics if fewer
// than four bytes are available at pos.
func writeUint32(data []byte, pos int, value uint32) int {
	binary.LittleEndian.PutUint32(data[pos:], value)
	return pos + 4
}

// writeUint64 stores value into data at pos in little-endian byte order and
// returns the position just past the eight bytes written. It panics if fewer
// than eight bytes are available at pos.
func writeUint64(data []byte, pos int, value uint64) int {
	binary.LittleEndian.PutUint64(data[pos:], value)
	return pos + 8
}

Expand All @@ -137,10 +132,16 @@ func writeLenEncString(data []byte, pos int, value string) int {
}

// writeZeroes sets len bytes of data to zero, starting at pos, and returns
// the position just past the zeroed region (pos + len).
//
// It panics if the region [pos, pos+len) does not fit inside data.
func writeZeroes(data []byte, pos int, len int) int {
	end := pos + len
	// Reslice once, then zero the whole window with the built-in clear,
	// which avoids a per-byte store loop with its own index arithmetic.
	clear(data[pos:end])
	return end
}

//
Expand Down Expand Up @@ -228,6 +229,7 @@ func readFixedLenUint64(data []byte) (uint64, bool) {
case 3: // 2 bytes
return uint64(binary.LittleEndian.Uint16(data[1:])), true
case 4: // 3 bytes
_ = data[3] // early bounds check
return uint64(data[1]) |
uint64(data[2])<<8 |
uint64(data[3])<<16, true
Expand All @@ -242,37 +244,42 @@ func readLenEncInt(data []byte, pos int) (uint64, int, bool) {
if pos >= len(data) {
return 0, 0, false
}
switch data[pos] {

// reslice to avoid arithmetic below
data = data[pos:]

switch data[0] {
case 0xfc:
// Encoded in the next 2 bytes.
if pos+2 >= len(data) {
if 2 >= len(data) {
return 0, 0, false
}
return uint64(data[pos+1]) |
uint64(data[pos+2])<<8, pos + 3, true
return uint64(data[1]) |
uint64(data[2])<<8, pos + 3, true
case 0xfd:
// Encoded in the next 3 bytes.
if pos+3 >= len(data) {
if 3 >= len(data) {
return 0, 0, false
}
return uint64(data[pos+1]) |
uint64(data[pos+2])<<8 |
uint64(data[pos+3])<<16, pos + 4, true
return uint64(data[1]) |
uint64(data[2])<<8 |
uint64(data[3])<<16, pos + 4, true
case 0xfe:
// Encoded in the next 8 bytes.
if pos+8 >= len(data) {
if 8 >= len(data) {
return 0, 0, false
}
return uint64(data[pos+1]) |
uint64(data[pos+2])<<8 |
uint64(data[pos+3])<<16 |
uint64(data[pos+4])<<24 |
uint64(data[pos+5])<<32 |
uint64(data[pos+6])<<40 |
uint64(data[pos+7])<<48 |
uint64(data[pos+8])<<56, pos + 9, true
return uint64(data[1]) |
uint64(data[2])<<8 |
uint64(data[3])<<16 |
uint64(data[4])<<24 |
uint64(data[5])<<32 |
uint64(data[6])<<40 |
uint64(data[7])<<48 |
uint64(data[8])<<56, pos + 9, true
default:
return uint64(data[0]), pos + 1, true
}
return uint64(data[pos]), pos + 1, true
}

func readLenEncString(data []byte, pos int) (string, int, bool) {
Expand Down
142 changes: 138 additions & 4 deletions go/mysql/encoding_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ func TestEncUint16(t *testing.T) {

_, _, ok = readUint16(data, 9)
assert.False(t, ok, "readUint16 returned ok=true for shorter value")

}

func TestEncBytes(t *testing.T) {
Expand All @@ -122,7 +121,6 @@ func TestEncBytes(t *testing.T) {

_, _, ok = readBytes(data, 9, 2)
assert.False(t, ok, "readBytes returned ok=true for shorter value")

}

func TestEncUint32(t *testing.T) {
Expand All @@ -145,7 +143,6 @@ func TestEncUint32(t *testing.T) {

_, _, ok = readUint32(data, 7)
assert.False(t, ok, "readUint32 returned ok=true for shorter value")

}

func TestEncUint64(t *testing.T) {
Expand All @@ -169,7 +166,6 @@ func TestEncUint64(t *testing.T) {

_, _, ok = readUint64(data, 7)
assert.False(t, ok, "readUint64 returned ok=true for shorter value")

}

func TestEncString(t *testing.T) {
Expand Down Expand Up @@ -317,3 +313,141 @@ func TestEncString(t *testing.T) {
}
}
}

func TestWriteZeroes(t *testing.T) {
buf := make([]byte, 32)
resetBuf := func() {
t.Helper()
for i := range len(buf) {
buf[i] = 'f'
}
}

allMatch := func(b []byte, c byte) bool {
for i := range b {
if b[i] != c {
return false
}
}
return true
}

t.Run("0-offset", func(t *testing.T) {
for _, size := range []int{4, 10, 23, 24, 25, 26, 27} {
resetBuf()
pos := writeZeroes(buf, 0, size)
assert.Equal(t, size, pos, "expected to advance pos to %d, got %d", size, pos)
assert.True(t, allMatch(buf[:pos], 0), "buffer should be zeroes, %v", buf[:pos])
assert.True(t, allMatch(buf[pos:], 'f'), "buffer should be dirty, %v", buf[pos:])
}
})

t.Run("3-offset", func(t *testing.T) {
offset := 3
for _, size := range []int{4, 10, 23, 24, 25, 26, 27} {
resetBuf()
pos := writeZeroes(buf, offset, size)
assert.Equal(t, offset+size, pos, "expected to advance pos to %d, got %d", offset+size, pos)
assert.True(t, allMatch(buf[:offset], 'f'), "buffer should be dirty, %v", buf[offset:pos])
assert.True(t, allMatch(buf[offset:pos], 0), "buffer should be zeroes, %v", buf[:pos])
assert.True(t, allMatch(buf[pos:], 'f'), "buffer should be dirty, %v", buf[pos:])
}
})
}

func BenchmarkEncWriteInt(b *testing.B) {
buf := make([]byte, 16)

b.Run("16-bit", func(b *testing.B) {
value := uint16(0x0100)
for range b.N {
_ = writeUint16(buf, 0, value)
}
})

b.Run("16-bit-lenencoded", func(b *testing.B) {
value := uint64(0x0100)
for range b.N {
_ = writeLenEncInt(buf, 0, value)
}
})

b.Run("24-bit-lenencoded", func(b *testing.B) {
value := uint64(0xabcdef)
for range b.N {
_ = writeLenEncInt(buf, 0, value)
}
})

b.Run("32-bit", func(b *testing.B) {
value := uint32(0xabcdef)
for range b.N {
_ = writeUint32(buf, 0, value)
}
})

b.Run("64-bit", func(b *testing.B) {
value := uint64(0xa0a1a2a3a4a5a6a7)
for range b.N {
_ = writeUint64(buf, 0, value)
}
})

b.Run("64-bit-lenencoded", func(b *testing.B) {
value := uint64(0xa0a1a2a3a4a5a6a7)
for range b.N {
_ = writeLenEncInt(buf, 0, value)
}
})
}

func BenchmarkEncWriteZeroes(b *testing.B) {
buf := make([]byte, 128)

b.Run("4-bytes", func(b *testing.B) {
for range b.N {
_ = writeZeroes(buf, 16, 4)
}
})

b.Run("10-bytes", func(b *testing.B) {
for range b.N {
_ = writeZeroes(buf, 16, 10)
}
})

b.Run("23-bytes", func(b *testing.B) {
for range b.N {
_ = writeZeroes(buf, 16, 23)
}
})

b.Run("55-bytes", func(b *testing.B) {
for range b.N {
_ = writeZeroes(buf, 16, 55)
}
})
}

func BenchmarkEncReadInt(b *testing.B) {
b.Run("16-bit", func(b *testing.B) {
data := []byte{0xfc, 0xfb, 0x00}
for range b.N {
_, _, _ = readLenEncInt(data, 0)
}
})

b.Run("24-bit", func(b *testing.B) {
data := []byte{0xfd, 0x00, 0x00, 0x01}
for range b.N {
_, _, _ = readLenEncInt(data, 0)
}
})

b.Run("64-bit", func(b *testing.B) {
data := []byte{0xfe, 0xa7, 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1, 0xa0}
for range b.N {
_, _, _ = readLenEncInt(data, 0)
}
})
}

0 comments on commit 2a6a739

Please sign in to comment.