Skip to content

Commit

Permalink
go/mysql: performance optimizations in protocol encoding
Browse files Browse the repository at this point in the history
This employs a couple of tricks that, combined, seemed fruitful:

* Swapping to binary.LittleEndian.Put* on the basic calls gets us a free
  boost while removing code. The main win from this swap is the slice
  boundary check, resulting in a massive boost. I kept it inlined, but
  added my own boundary checking in `writeLenEncInt` since swapping it
  out here resulted in a very minor performance regression from the
  current results. I assume this is from the extra coercion needed to the
  uint* type, and another reslice.
* Reslicing the byte slice early so all future operations work on
  0-index rather than pos+ indexing. This seemed to be a pretty sizeable
  win: later operations no longer need an extra addition to determine the
  index, since the offsets get swapped out for constants.
* Read path employs the same early reslicing, but already has explicit
  bounds checks.
* Rewrite and specialize writeZeroes for the known constants in the
  MySQL protocol, as well as a more generic algorithm that works in
  chunks of 4 bytes.

One interesting observation from `writeZeroes`: the specialized versions
get highly optimized, I assume because no branching is necessary at
all. The inlined zerofill can be highly optimized by the compiler.
See: https://godbolt.org/z/E68heoddc

```
$ benchstat {old,new}.txt
goos: darwin
goarch: arm64
pkg: vitess.io/vitess/go/mysql
                                 │    old.txt    │               new.txt                │
                                 │    sec/op     │    sec/op     vs base                │
EncWriteInt/16-bit-10              0.4685n ±  0%   0.3604n ± 0%  -23.07% (p=0.000 n=10)
EncWriteInt/16-bit-lenencoded-10    2.049n ±  0%    2.096n ± 0%   +2.32% (p=0.000 n=10)
EncWriteInt/24-bit-lenencoded-10    1.987n ±  0%    2.099n ± 0%   +5.66% (p=0.000 n=10)
EncWriteInt/32-bit-10              0.7819n ±  0%   0.3994n ± 3%  -48.91% (p=0.000 n=10)
EncWriteInt/64-bit-10              1.4080n ±  0%   0.5075n ± 1%  -63.95% (p=0.000 n=10)
EncWriteInt/64-bit-lenencoded-10    3.126n ±  0%    2.219n ± 1%  -29.03% (p=0.000 n=10)
EncWriteZeroes/4-bytes-10          2.5030n ±  0%   0.5842n ± 2%  -76.66% (p=0.000 n=10)
EncWriteZeroes/10-bytes-10         4.3815n ±  0%   0.6735n ± 1%  -84.63% (p=0.000 n=10)
EncWriteZeroes/23-bytes-10          8.458n ±  0%    2.157n ± 6%  -74.50% (p=0.000 n=10)
EncWriteZeroes/55-bytes-10          20.88n ± 10%    12.31n ± 1%  -41.03% (p=0.000 n=10)
EncReadInt/16-bit-10                2.050n ±  0%    2.182n ± 1%   +6.44% (p=0.000 n=10)
EncReadInt/24-bit-10                2.034n ±  0%    2.066n ± 5%   +1.55% (p=0.000 n=10)
EncReadInt/64-bit-10                2.819n ±  1%    2.194n ± 0%  -22.16%
geomean                             2.500n          1.392n       -44.33%
```
  • Loading branch information
mattrobenolt committed Jul 5, 2024
1 parent cb2d0df commit c549579
Show file tree
Hide file tree
Showing 4 changed files with 296 additions and 60 deletions.
6 changes: 3 additions & 3 deletions go/mysql/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ func (c *Conn) writeSSLRequest(capabilities uint32, characterSet uint8, params *
pos = writeUint32(data, pos, capabilityFlags)

// Max-packet size, always 0. See doc.go.
pos = writeZeroes(data, pos, 4)
pos = writeZeroes4(data, pos)

// Character set.
_ = writeByte(data, pos, characterSet)
Expand Down Expand Up @@ -570,13 +570,13 @@ func (c *Conn) writeHandshakeResponse41(capabilities uint32, scrambledPassword [
pos = writeUint32(data, pos, capabilityFlags)

// Max-packet size, always 0. See doc.go.
pos = writeZeroes(data, pos, 4)
pos = writeZeroes4(data, pos)

// Character set.
pos = writeByte(data, pos, characterSet)

// 23 reserved bytes, all 0.
pos = writeZeroes(data, pos, 23)
pos = writeZeroes23(data, pos)

// Username
pos = writeNullString(data, pos, params.Uname)
Expand Down
206 changes: 154 additions & 52 deletions go/mysql/encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,31 +47,37 @@ func lenEncIntSize(i uint64) int {
}

// writeLenEncInt writes i into data starting at pos using the MySQL
// length-encoded integer format and returns the position just past the
// bytes written.
//
// The encoding is selected by magnitude:
//   - i < 251:   a single byte holding the value itself
//   - i < 1<<16: 0xfc followed by 2 little-endian bytes
//   - i < 1<<24: 0xfd followed by 3 little-endian bytes
//   - otherwise: 0xfe followed by 8 little-endian bytes
//
// It panics if data is too short to hold the chosen encoding at pos.
func writeLenEncInt(data []byte, pos int, i uint64) int {
	// Reslice at pos so every store below uses a constant index. All
	// indexing after this point is relative to the resliced view; using
	// pos-based indexes here would write at twice the intended offset.
	data = data[pos:]

	switch {
	case i < 251:
		data[0] = byte(i)
		return pos + 1
	case i < 1<<16:
		_ = data[2] // early bounds check
		data[0] = 0xfc
		data[1] = byte(i)
		data[2] = byte(i >> 8)
		return pos + 3
	case i < 1<<24:
		_ = data[3] // early bounds check
		data[0] = 0xfd
		data[1] = byte(i)
		data[2] = byte(i >> 8)
		data[3] = byte(i >> 16)
		return pos + 4
	default:
		_ = data[8] // early bounds check
		data[0] = 0xfe
		data[1] = byte(i)
		data[2] = byte(i >> 8)
		data[3] = byte(i >> 16)
		data[4] = byte(i >> 24)
		data[5] = byte(i >> 32)
		data[6] = byte(i >> 40)
		data[7] = byte(i >> 48)
		data[8] = byte(i >> 56)
		return pos + 9
	}
}
Expand Down Expand Up @@ -101,28 +107,17 @@ func writeByte(data []byte, pos int, value byte) int {
}

// writeUint16 stores value as 2 little-endian bytes at data[pos:] and
// returns the position just past them. It panics if fewer than 2 bytes
// are available at pos.
func writeUint16(data []byte, pos int, value uint16) int {
	// PutUint16 performs a single bounds check for the whole store, so no
	// per-byte writes are needed.
	binary.LittleEndian.PutUint16(data[pos:], value)
	return pos + 2
}

// writeUint32 stores value as 4 little-endian bytes at data[pos:] and
// returns the position just past them. It panics if fewer than 4 bytes
// are available at pos.
func writeUint32(data []byte, pos int, value uint32) int {
	// PutUint32 performs a single bounds check for the whole store, so no
	// per-byte writes are needed.
	binary.LittleEndian.PutUint32(data[pos:], value)
	return pos + 4
}

// writeUint64 stores value as 8 little-endian bytes at data[pos:] and
// returns the position just past them. It panics if fewer than 8 bytes
// are available at pos.
func writeUint64(data []byte, pos int, value uint64) int {
	// PutUint64 performs a single bounds check for the whole store, so no
	// per-byte writes are needed.
	binary.LittleEndian.PutUint64(data[pos:], value)
	return pos + 8
}

Expand All @@ -137,10 +132,111 @@ func writeLenEncString(data []byte, pos int, value string) int {
}

// writeZeroes sets len bytes of data to zero starting at pos and returns
// pos + len. It panics if data is too short for the requested range.
//
// Note that the len parameter shadows the builtin of the same name inside
// this function.
func writeZeroes(data []byte, pos int, len int) int {
	// XXX: we are hardcoding for the fast path of the known
	// constants used within the MySQL protocol. It seems the lengths
	// used are 4, 10, 23, we can highly optimize for these
	// known values by inlining the whole zerofill.
	switch len {
	case 4:
		return writeZeroes4(data, pos)
	case 10:
		return writeZeroes10(data, pos)
	case 23:
		return writeZeroes23(data, pos)
	default:
		// reslice early so the stores below use constant indexes
		data = data[pos:]

		// fall back to a slower chunking method to handle
		// arbitrary lengths if needed.
		zeroes := 0

		// Clear four bytes per iteration, advancing the view each time.
		for ; zeroes < len-3; zeroes += 4 {
			_ = data[3] // early bounds check
			data[0] = 0
			data[1] = 0
			data[2] = 0
			data[3] = 0
			data = data[4:]
		}

		// Clear the 0-3 bytes left over after the last full chunk.
		switch len - zeroes {
		case 3:
			_ = data[2] // early bounds check
			data[0] = 0
			data[1] = 0
			data[2] = 0
		case 2:
			_ = data[1] // early bounds check
			data[0] = 0
			data[1] = 0
		case 1:
			data[0] = 0
		}

		return pos + len
	}
}

// writeZeroes4 sets the 4 bytes of data starting at pos to zero and
// returns pos + 4. It panics if fewer than 4 bytes are available at pos.
func writeZeroes4(data []byte, pos int) int {
	// reslice early at pos to avoid arithmetic below
	data = data[pos:]

	_ = data[3] // early bounds check
	data[0] = 0
	data[1] = 0
	data[2] = 0
	data[3] = 0
	return pos + 4
}

// writeZeroes10 sets the 10 bytes of data starting at pos to zero and
// returns pos + 10. It panics if fewer than 10 bytes are available at pos.
func writeZeroes10(data []byte, pos int) int {
	// reslice early at pos to avoid arithmetic below
	data = data[pos:]

	_ = data[9] // early bounds check
	data[0] = 0
	data[1] = 0
	data[2] = 0
	data[3] = 0
	data[4] = 0
	data[5] = 0
	data[6] = 0
	data[7] = 0
	data[8] = 0
	data[9] = 0
	return pos + 10
}

// writeZeroes23 sets the 23 bytes of data starting at pos to zero and
// returns pos + 23. It panics if fewer than 23 bytes are available at pos.
func writeZeroes23(data []byte, pos int) int {
	// reslice early at pos to avoid arithmetic below
	data = data[pos:]

	_ = data[22] // early bounds check
	data[0] = 0
	data[1] = 0
	data[2] = 0
	data[3] = 0
	data[4] = 0
	data[5] = 0
	data[6] = 0
	data[7] = 0
	data[8] = 0
	data[9] = 0
	data[10] = 0
	data[11] = 0
	data[12] = 0
	data[13] = 0
	data[14] = 0
	data[15] = 0
	data[16] = 0
	data[17] = 0
	data[18] = 0
	data[19] = 0
	data[20] = 0
	data[21] = 0
	data[22] = 0
	return pos + 23
}

//
Expand Down Expand Up @@ -228,6 +324,7 @@ func readFixedLenUint64(data []byte) (uint64, bool) {
case 3: // 2 bytes
return uint64(binary.LittleEndian.Uint16(data[1:])), true
case 4: // 3 bytes
_ = data[3] // early bounds check
return uint64(data[1]) |
uint64(data[2])<<8 |
uint64(data[3])<<16, true
// readLenEncInt decodes a MySQL length-encoded integer from data starting
// at pos. It returns the decoded value, the position just past the
// encoded bytes, and true on success. If pos is at or beyond the end of
// data, or the bytes required by the prefix are not all present, it
// returns (0, 0, false).
//
// Prefix bytes 0xfc, 0xfd and 0xfe introduce 2, 3 and 8 little-endian
// bytes respectively; any other first byte is returned as the value
// itself.
func readLenEncInt(data []byte, pos int) (uint64, int, bool) {
	if pos >= len(data) {
		return 0, 0, false
	}

	// Reslice so all reads below use constant indexes. From here on,
	// lengths and indexes are relative to the resliced view, not to pos.
	data = data[pos:]

	switch data[0] {
	case 0xfc:
		// Encoded in the next 2 bytes.
		if len(data) < 3 {
			return 0, 0, false
		}
		return uint64(data[1]) |
			uint64(data[2])<<8, pos + 3, true
	case 0xfd:
		// Encoded in the next 3 bytes.
		if len(data) < 4 {
			return 0, 0, false
		}
		return uint64(data[1]) |
			uint64(data[2])<<8 |
			uint64(data[3])<<16, pos + 4, true
	case 0xfe:
		// Encoded in the next 8 bytes.
		if len(data) < 9 {
			return 0, 0, false
		}
		return uint64(data[1]) |
			uint64(data[2])<<8 |
			uint64(data[3])<<16 |
			uint64(data[4])<<24 |
			uint64(data[5])<<32 |
			uint64(data[6])<<40 |
			uint64(data[7])<<48 |
			uint64(data[8])<<56, pos + 9, true
	default:
		// Any other first byte is the value itself.
		return uint64(data[0]), pos + 1, true
	}
}

func readLenEncString(data []byte, pos int) (string, int, bool) {
Expand Down
Loading

0 comments on commit c549579

Please sign in to comment.