Skip to content

Commit

Permalink
encoding/base64: Optimize DecodeString
Browse files Browse the repository at this point in the history
Optimize DecodeString for the common case where most of the input isn't
a newline or a padding character.
Also add some testcases found when fuzzing this implementation against
upstream.
Change Decode benchmark to run with different input sizes.

name                 old time/op    new time/op    delta
DecodeString/2-4       71.5ns ± 4%    70.0ns ± 6%     ~     (p=0.246 n=5+5)
DecodeString/4-4        112ns ±25%      91ns ± 2%     ~     (p=0.056 n=5+5)
DecodeString/8-4        136ns ± 5%     126ns ± 5%   -7.33%  (p=0.016 n=5+5)
DecodeString/64-4       872ns ±29%     652ns ±21%  -25.23%  (p=0.032 n=5+5)
DecodeString/8192-4    90.9µs ±21%    61.0µs ±13%  -32.87%  (p=0.008 n=5+5)

name                 old speed      new speed      delta
DecodeString/2-4     56.0MB/s ± 4%  57.2MB/s ± 6%     ~     (p=0.310 n=5+5)
DecodeString/4-4     73.4MB/s ±23%  87.7MB/s ± 2%     ~     (p=0.056 n=5+5)
DecodeString/8-4     87.8MB/s ± 5%  94.8MB/s ± 5%   +7.98%  (p=0.016 n=5+5)
DecodeString/64-4     103MB/s ±24%   136MB/s ±19%  +32.63%  (p=0.032 n=5+5)
DecodeString/8192-4   122MB/s ±19%   180MB/s ±11%  +47.75%  (p=0.008 n=5+5)

Improves #19636

Change-Id: I39667f4fb682a12b3137946d017ad999553c5780
Reviewed-on: https://go-review.googlesource.com/34950
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
  • Loading branch information
josselin-c authored and bradfitz committed Apr 24, 2017
1 parent d9b1f9e commit 31c96fc
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 25 deletions.
45 changes: 24 additions & 21 deletions src/encoding/base64/base64.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,44 +273,50 @@ func (e CorruptInputError) Error() string {
// indicates if end-of-message padding or a partial quantum was encountered
// and thus any additional data is an error.
func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) {
var inIdx int
si := 0

// skip over newlines
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
si++
}

for si < len(src) && !end {
// Decode quantum using the base64 alphabet
var dbuf [4]byte
dinc, dlen := 3, 4

for j := range dbuf {
for j := 0; j < len(dbuf); j++ {
if len(src) == si {
if enc.padChar != NoPadding || j < 2 {
switch {
case j == 0:
return n, false, nil
case j == 1, enc.padChar != NoPadding:
return n, false, CorruptInputError(si - j)
}
dinc, dlen, end = j-1, j, true
break
}
in := src[si]
inIdx = si

si++
// skip over newlines
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
si++

out := enc.decodeMap[in]
if out != 0xFF {
dbuf[j] = out
continue
}

if in == '\n' || in == '\r' {
j--
continue
}
if rune(in) == enc.padChar {
// We've reached the end and there's padding
switch j {
case 0, 1:
// incorrect padding
return n, false, CorruptInputError(inIdx)
return n, false, CorruptInputError(si - 1)
case 2:
// "==" is expected, the first "=" is already consumed.
// skip over newlines
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
si++
}
if si == len(src) {
// not enough padding
return n, false, CorruptInputError(len(src))
Expand All @@ -321,10 +327,10 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) {
}

si++
// skip over newlines
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
si++
}
}
// skip over newlines
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
si++
}
if si < len(src) {
// trailing garbage
Expand All @@ -333,10 +339,7 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) {
dinc, dlen, end = 3, j, true
break
}
dbuf[j] = enc.decodeMap[in]
if dbuf[j] == 0xFF {
return n, false, CorruptInputError(inIdx)
}
return n, false, CorruptInputError(si - 1)
}

// Convert 4x 6bit source bytes into 3 bytes
Expand Down
21 changes: 17 additions & 4 deletions src/encoding/base64/base64_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package base64
import (
"bytes"
"errors"
"fmt"
"io"
"io/ioutil"
"reflect"
Expand Down Expand Up @@ -202,6 +203,9 @@ func TestDecodeCorrupt(t *testing.T) {
offset int // -1 means no corruption.
}{
{"", -1},
{"\n", -1},
{"AAA=\n", -1},
{"AAAA\n", -1},
{"!!!!", 0},
{"====", 0},
{"x===", 1},
Expand Down Expand Up @@ -468,10 +472,19 @@ func BenchmarkEncodeToString(b *testing.B) {
}

func BenchmarkDecodeString(b *testing.B) {
data := StdEncoding.EncodeToString(make([]byte, 8192))
b.SetBytes(int64(len(data)))
for i := 0; i < b.N; i++ {
StdEncoding.DecodeString(data)
sizes := []int{2, 4, 8, 64, 8192}
benchFunc := func(b *testing.B, benchSize int) {
data := StdEncoding.EncodeToString(make([]byte, benchSize))
b.SetBytes(int64(len(data)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
StdEncoding.DecodeString(data)
}
}
for _, size := range sizes {
b.Run(fmt.Sprintf("%d", size), func(b *testing.B) {
benchFunc(b, size)
})
}
}

Expand Down

0 comments on commit 31c96fc

Please sign in to comment.