Skip to content

Commit

Permalink
Implement json.Marshal just for strings (#4979)
Browse files Browse the repository at this point in the history
We use json.Marshal() in our code to convert strings to bytes. This function is very generic and can marshal any type. In our use case, it is called for the string type most of the time.
When we dig deeper into the function's code, most of the time is spent finding the right encoder function for the type, in our case stringEncoder.
This PR copies the code of encodeState.string() and defines it internally as a function that is called only
for the string type. Here are the benchmarks:

go test -v -run ^$ -bench BenchmarkJsonMarshal -benchtime=20s
[Decoder]: Using assembly version of decoder
goos: linux
goarch: amd64
pkg: github.com/dgraph-io/dgraph/query
BenchmarkJsonMarshal/STDJsonMarshal-largestring-16         	11113017	      2128 ns/op
BenchmarkJsonMarshal/stringJsonMarshal-largestring-16      	12233304	      1957 ns/op
BenchmarkJsonMarshal/STDJsonMarshal-smallstring-16         	100000000	       252 ns/op
BenchmarkJsonMarshal/stringJsonMarshal-smallstring-16      	271762213	        87.7 ns/op
BenchmarkJsonMarshal/STDJsonMarshal-specialchars-16        	37255737	       636 ns/op
BenchmarkJsonMarshal/stringJsonMarshal-specialchars-16     	52765609	       463 ns/op
PASS
ok  	github.com/dgraph-io/dgraph/query	159.218s
  • Loading branch information
ashish-goswami authored and danielmai committed Apr 24, 2020
1 parent 836faeb commit 5510fd2
Show file tree
Hide file tree
Showing 3 changed files with 368 additions and 1 deletion.
99 changes: 98 additions & 1 deletion query/outputnode.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ import (
"sort"
"strconv"
"strings"
"sync"
"time"
"unicode/utf8"

"github.com/golang/glog"
"github.com/pkg/errors"
Expand Down Expand Up @@ -128,12 +130,107 @@ var (
boolTrue = []byte("true")
boolFalse = []byte("false")
emptyString = []byte(`""`)

// The variables below are used by the stringJsonMarshal function.

// bufferPool supplies reusable *bytes.Buffer scratch buffers; its New hook
// returns a freshly allocated, empty buffer.
bufferPool = sync.Pool{
New: func() interface{} {
return new(bytes.Buffer)
},
}

// hex is the lowercase hexadecimal digit table that stringJsonMarshal indexes
// when it writes \u00XX and \u202X escape sequences.
hex = "0123456789abcdef"
// escapeHTML, when true, lets only the bytes marked in htmlSafeSet pass
// through unescaped; when false, bytes marked in safeSet pass through as well.
escapeHTML = true
)

// stringJsonMarshal encodes s as a double-quoted JSON string and returns the
// encoded bytes. It is a string-only stand-in for json.Marshal(), taken from
// encodeState.string(string, escapeHTML) in "encoding/json/encode.go", and it
// should be kept in sync with that function.
func stringJsonMarshal(s string) []byte {
	out := bufferPool.Get().(*bytes.Buffer)
	out.Reset()

	out.WriteByte('"')
	// pending is the offset of the first byte of s that has not been copied
	// into out yet; runs of safe bytes are copied in bulk when an escape is hit.
	pending := 0
	for pos := 0; pos < len(s); {
		ch := s[pos]

		if ch >= utf8.RuneSelf {
			// Not a single-byte rune: decode it and decide whether it needs
			// to be replaced by an escape sequence.
			r, width := utf8.DecodeRuneInString(s[pos:])
			invalid := r == utf8.RuneError && width == 1
			// U+2028 is LINE SEPARATOR and U+2029 is PARAGRAPH SEPARATOR.
			// Both are technically valid inside JSON strings, but they do not
			// work in JSONP, which has to be evaluated as JavaScript, and can
			// lead to security holes there. Escaping them is valid JSON, so it
			// is done unconditionally.
			// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
			separator := r == '\u2028' || r == '\u2029'
			if invalid || separator {
				if pending < pos {
					out.WriteString(s[pending:pos])
				}
				if invalid {
					out.WriteString(`\ufffd`)
				} else {
					out.WriteString(`\u202`)
					out.WriteByte(hex[r&0xF])
				}
				pending = pos + width
			}
			pos += width
			continue
		}

		// Single-byte rune that can be emitted verbatim.
		if htmlSafeSet[ch] || (!escapeHTML && safeSet[ch]) {
			pos++
			continue
		}

		// Single-byte rune that must be escaped: flush what came before it.
		if pending < pos {
			out.WriteString(s[pending:pos])
		}
		out.WriteByte('\\')
		switch ch {
		case '\n':
			out.WriteByte('n')
		case '\r':
			out.WriteByte('r')
		case '\t':
			out.WriteByte('t')
		case '"', '\\':
			out.WriteByte(ch)
		default:
			// Covers bytes < 0x20 other than \t, \n and \r. With escapeHTML
			// set it also covers <, > and &, which can lead to security holes
			// when user-controlled strings are rendered into JSON and served
			// to some browsers.
			out.WriteString(`u00`)
			out.WriteByte(hex[ch>>4])
			out.WriteByte(hex[ch&0xF])
		}
		pos++
		pending = pos
	}
	if pending < len(s) {
		out.WriteString(s[pending:])
	}
	out.WriteByte('"')

	// Copy the result out before the buffer goes back to the pool, since the
	// buffer's backing array will be reused by later callers.
	encoded := append([]byte(nil), out.Bytes()...)
	bufferPool.Put(out)
	return encoded
}

func valToBytes(v types.Val) ([]byte, error) {
switch v.Tid {
case types.StringID, types.DefaultID:
return json.Marshal(v.Value)
switch str := v.Value.(type) {
case string:
return stringJsonMarshal(str), nil
default:
return json.Marshal(str)
}
case types.BinaryID:
return []byte(fmt.Sprintf("%q", v.Value)), nil
case types.IntID:
Expand Down
50 changes: 50 additions & 0 deletions query/outputnode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ package query

import (
"bytes"
"encoding/json"
"fmt"
"runtime"
"strings"
"sync"
"testing"

Expand Down Expand Up @@ -94,3 +96,51 @@ func TestNormalizeJSONLimit(t *testing.T) {
_, err := n.normalize()
require.Error(t, err, "Couldn't evaluate @normalize directive - too many results")
}

// BenchmarkJsonMarshal compares json.Marshal against stringJsonMarshal on a
// large string, a small string and a string full of special characters. Each
// input gets one sub-benchmark per implementation.
func BenchmarkJsonMarshal(b *testing.B) {
	type benchCase struct {
		label string
		text  string
	}
	cases := []benchCase{
		{label: "largestring", text: strings.Repeat("a", 1024)},
		{label: "smallstring", text: "abcdef"},
		{label: "specialchars", text: "<><>^)(*&(%*&%&^$*&%)(*&)^)"},
	}

	// sink receives every result so the calls being measured have a use.
	var sink []byte

	for _, tc := range cases {
		stdName := fmt.Sprintf("STDJsonMarshal-%s", tc.label)
		b.Run(stdName, func(b *testing.B) {
			for n := 0; n < b.N; n++ {
				sink, _ = json.Marshal(tc.text)
			}
		})

		customName := fmt.Sprintf("stringJsonMarshal-%s", tc.label)
		b.Run(customName, func(b *testing.B) {
			for n := 0; n < b.N; n++ {
				sink = stringJsonMarshal(tc.text)
			}
		})
	}

	_ = sink
}

// TestStringJsonMarshal checks that stringJsonMarshal produces exactly the
// same bytes as json.Marshal for a set of sample strings.
func TestStringJsonMarshal(t *testing.T) {
	samples := []string{
		"",
		"0",
		"true",
		"1.909045927350",
		"nil",
		"null",
		"<&>",
		`quoted"str"ing`,
	}

	for idx := range samples {
		// The standard library output is the reference value.
		want, err := json.Marshal(samples[idx])
		require.NoError(t, err)

		got := stringJsonMarshal(samples[idx])

		require.Equal(t, want, got)
	}
}
Loading

0 comments on commit 5510fd2

Please sign in to comment.