Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement json.Marshal just for strings #4979

Merged
merged 6 commits into from
Mar 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 98 additions & 1 deletion query/outputnode.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ import (
"sort"
"strconv"
"strings"
"sync"
"time"
"unicode/utf8"

"github.com/golang/glog"
"github.com/pkg/errors"
Expand Down Expand Up @@ -133,12 +135,107 @@ var (
boolTrue = []byte("true")
boolFalse = []byte("false")
emptyString = []byte(`""`)

// Below variables are used in stringJsonMarshal function.
bufferPool = sync.Pool{
New: func() interface{} {
return new(bytes.Buffer)
},
}

hex = "0123456789abcdef"
escapeHTML = true
)

// stringJsonMarshal is replacement for json.Marshal() function only for string type.
// This function is encodeState.string(string, escapeHTML) in "encoding/json/encode.go".
// It should be in sync with encodeState.string function.
func stringJsonMarshal(s string) []byte {
e := bufferPool.Get().(*bytes.Buffer)
e.Reset()

e.WriteByte('"')
start := 0
for i := 0; i < len(s); {
if b := s[i]; b < utf8.RuneSelf {
if htmlSafeSet[b] || (!escapeHTML && safeSet[b]) {
i++
continue
}
if start < i {
e.WriteString(s[start:i])
}
e.WriteByte('\\')
switch b {
case '\\', '"':
e.WriteByte(b)
case '\n':
e.WriteByte('n')
case '\r':
e.WriteByte('r')
case '\t':
e.WriteByte('t')
default:
// This encodes bytes < 0x20 except for \t, \n and \r.
// If escapeHTML is set, it also escapes <, >, and &
// because they can lead to security holes when
// user-controlled strings are rendered into JSON
// and served to some browsers.
e.WriteString(`u00`)
e.WriteByte(hex[b>>4])
e.WriteByte(hex[b&0xF])
}
i++
start = i
continue
}
c, size := utf8.DecodeRuneInString(s[i:])
if c == utf8.RuneError && size == 1 {
if start < i {
e.WriteString(s[start:i])
}
e.WriteString(`\ufffd`)
i += size
start = i
continue
}
// U+2028 is LINE SEPARATOR.
// U+2029 is PARAGRAPH SEPARATOR.
// They are both technically valid characters in JSON strings,
// but don't work in JSONP, which has to be evaluated as JavaScript,
// and can lead to security holes there. It is valid JSON to
// escape them, so we do so unconditionally.
// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
if c == '\u2028' || c == '\u2029' {
if start < i {
e.WriteString(s[start:i])
}
e.WriteString(`\u202`)
e.WriteByte(hex[c&0xF])
i += size
start = i
continue
}
i += size
}
if start < len(s) {
e.WriteString(s[start:])
}
e.WriteByte('"')
buf := append([]byte(nil), e.Bytes()...)
bufferPool.Put(e)
return buf
}

func valToBytes(v types.Val) ([]byte, error) {
switch v.Tid {
case types.StringID, types.DefaultID:
return json.Marshal(v.Value)
switch str := v.Value.(type) {
case string:
return stringJsonMarshal(str), nil
default:
return json.Marshal(str)
}
case types.BinaryID:
return []byte(fmt.Sprintf("%q", v.Value)), nil
case types.IntID:
Expand Down
50 changes: 50 additions & 0 deletions query/outputnode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ package query

import (
"bytes"
"encoding/json"
"fmt"
"runtime"
"strings"
"sync"
"testing"

Expand Down Expand Up @@ -94,3 +96,51 @@ func TestNormalizeJSONLimit(t *testing.T) {
_, err := n.normalize()
require.Error(t, err, "Couldn't evaluate @normalize directive - too many results")
}

func BenchmarkJsonMarshal(b *testing.B) {
inputStrings := [][]string{
[]string{"largestring", strings.Repeat("a", 1024)},
[]string{"smallstring", "abcdef"},
[]string{"specialchars", "<><>^)(*&(%*&%&^$*&%)(*&)^)"},
}

var result []byte

for _, input := range inputStrings {
b.Run(fmt.Sprintf("STDJsonMarshal-%s", input[0]), func(b *testing.B) {
for i := 0; i < b.N; i++ {
result, _ = json.Marshal(input[1])
}
})

b.Run(fmt.Sprintf("stringJsonMarshal-%s", input[0]), func(b *testing.B) {
for i := 0; i < b.N; i++ {
result = stringJsonMarshal(input[1])
}
})
}

_ = result
}

func TestStringJsonMarshal(t *testing.T) {
inputs := []string{
"",
"0",
"true",
"1.909045927350",
"nil",
"null",
"<&>",
`quoted"str"ing`,
}

for _, input := range inputs {
gm, err := json.Marshal(input)
require.NoError(t, err)

sm := stringJsonMarshal(input)

require.Equal(t, gm, sm)
}
}
Loading