From 49f5952a4e9bd3849fe450d9881322ef19194a20 Mon Sep 17 00:00:00 2001 From: Ashish Goswami Date: Wed, 19 Feb 2020 21:11:54 +0530 Subject: [PATCH 1/6] Have json.Marshal function just for string type --- query/outputnode.go | 81 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 2 deletions(-) diff --git a/query/outputnode.go b/query/outputnode.go index 6b40be0c154..ee11e5c6659 100644 --- a/query/outputnode.go +++ b/query/outputnode.go @@ -18,12 +18,12 @@ package query import ( "bytes" - "encoding/json" "fmt" "sort" "strconv" "strings" "time" + "unicode/utf8" "github.com/golang/glog" "github.com/pkg/errors" @@ -135,10 +135,87 @@ var ( emptyString = []byte(`""`) ) +// stringJsonMarshal is replacement for json.Marshal() function only for string type. +// This function is encodeState.string(string, escapeHTML) in "encoding/json/encode.go". +func stringJsonMarshal(s string) []byte { + var hex = "0123456789abcdef" + escapeHTML := true + var e bytes.Buffer + e.WriteByte('"') + start := 0 + for i := 0; i < len(s); { + if b := s[i]; b < utf8.RuneSelf { + if htmlSafeSet[b] || (!escapeHTML && safeSet[b]) { + i++ + continue + } + if start < i { + e.WriteString(s[start:i]) + } + e.WriteByte('\\') + switch b { + case '\\', '"': + e.WriteByte(b) + case '\n': + e.WriteByte('n') + case '\r': + e.WriteByte('r') + case '\t': + e.WriteByte('t') + default: + // This encodes bytes < 0x20 except for \t, \n and \r. + // If escapeHTML is set, it also escapes <, >, and & + // because they can lead to security holes when + // user-controlled strings are rendered into JSON + // and served to some browsers. + e.WriteString(`u00`) + e.WriteByte(hex[b>>4]) + e.WriteByte(hex[b&0xF]) + } + i++ + start = i + continue + } + c, size := utf8.DecodeRuneInString(s[i:]) + if c == utf8.RuneError && size == 1 { + if start < i { + e.WriteString(s[start:i]) + } + e.WriteString(`\ufffd`) + i += size + start = i + continue + } + // U+2028 is LINE SEPARATOR. 
+ // U+2029 is PARAGRAPH SEPARATOR. + // They are both technically valid characters in JSON strings, + // but don't work in JSONP, which has to be evaluated as JavaScript, + // and can lead to security holes there. It is valid JSON to + // escape them, so we do so unconditionally. + // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion. + if c == '\u2028' || c == '\u2029' { + if start < i { + e.WriteString(s[start:i]) + } + e.WriteString(`\u202`) + e.WriteByte(hex[c&0xF]) + i += size + start = i + continue + } + i += size + } + if start < len(s) { + e.WriteString(s[start:]) + } + e.WriteByte('"') + return e.Bytes() +} + func valToBytes(v types.Val) ([]byte, error) { switch v.Tid { case types.StringID, types.DefaultID: - return json.Marshal(v.Value) + return stringJsonMarshal(v.Value.(string)), nil case types.BinaryID: return []byte(fmt.Sprintf("%q", v.Value)), nil case types.IntID: From a9c37ceb2d8b5e036c937c8afffad15d95a03982 Mon Sep 17 00:00:00 2001 From: Ashish Goswami Date: Thu, 19 Mar 2020 15:53:30 +0530 Subject: [PATCH 2/6] Add table.go --- query/table.go | 217 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+) create mode 100644 query/table.go diff --git a/query/table.go b/query/table.go new file mode 100644 index 00000000000..da4ad23dd20 --- /dev/null +++ b/query/table.go @@ -0,0 +1,217 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +package query + +import "unicode/utf8" + +// safeSet holds the value true if the ASCII character with the given array +// position can be represented inside a JSON string without any further +// escaping. +// +// All values are true except for the ASCII control characters (0-31), the +// double quote ("), and the backslash character ("\"). 
var safeSet = func() [utf8.RuneSelf]bool {
	var table [utf8.RuneSelf]bool
	// Indices below ' ' (the ASCII control characters) keep the zero value,
	// false. From ' ' through 0x7f every entry is true, except the two
	// characters JSON string syntax reserves: the double quote and the
	// backslash.
	for ch := ' '; ch < utf8.RuneSelf; ch++ {
		table[ch] = ch != '"' && ch != '\\'
	}
	return table
}()

// htmlSafeSet holds the value true if the ASCII character with the given
// array position can be safely represented inside a JSON string, embedded
// inside of HTML