Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

expression: refactor json path syntax and make it compatible with mysql #37074

Merged
merged 7 commits into from
Aug 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion errno/errname.go
Original file line number Diff line number Diff line change
Expand Up @@ -845,7 +845,7 @@ var MySQLErrName = map[uint16]*mysql.ErrMessage{
ErrAggregateInOrderNotSelect: mysql.Message("Expression #%d of ORDER BY clause is not in SELECT list, contains aggregate function; this is incompatible with %s", nil),
ErrInvalidJSONData: mysql.Message("Invalid JSON data provided to function %s: %s", nil),
ErrInvalidJSONText: mysql.Message("Invalid JSON text: %-.192s", []int{0}),
ErrInvalidJSONPath: mysql.Message("Invalid JSON path expression %s.", nil),
ErrInvalidJSONPath: mysql.Message("Invalid JSON path expression. The error is around character position %d.", []int{0}),
ErrInvalidJSONCharset: mysql.Message("Cannot create a JSON value from a string with CHARACTER SET '%s'.", nil),
ErrInvalidTypeForJSON: mysql.Message("Invalid data type for JSON data in argument %d to function %s; a JSON string or JSON type is required.", nil),
ErrInvalidJSONPathWildcard: mysql.Message("In this situation, path expressions may not contain the * and ** tokens.", nil),
Expand Down
2 changes: 1 addition & 1 deletion errors.toml
Original file line number Diff line number Diff line change
Expand Up @@ -1588,7 +1588,7 @@ Invalid JSON text: %-.192s

["json:3143"]
error = '''
Invalid JSON path expression %s.
Invalid JSON path expression. The error is around character position %d.
'''

["json:3144"]
Expand Down
283 changes: 193 additions & 90 deletions types/json/path_expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
package json
hawkingrei marked this conversation as resolved.
Show resolved Hide resolved

import (
"encoding/json"
"math"
"regexp"
"strconv"
"strings"
"sync"
"unicode"

"github.com/pingcap/errors"
"github.com/pingcap/tidb/util/hack"
"github.com/pingcap/tidb/util/kvcache"
)
Expand Down Expand Up @@ -50,10 +50,6 @@ import (
select json_extract('{"a": "b", "c": [1, "2"]}', '$.*') -> ["b", [1, "2"]]
*/

// ([\$]*[a-zA-Z_][a-zA-Z0-9_]*)+ matches any identifier, may start with $ and appear multiple times;
// "[^"\\]*(\\.[^"\\]*)*" matches any string literal which can carry escaped quotes;
var jsonPathExprLegRe = regexp.MustCompile(`(\.\s*(([\$]*[a-zA-Z_][a-zA-Z0-9_]*)+|\*|"[^"\\]*(\\.[^"\\]*)*")|(\[\s*([0-9]+|\*)\s*\])|\*\*)`)

type pathLegType byte

const (
Expand Down Expand Up @@ -128,7 +124,7 @@ func (pe PathExpression) popOneLeg() (pathLeg, PathExpression) {
return pe.legs[0], newPe
}

// popOneLastLeg returns the a parent PathExpression and the last pathLeg
// popOneLastLeg returns the parent PathExpression and the last pathLeg
func (pe PathExpression) popOneLastLeg() (PathExpression, pathLeg) {
lastLegIdx := len(pe.legs) - 1
lastLeg := pe.legs[lastLegIdx]
Expand Down Expand Up @@ -165,111 +161,218 @@ func (pe PathExpression) ContainsAnyAsterisk() bool {
return pe.flags.containsAnyAsterisk()
}

// ParseJSONPathExpr parses a JSON path expression. Returns a PathExpression
// object which can be used in JSON_EXTRACT, JSON_SET and so on.
func ParseJSONPathExpr(pathExpr string) (pe PathExpression, err error) {
peCache.mu.Lock()
val, ok := peCache.cache.Get(pathExpressionKey(pathExpr))
if ok {
peCache.mu.Unlock()
return val.(PathExpression), nil
}
peCache.mu.Unlock()
type stream struct {
pathExpr string
pos int
}

defer func() {
if err == nil {
peCache.mu.Lock()
peCache.cache.Put(pathExpressionKey(pathExpr), kvcache.Value(pe))
peCache.mu.Unlock()
func (s *stream) skipWhiteSpace() {
for ; s.pos < len(s.pathExpr); s.pos++ {
if !unicode.IsSpace(rune(s.pathExpr[s.pos])) {
break
}
}()

// Find the position of first '$'. If any no-blank characters in
// pathExpr[0: dollarIndex), return an ErrInvalidJSONPath error.
dollarIndex := strings.Index(pathExpr, "$")
if dollarIndex < 0 {
err = ErrInvalidJSONPath.GenWithStackByArgs(pathExpr)
return
}
for i := 0; i < dollarIndex; i++ {
if !isBlank(rune(pathExpr[i])) {
err = ErrInvalidJSONPath.GenWithStackByArgs(pathExpr)
return
}

func (s *stream) read() byte {
b := s.pathExpr[s.pos]
s.pos++
return b
}

func (s *stream) peek() byte {
return s.pathExpr[s.pos]
}

func (s *stream) skip(i int) {
s.pos += i
}

func (s *stream) exhausted() bool {
return s.pos >= len(s.pathExpr)
}

func (s *stream) readWhile(f func(byte) bool) (str string, metEnd bool) {
start := s.pos
for ; !s.exhausted(); s.skip(1) {
if !f(s.peek()) {
return s.pathExpr[start:s.pos], false
}
}
return s.pathExpr[start:s.pos], true
}

pathExprSuffix := strings.TrimFunc(pathExpr[dollarIndex+1:], isBlank)
indices := jsonPathExprLegRe.FindAllStringIndex(pathExprSuffix, -1)
if len(indices) == 0 && len(pathExprSuffix) != 0 {
err = ErrInvalidJSONPath.GenWithStackByArgs(pathExpr)
return
func parseJSONPathExpr(pathExpr string) (pe PathExpression, err error) {
s := &stream{pathExpr: pathExpr, pos: 0}
s.skipWhiteSpace()
if s.exhausted() || s.read() != '$' {
return PathExpression{}, ErrInvalidJSONPath.GenWithStackByArgs(1)
}
s.skipWhiteSpace()

pe.legs = make([]pathLeg, 0, len(indices))
pe.legs = make([]pathLeg, 0, 16)
pe.flags = pathExpressionFlag(0)

lastEnd := 0
for _, indice := range indices {
start, end := indice[0], indice[1]
var ok bool

for !s.exhausted() {
xiongjiwei marked this conversation as resolved.
Show resolved Hide resolved
switch s.peek() {
xhebox marked this conversation as resolved.
Show resolved Hide resolved
case '.':
ok = parseMember(s, &pe)
case '[':
ok = parseArray(s, &pe)
case '*':
ok = parseWildcard(s, &pe)
default:
ok = false
}

// Check all characters between two legs are blank.
for i := lastEnd; i < start; i++ {
if !isBlank(rune(pathExprSuffix[i])) {
err = ErrInvalidJSONPath.GenWithStackByArgs(pathExpr)
return
}
if !ok {
return PathExpression{}, ErrInvalidJSONPath.GenWithStackByArgs(s.pos)
}
lastEnd = end

if pathExprSuffix[start] == '[' {
// The leg is an index of a JSON array.
var leg = strings.TrimFunc(pathExprSuffix[start+1:end], isBlank)
var indexStr = strings.TrimFunc(leg[0:len(leg)-1], isBlank)
var index int
if len(indexStr) == 1 && indexStr[0] == '*' {
pe.flags |= pathExpressionContainsAsterisk
index = arrayIndexAsterisk
} else {
if index, err = strconv.Atoi(indexStr); err != nil {
err = errors.Trace(err)
return
}
}
pe.legs = append(pe.legs, pathLeg{typ: pathLegIndex, arrayIndex: index})
} else if pathExprSuffix[start] == '.' {
// The leg is a key of a JSON object.
var key = strings.TrimFunc(pathExprSuffix[start+1:end], isBlank)
if len(key) == 1 && key[0] == '*' {
pe.flags |= pathExpressionContainsAsterisk
} else if key[0] == '"' {
// We need unquote the origin string.
if key, err = unquoteString(key[1 : len(key)-1]); err != nil {
err = ErrInvalidJSONPath.GenWithStackByArgs(pathExpr)
return

s.skipWhiteSpace()
}

if len(pe.legs) > 0 && pe.legs[len(pe.legs)-1].typ == pathLegDoubleAsterisk {
return PathExpression{}, ErrInvalidJSONPath.GenWithStackByArgs(s.pos)
}

return
}

func parseWildcard(s *stream, p *PathExpression) bool {
s.skip(1)
if s.exhausted() || s.read() != '*' {
return false
}
if s.exhausted() || s.peek() == '*' {
return false
}

p.flags |= pathExpressionContainsDoubleAsterisk
p.legs = append(p.legs, pathLeg{typ: pathLegDoubleAsterisk})
return true
}

func parseArray(s *stream, p *PathExpression) bool {
s.skip(1)
s.skipWhiteSpace()
if s.exhausted() {
return false
}

if s.peek() == '*' {
s.skip(1)
p.flags |= pathExpressionContainsAsterisk
p.legs = append(p.legs, pathLeg{typ: pathLegIndex, arrayIndex: arrayIndexAsterisk})
} else {
// FIXME: only support an integer index for now. Need to support [last], [1 to 2]... in the future.
str, meetEnd := s.readWhile(func(b byte) bool {
return b >= '0' && b <= '9'
})
if meetEnd {
return false
}
index, err := strconv.Atoi(str)
if err != nil || index > math.MaxUint32 {
return false
}
p.legs = append(p.legs, pathLeg{typ: pathLegIndex, arrayIndex: index})
}

s.skipWhiteSpace()
if s.exhausted() || s.read() != ']' {
return false
}

return true
}

func parseMember(s *stream, p *PathExpression) bool {
var err error
s.skip(1)
s.skipWhiteSpace()
if s.exhausted() {
return false
}

if s.peek() == '*' {
s.skip(1)
p.flags |= pathExpressionContainsAsterisk
p.legs = append(p.legs, pathLeg{typ: pathLegKey, dotKey: "*"})
} else {
var dotKey string
var wasQuoted bool
if s.peek() == '"' {
s.skip(1)
str, meetEnd := s.readWhile(func(b byte) bool {
if b == '\\' {
s.skip(1)
return true
}
return b != '"'
})
if meetEnd {
return false
}
pe.legs = append(pe.legs, pathLeg{typ: pathLegKey, dotKey: key})
s.skip(1)
dotKey = str
wasQuoted = true
} else {
// The leg is '**'.
pe.flags |= pathExpressionContainsDoubleAsterisk
pe.legs = append(pe.legs, pathLeg{typ: pathLegDoubleAsterisk})
dotKey, _ = s.readWhile(func(b byte) bool {
return !(unicode.IsSpace(rune(b)) || b == '.' || b == '[' || b == '*')
})
}
dotKey = "\"" + dotKey + "\""

if !json.Valid(hack.Slice(dotKey)) {
return false
}
dotKey, err = unquoteString(dotKey[1 : len(dotKey)-1])
if err != nil || (!wasQuoted && !isEcmascriptIdentifier(dotKey)) {
return false
}

p.legs = append(p.legs, pathLeg{typ: pathLegKey, dotKey: dotKey})
}
return true
}

func isEcmascriptIdentifier(s string) bool {
if s == "" {
return false
}
if len(pe.legs) > 0 {
// The last leg of a path expression cannot be '**'.
if pe.legs[len(pe.legs)-1].typ == pathLegDoubleAsterisk {
err = ErrInvalidJSONPath.GenWithStackByArgs(pathExpr)
return

for i := 0; i < len(s); i++ {
if (i != 0 && s[i] >= '0' && s[i] <= '9') ||
(s[i] >= 'a' && s[i] <= 'z') || (s[i] >= 'A' && s[i] <= 'Z') ||
s[i] == '_' || s[i] == '$' || s[i] >= 0x80 {
continue
}
return false
}
return
return true
}

func isBlank(c rune) bool {
if c == '\n' || c == '\r' || c == '\t' || c == ' ' {
return true
// ParseJSONPathExpr parses a JSON path expression. Returns a PathExpression
// object which can be used in JSON_EXTRACT, JSON_SET and so on.
func ParseJSONPathExpr(pathExpr string) (PathExpression, error) {
peCache.mu.Lock()
val, ok := peCache.cache.Get(pathExpressionKey(pathExpr))
if ok {
peCache.mu.Unlock()
return val.(PathExpression), nil
}
peCache.mu.Unlock()

pathExpression, err := parseJSONPathExpr(pathExpr)
if err == nil {
peCache.mu.Lock()
peCache.cache.Put(pathExpressionKey(pathExpr), kvcache.Value(pathExpression))
peCache.mu.Unlock()
}
return false
return pathExpression, err
}

func (pe PathExpression) String() string {
Expand Down
17 changes: 14 additions & 3 deletions types/json/path_expr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ func TestContainsAnyAsterisk(t *testing.T) {
expression string
containsAsterisks bool
}{
{"$.a[b]", false},
{"$.a[1]", false},
{"$.a[*]", true},
{"$.*[b]", true},
{"$**.a[b]", true},
{"$.*[1]", true},
{"$**.a[1]", true},
}

for _, test := range tests {
Expand Down Expand Up @@ -58,6 +58,17 @@ func TestValidatePathExpr(t *testing.T) {
{`$.hello \"escaped quotes\" world[3][*].*.key3`, false, 0},
{`$NoValidLegsHere`, false, 0},
{`$ No Valid Legs Here .a.b.c`, false, 0},
{`$.a[b]`, false, 0},
xiongjiwei marked this conversation as resolved.
Show resolved Hide resolved
{`$.*[b]`, false, 0},
{`$**.a[b]`, false, 0},
{`$.b[ 1 ].`, false, 0},
{`$.performance.txn-entry-size-limit`, false, 0},
{`$."performance".txn-entry-size-limit`, false, 0},
{`$."performance."txn-entry-size-limit`, false, 0},
{`$."performance."txn-entry-size-limit"`, false, 0},
{`$[`, false, 0},
{`$a.***[3]`, false, 0},
{`$1a`, false, 0},
}

for _, test := range tests {
Expand Down