fix: validate on demand at first for GetByPath and NewRaw (#389)

* feat(ast): validate the demanded fields for ast Get
* fix: add more tests
* fix: check invalid path type
* fix(arm): return an error when skipping an invalid exponent
* fix(arm): skip objects and arrays with validation
* fix: check the input JSON in NewRaw
* fix: validate the path in native C
* chore: add licenses
* fix: panic on an invalid path
* fix: add a Get benchmark over the full JSON
* fix(arm): use fast skip for everything other than the on-demand fields
* chore: tidy up code

---------

Co-authored-by: liuqiang <liuqiang.06@bytedance.com>
bytedance · Mar 22, 2023 · 8639e93 · 8639e93
1 parent ddd5cd6
commit 8639e93
Show file tree

Hide file tree

Showing 23 changed files with 2,819 additions and 2,322 deletions.
diff --git a/ast/api_amd64.go b/ast/api_amd64.go
@@ -1,5 +1,20 @@
 // +build amd64,go1.15,!go1.21
 
+/*
+ * Copyright 2022 ByteDance Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 package ast
 
@@ -100,15 +115,16 @@ func (self *Parser) skipFast() (int, types.ParsingError) {
 }
 
 func (self *Parser) getByPath(path ...interface{}) (int, types.ParsingError) {
-    start := native.GetByPath(&self.s, &self.p, &path)
+    fsm := types.NewStateMachine()
+    start := native.GetByPath(&self.s, &self.p, &path, fsm)
+    types.FreeStateMachine(fsm)
     runtime.KeepAlive(path)
     if start < 0 {
         return self.p, types.ParsingError(-start)
     }
     return start, 0
 }
 
-
 func (self *Searcher) GetByPath(path ...interface{}) (Node, error) {
     var err types.ParsingError
     var start int
@@ -120,6 +136,9 @@ func (self *Searcher) GetByPath(path ...interface{}) (Node, error) {
         if err == types.ERR_NOT_FOUND {
             return Node{}, ErrNotExist
         }
+        if err == types.ERR_UNSUPPORT_TYPE {
+            panic("path must be either int(>=0) or string")
+        }
         return Node{}, self.parser.syntaxError(err)
     }
 

diff --git a/ast/api_amd64_test.go b/ast/api_amd64_test.go
@@ -1,5 +1,21 @@
 // +build amd64,go1.15,!go1.21
 
+/*
+ * Copyright 2022 ByteDance Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package ast
 
 import (

diff --git a/ast/api_compat.go b/ast/api_compat.go
@@ -1,5 +1,21 @@
 // +build !amd64 go1.21
 
+/*
+ * Copyright 2022 ByteDance Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package ast
 
 import (
@@ -24,8 +40,6 @@ func unquote(src string) (string, types.ParsingError) {
     return rt.Mem2Str(out), 0
 }
 
-
-
 func decodeBase64(src string) ([]byte, error) {
     return base64.StdEncoding.DecodeString(src)
 }
@@ -53,7 +67,12 @@ func (self *Parser) skip() (int, types.ParsingError) {
 }
 
 func (self *Parser) skipFast() (int, types.ParsingError) {
-    return self.skip()
+    e, s := skipValueFast(self.s, self.p)
+    if e < 0 {
+        return self.p, types.ParsingError(-e)
+    }
+    self.p = e
+    return s, 0
 }
 
 func (self *Node) encodeInterface(buf *[]byte) error {
@@ -70,17 +89,16 @@ func (self *Searcher) GetByPath(path ...interface{}) (Node, error) {
 
     var err types.ParsingError
     for _, p := range path {
-        switch p := p.(type) {
-        case int:
-            if err = self.parser.searchIndex(p); err != 0 {
+        if idx, ok := p.(int); ok && idx >= 0 {
+            if err = self.parser.searchIndex(idx); err != 0 {
                 return Node{}, self.parser.ExportError(err)
             }
-        case string:
-            if err = self.parser.searchKey(p); err != 0 {
+        } else if key, ok := p.(string); ok {
+            if err = self.parser.searchKey(key); err != 0 {
                 return Node{}, self.parser.ExportError(err)
             }
-        default:
-            panic("path must be either int or string")
+        } else {
+            panic("path must be either int(>=0) or string")
         }
     }
 

diff --git a/ast/api_test.go b/ast/api_test.go
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2022 ByteDance Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ast
+
+import (
+    `testing`
+    `strings`
+
+    `github.com/stretchr/testify/assert`
+)
+
+type Path = []interface{} // Path aliases the variadic key/index list accepted by GetByPath.
+
+type testGetApi struct {
+    json      string // JSON text handed to NewSearcher
+    path      Path   // path elements expanded into the GetByPath call
+}
+
+type checkError func(error) bool // error predicate; NOTE(review): no use is visible in this hunk - confirm it is needed
+
+// isSyntaxError reports whether err is non-nil and its message starts with
+// the quoted `"Syntax error at index` prefix (the leading quote is part of it).
+func isSyntaxError(err error) bool {
+    const prefix = `"Syntax error at index`
+    return err != nil && strings.HasPrefix(err.Error(), prefix)
+}
+
+// isEmptySource reports whether err is non-nil and its message contains
+// "no sources available".
+func isEmptySource(err error) bool {
+    const marker = "no sources available"
+    return err != nil && strings.Contains(err.Error(), marker)
+}
+
+// isErrNotExist reports whether err is exactly ErrNotExist; the comparison is
+// a plain ==, so a wrapped ErrNotExist does not match.
+func isErrNotExist(err error) bool { return ErrNotExist == err }
+
+// isErrUnsupportType reports whether err is exactly ErrUnsupportType; the
+// comparison is a plain ==, so a wrapped ErrUnsupportType does not match.
+func isErrUnsupportType(err error) bool { return ErrUnsupportType == err }
+
+// testSyntaxJson asserts that looking up path in json through a fresh
+// Searcher fails with an error accepted by isSyntaxError.
+func testSyntaxJson(t *testing.T, json string, path ...interface{}) {
+    _, lookupErr := NewSearcher(json).GetByPath(path...)
+    assert.True(t, isSyntaxError(lookupErr))
+}
+
+// TestGetFromEmptyJson: GetByPath on an empty source must fail the isEmptySource check for every path.
+func TestGetFromEmptyJson(t *testing.T) {
+    cases := []testGetApi{
+        {json: "", path: nil},
+        {json: "", path: Path{}},
+        {json: "", path: Path{""}},
+        {json: "", path: Path{0}},
+        {json: "", path: Path{"", ""}},
+    }
+    for _, tc := range cases {
+        t.Run(tc.json, func(t *testing.T) {
+            searcher := NewSearcher(tc.json)
+            _, lookupErr := searcher.GetByPath(tc.path...)
+            assert.True(t, isEmptySource(lookupErr))
+        })
+    }
+}
+
+// TestGetFromSyntaxError feeds malformed JSON values to GetByPath and expects
+// a syntax error for the bare value and for the same value placed behind an
+// object key or array index (those wrappers are never closed either).
+func TestGetFromSyntaxError(t *testing.T) {
+    cases := []testGetApi{
+        {" \r\n\f\t", Path{}},
+        {"123.", Path{}},
+        {"+124", Path{}},
+        {"-", Path{}},
+        {"-e123", Path{}},
+        {"-1.e123", Path{}},
+        {"-12e456.1", Path{}},
+        {"-12e.1", Path{}},
+        {"[", Path{}},
+        {"{", Path{}},
+        {"[}", Path{}},
+        {"{]", Path{}},
+        {"{,}", Path{}},
+        {"[,]", Path{}},
+        {"tru", Path{}},
+        {"fals", Path{}},
+        {"nul", Path{}},
+        {`{"a":"`, Path{"a"}},
+        {`{"`, Path{}},
+        {`"`, Path{}},
+        {`"\"`, Path{}},
+        {`"\\\"`, Path{}},
+        {`"hello`, Path{}},
+        {`{{}}`, Path{}},
+        {`{[]}`, Path{}},
+        {`{:,}`, Path{}},
+        {`{test:error}`, Path{}},
+        {`{":true}`, Path{}},
+        {`{"" false}`, Path{}},
+        {`{ "" : "false }`, Path{}},
+        {`{"":"",}`, Path{}},
+        {`{ " test : true}`, Path{}},
+        {`{ "test" : tru }`, Path{}},
+        {`{ "test" : true , }`, Path{}},
+        {`{ {"test" : true , } }`, Path{}},
+        {`{"test":1. }`, Path{}},
+        {`{"\\\""`, Path{}},
+        {`{"\\\"":`, Path{}},
+        {`{"\\\":",""}`, Path{}},
+        {`[{]`, Path{}},
+        {`[tru]`, Path{}},
+        {`[-1.]`, Path{}},
+        {`[[]`, Path{}},
+        {`[[],`, Path{}},
+        {`[ true , false , [ ]`, Path{}},
+        {`[true, false, [],`, Path{}},
+        {`[true, false, [],]`, Path{}},
+        {`{"key": [true, false, []], "key2": {{}}`, Path{}},
+    }
+
+    // Each value is checked at the root, under "key", under "" and at array index 1.
+    for _, tc := range cases {
+        t.Run(tc.json, func(t *testing.T) {
+            testSyntaxJson(t, tc.json, tc.path...)
+            testSyntaxJson(t, `{"key":`+tc.json, append(Path{"key"}, tc.path...)...)
+            testSyntaxJson(t, `{"":`+tc.json, append(Path{""}, tc.path...)...)
+            testSyntaxJson(t, `["",`+tc.json, append(Path{1}, tc.path...)...)
+        })
+    }
+}
+
+// NOTE: GetByPath does not validate fields the path does not demand, for
+// performance; malformed text outside the requested value must not fail here.
+func TestGetWithInvalidUndemandedField(t *testing.T) {
+    type Any = interface{}
+    tests := []struct {
+        json string
+        path Path
+        exp  Any
+    } {
+        { "-0xyz", Path{}, Any(float64(-0))},
+        { "-12e4xyz", Path{}, Any(float64(-12e4))},
+        { "truex",  Path{}, Any(true)},
+        { "false,", Path{}, Any(false)},
+        { `{"a":{,xxx},"b":true}`, Path{"b"}, Any(true)},
+        { `{"a":[,xxx],"b":true}`, Path{"b"}, Any(true)},
+    }
+
+    for _, test := range tests {
+        t.Run(test.json, func(t *testing.T) {
+            search := NewSearcher(test.json)
+            node, err := search.GetByPath(test.path...)
+            assert.NoError(t, err)
+            v, err := node.Interface()
+            assert.NoError(t, err)
+            assert.Equal(t, test.exp, v) // testify order is (expected, actual)
+        })
+    }
+}
+
+// TestGet_InvalidPathType checks that GetByPath panics for a path element that
+// is neither a string nor a non-negative int. Every bad element gets its own
+// assert.Panics: inside one shared closure the first panic would end the
+// closure, so the remaining calls would never be exercised.
+func TestGet_InvalidPathType(t *testing.T) {
+    data := `{"a":[{"b":true}]}`
+    for _, bad := range []interface{}{true, nil, -1} {
+        assert.Panics(t, assert.PanicTestFunc(func() {
+            s := NewSearcher(data)
+            s.GetByPath("a", bad)
+        }))
+    }
+}