From 3a25fcac4f42cf1ae5832655cf35d5813b37338f Mon Sep 17 00:00:00 2001 From: Yi Duan Date: Thu, 28 Oct 2021 17:56:09 +0800 Subject: [PATCH] feat: add node.Load()/LoadAll() to support concurrency (#124) Change-Id: Ic70b61c720a10d16cecc1f3d7c131d81ff6d4071 Co-authored-by: duanyi.aster --- ast/node.go | 72 +++++++++++++++++-- ast/node_test.go | 182 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 240 insertions(+), 14 deletions(-) diff --git a/ast/node.go b/ast/node.go index 67440d244..f7b66ecdc 100644 --- a/ast/node.go +++ b/ast/node.go @@ -145,7 +145,7 @@ func (self *Node) Raw() (string, error) { func (self *Node) checkRaw() error { if self.IsRaw() { - *self = self.parseRaw() + self.parseRaw(false) } if err := self.Check(); err != nil { return err @@ -666,6 +666,64 @@ func (self *Node) InterfaceUseNode() (interface{}, error) { } } +// LoadAll loads all the node's children and children's children as parsed. +// After calling it, the node can be safely used on concurrency +func (self *Node) LoadAll() error { + if self.IsRaw() { + self.parseRaw(true) + return self.Check() + } + + switch self.itype() { + case types.V_ARRAY: + e := self.len() + if err := self.loadAllIndex(); err != nil { + return err + } + for i := 0; i < e; i++ { + n := self.nodeAt(i) + n.parseRaw(true) + if err := n.Check(); err != nil { + return err + } + } + return nil + case types.V_OBJECT: + e := self.len() + if err := self.loadAllKey(); err != nil { + return err + } + for i := 0; i < e; i++ { + n := self.pairAt(i) + n.Value.parseRaw(true) + if err := n.Value.Check(); err != nil { + return err + } + } + return nil + default: + return self.Check() + } +} + +// Load loads the node's children as parsed. +// After calling it, only the node itself can be used on concurrency (not include its children) +func (self *Node) Load() error { + if self.IsRaw() { + self.parseRaw(false) + return self.Load() + } + + switch self.t { + case _V_ARRAY_LAZY: + return self.skipAllIndex() + case _V_OBJECT_LAZY: + return self.skipAllKey() + default: + return self.Check() + } +} + /**---------------------------------- Internal Helper Methods ----------------------------------**/ var ( @@ -1402,14 +1460,18 @@ func newRawNode(str string, typ types.ValueType) Node { } } -func (self *Node) parseRaw() Node { +func (self *Node) parseRaw(full bool) { raw := addr2str(self.p, self.v) parser := NewParser(raw) - n, e := parser.Parse() + if full { + parser.noLazy = true + parser.skipValue = false + } + var e types.ParsingError + *self, e = parser.Parse() if e != 0 { - return *newSyntaxError(parser.syntaxError(e)) + *self = *newSyntaxError(parser.syntaxError(e)) } - return n } func newError(err types.ParsingError, msg string) *Node { diff --git a/ast/node_test.go b/ast/node_test.go index 0eac58f78..f92a0cdc9 100644 --- a/ast/node_test.go +++ b/ast/node_test.go @@ -17,19 +17,104 @@ package ast import ( - `encoding/json` - `fmt` - `reflect` - `strconv` - `testing` - - `github.com/bytedance/sonic/internal/native/types` - jsoniter `github.com/json-iterator/go` - `github.com/stretchr/testify/assert` + `encoding/json` + `fmt` + `reflect` + `strconv` + `testing` + + `github.com/bytedance/sonic/internal/native/types` + jsoniter `github.com/json-iterator/go` + `github.com/stretchr/testify/assert` ) var parallelism = 4 +func TestLoadAll(t *testing.T) { + e := Node{} + err := e.Load() + if err != ErrNotExist { + t.Fatal(err) + } + err = e.LoadAll() + if err != ErrNotExist { + t.Fatal(err) + } + + root, err := NewSearcher(`{"a":{"1":[1],"2":2},"b":[{"1":1},2],"c":[1,2]}`).GetByPath() + if err != nil { + t.Fatal(err) + } + if err = root.Load(); err != nil { + t.Fatal(err) + } + if root.len() != 3 { + t.Fatal(root.len()) + } + + c := root.Get("c") + if !c.IsRaw() { + t.Fatal(err) + } + err = c.LoadAll() + if err != nil { + t.Fatal(err) + } + if c.len() != 2 { + t.Fatal(c.len()) + } + c1 := c.nodeAt(0) + if n, err := c1.Int64(); err != nil || n != 1 { + t.Fatal(n, err) + } + + a := root.pairAt(0) + if a.Key != "a" { + t.Fatal(a.Key) + } else if !a.Value.IsRaw() { + t.Fatal(a.Value.itype()) + } else if n, err := a.Value.Len(); n != 0 || err != nil { + t.Fatal(n, err) + } + if err := a.Value.Load(); err != nil { + t.Fatal(err) + } + if a.Value.len() != 2 { + t.Fatal(a.Value.len()) + } + a1 := a.Value.Get("1") + if !a1.IsRaw() { + t.Fatal(a1) + } + a.Value.LoadAll() + if a1.t != types.V_ARRAY || a1.len() != 1 { + t.Fatal(a1.t, a1.len()) + } + + b := root.pairAt(1) + if b.Key != "b" { + t.Fatal(b.Key) + } else if !b.Value.IsRaw() { + t.Fatal(b.Value.itype()) + } else if n, err := b.Value.Len(); n != 0 || err != nil { + t.Fatal(n, err) + } + if err := b.Value.Load(); err != nil { + t.Fatal(err) + } + if b.Value.len() != 2 { + t.Fatal(b.Value.len()) + } + b1 := b.Value.Index(0) + if !b1.IsRaw() { + t.Fatal(b1) + } + b.Value.LoadAll() + if b1.t != types.V_OBJECT || b1.len() != 1 { + t.Fatal(a1.t, a1.len()) + } +} + func TestIndexPair(t *testing.T) { root, _ := NewParser(`{"a":1,"b":2}`).Parse() a := root.IndexPair(0) @@ -130,10 +215,27 @@ func TestTypeCast(t *testing.T) { } func TestCheckError(t *testing.T) { + n := newRawNode("[hello]", types.V_ARRAY) + n.parseRaw(false) + if n.Check() != nil { + t.Fatal(n.Check()) + } + n = newRawNode("[hello]", types.V_ARRAY) + n.parseRaw(true) + p := NewParser("[hello]") + p.noLazy = true + p.skipValue = false + _, x := p.Parse() + if n.Error() != newSyntaxError(p.syntaxError(x)).Error() { + t.Fatal(n.Check()) + } + + s, err := NewParser(`{"a":{}, "b":talse, "c":{}}`).Parse() if err != 0 { t.Fatal(err) } + root := s.GetByPath() // fmt.Println(root.Check()) a := root.Get("a") @@ -666,6 +768,68 @@ func TestNodeAdd(t *testing.T) { } } +func BenchmarkLoadNode(b *testing.B) { + b.Run("Interface()", func(b *testing.B) { + b.SetParallelism(parallelism) + b.SetBytes(int64(len(_TwitterJson))) + b.ResetTimer() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + root, err := NewSearcher(_TwitterJson).GetByPath("statuses", 0) + if err != nil { + b.Fatal(err) + } + _, _ = root.Interface() + } + }) + }) + + b.Run("LoadAll()", func(b *testing.B) { + b.SetParallelism(parallelism) + b.SetBytes(int64(len(_TwitterJson))) + b.ResetTimer() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + root, err := NewSearcher(_TwitterJson).GetByPath("statuses", 0) + if err != nil { + b.Fatal(err) + } + _ = root.LoadAll() + } + }) + }) + + b.Run("InterfaceUseNode()", func(b *testing.B) { + b.SetParallelism(parallelism) + b.SetBytes(int64(len(_TwitterJson))) + b.ResetTimer() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + root, err := NewSearcher(_TwitterJson).GetByPath("statuses", 0) + if err != nil { + b.Fatal(err) + } + _, _ = root.InterfaceUseNode() + } + }) + }) + + b.Run("Load()", func(b *testing.B) { + b.SetParallelism(parallelism) + b.SetBytes(int64(len(_TwitterJson))) + b.ResetTimer() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + root, err := NewSearcher(_TwitterJson).GetByPath("statuses", 0) + if err != nil { + b.Fatal(err) + } + _ = root.Load() + } + }) + }) +} + func BenchmarkNodeGetByPath(b *testing.B) { root, derr := NewParser(_TwitterJson).Parse() if derr != 0 {