Skip to content

Commit

Permalink
json: improve performance by using a pool of scanners (#535)
Browse files Browse the repository at this point in the history
* add benchmark for each detector with its corresponding file

* json/ndjson: use a pool of scanners for detection

it helps by reducing memory allocations to amortized 0
➜  mimetype git:(bench) ✗ benchstat master sync_pool
goos: linux
goarch: amd64
pkg: github.com/gabriel-vasile/mimetype
cpu: Intel(R) Core(TM) i7-10510U CPU @ 1.80GHz
                                              │   master    │             sync_pool              │
                                              │   sec/op    │   sec/op     vs base               │
Files/json.json/application/json-8              1.403µ ± 0%   1.355µ ± 1%  -3.39% (p=0.000 n=20)
Files/ndjson.xl.ndjson/application/x-ndjson-8   8.655µ ± 1%   8.504µ ± 1%  -1.76% (p=0.001 n=20)
Files/ndjson.ndjson/application/x-ndjson-8      3.069µ ± 1%   2.958µ ± 5%       ~ (p=0.129 n=20)
geomean                                         3.340µ        3.242µ       -2.92%

                                              │   master   │               sync_pool               │
                                              │    B/op    │   B/op    vs base                     │
Files/json.json/application/json-8              120.0 ± 0%   0.0 ± 0%  -100.00% (p=0.000 n=20)
Files/ndjson.xl.ndjson/application/x-ndjson-8   720.0 ± 0%   0.0 ± 0%  -100.00% (p=0.000 n=20)
Files/ndjson.ndjson/application/x-ndjson-8      240.0 ± 0%   0.0 ± 0%  -100.00% (p=0.000 n=20)
geomean                                         274.7                  ?                       ¹ ²
¹ summaries must be >0 to compute geomean
² ratios must be >0 to compute geomean

                                              │   master   │                sync_pool                │
                                              │ allocs/op  │ allocs/op   vs base                     │
Files/json.json/application/json-8              4.000 ± 0%   0.000 ± 0%  -100.00% (p=0.000 n=20)
Files/ndjson.xl.ndjson/application/x-ndjson-8   24.00 ± 0%    0.00 ± 0%  -100.00% (p=0.000 n=20)
Files/ndjson.ndjson/application/x-ndjson-8      8.000 ± 0%   0.000 ± 0%  -100.00% (p=0.000 n=20)
geomean                                         9.158                    ?                       ¹ ²
¹ summaries must be >0 to compute geomean
² ratios must be >0 to compute geomean

* benchmark: use sequential instead of concurrent benchmarks

there is no reason to make it concurrent
  • Loading branch information
gabriel-vasile authored May 26, 2024
1 parent 43192c8 commit 09ff708
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 7 deletions.
27 changes: 25 additions & 2 deletions internal/json/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ package json

import (
"fmt"
"sync"
)

type (
Expand Down Expand Up @@ -73,18 +74,38 @@ type (
}
)

// scannerPool recycles scanner instances so repeated detections
// perform zero allocations in the steady state.
var scannerPool = sync.Pool{
	// New is invoked only when the pool is empty; the zero value
	// is made usable by newScanner calling reset before handing it out.
	New: func() any { return new(scanner) },
}

// newScanner fetches a scanner from the pool, restored to its initial
// state and ready for a fresh Scan. Callers must hand it back via
// freeScanner when finished.
func newScanner() *scanner {
	sc := scannerPool.Get().(*scanner)
	sc.reset()
	return sc
}

// freeScanner returns s to the pool for reuse by a later newScanner call.
func freeScanner(s *scanner) {
	// Drop an oversized parse stack before pooling so pathological
	// inputs cannot pin large buffers in memory indefinitely.
	const maxRetainedStates = 1024
	if len(s.parseState) > maxRetainedStates {
		s.parseState = nil
	}
	scannerPool.Put(s)
}

// Scan returns the number of bytes scanned and if there was any error
// in trying to reach the end of data.
func Scan(data []byte) (int, error) {
s := &scanner{}
s := newScanner()
defer freeScanner(s)
_ = checkValid(data, s)
return s.index, s.err
}

// checkValid verifies that data is valid JSON-encoded data.
// scan is passed in for use by checkValid to avoid an allocation.
func checkValid(data []byte, scan *scanner) error {
scan.reset()
for _, c := range data {
scan.index++
if scan.step(scan, c) == scanError {
Expand All @@ -105,6 +126,8 @@ func (s *scanner) reset() {
s.step = stateBeginValue
s.parseState = s.parseState[0:0]
s.err = nil
s.endTop = false
s.index = 0
}

// eof tells the scanner that the end of input has been reached.
Expand Down
32 changes: 27 additions & 5 deletions mimetype_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -488,11 +488,9 @@ func BenchmarkSliceRand(b *testing.B) {
b.ResetTimer()
b.ReportAllocs()

b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
Detect(data)
}
})
for n := 0; n < b.N; n++ {
Detect(data)
}
}

func BenchmarkText(b *testing.B) {
Expand All @@ -513,6 +511,30 @@ func BenchmarkText(b *testing.B) {
}
}

// BenchmarkFiles benchmarks each detector with its corresponding file.
func BenchmarkFiles(b *testing.B) {
	for f, m := range files {
		data, err := os.ReadFile(filepath.Join(testDataDir, f))
		if err != nil {
			b.Fatal(err)
		}
		// Truncate to the same read limit detectors normally see.
		if uint32(len(data)) > defaultLimit {
			data = data[:defaultLimit]
		}
		b.Run(f+"/"+m, func(b *testing.B) {
			// Resolve the detector BEFORE ResetTimer so setup cost
			// (media-type parsing and MIME lookup) is excluded from
			// the timed region.
			parsed, _, _ := mime.ParseMediaType(m)
			mType := Lookup(parsed)
			b.ReportAllocs()
			b.ResetTimer()
			for n := 0; n < b.N; n++ {
				if !mType.detector(data, uint32(len(data))) {
					b.Fatal("detection should never fail")
				}
			}
		})
	}
}

func BenchmarkAll(b *testing.B) {
r := rand.New(rand.NewSource(0))
data := make([]byte, defaultLimit)
Expand Down

0 comments on commit 09ff708

Please sign in to comment.