Skip to content

Commit

Permalink
pkg/query: Add fast map of strings to json marshaling
Browse files Browse the repository at this point in the history
This is an ~82% improvement over the previously used method (`json.Marshal` from `encoding/json`).

```
$ benchstat stdlib.txt ours.txt
name        old time/op  new time/op  delta
MarshalMap   412ns ± 1%    73ns ± 0%  -82.34%  (p=0.008 n=5+5)
```

This results in an overall improvement of ~44% for real-life profiling
data that has labels:

```
$ benchstat old.txt new.txt
name                old time/op  new time/op  delta
ArrowFlamegraph-10   289ms ± 1%   161ms ± 1%  -44.23%  (p=0.008 n=5+5)
```
  • Loading branch information
brancz committed Aug 6, 2023
1 parent 2d6b35a commit 356bc4d
Show file tree
Hide file tree
Showing 5 changed files with 253 additions and 6 deletions.
8 changes: 3 additions & 5 deletions pkg/query/flamegraph_arrow.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package query
import (
"bytes"
"context"
"encoding/json"
"fmt"
"strings"

Expand Down Expand Up @@ -400,6 +399,7 @@ func generateFlamegraphArrowRecord(ctx context.Context, mem memory.Allocator, tr
}
}
}
lsbytes := make([]byte, 0, 512)
for i := 0; i < fb.builderCumulative.Len(); i++ {
if lsets, hasLabels := fb.labels[i]; hasLabels {
inter := mapsIntersection(lsets)
Expand All @@ -408,10 +408,8 @@ func generateFlamegraphArrowRecord(ctx context.Context, mem memory.Allocator, tr
continue
}

lsbytes, err := json.Marshal(inter)
if err != nil {
return nil, 0, 0, 0, err
}
lsbytes = lsbytes[:0]
lsbytes = MarshalStringMap(lsbytes, inter)
if err := fb.builderLabels.Append(lsbytes); err != nil {
return nil, 0, 0, 0, err
}
Expand Down
76 changes: 76 additions & 0 deletions pkg/query/flamegraph_arrow_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@ package query

import (
"bytes"
"compress/gzip"
"context"
"encoding/json"
"io"
"os"
"testing"

"github.com/apache/arrow/go/v13/arrow"
Expand Down Expand Up @@ -681,3 +684,76 @@ func TestMapsIntersection(t *testing.T) {
{"thread": "2", "thread_name": "name"},
}))
}

// BenchmarkArrowFlamegraph measures GenerateFlamegraphArrow on the profile
// stored in testdata/profile-with-labels.pb.gz. Reading, decompressing and
// converting the fixture happens before the timer is reset, so only the
// flamegraph generation is timed.
func BenchmarkArrowFlamegraph(b *testing.B) {
	raw, err := os.ReadFile("testdata/profile-with-labels.pb.gz")
	require.NoError(b, err)

	zr, err := gzip.NewReader(bytes.NewBuffer(raw))
	require.NoError(b, err)

	plain, err := io.ReadAll(zr)
	require.NoError(b, err)

	// The protobuf form is decoded from the decompressed bytes; it is only
	// used to derive the profile metadata.
	pbProfile := &pprofpb.Profile{}
	require.NoError(b, pbProfile.UnmarshalVT(plain))

	// ParseData is handed the original (still compressed) file content.
	parsed, err := pprofprofile.ParseData(raw)
	require.NoError(b, err)

	meta := parcaprofile.MetaFromPprof(pbProfile, "memory", 0)
	symbolized, err := PprofToSymbolizedProfile(meta, parsed, 0)
	require.NoError(b, err)

	var (
		ctx    = context.Background()
		tracer = trace.NewNoopTracerProvider().Tracer("")
	)

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		_, _, err := GenerateFlamegraphArrow(ctx, memory.DefaultAllocator, tracer, symbolized, nil, 0)
		require.NoError(b, err)
	}
}

// TestMarshalMap checks that MarshalStringMap renders a two-entry map as a
// JSON object. Go map iteration order is unspecified, so either key order is
// accepted.
func TestMarshalMap(t *testing.T) {
	input := map[string]string{
		"test1": "something",
		"test2": "something_else",
	}

	got := string(MarshalStringMap(make([]byte, 0, 1024), input))

	require.Contains(t, []string{
		`{"test1":"something","test2":"something_else"}`,
		`{"test2":"something_else","test1":"something"}`,
	}, got)
}

// BenchmarkMarshalMap compares json.Marshal from encoding/json (sub-benchmark
// "stdlib") with MarshalStringMap (sub-benchmark "ours") on the same two-entry
// string map.
func BenchmarkMarshalMap(b *testing.B) {
	input := map[string]string{
		"test1": "something",
		"test2": "something_else",
	}

	// sink receives the json.Marshal error so the call's result is used.
	var sink error
	b.ResetTimer()
	b.Run("stdlib", func(b *testing.B) {
		for n := 0; n < b.N; n++ {
			_, sink = json.Marshal(input)
		}
	})
	_ = sink
	b.Run("ours", func(b *testing.B) {
		// One scratch buffer is reused across all iterations.
		scratch := make([]byte, 0, 1024)
		for n := 0; n < b.N; n++ {
			scratch = MarshalStringMap(scratch, input)
		}
	})
}
173 changes: 173 additions & 0 deletions pkg/query/json.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
// Copyright 2023 The Parca Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package query

import (
"math/bits"
"reflect"
"unsafe"
)

// MarshalStringMap encodes m as a JSON object and returns the encoded bytes.
//
// buf is used as scratch space: its existing contents are discarded and its
// backing array is reused when large enough, so callers can pass the slice
// returned by a previous call to avoid allocations. The result is only valid
// until buf is reused.
//
// Keys are written in Go's map iteration order, which is unspecified, so the
// output for maps with more than one entry is not deterministic. An empty or
// nil map is encoded as "{}".
func MarshalStringMap(buf []byte, m map[string]string) []byte {
	buf = append(buf[:0], '{')

	// Without this guard the trailing-comma replacement below would overwrite
	// the opening brace and produce "}" for an empty map.
	if len(m) == 0 {
		return append(buf, '}')
	}

	for k, v := range m {
		buf = appendString(buf, k)
		buf = append(buf, ':')
		buf = appendString(buf, v)
		buf = append(buf, ',')
	}

	// Every entry is followed by a comma; turn the last one into the closing
	// brace instead of tracking whether an entry is the first or last.
	buf[len(buf)-1] = '}'
	return buf
}

// Word-wide bit patterns used by appendString to inspect eight bytes at once.
const (
	// lsb has the lowest bit of every byte set; multiplying it by a byte value
	// repeats that value in all eight byte lanes.
	lsb = 0x0101010101010101
	// msb has the highest bit of every byte set.
	msb = 0x80 * lsb
)

var (
	// hex holds the lowercase hexadecimal digits, indexed by nibble value.
	hex = "0123456789abcdef"

	// needEscape reports, per byte value, whether the byte must be escaped
	// inside a JSON string: the control characters 0x00-0x1f, the double quote
	// and the backslash. Every other byte (0x20-0xff) is false.
	needEscape = func() [256]bool {
		var table [256]bool
		for c := 0x00; c <= 0x1f; c++ {
			table[c] = true
		}
		table['"'] = true
		table['\\'] = true
		return table
	}()
)

// appendString appends s to buf as a double-quoted JSON string and returns the
// extended slice.
//
// '"' and '\\' are backslash-escaped, '\n', '\r' and '\t' use their short
// escapes, and every other byte flagged by needEscape is written as \u00XX.
// All remaining bytes are copied through unchanged; no UTF-8 validation is
// performed.
//
// Strings of at least 8 bytes are first scanned one 64-bit word at a time to
// find where byte-wise processing has to start. A string with nothing to
// escape is therefore appended in a single copy.
//
// Taken from goccy/go-json.
func appendString(buf []byte, s string) []byte {
	if len(s) == 0 {
		return append(buf, `""`...)
	}
	buf = append(buf, '"')

	// i is the start of the run of bytes not yet copied to buf and j is the
	// scan position; no byte in s[i:j] needs escaping.
	var i, j int
	if len(s) >= 8 {
		chunks := stringToUint64Slice(s)
		// Unless a chunk is flagged below, all full chunks are clean and only
		// the tail of fewer than 8 bytes is left for the byte-wise loop.
		j = len(chunks) * 8
		for k, n := range chunks {
			// Combine the masks before checking the MSB of each byte. `n` is
			// included in the mask to check whether any of the *input* byte
			// MSBs were set (i.e. the byte was outside the ASCII range). The
			// other terms set a byte's MSB when it is below 0x20, equal to
			// '"', or equal to '\\'.
			mask := n | (n - (lsb * 0x20)) |
				((n ^ (lsb * '"')) - lsb) |
				((n ^ (lsb * '\\')) - lsb)
			if flagged := mask & msb; flagged != 0 {
				// Resume byte-wise at the flagged byte of this chunk. The
				// chunk's base offset is added so the clean prefix is not
				// scanned a second time.
				// NOTE(review): treating the lowest set bit as the first
				// flagged byte assumes little-endian words from
				// stringToUint64Slice; confirm before targeting big-endian
				// platforms.
				j = k*8 + bits.TrailingZeros64(flagged)/8
				break
			}
		}
	}

	for ; j < len(s); j++ {
		c := s[j]
		if !needEscape[c] {
			// Fast path: most of the time, printable ASCII characters are used.
			continue
		}

		// Flush the pending clean run, then emit the escape sequence for c.
		buf = append(buf, s[i:j]...)
		switch c {
		case '\\', '"':
			buf = append(buf, '\\', c)
		case '\n':
			buf = append(buf, '\\', 'n')
		case '\r':
			buf = append(buf, '\\', 'r')
		case '\t':
			buf = append(buf, '\\', 't')
		default:
			// The remaining flagged bytes are control characters below 0x20.
			buf = append(buf, `\u00`...)
			buf = append(buf, hex[c>>4], hex[c&0xF])
		}
		i = j + 1
	}

	return append(append(buf, s[i:]...), '"')
}

// stringToUint64Slice reinterprets the bytes of s as a []uint64 of length
// len(s)/8 without copying; the trailing len(s)%8 bytes are not covered.
//
// The returned slice aliases the string's memory, so it must only be read and
// must not outlive s. Word values are in the platform's native byte order.
//
// The result is built by writing through a *reflect.SliceHeader that points
// at a real slice variable, as the unsafe package documentation requires. A
// free-standing reflect.SliceHeader value does not hold Data as a reference.
func stringToUint64Slice(s string) []uint64 {
	var chunks []uint64
	src := (*reflect.StringHeader)(unsafe.Pointer(&s))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&chunks))
	dst.Data = src.Data
	dst.Len = len(s) / 8
	dst.Cap = len(s) / 8
	return chunks
}
2 changes: 1 addition & 1 deletion pkg/query/query_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ func Benchmark_Query_Merge(b *testing.B) {
},
},
//nolint:staticcheck // SA1019: For now we want to support these APIs
ReportType: pb.QueryRequest_REPORT_TYPE_FLAMEGRAPH_UNSPECIFIED,
ReportType: pb.QueryRequest_REPORT_TYPE_FLAMEGRAPH_ARROW,
})
require.NoError(b, err)
}
Expand Down
Binary file added pkg/query/testdata/profile-with-labels.pb.gz
Binary file not shown.

0 comments on commit 356bc4d

Please sign in to comment.