diff --git a/pkg/query/flamegraph_arrow.go b/pkg/query/flamegraph_arrow.go index 3bbe3e20920..02e0fbe8f05 100644 --- a/pkg/query/flamegraph_arrow.go +++ b/pkg/query/flamegraph_arrow.go @@ -16,7 +16,6 @@ package query import ( "bytes" "context" - "encoding/json" "fmt" "strings" @@ -400,6 +399,7 @@ func generateFlamegraphArrowRecord(ctx context.Context, mem memory.Allocator, tr } } } + lsbytes := make([]byte, 0, 512) for i := 0; i < fb.builderCumulative.Len(); i++ { if lsets, hasLabels := fb.labels[i]; hasLabels { inter := mapsIntersection(lsets) @@ -408,10 +408,8 @@ func generateFlamegraphArrowRecord(ctx context.Context, mem memory.Allocator, tr continue } - lsbytes, err := json.Marshal(inter) - if err != nil { - return nil, 0, 0, 0, err - } + lsbytes = lsbytes[:0] + lsbytes = MarshalStringMap(lsbytes, inter) if err := fb.builderLabels.Append(lsbytes); err != nil { return nil, 0, 0, 0, err } diff --git a/pkg/query/flamegraph_arrow_test.go b/pkg/query/flamegraph_arrow_test.go index db54d411c5a..94196c1c4f6 100644 --- a/pkg/query/flamegraph_arrow_test.go +++ b/pkg/query/flamegraph_arrow_test.go @@ -15,8 +15,11 @@ package query import ( "bytes" + "compress/gzip" "context" "encoding/json" + "io" + "os" "testing" "github.com/apache/arrow/go/v13/arrow" @@ -681,3 +684,76 @@ func TestMapsIntersection(t *testing.T) { {"thread": "2", "thread_name": "name"}, })) } + +func BenchmarkArrowFlamegraph(b *testing.B) { + fileContent, err := os.ReadFile("testdata/profile-with-labels.pb.gz") + require.NoError(b, err) + + gz, err := gzip.NewReader(bytes.NewBuffer(fileContent)) + require.NoError(b, err) + + decompressed, err := io.ReadAll(gz) + require.NoError(b, err) + + p := &pprofpb.Profile{} + require.NoError(b, p.UnmarshalVT(decompressed)) + + pp, err := pprofprofile.ParseData(fileContent) + require.NoError(b, err) + + np, err := PprofToSymbolizedProfile(parcaprofile.MetaFromPprof(p, "memory", 0), pp, 0) + require.NoError(b, err) + + tracer := trace.NewNoopTracerProvider().Tracer("") + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _, err := GenerateFlamegraphArrow( + context.Background(), + memory.DefaultAllocator, + tracer, + np, + nil, + 0, + ) + require.NoError(b, err) + } +} + +func TestMarshalMap(t *testing.T) { + m := map[string]string{ + "test1": "something", + "test2": "something_else", + } + + buf := make([]byte, 0, 1024) + buf = MarshalStringMap(buf, m) + res := string(buf) + expected := []string{ + `{"test1":"something","test2":"something_else"}`, + `{"test2":"something_else","test1":"something"}`, + } + require.Contains(t, expected, res) +} + +func BenchmarkMarshalMap(b *testing.B) { + m := map[string]string{ + "test1": "something", + "test2": "something_else", + } + + var err error + b.ResetTimer() + b.Run("stdlib", func(b *testing.B) { + for i := 0; i < b.N; i++ { + _, err = json.Marshal(m) + } + }) + _ = err + b.Run("ours", func(b *testing.B) { + buf := make([]byte, 0, 1024) + for i := 0; i < b.N; i++ { + buf = MarshalStringMap(buf, m) + } + }) +} diff --git a/pkg/query/json.go b/pkg/query/json.go new file mode 100644 index 00000000000..e1f2872a77d --- /dev/null +++ b/pkg/query/json.go @@ -0,0 +1,173 @@ +// Copyright 2023 The Parca Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package query + +import ( + "math/bits" + "reflect" + "unsafe" +) + +func MarshalStringMap(buf []byte, m map[string]string) []byte { + buf = buf[:0] + buf = append(buf, '{') + + for k, v := range m { + buf = appendString(buf, k) + buf = append(buf, ':') + buf = appendString(buf, v) + buf = append(buf, ',') + } + + buf[len(buf)-1] = '}' + return buf +} + +const ( + lsb = 0x0101010101010101 + msb = 0x8080808080808080 +) + +var hex = "0123456789abcdef" + +var needEscape = [256]bool{ + '"': true, + '\\': true, + 0x00: true, + 0x01: true, + 0x02: true, + 0x03: true, + 0x04: true, + 0x05: true, + 0x06: true, + 0x07: true, + 0x08: true, + 0x09: true, + 0x0a: true, + 0x0b: true, + 0x0c: true, + 0x0d: true, + 0x0e: true, + 0x0f: true, + 0x10: true, + 0x11: true, + 0x12: true, + 0x13: true, + 0x14: true, + 0x15: true, + 0x16: true, + 0x17: true, + 0x18: true, + 0x19: true, + 0x1a: true, + 0x1b: true, + 0x1c: true, + 0x1d: true, + 0x1e: true, + 0x1f: true, + /* 0x20 - 0xff */ +} + +// Taken from goccy/go-json. +func appendString(buf []byte, s string) []byte { + valLen := len(s) + if valLen == 0 { + return append(buf, `""`...) + } + buf = append(buf, '"') + var i, j int + if valLen >= 8 { + chunks := stringToUint64Slice(s) + for _, n := range chunks { + // combine masks before checking for the MSB of each byte. We include + // `n` in the mask to check whether any of the *input* byte MSBs were + // set (i.e. the byte was outside the ASCII range). + mask := n | (n - (lsb * 0x20)) | + ((n ^ (lsb * '"')) - lsb) | + ((n ^ (lsb * '\\')) - lsb) + if (mask & msb) != 0 { + j = bits.TrailingZeros64(mask&msb) / 8 + goto ESCAPE_END + } + } + valLen := len(s) + for i := len(chunks) * 8; i < valLen; i++ { + if needEscape[s[i]] { + j = i + goto ESCAPE_END + } + } + return append(append(buf, s...), '"') + } +ESCAPE_END: + for j < valLen { + c := s[j] + + if !needEscape[c] { + // fast path: most of the time, printable ascii characters are used + j++ + continue + } + + switch c { + case '\\', '"': + buf = append(buf, s[i:j]...) + buf = append(buf, '\\', c) + i = j + 1 + j = j + 1 + continue + + case '\n': + buf = append(buf, s[i:j]...) + buf = append(buf, '\\', 'n') + i = j + 1 + j = j + 1 + continue + + case '\r': + buf = append(buf, s[i:j]...) + buf = append(buf, '\\', 'r') + i = j + 1 + j = j + 1 + continue + + case '\t': + buf = append(buf, s[i:j]...) + buf = append(buf, '\\', 't') + i = j + 1 + j = j + 1 + continue + + case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F, // 0x00-0x0F + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F: // 0x10-0x1F + buf = append(buf, s[i:j]...) + buf = append(buf, `\u00`...) + buf = append(buf, hex[c>>4], hex[c&0xF]) + i = j + 1 + j = j + 1 + continue + } + j++ + } + + return append(append(buf, s[i:]...), '"') +} + +func stringToUint64Slice(s string) []uint64 { + return *(*[]uint64)(unsafe.Pointer(&reflect.SliceHeader{ //nolint:govet + Data: ((*reflect.StringHeader)(unsafe.Pointer(&s))).Data, + Len: len(s) / 8, + Cap: len(s) / 8, + })) +} diff --git a/pkg/query/query_test.go b/pkg/query/query_test.go index 7ec41fceb69..cdee75cd996 100644 --- a/pkg/query/query_test.go +++ b/pkg/query/query_test.go @@ -136,7 +136,7 @@ func Benchmark_Query_Merge(b *testing.B) { }, }, //nolint:staticcheck // SA1019: Fow now we want to support these APIs - ReportType: pb.QueryRequest_REPORT_TYPE_FLAMEGRAPH_UNSPECIFIED, + ReportType: pb.QueryRequest_REPORT_TYPE_FLAMEGRAPH_ARROW, }) require.NoError(b, err) } diff --git a/pkg/query/testdata/profile-with-labels.pb.gz b/pkg/query/testdata/profile-with-labels.pb.gz new file mode 100644 index 00000000000..3143e3609af Binary files /dev/null and b/pkg/query/testdata/profile-with-labels.pb.gz differ