From 642a49f39df70f77ad656b4518048821e0ac6807 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 15 Apr 2024 09:55:08 +0800 Subject: [PATCH 1/5] fix: sanitize pprof references --- pkg/distributor/distributor.go | 16 ++- pkg/distributor/model/push.go | 5 +- pkg/pprof/merge.go | 87 +------------- pkg/pprof/merge_test.go | 40 +++++++ pkg/pprof/pprof.go | 151 +++++++++++++++++++++---- pkg/pprof/pprof_test.go | 199 ++++++++++++++++++++++++++++++--- 6 files changed, 367 insertions(+), 131 deletions(-) diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index 7fbc88f41a..b4e834b057 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -230,14 +230,18 @@ func (d *Distributor) Push(ctx context.Context, grpcReq *connect.Request[pushv1. } func (d *Distributor) GetProfileLanguage(series *distributormodel.ProfileSeries) string { + if series.Language != "" { + return series.Language + } if len(series.Samples) == 0 { return "unknown" } lang := series.GetLanguage() - if lang != "" { - return lang + if lang == "" { + lang = pprof.GetLanguage(series.Samples[0].Profile, d.logger) } - return pprof.GetLanguage(series.Samples[0].Profile, d.logger) + series.Language = lang + return series.Language } func (d *Distributor) PushParsed(ctx context.Context, req *distributormodel.PushRequest) (resp *connect.Response[pushv1.PushResponse], err error) { @@ -280,9 +284,6 @@ func (d *Distributor) PushParsed(ctx context.Context, req *distributormodel.Push d.metrics.receivedCompressedBytes.WithLabelValues(profName, tenantID).Observe(float64(len(raw.RawProfile))) } p := raw.Profile - if profLanguage == "go" { - p.Profile = pprof.FixGoProfile(p.Profile) - } decompressedSize := p.SizeVT() d.metrics.receivedDecompressedBytes.WithLabelValues(profName, tenantID).Observe(float64(decompressedSize)) d.metrics.receivedSamples.WithLabelValues(profName, tenantID).Observe(float64(len(p.Sample))) @@ -309,6 +310,9 @@ func (d *Distributor) PushParsed(ctx context.Context, req *distributormodel.Push // therefore it should be done after the rate limit check. for _, series := range req.Series { for _, sample := range series.Samples { + if series.Language == "go" { + sample.Profile.Profile = pprof.FixGoProfile(sample.Profile.Profile) + } sample.Profile.Normalize() } } diff --git a/pkg/distributor/model/push.go b/pkg/distributor/model/push.go index a996e06785..64fb180ad3 100644 --- a/pkg/distributor/model/push.go +++ b/pkg/distributor/model/push.go @@ -28,8 +28,9 @@ type ProfileSample struct { } type ProfileSeries struct { - Labels []*v1.LabelPair - Samples []*ProfileSample + Labels []*v1.LabelPair + Samples []*ProfileSample + Language string } func (p *ProfileSeries) GetLanguage() string { diff --git a/pkg/pprof/merge.go b/pkg/pprof/merge.go index ff1ff7d529..784bc7d72c 100644 --- a/pkg/pprof/merge.go +++ b/pkg/pprof/merge.go @@ -36,7 +36,7 @@ func (m *ProfileMerge) merge(p *profilev1.Profile, clone bool) error { if p == nil || len(p.StringTable) < 2 { return nil } - ConvertIDsToIndices(p) + sanitizeReferences(p) var initial bool if m.profile == nil { m.init(p, clone) @@ -425,88 +425,3 @@ func (t *RewriteTable[K, V, M]) Append(values []V) { } func (t *RewriteTable[K, V, M]) Values() []M { return t.s } - -func ConvertIDsToIndices(p *profilev1.Profile) { - denseMappings := hasDenseMappings(p) - denseLocations := hasDenseLocations(p) - denseFunctions := hasDenseFunctions(p) - if denseMappings && denseLocations && denseFunctions { - // In most cases IDs are dense (do match the element index), - // therefore the function does not change anything. - return - } - // NOTE(kolesnikovae): - // In some cases IDs is a non-monotonically increasing sequence, - // therefore the same map can be reused to avoid re-allocations. - t := make(map[uint64]uint64, len(p.Location)) - if !denseMappings { - for i, x := range p.Mapping { - idx := uint64(i + 1) - x.Id, t[x.Id] = idx, idx - } - RewriteMappingsWithMap(p, t) - } - if !denseLocations { - for i, x := range p.Location { - idx := uint64(i + 1) - x.Id, t[x.Id] = idx, idx - } - RewriteLocationsWithMap(p, t) - } - if !denseFunctions { - for i, x := range p.Function { - idx := uint64(i + 1) - x.Id, t[x.Id] = idx, idx - } - RewriteFunctionsWithMap(p, t) - } -} - -func hasDenseFunctions(p *profilev1.Profile) bool { - for i, f := range p.Function { - if f.Id != uint64(i+1) { - return false - } - } - return true -} - -func hasDenseLocations(p *profilev1.Profile) bool { - for i, loc := range p.Location { - if loc.Id != uint64(i+1) { - return false - } - } - return true -} - -func hasDenseMappings(p *profilev1.Profile) bool { - for i, m := range p.Mapping { - if m.Id != uint64(i+1) { - return false - } - } - return true -} - -func RewriteFunctionsWithMap(p *profilev1.Profile, n map[uint64]uint64) { - for _, loc := range p.Location { - for _, line := range loc.Line { - line.FunctionId = n[line.FunctionId] - } - } -} - -func RewriteMappingsWithMap(p *profilev1.Profile, n map[uint64]uint64) { - for _, loc := range p.Location { - loc.MappingId = n[loc.MappingId] - } -} - -func RewriteLocationsWithMap(p *profilev1.Profile, n map[uint64]uint64) { - for _, s := range p.Sample { - for i, loc := range s.LocationId { - s.LocationId[i] = n[loc] - } - } -} diff --git a/pkg/pprof/merge_test.go b/pkg/pprof/merge_test.go index 7dda63065f..ecf076aca9 100644 --- a/pkg/pprof/merge_test.go +++ b/pkg/pprof/merge_test.go @@ -1,8 +1,10 @@ package pprof import ( + "os" "testing" + "github.com/google/pprof/profile" "github.com/stretchr/testify/require" profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" @@ -425,3 +427,41 @@ func TestMergeEmpty(t *testing.T) { }) require.NoError(t, err) } + +// Benchmark_Merge_self/pprof.MergeNoClone-10 4174 290190 ns/op +// Benchmark_Merge_self/pprof.Merge-10 2722 421419 ns/op +// Benchmark_Merge_self/profile.Merge-10 802 1417907 ns/op +func Benchmark_Merge_self(b *testing.B) { + d, err := os.ReadFile("testdata/go.cpu.labels.pprof") + require.NoError(b, err) + + b.Run("pprof.MergeNoClone", func(b *testing.B) { + p, err := RawFromBytes(d) + require.NoError(b, err) + b.ResetTimer() + for i := 0; i < b.N; i++ { + var m ProfileMerge + require.NoError(b, m.MergeNoClone(p.Profile.CloneVT())) + } + }) + + b.Run("pprof.Merge", func(b *testing.B) { + p, err := RawFromBytes(d) + require.NoError(b, err) + b.ResetTimer() + for i := 0; i < b.N; i++ { + var m ProfileMerge + require.NoError(b, m.Merge(p.Profile.CloneVT())) + } + }) + + b.Run("profile.Merge", func(b *testing.B) { + p, err := profile.ParseData(d) + require.NoError(b, err) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err = profile.Merge([]*profile.Profile{p.Copy()}) + require.NoError(b, err) + } + }) +} diff --git a/pkg/pprof/pprof.go b/pkg/pprof/pprof.go index 2a3ebad774..1c36b64e58 100644 --- a/pkg/pprof/pprof.go +++ b/pkg/pprof/pprof.go @@ -352,13 +352,16 @@ var currentTime = time.Now // - Then remove unused references. // - Ensure that the profile has a time_nanos set // - Removes addresses from symbolized profiles. +// - Removes elements with invalid references. +// - Converts identifiers to indices. +// - Ensures that string_table[0] is "". func (p *Profile) Normalize() { // if the profile has no time, set it to now if p.TimeNanos == 0 { p.TimeNanos = currentTime().UnixNano() } - p.ensureHasMapping() + sanitizeReferences(p.Profile) p.clearAddresses() // Non-string labels are not supported. @@ -419,29 +422,6 @@ func (p *Profile) clearAddresses() { } } -// ensureHasMapping ensures all locations have at least a mapping. -func (p *Profile) ensureHasMapping() { - var mId uint64 - for _, m := range p.Mapping { - if mId < m.Id { - mId = m.Id - } - } - var fake *profilev1.Mapping - for _, l := range p.Location { - if l.MappingId == 0 { - if fake == nil { - fake = &profilev1.Mapping{ - Id: mId + 1, - MemoryLimit: ^uint64(0), - } - p.Mapping = append(p.Mapping, fake) - } - l.MappingId = fake.Id - } - } -} - func (p *Profile) clearSampleReferences(samples []*profilev1.Sample) { if len(samples) == 0 { return @@ -1195,3 +1175,126 @@ func Unmarshal(data []byte, p *profilev1.Profile) error { } return p.UnmarshalVT(buf.Bytes()) } + +func sanitizeReferences(p *profilev1.Profile) { + values := len(p.SampleType) + ms := int64(len(p.StringTable)) + // Handle the case when "" is not present, + // or is not at string_table[0]. + z := int64(-1) + for i, s := range p.StringTable { + if s == "" { + z = int64(i) + } + } + var o int64 + if z == -1 { + z = ms + o = 1 + p.StringTable = append(p.StringTable, "") + } + // Swap zero string. Noop if string_table[0] == "". + p.StringTable[z], p.StringTable[0] = p.StringTable[0], p.StringTable[z] + // Now we need to update references to strings: + // invalid references (>= len(string_table)) are set to 0. + // references to empty string are set to 0. + str := func(i int64) int64 { + if i == z || i >= ms { + return 0 + } + return i + o + } + + for _, x := range p.SampleType { + x.Type = str(x.Type) + x.Unit = str(x.Unit) + } + if p.PeriodType != nil { + p.PeriodType.Type = str(p.PeriodType.Type) + p.PeriodType.Unit = str(p.PeriodType.Unit) + } + p.DefaultSampleType = str(p.DefaultSampleType) + p.DropFrames = str(p.DropFrames) + p.KeepFrames = str(p.KeepFrames) + for i := range p.Comment { + p.Comment[i] = str(p.Comment[i]) + } + + t := make(map[uint64]uint64, len(p.Location)) + clearMap := func() { + for k := range t { + delete(t, k) + } + } + + // Sanitize mappings and references to them. + // Locations with invalid references are removed. + j := uint64(1) + for _, x := range p.Mapping { + x.BuildId = str(x.BuildId) + x.Filename = str(x.Filename) + x.Id, t[x.Id] = j, j + j++ + } + // Rewrite references to mappings, removing invalid ones. + // Locations with mapping ID 0 are allowed: in this case, + // a mapping stub is created. + var mapping *profilev1.Mapping + p.Location = slices.RemoveInPlace(p.Location, func(x *profilev1.Location, _ int) bool { + if x.MappingId == 0 { + if mapping == nil { + mapping = &profilev1.Mapping{Id: uint64(len(p.Mapping) + 1)} + p.Mapping = append(p.Mapping, mapping) + } + x.MappingId = mapping.Id + return false + } + x.MappingId = t[x.MappingId] + return x.MappingId == 0 + }) + + // Sanitize functions and references to them. + // Locations with invalid references are removed. + clearMap() + j = 1 + for _, x := range p.Function { + x.Name = str(x.Name) + x.SystemName = str(x.SystemName) + x.Filename = str(x.Filename) + x.Id, t[x.Id] = j, j + j++ + } + p.Location = slices.RemoveInPlace(p.Location, func(x *profilev1.Location, _ int) bool { + for _, line := range x.Line { + if line.FunctionId = t[line.FunctionId]; line.FunctionId == 0 { + return true + } + } + return false + }) + + // Sanitize locations and references to them. + // Samples with invalid references are removed. + clearMap() + j = 1 + for _, x := range p.Location { + x.Id, t[x.Id] = j, j + j++ + } + p.Sample = slices.RemoveInPlace(p.Sample, func(x *profilev1.Sample, _ int) bool { + if len(x.Value) != values { + return true + } + for i := range x.LocationId { + if x.LocationId[i] = t[x.LocationId[i]]; x.LocationId[i] == 0 { + return true + } + } + for _, l := range x.Label { + l.Key = str(l.Key) + l.Str = str(l.Str) + l.NumUnit = str(l.NumUnit) + } + return false + }) +} diff --git a/pkg/pprof/pprof_test.go b/pkg/pprof/pprof_test.go index 70fd7db9b4..3cb9a49324 100644 --- a/pkg/pprof/pprof_test.go +++ b/pkg/pprof/pprof_test.go @@ -1,8 +1,10 @@ package pprof import ( + "io/fs" "math/rand" "os" + "path/filepath" "testing" "time" @@ -51,21 +53,22 @@ func TestNormalizeProfile(t *testing.T) { {Id: 5, Name: 16, SystemName: 17, Filename: 18, StartLine: 1}, }, StringTable: []string{ - "memory", "bytes", "in_used", "allocs", "count", + "", + "bytes", "in_used", "allocs", "count", "main", "runtime.main", "main.go", // fn1 "foo", "runtime.foo", "foo.go", // fn2 "bar", "runtime.bar", "bar.go", // fn3 "buzz", "runtime.buzz", // fn4 "bla", "runtime.bla", "bla.go", // fn5 }, - PeriodType: &profilev1.ValueType{Type: 0, Unit: 1}, + PeriodType: &profilev1.ValueType{Type: 2, Unit: 1}, Comment: []int64{}, DefaultSampleType: 0, } pf := &Profile{Profile: p} pf.Normalize() - require.Equal(t, pf.Profile, &profilev1.Profile{ + require.Equal(t, &profilev1.Profile{ SampleType: []*profilev1.ValueType{ {Type: 2, Unit: 1}, {Type: 3, Unit: 4}, @@ -87,17 +90,18 @@ func TestNormalizeProfile(t *testing.T) { {Id: 4, Name: 12, SystemName: 13, Filename: 5, StartLine: 1}, }, StringTable: []string{ - "memory", "bytes", "in_used", "allocs", "count", + "", + "bytes", "in_used", "allocs", "count", "main.go", "foo", "runtime.foo", "foo.go", "bar", "runtime.bar", "bar.go", "buzz", "runtime.buzz", }, - PeriodType: &profilev1.ValueType{Type: 0, Unit: 1}, + PeriodType: &profilev1.ValueType{Type: 2, Unit: 1}, Comment: []int64{}, TimeNanos: 1577836800000000000, DefaultSampleType: 0, - }) + }, pf.Profile) } func TestNormalizeProfile_SampleLabels(t *testing.T) { @@ -114,9 +118,9 @@ func TestNormalizeProfile_SampleLabels(t *testing.T) { {Type: 1, Unit: 2}, }, Sample: []*profilev1.Sample{ - {LocationId: []uint64{2, 1}, Value: []int64{10}, Label: []*profilev1.Label{{Str: 10, Key: 1}, {Str: 11, Key: 2}}}, - {LocationId: []uint64{2, 1}, Value: []int64{10}, Label: []*profilev1.Label{{Str: 12, Key: 2}, {Str: 10, Key: 1}}}, - {LocationId: []uint64{2, 1}, Value: []int64{10}, Label: []*profilev1.Label{{Str: 11, Key: 2}, {Str: 10, Key: 1}}}, + {LocationId: []uint64{2, 1}, Value: []int64{10}, Label: []*profilev1.Label{{Str: 5, Key: 5}, {Str: 5, Key: 6}}}, + {LocationId: []uint64{2, 1}, Value: []int64{10}, Label: []*profilev1.Label{{Str: 5, Key: 5}, {Str: 5, Key: 7}}}, + {LocationId: []uint64{2, 1}, Value: []int64{10}, Label: []*profilev1.Label{{Str: 5, Key: 5}, {Str: 5, Key: 7}}}, }, Mapping: []*profilev1.Mapping{{Id: 1, HasFunctions: true, MemoryStart: 100, MemoryLimit: 200, FileOffset: 200}}, Location: []*profilev1.Location{ @@ -131,7 +135,7 @@ func TestNormalizeProfile_SampleLabels(t *testing.T) { "", "cpu", "nanoseconds", "main", "main.go", - "foo", + "foo", "bar", "baz", }, PeriodType: &profilev1.ValueType{Type: 1, Unit: 2}, } @@ -143,8 +147,8 @@ func TestNormalizeProfile_SampleLabels(t *testing.T) { {Type: 1, Unit: 2}, }, Sample: []*profilev1.Sample{ - {LocationId: []uint64{2, 1}, Value: []int64{10}, Label: []*profilev1.Label{{Str: 10, Key: 1}, {Str: 12, Key: 2}}}, - {LocationId: []uint64{2, 1}, Value: []int64{20}, Label: []*profilev1.Label{{Str: 10, Key: 1}, {Str: 11, Key: 2}}}, + {LocationId: []uint64{2, 1}, Value: []int64{10}, Label: []*profilev1.Label{{Str: 5, Key: 5}, {Str: 5, Key: 6}}}, + {LocationId: []uint64{2, 1}, Value: []int64{20}, Label: []*profilev1.Label{{Str: 5, Key: 5}, {Str: 5, Key: 7}}}, }, Mapping: []*profilev1.Mapping{{Id: 1, HasFunctions: true}}, Location: []*profilev1.Location{ @@ -159,13 +163,182 @@ func TestNormalizeProfile_SampleLabels(t *testing.T) { "", "cpu", "nanoseconds", "main", "main.go", - "foo", + "foo", "bar", "baz", }, PeriodType: &profilev1.ValueType{Type: 1, Unit: 2}, TimeNanos: 1577836800000000000, }) } +func Test_sanitizeReferences(t *testing.T) { + type testCase struct { + name string + profile *profilev1.Profile + expected *profilev1.Profile + } + + testCases := []testCase{ + { + name: "string_reference", + profile: &profilev1.Profile{ + SampleType: []*profilev1.ValueType{{Type: 10, Unit: 10}}, + Sample: []*profilev1.Sample{{Label: []*profilev1.Label{{Key: 10, Str: 10, NumUnit: 10}}}}, + Mapping: []*profilev1.Mapping{{Filename: 10, BuildId: 10}}, + Function: []*profilev1.Function{{Name: 10, SystemName: 10, Filename: 10}}, + DropFrames: 10, + KeepFrames: 10, + PeriodType: &profilev1.ValueType{Type: 10, Unit: 10}, + DefaultSampleType: 10, + Comment: []int64{0, 10}, + StringTable: []string{}, + }, + expected: &profilev1.Profile{ + SampleType: []*profilev1.ValueType{{}}, + Sample: []*profilev1.Sample{}, + Mapping: []*profilev1.Mapping{{Id: 1}}, + Function: []*profilev1.Function{{Id: 1}}, + PeriodType: &profilev1.ValueType{}, + Comment: []int64{0, 0}, + StringTable: []string{""}, + }, + }, + { + name: "zero_string", + profile: &profilev1.Profile{ + SampleType: []*profilev1.ValueType{{Type: 10, Unit: 10}}, + Sample: []*profilev1.Sample{{Label: []*profilev1.Label{{Key: 10, Str: 10, NumUnit: 10}}}}, + Mapping: []*profilev1.Mapping{{Filename: 10, BuildId: 10}}, + Function: []*profilev1.Function{{Name: 10, SystemName: 10, Filename: 10}}, + DropFrames: 10, + KeepFrames: 10, + PeriodType: &profilev1.ValueType{Type: 10, Unit: 10}, + DefaultSampleType: 10, + Comment: []int64{0, 10}, + StringTable: []string{"foo", ""}, + }, + expected: &profilev1.Profile{ + SampleType: []*profilev1.ValueType{{}}, + Sample: []*profilev1.Sample{}, + Mapping: []*profilev1.Mapping{{Id: 1}}, + Function: []*profilev1.Function{{Id: 1}}, + PeriodType: &profilev1.ValueType{}, + Comment: []int64{0, 0}, + StringTable: []string{"", "foo"}, + }, + }, + { + name: "mapping_reference", + profile: &profilev1.Profile{ + Sample: []*profilev1.Sample{ + {LocationId: []uint64{2, 1}}, + {LocationId: []uint64{3, 2, 1}}, + }, + Location: []*profilev1.Location{ + {Id: 1, MappingId: 1}, + {Id: 3, MappingId: 5}, + {Id: 2, MappingId: 0}, + }, + Mapping: []*profilev1.Mapping{ + {Id: 1}, + }, + }, + expected: &profilev1.Profile{ + Sample: []*profilev1.Sample{ + {LocationId: []uint64{2, 1}}, + }, + Location: []*profilev1.Location{ + {Id: 1, MappingId: 1}, + {Id: 2, MappingId: 2}, + }, + Mapping: []*profilev1.Mapping{ + {Id: 1}, + {Id: 2}, + }, + StringTable: []string{""}, + }, + }, + { + name: "location_reference", + profile: &profilev1.Profile{ + Sample: []*profilev1.Sample{ + {LocationId: []uint64{1, 0}}, + {LocationId: []uint64{5}}, + }, + Location: []*profilev1.Location{ + {Id: 1, MappingId: 1}, + {Id: 0, MappingId: 0}, + }, + }, + expected: &profilev1.Profile{ + Sample: []*profilev1.Sample{}, + Location: []*profilev1.Location{ + {Id: 1, MappingId: 1}, + }, + Mapping: []*profilev1.Mapping{ + {Id: 1}, + }, + StringTable: []string{""}, + }, + }, + { + name: "function_reference", + profile: &profilev1.Profile{ + Sample: []*profilev1.Sample{ + {LocationId: []uint64{2, 1}}, + }, + Location: []*profilev1.Location{ + {Id: 2, MappingId: 1, Line: []*profilev1.Line{{FunctionId: 5}}}, + {Id: 3, MappingId: 1, Line: []*profilev1.Line{{FunctionId: 10}}}, + {Id: 1, MappingId: 1, Line: []*profilev1.Line{{}}}, + }, + Function: []*profilev1.Function{ + {Id: 10}, + }, + }, + expected: &profilev1.Profile{ + Sample: []*profilev1.Sample{}, + Location: []*profilev1.Location{}, + Function: []*profilev1.Function{ + {Id: 1}, + }, + StringTable: []string{""}, + }, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + sanitizeReferences(tc.profile) + assert.Equal(t, tc.expected, tc.profile) + }) + } +} + +func Test_sanitize_fixtures(t *testing.T) { + require.NoError(t, filepath.WalkDir("testdata", func(path string, d fs.DirEntry, err error) error { + if err != nil || d.IsDir() || filepath.Ext(path) == ".txt" { + return err + } + t.Run(path, func(t *testing.T) { + f, err := OpenFile(path) + require.NoError(t, err) + c := f.CloneVT() + sanitizeReferences(f.Profile) + assert.Equal(t, len(c.Sample), len(f.Sample)) + assert.Equal(t, len(c.Location), len(f.Location)) + assert.Equal(t, len(c.Function), len(f.Function)) + assert.Equal(t, len(c.StringTable), len(f.StringTable)) + if len(c.Mapping) != 0 { + assert.Equal(t, len(c.Mapping), len(f.Mapping)) + } else { + assert.Equal(t, 1, len(f.Mapping)) + } + }) + return nil + })) +} + func TestFromProfile(t *testing.T) { out, err := FromProfile(testhelper.FooBarProfile) require.NoError(t, err) From 412c3598d51fccfafe46360a1c7acd46fbef582e Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 15 Apr 2024 10:43:51 +0800 Subject: [PATCH 2/5] fix: zero references test --- pkg/pprof/merge_test.go | 45 ----------------------------------------- 1 file changed, 45 deletions(-) diff --git a/pkg/pprof/merge_test.go b/pkg/pprof/merge_test.go index bdd61e838a..ecf076aca9 100644 --- a/pkg/pprof/merge_test.go +++ b/pkg/pprof/merge_test.go @@ -35,51 +35,6 @@ func Test_Merge_Self(t *testing.T) { testhelper.EqualProto(t, p.Profile, m.Profile()) } -func Test_Merge_ZeroReferences(t *testing.T) { - p, err := OpenFile("testdata/go.cpu.labels.pprof") - require.NoError(t, err) - - t.Run("mappingID=0", func(t *testing.T) { - before := p.Location[10] - p.Location[10].MappingId = 0 - defer func() { - p.Location[10] = before - }() - - var m ProfileMerge - require.NoError(t, m.Merge(p.Profile)) - - testhelper.EqualProto(t, p.Profile, m.Profile()) - }) - - t.Run("locationID=0", func(t *testing.T) { - before := p.Sample[10].LocationId[0] - p.Sample[10].LocationId[0] = 0 - defer func() { - p.Sample[10].LocationId[0] = before - }() - - var m ProfileMerge - require.NoError(t, m.Merge(p.Profile)) - - testhelper.EqualProto(t, p.Profile, m.Profile()) - }) - - t.Run("functionID=0", func(t *testing.T) { - before := p.Location[10].Line[0].FunctionId - p.Location[10].Line[0].FunctionId = 0 - defer func() { - p.Location[10].Line[0].FunctionId = before - }() - - var m ProfileMerge - require.NoError(t, m.Merge(p.Profile)) - - testhelper.EqualProto(t, p.Profile, m.Profile()) - }) - -} - func Test_Merge_Halves(t *testing.T) { p, err := OpenFile("testdata/go.cpu.labels.pprof") require.NoError(t, err) From 047c7cfe2884b0f7518aef3808c98603dd10acf5 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 15 Apr 2024 14:41:27 +0800 Subject: [PATCH 3/5] fix: zero string handling --- pkg/pprof/pprof.go | 21 +++++++++++++++------ pkg/pprof/pprof_test.go | 34 ++++++++++++++++++++++++++++------ 2 files changed, 43 insertions(+), 12 deletions(-) diff --git a/pkg/pprof/pprof.go b/pkg/pprof/pprof.go index 1c36b64e58..3b22acccd3 100644 --- a/pkg/pprof/pprof.go +++ b/pkg/pprof/pprof.go @@ -1177,7 +1177,6 @@ func Unmarshal(data []byte, p *profilev1.Profile) error { } func sanitizeReferences(p *profilev1.Profile) { - values := len(p.SampleType) ms := int64(len(p.StringTable)) // Handle the case when "" is not present, // or is not at string_table[0]. @@ -1187,22 +1186,31 @@ func sanitizeReferences(p *profilev1.Profile) { z = int64(i) } } - var o int64 if z == -1 { + // No empty string found in the table. + // Reduce number of invariants by adding one. z = ms - o = 1 p.StringTable = append(p.StringTable, "") + ms++ } - // Swap zero string. Noop if string_table[0] == "". + // Swap zero string. p.StringTable[z], p.StringTable[0] = p.StringTable[0], p.StringTable[z] // Now we need to update references to strings: // invalid references (>= len(string_table)) are set to 0. // references to empty string are set to 0. str := func(i int64) int64 { + if i == 0 && z > 0 { + // z > 0 indicates that "" is not at string_table[0]. + // This means that element that used to be at 0 has + // been moved to z. + return z + } if i == z || i >= ms { + // The reference to empty string, or a string that is + // not present in the table. return 0 } - return i + o + return i } for _, x := range p.SampleType { @@ -1281,8 +1289,9 @@ func sanitizeReferences(p *profilev1.Profile) { x.Id, t[x.Id] = j, j j++ } + vs := len(p.SampleType) p.Sample = slices.RemoveInPlace(p.Sample, func(x *profilev1.Sample, _ int) bool { - if len(x.Value) != values { + if len(x.Value) != vs { return true } for i := range x.LocationId { diff --git a/pkg/pprof/pprof_test.go b/pkg/pprof/pprof_test.go index 3cb9a49324..1c197b74a7 100644 --- a/pkg/pprof/pprof_test.go +++ b/pkg/pprof/pprof_test.go @@ -203,27 +203,49 @@ func Test_sanitizeReferences(t *testing.T) { }, }, { - name: "zero_string", + name: "zero_string_non_0", profile: &profilev1.Profile{ SampleType: []*profilev1.ValueType{{Type: 10, Unit: 10}}, Sample: []*profilev1.Sample{{Label: []*profilev1.Label{{Key: 10, Str: 10, NumUnit: 10}}}}, Mapping: []*profilev1.Mapping{{Filename: 10, BuildId: 10}}, - Function: []*profilev1.Function{{Name: 10, SystemName: 10, Filename: 10}}, + Function: []*profilev1.Function{{Name: 10, Filename: 0, SystemName: 2}}, DropFrames: 10, KeepFrames: 10, PeriodType: &profilev1.ValueType{Type: 10, Unit: 10}, DefaultSampleType: 10, - Comment: []int64{0, 10}, - StringTable: []string{"foo", ""}, + Comment: []int64{1, 10}, + StringTable: []string{"foo", "", "bar"}, }, expected: &profilev1.Profile{ SampleType: []*profilev1.ValueType{{}}, Sample: []*profilev1.Sample{}, Mapping: []*profilev1.Mapping{{Id: 1}}, - Function: []*profilev1.Function{{Id: 1}}, + Function: []*profilev1.Function{{Id: 1, Filename: 1, SystemName: 2}}, PeriodType: &profilev1.ValueType{}, Comment: []int64{0, 0}, - StringTable: []string{"", "foo"}, + StringTable: []string{"", "foo", "bar"}, + }, + }, + { + name: "zero_string_missing", + profile: &profilev1.Profile{ + SampleType: []*profilev1.ValueType{{Type: 10, Unit: 10}}, + Sample: []*profilev1.Sample{{Label: []*profilev1.Label{{Key: 10, Str: 10, NumUnit: 10}}}}, + Mapping: []*profilev1.Mapping{{Filename: 10, BuildId: 10}}, + Function: []*profilev1.Function{{Name: 0, SystemName: 0, Filename: 1}}, + DropFrames: 10, + KeepFrames: 10, + PeriodType: &profilev1.ValueType{Type: 10, Unit: 10}, + DefaultSampleType: 10, + StringTable: []string{"foo", "bar"}, + }, + expected: &profilev1.Profile{ + SampleType: []*profilev1.ValueType{{}}, + Sample: []*profilev1.Sample{}, + Mapping: []*profilev1.Mapping{{Id: 1}}, + Function: []*profilev1.Function{{Id: 1, Name: 2, SystemName: 2, Filename: 1}}, + PeriodType: &profilev1.ValueType{}, + StringTable: []string{"", "bar", "foo"}, }, }, { From e83b3dfebe00bcc94b7f5c08c5e1d2a6b574a96b Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 15 Apr 2024 16:15:16 +0800 Subject: [PATCH 4/5] handle negative string index Co-authored-by: Christian Simon --- pkg/pprof/pprof.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/pprof/pprof.go b/pkg/pprof/pprof.go index 3b22acccd3..1409c2e0f4 100644 --- a/pkg/pprof/pprof.go +++ b/pkg/pprof/pprof.go @@ -1205,7 +1205,7 @@ func sanitizeReferences(p *profilev1.Profile) { // been moved to z. return z } - if i == z || i >= ms { + if i == z || i >= ms || i < 0 { // The reference to empty string, or a string that is // not present in the table. return 0 From f4f2e82aa65673fb05a5dd819556c7ca941513da Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 15 Apr 2024 18:59:48 +0800 Subject: [PATCH 5/5] fix: add nil checks --- pkg/pprof/merge.go | 20 +++++---- pkg/pprof/pprof.go | 49 +++++++++++++++++----- pkg/pprof/pprof_test.go | 91 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 139 insertions(+), 21 deletions(-) diff --git a/pkg/pprof/merge.go b/pkg/pprof/merge.go index 6c66e355e8..335823af83 100644 --- a/pkg/pprof/merge.go +++ b/pkg/pprof/merge.go @@ -33,10 +33,11 @@ func (m *ProfileMerge) MergeNoClone(p *profilev1.Profile) error { } func (m *ProfileMerge) merge(p *profilev1.Profile, clone bool) error { - if p == nil || len(p.StringTable) < 2 { + if p == nil || len(p.Sample) == 0 || len(p.StringTable) < 2 { return nil } - sanitizeReferences(p) + + sanitizeProfile(p) var initial bool if m.profile == nil { m.init(p, clone) @@ -213,6 +214,9 @@ func compatible(a, b *profilev1.Profile) error { // equalValueType returns true if the two value types are semantically // equal. It ignores the internal fields used during encode/decode. func equalValueType(st1, st2 *profilev1.ValueType) bool { + if st1 == nil || st2 == nil { + return false + } return st1.Type == st2.Type && st1.Unit == st2.Unit } @@ -242,11 +246,13 @@ func RewriteStrings(p *profilev1.Profile, n []uint32) { } p.DropFrames = int64(n[p.DropFrames]) p.KeepFrames = int64(n[p.KeepFrames]) - if p.PeriodType.Type != 0 { - p.PeriodType.Type = int64(n[p.PeriodType.Type]) - } - if p.PeriodType.Unit != 0 { - p.PeriodType.Unit = int64(n[p.PeriodType.Unit]) + if p.PeriodType != nil { + if p.PeriodType.Type != 0 { + p.PeriodType.Type = int64(n[p.PeriodType.Type]) + } + if p.PeriodType.Unit != 0 { + p.PeriodType.Unit = int64(n[p.PeriodType.Unit]) + } } for i, x := range p.Comment { p.Comment[i] = int64(n[x]) diff --git a/pkg/pprof/pprof.go b/pkg/pprof/pprof.go index 1409c2e0f4..f3682cfe28 100644 --- a/pkg/pprof/pprof.go +++ b/pkg/pprof/pprof.go @@ -361,7 +361,7 @@ func (p *Profile) Normalize() { p.TimeNanos = currentTime().UnixNano() } - sanitizeReferences(p.Profile) + sanitizeProfile(p.Profile) p.clearAddresses() // Non-string labels are not supported. @@ -1176,7 +1176,10 @@ func Unmarshal(data []byte, p *profilev1.Profile) error { return p.UnmarshalVT(buf.Bytes()) } -func sanitizeReferences(p *profilev1.Profile) { +func sanitizeProfile(p *profilev1.Profile) { + if p == nil { + return + } ms := int64(len(p.StringTable)) // Handle the case when "" is not present, // or is not at string_table[0]. @@ -1213,14 +1216,19 @@ func sanitizeReferences(p *profilev1.Profile) { return i } - for _, x := range p.SampleType { + p.SampleType = slices.RemoveInPlace(p.SampleType, func(x *profilev1.ValueType, _ int) bool { + if x == nil { + return true + } x.Type = str(x.Type) x.Unit = str(x.Unit) - } + return false + }) if p.PeriodType != nil { p.PeriodType.Type = str(p.PeriodType.Type) p.PeriodType.Unit = str(p.PeriodType.Unit) } + p.DefaultSampleType = str(p.DefaultSampleType) p.DropFrames = str(p.DropFrames) p.KeepFrames = str(p.KeepFrames) @@ -1228,27 +1236,34 @@ func sanitizeReferences(p *profilev1.Profile) { p.Comment[i] = str(p.Comment[i]) } + // Sanitize mappings and references to them. + // Locations with invalid references are removed. t := make(map[uint64]uint64, len(p.Location)) clearMap := func() { for k := range t { delete(t, k) } } - - // Sanitize mappings and references to them. - // Locations with invalid references are removed. j := uint64(1) - for _, x := range p.Mapping { + p.Mapping = slices.RemoveInPlace(p.Mapping, func(x *profilev1.Mapping, _ int) bool { + if x == nil { + return true + } x.BuildId = str(x.BuildId) x.Filename = str(x.Filename) x.Id, t[x.Id] = j, j j++ - } + return false + }) + // Rewrite references to mappings, removing invalid ones. // Locations with mapping ID 0 are allowed: in this case, // a mapping stub is created. var mapping *profilev1.Mapping p.Location = slices.RemoveInPlace(p.Location, func(x *profilev1.Location, _ int) bool { + if x == nil { + return true + } if x.MappingId == 0 { if mapping == nil { mapping = &profilev1.Mapping{Id: uint64(len(p.Mapping) + 1)} @@ -1265,13 +1280,18 @@ func sanitizeReferences(p *profilev1.Profile) { // Locations with invalid references are removed. clearMap() j = 1 - for _, x := range p.Function { + p.Function = slices.RemoveInPlace(p.Function, func(x *profilev1.Function, _ int) bool { + if x == nil { + return true + } x.Name = str(x.Name) x.SystemName = str(x.SystemName) x.Filename = str(x.Filename) x.Id, t[x.Id] = j, j j++ - } + return false + }) + // Check locations again, verifying that all functions are valid. p.Location = slices.RemoveInPlace(p.Location, func(x *profilev1.Location, _ int) bool { for _, line := range x.Line { if line.FunctionId = t[line.FunctionId]; line.FunctionId == 0 { @@ -1289,8 +1309,12 @@ func sanitizeReferences(p *profilev1.Profile) { x.Id, t[x.Id] = j, j j++ } + vs := len(p.SampleType) p.Sample = slices.RemoveInPlace(p.Sample, func(x *profilev1.Sample, _ int) bool { + if x == nil { + return true + } if len(x.Value) != vs { return true } @@ -1300,6 +1324,9 @@ func sanitizeReferences(p *profilev1.Profile) { } } for _, l := range x.Label { + if l == nil { + return true + } l.Key = str(l.Key) l.Str = str(l.Str) l.NumUnit = str(l.NumUnit) diff --git a/pkg/pprof/pprof_test.go b/pkg/pprof/pprof_test.go index 1c197b74a7..220301fac1 100644 --- a/pkg/pprof/pprof_test.go +++ b/pkg/pprof/pprof_test.go @@ -326,12 +326,88 @@ func Test_sanitizeReferences(t *testing.T) { StringTable: []string{""}, }, }, + { + name: "nil_profile", + profile: nil, + expected: nil, + }, + { + name: "nil_sample_type", + profile: &profilev1.Profile{ + SampleType: []*profilev1.ValueType{nil}, + }, + expected: &profilev1.Profile{ + SampleType: []*profilev1.ValueType{}, + StringTable: []string{""}, + }, + }, + { + name: "nil_sample", + profile: &profilev1.Profile{ + SampleType: []*profilev1.ValueType{{}}, + Sample: []*profilev1.Sample{nil}, + }, + expected: &profilev1.Profile{ + SampleType: []*profilev1.ValueType{{}}, + Sample: []*profilev1.Sample{}, + StringTable: []string{""}, + }, + }, + { + name: "nil_location", + profile: &profilev1.Profile{ + SampleType: []*profilev1.ValueType{{}}, + Sample: []*profilev1.Sample{{LocationId: []uint64{1}, Value: []int64{1}}}, + Location: []*profilev1.Location{nil}, + }, + expected: &profilev1.Profile{ + SampleType: []*profilev1.ValueType{{}}, + Sample: []*profilev1.Sample{}, + Location: []*profilev1.Location{}, + StringTable: []string{""}, + }, + }, + { + name: "nil_function", + profile: &profilev1.Profile{ + SampleType: []*profilev1.ValueType{{}}, + Sample: []*profilev1.Sample{{LocationId: []uint64{1}, Value: []int64{1}}}, + Location: []*profilev1.Location{{Line: []*profilev1.Line{{FunctionId: 1}}}}, + Function: []*profilev1.Function{nil}, + }, + expected: &profilev1.Profile{ + SampleType: []*profilev1.ValueType{{}}, + Sample: []*profilev1.Sample{}, + Location: []*profilev1.Location{}, + Function: []*profilev1.Function{}, + Mapping: []*profilev1.Mapping{{Id: 1}}, + StringTable: []string{""}, + }, + }, + { + name: "nil_mapping", + profile: &profilev1.Profile{ + SampleType: []*profilev1.ValueType{{}}, + Sample: []*profilev1.Sample{{LocationId: []uint64{1}, Value: []int64{1}}}, + Location: []*profilev1.Location{{MappingId: 1, Line: []*profilev1.Line{{FunctionId: 1}}}}, + Function: []*profilev1.Function{nil}, + Mapping: []*profilev1.Mapping{nil}, + }, + expected: &profilev1.Profile{ + SampleType: []*profilev1.ValueType{{}}, + Sample: []*profilev1.Sample{}, + Location: []*profilev1.Location{}, + Function: []*profilev1.Function{}, + Mapping: []*profilev1.Mapping{}, + StringTable: []string{""}, + }, + }, } for _, tc := range testCases { tc := tc t.Run(tc.name, func(t *testing.T) { - sanitizeReferences(tc.profile) + sanitizeProfile(tc.profile) assert.Equal(t, tc.expected, tc.profile) }) } @@ -339,14 +415,23 @@ func Test_sanitizeReferences(t *testing.T) { func Test_sanitize_fixtures(t *testing.T) { require.NoError(t, filepath.WalkDir("testdata", func(path string, d fs.DirEntry, err error) error { - if err != nil || d.IsDir() || filepath.Ext(path) == ".txt" { + switch { + case err != nil: return err + case filepath.Ext(path) == ".txt": + return nil + case d.IsDir(): + if d.Name() == "fuzz" { + return fs.SkipDir + } + return nil } + t.Run(path, func(t *testing.T) { f, err := OpenFile(path) require.NoError(t, err) c := f.CloneVT() - sanitizeReferences(f.Profile) + sanitizeProfile(f.Profile) assert.Equal(t, len(c.Sample), len(f.Sample)) assert.Equal(t, len(c.Location), len(f.Location)) assert.Equal(t, len(c.Function), len(f.Function))