
fix: sanitize pprof references #3218

Merged: 6 commits, merged on Apr 16, 2024
16 changes: 10 additions & 6 deletions pkg/distributor/distributor.go
@@ -230,14 +230,18 @@ func (d *Distributor) Push(ctx context.Context, grpcReq *connect.Request[pushv1.
}

func (d *Distributor) GetProfileLanguage(series *distributormodel.ProfileSeries) string {
if series.Language != "" {
return series.Language
}
if len(series.Samples) == 0 {
return "unknown"
}
lang := series.GetLanguage()
if lang != "" {
return lang
if lang == "" {
lang = pprof.GetLanguage(series.Samples[0].Profile, d.logger)
}
return pprof.GetLanguage(series.Samples[0].Profile, d.logger)
series.Language = lang
return series.Language
}

func (d *Distributor) PushParsed(ctx context.Context, req *distributormodel.PushRequest) (resp *connect.Response[pushv1.PushResponse], err error) {
@@ -280,9 +284,6 @@ func (d *Distributor) PushParsed(ctx context.Context, req *distributormodel.Push
d.metrics.receivedCompressedBytes.WithLabelValues(profName, tenantID).Observe(float64(len(raw.RawProfile)))
}
p := raw.Profile
if profLanguage == "go" {
p.Profile = pprof.FixGoProfile(p.Profile)
}
decompressedSize := p.SizeVT()
d.metrics.receivedDecompressedBytes.WithLabelValues(profName, tenantID).Observe(float64(decompressedSize))
d.metrics.receivedSamples.WithLabelValues(profName, tenantID).Observe(float64(len(p.Sample)))
@@ -309,6 +310,9 @@ func (d *Distributor) PushParsed(ctx context.Context, req *distributormodel.Push
// therefore it should be done after the rate limit check.
for _, series := range req.Series {
for _, sample := range series.Samples {
if series.Language == "go" {
sample.Profile.Profile = pprof.FixGoProfile(sample.Profile.Profile)
}
sample.Profile.Normalize()
}
}
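
Note on the distributor change above: language detection now runs at most once per series, and the result is cached on the series so the later check of series.Language == "go" in the normalization loop sees the same value that GetProfileLanguage returned. A minimal, hypothetical sketch of this memoization pattern follows; the types and the detection logic are simplified stand-ins, not Pyroscope's actual implementation.

package main

import "fmt"

// profileSeries is a stand-in for distributormodel.ProfileSeries.
type profileSeries struct {
	Language string
	Samples  []string
}

// getProfileLanguage detects the language at most once and caches it on the series.
func getProfileLanguage(s *profileSeries) string {
	if s.Language != "" {
		return s.Language // already detected for this series
	}
	if len(s.Samples) == 0 {
		return "unknown"
	}
	// Stand-in for the real label- and symbol-based detection.
	s.Language = "go"
	return s.Language
}

func main() {
	s := &profileSeries{Samples: []string{"cpu"}}
	fmt.Println(getProfileLanguage(s)) // detects and caches: "go"
	s.Samples = nil
	fmt.Println(getProfileLanguage(s)) // still "go": served from the cache
}
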
5 changes: 3 additions & 2 deletions pkg/distributor/model/push.go
@@ -28,8 +28,9 @@ type ProfileSample struct {
}

type ProfileSeries struct {
Labels []*v1.LabelPair
Samples []*ProfileSample
Labels []*v1.LabelPair
Samples []*ProfileSample
Language string
}

func (p *ProfileSeries) GetLanguage() string {
105 changes: 13 additions & 92 deletions pkg/pprof/merge.go
@@ -33,10 +33,11 @@ func (m *ProfileMerge) MergeNoClone(p *profilev1.Profile) error {
}

func (m *ProfileMerge) merge(p *profilev1.Profile, clone bool) error {
if p == nil || len(p.StringTable) < 2 {
if p == nil || len(p.Sample) == 0 || len(p.StringTable) < 2 {
return nil
}
ConvertIDsToIndices(p)

sanitizeProfile(p)
var initial bool
if m.profile == nil {
m.init(p, clone)
@@ -213,6 +214,9 @@ func compatible(a, b *profilev1.Profile) error {
// equalValueType returns true if the two value types are semantically
// equal. It ignores the internal fields used during encode/decode.
func equalValueType(st1, st2 *profilev1.ValueType) bool {
if st1 == nil || st2 == nil {
return false
}
return st1.Type == st2.Type && st1.Unit == st2.Unit
}

@@ -242,11 +246,13 @@ func RewriteStrings(p *profilev1.Profile, n []uint32) {
}
p.DropFrames = int64(n[p.DropFrames])
p.KeepFrames = int64(n[p.KeepFrames])
if p.PeriodType.Type != 0 {
p.PeriodType.Type = int64(n[p.PeriodType.Type])
}
if p.PeriodType.Unit != 0 {
p.PeriodType.Unit = int64(n[p.PeriodType.Unit])
if p.PeriodType != nil {
if p.PeriodType.Type != 0 {
p.PeriodType.Type = int64(n[p.PeriodType.Type])
}
if p.PeriodType.Unit != 0 {
p.PeriodType.Unit = int64(n[p.PeriodType.Unit])
}
}
for i, x := range p.Comment {
p.Comment[i] = int64(n[x])
@@ -431,88 +437,3 @@ func (t *RewriteTable[K, V, M]) Append(values []V) {
}

func (t *RewriteTable[K, V, M]) Values() []M { return t.s }

func ConvertIDsToIndices(p *profilev1.Profile) {
denseMappings := hasDenseMappings(p)
denseLocations := hasDenseLocations(p)
denseFunctions := hasDenseFunctions(p)
if denseMappings && denseLocations && denseFunctions {
// In most cases IDs are dense (do match the element index),
// therefore the function does not change anything.
return
}
// NOTE(kolesnikovae):
// In some cases IDs is a non-monotonically increasing sequence,
// therefore the same map can be reused to avoid re-allocations.
t := make(map[uint64]uint64, len(p.Location))
if !denseMappings {
for i, x := range p.Mapping {
idx := uint64(i + 1)
x.Id, t[x.Id] = idx, idx
}
RewriteMappingsWithMap(p, t)
}
if !denseLocations {
for i, x := range p.Location {
idx := uint64(i + 1)
x.Id, t[x.Id] = idx, idx
}
RewriteLocationsWithMap(p, t)
}
if !denseFunctions {
for i, x := range p.Function {
idx := uint64(i + 1)
x.Id, t[x.Id] = idx, idx
}
RewriteFunctionsWithMap(p, t)
}
}

func hasDenseFunctions(p *profilev1.Profile) bool {
for i, f := range p.Function {
if f.Id != uint64(i+1) {
return false
}
}
return true
}

func hasDenseLocations(p *profilev1.Profile) bool {
for i, loc := range p.Location {
if loc.Id != uint64(i+1) {
return false
}
}
return true
}

func hasDenseMappings(p *profilev1.Profile) bool {
for i, m := range p.Mapping {
if m.Id != uint64(i+1) {
return false
}
}
return true
}

func RewriteFunctionsWithMap(p *profilev1.Profile, n map[uint64]uint64) {
for _, loc := range p.Location {
for _, line := range loc.Line {
line.FunctionId = n[line.FunctionId]
}
}
}

func RewriteMappingsWithMap(p *profilev1.Profile, n map[uint64]uint64) {
for _, loc := range p.Location {
loc.MappingId = n[loc.MappingId]
}
}

func RewriteLocationsWithMap(p *profilev1.Profile, n map[uint64]uint64) {
for _, s := range p.Sample {
for i, loc := range s.LocationId {
s.LocationId[i] = n[loc]
}
}
}
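
The body of sanitizeProfile, which replaces the ConvertIDsToIndices call removed above, is not part of this diff. Based on the author's review comment on the removed test below (invalid elements are discarded at normalization/merge), one step such a sanitizer could take is to drop samples that reference locations absent from the profile instead of merging them. The following is a hypothetical sketch only, using fields of profilev1.Profile that appear elsewhere in this diff; it is not Pyroscope's actual sanitization code.

package pprofsketch

import (
	profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
)

// dropDanglingSamples removes samples whose location references cannot be
// resolved against the profile's location table.
func dropDanglingSamples(p *profilev1.Profile) {
	known := make(map[uint64]struct{}, len(p.Location))
	for _, loc := range p.Location {
		known[loc.Id] = struct{}{}
	}
	valid := p.Sample[:0]
sampleLoop:
	for _, s := range p.Sample {
		for _, id := range s.LocationId {
			if _, ok := known[id]; !ok {
				continue sampleLoop // discard the sample with a dangling reference
			}
		}
		valid = append(valid, s)
	}
	p.Sample = valid
}

Under that assumption, profiles containing zero-valued references no longer round-trip unchanged through a merge, which is why the Test_Merge_ZeroReferences cases below were removed rather than updated.
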
85 changes: 40 additions & 45 deletions pkg/pprof/merge_test.go
@@ -1,8 +1,10 @@
package pprof

import (
"os"
"testing"

"github.com/google/pprof/profile"
"github.com/stretchr/testify/require"

profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
@@ -33,51 +35,6 @@ func Test_Merge_Self(t *testing.T) {
testhelper.EqualProto(t, p.Profile, m.Profile())
}

func Test_Merge_ZeroReferences(t *testing.T) {
Review comment (collaborator, PR author): I had to remove the test as it's no longer valid: invalid elements are discarded at normalization/merge.

p, err := OpenFile("testdata/go.cpu.labels.pprof")
require.NoError(t, err)

t.Run("mappingID=0", func(t *testing.T) {
before := p.Location[10]
p.Location[10].MappingId = 0
defer func() {
p.Location[10] = before
}()

var m ProfileMerge
require.NoError(t, m.Merge(p.Profile))

testhelper.EqualProto(t, p.Profile, m.Profile())
})

t.Run("locationID=0", func(t *testing.T) {
before := p.Sample[10].LocationId[0]
p.Sample[10].LocationId[0] = 0
defer func() {
p.Sample[10].LocationId[0] = before
}()

var m ProfileMerge
require.NoError(t, m.Merge(p.Profile))

testhelper.EqualProto(t, p.Profile, m.Profile())
})

t.Run("functionID=0", func(t *testing.T) {
before := p.Location[10].Line[0].FunctionId
p.Location[10].Line[0].FunctionId = 0
defer func() {
p.Location[10].Line[0].FunctionId = before
}()

var m ProfileMerge
require.NoError(t, m.Merge(p.Profile))

testhelper.EqualProto(t, p.Profile, m.Profile())
})

}

func Test_Merge_Halves(t *testing.T) {
p, err := OpenFile("testdata/go.cpu.labels.pprof")
require.NoError(t, err)
@@ -470,3 +427,41 @@ func TestMergeEmpty(t *testing.T) {
})
require.NoError(t, err)
}

// Benchmark_Merge_self/pprof.MergeNoClone-10 4174 290190 ns/op
// Benchmark_Merge_self/pprof.Merge-10 2722 421419 ns/op
// Benchmark_Merge_self/profile.Merge-10 802 1417907 ns/op
func Benchmark_Merge_self(b *testing.B) {
d, err := os.ReadFile("testdata/go.cpu.labels.pprof")
require.NoError(b, err)

b.Run("pprof.MergeNoClone", func(b *testing.B) {
p, err := RawFromBytes(d)
require.NoError(b, err)
b.ResetTimer()
for i := 0; i < b.N; i++ {
var m ProfileMerge
require.NoError(b, m.MergeNoClone(p.Profile.CloneVT()))
}
})

b.Run("pprof.Merge", func(b *testing.B) {
p, err := RawFromBytes(d)
require.NoError(b, err)
b.ResetTimer()
for i := 0; i < b.N; i++ {
var m ProfileMerge
require.NoError(b, m.Merge(p.Profile.CloneVT()))
}
})

b.Run("profile.Merge", func(b *testing.B) {
p, err := profile.ParseData(d)
require.NoError(b, err)
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err = profile.Merge([]*profile.Profile{p.Copy()})
require.NoError(b, err)
}
})
}