Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: optimize deduplication #3351

Merged
merged 7 commits into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
435 changes: 223 additions & 212 deletions api/gen/proto/go/ingester/v1/ingester.pb.go

Large diffs are not rendered by default.

117 changes: 117 additions & 0 deletions api/gen/proto/go/ingester/v1/ingester_vtproto.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions api/ingester/v1/ingester.proto
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,10 @@ message MergeSpanProfileResult {
}

// ProfileSets describes a batch of selected profiles together with the
// series those profiles belong to.
message ProfileSets {
// DEPRECATED: Use fingerprints instead.
// Label sets of the series referenced by profiles.
repeated types.v1.Labels labelsSets = 1;
// The selected profiles. NOTE(review): the server-side code sets each
// profile's label index to a position in fingerprints (previously in
// labelsSets) — confirm against SeriesProfile.
repeated SeriesProfile profiles = 2;
// Fingerprints of the series referenced by profiles; the server appends a
// fingerprint only the first time a series is seen within a batch.
repeated uint64 fingerprints = 3;
}

message SeriesProfile {
Expand Down
10 changes: 9 additions & 1 deletion api/openapiv2/gen/phlare.swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -1071,14 +1071,22 @@
"items": {
"type": "object",
"$ref": "#/definitions/v1Labels"
}
},
"description": "DEPRECATED: Use fingerprints instead."
},
"profiles": {
"type": "array",
"items": {
"type": "object",
"$ref": "#/definitions/v1SeriesProfile"
}
},
"fingerprints": {
"type": "array",
"items": {
"type": "string",
"format": "uint64"
}
}
}
},
Expand Down
2 changes: 1 addition & 1 deletion pkg/phlaredb/block_querier.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ import (
)

const (
defaultBatchSize = 4096
defaultBatchSize = 64 << 10

// This controls the buffer size for reads from a parquet io.Reader. This value should be small for memory- or
// disk-backed readers, but when the reader is backed by network storage a larger size will be advantageous.
Expand Down
33 changes: 12 additions & 21 deletions pkg/phlaredb/filter_profiles_bidi.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,14 @@ import (
"github.com/prometheus/common/model"

ingestv1 "github.com/grafana/pyroscope/api/gen/proto/go/ingester/v1"
typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
"github.com/grafana/pyroscope/pkg/iter"
phlaremodel "github.com/grafana/pyroscope/pkg/model"
)

// BidiServerMerge is the server-side handle of a bidirectional merge stream.
// It is parameterized by the response type it sends (Res) and the request
// type it receives (Req).
type BidiServerMerge[Res, Req any] interface {
	// Receive returns the next request, or an error if none could be obtained.
	Receive() (Req, error)
	// Send delivers a single response and reports any failure doing so.
	Send(Res) error
}

// labelWithIndex pairs a series' label set with the position at which that
// label set was appended to the response's LabelsSets slice.
type labelWithIndex struct {
// Labels is the embedded label set of the series.
phlaremodel.Labels
// index is the offset of these labels within ProfileSets.LabelsSets.
index int
}

type ProfileWithIndex struct {
Profile
Index int
Expand Down Expand Up @@ -72,8 +65,8 @@ func filterProfiles[B BidiServerMerge[Res, Req], Res filterResponse, Req filterR
defer sp.Finish()
selection := make([][]Profile, len(profiles))
selectProfileResult := &ingestv1.ProfileSets{
Profiles: make([]*ingestv1.SeriesProfile, 0, batchProfileSize),
LabelsSets: make([]*typesv1.Labels, 0, batchProfileSize),
Profiles: make([]*ingestv1.SeriesProfile, 0, batchProfileSize),
Fingerprints: make([]uint64, 0, batchProfileSize),
}
its := make([]iter.Iterator[ProfileWithIndex], len(profiles))
for i, iter := range profiles {
Expand All @@ -92,28 +85,26 @@ func filterProfiles[B BidiServerMerge[Res, Req], Res filterResponse, Req filterR
otlog.Int("batch_requested_size", batchProfileSize),
)

seriesByFP := map[model.Fingerprint]labelWithIndex{}
selectProfileResult.LabelsSets = selectProfileResult.LabelsSets[:0]
seriesByFP := map[model.Fingerprint]int{}
selectProfileResult.Profiles = selectProfileResult.Profiles[:0]
selectProfileResult.Fingerprints = selectProfileResult.Fingerprints[:0]

for _, profile := range batch {
var ok bool
var lblsIdx labelWithIndex
lblsIdx, ok = seriesByFP[profile.Fingerprint()]
var idx int
fp := profile.Fingerprint()
idx, ok = seriesByFP[fp]
if !ok {
lblsIdx = labelWithIndex{
Labels: profile.Labels(),
index: len(selectProfileResult.LabelsSets),
}
seriesByFP[profile.Fingerprint()] = lblsIdx
selectProfileResult.LabelsSets = append(selectProfileResult.LabelsSets, &typesv1.Labels{Labels: profile.Labels()})
idx = len(selectProfileResult.Fingerprints)
seriesByFP[fp] = idx
selectProfileResult.Fingerprints = append(selectProfileResult.Fingerprints, uint64(fp))
}
selectProfileResult.Profiles = append(selectProfileResult.Profiles, &ingestv1.SeriesProfile{
LabelIndex: int32(lblsIdx.index),
LabelIndex: int32(idx),
Timestamp: int64(profile.Timestamp()),
})

}

sp.LogFields(otlog.String("msg", "sending batch to client"))
var err error
switch s := BidiServerMerge[Res, Req](stream).(type) {
Expand Down
24 changes: 13 additions & 11 deletions pkg/phlaredb/filter_profiles_bidi_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"github.com/stretchr/testify/require"

ingestv1 "github.com/grafana/pyroscope/api/gen/proto/go/ingester/v1"
typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
"github.com/grafana/pyroscope/pkg/iter"
phlaremodel "github.com/grafana/pyroscope/pkg/model"
schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
Expand Down Expand Up @@ -39,34 +38,36 @@ func TestFilterProfiles(t *testing.T) {
require.NoError(t, err)
require.Equal(t, 2, len(filtered[0]))
require.Equal(t, 3, len(bidi.profilesSent))
testhelper.EqualProto(t, []*ingestv1.ProfileSets{

expectedSent := []*ingestv1.ProfileSets{
{
LabelsSets: lo.Times(5, func(i int) *typesv1.Labels {
return &typesv1.Labels{Labels: phlaremodel.LabelsFromStrings("foo", "bar", "i", fmt.Sprintf("%d", i))}
Fingerprints: lo.Times(5, func(i int) uint64 {
return phlaremodel.LabelsFromStrings("foo", "bar", "i", fmt.Sprintf("%d", i)).Hash()
}),
Profiles: lo.Times(5, func(i int) *ingestv1.SeriesProfile {
return &ingestv1.SeriesProfile{Timestamp: int64(model.TimeFromUnixNano(int64(i * int(time.Minute)))), LabelIndex: int32(i)}
}),
},
{
LabelsSets: lo.Times(5, func(i int) *typesv1.Labels {
return &typesv1.Labels{Labels: phlaremodel.LabelsFromStrings("foo", "bar", "i", fmt.Sprintf("%d", i+5))}
Fingerprints: lo.Times(5, func(i int) uint64 {
return phlaremodel.LabelsFromStrings("foo", "bar", "i", fmt.Sprintf("%d", i+5)).Hash()
}),
Profiles: lo.Times(5, func(i int) *ingestv1.SeriesProfile {
return &ingestv1.SeriesProfile{Timestamp: int64(model.TimeFromUnixNano(int64((i + 5) * int(time.Minute)))), LabelIndex: int32(i)}
}),
},
{
LabelsSets: lo.Times(1, func(i int) *typesv1.Labels {
return &typesv1.Labels{Labels: phlaremodel.LabelsFromStrings("foo", "bar", "i", fmt.Sprintf("%d", i+10))}
Fingerprints: lo.Times(1, func(i int) uint64 {
return phlaremodel.LabelsFromStrings("foo", "bar", "i", fmt.Sprintf("%d", i+10)).Hash()
}),
Profiles: lo.Times(1, func(i int) *ingestv1.SeriesProfile {
return &ingestv1.SeriesProfile{Timestamp: int64(model.TimeFromUnixNano(int64((i + 10) * int(time.Minute)))), LabelIndex: int32(i)}
}),
},
}, bidi.profilesSent)
}
testhelper.EqualProto(t, expectedSent, bidi.profilesSent)

require.Equal(t, []Profile{
expectedFiltered := []Profile{
ProfileWithLabels{
profile: &schemav1.InMemoryProfile{TimeNanos: int64(5 * int(time.Minute))},
lbs: phlaremodel.LabelsFromStrings("foo", "bar", "i", fmt.Sprintf("%d", 5)),
Expand All @@ -77,5 +78,6 @@ func TestFilterProfiles(t *testing.T) {
lbs: phlaremodel.LabelsFromStrings("foo", "bar", "i", fmt.Sprintf("%d", 10)),
fp: model.Fingerprint(phlaremodel.LabelsFromStrings("foo", "bar", "i", fmt.Sprintf("%d", 10)).Hash()),
},
}, filtered[0])
}
require.Equal(t, expectedFiltered, filtered[0])
}
4 changes: 2 additions & 2 deletions pkg/phlaredb/phlaredb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ func TestMergeProfilesStacktraces(t *testing.T) {
resp, err := bidi.Receive()
require.NoError(t, err)
require.Nil(t, resp.Result)
require.Len(t, resp.SelectedProfiles.LabelsSets, 1)
require.Len(t, resp.SelectedProfiles.Fingerprints, 1)
require.Len(t, resp.SelectedProfiles.Profiles, 5)

require.NoError(t, bidi.Send(&ingestv1.MergeProfilesStacktracesRequest{
Expand Down Expand Up @@ -325,7 +325,7 @@ func TestMergeProfilesPprof(t *testing.T) {
resp, err := bidi.Receive()
require.NoError(t, err)
require.Nil(t, resp.Result)
require.Len(t, resp.SelectedProfiles.LabelsSets, 1)
require.Len(t, resp.SelectedProfiles.Fingerprints, 1)
require.Len(t, resp.SelectedProfiles.Profiles, 5)

require.NoError(t, bidi.Send(&ingestv1.MergeProfilesPprofRequest{
Expand Down
Loading
Loading