Skip to content

Commit bf62e28

Browse files
committed
sampling: fix pubsub implementation
The initial implementation was written as a ~quick hack, with the expectation that it would be replaced by the Changes API. It was broken due to its ignorance of data streams, and multi-shard indices. Sequence numbers are only comparable within a single shard. Given that there is no known delivery date for the Changes API, we propose to instead revise the pubsub implementation to address the problems by: - enforcing single-shard indices for sampled trace data streams - searching (now single-shard) backing indices individually In addition, we now use global checkpoints to bound searches, and use PIT (point in time) for paging through results. Querying underlying indices and global checkpoints requires an additional "monitor" index privilege.
1 parent 12b8dae commit bf62e28

File tree

7 files changed

+638
-283
lines changed

7 files changed

+638
-283
lines changed

apmpackage/apm/0.2.0/data_stream/sampled_traces/manifest.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,10 @@ title: APM tail-sampled traces
22
type: traces
33
dataset: sampled
44
ilm_policy: traces-apm.sampled-default_policy
5+
elasticsearch:
6+
index_template:
7+
settings:
8+
# Create a single shard per index, so we can use
9+
# global checkpoints as a way of limiting search
10+
# results.
11+
number_of_shards: 1
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2+
// or more contributor license agreements. Licensed under the Elastic License;
3+
// you may not use this file except in compliance with the Elastic License.
4+
5+
package pubsub
6+
7+
import (
8+
"context"
9+
"encoding/json"
10+
"fmt"
11+
"io/ioutil"
12+
"net/http"
13+
14+
"github.com/pkg/errors"
15+
16+
"github.com/elastic/go-elasticsearch/v7/esapi"
17+
18+
"github.com/elastic/apm-server/elasticsearch"
19+
)
20+
21+
// getGlobalCheckpoints returns the current global checkpoint for each index
22+
// underlying dataStream. Each index is required to have a single (primary) shard.
23+
func getGlobalCheckpoints(
24+
ctx context.Context,
25+
client elasticsearch.Client,
26+
dataStream string,
27+
) (map[string]int64, error) {
28+
indexGlobalCheckpoints := make(map[string]int64)
29+
resp, err := esapi.IndicesStatsRequest{
30+
Index: []string{dataStream},
31+
Level: "shards",
32+
}.Do(ctx, client)
33+
if err != nil {
34+
return nil, errors.New("index stats request failed")
35+
}
36+
defer resp.Body.Close()
37+
if resp.IsError() {
38+
switch resp.StatusCode {
39+
case http.StatusNotFound:
40+
// Data stream does not yet exist.
41+
return indexGlobalCheckpoints, nil
42+
}
43+
message, _ := ioutil.ReadAll(resp.Body)
44+
return nil, fmt.Errorf("index stats request failed: %s", message)
45+
}
46+
47+
var stats dataStreamStats
48+
if err := json.NewDecoder(resp.Body).Decode(&stats); err != nil {
49+
return nil, err
50+
}
51+
52+
for index, indexStats := range stats.Indices {
53+
if n := len(indexStats.Shards); n > 1 {
54+
return nil, fmt.Errorf("expected 1 shard, got %d for index %q", n, index)
55+
}
56+
for _, shardStats := range indexStats.Shards {
57+
for _, shardStats := range shardStats {
58+
if shardStats.Routing.Primary {
59+
indexGlobalCheckpoints[index] = shardStats.SeqNo.GlobalCheckpoint
60+
break
61+
}
62+
}
63+
}
64+
}
65+
return indexGlobalCheckpoints, nil
66+
}
67+
68+
type dataStreamStats struct {
69+
Indices map[string]indexStats `json:"indices"`
70+
}
71+
72+
type indexStats struct {
73+
Shards map[string][]shardStats `json:"shards"`
74+
}
75+
76+
type shardStats struct {
77+
Routing struct {
78+
Primary bool `json:"primary"`
79+
} `json:"routing"`
80+
SeqNo struct {
81+
GlobalCheckpoint int64 `json:"global_checkpoint"`
82+
} `json:"seq_no"`
83+
}

x-pack/apm-server/sampling/pubsub/datastream.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,8 @@ const dataStreamIndexTemplate = `{
8787
"data_stream": {},
8888
"template": {
8989
"settings": {
90-
"index.lifecycle.name": %q
90+
"index.lifecycle.name": %q,
91+
"index.number_of_shards": 1
9192
},
9293
"mappings": {
9394
"properties": {

0 commit comments

Comments
 (0)