Skip to content

Commit 13a5550

Browse files
authored
Utilize new ES fleet polling API for global checkpoint monitoring (#200)
* Utilize fleet polling API for global checkpoint monitoring
* Adjust for API changes, configurable poll timeout
* Update retryDelay to 3 secs
* Update tests for monitor API change
* Adjust to the latest API changes
* Remove fleet indexes bootstrapping for tests, it is done by fleet system index plugin now
* Fix unit tests
1 parent 91af65a commit 13a5550

22 files changed

+391
-161
lines changed

cmd/fleet/handleCheckin.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -313,8 +313,8 @@ func (ct *CheckinT) fetchAgentPendingActions(ctx context.Context, seqno sqn.SeqN
313313
now := time.Now().UTC().Format(time.RFC3339)
314314

315315
return dl.FindActions(ctx, ct.bulker, dl.QueryAgentActions, map[string]interface{}{
316-
dl.FieldSeqNo: seqno.Get(0),
317-
dl.FieldMaxSeqNo: ct.gcp.GetCheckpoint(),
316+
dl.FieldSeqNo: seqno.Value(),
317+
dl.FieldMaxSeqNo: ct.gcp.GetCheckpoint().Value(),
318318
dl.FieldExpiration: now,
319319
dl.FieldAgents: []string{agentId},
320320
})

cmd/fleet/main.go

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ import (
2121
"github.com/elastic/fleet-server/v7/internal/pkg/config"
2222
"github.com/elastic/fleet-server/v7/internal/pkg/coordinator"
2323
"github.com/elastic/fleet-server/v7/internal/pkg/dl"
24+
"github.com/elastic/fleet-server/v7/internal/pkg/es"
2425
"github.com/elastic/fleet-server/v7/internal/pkg/logger"
2526
"github.com/elastic/fleet-server/v7/internal/pkg/monitor"
2627
"github.com/elastic/fleet-server/v7/internal/pkg/policy"
@@ -505,7 +506,13 @@ func (f *FleetServer) runServer(ctx context.Context, cfg *config.Config) (err er
505506
// shutdown before the bulker is then cancelled.
506507
bulkCtx, bulkCancel := context.WithCancel(context.Background())
507508
defer bulkCancel()
508-
es, bulker, err := bulk.InitES(bulkCtx, cfg)
509+
esCli, bulker, err := bulk.InitES(bulkCtx, cfg)
510+
if err != nil {
511+
return err
512+
}
513+
514+
// Monitoring es client, longer timeout, no retries
515+
monCli, err := es.NewClient(ctx, cfg, true)
509516
if err != nil {
510517
return err
511518
}
@@ -514,7 +521,10 @@ func (f *FleetServer) runServer(ctx context.Context, cfg *config.Config) (err er
514521
g, ctx := errgroup.WithContext(ctx)
515522

516523
// Coordinator policy monitor
517-
pim, err := monitor.New(dl.FleetPolicies, es, monitor.WithFetchSize(cfg.Inputs[0].Monitor.FetchSize))
524+
pim, err := monitor.New(dl.FleetPolicies, esCli, monCli,
525+
monitor.WithFetchSize(cfg.Inputs[0].Monitor.FetchSize),
526+
monitor.WithPollTimeout(cfg.Inputs[0].Monitor.PollTimeout),
527+
)
518528
if err != nil {
519529
return err
520530
}
@@ -536,7 +546,11 @@ func (f *FleetServer) runServer(ctx context.Context, cfg *config.Config) (err er
536546
var ad *action.Dispatcher
537547
var tr *action.TokenResolver
538548

539-
am, err = monitor.NewSimple(dl.FleetActions, es, monitor.WithExpiration(true), monitor.WithFetchSize(cfg.Inputs[0].Monitor.FetchSize))
549+
am, err = monitor.NewSimple(dl.FleetActions, esCli, monCli,
550+
monitor.WithExpiration(true),
551+
monitor.WithFetchSize(cfg.Inputs[0].Monitor.FetchSize),
552+
monitor.WithPollTimeout(cfg.Inputs[0].Monitor.PollTimeout),
553+
)
540554
if err != nil {
541555
return err
542556
}

dev-tools/integration/main.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ func main() {
3131
checkErr(err)
3232

3333
ctx := context.Background()
34-
es, err := es.NewClient(ctx, cfg)
34+
es, err := es.NewClient(ctx, cfg, false)
3535
checkErr(err)
3636

3737
err = esutil.EnsureESIndices(ctx, es)

internal/pkg/bulk/bulk.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ const (
8787

8888
func InitES(ctx context.Context, cfg *config.Config, opts ...BulkOpt) (*elasticsearch.Client, Bulk, error) {
8989

90-
es, err := es.NewClient(ctx, cfg)
90+
es, err := es.NewClient(ctx, cfg, false)
9191
if err != nil {
9292
return nil, nil, err
9393
}

internal/pkg/config/config_test.go

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,8 @@ func TestConfig(t *testing.T) {
9393
MaxCost: defaultCacheMaxCost,
9494
},
9595
Monitor: Monitor{
96-
FetchSize: defaultFetchSize,
96+
FetchSize: defaultFetchSize,
97+
PollTimeout: defaultPollTimeout,
9798
},
9899
},
99100
},
@@ -182,7 +183,8 @@ func TestConfig(t *testing.T) {
182183
MaxCost: defaultCacheMaxCost,
183184
},
184185
Monitor: Monitor{
185-
FetchSize: defaultFetchSize,
186+
FetchSize: defaultFetchSize,
187+
PollTimeout: defaultPollTimeout,
186188
},
187189
},
188190
},
@@ -269,7 +271,8 @@ func TestConfig(t *testing.T) {
269271
MaxCost: defaultCacheMaxCost,
270272
},
271273
Monitor: Monitor{
272-
FetchSize: defaultFetchSize,
274+
FetchSize: defaultFetchSize,
275+
PollTimeout: defaultPollTimeout,
273276
},
274277
},
275278
},
@@ -356,7 +359,8 @@ func TestConfig(t *testing.T) {
356359
MaxCost: defaultCacheMaxCost,
357360
},
358361
Monitor: Monitor{
359-
FetchSize: defaultFetchSize,
362+
FetchSize: defaultFetchSize,
363+
PollTimeout: defaultPollTimeout,
360364
},
361365
},
362366
},

internal/pkg/config/monitor.go

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,14 +4,19 @@
44

55
package config
66

7+
import "time"
8+
79
const (
8-
defaultFetchSize = 1000
10+
defaultFetchSize = 1000
11+
defaultPollTimeout = 5 * time.Minute
912
)
1013

1114
type Monitor struct {
12-
FetchSize int `config:"fetch_size"`
15+
FetchSize int `config:"fetch_size"`
16+
PollTimeout time.Duration `config:"poll_timeout"`
1317
}
1418

1519
func (m *Monitor) InitDefaults() {
1620
m.FetchSize = defaultFetchSize
21+
m.PollTimeout = defaultPollTimeout
1722
}

internal/pkg/config/output.go

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,10 @@ import (
2020
"github.com/elastic/beats/v7/libbeat/common/transport/tlscommon"
2121
)
2222

23+
// The timeout would be driven by the server for long poll.
24+
// Giving it some sane long value.
25+
const httpTransportLongPollTimeout = 10 * time.Minute
26+
2327
var hasScheme = regexp.MustCompile(`^([a-z][a-z0-9+\-.]*)://`)
2428

2529
// Elasticsearch is the configuration for elasticsearch.
@@ -77,7 +81,7 @@ func (c *Elasticsearch) Validate() error {
7781
}
7882

7983
// ToESConfig converts the configuration object into the config for the elasticsearch client.
80-
func (c *Elasticsearch) ToESConfig() (elasticsearch.Config, error) {
84+
func (c *Elasticsearch) ToESConfig(longPoll bool) (elasticsearch.Config, error) {
8185
// build the addresses
8286
addrs := make([]string, len(c.Hosts))
8387
for i, host := range c.Hosts {
@@ -104,6 +108,17 @@ func (c *Elasticsearch) ToESConfig() (elasticsearch.Config, error) {
104108
ResponseHeaderTimeout: c.Timeout,
105109
ExpectContinueTimeout: 1 * time.Second,
106110
}
111+
112+
disableRetry := false
113+
114+
if longPoll {
115+
httpTransport.IdleConnTimeout = httpTransportLongPollTimeout
116+
httpTransport.ResponseHeaderTimeout = httpTransportLongPollTimeout
117+
118+
// no retries for long poll monitoring
119+
disableRetry = true
120+
}
121+
107122
if c.TLS != nil && c.TLS.IsEnabled() {
108123
tls, err := tlscommon.LoadTLSConfig(c.TLS)
109124
if err != nil {
@@ -136,6 +151,7 @@ func (c *Elasticsearch) ToESConfig() (elasticsearch.Config, error) {
136151
Header: h,
137152
Transport: httpTransport,
138153
MaxRetries: c.MaxRetries,
154+
DisableRetry: disableRetry,
139155
}, nil
140156
}
141157

internal/pkg/config/output_test.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -171,7 +171,7 @@ func TestToESConfig(t *testing.T) {
171171
cmpopts.IgnoreUnexported(tls.Config{}),
172172
}
173173
t.Run(name, func(t *testing.T) {
174-
res, err := test.cfg.ToESConfig()
174+
res, err := test.cfg.ToESConfig(false)
175175
require.NoError(t, err)
176176
test.result.Header.Set("X-elastic-product-origin", "fleet")
177177
if !assert.True(t, cmp.Equal(test.result, res, copts...)) {

internal/pkg/coordinator/monitor_integration_test.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ func TestMonitorLeadership(t *testing.T) {
3636
serversIndex := ftesting.SetupIndex(bulkCtx, t, bulker, es.MappingServer)
3737
policiesIndex := ftesting.SetupIndex(bulkCtx, t, bulker, es.MappingPolicy)
3838
leadersIndex := ftesting.SetupIndex(bulkCtx, t, bulker, es.MappingPolicyLeader)
39-
pim, err := monitor.New(policiesIndex, bulker.Client())
39+
pim, err := monitor.New(policiesIndex, bulker.Client(), bulker.Client())
4040
if err != nil {
4141
t.Fatal(err)
4242
}

internal/pkg/es/client.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,8 @@ import (
1515
"github.com/rs/zerolog/log"
1616
)
1717

18-
func NewClient(ctx context.Context, cfg *config.Config) (*elasticsearch.Client, error) {
19-
escfg, err := cfg.Output.Elasticsearch.ToESConfig()
18+
func NewClient(ctx context.Context, cfg *config.Config, longPoll bool) (*elasticsearch.Client, error) {
19+
escfg, err := cfg.Output.Elasticsearch.ToESConfig(longPoll)
2020
if err != nil {
2121
return nil, err
2222
}

0 commit comments

Comments
 (0)