Skip to content

Commit

Permalink
Add test for elasticsearch re-connection after network error & allow …
Browse files Browse the repository at this point in the history
…graceful shutdown (#40794)

This commit reworks the `eslegclient.Connection` to accept a context in its `Connect` method, this allows the caller to cancel any in flight requests made by the connection by cancelling the context.

The libbeat `outputs.Connectable` interface (used by `outputs.NetworkClient`) had to be updated to accept the context, which required refactoring in most of the outputs to also accept a context on connect.

The worker from libbeat/publisher/pipeline/client_worker.go now uses a context for it's cancellation instead of a channel,
this context is also used when creating a connection to Elasticsearch.

An integration test is added to ensure the
ES output can always recover from network errors.
  • Loading branch information
belimawr authored Oct 25, 2024
1 parent efbc4ff commit 4dfef8b
Show file tree
Hide file tree
Showing 48 changed files with 481 additions and 147 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff]
- Lower logging level to debug when attempting to configure beats with unknown fields from autodiscovered events/environments {pull}[37816][37816]
- Set timeout of 1 minute for FQDN requests {pull}37756[37756]
- Fix issue where old data could be saved in the memory queue after acknowledgment, increasing memory use {pull}41356[41356]
- Ensure Elasticsearch output can always recover from network errors {pull}40794[40794]

*Auditbeat*

Expand Down
53 changes: 53 additions & 0 deletions NOTICE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16257,6 +16257,29 @@ Contents of probable licence file $GOMODCACHE/github.com/elastic/mito@v1.15.0/LI
limitations under the License.


--------------------------------------------------------------------------------
Dependency : github.com/elastic/mock-es
Version: v0.0.0-20240712014503-e5b47ece0015
Licence type (autodetected): Apache-2.0
--------------------------------------------------------------------------------

Contents of probable licence file $GOMODCACHE/github.com/elastic/mock-es@v0.0.0-20240712014503-e5b47ece0015/LICENSE:

Copyright 2024 Elasticsearch B.V.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


--------------------------------------------------------------------------------
Dependency : github.com/elastic/tk-btf
Version: v0.1.0
Expand Down Expand Up @@ -48611,6 +48634,36 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


--------------------------------------------------------------------------------
Dependency : github.com/mileusna/useragent
Version: v1.3.4
Licence type (autodetected): MIT
--------------------------------------------------------------------------------

Contents of probable licence file $GOMODCACHE/github.com/mileusna/useragent@v1.3.4/LICENSE.md:

MIT License

Copyright (c) 2017 Miloš Mileusnić

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
Dependency : github.com/minio/asm2plan9s
Version: v0.0.0-20200509001527-cdd76441f9d8
Expand Down
37 changes: 24 additions & 13 deletions filebeat/beater/filebeat.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package beater

import (
"context"
"flag"
"fmt"
"path/filepath"
Expand Down Expand Up @@ -195,14 +196,16 @@ func (fb *Filebeat) setupPipelineLoaderCallback(b *beat.Beat) error {

overwritePipelines := true
b.OverwritePipelinesCallback = func(esConfig *conf.C) error {
esClient, err := eslegclient.NewConnectedClient(esConfig, "Filebeat")
ctx, cancel := context.WithCancel(context.TODO())
defer cancel()
esClient, err := eslegclient.NewConnectedClient(ctx, esConfig, "Filebeat")
if err != nil {
return err
}

// When running the subcommand setup, configuration from modules.d directories
// have to be loaded using cfg.Reloader. Otherwise those configurations are skipped.
pipelineLoaderFactory := newPipelineLoaderFactory(b.Config.Output.Config())
pipelineLoaderFactory := newPipelineLoaderFactory(ctx, b.Config.Output.Config())
enableAllFilesets, _ := b.BeatConfig.Bool("config.modules.enable_all_filesets", -1)
forceEnableModuleFilesets, _ := b.BeatConfig.Bool("config.modules.force_enable_module_filesets", -1)
filesetOverrides := fileset.FilesetOverrides{
Expand Down Expand Up @@ -322,14 +325,6 @@ func (fb *Filebeat) Run(b *beat.Beat) error {
outDone := make(chan struct{}) // outDone closes down all active pipeline connections
pipelineConnector := channel.NewOutletFactory(outDone).Create

// Create a ES connection factory for dynamic modules pipeline loading
var pipelineLoaderFactory fileset.PipelineLoaderFactory
if b.Config.Output.Name() == "elasticsearch" {
pipelineLoaderFactory = newPipelineLoaderFactory(b.Config.Output.Config())
} else {
logp.Warn(pipelinesWarning)
}

inputsLogger := logp.NewLogger("input")
v2Inputs := fb.pluginFactory(b.Info, inputsLogger, stateStore)
v2InputLoader, err := v2.NewLoader(inputsLogger, v2Inputs, "type", cfg.DefaultType)
Expand All @@ -350,8 +345,22 @@ func (fb *Filebeat) Run(b *beat.Beat) error {
compat.RunnerFactory(inputsLogger, b.Info, v2InputLoader),
input.NewRunnerFactory(pipelineConnector, registrar, fb.done),
))
moduleLoader := fileset.NewFactory(inputLoader, b.Info, pipelineLoaderFactory, config.OverwritePipelines)

// Create a ES connection factory for dynamic modules pipeline loading
var pipelineLoaderFactory fileset.PipelineLoaderFactory
// The pipelineFactory needs a context to control the connections to ES,
// when the pipelineFactory/ESClient are not needed any more the context
// must be cancelled. This pipeline factory will be used by the moduleLoader
// that is run by a crawler, whenever this crawler is stopped we also cancel
// the context.
pipelineFactoryCtx, cancelPipelineFactoryCtx := context.WithCancel(context.Background())
defer cancelPipelineFactoryCtx()
if b.Config.Output.Name() == "elasticsearch" {
pipelineLoaderFactory = newPipelineLoaderFactory(pipelineFactoryCtx, b.Config.Output.Config())
} else {
logp.Warn(pipelinesWarning)
}
moduleLoader := fileset.NewFactory(inputLoader, b.Info, pipelineLoaderFactory, config.OverwritePipelines)
crawler, err := newCrawler(inputLoader, moduleLoader, config.Inputs, fb.done, *once)
if err != nil {
logp.Err("Could not init crawler: %v", err)
Expand Down Expand Up @@ -389,6 +398,7 @@ func (fb *Filebeat) Run(b *beat.Beat) error {
err = crawler.Start(fb.pipeline, config.ConfigInput, config.ConfigModules)
if err != nil {
crawler.Stop()
cancelPipelineFactoryCtx()
return fmt.Errorf("Failed to start crawler: %w", err)
}

Expand Down Expand Up @@ -444,6 +454,7 @@ func (fb *Filebeat) Run(b *beat.Beat) error {
modules.Stop()
adiscover.Stop()
crawler.Stop()
cancelPipelineFactoryCtx()

timeout := fb.config.ShutdownTimeout
// Checks if on shutdown it should wait for all events to be published
Expand Down Expand Up @@ -487,9 +498,9 @@ func (fb *Filebeat) Stop() {
}

// Create a new pipeline loader (es client) factory
func newPipelineLoaderFactory(esConfig *conf.C) fileset.PipelineLoaderFactory {
func newPipelineLoaderFactory(ctx context.Context, esConfig *conf.C) fileset.PipelineLoaderFactory {
pipelineLoaderFactory := func() (fileset.PipelineLoader, error) {
esClient, err := eslegclient.NewConnectedClient(esConfig, "Filebeat")
esClient, err := eslegclient.NewConnectedClient(ctx, esConfig, "Filebeat")
if err != nil {
return nil, fmt.Errorf("Error creating Elasticsearch client: %w", err)
}
Expand Down
5 changes: 4 additions & 1 deletion filebeat/fileset/modules_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package fileset

import (
"context"
"encoding/json"
"path/filepath"
"testing"
Expand Down Expand Up @@ -268,7 +269,9 @@ func getTestingElasticsearch(t eslegtest.TestLogger) *eslegclient.Connection {

conn.Encoder = eslegclient.NewJSONEncoder(nil, false)

err = conn.Connect()
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
err = conn.Connect(ctx)
if err != nil {
t.Fatal(err)
panic(err) // panic in case TestLogger did not stop test
Expand Down
5 changes: 4 additions & 1 deletion filebeat/fileset/pipelines_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package fileset

import (
"context"
"net/http"
"net/http/httptest"
"testing"
Expand Down Expand Up @@ -101,7 +102,9 @@ func TestLoadPipelinesWithMultiPipelineFileset(t *testing.T) {
})
require.NoError(t, err)

err = testESClient.Connect()
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
err = testESClient.Connect(ctx)
require.NoError(t, err)

err = testRegistry.LoadPipelines(testESClient, false)
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ require (
github.com/elastic/go-quark v0.2.0
github.com/elastic/go-sfdc v0.0.0-20241010131323-8e176480d727
github.com/elastic/mito v1.15.0
github.com/elastic/mock-es v0.0.0-20240712014503-e5b47ece0015
github.com/elastic/tk-btf v0.1.0
github.com/elastic/toutoumomoma v0.0.0-20240626215117-76e39db18dfb
github.com/foxcpp/go-mockdns v0.0.0-20201212160233-ede2f9158d15
Expand Down Expand Up @@ -340,6 +341,7 @@ require (
github.com/mattn/go-ieproxy v0.0.1 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-runewidth v0.0.9 // indirect
github.com/mileusna/useragent v1.3.4 // indirect
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,8 @@ github.com/elastic/gosigar v0.14.3 h1:xwkKwPia+hSfg9GqrCUKYdId102m9qTJIIr7egmK/u
github.com/elastic/gosigar v0.14.3/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
github.com/elastic/mito v1.15.0 h1:MicOxLSVkgU2Aonbh3i+++66Wl5wvD8y9gALK8PQDYs=
github.com/elastic/mito v1.15.0/go.mod h1:J+wCf4HccW2YoSFmZMGu+d06gN+WmnIlj5ehBqine74=
github.com/elastic/mock-es v0.0.0-20240712014503-e5b47ece0015 h1:z8cC8GASpPo8yKlbnXI36HQ/BM9wYjhBPNbDjAWm0VU=
github.com/elastic/mock-es v0.0.0-20240712014503-e5b47ece0015/go.mod h1:qH9DX/Dmflz6EAtaks/+2SsdQzecVAKE174Zl66hk7E=
github.com/elastic/pkcs8 v1.0.0 h1:HhitlUKxhN288kcNcYkjW6/ouvuwJWd9ioxpjnD9jVA=
github.com/elastic/pkcs8 v1.0.0/go.mod h1:ipsZToJfq1MxclVTwpG7U/bgeDtf+0HkUiOxebk95+0=
github.com/elastic/sarama v1.19.1-0.20220310193331-ebc2b0d8eef3 h1:FzA0/n4iMt8ojGDGRoiFPSHFvvdVIvxOxyLtiFnrLBM=
Expand Down Expand Up @@ -703,6 +705,8 @@ github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5
github.com/miekg/dns v1.1.22/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso=
github.com/miekg/dns v1.1.61 h1:nLxbwF3XxhwVSm8g9Dghm9MHPaUZuqhPiGL+675ZmEs=
github.com/miekg/dns v1.1.61/go.mod h1:mnAarhS3nWaW+NVP2wTkYVIZyHNJ098SJZUki3eykwQ=
github.com/mileusna/useragent v1.3.4 h1:MiuRRuvGjEie1+yZHO88UBYg8YBC/ddF6T7F56i3PCk=
github.com/mileusna/useragent v1.3.4/go.mod h1:3d8TOmwL/5I8pJjyVDteHtgDGcefrFUX4ccGOMKNYYc=
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY=
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI=
Expand Down
8 changes: 4 additions & 4 deletions heartbeat/beater/heartbeat.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func New(b *beat.Beat, rawConfig *conf.C) (beat.Beater, error) {
if b.Config.Output.Name() == "elasticsearch" && !b.Manager.Enabled() {
// Connect to ES and setup the State loader if the output is not managed by agent
// Note this, intentionally, blocks until connected or max attempts reached
esClient, err := makeESClient(b.Config.Output.Config(), 3, 2*time.Second)
esClient, err := makeESClient(context.TODO(), b.Config.Output.Config(), 3, 2*time.Second)
if err != nil {
if parsedConfig.RunOnce {
trace.Abort()
Expand Down Expand Up @@ -275,7 +275,7 @@ func (bt *Heartbeat) RunCentralMgmtMonitors(b *beat.Beat) {
}

// Backoff panics with 0 duration, set to smallest unit
esClient, err := makeESClient(outCfg.Config(), 1, 1*time.Nanosecond)
esClient, err := makeESClient(context.TODO(), outCfg.Config(), 1, 1*time.Nanosecond)
if err != nil {
logp.L().Warnf("skipping monitor state management during managed reload: %w", err)
} else {
Expand Down Expand Up @@ -324,7 +324,7 @@ func (bt *Heartbeat) Stop() {
}

// makeESClient establishes an ES connection meant to load monitors' state
func makeESClient(cfg *conf.C, attempts int, wait time.Duration) (*eslegclient.Connection, error) {
func makeESClient(ctx context.Context, cfg *conf.C, attempts int, wait time.Duration) (*eslegclient.Connection, error) {
var (
esClient *eslegclient.Connection
err error
Expand Down Expand Up @@ -353,7 +353,7 @@ func makeESClient(cfg *conf.C, attempts int, wait time.Duration) (*eslegclient.C
}

for i := 0; i < attempts; i++ {
esClient, err = eslegclient.NewConnectedClient(newCfg, "Heartbeat")
esClient, err = eslegclient.NewConnectedClient(ctx, newCfg, "Heartbeat")
if err == nil {
connectDelay.Reset()
return esClient, nil
Expand Down
3 changes: 2 additions & 1 deletion heartbeat/beater/heartbeat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package beater

import (
"context"
"testing"
"time"

Expand All @@ -39,7 +40,7 @@ func TestMakeESClient(t *testing.T) {
anyAttempt := 1
anyDuration := 1 * time.Second

_, _ = makeESClient(origCfg, anyAttempt, anyDuration)
_, _ = makeESClient(context.Background(), origCfg, anyAttempt, anyDuration)

timeout, err := origCfg.Int("timeout", -1)
require.NoError(t, err)
Expand Down
5 changes: 4 additions & 1 deletion heartbeat/monitors/wrappers/monitorstate/testutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package monitorstate

import (
"context"
"encoding/json"
"testing"

Expand Down Expand Up @@ -50,7 +51,9 @@ func IntegES(t *testing.T) (esc *eslegclient.Connection) {

conn.Encoder = eslegclient.NewJSONEncoder(nil, false)

err = conn.Connect()
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
err = conn.Connect(ctx)
if err != nil {
t.Fatal(err)
panic(err) // panic in case TestLogger did not stop test
Expand Down
4 changes: 3 additions & 1 deletion libbeat/cmd/instance/beat.go
Original file line number Diff line number Diff line change
Expand Up @@ -898,7 +898,9 @@ func (b *Beat) Setup(settings Settings, bt beat.Creator, setup SetupSettings) er
if !isElasticsearchOutput(outCfg.Name()) {
return fmt.Errorf("index management requested but the Elasticsearch output is not configured/enabled")
}
esClient, err := eslegclient.NewConnectedClient(outCfg.Config(), b.Info.Beat)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
esClient, err := eslegclient.NewConnectedClient(ctx, outCfg.Config(), b.Info.Beat)
if err != nil {
return err
}
Expand Down
13 changes: 8 additions & 5 deletions libbeat/esleg/eslegclient/api_mock_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package eslegclient

import (
"context"
"encoding/json"
"fmt"
"net/http"
Expand Down Expand Up @@ -63,14 +64,14 @@ func TestOneHostSuccessResp(t *testing.T) {

server := ElasticsearchMock(200, expectedResp)

client := newTestConnection(server.URL)
client := newTestConnection(t, server.URL)

params := map[string]string{
"refresh": "true",
}
_, resp, err := client.Index(index, "test", "1", params, body)
if err != nil {
t.Errorf("Index() returns error: %s", err)
t.Fatalf("Index() returns error: %s", err)
}
if !resp.Created {
t.Errorf("Index() fails: %s", resp)
Expand All @@ -89,8 +90,10 @@ func TestOneHost500Resp(t *testing.T) {

server := ElasticsearchMock(http.StatusInternalServerError, []byte("Something wrong happened"))

client := newTestConnection(server.URL)
err := client.Connect()
client := newTestConnection(t, server.URL)
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
err := client.Connect(ctx)
if err != nil {
t.Fatalf("Failed to connect: %v", err)
}
Expand Down Expand Up @@ -121,7 +124,7 @@ func TestOneHost503Resp(t *testing.T) {

server := ElasticsearchMock(503, []byte("Something wrong happened"))

client := newTestConnection(server.URL)
client := newTestConnection(t, server.URL)

params := map[string]string{
"refresh": "true",
Expand Down
Loading

0 comments on commit 4dfef8b

Please sign in to comment.