From 276c6cb74be8da1d1a6642af1fedb7e1dd46bc50 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Wed, 19 Feb 2020 15:51:57 -0500 Subject: [PATCH 01/12] Fix / clarify the module reload logic --- libbeat/cfgfile/reload.go | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/libbeat/cfgfile/reload.go b/libbeat/cfgfile/reload.go index 55ee0312bf3..1876741db2e 100644 --- a/libbeat/cfgfile/reload.go +++ b/libbeat/cfgfile/reload.go @@ -185,7 +185,11 @@ func (rl *Reloader) Run(runnerFactory RunnerFactory) { rl.config.Reload.Period = 0 } - overwriteUpdate := true + // If forceReload is set, the configuration should be reloaded + // even if there are no changes. It is set on the first iteration, + // and whenever an attempted reload fails. It is unset whenever + // a reload succeeds. + forceReload := true for { select { @@ -204,9 +208,8 @@ func (rl *Reloader) Run(runnerFactory RunnerFactory) { logp.Err("Error fetching new config files: %v", err) } - // no file changes - if !updated && !overwriteUpdate { - overwriteUpdate = false + // if there are no changes, skip this reload unless forceReload is set. + if !updated && !forceReload { continue } @@ -215,10 +218,11 @@ func (rl *Reloader) Run(runnerFactory RunnerFactory) { debugf("Number of module configs found: %v", len(configs)) - if err := list.Reload(configs); err != nil { - // Make sure the next run also updates because some runners were not properly loaded - overwriteUpdate = true + if err = list.Reload(configs); err != nil { + logp.Err("Error reloading module configurations: %v", err) } + // force reload on the next iteration if and only if this one failed. + forceReload = err != nil } // Path loading is enabled but not reloading. Loads files only once and then stops. From b9872269dfb005cd5abe8fba900364a3ba61d63f Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Thu, 20 Feb 2020 13:40:51 -0500 Subject: [PATCH 02/12] Clean up error output --- libbeat/cfgfile/reload.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libbeat/cfgfile/reload.go b/libbeat/cfgfile/reload.go index 1876741db2e..ce889ac4f4a 100644 --- a/libbeat/cfgfile/reload.go +++ b/libbeat/cfgfile/reload.go @@ -218,10 +218,10 @@ func (rl *Reloader) Run(runnerFactory RunnerFactory) { debugf("Number of module configs found: %v", len(configs)) - if err = list.Reload(configs); err != nil { - logp.Err("Error reloading module configurations: %v", err) - } - // force reload on the next iteration if and only if this one failed. + err = list.Reload(configs) + // Force reload on the next iteration if and only if this one failed. + // (Any errors are already logged by list.Reload, so we don't need to + // propagate the details further.) forceReload = err != nil } From 99827727272ef295b0801b8e86bdb428e3f148e8 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Mon, 24 Feb 2020 18:03:01 -0500 Subject: [PATCH 03/12] Add new monitoring variable libbeat.config.scans, add tests --- libbeat/cfgfile/reload.go | 7 +- libbeat/cfgfile/reload_test.go | 103 ++++++++++++++++++ libbeat/docs/http-endpoint.asciidoc | 1 + libbeat/tests/system/test_http.py | 2 +- .../beat/stats/_meta/test/stats.800.json | 1 + 5 files changed, 112 insertions(+), 2 deletions(-) create mode 100644 libbeat/cfgfile/reload_test.go diff --git a/libbeat/cfgfile/reload.go b/libbeat/cfgfile/reload.go index ce889ac4f4a..dee3be817b5 100644 --- a/libbeat/cfgfile/reload.go +++ b/libbeat/cfgfile/reload.go @@ -45,6 +45,10 @@ var ( debugf = logp.MakeDebug("cfgfile") + // configScans measures how many times the config dir was scanned for + // changes, configReloads measures how many times there were changes that + // triggered an actual reload. + configScans = monitoring.NewInt(nil, "libbeat.config.scans") configReloads = monitoring.NewInt(nil, "libbeat.config.reloads") moduleStarts = monitoring.NewInt(nil, "libbeat.config.module.starts") moduleStops = monitoring.NewInt(nil, "libbeat.config.module.stops") @@ -199,7 +203,7 @@ func (rl *Reloader) Run(runnerFactory RunnerFactory) { case <-time.After(rl.config.Reload.Period): debugf("Scan for new config files") - configReloads.Add(1) + configScans.Add(1) files, updated, err := gw.Scan() if err != nil { @@ -212,6 +216,7 @@ func (rl *Reloader) Run(runnerFactory RunnerFactory) { if !updated && !forceReload { continue } + configReloads.Add(1) // Load all config objects configs, _ := rl.loadConfigs(files) diff --git a/libbeat/cfgfile/reload_test.go b/libbeat/cfgfile/reload_test.go new file mode 100644 index 00000000000..2ec510906b7 --- /dev/null +++ b/libbeat/cfgfile/reload_test.go @@ -0,0 +1,103 @@ +package cfgfile + +import ( + "fmt" + "io/ioutil" + "os" + "testing" + "time" + + "github.com/elastic/beats/libbeat/beat" + "github.com/elastic/beats/libbeat/common" + "github.com/stretchr/testify/assert" +) + +type mockRunner struct{} +type mockRunnerFactory struct{} + +func TestReloader(t *testing.T) { + // Create random temp directory + dir, err := ioutil.TempDir("", "libbeat-reloader") + defer os.RemoveAll(dir) + if err != nil { + t.Fatal(err) + } + glob := dir + "/*.yml" + + config := common.MustNewConfigFrom(common.MapStr{ + "path": glob, + "reload": common.MapStr{ + "period": "1s", + "enabled": true, + }, + }) + // common.Config{} + reloader := NewReloader(nil, config) + retryCount := 5 + + go reloader.Run(nil) + defer reloader.Stop() + + // wait until configScans >= 2 (which should happen after ~1 second) + for i := 0; i < retryCount; i++ { + if configScans.Get() >= 2 { + break + } + // time interval is slightly more than a second so we don't slightly + // undershoot the first iteration and wait a whole extra second. + time.Sleep(1100 * time.Millisecond) + } + if configScans.Get() < 2 { + assert.Fail(t, "Timed out waiting for configScans >= 2") + } + + // The first scan should cause a reload, but additional ones should not, + // so configReloads should still be 1. + assert.Equal(t, int64(1), configReloads.Get()) + + // Write a file to the reloader path to trigger a real reload + content := []byte("test\n") + err = ioutil.WriteFile(dir+"/config1.yml", content, 0644) + assert.NoError(t, err) + + // Wait for the number of scans to increase at least twice. This is somewhat + // pedantic, but if we just wait for the next scan, it's possible to wake up + // during the brief interval after configScans is updated but before + // configReloads is, giving a false negative. Waiting two iterations + // guarantees that the change from the first one has taken effect. + targetScans := configScans.Get() + 2 + for i := 0; i < retryCount; i++ { + time.Sleep(time.Second) + if configScans.Get() >= targetScans { + break + } + } + if configScans.Get() < targetScans { + assert.Fail(t, + fmt.Sprintf("Timed out waiting for configScans >= %d", targetScans)) + } + + // The number of reloads should now have increased. It would be nicer to + // check if the value is exactly 2, but we can't guarantee this: the glob + // watcher includes an extra 1-second margin around the real modification + // time, so changes that fall too close to a scan interval can be detected + // twice. + if configReloads.Get() < 2 { + assert.Fail(t, + fmt.Sprintf( + "Reloader performed %d scans but only reloaded once", + configScans.Get())) + } +} + +func (rf *mockRunnerFactory) Create( + p beat.Pipeline, config *common.Config, meta *common.MapStrPointer, +) (Runner, error) { + return &mockRunner{}, nil +} + +func (r *mockRunner) String() string { + return "mockRunner{}" +} +func (r *mockRunner) Start() {} +func (r *mockRunner) Stop() {} diff --git a/libbeat/docs/http-endpoint.asciidoc b/libbeat/docs/http-endpoint.asciidoc index 1264ce2d56e..00fd30db8c6 100644 --- a/libbeat/docs/http-endpoint.asciidoc +++ b/libbeat/docs/http-endpoint.asciidoc @@ -118,6 +118,7 @@ curl -XGET 'localhost:5066/stats?pretty' "starts": 0, "stops": 0 }, + "scans": 1, "reloads": 1 }, "output": { diff --git a/libbeat/tests/system/test_http.py b/libbeat/tests/system/test_http.py index 0311159835a..819e2c9a1ea 100644 --- a/libbeat/tests/system/test_http.py +++ b/libbeat/tests/system/test_http.py @@ -42,7 +42,7 @@ def test_stats(self): data = json.loads(r.content) # Test one data point - assert data["libbeat"]["config"]["reloads"] == 0 + assert data["libbeat"]["config"]["scans"] == 0 proc.check_kill_and_wait() diff --git a/metricbeat/module/beat/stats/_meta/test/stats.800.json b/metricbeat/module/beat/stats/_meta/test/stats.800.json index ced95cd76e3..12b527060fc 100644 --- a/metricbeat/module/beat/stats/_meta/test/stats.800.json +++ b/metricbeat/module/beat/stats/_meta/test/stats.800.json @@ -44,6 +44,7 @@ "starts": 0, "stops": 0 }, + "scans": 1, "reloads": 1 }, "output": { From 7cd61b0e3b6b9fd5d6b5a44c9f6c908a3df2ec10 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Mon, 24 Feb 2020 18:08:15 -0500 Subject: [PATCH 04/12] update changelog --- CHANGELOG.next.asciidoc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 2d986597364..8fef55ca34a 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -58,6 +58,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Fix index names for indexing not always guaranteed to be lower case. {pull}16081[16081] - Add `ssl.ca_sha256` option to the supported TLS option, this allow to check that a specific certificate is used as part of the verified chain. {issue}15717[15717] - Fix loading processors from annotation hints. {pull}16348[16348] +- Fix an issue that could cause redundant configuration reloads. {pull}16440[16440] *Auditbeat* @@ -110,6 +111,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Add document_id setting to decode_json_fields processor. {pull}15859[15859] - Include network information by default on add_host_metadata and add_observer_metadata. {issue}15347[15347] {pull}16077[16077] - Add `aws_ec2` provider for autodiscover. {issue}12518[12518] {pull}14823[14823] +- Add monitoring variable `libbeat.config.scans` to distinguish scans of the configuration directory from actual reloads of its contents. {pull}16440[16440] *Auditbeat* From beadaf38be8c250295c8d27f50e461b9f9d719ad Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Mon, 24 Feb 2020 18:14:51 -0500 Subject: [PATCH 05/12] reload tests should only be in the integration build --- libbeat/cfgfile/reload_test.go | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/libbeat/cfgfile/reload_test.go b/libbeat/cfgfile/reload_test.go index 2ec510906b7..5d7430dcfbb 100644 --- a/libbeat/cfgfile/reload_test.go +++ b/libbeat/cfgfile/reload_test.go @@ -1,3 +1,22 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// +build integration + package cfgfile import ( @@ -7,9 +26,10 @@ import ( "testing" "time" + "github.com/stretchr/testify/assert" + "github.com/elastic/beats/libbeat/beat" "github.com/elastic/beats/libbeat/common" - "github.com/stretchr/testify/assert" ) type mockRunner struct{} From 5066edcb88788d2a77bb62565aba4989c940af24 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Mon, 24 Feb 2020 18:26:46 -0500 Subject: [PATCH 06/12] remove no-longer-needed mock structures --- libbeat/cfgfile/reload_test.go | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/libbeat/cfgfile/reload_test.go b/libbeat/cfgfile/reload_test.go index 5d7430dcfbb..5565c5a4348 100644 --- a/libbeat/cfgfile/reload_test.go +++ b/libbeat/cfgfile/reload_test.go @@ -28,13 +28,9 @@ import ( "github.com/stretchr/testify/assert" - "github.com/elastic/beats/libbeat/beat" "github.com/elastic/beats/libbeat/common" ) -type mockRunner struct{} -type mockRunnerFactory struct{} - func TestReloader(t *testing.T) { // Create random temp directory dir, err := ioutil.TempDir("", "libbeat-reloader") @@ -109,15 +105,3 @@ func TestReloader(t *testing.T) { configScans.Get())) } } - -func (rf *mockRunnerFactory) Create( - p beat.Pipeline, config *common.Config, meta *common.MapStrPointer, -) (Runner, error) { - return &mockRunner{}, nil -} - -func (r *mockRunner) String() string { - return "mockRunner{}" -} -func (r *mockRunner) Start() {} -func (r *mockRunner) Stop() {} From a14ecc5196c9fcef9d3ee00c97989954429cc679 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Tue, 25 Feb 2020 09:21:59 -0500 Subject: [PATCH 07/12] increase unit test timeout --- libbeat/cfgfile/reload_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libbeat/cfgfile/reload_test.go b/libbeat/cfgfile/reload_test.go index 5565c5a4348..d6575acdaee 100644 --- a/libbeat/cfgfile/reload_test.go +++ b/libbeat/cfgfile/reload_test.go @@ -49,7 +49,7 @@ func TestReloader(t *testing.T) { }) // common.Config{} reloader := NewReloader(nil, config) - retryCount := 5 + retryCount := 10 go reloader.Run(nil) defer reloader.Stop() From 11cdbf5e8ba1bff63bad0b9dd00e761e9ac8dc35 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Tue, 25 Feb 2020 12:58:54 -0500 Subject: [PATCH 08/12] increase time limit on preexisting flaky reloader test --- metricbeat/tests/system/test_reload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metricbeat/tests/system/test_reload.py b/metricbeat/tests/system/test_reload.py index 02d29d175be..3385592d2b2 100644 --- a/metricbeat/tests/system/test_reload.py +++ b/metricbeat/tests/system/test_reload.py @@ -76,7 +76,7 @@ def reload_line( # Ensure the module was successfully loaded self.wait_until( lambda: self.log_contains(reload_line(1)), - max_timeout=10) + max_timeout=15) # Remove config again os.remove(config_path) From 3840fccb02180c9bb6df3b800e69680eee34722f Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Wed, 26 Feb 2020 07:39:01 -0500 Subject: [PATCH 09/12] increase the timing for the right test_reload.py --- heartbeat/tests/system/test_reload.py | 3 ++- metricbeat/tests/system/test_reload.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/heartbeat/tests/system/test_reload.py b/heartbeat/tests/system/test_reload.py index 59a0630f193..555fcc39605 100644 --- a/heartbeat/tests/system/test_reload.py +++ b/heartbeat/tests/system/test_reload.py @@ -78,7 +78,8 @@ def test_config_add(self): "test.yml", self.http_cfg("myid", "http://localhost:{}".format(server.server_port))) self.wait_until(lambda: self.log_contains( - "Starting reload procedure, current runners: 1")) + "Starting reload procedure, current runners: 1"), + max_timeout=15) self.wait_until(lambda: self.output_lines() > 0) diff --git a/metricbeat/tests/system/test_reload.py b/metricbeat/tests/system/test_reload.py index 3385592d2b2..02d29d175be 100644 --- a/metricbeat/tests/system/test_reload.py +++ b/metricbeat/tests/system/test_reload.py @@ -76,7 +76,7 @@ def reload_line( # Ensure the module was successfully loaded self.wait_until( lambda: self.log_contains(reload_line(1)), - max_timeout=15) + max_timeout=10) # Remove config again os.remove(config_path) From 181f0817d6528e94e9e39b1018b1ef0bc9ae32d1 Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Wed, 26 Feb 2020 09:34:03 -0500 Subject: [PATCH 10/12] (system test troubleshooting) always trigger config reloader --- libbeat/cfgfile/reload.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libbeat/cfgfile/reload.go b/libbeat/cfgfile/reload.go index dee3be817b5..7545c7f67e6 100644 --- a/libbeat/cfgfile/reload.go +++ b/libbeat/cfgfile/reload.go @@ -227,7 +227,7 @@ func (rl *Reloader) Run(runnerFactory RunnerFactory) { // Force reload on the next iteration if and only if this one failed. // (Any errors are already logged by list.Reload, so we don't need to // propagate the details further.) - forceReload = err != nil + //forceReload = err != nil } // Path loading is enabled but not reloading. Loads files only once and then stops. From eb763e87d6f3833c8e289ab17a75de3b5e0d46cd Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Wed, 26 Feb 2020 10:28:25 -0500 Subject: [PATCH 11/12] turn real reload detection back on --- libbeat/cfgfile/reload.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libbeat/cfgfile/reload.go b/libbeat/cfgfile/reload.go index 7545c7f67e6..dee3be817b5 100644 --- a/libbeat/cfgfile/reload.go +++ b/libbeat/cfgfile/reload.go @@ -227,7 +227,7 @@ func (rl *Reloader) Run(runnerFactory RunnerFactory) { // Force reload on the next iteration if and only if this one failed. // (Any errors are already logged by list.Reload, so we don't need to // propagate the details further.) - //forceReload = err != nil + forceReload = err != nil } // Path loading is enabled but not reloading. Loads files only once and then stops. From fdf2c030ddb127e176bf38dad5f42ba27158530d Mon Sep 17 00:00:00 2001 From: Fae Charlton Date: Wed, 26 Feb 2020 10:44:31 -0500 Subject: [PATCH 12/12] fix flaky reloader tests --- heartbeat/tests/system/test_reload.py | 7 +++++-- metricbeat/tests/system/test_reload.py | 12 +++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/heartbeat/tests/system/test_reload.py b/heartbeat/tests/system/test_reload.py index 555fcc39605..8d23c85cecd 100644 --- a/heartbeat/tests/system/test_reload.py +++ b/heartbeat/tests/system/test_reload.py @@ -69,6 +69,8 @@ def test_config_add(self): """ self.setup_dynamic() + # Wait until the beat is running and has performed its first load of + # the config directory. self.wait_until(lambda: self.log_contains( "Starting reload procedure, current runners: 0")) @@ -77,9 +79,10 @@ def test_config_add(self): self.write_dyn_config( "test.yml", self.http_cfg("myid", "http://localhost:{}".format(server.server_port))) + # The beat should recognize there is a new runner to start. self.wait_until(lambda: self.log_contains( - "Starting reload procedure, current runners: 1"), - max_timeout=15) + "Start list: 1, Stop list: 0"), + max_timeout=10) self.wait_until(lambda: self.output_lines() > 0) diff --git a/metricbeat/tests/system/test_reload.py b/metricbeat/tests/system/test_reload.py index 02d29d175be..29d82bbf82b 100644 --- a/metricbeat/tests/system/test_reload.py +++ b/metricbeat/tests/system/test_reload.py @@ -44,8 +44,6 @@ def test_reload(self): @unittest.skipUnless(re.match("(?i)win|linux|darwin|freebsd|openbsd", sys.platform), "os") def test_start_stop(self): - def reload_line( - num_runners): return "Starting reload procedure, current runners: %d" % num_runners """ Test if module is properly started and stopped """ @@ -61,7 +59,7 @@ def reload_line( # Ensure no modules are loaded self.wait_until( - lambda: self.log_contains(reload_line(0)), + lambda: self.log_contains("Start list: 0, Stop list: 0"), max_timeout=10) systemConfig = """ @@ -73,17 +71,17 @@ def reload_line( with open(config_path, 'w') as f: f.write(systemConfig) - # Ensure the module was successfully loaded + # Ensure the module is started self.wait_until( - lambda: self.log_contains(reload_line(1)), + lambda: self.log_contains("Start list: 1, Stop list: 0"), max_timeout=10) # Remove config again os.remove(config_path) - # Ensure the module was successfully unloaded + # Ensure the module is stopped self.wait_until( - lambda: self.log_contains(reload_line(0)), + lambda: self.log_contains("Start list: 0, Stop list: 1"), max_timeout=10) time.sleep(1)