Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reloader: don't fail on envvar expansion errors #7429

Merged
merged 1 commit into from
Jul 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .mdox.validate.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,5 @@ validators:
type: 'ignore'
- regex: 'twitter\.com'
type: 'ignore'
# 500 when requested my mdox in GH actions.
- regex: 'outshift\.cisco\.com'
- regex: 'outshift\.cisco\.com\/blog\/multi-cluster-monitoring'
Copy link
Contributor Author

@rexagod rexagod Jun 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The domain works just fine, only /blog/multi-cluster-monitoring has a problem.

type: 'ignore'
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re

### Added

- [#7429](https://github.com/thanos-io/thanos/pull/7429): Reloader: introduce `TolerateEnvVarExpansionErrors` to allow suppressing errors when expanding environment variables in the configuration file. When set, this will ensure that the reloader won't consider the operation to fail when an unset environment variable is encountered. Note that all unset environment variables are left as is, whereas all set environment variables are expanded as usual.
- [#7317](https://github.com/thanos-io/thanos/pull/7317) Tracing: allow specifying resource attributes for the OTLP configuration.
- [#7367](https://github.com/thanos-io/thanos/pull/7367) Store Gateway: log request ID in request logs.
- [#7361](https://github.com/thanos-io/thanos/pull/7361) Query: *breaking :warning:* pass query stats from remote execution from server to client. We changed the protobuf of the QueryAPI, if you use `query.mode=distributed` you need to update your client (upper level Queriers) first, before updating leaf Queriers (servers).
Expand Down
89 changes: 55 additions & 34 deletions pkg/reloader/reloader.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/minio/sha256-simd"
ps "github.com/mitchellh/go-ps"
"github.com/mitchellh/go-ps"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
Expand All @@ -87,14 +87,15 @@ import (
// It optionally substitutes environment variables in the configuration.
// Referenced environment variables must be of the form `$(var)` (not `$var` or `${var}`).
type Reloader struct {
logger log.Logger
cfgFile string
cfgOutputFile string
cfgDirs []CfgDirOption
watchInterval time.Duration
retryInterval time.Duration
watchedDirs []string
watcher *watcher
logger log.Logger
cfgFile string
cfgOutputFile string
cfgDirs []CfgDirOption
tolerateEnvVarExpansionErrors bool
retryInterval time.Duration
watchInterval time.Duration
watchedDirs []string
watcher *watcher

tr TriggerReloader

Expand All @@ -104,13 +105,14 @@ type Reloader struct {
lastCfgDirFiles []map[string]struct{}
forceReload bool

reloads prometheus.Counter
reloadErrors prometheus.Counter
lastReloadSuccess prometheus.Gauge
lastReloadSuccessTimestamp prometheus.Gauge
configApplyErrors prometheus.Counter
configApply prometheus.Counter
reloaderInfo *prometheus.GaugeVec
reloads prometheus.Counter
reloadErrors prometheus.Counter
lastReloadSuccess prometheus.Gauge
lastReloadSuccessTimestamp prometheus.Gauge
configApplyErrors prometheus.Counter
configEnvVarExpansionErrors prometheus.Gauge
configApply prometheus.Counter
reloaderInfo *prometheus.GaugeVec
}

// TriggerReloader reloads the configuration of the process.
Expand Down Expand Up @@ -172,6 +174,9 @@ type Options struct {
// RetryInterval controls how often the reloader retries a reloading of the
// configuration in case the reload operation returned an error.
RetryInterval time.Duration
// TolerateEnvVarExpansionErrors suppresses errors when expanding environment variables in the config file, and
// leaves the unset variables as is. All found environment variables are still expanded.
TolerateEnvVarExpansionErrors bool
}

var firstGzipBytes = []byte{0x1f, 0x8b, 0x08}
Expand All @@ -183,15 +188,16 @@ func New(logger log.Logger, reg prometheus.Registerer, o *Options) *Reloader {
logger = log.NewNopLogger()
}
r := &Reloader{
logger: logger,
cfgFile: o.CfgFile,
cfgOutputFile: o.CfgOutputFile,
cfgDirs: o.CfgDirs,
lastCfgDirFiles: make([]map[string]struct{}, len(o.CfgDirs)),
watcher: newWatcher(logger, reg, o.DelayInterval),
watchedDirs: o.WatchedDirs,
watchInterval: o.WatchInterval,
retryInterval: o.RetryInterval,
logger: logger,
cfgFile: o.CfgFile,
cfgOutputFile: o.CfgOutputFile,
cfgDirs: o.CfgDirs,
lastCfgDirFiles: make([]map[string]struct{}, len(o.CfgDirs)),
watcher: newWatcher(logger, reg, o.DelayInterval),
watchedDirs: o.WatchedDirs,
watchInterval: o.WatchInterval,
retryInterval: o.RetryInterval,
tolerateEnvVarExpansionErrors: o.TolerateEnvVarExpansionErrors,

reloads: promauto.With(reg).NewCounter(
prometheus.CounterOpts{
Expand Down Expand Up @@ -229,6 +235,12 @@ func New(logger log.Logger, reg prometheus.Registerer, o *Options) *Reloader {
Help: "Total number of config apply operations that failed.",
},
),
configEnvVarExpansionErrors: promauto.With(reg).NewGauge(
prometheus.GaugeOpts{
Name: "reloader_config_environment_variable_expansion_errors",
Help: "Number of environment variable expansions that failed during the last operation.",
},
),
reloaderInfo: promauto.With(reg).NewGaugeVec(
prometheus.GaugeOpts{
Name: "reloader_info",
Expand Down Expand Up @@ -348,7 +360,7 @@ func (r *Reloader) Watch(ctx context.Context) error {
}
}

func normalize(logger log.Logger, inputFile, outputFile string) error {
func (r *Reloader) normalize(inputFile, outputFile string) error {
b, err := os.ReadFile(inputFile)
if err != nil {
return errors.Wrap(err, "read file")
Expand All @@ -360,15 +372,15 @@ func normalize(logger log.Logger, inputFile, outputFile string) error {
if err != nil {
return errors.Wrap(err, "create gzip reader")
}
defer runutil.CloseWithLogOnErr(logger, zr, "gzip reader close")
defer runutil.CloseWithLogOnErr(r.logger, zr, "gzip reader close")

b, err = io.ReadAll(zr)
if err != nil {
return errors.Wrap(err, "read compressed config file")
}
}

b, err = expandEnv(b)
b, err = r.expandEnv(b)
if err != nil {
return errors.Wrap(err, "expand environment variables")
}
Expand Down Expand Up @@ -402,7 +414,7 @@ func (r *Reloader) apply(ctx context.Context) error {
}
cfgHash = h.Sum(nil)
if r.cfgOutputFile != "" {
if err := normalize(r.logger, r.cfgFile, r.cfgOutputFile); err != nil {
if err := r.normalize(r.cfgFile, r.cfgOutputFile); err != nil {
return err
}
}
Expand Down Expand Up @@ -446,7 +458,7 @@ func (r *Reloader) apply(ctx context.Context) error {

outFile := filepath.Join(outDir, targetFile.Name())
cfgDirFiles[outFile] = struct{}{}
if err := normalize(r.logger, path, outFile); err != nil {
if err := r.normalize(path, outFile); err != nil {
return errors.Wrapf(err, "move file: %s", path)
}
}
Expand Down Expand Up @@ -692,21 +704,30 @@ func RuntimeInfoURLFromBase(u *url.URL) *url.URL {

var envRe = regexp.MustCompile(`\$\(([a-zA-Z_0-9]+)\)`)

func expandEnv(b []byte) (r []byte, err error) {
r = envRe.ReplaceAllFunc(b, func(n []byte) []byte {
func (r *Reloader) expandEnv(b []byte) (replaced []byte, err error) {
configEnvVarExpansionErrorsCount := 0
replaced = envRe.ReplaceAllFunc(b, func(n []byte) []byte {
if err != nil {
return nil
}
m := n
n = n[2 : len(n)-1]

v, ok := os.LookupEnv(string(n))
if !ok {
err = errors.Errorf("found reference to unset environment variable %q", n)
configEnvVarExpansionErrorsCount++
errStr := errors.Errorf("found reference to unset environment variable %q", n)
if r.tolerateEnvVarExpansionErrors {
level.Warn(r.logger).Log("msg", "expand environment variable", "err", errStr)
return m
}
err = errStr
return nil
}
return []byte(v)
})
return r, err
r.configEnvVarExpansionErrors.Set(float64(configEnvVarExpansionErrorsCount))
return replaced, err
}

type watcher struct {
Expand Down
36 changes: 34 additions & 2 deletions pkg/reloader/reloader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,42 @@ config:
testutil.NotOk(t, err)
testutil.Assert(t, strings.HasSuffix(err.Error(), `found reference to unset environment variable "TEST_RELOADER_THANOS_ENV"`), "expect error since there envvars are not set.")

// Don't fail with unset variables.
ctx2, cancel2 := context.WithTimeout(ctx, 10*time.Second)

// Enable suppressing environment variables expansion errors.
reloader.tolerateEnvVarExpansionErrors = true

// Set an environment variable while leaving the other unset, so as to ensure we don't break the flow when an unset
// variable is found.
testutil.Ok(t, os.Setenv("TEST_RELOADER_THANOS_ENV2", "3"))
err = reloader.Watch(ctx2)
cancel2()

// Restore state.
reloader.tolerateEnvVarExpansionErrors = false
testutil.Ok(t, os.Unsetenv("TEST_RELOADER_THANOS_ENV2"))

// The environment variable expansion errors should be suppressed, but recorded.
testutil.Equals(t, 1.0, promtest.ToFloat64(reloader.configEnvVarExpansionErrors))

// All environment variables expansion errors should be suppressed.
testutil.Ok(t, err)

// Config should consist on unset as well as set variables.
f, err := os.ReadFile(output)
testutil.Ok(t, err)
testutil.Equals(t, `
config:
a: 1
b: $(TEST_RELOADER_THANOS_ENV)
c: 3
`, string(f))

testutil.Ok(t, os.Setenv("TEST_RELOADER_THANOS_ENV", "2"))
testutil.Ok(t, os.Setenv("TEST_RELOADER_THANOS_ENV2", "3"))

rctx, cancel2 := context.WithCancel(ctx)
rctx, cancel3 := context.WithCancel(ctx)
g := sync.WaitGroup{}
g.Add(1)
go func() {
Expand Down Expand Up @@ -159,7 +191,7 @@ config:
}
}
}
cancel2()
cancel3()
g.Wait()

testutil.Ok(t, os.Unsetenv("TEST_RELOADER_THANOS_ENV"))
Expand Down
Loading