From 7522e22cb9c076d16de2485e75db72bed140d105 Mon Sep 17 00:00:00 2001 From: Michael Burman Date: Thu, 1 Aug 2019 13:53:26 +0300 Subject: [PATCH 1/4] Export expvar metrics of badger to the metricsFactory Signed-off-by: Michael Burman --- plugin/storage/badger/factory.go | 68 +++++++++++++++++++++++++++++++- plugin/storage/badger/options.go | 2 +- 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/plugin/storage/badger/factory.go b/plugin/storage/badger/factory.go index 758a94404af..d1533f4e651 100644 --- a/plugin/storage/badger/factory.go +++ b/plugin/storage/badger/factory.go @@ -15,12 +15,17 @@ package badger import ( + "expvar" "flag" "io/ioutil" "os" + "strings" "time" "github.com/dgraph-io/badger" + + // init() badger's metrics to make them available in Initialize() + _ "github.com/dgraph-io/badger/y" "github.com/spf13/viper" "github.com/uber/jaeger-lib/metrics" "go.uber.org/zap" @@ -58,6 +63,9 @@ type Factory struct { LastMaintenanceRun metrics.Gauge // LastValueLogCleaned stores the timestamp (UnixNano) of the previous ValueLogGC run LastValueLogCleaned metrics.Gauge + + // Expose badger's internal expvar metrics, which are all gauge's at this point + badgerMetrics map[string]metrics.Gauge } } @@ -118,7 +126,10 @@ func (f *Factory) Initialize(metricsFactory metrics.Factory, logger *zap.Logger) f.metrics.LastMaintenanceRun = metricsFactory.Gauge(metrics.Options{Name: lastMaintenanceRunName}) f.metrics.LastValueLogCleaned = metricsFactory.Gauge(metrics.Options{Name: lastValueLogCleanedName}) + f.registerBadgerExpvarMetrics(metricsFactory) + go f.maintenance() + go f.metricsCopier() logger.Info("Badger storage configuration", zap.Any("configuration", opts)) @@ -150,7 +161,8 @@ func (f *Factory) CreateDependencyReader() (dependencystore.Reader, error) { // Close Implements io.Closer and closes the underlying storage func (f *Factory) Close() error { - f.maintenanceDone <- true + f.maintenanceDone <- true // maintenance close + f.maintenanceDone <- true // metrics close err := f.store.Close() @@ -175,6 +187,7 @@ func (f *Factory) maintenance() { return case t := <-maintenanceTicker.C: var err error + // After there's nothing to clean, the err is raised for err == nil { err = f.store.RunValueLogGC(0.5) // 0.5 is selected to rewrite a file if half of it can be discarded @@ -190,3 +203,56 @@ func (f *Factory) maintenance() { } } } + +func (f *Factory) metricsCopier() { + metricsTicker := time.NewTicker(10 * time.Second) + defer metricsTicker.Stop() + for { + select { + case <-f.maintenanceDone: + return + case <-metricsTicker.C: + expvar.Do(func(kv expvar.KeyValue) { + if strings.HasPrefix(kv.Key, "badger") { + if intVal, ok := kv.Value.(*expvar.Int); ok { + if g, found := f.metrics.badgerMetrics[kv.Key]; found { + g.Update(intVal.Value()) + } + } else if mapVal, ok := kv.Value.(*expvar.Map); ok { + mapVal.Do(func(innerKv expvar.KeyValue) { + // The metrics we're interested in have only a single inner key (dynamic name) + // and we're only interested in its value + if intVal, ok := innerKv.Value.(*expvar.Int); ok { + if g, found := f.metrics.badgerMetrics[kv.Key]; found { + g.Update(intVal.Value()) + } + } + }) + } + } + }) + } + } +} + +func (f *Factory) registerBadgerExpvarMetrics(metricsFactory metrics.Factory) { + f.metrics.badgerMetrics = make(map[string]metrics.Gauge) + + expvar.Do(func(kv expvar.KeyValue) { + if strings.HasPrefix(kv.Key, "badger") { + if _, ok := kv.Value.(*expvar.Int); ok { + g := metricsFactory.Gauge(metrics.Options{Name: kv.Key}) + f.metrics.badgerMetrics[kv.Key] = g + } else if mapVal, ok := kv.Value.(*expvar.Map); ok { + mapVal.Do(func(innerKv expvar.KeyValue) { + // The metrics we're interested in have only a single inner key (dynamic name) + // and we're only interested in its value + if _, ok = innerKv.Value.(*expvar.Int); ok { + g := metricsFactory.Gauge(metrics.Options{Name: kv.Key}) + f.metrics.badgerMetrics[kv.Key] = g + } + }) + } + } + }) +} diff --git a/plugin/storage/badger/options.go b/plugin/storage/badger/options.go index db3c957ca36..14904ef3bc5 100644 --- a/plugin/storage/badger/options.go +++ b/plugin/storage/badger/options.go @@ -41,7 +41,7 @@ type NamespaceConfig struct { } const ( - defaultMaintenanceInterval time.Duration = 5 * time.Minute + defaultMaintenanceInterval time.Duration = 5 * time.Second defaultTTL time.Duration = time.Hour * 72 ) From 38595bb67cedf763dda2c373a07c79ff77077d60 Mon Sep 17 00:00:00 2001 From: Michael Burman Date: Fri, 2 Aug 2019 11:37:43 +0300 Subject: [PATCH 2/4] Add test for metrics collector. Fix the maintenance interval, it should be every 5m not 5s and default table loading mode should stick to memory mapping Signed-off-by: Michael Burman --- plugin/storage/badger/factory.go | 4 ++- plugin/storage/badger/factory_test.go | 30 ++++++++++++++++++ plugin/storage/badger/options.go | 44 ++++++++++++++++----------- 3 files changed, 60 insertions(+), 18 deletions(-) diff --git a/plugin/storage/badger/factory.go b/plugin/storage/badger/factory.go index d1533f4e651..2ec662b03ed 100644 --- a/plugin/storage/badger/factory.go +++ b/plugin/storage/badger/factory.go @@ -23,6 +23,7 @@ import ( "time" "github.com/dgraph-io/badger" + "github.com/dgraph-io/badger/options" // init() badger's metrics to make them available in Initialize() _ "github.com/dgraph-io/badger/y" @@ -92,6 +93,7 @@ func (f *Factory) Initialize(metricsFactory metrics.Factory, logger *zap.Logger) f.logger = logger opts := badger.DefaultOptions + opts.TableLoadingMode = options.MemoryMap if f.Options.primary.Ephemeral { opts.SyncWrites = false @@ -205,7 +207,7 @@ func (f *Factory) maintenance() { } func (f *Factory) metricsCopier() { - metricsTicker := time.NewTicker(10 * time.Second) + metricsTicker := time.NewTicker(f.Options.primary.MetricsUpdateInterval) defer metricsTicker.Stop() for { select { diff --git a/plugin/storage/badger/factory_test.go b/plugin/storage/badger/factory_test.go index 4261292505e..76ef23210ee 100644 --- a/plugin/storage/badger/factory_test.go +++ b/plugin/storage/badger/factory_test.go @@ -144,3 +144,33 @@ func TestMaintenanceCodecov(t *testing.T) { _ = f.store.Close() waiter() // This should trigger the logging of error } + +func TestBadgerMetrics(t *testing.T) { + f := NewFactory() + v, command := config.Viperize(f.AddFlags) + command.ParseFlags([]string{ + "--badger.metrics-update-interval=10ms", + }) + f.InitFromViper(v) + mFactory := metricstest.NewFactory(0) + f.Initialize(mFactory, zap.NewNop()) + assert.NotNil(t, f.metrics.badgerMetrics) + _, found := f.metrics.badgerMetrics["badger_memtable_gets_total"] + assert.True(t, found) + + waiter := func(previousValue int64) int64 { + sleeps := 0 + _, gs := mFactory.Snapshot() + for gs["adger_memtable_gets_total"] == previousValue && sleeps < 8 { + // Wait for the scheduler + time.Sleep(time.Duration(50) * time.Millisecond) + sleeps++ + _, gs = mFactory.Snapshot() + } + assert.True(t, gs["badger_memtable_gets_total"] > previousValue) + return gs["badger_memtable_gets_total"] + } + + vlogSize := waiter(0) + assert.True(t, vlogSize > 0) +} diff --git a/plugin/storage/badger/options.go b/plugin/storage/badger/options.go index 14904ef3bc5..8775578c431 100644 --- a/plugin/storage/badger/options.go +++ b/plugin/storage/badger/options.go @@ -31,18 +31,20 @@ type Options struct { // NamespaceConfig is badger's internal configuration data type NamespaceConfig struct { - namespace string - SpanStoreTTL time.Duration - ValueDirectory string - KeyDirectory string - Ephemeral bool // Setting this to true will ignore ValueDirectory and KeyDirectory - SyncWrites bool - MaintenanceInterval time.Duration + namespace string + SpanStoreTTL time.Duration + ValueDirectory string + KeyDirectory string + Ephemeral bool // Setting this to true will ignore ValueDirectory and KeyDirectory + SyncWrites bool + MaintenanceInterval time.Duration + MetricsUpdateInterval time.Duration } const ( - defaultMaintenanceInterval time.Duration = 5 * time.Second - defaultTTL time.Duration = time.Hour * 72 + defaultMaintenanceInterval time.Duration = 5 * time.Minute + defaultMetricsUpdateInterval time.Duration = 10 * time.Second + defaultTTL time.Duration = time.Hour * 72 ) const ( @@ -52,6 +54,7 @@ const ( suffixSpanstoreTTL = ".span-store-ttl" suffixSyncWrite = ".consistency" suffixMaintenanceInterval = ".maintenance-interval" + suffixMetricsInterval = ".metrics-update-interval" // Intended only for testing purposes defaultDataDir = string(os.PathSeparator) + "data" defaultValueDir = defaultDataDir + string(os.PathSeparator) + "values" defaultKeysDir = defaultDataDir + string(os.PathSeparator) + "keys" @@ -64,13 +67,14 @@ func NewOptions(primaryNamespace string, otherNamespaces ...string) *Options { options := &Options{ primary: &NamespaceConfig{ - namespace: primaryNamespace, - SpanStoreTTL: defaultTTL, - SyncWrites: false, // Performance over durability - Ephemeral: true, // Default is ephemeral storage - ValueDirectory: defaultBadgerDataDir + defaultValueDir, - KeyDirectory: defaultBadgerDataDir + defaultKeysDir, - MaintenanceInterval: defaultMaintenanceInterval, + namespace: primaryNamespace, + SpanStoreTTL: defaultTTL, + SyncWrites: false, // Performance over durability + Ephemeral: true, // Default is ephemeral storage + ValueDirectory: defaultBadgerDataDir + defaultValueDir, + KeyDirectory: defaultBadgerDataDir + defaultKeysDir, + MaintenanceInterval: defaultMaintenanceInterval, + MetricsUpdateInterval: defaultMetricsUpdateInterval, }, } @@ -112,13 +116,18 @@ func addFlags(flagSet *flag.FlagSet, nsConfig *NamespaceConfig) { flagSet.Bool( nsConfig.namespace+suffixSyncWrite, nsConfig.SyncWrites, - "If all writes should be synced immediately. This will greatly reduce write performance.", + "If all writes should be synced immediately. This can impact write performance.", ) flagSet.Duration( nsConfig.namespace+suffixMaintenanceInterval, nsConfig.MaintenanceInterval, "How often the maintenance thread for values is ran. Format is time.Duration (https://golang.org/pkg/time/#Duration)", ) + flagSet.Duration( + nsConfig.namespace+suffixMetricsInterval, + nsConfig.MetricsUpdateInterval, + "How often the badger metrics are collected by Jaeger. Format is time.Duration (https://golang.org/pkg/time/#Duration)", + ) } // InitFromViper initializes Options with properties from viper @@ -133,6 +142,7 @@ func initFromViper(cfg *NamespaceConfig, v *viper.Viper) { cfg.SyncWrites = v.GetBool(cfg.namespace + suffixSyncWrite) cfg.SpanStoreTTL = v.GetDuration(cfg.namespace + suffixSpanstoreTTL) cfg.MaintenanceInterval = v.GetDuration(cfg.namespace + suffixMaintenanceInterval) + cfg.MetricsUpdateInterval = v.GetDuration(cfg.namespace + suffixMetricsInterval) } // GetPrimary returns the primary namespace configuration From c9544d3eaaf502f899725675f3d3c15405316fdb Mon Sep 17 00:00:00 2001 From: Michael Burman Date: Mon, 5 Aug 2019 17:29:18 +0300 Subject: [PATCH 3/4] Address comments, add test for map existence, channel closing and use map inner key for metric tag Signed-off-by: Michael Burman --- plugin/storage/badger/factory.go | 9 ++------- plugin/storage/badger/factory_test.go | 18 ++++++++++++++++-- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/plugin/storage/badger/factory.go b/plugin/storage/badger/factory.go index 2ec662b03ed..59cec9b7020 100644 --- a/plugin/storage/badger/factory.go +++ b/plugin/storage/badger/factory.go @@ -24,9 +24,6 @@ import ( "github.com/dgraph-io/badger" "github.com/dgraph-io/badger/options" - - // init() badger's metrics to make them available in Initialize() - _ "github.com/dgraph-io/badger/y" "github.com/spf13/viper" "github.com/uber/jaeger-lib/metrics" "go.uber.org/zap" @@ -163,9 +160,7 @@ func (f *Factory) CreateDependencyReader() (dependencystore.Reader, error) { // Close Implements io.Closer and closes the underlying storage func (f *Factory) Close() error { - f.maintenanceDone <- true // maintenance close - f.maintenanceDone <- true // metrics close - + close(f.maintenanceDone) err := f.store.Close() // Remove tmp files if this was ephemeral storage @@ -250,7 +245,7 @@ func (f *Factory) registerBadgerExpvarMetrics(metricsFactory metrics.Factory) { // The metrics we're interested in have only a single inner key (dynamic name) // and we're only interested in its value if _, ok = innerKv.Value.(*expvar.Int); ok { - g := metricsFactory.Gauge(metrics.Options{Name: kv.Key}) + g := metricsFactory.Gauge(metrics.Options{Name: kv.Key, Tags: map[string]string{"directory": innerKv.Key}}) f.metrics.badgerMetrics[kv.Key] = g } }) diff --git a/plugin/storage/badger/factory_test.go b/plugin/storage/badger/factory_test.go index 76ef23210ee..1bcbf74a7e2 100644 --- a/plugin/storage/badger/factory_test.go +++ b/plugin/storage/badger/factory_test.go @@ -141,11 +141,16 @@ func TestMaintenanceCodecov(t *testing.T) { } } - _ = f.store.Close() + err := f.store.Close() + assert.NoError(t, err) waiter() // This should trigger the logging of error } func TestBadgerMetrics(t *testing.T) { + // The expvar is leaking keyparams between tests. We need to clean up a bit.. + eMap := expvar.Get("badger_lsm_size_bytes").(*expvar.Map) + eMap.Init() + f := NewFactory() v, command := config.Viperize(f.AddFlags) command.ParseFlags([]string{ @@ -161,7 +166,7 @@ func TestBadgerMetrics(t *testing.T) { waiter := func(previousValue int64) int64 { sleeps := 0 _, gs := mFactory.Snapshot() - for gs["adger_memtable_gets_total"] == previousValue && sleeps < 8 { + for gs["badger_memtable_gets_total"] == previousValue && sleeps < 8 { // Wait for the scheduler time.Sleep(time.Duration(50) * time.Millisecond) sleeps++ @@ -172,5 +177,14 @@ func TestBadgerMetrics(t *testing.T) { } vlogSize := waiter(0) + _, gs := mFactory.Snapshot() assert.True(t, vlogSize > 0) + assert.True(t, gs["badger_memtable_gets_total"] > 0) // IntVal metric + + mapKey := fmt.Sprintf("badger_lsm_size_bytes|directory=%s", f.tmpDir) + _, found = gs[mapKey] // Map metric + assert.True(t, found) + + err := f.Close() + assert.NoError(t, err) } From 680e1e6082e9e539b19c4214d7a73e7289890b6d Mon Sep 17 00:00:00 2001 From: Michael Burman Date: Tue, 6 Aug 2019 10:58:47 +0300 Subject: [PATCH 4/4] Remove directory tag from the metric name Signed-off-by: Michael Burman --- plugin/storage/badger/factory.go | 2 +- plugin/storage/badger/factory_test.go | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/plugin/storage/badger/factory.go b/plugin/storage/badger/factory.go index 59cec9b7020..3bd5e29927b 100644 --- a/plugin/storage/badger/factory.go +++ b/plugin/storage/badger/factory.go @@ -245,7 +245,7 @@ func (f *Factory) registerBadgerExpvarMetrics(metricsFactory metrics.Factory) { // The metrics we're interested in have only a single inner key (dynamic name) // and we're only interested in its value if _, ok = innerKv.Value.(*expvar.Int); ok { - g := metricsFactory.Gauge(metrics.Options{Name: kv.Key, Tags: map[string]string{"directory": innerKv.Key}}) + g := metricsFactory.Gauge(metrics.Options{Name: kv.Key}) f.metrics.badgerMetrics[kv.Key] = g } }) diff --git a/plugin/storage/badger/factory_test.go b/plugin/storage/badger/factory_test.go index 1bcbf74a7e2..1917d017624 100644 --- a/plugin/storage/badger/factory_test.go +++ b/plugin/storage/badger/factory_test.go @@ -181,8 +181,7 @@ func TestBadgerMetrics(t *testing.T) { assert.True(t, vlogSize > 0) assert.True(t, gs["badger_memtable_gets_total"] > 0) // IntVal metric - mapKey := fmt.Sprintf("badger_lsm_size_bytes|directory=%s", f.tmpDir) - _, found = gs[mapKey] // Map metric + _, found = gs["badger_lsm_size_bytes"] // Map metric assert.True(t, found) err := f.Close()