From deb556035dac93a81083430eccdadfa5d6396e3f Mon Sep 17 00:00:00 2001 From: Cyril Tovena Date: Tue, 25 May 2021 13:41:07 +0200 Subject: [PATCH] Improve mark file processing. This makes the boltdb batches for deleting entries smaller, so the goroutines that delete chunks no longer get stuck. Also make the metrics refresh more often. Signed-off-by: Cyril Tovena --- .../shipper/compactor/retention/marker.go | 26 +++++++++++++++++-- .../dashboards/loki-retention.libsonnet | 2 +- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/pkg/storage/stores/shipper/compactor/retention/marker.go b/pkg/storage/stores/shipper/compactor/retention/marker.go index 421d4c84cce4..ae3d8d2de7d4 100644 --- a/pkg/storage/stores/shipper/compactor/retention/marker.go +++ b/pkg/storage/stores/shipper/compactor/retention/marker.go @@ -199,7 +199,6 @@ func (r *markerProcessor) Start(deleteFunc func(ctx context.Context, chunkId []b level.Error(util_log.Logger).Log("msg", "failed to list marks path", "path", r.folder, "err", err) continue } - r.sweeperMetrics.markerFilesCurrent.Set(float64(len(paths))) if len(paths) == 0 { level.Info(util_log.Logger).Log("msg", "no marks file found") } @@ -221,6 +220,29 @@ func (r *markerProcessor) Start(deleteFunc func(ctx context.Context, chunkId []b } }() + r.wg.Add(1) + go func() { + defer r.wg.Done() + ticker := time.NewTicker(10 * time.Second) + defer ticker.Stop() + tick := func() { + select { + case <-r.ctx.Done(): + case <-ticker.C: + } + } + for ; true; tick() { + if r.ctx.Err() != nil { + return + } + paths, _, err := r.availablePath() + if err != nil { + level.Error(util_log.Logger).Log("msg", "failed to list marks path", "path", r.folder, "err", err) + continue + } + r.sweeperMetrics.markerFilesCurrent.Set(float64(len(paths))) + } + }() } func (r *markerProcessor) processPath(path string, deleteFunc func(ctx context.Context, chunkId []byte) error) error { @@ -259,7 +281,7 @@ func (r *markerProcessor) processPath(path string, deleteFunc func(ctx 
context.C if err != nil { return err } - dbUpdate.MaxBatchDelay = 1 * time.Second // 1 s is way enough for saving changes, worst case this operation is idempotent. + dbUpdate.MaxBatchDelay = 5 * time.Millisecond defer func() { close(queue) wg.Wait() diff --git a/production/loki-mixin/dashboards/loki-retention.libsonnet b/production/loki-mixin/dashboards/loki-retention.libsonnet index beaa6bd86307..167afbef0e5e 100644 --- a/production/loki-mixin/dashboards/loki-retention.libsonnet +++ b/production/loki-mixin/dashboards/loki-retention.libsonnet @@ -80,7 +80,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; ) .addPanel( $.panel('Marks Files to Process') + - $.queryPanel(['loki_boltdb_shipper_retention_sweeper_marker_files_current{%s}' % $.namespaceMatcher()], ['count']), + $.queryPanel(['sum(loki_boltdb_shipper_retention_sweeper_marker_files_current{%s})' % $.namespaceMatcher()], ['count']), ) .addPanel( $.panel('Delete Rate Per Status') +