From 85759c76e7bdbd7b0e222d475fdecee198c92f26 Mon Sep 17 00:00:00 2001 From: "Grot (@grafanabot)" <43478413+grafanabot@users.noreply.github.com> Date: Fri, 6 Sep 2024 21:32:07 +0100 Subject: [PATCH] chore: [k218] feat(max-allowed-line-length): add config to set `max-allowed-line-length` in pattern ingester (#14076) Co-authored-by: Sven Grossmann --- docs/sources/shared/configuration.md | 4 ++ pkg/pattern/drain/drain.go | 62 ++++++++++++++-------------- pkg/pattern/ingester.go | 27 +++++++----- 3 files changed, 52 insertions(+), 41 deletions(-) diff --git a/docs/sources/shared/configuration.md b/docs/sources/shared/configuration.md index 75b3e85749e7..13fcddde22fa 100644 --- a/docs/sources/shared/configuration.md +++ b/docs/sources/shared/configuration.md @@ -816,6 +816,10 @@ pattern_ingester: # CLI flag: -pattern-ingester.connection-timeout [connection_timeout: <duration> | default = 2s] + # The maximum length of log lines that can be used for pattern detection. + # CLI flag: -pattern-ingester.max-allowed-line-length + [max_allowed_line_length: <int> | default = 3000] + # The index_gateway block configures the Loki index gateway server, responsible # for serving index queries without the need to constantly interact with the # object store. 
diff --git a/pkg/pattern/drain/drain.go b/pkg/pattern/drain/drain.go index 5c48d2980e02..70fc9f5ae458 100644 --- a/pkg/pattern/drain/drain.go +++ b/pkg/pattern/drain/drain.go @@ -36,14 +36,15 @@ import ( ) type Config struct { - maxNodeDepth int - LogClusterDepth int - SimTh float64 - MaxChildren int - ExtraDelimiters []string - MaxClusters int - ParamString string - MaxEvictionRatio float64 + maxNodeDepth int + LogClusterDepth int + SimTh float64 + MaxChildren int + ExtraDelimiters []string + MaxClusters int + ParamString string + MaxEvictionRatio float64 + MaxAllowedLineLength int } func createLogClusterCache(maxSize int, onEvict func(int, *LogCluster)) *LogClusterCache { @@ -125,11 +126,12 @@ func DefaultConfig() *Config { // Both SimTh and MaxClusterDepth impact branching factor: the greater // MaxClusterDepth and SimTh, the less the chance that there will be // "similar" clusters, but the greater the footprint. - SimTh: 0.3, - MaxChildren: 15, - ParamString: `<_>`, - MaxClusters: 300, - MaxEvictionRatio: 0.25, + SimTh: 0.3, + MaxChildren: 15, + ParamString: `<_>`, + MaxClusters: 300, + MaxEvictionRatio: 0.25, + MaxAllowedLineLength: 3000, } } @@ -140,11 +142,10 @@ func New(config *Config, format string, metrics *Metrics) *Drain { config.maxNodeDepth = config.LogClusterDepth - 2 d := &Drain{ - config: config, - rootNode: createNode(), - metrics: metrics, - maxAllowedLineLength: 3000, - format: format, + config: config, + rootNode: createNode(), + metrics: metrics, + format: format, } limiter := newLimiter(config.MaxEvictionRatio) @@ -180,18 +181,17 @@ func New(config *Config, format string, metrics *Metrics) *Drain { } type Drain struct { - config *Config - rootNode *Node - idToCluster *LogClusterCache - clustersCounter int - metrics *Metrics - tokenizer LineTokenizer - maxAllowedLineLength int - format string - tokens []string - state interface{} - limiter *limiter - pruning bool + config *Config + rootNode *Node + idToCluster *LogClusterCache + 
clustersCounter int + metrics *Metrics + tokenizer LineTokenizer + format string + tokens []string + state interface{} + limiter *limiter + pruning bool } func (d *Drain) Clusters() []*LogCluster { @@ -206,7 +206,7 @@ func (d *Drain) Train(content string, ts int64) *LogCluster { if !d.limiter.Allow() { return nil } - if len(content) > d.maxAllowedLineLength { + if len(content) > d.config.MaxAllowedLineLength { return nil } d.tokens, d.state = d.tokenizer.Tokenize(content, d.tokens, d.state) diff --git a/pkg/pattern/ingester.go b/pkg/pattern/ingester.go index bd43908f289d..3c1bb55b7680 100644 --- a/pkg/pattern/ingester.go +++ b/pkg/pattern/ingester.go @@ -33,16 +33,17 @@ import ( const readBatchSize = 1024 type Config struct { - Enabled bool `yaml:"enabled,omitempty" doc:"description=Whether the pattern ingester is enabled."` - LifecyclerConfig ring.LifecyclerConfig `yaml:"lifecycler,omitempty" doc:"description=Configures how the lifecycle of the pattern ingester will operate and where it will register for discovery."` - ClientConfig clientpool.Config `yaml:"client_config,omitempty" doc:"description=Configures how the pattern ingester will connect to the ingesters."` - ConcurrentFlushes int `yaml:"concurrent_flushes"` - FlushCheckPeriod time.Duration `yaml:"flush_check_period"` - MaxClusters int `yaml:"max_clusters,omitempty" doc:"description=The maximum number of detected pattern clusters that can be created by streams."` - MaxEvictionRatio float64 `yaml:"max_eviction_ratio,omitempty" doc:"description=The maximum eviction ratio of patterns per stream. 
Once that ratio is reached, the stream will throttled pattern detection."` - MetricAggregation aggregation.Config `yaml:"metric_aggregation,omitempty" doc:"description=Configures the metric aggregation and storage behavior of the pattern ingester."` - TeeConfig TeeConfig `yaml:"tee_config,omitempty" doc:"description=Configures the pattern tee which forwards requests to the pattern ingester."` - ConnectionTimeout time.Duration `yaml:"connection_timeout"` + Enabled bool `yaml:"enabled,omitempty" doc:"description=Whether the pattern ingester is enabled."` + LifecyclerConfig ring.LifecyclerConfig `yaml:"lifecycler,omitempty" doc:"description=Configures how the lifecycle of the pattern ingester will operate and where it will register for discovery."` + ClientConfig clientpool.Config `yaml:"client_config,omitempty" doc:"description=Configures how the pattern ingester will connect to the ingesters."` + ConcurrentFlushes int `yaml:"concurrent_flushes"` + FlushCheckPeriod time.Duration `yaml:"flush_check_period"` + MaxClusters int `yaml:"max_clusters,omitempty" doc:"description=The maximum number of detected pattern clusters that can be created by streams."` + MaxEvictionRatio float64 `yaml:"max_eviction_ratio,omitempty" doc:"description=The maximum eviction ratio of patterns per stream. Once that ratio is reached, the stream will throttled pattern detection."` + MetricAggregation aggregation.Config `yaml:"metric_aggregation,omitempty" doc:"description=Configures the metric aggregation and storage behavior of the pattern ingester."` + TeeConfig TeeConfig `yaml:"tee_config,omitempty" doc:"description=Configures the pattern tee which forwards requests to the pattern ingester."` + ConnectionTimeout time.Duration `yaml:"connection_timeout"` + MaxAllowedLineLength int `yaml:"max_allowed_line_length,omitempty" doc:"description=The maximum length of log lines that can be used for pattern detection."` // For testing. 
factory ring_client.PoolFactory `yaml:"-"` @@ -91,6 +92,12 @@ func (cfg *Config) RegisterFlags(fs *flag.FlagSet) { 2*time.Second, "Timeout for connections between the Loki and the pattern ingester.", ) + fs.IntVar( + &cfg.MaxAllowedLineLength, + "pattern-ingester.max-allowed-line-length", + drain.DefaultConfig().MaxAllowedLineLength, + "The maximum length of log lines that can be used for pattern detection.", + ) } type TeeConfig struct {