From 9685d4b72177b910d0ca7c4e5f4e32b191635cfd Mon Sep 17 00:00:00 2001 From: Daniel Jaglowski Date: Fri, 27 Oct 2023 10:33:25 -0600 Subject: [PATCH] [chore][pkg/stanza] Remove unlimited memory for file paths (#28491) --- .chloggen/pkg-stanza-rm-seen-paths.yaml | 34 +++++++++++++++++++++++++ pkg/stanza/fileconsumer/config.go | 1 - pkg/stanza/fileconsumer/file.go | 10 +------- 3 files changed, 35 insertions(+), 10 deletions(-) create mode 100755 .chloggen/pkg-stanza-rm-seen-paths.yaml diff --git a/.chloggen/pkg-stanza-rm-seen-paths.yaml b/.chloggen/pkg-stanza-rm-seen-paths.yaml new file mode 100755 index 000000000000..bc606ff14b31 --- /dev/null +++ b/.chloggen/pkg-stanza-rm-seen-paths.yaml @@ -0,0 +1,34 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: breaking + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: filelogreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Change "Started watching file" log behavior + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [28491] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: | + Previously, every unique file path which was found by the receiver would be remembered indefinitely. + This list was kept independently of the uniqueness / checkpointing mechanism (which does not rely on the file path). + The purpose of this list was to allow us to emit a log whenever a path was seen for the first time. + This removes the separate list and relies instead on the same mechanism as checkpointing. 
Now, a similar log is emitted + any time a file is found which is not currently checkpointed. Because the checkpointing mechanism does not maintain history + indefinitely, it is now possible that a log will be emitted more than once for the same file path. This will happen when no file exists at + the path for a period of time. + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/pkg/stanza/fileconsumer/config.go b/pkg/stanza/fileconsumer/config.go index 0da539a08bad..7e7aed2f8e52 100644 --- a/pkg/stanza/fileconsumer/config.go +++ b/pkg/stanza/fileconsumer/config.go @@ -183,7 +183,6 @@ func (c Config) buildManager(logger *zap.SugaredLogger, emit emit.Callback, spli maxBatches: c.MaxBatches, previousPollFiles: make([]*reader.Reader, 0, c.MaxConcurrentFiles/2), knownFiles: make([]*reader.Metadata, 0, 10*c.MaxConcurrentFiles), - seenPaths: make(map[string]struct{}, 100), }, nil } diff --git a/pkg/stanza/fileconsumer/file.go b/pkg/stanza/fileconsumer/file.go index 3179363a313b..68e9ea64b042 100644 --- a/pkg/stanza/fileconsumer/file.go +++ b/pkg/stanza/fileconsumer/file.go @@ -34,7 +34,6 @@ type Manager struct { previousPollFiles []*reader.Reader knownFiles []*reader.Metadata - seenPaths map[string]struct{} currentFps []*fingerprint.Fingerprint } @@ -178,14 +177,6 @@ func (m *Manager) consume(ctx context.Context, paths []string) { } func (m *Manager) makeFingerprint(path string) (*fingerprint.Fingerprint, *os.File) { - if _, ok := m.seenPaths[path]; !ok { - if m.readerFactory.FromBeginning { - m.Infow("Started watching file", "path", path) - } else { - m.Infow("Started watching file 
from end. To read preexisting logs, configure the argument 'start_at' to 'beginning'", "path", path) - } - m.seenPaths[path] = struct{}{} - } file, err := os.Open(path) // #nosec - operator must read in files defined by user if err != nil { m.Errorw("Failed to open file", zap.Error(err)) @@ -274,5 +265,6 @@ func (m *Manager) newReader(file *os.File, fp *fingerprint.Fingerprint) (*reader } // If we don't match any previously known files, create a new reader from scratch + m.Infow("Started watching file", "path", file.Name()) return m.readerFactory.NewReader(file, fp) }