diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 86ac40c3a18..b9f10350a84 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -25,6 +25,7 @@ https://github.com/elastic/beats/compare/v5.1.1...master[Check the HEAD diff] - Always use absolute path for event and registry. This can lead to issues when relative paths were used before. {pull}3328[3328] - Remove code to convert states from 1.x. {pull}3767[3767] - Remove deprecated config options force_close_files and close_older. {pull}3768[3768] +- Change clean_removed behaviour to also remove states for files which cannot be found anymore under the same name. {pull}3827[3827] *Heartbeat* diff --git a/filebeat/docs/reference/configuration/filebeat-options.asciidoc b/filebeat/docs/reference/configuration/filebeat-options.asciidoc index 3f05ba61df2..342ee30e21b 100644 --- a/filebeat/docs/reference/configuration/filebeat-options.asciidoc +++ b/filebeat/docs/reference/configuration/filebeat-options.asciidoc @@ -273,14 +273,12 @@ NOTE: Every time a file is renamed, the file state is updated and the counter fo [[clean-removed]] ===== clean_removed -When this option is enabled, Filebeat cleans files from the registry if they cannot be found on disk anymore. This setting does not apply to renamed files or files that were moved to another directory that is still visible to Filebeat. This option is enabled by default. - +When this option is enabled, Filebeat cleans files from the registry if they cannot be found on disk anymore under the last known name. This means also files which were renamed after the harvester was finished will be removed. This option is enabled by default. If a shared drive disappears for a short period and appears again, all files will be read again from the beginning because the states were removed from the registry file. In such cases, we recommend that you disable the `clean_removed` option. You must disable this option if you also disable `close_removed`. - [[scan-frequency]] ===== scan_frequency diff --git a/filebeat/prospector/prospector_log.go b/filebeat/prospector/prospector_log.go index 4aa4b8b6802..5f44f998595 100644 --- a/filebeat/prospector/prospector_log.go +++ b/filebeat/prospector/prospector_log.go @@ -94,24 +94,42 @@ func (l *Log) Run() { if l.config.CleanRemoved { for _, state := range l.Prospector.states.GetStates() { // os.Stat will return an error in case the file does not exist - _, err := os.Stat(state.Source) + stat, err := os.Stat(state.Source) if err != nil { - // Only clean up files where state is Finished - if state.Finished { - state.TTL = 0 - err := l.Prospector.updateState(input.NewEvent(state)) - if err != nil { - logp.Err("File cleanup state update error: %s", err) - } + if os.IsNotExist(err) { + l.removeState(state) logp.Debug("prospector", "Remove state for file as file removed: %s", state.Source) } else { - logp.Debug("prospector", "State for file not removed because not finished: %s", state.Source) + logp.Err("Prospector state for %s was not removed: %s", state.Source, err) + } + } else { + // Check if existing source on disk and state are the same. Remove if not the case. + newState := file.NewState(stat, state.Source) + if !newState.FileStateOS.IsSame(state.FileStateOS) { + l.removeState(state) + logp.Debug("prospector", "Remove state for file as file removed or renamed: %s", state.Source) } } } } } +func (l *Log) removeState(state file.State) { + + // Only clean up files where state is Finished + if !state.Finished { + logp.Debug("prospector", "State for file not removed because harvester not finished: %s", state.Source) + return + } + + state.TTL = 0 + err := l.Prospector.updateState(input.NewEvent(state)) + if err != nil { + logp.Err("File cleanup state update error: %s", err) + } + +} + // getFiles returns all files which have to be harvested // All globs are expanded and then directory and excluded files are removed func (l *Log) getFiles() map[string]os.FileInfo { diff --git a/filebeat/tests/system/test_harvester.py b/filebeat/tests/system/test_harvester.py index 1cdc5be9228..3867b889dae 100644 --- a/filebeat/tests/system/test_harvester.py +++ b/filebeat/tests/system/test_harvester.py @@ -20,6 +20,7 @@ def test_close_renamed(self): self.render_config_template( path=os.path.abspath(self.working_dir) + "/log/test.log", close_renamed="true", + clean_removed="false", scan_frequency="0.1s" ) os.mkdir(self.working_dir + "/log/") @@ -620,7 +621,8 @@ def test_symlink_removed(self): self.render_config_template( path=os.path.abspath(self.working_dir) + "/log/symlink.log", symlinks="true", - clean_removed="false" + clean_removed="false", + close_removed="false", ) os.mkdir(self.working_dir + "/log/") diff --git a/filebeat/tests/system/test_registrar.py b/filebeat/tests/system/test_registrar.py index e42b6cfa60d..dd22f62a88b 100644 --- a/filebeat/tests/system/test_registrar.py +++ b/filebeat/tests/system/test_registrar.py @@ -396,7 +396,8 @@ def test_rotating_file_with_restart(self): self.render_config_template( path=os.path.abspath(self.working_dir) + "/log/input*", scan_frequency="1s", - close_inactive="1s" + close_inactive="1s", + clean_removed="false" ) if os.name == "nt":