Skip to content

Commit

Permalink
Add configuration of filestream input (elastic#21565)
Browse files Browse the repository at this point in the history
(cherry picked from commit d35dfb5)
  • Loading branch information
kvch committed Oct 12, 2020
1 parent 1192782 commit ee33d01
Show file tree
Hide file tree
Showing 8 changed files with 524 additions and 17 deletions.
140 changes: 140 additions & 0 deletions filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ filebeat.inputs:
#
# Possible options are:
# * log: Reads every line of the log file (default)
# * filestream: Improved version of log input. Experimental.
# * stdin: Reads the standard in

#------------------------------ Log input --------------------------------
Expand Down Expand Up @@ -231,6 +232,145 @@ filebeat.inputs:
# Defines if the input is enabled
#enabled: true

#--------------------------- Filestream input ----------------------------
- type: filestream

# Change to true to enable this input configuration.
enabled: false

# Paths that should be crawled and fetched. Glob based paths.
# To fetch all ".log" files from a specific level of subdirectories
# /var/log/*/*.log can be used.
# For each file found under this path, a harvester is started.
# Make sure no file is defined twice as this can lead to unexpected behaviour.
paths:
- /var/log/*.log
#- c:\programdata\elasticsearch\logs\*

# Configure the file encoding for reading files with international characters
# following the W3C recommendation for HTML5 (http://www.w3.org/TR/encoding).
# Some sample encodings:
# plain, utf-8, utf-16be-bom, utf-16be, utf-16le, big5, gb18030, gbk,
# hz-gb-2312, euc-kr, euc-jp, iso-2022-jp, shift-jis, ...
#encoding: plain


# Exclude lines. A list of regular expressions to match. It drops the lines that are
# matching any regular expression from the list. The include_lines is called before
# exclude_lines. By default, no lines are dropped.
#exclude_lines: ['^DBG']

# Include lines. A list of regular expressions to match. It exports the lines that are
# matching any regular expression from the list. The include_lines is called before
# exclude_lines. By default, all the lines are exported.
#include_lines: ['^ERR', '^WARN']

### Prospector options

# How often the input checks for new files in the paths that are specified
# for harvesting. Specify 1s to scan the directory as frequently as possible
# without causing Filebeat to scan too frequently. Default: 10s.
#prospector.scanner.check_interval: 10s

# Exclude files. A list of regular expressions to match. Filebeat drops the files that
# are matching any regular expression from the list. By default, no files are dropped.
#prospector.scanner.exclude_files: ['.gz$']

# Expand "**" patterns into regular glob patterns.
#prospector.scanner.recursive_glob: true

# If symlinks is enabled, symlinks are opened and harvested. The harvester is opening the
# original for harvesting but will report the symlink name as source.
#prospector.scanner.symlinks: false

### State options

# If the modification date of a file is older than clean_inactive, its state is removed
# from the registry. By default this is disabled.
#clean_inactive: 0

# Immediately removes the state of files which cannot be found on disk anymore.
#clean_removed: true

# Method to determine if two files are the same or not. By default
# the Beat considers two files the same if their inode and device id are the same.
#file_identity.native: ~

# Optional additional fields. These fields can be freely picked
# to add additional information to the crawled log files for filtering
#fields:
# level: debug
# review: 1

# Set to true to publish fields with null values in events.
#keep_null: false

# By default, all events contain `host.name`. This option can be set to true
# to disable the addition of this field to all events. The default value is
# false.
#publisher_pipeline.disable_host: false

# Ignore files which were modified more than the defined timespan in the past.
# ignore_older is disabled by default (it is set to 0), so no files are ignored.
# Time strings like 2h (2 hours), 5m (5 minutes) can be used.
#ignore_older: 0

# Defines the buffer size every harvester uses when fetching the file
#harvester_buffer_size: 16384

# Maximum number of bytes a single log event can have.
# All bytes after message_max_bytes are discarded and not sent. The default is 10MB.
# This is especially useful for multiline log messages which can get large.
#message_max_bytes: 10485760

# Characters which separate the lines. Valid values: auto, line_feed, vertical_tab, form_feed,
# carriage_return, carriage_return_line_feed, next_line, line_separator, paragraph_separator.
#line_terminator: auto

# The Ingest Node pipeline ID associated with this input. If this is set, it
# overwrites the pipeline option from the Elasticsearch output.
#pipeline:

# Backoff values define how aggressively filebeat crawls new files for updates
# The default values can be used in most cases. Backoff defines how long it is waited
# to check a file again after EOF is reached. Default is 1s which means the file
# is checked every second if new lines were added. This leads to a near real time crawling.
# Every time a new line appears, backoff is reset to the initial value.
#backoff.init: 1s

# Max backoff defines what the maximum backoff time is. After having backed off multiple times
# from checking the files, the waiting time will never exceed backoff.max, independent of the
# backoff factor. Having it set to 10s means that, in the worst case, if a new line is added to
# a log file after having backed off multiple times, it takes a maximum of 10s to read the new line.
#backoff.max: 10s

### Harvester closing options

# Close inactive closes the file handler after the predefined period.
# The period starts when the last line of the file was read, not at the file's ModTime.
# Time strings like 2h (2 hours), 5m (5 minutes) can be used.
#close.on_state_change.inactive: 5m

# Close renamed closes a file handler when the file is renamed or rotated.
# Note: Potential data loss. Make sure to read and understand the docs for this option.
#close.on_state_change.renamed: false

# When enabling this option, a file handler is closed immediately in case a file can't be found
# any more. In case the file shows up again later, harvesting will continue at the last known position
# after prospector.scanner.check_interval.
#close.on_state_change.removed: true

# Closes the file handler as soon as the harvester reaches the end of the file.
# By default this option is disabled.
# Note: Potential data loss. Make sure to read and understand the docs for this option.
#close.reader.eof: false

# Close timeout closes the harvester after the predefined time.
# This happens regardless of whether the harvester has finished reading the file or not.
# By default this option is disabled.
# Note: Potential data loss. Make sure to read and understand the docs for this option.
#close.reader.after_interval: 0

#----------------------------- Stdin input -------------------------------
# Configuration to use stdin input
#- type: stdin
Expand Down
29 changes: 29 additions & 0 deletions filebeat/_meta/config/filebeat.inputs.yml.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,32 @@ filebeat.inputs:
# that was (not) matched before or after or as long as a pattern is not matched based on negate.
# Note: After is the equivalent to previous and before is the equivalent to next in Logstash
#multiline.match: after

# filestream is an experimental input. It is going to replace log input in the future.
- type: filestream

# Change to true to enable this input configuration.
enabled: false

# Paths that should be crawled and fetched. Glob based paths.
paths:
- /var/log/*.log
#- c:\programdata\elasticsearch\logs\*

# Exclude lines. A list of regular expressions to match. It drops the lines that are
# matching any regular expression from the list.
#exclude_lines: ['^DBG']

# Include lines. A list of regular expressions to match. It exports the lines that are
# matching any regular expression from the list.
#include_lines: ['^ERR', '^WARN']

# Exclude files. A list of regular expressions to match. Filebeat drops the files that
# are matching any regular expression from the list. By default, no files are dropped.
#prospector.scanner.exclude_files: ['.gz$']

# Optional additional fields. These fields can be freely picked
# to add additional information to the crawled log files for filtering
#fields:
# level: debug
# review: 1
140 changes: 140 additions & 0 deletions filebeat/filebeat.reference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,7 @@ filebeat.inputs:
#
# Possible options are:
# * log: Reads every line of the log file (default)
# * filestream: Improved version of log input. Experimental.
# * stdin: Reads the standard in

#------------------------------ Log input --------------------------------
Expand Down Expand Up @@ -618,6 +619,145 @@ filebeat.inputs:
# Defines if the input is enabled
#enabled: true

#--------------------------- Filestream input ----------------------------
- type: filestream

# Change to true to enable this input configuration.
enabled: false

# Paths that should be crawled and fetched. Glob based paths.
# To fetch all ".log" files from a specific level of subdirectories
# /var/log/*/*.log can be used.
# For each file found under this path, a harvester is started.
# Make sure no file is defined twice as this can lead to unexpected behaviour.
paths:
- /var/log/*.log
#- c:\programdata\elasticsearch\logs\*

# Configure the file encoding for reading files with international characters
# following the W3C recommendation for HTML5 (http://www.w3.org/TR/encoding).
# Some sample encodings:
# plain, utf-8, utf-16be-bom, utf-16be, utf-16le, big5, gb18030, gbk,
# hz-gb-2312, euc-kr, euc-jp, iso-2022-jp, shift-jis, ...
#encoding: plain


# Exclude lines. A list of regular expressions to match. It drops the lines that are
# matching any regular expression from the list. The include_lines is called before
# exclude_lines. By default, no lines are dropped.
#exclude_lines: ['^DBG']

# Include lines. A list of regular expressions to match. It exports the lines that are
# matching any regular expression from the list. The include_lines is called before
# exclude_lines. By default, all the lines are exported.
#include_lines: ['^ERR', '^WARN']

### Prospector options

# How often the input checks for new files in the paths that are specified
# for harvesting. Specify 1s to scan the directory as frequently as possible
# without causing Filebeat to scan too frequently. Default: 10s.
#prospector.scanner.check_interval: 10s

# Exclude files. A list of regular expressions to match. Filebeat drops the files that
# are matching any regular expression from the list. By default, no files are dropped.
#prospector.scanner.exclude_files: ['.gz$']

# Expand "**" patterns into regular glob patterns.
#prospector.scanner.recursive_glob: true

# If symlinks is enabled, symlinks are opened and harvested. The harvester is opening the
# original for harvesting but will report the symlink name as source.
#prospector.scanner.symlinks: false

### State options

# If the modification date of a file is older than clean_inactive, its state is removed
# from the registry. By default this is disabled.
#clean_inactive: 0

# Immediately removes the state of files which cannot be found on disk anymore.
#clean_removed: true

# Method to determine if two files are the same or not. By default
# the Beat considers two files the same if their inode and device id are the same.
#file_identity.native: ~

# Optional additional fields. These fields can be freely picked
# to add additional information to the crawled log files for filtering
#fields:
# level: debug
# review: 1

# Set to true to publish fields with null values in events.
#keep_null: false

# By default, all events contain `host.name`. This option can be set to true
# to disable the addition of this field to all events. The default value is
# false.
#publisher_pipeline.disable_host: false

# Ignore files which were modified more than the defined timespan in the past.
# ignore_older is disabled by default (it is set to 0), so no files are ignored.
# Time strings like 2h (2 hours), 5m (5 minutes) can be used.
#ignore_older: 0

# Defines the buffer size every harvester uses when fetching the file
#harvester_buffer_size: 16384

# Maximum number of bytes a single log event can have.
# All bytes after message_max_bytes are discarded and not sent. The default is 10MB.
# This is especially useful for multiline log messages which can get large.
#message_max_bytes: 10485760

# Characters which separate the lines. Valid values: auto, line_feed, vertical_tab, form_feed,
# carriage_return, carriage_return_line_feed, next_line, line_separator, paragraph_separator.
#line_terminator: auto

# The Ingest Node pipeline ID associated with this input. If this is set, it
# overwrites the pipeline option from the Elasticsearch output.
#pipeline:

# Backoff values define how aggressively filebeat crawls new files for updates
# The default values can be used in most cases. Backoff defines how long it is waited
# to check a file again after EOF is reached. Default is 1s which means the file
# is checked every second if new lines were added. This leads to a near real time crawling.
# Every time a new line appears, backoff is reset to the initial value.
#backoff.init: 1s

# Max backoff defines what the maximum backoff time is. After having backed off multiple times
# from checking the files, the waiting time will never exceed backoff.max, independent of the
# backoff factor. Having it set to 10s means that, in the worst case, if a new line is added to
# a log file after having backed off multiple times, it takes a maximum of 10s to read the new line.
#backoff.max: 10s

### Harvester closing options

# Close inactive closes the file handler after the predefined period.
# The period starts when the last line of the file was read, not at the file's ModTime.
# Time strings like 2h (2 hours), 5m (5 minutes) can be used.
#close.on_state_change.inactive: 5m

# Close renamed closes a file handler when the file is renamed or rotated.
# Note: Potential data loss. Make sure to read and understand the docs for this option.
#close.on_state_change.renamed: false

# When enabling this option, a file handler is closed immediately in case a file can't be found
# any more. In case the file shows up again later, harvesting will continue at the last known position
# after prospector.scanner.check_interval.
#close.on_state_change.removed: true

# Closes the file handler as soon as the harvester reaches the end of the file.
# By default this option is disabled.
# Note: Potential data loss. Make sure to read and understand the docs for this option.
#close.reader.eof: false

# Close timeout closes the harvester after the predefined time.
# This happens regardless of whether the harvester has finished reading the file or not.
# By default this option is disabled.
# Note: Potential data loss. Make sure to read and understand the docs for this option.
#close.reader.after_interval: 0

#----------------------------- Stdin input -------------------------------
# Configuration to use stdin input
#- type: stdin
Expand Down
29 changes: 29 additions & 0 deletions filebeat/filebeat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,35 @@ filebeat.inputs:
# Note: After is the equivalent to previous and before is the equivalent to next in Logstash
#multiline.match: after

# filestream is an experimental input. It is going to replace log input in the future.
- type: filestream

# Change to true to enable this input configuration.
enabled: false

# Paths that should be crawled and fetched. Glob based paths.
paths:
- /var/log/*.log
#- c:\programdata\elasticsearch\logs\*

# Exclude lines. A list of regular expressions to match. It drops the lines that are
# matching any regular expression from the list.
#exclude_lines: ['^DBG']

# Include lines. A list of regular expressions to match. It exports the lines that are
# matching any regular expression from the list.
#include_lines: ['^ERR', '^WARN']

# Exclude files. A list of regular expressions to match. Filebeat drops the files that
# are matching any regular expression from the list. By default, no files are dropped.
#prospector.scanner.exclude_files: ['.gz$']

# Optional additional fields. These fields can be freely picked
# to add additional information to the crawled log files for filtering
#fields:
# level: debug
# review: 1

# ============================== Filebeat modules ==============================

filebeat.config.modules:
Expand Down
Loading

0 comments on commit ee33d01

Please sign in to comment.