Skip to content

Commit

Permalink
Merge pull request #3015 from fluent/rfc5424-string-parser
Browse files Browse the repository at this point in the history
syslog_parser: Add string parser for rfc5424
  • Loading branch information
repeatedly authored Jun 2, 2020
2 parents 35c8245 + 3226d94 commit 60e4e8f
Show file tree
Hide file tree
Showing 2 changed files with 205 additions and 16 deletions.
146 changes: 143 additions & 3 deletions lib/fluent/plugin/parser_syslog.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def initialize
@space_count = nil
@space_count_rfc5424 = nil
@skip_space_count = false
@skip_space_count_rfc5424 = false
end

def configure(conf)
Expand All @@ -88,18 +89,26 @@ class << self
end
RFC3164_WITHOUT_TIME_AND_PRI_REGEXP
when :rfc5424
class << self
alias_method :parse, :parse_rfc5424_regex
if @regexp_parser
class << self
alias_method :parse, :parse_rfc5424_regex
end
else
class << self
alias_method :parse, :parse_rfc5424
end
end
@time_format = @rfc5424_time_format unless conf.has_key?('time_format')
@support_rfc5424_without_subseconds = true
@skip_space_count_rfc5424 = @time_format.count(' ').zero?
RFC5424_WITHOUT_TIME_AND_PRI_REGEXP
when :auto
class << self
alias_method :parse, :parse_auto
end
@time_parser_rfc3164 = time_parser_create(format: @time_format)
@time_parser_rfc5424 = time_parser_create(format: @rfc5424_time_format)
@skip_space_count_rfc5424 = @rfc5424_time_format.count(' ').zero?
nil
end

Expand Down Expand Up @@ -127,7 +136,11 @@ def parse_auto(text, &block)
@regexp = RFC5424_WITHOUT_TIME_AND_PRI_REGEXP
@time_parser = @time_parser_rfc5424
@support_rfc5424_without_subseconds = true
parse_rfc5424_regex(text, &block)
if @regexp_parser
parse_rfc5424_regex(text, &block)
else
parse_rfc5424(text, &block)
end
else
@regexp = RFC3164_WITHOUT_TIME_AND_PRI_REGEXP
@time_parser = @time_parser_rfc3164
Expand Down Expand Up @@ -350,6 +363,133 @@ def parse_rfc3164(text, &block)

yield time, record
end

# RFC 5424 NILVALUE: a lone hyphen standing in for an absent field.
NILVALUE = '-'.freeze

# Parses one RFC 5424 syslog line by scanning the string directly instead of
# matching a regexp.
#
# Fields are consumed left to right, each terminated by SPLIT_CHAR: an
# optional "<PRI>" header (only when @with_priority is set), the timestamp,
# host, ident, pid, msgid, the structured data ("extradata") and finally the
# remaining text as the message.
#
# @param text [String] a single syslog line
# @yield [time, record] the parsed time and a Hash holding 'host', 'ident',
#   'pid', 'msgid' and 'extradata', plus 'pri', 'message' and 'time' when
#   they apply. Yields (nil, nil) when the line is truncated or malformed.
# @raise [Fluent::TimeParser::TimeParseError] re-raised when the timestamp
#   cannot be parsed and the without-subseconds fallback is disabled; errors
#   raised by the fallback parser itself are not rescued here.
def parse_rfc5424(text, &block)
  pri = nil
  cursor = 0
  if @with_priority
    unless text.start_with?('<'.freeze)
      yield nil, nil
      return
    end
    i = text.index('>'.freeze, 1)
    # A missing '>' (nil) or an empty "<>" is a malformed priority header.
    if i.nil? || i < 2
      yield nil, nil
      return
    end
    pri = text.slice(1, i - 1).to_i
    # Skip whatever sits between '>' and the first separator
    # (the VERSION field in RFC 5424).
    i = text.index(SPLIT_CHAR, i)
    unless i
      yield nil, nil
      return
    end
    cursor = i + 1
  end

  # timestamp part
  if @skip_space_count_rfc5424
    # The time format contains no separator, so the timestamp is one token.
    i = text.index(SPLIT_CHAR, cursor)
    unless i
      yield nil, nil
      return
    end
    time_str = text.slice(cursor, i - cursor)
    cursor = i + 1
  else
    # The time format itself contains separators: walk over that many tokens,
    # tolerating runs of repeated separators between them.
    # NOTE(review): a separate @space_count_rfc5424 exists in this class;
    # confirm that @space_count is the intended counter here.
    i = cursor - 1
    sq = false
    @space_count.times do
      while text[i + 1] == SPLIT_CHAR
        sq = true
        i += 1
      end
      i = text.index(SPLIT_CHAR, i + 1)
      break unless i # line ended before the timestamp was complete
    end
    unless i
      yield nil, nil
      return
    end

    time_str = text.slice(cursor, i - cursor)
    # Collapse repeated separators so the string matches the time format.
    time_str = time_str.squeeze(SPLIT_CHAR) if sq
    cursor = i + 1
  end

  # Repeat same code for the performance

  # host part
  i = text.index(SPLIT_CHAR, cursor)
  unless i
    yield nil, nil
    return
  end
  slice_size = i - cursor
  host = text.slice(cursor, slice_size)
  cursor += slice_size + 1

  # ident part
  i = text.index(SPLIT_CHAR, cursor)
  unless i
    yield nil, nil
    return
  end
  slice_size = i - cursor
  ident = text.slice(cursor, slice_size)
  cursor += slice_size + 1

  # pid part
  i = text.index(SPLIT_CHAR, cursor)
  unless i
    yield nil, nil
    return
  end
  slice_size = i - cursor
  pid = text.slice(cursor, slice_size)
  cursor += slice_size + 1

  # msgid part
  i = text.index(SPLIT_CHAR, cursor)
  unless i
    yield nil, nil
    return
  end
  slice_size = i - cursor
  msgid = text.slice(cursor, slice_size)
  cursor += slice_size + 1

  record = {'host' => host, 'ident' => ident, 'pid' => pid, 'msgid' => msgid}
  record['pri'] = pri if pri

  # extradata part
  if text[cursor] == NILVALUE
    record['extradata'] = NILVALUE
    cursor += 1
  else
    start = cursor
    # Structured data ends at the first "] "; a ']' not followed by a
    # separator is treated as part of the structured data.
    i = text.index('] '.freeze, cursor)
    extradata = if i
                  diff = i + 1 - start # calculate ']' position
                  cursor += diff
                  text.slice(start, diff)
                else # No message part case
                  # cursor is a character offset, so compare and clamp with
                  # the character length rather than the byte size.
                  cursor = text.length
                  text.slice(start, cursor)
                end
    # tr! with an empty replacement deletes every backslash.
    extradata.tr!("\\".freeze, ''.freeze)
    record['extradata'] = extradata
  end

  # message part
  if cursor != text.length
    # cursor rests on the separator before the message; skip it.
    msg = text[cursor + 1..-1]
    msg.chomp!
    record['message'] = msg
  end

  time = begin
           @time_parser.parse(time_str)
         rescue Fluent::TimeParser::TimeParseError
           if @support_rfc5424_without_subseconds
             @time_parser_rfc5424_without_subseconds.parse(time_str)
           else
             raise
           end
         end
  record['time'] = time_str if @keep_time_key

  yield time, record
end
end
end
end
75 changes: 62 additions & 13 deletions test/plugin/test_parser_syslog.rb
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,13 @@ def test_parse_various_characters_for_tag_with_priority(param)
end

class TestRFC5424Regexp < self
def test_parse_with_rfc5424_message
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_message(param)
@parser.configure(
'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z',
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -216,11 +218,13 @@ def test_parse_with_rfc5424_message
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end

def test_parse_with_rfc5424_message_trailing_eol
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_message_trailing_eol(param)
@parser.configure(
'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z',
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = "<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi, from Fluentd!\n"
@parser.instance.parse(text) do |time, record|
Expand All @@ -233,11 +237,13 @@ def test_parse_with_rfc5424_message_trailing_eol
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end

def test_parse_with_rfc5424_multiline_message
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_multiline_message(param)
@parser.configure(
'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z',
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = "<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi,\nfrom\nFluentd!"
@parser.instance.parse(text) do |time, record|
Expand All @@ -250,10 +256,12 @@ def test_parse_with_rfc5424_multiline_message
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end

def test_parse_with_rfc5424_message_and_without_priority
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_message_and_without_priority(param)
@parser.configure(
'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z',
'message_format' => 'rfc5424',
'parser_type' => param
)
text = '2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -266,10 +274,12 @@ def test_parse_with_rfc5424_message_and_without_priority
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end

def test_parse_with_rfc5424_empty_message_and_without_priority
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_empty_message_and_without_priority(param)
@parser.configure(
'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z',
'message_format' => 'rfc5424',
'parser_type' => param
)
text = '2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - -'
@parser.instance.parse(text) do |time, record|
Expand All @@ -282,10 +292,12 @@ def test_parse_with_rfc5424_empty_message_and_without_priority
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end

def test_parse_with_rfc5424_message_without_time_format
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_message_without_time_format(param)
@parser.configure(
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -297,10 +309,12 @@ def test_parse_with_rfc5424_message_without_time_format
end
end

def test_parse_with_rfc5424_message_with_priority_and_pid
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_message_with_priority_and_pid(param)
@parser.configure(
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = '<28>1 2018-09-26T15:54:26.620412+09:00 machine minissdpd 1298 - - peer 192.168.0.5:50123 is not from a LAN'
@parser.instance.parse(text) do |time, record|
Expand All @@ -312,11 +326,13 @@ def test_parse_with_rfc5424_message_with_priority_and_pid
end
end

def test_parse_with_rfc5424_structured_message
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_structured_message(param)
@parser.configure(
'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z',
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"] [Hi] from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -329,11 +345,13 @@ def test_parse_with_rfc5424_structured_message
end
end

def test_parse_with_rfc5424_multiple_structured_message
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_multiple_structured_message(param)
@parser.configure(
'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z',
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"][exampleSDID@20224 class="high"] Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -346,11 +364,13 @@ def test_parse_with_rfc5424_multiple_structured_message
end
end

def test_parse_with_rfc5424_message_includes_right_bracket
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_message_includes_right_bracket(param)
@parser.configure(
'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z',
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"] [Hi] from Fluentd]!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -363,11 +383,13 @@ def test_parse_with_rfc5424_message_includes_right_bracket
end
end

def test_parse_with_rfc5424_empty_message
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_empty_message(param)
@parser.configure(
'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z',
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"]'
@parser.instance.parse(text) do |time, record|
Expand All @@ -380,10 +402,35 @@ def test_parse_with_rfc5424_empty_message
end
end

def test_parse_with_rfc5424_message_without_subseconds
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_space_empty_message(param)
  # The input ends with a single space right after the structured data.
  # This test expects the string parser to report an empty message and the
  # regexp parser to yield nil for both time and record.
  @parser.configure(
    'message_format' => 'rfc5424',
    'with_priority' => true,
    'parser_type' => param
  )
  structured_data = '[exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"]'
  line = "<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 #{structured_data} "
  @parser.instance.parse(line) do |parsed_time, parsed_record|
    if param != 'string'
      assert_nil(parsed_time)
      assert_nil(parsed_record)
    else
      expected_time = event_time("2017-02-06T13:14:15.003Z", format: '%Y-%m-%dT%H:%M:%S.%L%z')
      assert_equal(expected_time, parsed_time)
      assert_equal("11111", parsed_record["pid"])
      assert_equal("ID24224", parsed_record["msgid"])
      assert_equal(structured_data, parsed_record["extradata"])
      assert_equal('', parsed_record["message"])
    end
  end
end

data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_message_without_subseconds(param)
@parser.configure(
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = '<16>1 2017-02-06T13:14:15Z 192.168.0.1 fluentd - - - Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -395,10 +442,12 @@ def test_parse_with_rfc5424_message_without_subseconds
end
end

def test_parse_with_rfc5424_message_both_timestamp
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_message_both_timestamp(param)
@parser.configure(
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = '<16>1 2017-02-06T13:14:15Z 192.168.0.1 fluentd - - - Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand Down

0 comments on commit 60e4e8f

Please sign in to comment.