From 4134cc041fc23e2272c10f7a79eefa6fe81cc593 Mon Sep 17 00:00:00 2001 From: Andrew Kroh Date: Wed, 26 Jan 2022 20:01:10 -0500 Subject: [PATCH] [Filebeat] Fix panic in decode_cef when recovering from invalid data (#30038) * Fix panic in decode_cef when recovering from invalid data When recovering from an invalid extension value the escape sequence state was not cleared. This caused the parser to attempt to unescape the next extension which resulted in invalid data or a panic. Fixes #30010 * Encapsulate non-ragel state Document and encapsulate the non-ragel state variables. ``` $ benchcmp before.txt after.txt benchmark old ns/op new ns/op delta BenchmarkEventUnpack-12 1991 1544 -22.45% benchmark old allocs new allocs delta BenchmarkEventUnpack-12 13 13 +0.00% benchmark old bytes new bytes delta BenchmarkEventUnpack-12 642 642 +0.00% ``` (cherry picked from commit 47b8d02ab6d7a8ed4ed8d08194326b76971b285c) --- CHANGELOG.next.asciidoc | 1 + .../filebeat/processors/decode_cef/cef/cef.go | 23 +- .../filebeat/processors/decode_cef/cef/cef.rl | 72 +++-- .../processors/decode_cef/cef/cef_test.go | 12 + .../processors/decode_cef/cef/parser.go | 284 +++++++++--------- 5 files changed, 219 insertions(+), 173 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index b512fd5c46c8..75d40231fec9 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -110,6 +110,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - ibmmq: Fixed `@timestamp` not being populated with correct values. {pull}29773[29773] - Fix using log_group_name_prefix in aws-cloudwatch input. {pull}29695[29695] - aws-s3: Improve gzip detection to avoid false negatives. {issue}29968[29968] +- decode_cef: Fix panic when recovering from invalid CEF extensions that contain escape characters. {issue}30010[30010] *Heartbeat* diff --git a/x-pack/filebeat/processors/decode_cef/cef/cef.go b/x-pack/filebeat/processors/decode_cef/cef/cef.go index 73808171aceb..70480335f0ad 100644 --- a/x-pack/filebeat/processors/decode_cef/cef/cef.go +++ b/x-pack/filebeat/processors/decode_cef/cef/cef.go @@ -152,28 +152,31 @@ func (e *Event) Unpack(data string, opts ...Option) error { return multierr.Combine(errs...) } +type escapePosition struct { + start, end int +} + // replaceEscapes replaces the escaped characters contained in v with their // unescaped value. -func replaceEscapes(v string, startOffset int, escapes []int) string { +func replaceEscapes(v string, startOffset int, escapes []escapePosition) string { if len(escapes) == 0 { return v } // Adjust escape offsets relative to the start offset of v. for i := 0; i < len(escapes); i++ { - escapes[i] = escapes[i] - startOffset + escapes[i].start = escapes[i].start - startOffset + escapes[i].end = escapes[i].end - startOffset } var buf strings.Builder - var end int + var prevEnd int // Iterate over escapes and replace them. - for i := 0; i < len(escapes); i += 2 { - start := escapes[i] - buf.WriteString(v[end:start]) + for _, escape := range escapes { + buf.WriteString(v[prevEnd:escape.start]) - end = escapes[i+1] - value := v[start:end] + value := v[escape.start:escape.end] switch value { case `\n`: @@ -186,8 +189,10 @@ func replaceEscapes(v string, startOffset int, escapes []int) string { buf.WriteString(value[1:]) } } + + prevEnd = escape.end } - buf.WriteString(v[end:]) + buf.WriteString(v[prevEnd:]) return buf.String() } diff --git a/x-pack/filebeat/processors/decode_cef/cef/cef.rl b/x-pack/filebeat/processors/decode_cef/cef/cef.rl index 09b7fd6e962e..256d5ee86723 100644 --- a/x-pack/filebeat/processors/decode_cef/cef/cef.rl +++ b/x-pack/filebeat/processors/decode_cef/cef/cef.rl @@ -15,17 +15,31 @@ import ( variable pe pe; }%% +type cefState struct { + key string // Extension key. + valueStart int // Start index of extension value. + valueEnd int // End index of extension value. + escapes []escapePosition // Array of escapes indices within the current value. +} + +func (s *cefState) reset() { + s.key = "" + s.valueStart = 0 + s.valueEnd = 0 + s.escapes = s.escapes[:0] +} + +func (s *cefState) pushEscape(start, end int) { + s.escapes = append(s.escapes, escapePosition{start, end}) +} + // unpack unpacks a CEF message. func (e *Event) unpack(data string) error { cs, p, pe, eof := 0, 0, len(data), len(data) mark, mark_slash := 0, 0 - var escapes []int - - // Extension key. - var extKey string - // Extension value start and end indices. - extValueStart, extValueEnd := 0, 0 + // state related to CEF values. + var state cefState // recoveredErrs are problems with the message that the parser was able to // recover from (though the parsing might not be "correct"). @@ -42,62 +56,62 @@ func (e *Event) unpack(data string) error { mark_slash = p } action mark_escape { - escapes = append(escapes, mark_slash, p) + state.pushEscape(mark_slash, p) } action version { e.Version, _ = strconv.Atoi(data[mark:p]) } action device_vendor { - e.DeviceVendor = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.DeviceVendor = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() } action device_product { - e.DeviceProduct = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.DeviceProduct = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() } action device_version { - e.DeviceVersion = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.DeviceVersion = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() } action device_event_class_id { - e.DeviceEventClassID = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.DeviceEventClassID = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() } action name { - e.Name = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.Name = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() } action severity { e.Severity = data[mark:p] } action extension_key { // A new extension key marks the end of the last extension value. - if len(extKey) > 0 && extValueStart <= mark - 1 { - e.pushExtension(extKey, replaceEscapes(data[extValueStart:mark-1], extValueStart, escapes)) - extKey, extValueStart, extValueEnd, escapes = "", 0, 0, escapes[:0] + if len(state.key) > 0 && state.valueStart <= mark - 1 { + e.pushExtension(state.key, replaceEscapes(data[state.valueStart:mark-1], state.valueStart, state.escapes)) + state.reset() } - extKey = data[mark:p] + state.key = data[mark:p] } action extension_value_start { - extValueStart = p; - extValueEnd = p + state.valueStart = p; + state.valueEnd = p } action extension_value_mark { - extValueEnd = p+1 + state.valueEnd = p+1 } action extension_eof { // Reaching the EOF marks the end of the final extension value. - if len(extKey) > 0 && extValueStart <= extValueEnd { - e.pushExtension(extKey, replaceEscapes(data[extValueStart:extValueEnd], extValueStart, escapes)) - extKey, extValueStart, extValueEnd, escapes = "", 0, 0, escapes[:0] + if len(state.key) > 0 && state.valueStart <= state.valueEnd { + e.pushExtension(state.key, replaceEscapes(data[state.valueStart:state.valueEnd], state.valueStart, state.escapes)) + state.reset() } } action extension_err { - recoveredErrs = append(recoveredErrs, fmt.Errorf("malformed value for %s at pos %d", extKey, p+1)) + recoveredErrs = append(recoveredErrs, fmt.Errorf("malformed value for %s at pos %d", state.key, p+1)) fhold; fnext gobble_extension; } action recover_next_extension { - extKey, extValueStart, extValueEnd = "", 0, 0 + state.reset() // Resume processing at p, the start of the next extension key. p = mark; fnext extensions; diff --git a/x-pack/filebeat/processors/decode_cef/cef/cef_test.go b/x-pack/filebeat/processors/decode_cef/cef/cef_test.go index 7ab286e1f2ce..51f3c937ebf0 100644 --- a/x-pack/filebeat/processors/decode_cef/cef/cef_test.go +++ b/x-pack/filebeat/processors/decode_cef/cef/cef_test.go @@ -389,6 +389,18 @@ func TestEventUnpack(t *testing.T) { "msg": StringField("Newlines in messages\nare allowed.\r\nAnd so are carriage feeds\\newlines\\=."), }, e.Extensions) }) + + t.Run("error recovery with escape", func(t *testing.T) { + // Ensure no panic or regression of https://github.com/elastic/beats/issues/30010. + // key1 contains an escape, but then an invalid non-escaped =. + // This triggers the error recovery to try to read the next key. + var e Event + err := e.Unpack(`CEF:0|||||||key1=\\hi= key2=a`) + assert.Error(t, err) + assert.Equal(t, map[string]*Field{ + "key2": UndocumentedField("a"), + }, e.Extensions) + }) } func TestEventUnpackWithFullExtensionNames(t *testing.T) { diff --git a/x-pack/filebeat/processors/decode_cef/cef/parser.go b/x-pack/filebeat/processors/decode_cef/cef/parser.go index b9e19b03e7c2..1968ea3ce425 100644 --- a/x-pack/filebeat/processors/decode_cef/cef/parser.go +++ b/x-pack/filebeat/processors/decode_cef/cef/parser.go @@ -33,17 +33,31 @@ const cef_en_main_cef_extensions int = 29 //line cef.rl:16 +type cefState struct { + key string // Extension key. + valueStart int // Start index of extension value. + valueEnd int // End index of extension value. + escapes []escapePosition // Array of escapes indices within the current value. +} + +func (s *cefState) reset() { + s.key = "" + s.valueStart = 0 + s.valueEnd = 0 + s.escapes = s.escapes[:0] +} + +func (s *cefState) pushEscape(start, end int) { + s.escapes = append(s.escapes, escapePosition{start, end}) +} + // unpack unpacks a CEF message. func (e *Event) unpack(data string) error { cs, p, pe, eof := 0, 0, len(data), len(data) mark, mark_slash := 0, 0 - var escapes []int - // Extension key. - var extKey string - - // Extension value start and end indices. - extValueStart, extValueEnd := 0, 0 + // state related to CEF values. + var state cefState // recoveredErrs are problems with the message that the parser was able to // recover from (though the parsing might not be "correct"). @@ -51,12 +65,12 @@ func (e *Event) unpack(data string) error { e.init(data) -//line parser.go:56 +//line parser.go:70 { cs = cef_start } -//line parser.go:61 +//line parser.go:75 { if (p) == (pe) { goto _test_eof @@ -961,299 +975,299 @@ func (e *Event) unpack(data string) error { goto f26 f0: -//line cef.rl:38 +//line cef.rl:52 mark = p goto _again f4: -//line cef.rl:41 +//line cef.rl:55 mark_slash = p goto _again f6: -//line cef.rl:44 +//line cef.rl:58 - escapes = append(escapes, mark_slash, p) + state.pushEscape(mark_slash, p) goto _again f1: -//line cef.rl:47 +//line cef.rl:61 e.Version, _ = strconv.Atoi(data[mark:p]) goto _again f5: -//line cef.rl:50 +//line cef.rl:64 - e.DeviceVendor = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.DeviceVendor = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() goto _again f10: -//line cef.rl:54 +//line cef.rl:68 - e.DeviceProduct = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.DeviceProduct = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() goto _again f13: -//line cef.rl:58 +//line cef.rl:72 - e.DeviceVersion = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.DeviceVersion = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() goto _again f16: -//line cef.rl:62 +//line cef.rl:76 - e.DeviceEventClassID = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.DeviceEventClassID = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() goto _again f19: -//line cef.rl:66 +//line cef.rl:80 - e.Name = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.Name = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() goto _again f22: -//line cef.rl:70 +//line cef.rl:84 e.Severity = data[mark:p] goto _again f23: -//line cef.rl:73 +//line cef.rl:87 // A new extension key marks the end of the last extension value. - if len(extKey) > 0 && extValueStart <= mark-1 { - e.pushExtension(extKey, replaceEscapes(data[extValueStart:mark-1], extValueStart, escapes)) - extKey, extValueStart, extValueEnd, escapes = "", 0, 0, escapes[:0] + if len(state.key) > 0 && state.valueStart <= mark-1 { + e.pushExtension(state.key, replaceEscapes(data[state.valueStart:mark-1], state.valueStart, state.escapes)) + state.reset() } - extKey = data[mark:p] + state.key = data[mark:p] goto _again f29: -//line cef.rl:81 +//line cef.rl:95 - extValueStart = p - extValueEnd = p + state.valueStart = p + state.valueEnd = p goto _again f25: -//line cef.rl:85 +//line cef.rl:99 - extValueEnd = p + 1 + state.valueEnd = p + 1 goto _again f24: -//line cef.rl:95 +//line cef.rl:109 - recoveredErrs = append(recoveredErrs, fmt.Errorf("malformed value for %s at pos %d", extKey, p+1)) + recoveredErrs = append(recoveredErrs, fmt.Errorf("malformed value for %s at pos %d", state.key, p+1)) (p)-- cs = 33 goto _again f26: -//line cef.rl:99 +//line cef.rl:113 - extKey, extValueStart, extValueEnd = "", 0, 0 + state.reset() // Resume processing at p, the start of the next extension key. p = mark cs = 29 goto _again f2: -//line cef.rl:38 +//line cef.rl:52 mark = p -//line cef.rl:41 +//line cef.rl:55 mark_slash = p goto _again f3: -//line cef.rl:38 +//line cef.rl:52 mark = p -//line cef.rl:50 +//line cef.rl:64 - e.DeviceVendor = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.DeviceVendor = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() goto _again f9: -//line cef.rl:38 +//line cef.rl:52 mark = p -//line cef.rl:54 +//line cef.rl:68 - e.DeviceProduct = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.DeviceProduct = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() goto _again f12: -//line cef.rl:38 +//line cef.rl:52 mark = p -//line cef.rl:58 +//line cef.rl:72 - e.DeviceVersion = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.DeviceVersion = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() goto _again f15: -//line cef.rl:38 +//line cef.rl:52 mark = p -//line cef.rl:62 +//line cef.rl:76 - e.DeviceEventClassID = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.DeviceEventClassID = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() goto _again f18: -//line cef.rl:38 +//line cef.rl:52 mark = p -//line cef.rl:66 +//line cef.rl:80 - e.Name = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.Name = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() goto _again f21: -//line cef.rl:38 +//line cef.rl:52 mark = p -//line cef.rl:70 +//line cef.rl:84 e.Severity = data[mark:p] goto _again f33: -//line cef.rl:38 +//line cef.rl:52 mark = p -//line cef.rl:85 +//line cef.rl:99 - extValueEnd = p + 1 + state.valueEnd = p + 1 goto _again f7: -//line cef.rl:44 +//line cef.rl:58 - escapes = append(escapes, mark_slash, p) + state.pushEscape(mark_slash, p) -//line cef.rl:41 +//line cef.rl:55 mark_slash = p goto _again f8: -//line cef.rl:44 +//line cef.rl:58 - escapes = append(escapes, mark_slash, p) + state.pushEscape(mark_slash, p) -//line cef.rl:50 +//line cef.rl:64 - e.DeviceVendor = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.DeviceVendor = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() goto _again f11: -//line cef.rl:44 +//line cef.rl:58 - escapes = append(escapes, mark_slash, p) + state.pushEscape(mark_slash, p) -//line cef.rl:54 +//line cef.rl:68 - e.DeviceProduct = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.DeviceProduct = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() goto _again f14: -//line cef.rl:44 +//line cef.rl:58 - escapes = append(escapes, mark_slash, p) + state.pushEscape(mark_slash, p) -//line cef.rl:58 +//line cef.rl:72 - e.DeviceVersion = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.DeviceVersion = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() goto _again f17: -//line cef.rl:44 +//line cef.rl:58 - escapes = append(escapes, mark_slash, p) + state.pushEscape(mark_slash, p) -//line cef.rl:62 +//line cef.rl:76 - e.DeviceEventClassID = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.DeviceEventClassID = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() goto _again f20: -//line cef.rl:44 +//line cef.rl:58 - escapes = append(escapes, mark_slash, p) + state.pushEscape(mark_slash, p) -//line cef.rl:66 +//line cef.rl:80 - e.Name = replaceEscapes(data[mark:p], mark, escapes) - escapes = escapes[:0] + e.Name = replaceEscapes(data[mark:p], mark, state.escapes) + state.reset() goto _again f35: -//line cef.rl:44 +//line cef.rl:58 - escapes = append(escapes, mark_slash, p) + state.pushEscape(mark_slash, p) -//line cef.rl:85 +//line cef.rl:99 - extValueEnd = p + 1 + state.valueEnd = p + 1 goto _again f30: -//line cef.rl:81 +//line cef.rl:95 - extValueStart = p - extValueEnd = p + state.valueStart = p + state.valueEnd = p -//line cef.rl:41 +//line cef.rl:55 mark_slash = p goto _again f28: -//line cef.rl:81 +//line cef.rl:95 - extValueStart = p - extValueEnd = p + state.valueStart = p + state.valueEnd = p -//line cef.rl:85 +//line cef.rl:99 - extValueEnd = p + 1 + state.valueEnd = p + 1 goto _again f32: -//line cef.rl:85 +//line cef.rl:99 - extValueEnd = p + 1 + state.valueEnd = p + 1 -//line cef.rl:38 +//line cef.rl:52 mark = p @@ -1272,49 +1286,49 @@ func (e *Event) unpack(data string) error { if (p) == eof { switch _cef_eof_actions[cs] { case 32: -//line cef.rl:88 +//line cef.rl:102 // Reaching the EOF marks the end of the final extension value. - if len(extKey) > 0 && extValueStart <= extValueEnd { - e.pushExtension(extKey, replaceEscapes(data[extValueStart:extValueEnd], extValueStart, escapes)) - extKey, extValueStart, extValueEnd, escapes = "", 0, 0, escapes[:0] + if len(state.key) > 0 && state.valueStart <= state.valueEnd { + e.pushExtension(state.key, replaceEscapes(data[state.valueStart:state.valueEnd], state.valueStart, state.escapes)) + state.reset() } case 25: -//line cef.rl:95 +//line cef.rl:109 - recoveredErrs = append(recoveredErrs, fmt.Errorf("malformed value for %s at pos %d", extKey, p+1)) + recoveredErrs = append(recoveredErrs, fmt.Errorf("malformed value for %s at pos %d", state.key, p+1)) (p)-- cs = 33 case 35: -//line cef.rl:44 +//line cef.rl:58 - escapes = append(escapes, mark_slash, p) + state.pushEscape(mark_slash, p) -//line cef.rl:88 +//line cef.rl:102 // Reaching the EOF marks the end of the final extension value. - if len(extKey) > 0 && extValueStart <= extValueEnd { - e.pushExtension(extKey, replaceEscapes(data[extValueStart:extValueEnd], extValueStart, escapes)) - extKey, extValueStart, extValueEnd, escapes = "", 0, 0, escapes[:0] + if len(state.key) > 0 && state.valueStart <= state.valueEnd { + e.pushExtension(state.key, replaceEscapes(data[state.valueStart:state.valueEnd], state.valueStart, state.escapes)) + state.reset() } case 28: -//line cef.rl:81 +//line cef.rl:95 - extValueStart = p - extValueEnd = p + state.valueStart = p + state.valueEnd = p -//line cef.rl:88 +//line cef.rl:102 // Reaching the EOF marks the end of the final extension value. - if len(extKey) > 0 && extValueStart <= extValueEnd { - e.pushExtension(extKey, replaceEscapes(data[extValueStart:extValueEnd], extValueStart, escapes)) - extKey, extValueStart, extValueEnd, escapes = "", 0, 0, escapes[:0] + if len(state.key) > 0 && state.valueStart <= state.valueEnd { + e.pushExtension(state.key, replaceEscapes(data[state.valueStart:state.valueEnd], state.valueStart, state.escapes)) + state.reset() } -//line parser.go:1116 +//line parser.go:1130 } } @@ -1323,7 +1337,7 @@ func (e *Event) unpack(data string) error { } } -//line cef.rl:161 +//line cef.rl:175 // Check if state machine completed. if cs < cef_first_final {