diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 94fda3776fc..0af010a5f30 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -139,6 +139,8 @@ https://github.com/elastic/beats/compare/v6.6.0...6.x[Check the HEAD diff] *Winlogbeat* +- Prevent Winlogbeat from dropping events with invalid XML. {pull}11006{11006} + *Functionbeat* - Ensure that functionbeat is logging at info level not debug. {issue}10262[10262] diff --git a/winlogbeat/eventlog/eventlog.go b/winlogbeat/eventlog/eventlog.go index 1b6dcc475e1..fab0c12ab4d 100644 --- a/winlogbeat/eventlog/eventlog.go +++ b/winlogbeat/eventlog/eventlog.go @@ -127,6 +127,13 @@ func (e Record) ToEvent() beat.Event { userData := addPairs(m, "user_data", e.UserData.Pairs) addOptional(userData, "xml_name", e.UserData.Name.Local) + // Errors + if len(e.RenderErr) == 1 { + addOptional(m, "message_error", e.RenderErr[0]) + } else { + addOptional(m, "message_error", e.RenderErr) + } + return beat.Event{ Timestamp: e.TimeCreated.SystemTime, Fields: m, diff --git a/winlogbeat/eventlog/eventlogging_test.go b/winlogbeat/eventlog/eventlogging_test.go index b102a80fd2a..3f5acd67955 100644 --- a/winlogbeat/eventlog/eventlogging_test.go +++ b/winlogbeat/eventlog/eventlogging_test.go @@ -267,7 +267,7 @@ func TestFormatMessageWithLargeMessage(t *testing.T) { assert.Len(t, records, 1) for _, record := range records { t.Log(record) - assert.Equal(t, "The data area passed to a system call is too small.", record.RenderErr) + assert.Equal(t, []string{"The data area passed to a system call is too small."}, record.RenderErr) } } diff --git a/winlogbeat/eventlog/wineventlog.go b/winlogbeat/eventlog/wineventlog.go index eec37549666..36df4289677 100644 --- a/winlogbeat/eventlog/wineventlog.go +++ b/winlogbeat/eventlog/wineventlog.go @@ -172,13 +172,7 @@ func (l *winEventLog) Read() ([]Record, error) { continue } - r, err := l.buildRecordFromXML(l.outputBuf.Bytes(), err) - if err != nil { - logp.Err("%s Dropping event. %v", l.logPrefix, err) - incrementMetric(dropReasons, err) - continue - } - + r, _ := l.buildRecordFromXML(l.outputBuf.Bytes(), err) r.Offset = checkpoint.EventLogState{ Name: l.channelName, RecordNumber: r.RecordID, @@ -229,9 +223,12 @@ func (l *winEventLog) eventHandles(maxRead int) ([]win.EvtHandle, int, error) { } func (l *winEventLog) buildRecordFromXML(x []byte, recoveredErr error) (Record, error) { + includeXML := l.config.IncludeXML e, err := sys.UnmarshalEventXML(x) if err != nil { - return Record{}, fmt.Errorf("Failed to unmarshal XML='%s'. %v", x, err) + e.RenderErr = append(e.RenderErr, err.Error()) + // Add raw XML to event.original when decoding fails + includeXML = true } err = sys.PopulateAccount(&e.User) @@ -243,9 +240,9 @@ func (l *winEventLog) buildRecordFromXML(x []byte, recoveredErr error) (Record, if e.RenderErrorCode != 0 { // Convert the render error code to an error message that can be // included in the "message_error" field. - e.RenderErr = syscall.Errno(e.RenderErrorCode).Error() + e.RenderErr = append(e.RenderErr, syscall.Errno(e.RenderErrorCode).Error()) } else if recoveredErr != nil { - e.RenderErr = recoveredErr.Error() + e.RenderErr = append(e.RenderErr, recoveredErr.Error()) } if e.Level == "" { @@ -262,7 +259,7 @@ func (l *winEventLog) buildRecordFromXML(x []byte, recoveredErr error) (Record, Event: e, } - if l.config.IncludeXML { + if includeXML { r.XML = string(x) } diff --git a/winlogbeat/sys/event.go b/winlogbeat/sys/event.go index f113ba87f58..e9a9f473aac 100644 --- a/winlogbeat/sys/event.go +++ b/winlogbeat/sys/event.go @@ -26,7 +26,8 @@ import ( // UnmarshalEventXML unmarshals the given XML into a new Event. func UnmarshalEventXML(rawXML []byte) (Event, error) { var event Event - err := xml.Unmarshal(rawXML, &event) + decoder := xml.NewDecoder(newXMLSafeReader(rawXML)) + err := decoder.Decode(&event) return event, err } @@ -60,7 +61,7 @@ type Event struct { // ProcessingErrorData RenderErrorCode uint32 `xml:"ProcessingErrorData>ErrorCode"` RenderErrorDataItemName string `xml:"ProcessingErrorData>DataItemName"` - RenderErr string + RenderErr []string } // Provider identifies the provider that logged the event. The Name and GUID diff --git a/winlogbeat/sys/event_test.go b/winlogbeat/sys/event_test.go index 91797dcc59c..c77c05c2ea3 100644 --- a/winlogbeat/sys/event_test.go +++ b/winlogbeat/sys/event_test.go @@ -168,6 +168,23 @@ func TestXML(t *testing.T) { } } +func TestInvalidXML(t *testing.T) { + eventXML := fmt.Sprintf(` + + + + {00000000-0000-0000-0000-000000000000} + じゃあ宇宙カウボーイ。。。%s + + + +`, "\x1b") + _, err := UnmarshalEventXML([]byte(eventXML)) + if !assert.NoError(t, err) { + assert.Equal(t, err.Error(), "XML syntax error on line 6: illegal character code U+001B") + } +} + func BenchmarkXMLUnmarshal(b *testing.B) { for i := 0; i < b.N; i++ { _, err := UnmarshalEventXML([]byte(allXML)) diff --git a/winlogbeat/sys/eventlogging/eventlogging_windows.go b/winlogbeat/sys/eventlogging/eventlogging_windows.go index 6c5b45c0d3e..29d2c47fdef 100644 --- a/winlogbeat/sys/eventlogging/eventlogging_windows.go +++ b/winlogbeat/sys/eventlogging/eventlogging_windows.go @@ -163,7 +163,7 @@ func RenderEvents( } // Parse the UTF-16 message insert strings. if err = insertStrings.Parse(record, recordBuf); err != nil { - event.RenderErr = err.Error() + event.RenderErr = append(event.RenderErr, err.Error()) events = append(events, event) continue } @@ -176,7 +176,7 @@ func RenderEvents( event.Message, err = formatMessage(record.sourceName, record.eventID, lang, insertStrings.Pointer(), buffer, pubHandleProvider) if err != nil { - event.RenderErr = err.Error() + event.RenderErr = append(event.RenderErr, err.Error()) if errno, ok := err.(syscall.Errno); ok { event.RenderErrorCode = uint32(errno) } diff --git a/winlogbeat/sys/xmlreader.go b/winlogbeat/sys/xmlreader.go new file mode 100644 index 00000000000..8eb23e1ce14 --- /dev/null +++ b/winlogbeat/sys/xmlreader.go @@ -0,0 +1,78 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package sys + +import ( + "bytes" + "fmt" + "io" + "unicode" + "unicode/utf8" +) + +// The type xmlSafeReader escapes UTF control characters in the io.Reader +// it wraps, so that it can be fed to Go's xml parser. +// Characters for which `unicode.IsControl` returns true will be output as +// an hexadecimal unicode escape sequence "\\uNNNN". +type xmlSafeReader struct { + inner io.Reader + backing [256]byte + buf []byte + code []byte +} + +var _ io.Reader = (*xmlSafeReader)(nil) + +func output(n int) (int, error) { + if n == 0 { + return 0, io.EOF + } + return n, nil +} + +// Read implements the io.Reader interface. +func (r *xmlSafeReader) Read(d []byte) (n int, err error) { + if len(r.code) > 0 { + n = copy(d, r.code) + r.code = r.code[n:] + return output(n) + } + if len(r.buf) == 0 { + n, _ = r.inner.Read(r.backing[:]) + r.buf = r.backing[:n] + } + for i := 0; i < len(r.buf); { + code, size := utf8.DecodeRune(r.buf[i:]) + if unicode.IsControl(code) { + n = copy(d, r.buf[:i]) + r.buf = r.buf[n+1:] + r.code = []byte(fmt.Sprintf("\\u%04x", code)) + m := copy(d[n:], r.code) + r.code = r.code[m:] + return output(n + m) + } + i += size + } + n = copy(d, r.buf) + r.buf = r.buf[n:] + return output(n) +} + +func newXMLSafeReader(rawXML []byte) io.Reader { + return &xmlSafeReader{inner: bytes.NewReader(rawXML)} +}