diff --git a/winlogbeat/sys/strings.go b/winlogbeat/sys/strings.go
index 25f27a9d3a55..ecd71dcaaf87 100644
--- a/winlogbeat/sys/strings.go
+++ b/winlogbeat/sys/strings.go
@@ -6,27 +6,51 @@ import (
 	"unicode/utf16"
 )
 
-// UTF16BytesToString returns the Unicode code point sequence represented
-// by the UTF-16 buffer b.
+// UTF16BytesToString returns a string that is decoded from the UTF-16 bytes.
+// Decoding stops at the first null terminator if the buffer contains one.
+// The byte slice must be of even length otherwise an error will be returned.
+// The integer returned is the offset to the start of the next string within
+// the buffer if it exists, otherwise -1 is returned.
 func UTF16BytesToString(b []byte) (string, int, error) {
 	if len(b)%2 != 0 {
-		return "", 0, fmt.Errorf("Slice must have an even length (length=%d)",
-			len(b))
+		return "", 0, fmt.Errorf("Slice must have an even length (length=%d)", len(b))
+	}
+
+	offset := -1
+
+	// Find the null terminator if it exists and re-slice b. An index of 0 is
+	// valid: it means the buffer starts with an empty string.
+	if nullIndex := indexNullTerminator(b); nullIndex >= 0 {
+		if len(b) > nullIndex+2 {
+			offset = nullIndex + 2
+		}
+
+		b = b[:nullIndex]
 	}
 
-	offset := len(b)/2 + 2
 	s := make([]uint16, len(b)/2)
 	for i := range s {
 		s[i] = uint16(b[i*2]) + uint16(b[(i*2)+1])<<8
+	}
 
-		if s[i] == 0 {
-			s = s[0:i]
-			offset = i*2 + 2
-			break
+	return string(utf16.Decode(s)), offset, nil
+}
+
+// indexNullTerminator returns the index of a null terminator within a buffer
+// containing UTF-16 encoded data. If the null terminator is not found -1 is
+// returned.
+func indexNullTerminator(b []byte) int {
+	if len(b) < 2 {
+		return -1
+	}
+
+	for i := 0; i+1 < len(b); i += 2 {
+		if b[i] == 0 && b[i+1] == 0 {
+			return i
 		}
 	}
 
-	return string(utf16.Decode(s)), offset, nil
+	return -1
 }
 
 // RemoveWindowsLineEndings replaces carriage return line feed (CRLF) with
diff --git a/winlogbeat/sys/strings_test.go b/winlogbeat/sys/strings_test.go
new file mode 100644
index 000000000000..48cae4831aa5
--- /dev/null
+++ b/winlogbeat/sys/strings_test.go
@@ -0,0 +1,92 @@
+package sys
+
+import (
+	"bytes"
+	"encoding/binary"
+	"testing"
+	"unicode/utf16"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func toUTF16Bytes(in string) []byte {
+	var u16 []uint16 = utf16.Encode([]rune(in))
+	buf := &bytes.Buffer{}
+	binary.Write(buf, binary.LittleEndian, u16)
+	return buf.Bytes()
+}
+
+func TestUTF16BytesToString(t *testing.T) {
+	input := "abc白鵬翔\u145A6"
+	utf16Bytes := toUTF16Bytes(input)
+
+	output, _, err := UTF16BytesToString(utf16Bytes)
+	if err != nil {
+		t.Fatal(err)
+	}
+	assert.Equal(t, input, output)
+}
+
+func TestUTF16BytesToStringOffset(t *testing.T) {
+	in := bytes.Join([][]byte{toUTF16Bytes("one"), toUTF16Bytes("two"), toUTF16Bytes("three")}, []byte{0, 0})
+
+	output, offset, err := UTF16BytesToString(in)
+	if err != nil {
+		t.Fatal(err)
+	}
+	assert.Equal(t, "one", output)
+	assert.Equal(t, 8, offset)
+
+	in = in[offset:]
+	output, offset, err = UTF16BytesToString(in)
+	if err != nil {
+		t.Fatal(err)
+	}
+	assert.Equal(t, "two", output)
+	assert.Equal(t, 8, offset)
+
+	in = in[offset:]
+	output, offset, err = UTF16BytesToString(in)
+	if err != nil {
+		t.Fatal(err)
+	}
+	assert.Equal(t, "three", output)
+	assert.Equal(t, -1, offset)
+}
+
+// TestUTF16BytesToStringLeadingNull verifies that a null terminator at the
+// very start of the buffer is decoded as an empty string.
+func TestUTF16BytesToStringLeadingNull(t *testing.T) {
+	in := append([]byte{0, 0}, toUTF16Bytes("next")...)
+
+	output, offset, err := UTF16BytesToString(in)
+	if err != nil {
+		t.Fatal(err)
+	}
+	assert.Equal(t, "", output)
+	assert.Equal(t, 2, offset)
+}
+
+func BenchmarkUTF16BytesToString(b *testing.B) {
+	utf16Bytes := toUTF16Bytes("A logon was attempted using explicit credentials.")
+
+	b.Run("simple_string", func(b *testing.B) {
+		b.ResetTimer()
+
+		for i := 0; i < b.N; i++ {
+			UTF16BytesToString(utf16Bytes)
+		}
+	})
+
+	// Buffer larger than the string. The padded copy is built once, outside the
+	// closure, because the closure may be invoked several times per benchmark.
+	largeBuf := make([]byte, len(utf16Bytes)+2048)
+	copy(largeBuf, utf16Bytes)
+	b.Run("larger_buffer", func(b *testing.B) {
+		b.ResetTimer()
+
+		for i := 0; i < b.N; i++ {
+			UTF16BytesToString(largeBuf)
+		}
+	})
+}