Skip to content

Commit

Permalink
[libbeat] Escape BOM on JsonReader before trying to decode line (#11661)
Browse files Browse the repository at this point in the history
* fix json bom + testing
  • Loading branch information
michalpristas authored May 21, 2019
1 parent d3ef979 commit eca4b19
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 8 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
- Not hiding error in case of http failure using elastic fetcher {pull}11604[11604]
- Relax validation of the X-Pack license UID value. {issue}11640[11640]
- Fix a parsing error with the X-Pack license check on 32-bit system. {issue}11650[11650]
- Escape BOM on JsonReader before trying to decode line {pull}11661[11661]
- Fix ILM policy always being overwritten. {pull}11671[11671]
- Fix template always being overwritten. {pull}11671[11671]
- Fix matching of string arrays in contains condition. {pull}11691[11691]
Expand Down
7 changes: 0 additions & 7 deletions filebeat/input/log/harvester.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
package log

import (
"bytes"
"errors"
"fmt"
"io"
Expand Down Expand Up @@ -283,12 +282,6 @@ func (h *Harvester) Run() error {
return nil
}

// Strip UTF-8 BOM if beginning of file
// As all BOMS are converted to UTF-8 it is enough to only remove this one
if h.state.Offset == 0 {
message.Content = bytes.Trim(message.Content, "\xef\xbb\xbf")
}

// Get copy of state to work on
// This is important in case sending is not successful so on shutdown
// the old offset is reported
Expand Down
3 changes: 2 additions & 1 deletion libbeat/reader/readfile/encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package readfile

import (
"bytes"
"io"
"time"

Expand Down Expand Up @@ -53,7 +54,7 @@ func (r EncoderReader) Next() (reader.Message, error) {
// Creating message object
return reader.Message{
Ts: time.Now(),
Content: c,
Content: bytes.Trim(c, "\xef\xbb\xbf"),
Bytes: sz,
}, err
}
73 changes: 73 additions & 0 deletions libbeat/reader/readfile/encode_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package readfile

import (
"bytes"
"testing"

"github.com/stretchr/testify/assert"

"github.com/elastic/beats/libbeat/reader/readfile/encoding"
)

// TestEncodeLines checks that an EncoderReader configured with the "plain"
// encoding and a line-feed terminator yields one message per input line, and
// that a UTF-8 byte order mark (BOM, "\xef\xbb\xbf") at the start of a line is
// removed from the message content while a BOM in the middle of a line is
// left untouched.
func TestEncodeLines(t *testing.T) {
	testCases := map[string]struct {
		Input  []byte   // raw bytes fed to the reader
		Output []string // expected message contents, in order
	}{
		"simple":            {[]byte("testing simple line\n"), []string{"testing simple line\n"}},
		"multiline":         {[]byte("testing\nmultiline\n"), []string{"testing\n", "multiline\n"}},
		"bom-on-first":      {[]byte("\xef\xbb\xbftesting simple line\n"), []string{"testing simple line\n"}},
		"bom-on-each":       {[]byte("\xef\xbb\xbftesting\n\xef\xbb\xbfmultiline\n"), []string{"testing\n", "multiline\n"}},
		"bom-in-the-middle": {[]byte("testing simple \xef\xbb\xbfline\n"), []string{"testing simple \xef\xbb\xbfline\n"}},
	}

	bufferSize := 1000
	encFactory, ok := encoding.FindEncoding("plain")
	if !ok {
		t.Fatal("failed to initiate encoding")
	}

	for name, testCase := range testCases {
		t.Run(name, func(t *testing.T) {
			r := bytes.NewReader(testCase.Input)

			// Setup failures must abort the subtest: continuing would run the
			// read loop below against an unusable codec or reader.
			codec, err := encFactory(r)
			if err != nil {
				t.Fatalf("failed to initialize encoding: %v", err)
			}

			config := Config{
				Codec:      codec,
				BufferSize: bufferSize,
				Terminator: LineFeed,
			}
			er, err := NewEncodeReader(r, config)
			if err != nil {
				t.Fatalf("failed to create new encoder: %v", err)
			}

			// Drain the reader. Any error ends the loop.
			// NOTE(review): presumably the only error seen here is the
			// end-of-input signal; other errors would surface only as a
			// mismatch in the comparison below.
			var output []string
			for {
				msg, err := er.Next()
				if err != nil {
					break
				}
				output = append(output, string(msg.Content))
			}

			assert.Equal(t, testCase.Output, output)
		})
	}
}

0 comments on commit eca4b19

Please sign in to comment.