-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
json_decode_fields processor #2605
Changes from 1 commit
21555b2
842c996
91b64a8
3e53022
c4c3a8c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ | |
/build | ||
/*/data | ||
/*/logs | ||
/.vscode | ||
|
||
# Files | ||
.DS_Store | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
package common | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this file moves helpers and exposes the json specific helpers with generic names. I'd rather see some json parsing utility function/module doing the right thing instead of exposing internal helpers. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @urso : Apologies for missing to see this comment. Does wrapping it into something like this make sense?
|
||
|
||
import ( | ||
"encoding/json" | ||
) | ||
|
||
// transformNumbersDict walks a json decoded tree an replaces json.Number | ||
// with int64, float64, or string, in this order of preference (i.e. if it | ||
// parses as an int, use int. if it parses as a float, use float. etc). | ||
func TransformNumbersDict(dict MapStr) { | ||
for k, v := range dict { | ||
switch vv := v.(type) { | ||
case json.Number: | ||
dict[k] = TransformNumber(vv) | ||
case map[string]interface{}: | ||
TransformNumbersDict(vv) | ||
case []interface{}: | ||
TransformNumbersArray(vv) | ||
} | ||
} | ||
} | ||
|
||
func TransformNumber(value json.Number) interface{} { | ||
i64, err := value.Int64() | ||
if err == nil { | ||
return i64 | ||
} | ||
f64, err := value.Float64() | ||
if err == nil { | ||
return f64 | ||
} | ||
return value.String() | ||
} | ||
|
||
func TransformNumbersArray(arr []interface{}) { | ||
for i, v := range arr { | ||
switch vv := v.(type) { | ||
case json.Number: | ||
arr[i] = TransformNumber(vv) | ||
case map[string]interface{}: | ||
TransformNumbersDict(vv) | ||
case []interface{}: | ||
TransformNumbersArray(vv) | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
package actions | ||
|
||
import ( | ||
"bytes" | ||
"encoding/json" | ||
"fmt" | ||
"strings" | ||
|
||
"github.com/elastic/beats/libbeat/common" | ||
"github.com/elastic/beats/libbeat/logp" | ||
"github.com/elastic/beats/libbeat/processors" | ||
"github.com/pkg/errors" | ||
) | ||
|
||
type decodeJSONFields struct { | ||
Fields []string | ||
} | ||
|
||
var debug = logp.MakeDebug("filters") | ||
|
||
func init() { | ||
processors.RegisterPlugin("decode_json_fields", configChecked(newDecodeJSONFields, | ||
requireFields("fields"), allowedFields("fields", "when"))) | ||
} | ||
|
||
func newDecodeJSONFields(c common.Config) (processors.Processor, error) { | ||
config := struct { | ||
Fields []string `config:"fields"` | ||
}{} | ||
err := c.Unpack(&config) | ||
if err != nil { | ||
logp.Warn("Error unpacking config for decode_json_fields") | ||
return nil, fmt.Errorf("fail to unpack the decode_json_fields configuration: %s", err) | ||
} | ||
|
||
f := decodeJSONFields{Fields: config.Fields} | ||
return f, nil | ||
} | ||
|
||
func (f decodeJSONFields) Run(event common.MapStr) (common.MapStr, error) { | ||
var errs []string | ||
|
||
for _, field := range f.Fields { | ||
data, err := event.GetValue(field) | ||
if err != nil && errors.Cause(err) != common.ErrKeyNotFound { | ||
debug("Error trying to GetValue for field : %s in event : %v", field, event) | ||
errs = append(errs, err.Error()) | ||
continue | ||
} | ||
text, ok := data.(string) | ||
if ok { | ||
var output map[string]interface{} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we want to restrict ourselves to json objects only here? Using |
||
err := unmarshal([]byte(text), &output) | ||
if err != nil { | ||
debug("Error trying to unmarshal %s", event[field]) | ||
errs = append(errs, err.Error()) | ||
continue | ||
} | ||
|
||
_, err = event.Put(field, output) | ||
if err != nil { | ||
debug("Error trying to Put value %v for field : %s", output, field) | ||
errs = append(errs, err.Error()) | ||
continue | ||
} | ||
} | ||
} | ||
|
||
return event, fmt.Errorf(strings.Join(errs, ", ")) | ||
} | ||
|
||
// unmarshal is equivalent with json.Unmarshal but it converts numbers | ||
// to int64 where possible, instead of using always float64. | ||
func unmarshal(text []byte, fields *map[string]interface{}) error { | ||
dec := json.NewDecoder(bytes.NewReader(text)) | ||
dec.UseNumber() | ||
err := dec.Decode(fields) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
//Iterate through all the fields to perform deep parsing | ||
for k, v := range *fields { | ||
switch vv := v.(type) { | ||
case string: | ||
var output map[string]interface{} | ||
sErr := unmarshal([]byte(vv), &output) | ||
if sErr == nil { | ||
(*fields)[k] = output | ||
} | ||
} | ||
} | ||
|
||
common.TransformNumbersDict(*fields) | ||
return nil | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thinking a little more about
Then (extending unmarshal a little), unmarshal can be generalized a little:
As you can see, I added a maximum parsing depth. Not sure we really want to parse fully recursively here. I'd use Having DecodeJson, there is no need for exporting jsontransform as is (but I'd keep jsontransform package for now, as other modules in beats might make use of it). What do you think? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like the idea of having a maxDepth setup so that we don't recursively parse till the end. |
||
|
||
func (f decodeJSONFields) String() string { | ||
return "decode_json_fields=" + strings.Join(f.Fields, ", ") | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
package actions | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/elastic/beats/libbeat/common" | ||
"github.com/elastic/beats/libbeat/logp" | ||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
var fields = [1]string{"msg"} | ||
var config, _ = common.NewConfigFrom(map[string]interface{}{ | ||
"fields": fields, | ||
}) | ||
|
||
func TestMissingKey(t *testing.T) { | ||
input := common.MapStr{ | ||
"pipeline": "us1", | ||
} | ||
|
||
actual := getActualValue(t, config, input) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These test cases that expect an error should also assert that an error occurred. You can use |
||
|
||
expected := common.MapStr{ | ||
"pipeline": "us1", | ||
} | ||
|
||
assert.Equal(t, expected.String(), actual.String()) | ||
} | ||
|
||
func TestFieldNotString(t *testing.T) { | ||
input := common.MapStr{ | ||
"msg": 123, | ||
"pipeline": "us1", | ||
} | ||
|
||
actual := getActualValue(t, config, input) | ||
|
||
expected := common.MapStr{ | ||
"msg": 123, | ||
"pipeline": "us1", | ||
} | ||
|
||
assert.Equal(t, expected.String(), actual.String()) | ||
|
||
} | ||
|
||
func TestInvalidJSON(t *testing.T) { | ||
input := common.MapStr{ | ||
"msg": "{\"log\":\"{\\\"level\\\":\\\"info\\\"}\",\"stream\":\"stderr\",\"count\":3", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you create a raw string literal by surrounding the string with back ticks (`) then you don't need to use escaping on those quotes. I don't care if you fix this, it's more of a golang tip to keep you sane. |
||
"pipeline": "us1", | ||
} | ||
|
||
actual := getActualValue(t, config, input) | ||
|
||
expected := common.MapStr{ | ||
"msg": "{\"log\":\"{\\\"level\\\":\\\"info\\\"}\",\"stream\":\"stderr\",\"count\":3", | ||
"pipeline": "us1", | ||
} | ||
assert.Equal(t, expected.String(), actual.String()) | ||
|
||
} | ||
|
||
func TestValidJSON(t *testing.T) { | ||
input := common.MapStr{ | ||
"msg": "{\"log\":\"{\\\"level\\\":\\\"info\\\"}\",\"stream\":\"stderr\",\"count\":3}", | ||
"pipeline": "us1", | ||
} | ||
|
||
actual := getActualValue(t, config, input) | ||
|
||
expected := common.MapStr{ | ||
"msg": map[string]interface{}{ | ||
"log": map[string]interface{}{ | ||
"level": "info", | ||
}, | ||
"stream": "stderr", | ||
"count": 3, | ||
}, | ||
"pipeline": "us1", | ||
} | ||
|
||
assert.Equal(t, expected.String(), actual.String()) | ||
|
||
} | ||
|
||
func getActualValue(t *testing.T, config *common.Config, input common.MapStr) common.MapStr { | ||
if testing.Verbose() { | ||
logp.LogInit(logp.LOG_DEBUG, "", false, true, []string{"*"}) | ||
} | ||
|
||
p, err := newDecodeJSONFields(*config) | ||
if err != nil { | ||
logp.Err("Error initializing decode_json_fields") | ||
t.Fatal(err) | ||
} | ||
|
||
actual, err := p.Run(input) | ||
|
||
return actual | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should probably not be under bug fixes? This is a new feature as far as I understand.