// Decoder converts an XML document read from an io.Reader into a nested
// map[string]interface{}.
//
// Mapping rules:
//   - a child element becomes a map entry keyed by its local name;
//   - repeated sibling elements with the same name are collected into a
//     []interface{} in document order;
//   - attributes are stored in the same map as the element's children;
//   - an element with neither attributes nor children is represented by its
//     whitespace-trimmed character data as a string;
//   - an element that has attributes or children and also non-blank
//     character data keeps that text under the "#text" key.
type Decoder struct {
	prependHyphen bool
	lowercaseKeys bool
	xmlDec        *xml.Decoder
}

// NewDecoder returns a Decoder that reads XML from r.
func NewDecoder(r io.Reader) *Decoder {
	return &Decoder{xmlDec: xml.NewDecoder(r)}
}

// PrependHyphen makes the decoder prefix every attribute key with "-" so that
// attributes cannot collide with child elements of the same name.
func (d *Decoder) PrependHyphen() { d.prependHyphen = true }

// LowercaseKeys makes the decoder lowercase every element and attribute key.
func (d *Decoder) LowercaseKeys() { d.lowercaseKeys = true }

// errUnexpectedEnd is returned when the token stream ends while an element is
// still open. encoding/xml normally reports that condition as a syntax error
// of its own, so this is a defensive fallback.
var errUnexpectedEnd = errors.New("unexpected end of xml")

// Decode reads the remaining XML from the underlying reader and returns it as
// a nested map. On error the returned map is nil.
func (d *Decoder) Decode() (map[string]interface{}, error) {
	_, m, err := d.decode(nil)
	return m, err
}

// Map is a shorthand for the generic map type produced by the decoder.
type Map map[string]interface{}

// decode consumes tokens at document level until the input is exhausted.
// attrs are added to the returned map if an end element is encountered.
func (d *Decoder) decode(attrs []xml.Attr) (string, map[string]interface{}, error) {
	return d.decodeTokens(attrs, false)
}

// decodeTokens reads tokens until the end element that closes the current
// element (nested == true) or until the end of input (nested == false).
//
// It returns the element's trimmed character data and a map holding its child
// elements and the given attributes.
func (d *Decoder) decodeTokens(attrs []xml.Attr, nested bool) (string, map[string]interface{}, error) {
	elements := map[string]interface{}{}
	var text string

	for {
		tok, err := d.xmlDec.Token()
		if err != nil {
			if !errors.Is(err, io.EOF) {
				return "", nil, err
			}
			if nested {
				// The input ended before the current element was closed.
				return "", nil, errUnexpectedEnd
			}
			return "", elements, nil
		}

		switch elem := tok.(type) {
		case xml.StartElement:
			childText, children, err := d.decodeTokens(elem.Attr, true)
			if err != nil {
				return "", nil, err
			}

			// A leaf element collapses to its text. Otherwise keep the map
			// and preserve any text next to the attributes/children.
			var add interface{} = children
			if len(children) == 0 {
				add = childText
			} else if childText != "" {
				children["#text"] = childText
			}

			// Repeated keys are promoted to (or appended to) a list.
			key := d.key(elem.Name.Local)
			switch v := elements[key].(type) {
			case nil:
				elements[key] = add
			case []interface{}:
				elements[key] = append(v, add)
			default:
				elements[key] = []interface{}{v, add}
			}
		case xml.CharData:
			// Whitespace-only chunks (indentation between elements) must not
			// overwrite text that was already captured. Converting to string
			// copies the bytes, which the decoder reuses on the next Token call.
			if trimmed := bytes.TrimSpace(elem); len(trimmed) > 0 {
				text = string(trimmed)
			}
		case xml.EndElement:
			d.addAttributes(attrs, elements)
			return text, elements, nil
		}
	}
}

// addAttributes stores every attribute in m under its (possibly transformed)
// key. An attribute overwrites an existing entry with the same key.
func (d *Decoder) addAttributes(attrs []xml.Attr, m map[string]interface{}) {
	for _, attr := range attrs {
		m[d.attrKey(attr.Name.Local)] = attr.Value
	}
}

// key applies the LowercaseKeys option to an element or attribute name.
func (d *Decoder) key(name string) string {
	if d.lowercaseKeys {
		return strings.ToLower(name)
	}
	return name
}

// attrKey applies the PrependHyphen and LowercaseKeys options to an attribute
// name.
func (d *Decoder) attrKey(name string) string {
	if d.prependHyphen {
		name = "-" + name
	}
	return d.key(name)
}
`), Output: map[string]interface{}{ "catalog": map[string]interface{}{ - "book": []interface{}{map[string]interface{}{ - "author": "Gambardella, Matthew", - "description": "An in-depth look at creating applications \n\t\t\t\t\twith XML.", - "genre": "Computer", - "id": "bk101", - "price": "44.95", - "publish_date": "2000-10-01", - "title": "XML Developer's Guide", - }, + "book": []interface{}{ + map[string]interface{}{ + "author": "Gambardella, Matthew", + "description": "An in-depth look at creating applications with XML.", + "genre": "Computer", + "id": "bk101", + "price": "44.95", + "publish_date": "2000-10-01", + "title": "XML Developer's Guide", + }, map[string]interface{}{ "author": "Ralls, Kim", - "description": "A former architect battles corporate zombies, \n\t\t\t\t\tan evil sorceress, and her own childhood to become queen \n\t\t\t\t\tof the world.", + "description": "A former architect battles corporate zombies, an evil sorceress, and her own childhood to become queen of the world.", "genre": "Fantasy", "id": "bk102", "price": "5.95", @@ -105,8 +107,7 @@ func TestUnmarshalXML(t *testing.T) { Computer 44.95 2000-10-01 - An in-depth look at creating applications - with XML. + An in-depth look at creating applications with XML. Ralls, Kim @@ -114,9 +115,7 @@ func TestUnmarshalXML(t *testing.T) { Fantasy 5.95 2000-12-16 - A former architect battles corporate zombies, - an evil sorceress, and her own childhood to become queen - of the world. + A former architect battles corporate zombies, an evil sorceress, and her own childhood to become queen of the world. 
`), Output: map[string]interface{}{ @@ -124,7 +123,7 @@ func TestUnmarshalXML(t *testing.T) { "book": []interface{}{ map[string]interface{}{ "author": "Gambardella, Matthew", - "description": "An in-depth look at creating applications \n\t\t\t\t\twith XML.", + "description": "An in-depth look at creating applications with XML.", "genre": "Computer", "id": "bk101", "price": "44.95", @@ -132,7 +131,7 @@ func TestUnmarshalXML(t *testing.T) { "title": "XML Developer's Guide"}, map[string]interface{}{ "author": "Ralls, Kim", - "description": "A former architect battles corporate zombies, \n\t\t\t\t\tan evil sorceress, and her own childhood to become queen \n\t\t\t\t\tof the world.", + "description": "A former architect battles corporate zombies, an evil sorceress, and her own childhood to become queen of the world.", "genre": "Fantasy", "id": "bk102", "price": "5.95", @@ -150,15 +149,12 @@ func TestUnmarshalXML(t *testing.T) { Computer 44.95 2000-10-01 - An in-depth look at creating applications - with XML. + An in-depth look at creating applications with XML. Ralls, Kim - A former architect battles corporate zombies, - an evil sorceress, and her own childhood to become queen - of the world. + A former architect battles corporate zombies, an evil sorceress, and her own childhood to become queen of the world. 
`), @@ -166,7 +162,7 @@ func TestUnmarshalXML(t *testing.T) { "catalog": map[string]interface{}{ "book": map[string]interface{}{ "author": "Gambardella, Matthew", - "description": "An in-depth look at creating applications \n\t\t\t\t\twith XML.", + "description": "An in-depth look at creating applications with XML.", "genre": "Computer", "id": "bk101", "price": "44.95", @@ -174,16 +170,22 @@ func TestUnmarshalXML(t *testing.T) { "title": "XML Developer's Guide"}, "secondcategory": map[string]interface{}{ "paper": map[string]interface{}{ - "description": "A former architect battles corporate zombies, \n\t\t\t\t\t\tan evil sorceress, and her own childhood to become queen \n\t\t\t\t\t\tof the world.", + "description": "A former architect battles corporate zombies, an evil sorceress, and her own childhood to become queen of the world.", "id": "bk102", "test2": "Ralls, Kim"}}}}, Error: nil, }, } - for _, test := range tests { - out, err := UnmarshalXML(test.Input, false, true) - assert.Equal(t, test.Output, out) - assert.Equal(t, test.Error, err) + for i, test := range tests { + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + d := NewDecoder(bytes.NewReader(test.Input)) + d.LowercaseKeys() + + out, err := d.Decode() + + require.NoError(t, err) + assert.EqualValues(t, test.Output, out) + }) } }