Skip to content

Commit

Permalink
[Libbeat][New Processor] XML Decode (elastic#23678)
Browse files Browse the repository at this point in the history
* stashing before initial commit

* Initial commit

* updating go.sum

* updating it again

* adding feedback from PR comments and removing expandkeys config entry

* Updating changelog

* removing expanded_keys from allowed fields

* adding new changes based on PR comments; a few more changes remain

* moving the xml decoder to its own subpackage based on PR comments

* reverting back to Target being a string pointer, to be able to differentiate between null and empty string

* Updating certain tests to fit the new ignore_failure and ignore_missing options

* Updating unit test to test with missing field

* updating license headers

* adding benchmark test

* benchmark, now also with allocation results

* updating changelog entry

* removing duplicate Changelog entry

* changing changelog entry name to new name

* Simplify error handling and fix race

$ benchcmp old.txt new.txt
benchmark                                             old ns/op     new ns/op     delta
BenchmarkProcessor_Run/single_object-12               15691         15686         -0.03%
BenchmarkProcessor_Run/nested_and_array_object-12     39673         39098         -1.45%

benchmark                                             old allocs     new allocs     delta
BenchmarkProcessor_Run/single_object-12               158            158            +0.00%
BenchmarkProcessor_Run/nested_and_array_object-12     376            374            -0.53%

benchmark                                             old bytes     new bytes     delta
BenchmarkProcessor_Run/single_object-12               8597          8597          +0.00%
BenchmarkProcessor_Run/nested_and_array_object-12     20310         19798         -2.52%

* internal xml to json implementation

* Use internal xml to json decoder

benchmark                                             old ns/op     new ns/op     delta
BenchmarkProcessor_Run/single_object-12               15686         8051          -48.67%
BenchmarkProcessor_Run/nested_and_array_object-12     39098         20540         -47.47%

benchmark                                             old allocs     new allocs     delta
BenchmarkProcessor_Run/single_object-12               158            75             -52.53%
BenchmarkProcessor_Run/nested_and_array_object-12     374            184            -50.80%

benchmark                                             old bytes     new bytes     delta
BenchmarkProcessor_Run/single_object-12               8597          3520          -59.06%
BenchmarkProcessor_Run/nested_and_array_object-12     19798         7824          -60.48%
benchmark                                             old ns/op     new ns/op     delta
BenchmarkProcessor_Run/single_object-12               15686         8051          -48.67%
BenchmarkProcessor_Run/nested_and_array_object-12     39098         20540         -47.47%

benchmark                                             old allocs     new allocs     delta
BenchmarkProcessor_Run/single_object-12               158            75             -52.53%
BenchmarkProcessor_Run/nested_and_array_object-12     374            184            -50.80%

benchmark                                             old bytes     new bytes     delta
BenchmarkProcessor_Run/single_object-12               8597          3520          -59.06%
BenchmarkProcessor_Run/nested_and_array_object-12     19798         7824          -60.48%

* changelog fix

* Update docs

* Add godoc example of xml to json

* updating test name to fit Example naming convention

Co-authored-by: Andrew Kroh <andrew.kroh@elastic.co>
(cherry picked from commit 6839307)
  • Loading branch information
P1llus committed Feb 15, 2021
1 parent 8cb3644 commit 454e7d8
Show file tree
Hide file tree
Showing 9 changed files with 1,327 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
- Update the baseline version of Sarama (Kafka support library) to 1.27.2. {pull}23595[23595]
- Add kubernetes.volume.fs.used.pct field. {pull}23564[23564]
- Add the `enable_krb5_fast` flag to the Kafka output to explicitly opt-in to FAST authentication. {pull}23629[23629]
- Added new decode_xml processor to libbeat that is available to all beat types. {pull}23678[23678]
- Add deployment name in pod's meta. {pull}23610[23610]

*Auditbeat*
Expand Down
1 change: 1 addition & 0 deletions libbeat/cmd/instance/imports_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
_ "github.com/elastic/beats/v7/libbeat/processors/add_process_metadata"
_ "github.com/elastic/beats/v7/libbeat/processors/communityid"
_ "github.com/elastic/beats/v7/libbeat/processors/convert"
_ "github.com/elastic/beats/v7/libbeat/processors/decode_xml"
_ "github.com/elastic/beats/v7/libbeat/processors/dissect"
_ "github.com/elastic/beats/v7/libbeat/processors/dns"
_ "github.com/elastic/beats/v7/libbeat/processors/extract_array"
Expand Down
120 changes: 120 additions & 0 deletions libbeat/common/encoding/xml/decode.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package xml

import (
"bytes"
"encoding/xml"
"io"
"strings"
)

// Decoder reads XML from an input stream and converts it into nested maps.
// Its option methods control how output keys are named.
type Decoder struct {
	// xmlDec is the standard-library tokenizer the Decoder pulls tokens from.
	xmlDec *xml.Decoder

	// prependHyphenToAttr, when true, prefixes attribute keys with '-'.
	prependHyphenToAttr bool
	// lowercaseKeys, when true, lowercases element and attribute key names.
	lowercaseKeys bool
}

// NewDecoder creates a Decoder that consumes XML from r. Both key-naming
// options start out disabled.
func NewDecoder(r io.Reader) *Decoder {
	dec := new(Decoder)
	dec.xmlDec = xml.NewDecoder(r)
	return dec
}

// PrependHyphenToAttr causes the Decoder to prepend a hyphen ('-') to all
// XML attribute names.
func (d *Decoder) PrependHyphenToAttr() {
	d.prependHyphenToAttr = true
}

// LowercaseKeys causes the Decoder to transform all key names to lowercase.
func (d *Decoder) LowercaseKeys() {
	d.lowercaseKeys = true
}

// Decode reads XML from the input stream and returns a map containing the
// decoded data.
func (d *Decoder) Decode() (map[string]interface{}, error) {
	var (
		doc map[string]interface{}
		err error
	)
	// There is no enclosing element at the top level, so no attributes are
	// passed in and the character data result is discarded.
	_, doc, err = d.decode(nil)
	return doc, err
}

// decode consumes tokens until the current element's end tag (or the end of
// the input at the top level). It returns the element's whitespace-trimmed
// character data and a map holding its child elements and attributes.
//
// attrs are the attributes of the element being decoded; they are merged into
// the returned map when its end tag is reached. Child elements that share the
// same (transformed) key are collected into a []interface{}.
func (d *Decoder) decode(attrs []xml.Attr) (string, map[string]interface{}, error) {
	elements := map[string]interface{}{}
	var cdata string

	for {
		t, err := d.xmlDec.Token()
		if err != nil {
			if err == io.EOF {
				return "", elements, nil
			}
			return "", nil, err
		}

		switch elem := t.(type) {
		case xml.StartElement:
			childData, subElements, err := d.decode(elem.Attr)
			if err != nil {
				return "", nil, err
			}

			// Combine sub-elements and character data: a child with no
			// sub-elements or attributes collapses to its text, otherwise
			// the text (if any) is stored under "#text".
			var add interface{} = subElements
			if len(subElements) == 0 {
				add = childData
			} else if len(childData) > 0 {
				subElements["#text"] = childData
			}

			// Add the data to the current object while taking into account
			// if the current key already exists (in the case of lists). The
			// lookup must use the transformed key, i.e. the same key the
			// value is stored under; otherwise repeated elements overwrite
			// each other when key lowercasing is enabled.
			key := d.key(elem.Name.Local)
			switch v := elements[key].(type) {
			case nil:
				elements[key] = add
			case []interface{}:
				elements[key] = append(v, add)
			default:
				elements[key] = []interface{}{v, add}
			}
		case xml.CharData:
			// Converting to string copies the bytes, so the token buffer is
			// not retained. Each CharData token replaces the previous value,
			// so only the last text segment of an element is kept.
			cdata = string(bytes.TrimSpace(elem))
		case xml.EndElement:
			d.addAttributes(attrs, elements)
			return cdata, elements, nil
		}
	}
}

// addAttributes writes every attribute in attrs into m, using the attribute's
// local name (after the Decoder's key transformations) as the map key. An
// existing entry with the same key is overwritten.
func (d *Decoder) addAttributes(attrs []xml.Attr, m map[string]interface{}) {
	for i := range attrs {
		m[d.attrKey(attrs[i].Name.Local)] = attrs[i].Value
	}
}

// key returns the output key for the name in: lowercased when the
// lowercase-keys option is enabled, unchanged otherwise.
func (d *Decoder) key(in string) string {
	if !d.lowercaseKeys {
		return in
	}
	return strings.ToLower(in)
}

// attrKey returns the output key for the attribute name in. A leading hyphen
// is added when that option is enabled, and the result then goes through the
// same transformation as element keys.
func (d *Decoder) attrKey(in string) string {
	name := in
	if d.prependHyphenToAttr {
		name = "-" + in
	}
	return d.key(name)
}
Loading

0 comments on commit 454e7d8

Please sign in to comment.