Skip to content

Commit c656db3

Browse files
committed
[pkg/ottl] Add ElementizeValuesXML Converter
1 parent a191550 commit c656db3

File tree

6 files changed

+303
-0
lines changed

6 files changed

+303
-0
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: enhancement
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: pkg/ottl
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Add ElementizeValuesXML Converter
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [35364]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext:
19+
20+
# If your change doesn't affect end users or the exported elements of any package,
21+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
22+
# Optional: The change log or logs in which this entry should be included.
23+
# e.g. '[user]' or '[user, api]'
24+
# Include 'user' if the change is relevant to end users.
25+
# Include 'api' if there is a change to a library API.
26+
# Default: '[user]'
27+
change_logs: []

pkg/ottl/e2e/e2e_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,12 @@ func Test_e2e_converters(t *testing.T) {
377377
tCtx.GetLogRecord().Attributes().PutStr("test", "pass")
378378
},
379379
},
380+
{
381+
statement: `set(body, ElementizeValuesXML("<a><b/>foo</a>"))`,
382+
want: func(tCtx ottllog.TransformContext) {
383+
tCtx.GetLogRecord().Body().SetStr("<a><b></b><value>foo</value></a>")
384+
},
385+
},
380386
{
381387
statement: `set(attributes["test"], ExtractPatterns("aa123bb", "(?P<numbers>\\d+)"))`,
382388
want: func(tCtx ottllog.TransformContext) {

pkg/ottl/ottlfuncs/README.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,7 @@ Available Converters:
416416
- [Day](#day)
417417
- [Double](#double)
418418
- [Duration](#duration)
419+
- [ElementizeValuesXML](#elementizevaluesxml)
419420
- [ExtractPatterns](#extractpatterns)
420421
- [ExtractGrokPatterns](#extractgrokpatterns)
421422
- [FNV](#fnv)
@@ -600,6 +601,39 @@ Examples:
600601
- `Duration("333ms")`
601602
- `Duration("1000000h")`
602603

604+
605+
### ElementizeValuesXML
606+
607+
`ElementizeValuesXML(target, Optional[xpath], Optional[elementName])`
608+
609+
The `ElementizeValuesXML` Converter returns an edited version of an XML string where all content belongs to a dedicated element.
610+
611+
`target` is a Getter that returns a string. This string should be in XML format.
612+
If `target` is not a string, nil, or cannot be parsed as XML, `ElementizeValuesXML` will return an error.
613+
614+
615+
`xpath` (optional) is a string that specifies an [XPath](https://www.w3.org/TR/1999/REC-xpath-19991116/) expression that
616+
selects one or more elements. Content will only be converted within the result(s) of the xpath. The default is `/`.
617+
618+
`elementName` (optional) is a string that is used for any element tags that are created to wrap content.
619+
The default is `"value"`.
620+
621+
For example, `<a><b>foo</b>bar</a>` will be converted to `<a><b>foo</b><value>bar</value></a>`.
622+
623+
Examples:
624+
625+
Ensure all content in a document is wrapped in a dedicated element
626+
627+
- `ElementizeValuesXML(body)`
628+
629+
Use a custom name for any new elements
630+
631+
- `ElementizeValuesXML(body, elementName = "custom")`
632+
633+
Convert only part of the document
634+
635+
- `ElementizeValuesXML(body, "/some/part/", "value")`
636+
603637
### ExtractPatterns
604638

605639
`ExtractPatterns(target, pattern)`
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs"
5+
6+
import (
7+
"context"
8+
"fmt"
9+
10+
"github.com/antchfx/xmlquery"
11+
12+
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
13+
)
14+
15+
type ElementizeValuesXMLArguments[K any] struct {
16+
Target ottl.StringGetter[K]
17+
XPath ottl.Optional[string]
18+
ElementName ottl.Optional[string]
19+
}
20+
21+
func NewElementizeValuesXMLFactory[K any]() ottl.Factory[K] {
22+
return ottl.NewFactory("ElementizeValuesXML", &ElementizeValuesXMLArguments[K]{}, createElementizeValuesXMLFunction[K])
23+
}
24+
25+
func createElementizeValuesXMLFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[K], error) {
26+
args, ok := oArgs.(*ElementizeValuesXMLArguments[K])
27+
28+
if !ok {
29+
return nil, fmt.Errorf("ElementizeValuesXML args must be of type *ElementizeValuesXMLAguments[K]")
30+
}
31+
32+
xPath := args.XPath.Get()
33+
if xPath == "" {
34+
xPath = "/"
35+
} else if err := validateXPath(xPath); err != nil {
36+
return nil, err
37+
}
38+
39+
elementName := args.ElementName.Get()
40+
if elementName == "" {
41+
elementName = "value"
42+
}
43+
44+
return elementizeValuesXML(args.Target, xPath, elementName), nil
45+
}
46+
47+
// elementizeValuesXML returns a `pcommon.String` that is a result of removing all matching nodes from the target XML.
48+
// This currently supports removal of elements, attributes, text values, comments, and CharData.
49+
func elementizeValuesXML[K any](target ottl.StringGetter[K], xPath string, elementName string) ottl.ExprFunc[K] {
50+
return func(ctx context.Context, tCtx K) (any, error) {
51+
var doc *xmlquery.Node
52+
if targetVal, err := target.Get(ctx, tCtx); err != nil {
53+
return nil, err
54+
} else if doc, err = parseNodesXML(targetVal); err != nil {
55+
return nil, err
56+
}
57+
for _, n := range xmlquery.Find(doc, xPath) {
58+
elementizeValuesForNode(n, elementName)
59+
}
60+
return doc.OutputXML(false), nil
61+
}
62+
}
63+
64+
func elementizeValuesForNode(parent *xmlquery.Node, elementName string) {
65+
switch parent.Type {
66+
case xmlquery.ElementNode: // ok
67+
case xmlquery.DocumentNode: // ok
68+
default:
69+
return
70+
}
71+
72+
if parent.FirstChild == nil {
73+
return
74+
}
75+
76+
// Convert any child nodes and count text and element nodes.
77+
var valueCount, elementCount int
78+
for child := parent.FirstChild; child != nil; child = child.NextSibling {
79+
if child.Type == xmlquery.ElementNode {
80+
elementizeValuesForNode(child, elementName)
81+
elementCount++
82+
} else if child.Type == xmlquery.TextNode {
83+
valueCount++
84+
}
85+
}
86+
87+
// If there are no values to elementize, or if there is exactly one value OR one element, this node is all set.
88+
if valueCount == 0 || elementCount+valueCount <= 1 {
89+
return
90+
}
91+
92+
// At this point, we either have multiple values, or a mix of values and elements.
93+
// Either way, we need to elementize the values.
94+
for child := parent.FirstChild; child != nil; child = child.NextSibling {
95+
if child.Type != xmlquery.TextNode {
96+
continue
97+
}
98+
newTextNode := &xmlquery.Node{
99+
Type: xmlquery.TextNode,
100+
Data: child.Data,
101+
}
102+
// Change this node into an element
103+
child.Type = xmlquery.ElementNode
104+
child.Data = elementName
105+
child.FirstChild = newTextNode
106+
child.LastChild = newTextNode
107+
}
108+
}
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs"
5+
6+
import (
7+
"context"
8+
"testing"
9+
10+
"github.com/stretchr/testify/assert"
11+
12+
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
13+
)
14+
15+
func Test_ElementizeValuesXML(t *testing.T) {
16+
tests := []struct {
17+
name string
18+
document string
19+
xPath string
20+
elementName string
21+
want string
22+
}{
23+
{
24+
name: "nop",
25+
document: `<a><b/></a>`,
26+
want: `<a><b></b></a>`,
27+
},
28+
{
29+
name: "nop declaration",
30+
document: `<?xml version="1.0" encoding="UTF-8"?><a><b/></a>`,
31+
want: `<?xml version="1.0" encoding="UTF-8"?><a><b></b></a>`,
32+
},
33+
{
34+
name: "nop attributes",
35+
document: `<a foo="bar" hello="world"/>`,
36+
want: `<a foo="bar" hello="world"></a>`,
37+
},
38+
{
39+
name: "nop wrapped text",
40+
document: `<a>hello world</a>`,
41+
want: `<a>hello world</a>`,
42+
},
43+
{
44+
name: "simple hanging",
45+
document: `<a><b/>foo</a>`,
46+
want: `<a><b></b><value>foo</value></a>`,
47+
},
48+
{
49+
name: "simple hanging with tag name",
50+
elementName: "bar",
51+
document: `<a><b/>foo</a>`,
52+
want: `<a><b></b><bar>foo</bar></a>`,
53+
},
54+
{
55+
name: "multiple hanging same level",
56+
document: `<a>foo<b/>bar</a>`,
57+
want: `<a><value>foo</value><b></b><value>bar</value></a>`,
58+
},
59+
{
60+
name: "multiple hanging multiple levels",
61+
document: `<a>foo<b/>bar<c/>1<d>not</d>2<e><f/><f/></e></a>`,
62+
elementName: "v",
63+
want: `<a><v>foo</v><b></b><v>bar</v><c></c><v>1</v><d>not</d><v>2</v><e><f></f><f></f></e></a>`,
64+
},
65+
{
66+
name: "xpath select some",
67+
document: `<a><b><c/>foo</b><d><c/>bar</d><b><c/>baz</b></a>`,
68+
xPath: "/a/b",
69+
want: `<a><b><c></c><value>foo</value></b><d><c></c>bar</d><b><c></c><value>baz</value></b></a>`,
70+
},
71+
{
72+
name: "xpath with element name",
73+
document: `<a><b><c/>foo</b><d><c/>bar</d><b><c/>baz</b></a>`,
74+
xPath: "/a/b",
75+
elementName: "V",
76+
want: `<a><b><c></c><V>foo</V></b><d><c></c>bar</d><b><c></c><V>baz</V></b></a>`,
77+
},
78+
}
79+
factory := NewElementizeValuesXMLFactory[any]()
80+
for _, tt := range tests {
81+
t.Run(tt.name, func(t *testing.T) {
82+
args := &ElementizeValuesXMLArguments[any]{
83+
Target: ottl.StandardStringGetter[any]{
84+
Getter: func(_ context.Context, _ any) (any, error) {
85+
return tt.document, nil
86+
},
87+
},
88+
XPath: ottl.NewTestingOptional(tt.xPath),
89+
ElementName: ottl.NewTestingOptional(tt.elementName),
90+
}
91+
exprFunc, err := factory.CreateFunction(ottl.FunctionContext{}, args)
92+
assert.NoError(t, err)
93+
94+
result, err := exprFunc(context.Background(), nil)
95+
assert.NoError(t, err)
96+
assert.Equal(t, tt.want, result)
97+
})
98+
}
99+
}
100+
101+
func TestCreateElementizeValuesXMLFunc(t *testing.T) {
102+
factory := NewElementizeValuesXMLFactory[any]()
103+
fCtx := ottl.FunctionContext{}
104+
105+
// Invalid arg type
106+
exprFunc, err := factory.CreateFunction(fCtx, nil)
107+
assert.Error(t, err)
108+
assert.Nil(t, exprFunc)
109+
110+
// Invalid XPath should error on function creation
111+
exprFunc, err = factory.CreateFunction(
112+
fCtx, &ElementizeValuesXMLArguments[any]{
113+
XPath: ottl.NewTestingOptional("!"),
114+
})
115+
assert.Error(t, err)
116+
assert.Nil(t, exprFunc)
117+
118+
// Invalid XML should error on function execution
119+
exprFunc, err = factory.CreateFunction(
120+
fCtx, &ElementizeValuesXMLArguments[any]{
121+
Target: invalidXMLGetter(),
122+
})
123+
assert.NoError(t, err)
124+
assert.NotNil(t, exprFunc)
125+
_, err = exprFunc(context.Background(), nil)
126+
assert.Error(t, err)
127+
}

pkg/ottl/ottlfuncs/functions.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ func converters[K any]() []ottl.Factory[K] {
4343
NewDayFactory[K](),
4444
NewDoubleFactory[K](),
4545
NewDurationFactory[K](),
46+
NewElementizeValuesXMLFactory[K](),
4647
NewExtractPatternsFactory[K](),
4748
NewExtractGrokPatternsFactory[K](),
4849
NewFnvFactory[K](),

0 commit comments

Comments
 (0)