Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

heuristic: make wildcard character configurable #1418

Merged
merged 15 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions docs/sources/configure/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -828,8 +828,14 @@ Possible values for the `unmatched` property are:
- 🚨 Caution: this option could lead to cardinality explosion at the ingester side.
- `wildcard` will set the `http.route` field property to a generic asterisk based `/**` value.
- `heuristic` will automatically derive the `http.route` field property from the path value, based on the following rules:
- Any path components which have numbers or characters outside of the ASCII alphabet (or `-` and `_`), will be replaced by an asterisk `*`.
- Any alphabetical components which don't look like words, will be replaced by an asterisk `*`.
- Any path components which have numbers or characters outside of the ASCII alphabet (or `-` and `_`), are replaced by `wildcard_char`.
- Any alphabetical components which don't look like words, are replaced by `wildcard_char`.

| YAML | Environment variable | Type | Default |
| ----------- | ------- | ------ | ---------- |
| `wildcard_char` | -- | string | `'*'` |

Can be used together with `unmatched: heuristic` to choose what character the path components identified by the heuristic mode are replaced by. By default, an asterisk (`'*'`) is used. The value should be quoted and must be a single character.

### Special considerations when using the `heuristic` route decorator mode

Expand All @@ -848,7 +854,7 @@ document/d/CfMkAGbE_aivhFydEpaRafPuGWbmHfG/edit (no numbers in the ID)
document/d/C2fMkAGb3E_aivhFyd5EpaRafP123uGWbmHfG/edit
```

will be converted to a low cardinality route:
are converted to a low cardinality route (using the default `wildcard_char`):

```
document/d/*/edit
Expand Down
9 changes: 8 additions & 1 deletion pkg/beyla/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,10 @@ var DefaultConfig = Config{
FetchTimeout: 500 * time.Millisecond,
},
},
Routes: &transform.RoutesConfig{Unmatch: transform.UnmatchDefault},
Routes: &transform.RoutesConfig{
Unmatch: transform.UnmatchDefault,
WildcardChar: "*",
},
NetworkFlows: defaultNetworkConfig,
Processes: process.CollectConfig{
RunMode: process.RunModePrivileged,
Expand Down Expand Up @@ -275,6 +278,10 @@ func (c *Config) Validate() error {
" grafana, otel_metrics_export, otel_traces_export or prometheus_export")
}

if len(c.Routes.WildcardChar) > 1 {
return ConfigError("wildcard_char can only be a single character, multiple characters are not allowed")
}

return nil
}

Expand Down
39 changes: 38 additions & 1 deletion pkg/beyla/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,8 @@ network:
},
},
Routes: &transform.RoutesConfig{
Unmatch: transform.UnmatchHeuristic,
Unmatch: transform.UnmatchHeuristic,
WildcardChar: "*",
},
NameResolver: &transform.NameResolverConfig{
Sources: []string{"k8s", "dns"},
Expand Down Expand Up @@ -365,6 +366,42 @@ func TestConfigValidate_TracePrinterFallback(t *testing.T) {
assert.Equal(t, cfg.TracePrinter, debug.TracePrinterText)
}

func TestConfigValidateRoutes(t *testing.T) {
userConfig := bytes.NewBufferString(`executable_name: foo
trace_printer: text
routes:
unmatched: heuristic
wildcard_char: "*"
`)
cfg, err := LoadConfig(userConfig)
require.NoError(t, err)
require.NoError(t, cfg.Validate())
}

func TestConfigValidateRoutes_Errors(t *testing.T) {
for _, tc := range []string{
`executable_name: foo
trace_printer: text
routes:
unmatched: heuristic
wildcard_char: "##"
`, `executable_name: foo
trace_printer: text
routes:
unmatched: heuristic
wildcard_char: "random"
`,
} {
testCaseName := regexp.MustCompile("wildcard_char: (.+)\n").FindStringSubmatch(tc)[1]
t.Run(testCaseName, func(t *testing.T) {
userConfig := bytes.NewBufferString(tc)
cfg, err := LoadConfig(userConfig)
require.NoError(t, err)
require.Error(t, cfg.Validate())
})
}
}

func TestConfig_OtelGoAutoEnv(t *testing.T) {
// OTEL_GO_AUTO_TARGET_EXE is an alias to BEYLA_EXECUTABLE_NAME
// (Compatibility with OpenTelemetry)
Expand Down
10 changes: 5 additions & 5 deletions pkg/internal/transform/route/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func InitAutoClassifier() error {
}

//nolint:cyclop
func ClusterPath(path string) string {
func ClusterPath(path string, replacement byte) string {
if path == "" {
return path
}
Expand All @@ -61,11 +61,11 @@ func ClusterPath(path string) string {
if c == '/' {
nSegments++
if skip {
p[sPos] = '*'
p[sPos] = replacement
sPos++
} else if sFwd > sPos {
if !okWord(string(p[sPos:sFwd])) {
p[sPos] = '*'
p[sPos] = replacement
sPos++
} else {
sPos = sFwd
Expand Down Expand Up @@ -95,11 +95,11 @@ func ClusterPath(path string) string {
}

if skip {
p[sPos] = '*'
p[sPos] = replacement
sPos++
} else if sFwd > sPos {
if !okWord(string(p[sPos:sFwd])) {
p[sPos] = '*'
p[sPos] = replacement
sPos++
} else {
sPos = sFwd
Expand Down
52 changes: 30 additions & 22 deletions pkg/internal/transform/route/cluster_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package route

import (
"fmt"
"testing"

"github.com/stretchr/testify/assert"
Expand All @@ -9,26 +10,33 @@ import (
func TestURLClustering(t *testing.T) {
err := InitAutoClassifier()
assert.NoError(t, err)
assert.Equal(t, "", ClusterPath(""))
assert.Equal(t, "/", ClusterPath("/"))
assert.Equal(t, "/users/*/j4elk/*/job/*", ClusterPath("/users/fdklsd/j4elk/23993/job/2"))
assert.Equal(t, "*", ClusterPath("123"))
assert.Equal(t, "/*", ClusterPath("/123"))
assert.Equal(t, "*/", ClusterPath("123/"))
assert.Equal(t, "*/*", ClusterPath("123/ljgdflgjf"))
assert.Equal(t, "/*", ClusterPath("/**"))
assert.Equal(t, "/u/*", ClusterPath("/u/2"))
assert.Equal(t, "/v1/products/*", ClusterPath("/v1/products/2"))
assert.Equal(t, "/v1/products/*", ClusterPath("/v1/products/22"))
assert.Equal(t, "/v1/products/*", ClusterPath("/v1/products/22j"))
assert.Equal(t, "/products/*/org/*", ClusterPath("/products/1/org/3"))
assert.Equal(t, "/products//org/*", ClusterPath("/products//org/3"))
assert.Equal(t, "/v1/k6-test-runs/*", ClusterPath("/v1/k6-test-runs/1"))
assert.Equal(t, "/attach", ClusterPath("/attach"))
assert.Equal(t, "/usuarios/*/j4elk/*/trabajo/*", ClusterPath("/usuarios/fdklsd/j4elk/23993/trabajo/2"))
assert.Equal(t, "/Benutzer/*/j4elk/*/Arbeit/*", ClusterPath("/Benutzer/fdklsd/j4elk/23993/Arbeit/2"))
assert.Equal(t, "/utilisateurs/*/j4elk/*/tache/*", ClusterPath("/utilisateurs/fdklsd/j4elk/23993/tache/2"))
assert.Equal(t, "/products/", ClusterPath("/products/"))
assert.Equal(t, "/user-space/", ClusterPath("/user-space/"))
assert.Equal(t, "/user_space/", ClusterPath("/user_space/"))

for _, tc := range []byte{'*', '#', '_', '-'} {
tcString := string(tc)

t.Run(tcString, func(t *testing.T) {
assert.Equal(t, "", ClusterPath("", tc))
assert.Equal(t, "/", ClusterPath("/", tc))
assert.Equal(t, fmt.Sprintf("/users/%[1]s/j4elk/%[1]s/job/%[1]s", tcString), ClusterPath("/users/fdklsd/j4elk/23993/job/2", tc))
assert.Equal(t, tcString, ClusterPath("123", tc))
assert.Equal(t, fmt.Sprintf("/%s", tcString), ClusterPath("/123", tc))
assert.Equal(t, fmt.Sprintf("%s/", tcString), ClusterPath("123/", tc))
assert.Equal(t, fmt.Sprintf("%[1]s/%[1]s", tcString), ClusterPath("123/ljgdflgjf", tc))
assert.Equal(t, fmt.Sprintf("/%s", tcString), ClusterPath("/**", tc))
assert.Equal(t, fmt.Sprintf("/u/%s", tcString), ClusterPath("/u/2", tc))
assert.Equal(t, fmt.Sprintf("/v1/products/%s", tcString), ClusterPath("/v1/products/2", tc))
assert.Equal(t, fmt.Sprintf("/v1/products/%s", tcString), ClusterPath("/v1/products/22", tc))
assert.Equal(t, fmt.Sprintf("/v1/products/%s", tcString), ClusterPath("/v1/products/22j", tc))
assert.Equal(t, fmt.Sprintf("/products/%[1]s/org/%[1]s", tcString), ClusterPath("/products/1/org/3", tc))
assert.Equal(t, fmt.Sprintf("/products//org/%s", tcString), ClusterPath("/products//org/3", tc))
assert.Equal(t, fmt.Sprintf("/v1/k6-test-runs/%s", tcString), ClusterPath("/v1/k6-test-runs/1", tc))
assert.Equal(t, "/attach", ClusterPath("/attach", tc))
assert.Equal(t, fmt.Sprintf("/usuarios/%[1]s/j4elk/%[1]s/trabajo/%[1]s", tcString), ClusterPath("/usuarios/fdklsd/j4elk/23993/trabajo/2", tc))
assert.Equal(t, fmt.Sprintf("/Benutzer/%[1]s/j4elk/%[1]s/Arbeit/%[1]s", tcString), ClusterPath("/Benutzer/fdklsd/j4elk/23993/Arbeit/2", tc))
assert.Equal(t, fmt.Sprintf("/utilisateurs/%[1]s/j4elk/%[1]s/tache/%[1]s", tcString), ClusterPath("/utilisateurs/fdklsd/j4elk/23993/tache/2", tc))
assert.Equal(t, "/products/", ClusterPath("/products/", tc))
assert.Equal(t, "/user-space/", ClusterPath("/user-space/", tc))
assert.Equal(t, "/user_space/", ClusterPath("/user_space/", tc))
})
}
}
18 changes: 10 additions & 8 deletions pkg/transform/routes.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ type RoutesConfig struct {
Patterns []string `yaml:"patterns"`
IgnorePatterns []string `yaml:"ignored_patterns"`
IgnoredEvents IgnoreMode `yaml:"ignore_mode"`
// Character that will be used to replace route segments
WildcardChar string `yaml:"wildcard_char,omitempty"`
}

func RoutesProvider(rc *RoutesConfig) pipe.MiddleProvider[[]request.Span, []request.Span] {
Expand Down Expand Up @@ -98,15 +100,15 @@ func (rn *routerNode) provideRoutes() (pipe.MiddleFunc[[]request.Span, []request
if routesEnabled {
s.Route = matcher.Find(s.Path)
}
unmatchAction(s)
unmatchAction(rc, s)
}
out <- spans
}
}, nil
}

func chooseUnmatchPolicy(rc *RoutesConfig) (func(span *request.Span), error) {
var unmatchAction func(span *request.Span)
func chooseUnmatchPolicy(rc *RoutesConfig) (func(rc *RoutesConfig, span *request.Span), error) {
var unmatchAction func(rc *RoutesConfig, span *request.Span)

switch rc.Unmatch {
case UnmatchWildcard, "":
Expand Down Expand Up @@ -142,23 +144,23 @@ func chooseUnmatchPolicy(rc *RoutesConfig) (func(span *request.Span), error) {
return unmatchAction, nil
}

func leaveUnmatchEmpty(_ *request.Span) {}
func leaveUnmatchEmpty(_ *RoutesConfig, _ *request.Span) {}

func setUnmatchToWildcard(str *request.Span) {
func setUnmatchToWildcard(_ *RoutesConfig, str *request.Span) {
if str.Route == "" {
str.Route = wildCard
}
}

func setUnmatchToPath(str *request.Span) {
func setUnmatchToPath(_ *RoutesConfig, str *request.Span) {
if str.Route == "" {
str.Route = str.Path
}
}

func classifyFromPath(s *request.Span) {
func classifyFromPath(rc *RoutesConfig, s *request.Span) {
if s.Route == "" && (s.Type == request.EventTypeHTTP || s.Type == request.EventTypeHTTPClient) {
s.Route = route.ClusterPath(s.Path)
s.Route = route.ClusterPath(s.Path, rc.WildcardChar[0])
}
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/transform/routes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func TestUnmatchedEmpty(t *testing.T) {
func TestUnmatchedAuto(t *testing.T) {
for _, tc := range []UnmatchType{UnmatchHeuristic} {
t.Run(string(tc), func(t *testing.T) {
router, err := RoutesProvider(&RoutesConfig{Unmatch: tc, Patterns: []string{"/user/:id"}})()
router, err := RoutesProvider(&RoutesConfig{Unmatch: tc, Patterns: []string{"/user/:id"}, WildcardChar: "*"})()
require.NoError(t, err)
in, out := make(chan []request.Span, 10), make(chan []request.Span, 10)
defer close(in)
Expand Down
Loading