From f6c86a471019e09611a15efa3f090b14c78a5943 Mon Sep 17 00:00:00 2001 From: Antoine Toulme Date: Thu, 31 Mar 2022 18:27:40 -0700 Subject: [PATCH 1/5] Add support to filter on log body --- .../processor/filterconfig/config.go | 12 ++++++++++-- .../processor/filterlog/filterlog.go | 16 ++++++++++++++++ .../processor/filterlog/filterlog_test.go | 7 ++++--- processor/attributesprocessor/README.md | 4 ++++ 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/internal/coreinternal/processor/filterconfig/config.go b/internal/coreinternal/processor/filterconfig/config.go index d767b101f152..5410e3b0e230 100644 --- a/internal/coreinternal/processor/filterconfig/config.go +++ b/internal/coreinternal/processor/filterconfig/config.go @@ -95,6 +95,10 @@ type MatchProperties struct { // Deprecated: the Name field is removed from the log data model. LogNames []string `mapstructure:"log_names"` + // LogBodies is a list of strings that the LogRecord's body field must match + // against. + LogBodies []string `mapstructure:"log_bodies"` + // MetricNames is a list of strings to match metric name against. // A match occurs if metric name matches at least one item in the list. // This field is optional. 
@@ -123,6 +127,10 @@ func (mp *MatchProperties) ValidateForSpans() error { return errors.New("log_names should not be specified for trace spans") } + if len(mp.LogBodies) > 0 { + return errors.New("log_bodies should not be specified for trace spans") + } + if len(mp.Services) == 0 && len(mp.SpanNames) == 0 && len(mp.Attributes) == 0 && len(mp.Libraries) == 0 && len(mp.Resources) == 0 { return errors.New(`at least one of "services", "span_names", "attributes", "libraries" or "resources" field must be specified`) @@ -137,8 +145,8 @@ func (mp *MatchProperties) ValidateForLogs() error { return errors.New("neither services nor span_names should be specified for log records") } - if len(mp.Attributes) == 0 && len(mp.Libraries) == 0 && len(mp.Resources) == 0 { - return errors.New(`at least one of "attributes", "libraries" or "resources" field must be specified`) + if len(mp.Attributes) == 0 && len(mp.Libraries) == 0 && len(mp.Resources) == 0 && len(mp.LogBodies) == 0 { + return errors.New(`at least one of "attributes", "libraries", "resources" or "log_bodies" field must be specified`) } return nil diff --git a/internal/coreinternal/processor/filterlog/filterlog.go b/internal/coreinternal/processor/filterlog/filterlog.go index c981fc18c655..4ee2343c0989 100644 --- a/internal/coreinternal/processor/filterlog/filterlog.go +++ b/internal/coreinternal/processor/filterlog/filterlog.go @@ -38,6 +38,9 @@ type propertiesMatcher struct { // log names to compare to. nameFilters filterset.FilterSet + + // log bodies to compare to. + bodyFilters filterset.FilterSet } // NewMatcher creates a LogRecord Matcher that matches based on the given MatchProperties. 
@@ -62,20 +65,33 @@ func NewMatcher(mp *filterconfig.MatchProperties) (Matcher, error) { return nil, fmt.Errorf("error creating log record name filters: %v", err) } } + var bodyFS filterset.FilterSet + if len(mp.LogBodies) > 0 { + bodyFS, err = filterset.CreateFilterSet(mp.LogBodies, &mp.Config) + if err != nil { + return nil, fmt.Errorf("error creating log record body filters: %v", err) + } + } return &propertiesMatcher{ PropertiesMatcher: rm, nameFilters: nameFS, + bodyFilters: bodyFS, }, nil } // MatchLogRecord matches a log record to a set of properties. // There are 3 sets of properties to match against. // The log record names are matched, if specified. +// The log record bodies are matched, if specified. // The attributes are then checked, if specified. // At least one of log record names or attributes must be specified. It is // supported to have more than one of these specified, and all specified must // evaluate to true for a match to occur. func (mp *propertiesMatcher) MatchLogRecord(lr pdata.LogRecord, resource pdata.Resource, library pdata.InstrumentationScope) bool { + if lr.Body().Type() == pdata.ValueTypeString && mp.bodyFilters.Matches(lr.Body().StringVal()) { + return true + } + return mp.PropertiesMatcher.Match(lr.Attributes(), resource, library) } diff --git a/internal/coreinternal/processor/filterlog/filterlog_test.go b/internal/coreinternal/processor/filterlog/filterlog_test.go index 7fc38a285d33..9fc6c862e7a5 100644 --- a/internal/coreinternal/processor/filterlog/filterlog_test.go +++ b/internal/coreinternal/processor/filterlog/filterlog_test.go @@ -40,14 +40,15 @@ func TestLogRecord_validateMatchesConfiguration_InvalidConfig(t *testing.T) { { name: "empty_property", property: filterconfig.MatchProperties{}, - errorString: "at least one of \"attributes\", \"libraries\" or \"resources\" field must be specified", + errorString: `at least one of "attributes", "libraries", "resources" or "log_bodies" field must be specified`, }, { name: 
"empty_log_names_and_attributes", property: filterconfig.MatchProperties{ - LogNames: []string{}, + LogNames: []string{}, + LogBodies: []string{}, }, - errorString: "at least one of \"attributes\", \"libraries\" or \"resources\" field must be specified", + errorString: `at least one of "attributes", "libraries", "resources" or "log_bodies" field must be specified`, }, { name: "span_properties", diff --git a/processor/attributesprocessor/README.md b/processor/attributesprocessor/README.md index 301a1333edcc..1cdc5a89ecdf 100644 --- a/processor/attributesprocessor/README.md +++ b/processor/attributesprocessor/README.md @@ -218,6 +218,10 @@ attributes: # This is an optional field. log_names: [, ..., ] + # The log body must match at least one of the items. + # This is an optional field. + log_bodies: [, ..., ] + # The metric name must match at least one of the items. # This is an optional field. metric_names: [, ..., ] From 1a298d0fabba3bf300672e6e481a918b560e2928 Mon Sep 17 00:00:00 2001 From: Antoine Toulme Date: Fri, 1 Apr 2022 09:52:49 -0700 Subject: [PATCH 2/5] code review --- .../processor/filterlog/filterlog.go | 2 +- .../processor/filterlog/filterlog_test.go | 8 ++++++++ processor/attributesprocessor/README.md | 6 +++--- .../attributesprocessor/testdata/config.yaml | 18 ++++++++++++++++++ 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/internal/coreinternal/processor/filterlog/filterlog.go b/internal/coreinternal/processor/filterlog/filterlog.go index 4ee2343c0989..c22fee6188bb 100644 --- a/internal/coreinternal/processor/filterlog/filterlog.go +++ b/internal/coreinternal/processor/filterlog/filterlog.go @@ -89,7 +89,7 @@ func NewMatcher(mp *filterconfig.MatchProperties) (Matcher, error) { // supported to have more than one of these specified, and all specified must // evaluate to true for a match to occur. 
func (mp *propertiesMatcher) MatchLogRecord(lr pdata.LogRecord, resource pdata.Resource, library pdata.InstrumentationScope) bool { - if lr.Body().Type() == pdata.ValueTypeString && mp.bodyFilters.Matches(lr.Body().StringVal()) { + if lr.Body().Type() == pdata.ValueTypeString && mp.bodyFilters != nil && mp.bodyFilters.Matches(lr.Body().StringVal()) { return true } diff --git a/internal/coreinternal/processor/filterlog/filterlog_test.go b/internal/coreinternal/processor/filterlog/filterlog_test.go index 9fc6c862e7a5..8aa0440048ce 100644 --- a/internal/coreinternal/processor/filterlog/filterlog_test.go +++ b/internal/coreinternal/processor/filterlog/filterlog_test.go @@ -150,10 +150,18 @@ func TestLogRecord_Matching_True(t *testing.T) { }, }, }, + { + name: "log_body_regexp_match", + properties: &filterconfig.MatchProperties{ + Config: *createConfig(filterset.Regexp), + LogBodies: []string{"AUTH.*"}, + }, + }, } lr := pdata.NewLogRecord() lr.Attributes().InsertString("abc", "def") + lr.Body().SetStringVal("AUTHENTICATION FAILED") for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { diff --git a/processor/attributesprocessor/README.md b/processor/attributesprocessor/README.md index 1cdc5a89ecdf..6a2270fc0f6a 100644 --- a/processor/attributesprocessor/README.md +++ b/processor/attributesprocessor/README.md @@ -166,13 +166,13 @@ if the input data should be included or excluded from the processor. To configur this option, under `include` and/or `exclude` at least `match_type` and one of the following is required: - For spans, one of `services`, `span_names`, `attributes`, `resources`, or `libraries` must be specified -with a non-empty value for a valid configuration. The `log_names`, `expressions`, `resource_attributes` and +with a non-empty value for a valid configuration. The `log_names`, `log_bodies`, `expressions`, `resource_attributes` and `metric_names` fields are invalid. 
-- For logs, one of `log_names`, `attributes`, `resources`, or `libraries` must be specified with a +- For logs, one of `log_names`, `log_bodies`, `attributes`, `resources`, or `libraries` must be specified with a non-empty value for a valid configuration. The `span_names`, `metric_names`, `expressions`, `resource_attributes`, and `services` fields are invalid. - For metrics, one of `metric_names`, `resources` must be specified -with a valid non-empty value for a valid configuration. The `span_names`, `log_names`, and +with a valid non-empty value for a valid configuration. The `span_names`, `log_names`, `log_bodies` and `services` fields are invalid. diff --git a/processor/attributesprocessor/testdata/config.yaml b/processor/attributesprocessor/testdata/config.yaml index e8a7428c34d7..2c68eb918de6 100644 --- a/processor/attributesprocessor/testdata/config.yaml +++ b/processor/attributesprocessor/testdata/config.yaml @@ -307,6 +307,24 @@ processors: action: update value: "SELECT * FROM USERS [obfuscated]" + + # The following demonstrates how to process logs that have a body that matches regexp + # patterns. This processor will remove "token" attribute and will obfuscate "password" + # attribute in logs where body matches "AUTH.*". + attributes/log_body_regexp: + # Specifies the log properties that must exist for the processor to be applied. + include: + # match_type defines that "log_bodies" is an array of regexp-es. + match_type: regexp + # The log body must match "AUTH.*" pattern.
+ log_bodies: ["AUTH.*"] + actions: + - key: password + action: update + value: "obfuscated" + - key: token + action: delete + receivers: nop: From 088dd038ea9c388d73271bbdfbac1d1f0ec2278c Mon Sep 17 00:00:00 2001 From: Antoine Toulme Date: Fri, 1 Apr 2022 09:55:59 -0700 Subject: [PATCH 3/5] add changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4aef2c97f2fb..a152d970d0ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Unreleased ### 💡 Enhancements 💡 +- `attributesprocessor`: Add support to filter on log body (#8996) ### 🛑 Breaking changes 🛑 From 71baf980888e3f8004d71add7ae48dc53d68e02b Mon Sep 17 00:00:00 2001 From: Antoine Toulme Date: Fri, 1 Apr 2022 12:53:21 -0700 Subject: [PATCH 4/5] Update processor/attributesprocessor/README.md Co-authored-by: Przemek Maciolek <58699843+pmm-sumo@users.noreply.github.com> --- processor/attributesprocessor/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/processor/attributesprocessor/README.md b/processor/attributesprocessor/README.md index 6a2270fc0f6a..c29b41ae33ff 100644 --- a/processor/attributesprocessor/README.md +++ b/processor/attributesprocessor/README.md @@ -219,6 +219,7 @@ attributes: log_names: [, ..., ] # The log body must match at least one of the items. + # Currently only string body types are supported. # This is an optional field. log_bodies: [, ..., ] From 9b838e5f2c415bb4f4f071c3379b9b5b5aaf6df0 Mon Sep 17 00:00:00 2001 From: Antoine Toulme Date: Fri, 1 Apr 2022 18:34:19 -0700 Subject: [PATCH 5/5] Update CHANGELOG.md Co-authored-by: Dmitrii Anoshin --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a152d970d0ef..97a3757de9b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Unreleased ### 💡 Enhancements 💡 + - `attributesprocessor`: Add support to filter on log body (#8996) ### 🛑 Breaking changes 🛑