Skip to content
This repository has been archived by the owner on Aug 30, 2019. It is now read-only.

filters: apply replace rules only after obfuscation #477

Merged
merged 2 commits into from
Sep 28, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 9 additions & 15 deletions cmd/trace-agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@ import (
"github.com/DataDog/datadog-trace-agent/writer"
)

const (
processStatsInterval = time.Minute
)
const processStatsInterval = time.Minute

type processedTrace struct {
Trace model.Trace
Expand Down Expand Up @@ -50,7 +48,8 @@ func (pt *processedTrace) getSamplingPriority() (int, bool) {
type Agent struct {
Receiver *HTTPReceiver
Concentrator *Concentrator
Filters []filters.Filter
Blacklister *filters.Blacklister

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can I suggest Denylister instead?
rails/rails#33677

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm more along the lines of the second comment in that thread.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still worth considering though.

Replacer *filters.Replacer
ScoreSampler *Sampler
ErrorsScoreSampler *Sampler
PrioritySampler *Sampler
Expand Down Expand Up @@ -94,7 +93,6 @@ func NewAgent(ctx context.Context, conf *config.AgentConfig) *Agent {
conf.BucketInterval.Nanoseconds(),
statsChan,
)
f := filters.Setup(conf)

obf := obfuscate.NewObfuscator(conf.Obfuscation)
ss := NewScoreSampler(conf)
Expand All @@ -110,7 +108,8 @@ func NewAgent(ctx context.Context, conf *config.AgentConfig) *Agent {
return &Agent{
Receiver: r,
Concentrator: c,
Filters: f,
Blacklister: filters.NewBlacklister(conf.Ignore["resource"]),
Replacer: filters.NewReplacer(conf.ReplaceTags),
ScoreSampler: ss,
ErrorsScoreSampler: ess,
PrioritySampler: ps,
Expand Down Expand Up @@ -179,8 +178,6 @@ func (a *Agent) Run() {
// passes it downstream.
func (a *Agent) Process(t model.Trace) {
if len(t) == 0 {
// XXX Should never happen since we reject empty traces during
// normalization.
log.Debugf("skipping received empty trace")
return
}
Expand Down Expand Up @@ -230,15 +227,10 @@ func (a *Agent) Process(t model.Trace) {
return
}

for _, f := range a.Filters {
if f.Keep(root, &t) {
continue
}

log.Debugf("rejecting trace by filter: %T %v", f, *root)
if !a.Blacklister.Allows(root) {
log.Debugf("trace rejected by blacklister. root: %v", root)
atomic.AddInt64(&ts.TracesFiltered, 1)
atomic.AddInt64(&ts.SpansFiltered, int64(len(t)))

return
}

Expand All @@ -264,6 +256,8 @@ func (a *Agent) Process(t model.Trace) {
span.Truncate()
}

a.Replacer.Replace(&t)

pt := processedTrace{
Trace: t,
WeightedTrace: wt,
Expand Down
67 changes: 67 additions & 0 deletions cmd/trace-agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"fmt"
"net/http"
"regexp"
"runtime"
"strings"
"testing"
Expand All @@ -13,6 +14,7 @@ import (

"github.com/DataDog/datadog-trace-agent/config"
"github.com/DataDog/datadog-trace-agent/fixtures"
"github.com/DataDog/datadog-trace-agent/info"
"github.com/DataDog/datadog-trace-agent/model"
"github.com/DataDog/datadog-trace-agent/obfuscate"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -107,6 +109,71 @@ func TestFormatTrace(t *testing.T) {
assert.Contains(result.Meta["sql.query"], "SELECT name FROM people WHERE age = ?")
}

func TestProcess(t *testing.T) {
t.Run("Replacer", func(t *testing.T) {
// Ensures that for "sql" type spans:
// • obfuscator runs before replacer
// • obfuscator obfuscates both resource and "sql.query" tag
// • resulting resource is obfuscated with replacements applied
// • resulting "sql.query" tag is obfuscated with no replacements applied
cfg := config.New()
cfg.APIKey = "test"
cfg.ReplaceTags = []*config.ReplaceRule{{
Name: "resource.name",
Re: regexp.MustCompile("AND.*"),
Repl: "...",

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This replacement is something that would normally break the parser for normalization, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possibly, although that wasn't the case being tested here. But you're right, this is yet another bug that is resolved by this change.

}}
ctx, cancel := context.WithCancel(context.Background())
agent := NewAgent(ctx, cfg)
defer cancel()

now := time.Now()
span := &model.Span{
Resource: "SELECT name FROM people WHERE age = 42 AND extra = 55",
Type: "sql",
Start: now.Add(-time.Second).UnixNano(),
Duration: (500 * time.Millisecond).Nanoseconds(),
}
agent.Process(model.Trace{span})

assert := assert.New(t)
assert.Equal("SELECT name FROM people WHERE age = ? ...", span.Resource)
assert.Equal("SELECT name FROM people WHERE age = ? AND extra = ?", span.Meta["sql.query"])
})

t.Run("Blacklister", func(t *testing.T) {
cfg := config.New()
cfg.APIKey = "test"
cfg.Ignore["resource"] = []string{"^INSERT.*"}
ctx, cancel := context.WithCancel(context.Background())
agent := NewAgent(ctx, cfg)
defer cancel()

now := time.Now()
spanValid := &model.Span{
Resource: "SELECT name FROM people WHERE age = 42 AND extra = 55",
Type: "sql",
Start: now.Add(-time.Second).UnixNano(),
Duration: (500 * time.Millisecond).Nanoseconds(),
}
spanInvalid := &model.Span{
Resource: "INSERT INTO db VALUES (1, 2, 3)",
Type: "sql",
Start: now.Add(-time.Second).UnixNano(),
Duration: (500 * time.Millisecond).Nanoseconds(),
}

stats := agent.Receiver.stats.GetTagStats(info.Tags{})
assert := assert.New(t)

agent.Process(model.Trace{spanValid})
assert.EqualValues(0, stats.TracesFiltered)

agent.Process(model.Trace{spanInvalid})
assert.EqualValues(1, stats.TracesFiltered)
})
}

func BenchmarkAgentTraceProcessing(b *testing.B) {
c := config.New()
c.APIKey = "test"
Expand Down
19 changes: 0 additions & 19 deletions filters/base.go

This file was deleted.

45 changes: 45 additions & 0 deletions filters/blacklister.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package filters

import (
"regexp"

"github.com/DataDog/datadog-trace-agent/model"

log "github.com/cihub/seelog"
)

// Blacklister holds a list of regular expressions which will match resources
// on spans that should be dropped.
type Blacklister struct {
list []*regexp.Regexp
}

// Allows returns true if the Blacklister permits this span.
func (f *Blacklister) Allows(span *model.Span) bool {
for _, entry := range f.list {
if entry.MatchString(span.Resource) {
return false
}
}
return true
}

// NewBlacklister creates a new Blacklister based on the given list of
// regular expressions.
func NewBlacklister(exprs []string) *Blacklister {
return &Blacklister{list: compileRules(exprs)}
}

// compileRules compiles as many rules as possible from the list of expressions.
func compileRules(exprs []string) []*regexp.Regexp {
list := make([]*regexp.Regexp, 0, len(exprs))
for _, entry := range exprs {
rule, err := regexp.Compile(entry)
if err != nil {
log.Errorf("invalid resource filter: %q", entry)
continue
}
list = append(list, rule)
}
return list
}
47 changes: 47 additions & 0 deletions filters/blacklister_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package filters

import (
"testing"

"github.com/DataDog/datadog-trace-agent/fixtures"
"github.com/stretchr/testify/assert"
)

func TestBlacklister(t *testing.T) {
tests := []struct {
filter []string
resource string
expectation bool
}{
{[]string{"/foo/bar"}, "/foo/bar", false},
{[]string{"/foo/b.r"}, "/foo/bar", false},
{[]string{"[0-9]+"}, "/abcde", true},
{[]string{"[0-9]+"}, "/abcde123", false},
{[]string{"\\(foobar\\)"}, "(foobar)", false},
{[]string{"\\(foobar\\)"}, "(bar)", true},
{[]string{"(GET|POST) /healthcheck"}, "GET /foobar", true},
{[]string{"(GET|POST) /healthcheck"}, "GET /healthcheck", false},
{[]string{"(GET|POST) /healthcheck"}, "POST /healthcheck", false},
{[]string{"SELECT COUNT\\(\\*\\) FROM BAR"}, "SELECT COUNT(*) FROM BAR", false},
{[]string{"[123"}, "[123", true},
{[]string{"\\[123"}, "[123", false},
{[]string{"ABC+", "W+"}, "ABCCCC", false},
{[]string{"ABC+", "W+"}, "WWW", false},
}

for _, test := range tests {
span := fixtures.RandomSpan()
span.Resource = test.resource
filter := NewBlacklister(test.filter)

assert.Equal(t, test.expectation, filter.Allows(span))
}
}

func TestCompileRules(t *testing.T) {
filter := NewBlacklister([]string{"[123", "]123", "{6}"})
for i := 0; i < 100; i++ {
span := fixtures.RandomSpan()
assert.True(t, filter.Allows(span))
}
}
22 changes: 9 additions & 13 deletions filters/tag.go → filters/replacer.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,20 @@ import (
"github.com/DataDog/datadog-trace-agent/model"
)

var _ Filter = (*tagReplacer)(nil)

// tagReplacer is a filter which replaces tag values based on its
// Replacer is a filter which replaces tag values based on its
// settings. It keeps all spans.
type tagReplacer struct {
// replace maps tag keys to one or more sets of replacements
replace []*config.ReplaceRule
type Replacer struct {
rules []*config.ReplaceRule
}

func newTagReplacer(c *config.AgentConfig) *tagReplacer {
return &tagReplacer{replace: c.ReplaceTags}
// NewReplacer returns a new Replacer which will use the given set of rules.
func NewReplacer(rules []*config.ReplaceRule) *Replacer {
return &Replacer{rules: rules}
}

// Keep implements Filter.
func (f tagReplacer) Keep(root *model.Span, trace *model.Trace) bool {
for _, rule := range f.replace {
// Replace replaces all tags matching the Replacer's rules.
func (f Replacer) Replace(trace *model.Trace) {
for _, rule := range f.rules {
key, str, re := rule.Name, rule.Repl, rule.Re
for _, s := range *trace {
switch key {
Expand All @@ -42,6 +40,4 @@ func (f tagReplacer) Keep(root *model.Span, trace *model.Trace) bool {
}
}
}
// always return true as the goal of this filter is only to mutate data
return true
}
7 changes: 3 additions & 4 deletions filters/tag_test.go → filters/replacer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"github.com/stretchr/testify/assert"
)

func TestTagReplacer(t *testing.T) {
func TestReplacer(t *testing.T) {
assert := assert.New(t)
for _, tt := range []struct {
rules [][3]string
Expand Down Expand Up @@ -54,12 +54,11 @@ func TestTagReplacer(t *testing.T) {
},
} {
rules := parseRulesFromString(tt.rules)
tr := &tagReplacer{replace: rules}
tr := NewReplacer(rules)
root := replaceFilterTestSpan(tt.got)
childSpan := replaceFilterTestSpan(tt.got)
trace := model.Trace{root, childSpan}
ok := tr.Keep(root, &trace)
assert.True(ok)
tr.Replace(&trace)
for k, v := range tt.want {
switch k {
case "resource.name":
Expand Down
49 changes: 0 additions & 49 deletions filters/resource.go

This file was deleted.

Loading