Skip to content

Commit

Permalink
Merge pull request #2968 from DataDog/sampling-rules-config
Browse files Browse the repository at this point in the history
  • Loading branch information
marcotc authored Jul 14, 2023
2 parents 838d12a + 824c22d commit 8d3ef4f
Show file tree
Hide file tree
Showing 11 changed files with 255 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/GettingStarted.md
Original file line number Diff line number Diff line change
Expand Up @@ -2240,6 +2240,7 @@ For example, if `tracing.sampling.default_rate` is configured by [Remote Configu
| `tracing.sampler` | | `nil` | Advanced usage only. Sets a custom `Datadog::Tracing::Sampling::Sampler` instance. If provided, the tracer will use this sampler to determine sampling behavior. See [Application-side sampling](#application-side-sampling) for details. |
| `tracing.sampling.default_rate` | `DD_TRACE_SAMPLE_RATE` | `nil` | Sets the trace sampling rate between `0.0` (0%) and `1.0` (100%). See [Application-side sampling](#application-side-sampling) for details. |
| `tracing.sampling.rate_limit` | `DD_TRACE_RATE_LIMIT` | `100` (per second) | Sets a maximum number of traces per second to sample. Set a rate limit to avoid the ingestion volume overages in the case of traffic spikes. |
| `tracing.sampling.rules` | `DD_TRACE_SAMPLING_RULES` | `nil` | Sets trace-level sampling rules, matching against the local root span. The format is a `String` with JSON, containing an Array of Objects. Each Object must have a float attribute `sample_rate` (between 0.0 and 1.0, inclusive), and optionally `name` and `service` string attributes. `name` and `service` control to which traces this sampling rule applies; if both are absent, then this rule applies to all traces. Rules are evaluated in order of declaration in the array; only the first to match is applied. If none apply, then `tracing.sampling.default_rate` is applied. |
| `tracing.sampling.span_rules` | `DD_SPAN_SAMPLING_RULES`,`ENV_SPAN_SAMPLING_RULES_FILE` | `nil` | Sets [Single Span Sampling](#single-span-sampling) rules. These rules allow you to keep spans even when their respective traces are dropped. |
| `tracing.trace_id_128_bit_generation_enabled` | `DD_TRACE_128_BIT_TRACEID_GENERATION_ENABLED` | `false` | `true` to generate 128 bits trace ID and `false` to generate 64 bits trace ID |
| `tracing.report_hostname` | `DD_TRACE_REPORT_HOSTNAME` | `false` | Adds hostname tag to traces. |
Expand Down
10 changes: 10 additions & 0 deletions lib/datadog/core/backport.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,16 @@ def self.dup(value)
value.dup
end
end

# In-place removal of every entry whose value is `nil`.
#
# Mirrors the return contract of `Hash#compact!`: the receiver hash when at
# least one entry was removed, `nil` when nothing changed.
#
# The implementation is chosen once, when this file is loaded.
if ::Hash.method_defined?(:compact!)
  # The running Ruby ships `Hash#compact!`: delegate to it.
  def self.hash_compact!(hash)
    hash.compact!
  end
else
  # No native `Hash#compact!`: emulate it by rejecting `nil` values.
  def self.hash_compact!(hash)
    hash.reject! { |_, entry| entry.nil? }
  end
end
end
end
end
17 changes: 17 additions & 0 deletions lib/datadog/tracing/component.rb
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,23 @@ def build_sampler(settings)
else
ensure_priority_sampling(sampler, settings)
end
elsif (rules = settings.tracing.sampling.rules)
post_sampler = Tracing::Sampling::RuleSampler.parse(
rules,
settings.tracing.sampling.rate_limit,
settings.tracing.sampling.default_rate
)

post_sampler ||= # Fallback RuleSampler in case `rules` parsing fails
Tracing::Sampling::RuleSampler.new(
rate_limit: settings.tracing.sampling.rate_limit,
default_sample_rate: settings.tracing.sampling.default_rate
)

Tracing::Sampling::PrioritySampler.new(
base_sampler: Tracing::Sampling::AllSampler.new,
post_sampler: post_sampler
)
elsif settings.tracing.priority_sampling == false
Tracing::Sampling::RuleSampler.new(
rate_limit: settings.tracing.sampling.rate_limit,
Expand Down
1 change: 1 addition & 0 deletions lib/datadog/tracing/configuration/ext.rb
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ module NET
module Sampling
ENV_SAMPLE_RATE = 'DD_TRACE_SAMPLE_RATE'
ENV_RATE_LIMIT = 'DD_TRACE_RATE_LIMIT'
# Name of the environment variable read as the default for the sampling `rules` option.
ENV_RULES = 'DD_TRACE_SAMPLING_RULES'

# @public_api
module Span
Expand Down
16 changes: 16 additions & 0 deletions lib/datadog/tracing/configuration/settings.rb
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,22 @@ def self.extended(base)
o.default { env_to_float(Tracing::Configuration::Ext::Sampling::ENV_RATE_LIMIT, 100) }
end

# Trace sampling rules.
# These rules control whether a trace is kept or dropped by the tracer.
#
# The `rules` format is a String with a JSON array of objects:
# Each object must have a `sample_rate`, and the `name` and `service` fields
# are optional. The `sample_rate` value must be between 0.0 and 1.0 (inclusive).
# `name` and `service` are Strings that allow the `sample_rate` to be applied only
# to traces matching the `name` and `service`.
#
# @default `DD_TRACE_SAMPLING_RULES` environment variable. Otherwise `nil`.
# @return [String,nil]
# @public_api
option :rules do |o|
  # Constant referenced through `Tracing::`, like the neighbouring sampling options.
  o.default { ENV.fetch(Tracing::Configuration::Ext::Sampling::ENV_RULES, nil) }
end

# Single span sampling rules.
# These rules allow a span to be kept when its encompassing trace is dropped.
#
Expand Down
29 changes: 29 additions & 0 deletions lib/datadog/tracing/sampling/rule_sampler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,35 @@ def initialize(
end
end

# Builds a rule sampler from trace sampling rules encoded as a JSON String.
#
# `rules` must hold a JSON array of objects. Each object requires a `sample_rate`
# that `Float()` can convert (numbers and numeric strings), and may provide
# `name` and `service`. Every object becomes a {SimpleRule}, in declaration order.
#
# Parsing is all-or-nothing: a single invalid rule rejects the whole list.
#
# @param rules [String] JSON array of sampling rule objects
# @param rate_limit forwarded unchanged as the sampler's `rate_limit:` option
# @param default_sample_rate forwarded unchanged as the sampler's `default_sample_rate:` option
# @return [RuleSampler, nil] the configured sampler, or `nil` (after logging an
#   error) when `rules` cannot be parsed
def self.parse(rules, rate_limit, default_sample_rate)
  decoded = JSON.parse(rules)

  # Validate the shape explicitly: a JSON object also responds to `map` (yielding
  # key/value pairs), so `{}` would be silently accepted as an empty rule list, and
  # scalars would only fail through an unrelated `NoMethodError`.
  raise "Expected a JSON array of rules, but got #{decoded.class.name}" unless decoded.is_a?(Array)

  parsed_rules = decoded.map do |rule|
    raise "Rule '#{rule.inspect}' is not a JSON object" unless rule.is_a?(Hash)

    begin
      sample_rate = Float(rule['sample_rate'])
    rescue ArgumentError, TypeError # Raised by `Float()` for malformed and for missing/non-numeric values
      raise "Rule '#{rule.inspect}' does not contain a float property `sample_rate`"
    end

    kwargs = {
      name: rule['name'],
      service: rule['service'],
      sample_rate: sample_rate,
    }

    # Absent attributes are dropped instead of being passed to SimpleRule as explicit `nil`s.
    Core::BackportFrom24.hash_compact!(kwargs)

    SimpleRule.new(**kwargs)
  end

  new(parsed_rules, rate_limit: rate_limit, default_sample_rate: default_sample_rate)
rescue => e
  # Best effort: report the problem and let the caller decide on a fallback sampler.
  Datadog.logger.error do
    "Could not parse trace sampling rules '#{rules}': #{e.class.name} #{e.message} at #{Array(e.backtrace).first}"
  end
  nil
end

# /RuleSampler's components (its rate limiter, for example) are
# not guaranteed to be side-effect free.
# It is not possible to guarantee that a call to {#sample?} will
Expand Down
1 change: 1 addition & 0 deletions sig/datadog/tracing/configuration/ext.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ module Datadog
ENV_REPORT_HOSTNAME: "DD_TRACE_REPORT_HOSTNAME"
end
module Sampling
# Name of the environment variable holding the trace sampling rules.
# Declared with a literal type, like the sibling constants in this module.
ENV_RULES: "DD_TRACE_SAMPLING_RULES"
ENV_SAMPLE_RATE: "DD_TRACE_SAMPLE_RATE"

ENV_RATE_LIMIT: "DD_TRACE_RATE_LIMIT"
Expand Down
20 changes: 20 additions & 0 deletions spec/datadog/core/configuration/components_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,26 @@
end
end

# With `sampling.rules` set, the built tracer is expected to use a PrioritySampler
# whose `priority_sampler` carries the parsed rules.
context 'with sampling.rules' do
  before do
    allow(settings.tracing.sampling).to receive(:rules).and_return(rules)
  end

  context 'with rules' do
    # Note that the sample rate is provided as a JSON string here, not as a number.
    let(:rules) { '[{"sample_rate":"0.123"}]' }

    it_behaves_like 'new tracer' do
      let(:sampler) do
        lambda do |built_sampler|
          expect(built_sampler).to be_a(Datadog::Tracing::Sampling::PrioritySampler)
          expect(built_sampler.pre_sampler).to be_a(Datadog::Tracing::Sampling::AllSampler)

          parsed_rules = built_sampler.priority_sampler.rules
          expect(parsed_rules).to have(1).item
          expect(parsed_rules.first.sampler.sample_rate).to eq(0.123)
        end
      end
    end
  end
end

context 'with sampling.span_rules' do
before { allow(settings.tracing.sampling).to receive(:span_rules).and_return(rules) }

Expand Down
18 changes: 18 additions & 0 deletions spec/datadog/tracing/configuration/settings_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,24 @@ def propagation_inject_style
end
end

# `tracing.sampling.rules`: `nil` by default, otherwise the raw value of DD_TRACE_SAMPLING_RULES.
describe '#rules' do
  subject(:rules) { settings.tracing.sampling.rules }

  context 'default' do
    it { is_expected.to be_nil }
  end

  context 'when ENV is provided' do
    let(:env_rules) { '[{"sample_rate":0.2}]' }

    around do |example|
      ClimateControl.modify('DD_TRACE_SAMPLING_RULES' => env_rules) { example.run }
    end

    # The setting exposes the String as-is; no JSON parsing happens at this level.
    it { is_expected.to eq(env_rules) }
  end
end

describe '#span_rules' do
subject(:rules) { settings.tracing.sampling.span_rules }

Expand Down
48 changes: 48 additions & 0 deletions spec/datadog/tracing/integration_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,17 @@ def agent_receives_span_step3(previous_success)
it_behaves_like 'sampling decision', nil
end

# Sets DD_TRACE_SAMPLING_RULES to a one-element JSON array holding `rule`
# while each example runs. Including groups must define `rule`.
shared_context 'DD_TRACE_SAMPLING_RULES configuration' do
  # NOTE(review): presumably clears any explicit sampler so the environment
  # variable drives the sampler construction - confirm against the enclosing spec.
  let(:sampler) { nil }
  let(:rules_json) { [rule].to_json }

  around do |example|
    env_overrides = { 'DD_TRACE_SAMPLING_RULES' => rules_json }
    ClimateControl.modify(env_overrides) { example.run }
  end
end

context 'with rule' do
let(:rule_sampler) { Datadog::Tracing::Sampling::RuleSampler.new([rule], **rule_sampler_opt) }
let(:rule_sampler_opt) { {} }
Expand All @@ -298,6 +309,18 @@ def agent_receives_span_step3(previous_success)
it_behaves_like 'rate limit metric', 1.0
it_behaves_like 'sampling decision', '-3'

# A rule provided through the environment variable, with a 100% sample rate.
context 'set through DD_TRACE_SAMPLING_RULES environment variable' do
  include_context 'DD_TRACE_SAMPLING_RULES configuration'

  # Plain Hash: the shared context serializes it to JSON.
  let(:rule) { { name: 'my.op', sample_rate: 1.0 } }

  it_behaves_like 'flushed trace'
  it_behaves_like 'priority sampled', Datadog::Tracing::Sampling::Ext::Priority::USER_KEEP
  it_behaves_like 'rule sampling rate metric', 1.0
  it_behaves_like 'rate limit metric', 1.0
  it_behaves_like 'sampling decision', '-3'
end

context 'with low sample rate' do
let(:rule) { Datadog::Tracing::Sampling::SimpleRule.new(sample_rate: Float::MIN) }

Expand All @@ -306,6 +329,18 @@ def agent_receives_span_step3(previous_success)
it_behaves_like 'rule sampling rate metric', Float::MIN
it_behaves_like 'rate limit metric', nil # Rate limiter is never reached, thus has no value to provide
it_behaves_like 'sampling decision', nil

# A match-all rule provided through the environment variable, with the smallest positive sample rate.
context 'set through DD_TRACE_SAMPLING_RULES environment variable' do
  include_context 'DD_TRACE_SAMPLING_RULES configuration'

  # Plain Hash: the shared context serializes it to JSON.
  let(:rule) { { sample_rate: Float::MIN } }

  it_behaves_like 'flushed trace'
  it_behaves_like 'priority sampled', Datadog::Tracing::Sampling::Ext::Priority::USER_REJECT
  it_behaves_like 'rule sampling rate metric', Float::MIN
  it_behaves_like 'rate limit metric', nil # Rate limiter is never reached, thus has no value to provide
  it_behaves_like 'sampling decision', nil
end
end

context 'rate limited' do
Expand All @@ -328,6 +363,19 @@ def agent_receives_span_step3(previous_success)
it_behaves_like 'rule sampling rate metric', nil
it_behaves_like 'rate limit metric', nil
it_behaves_like 'sampling decision', '-0'

# A rule provided through the environment variable that is valid but targets another operation name.
context 'set through DD_TRACE_SAMPLING_RULES environment variable' do
  include_context 'DD_TRACE_SAMPLING_RULES configuration' do
    # `sample_rate` is mandatory: without it the whole rule list is rejected, and this
    # context would exercise the parse-failure fallback instead of a non-matching rule.
    let(:rule) { { name: 'not.my.op', sample_rate: 1.0 } }
  end

  it_behaves_like 'flushed trace'
  # The PrioritySampler was responsible for the sampling decision, not the Rule Sampler.
  it_behaves_like 'priority sampled', Datadog::Tracing::Sampling::Ext::Priority::AUTO_KEEP
  it_behaves_like 'rule sampling rate metric', nil
  it_behaves_like 'rate limit metric', nil
  it_behaves_like 'sampling decision', '-0'
end
end
end
end
Expand Down
94 changes: 94 additions & 0 deletions spec/datadog/tracing/sampling/rule_sampler_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,100 @@
end
end

# `.parse` builds a sampler from a JSON String of rules and returns `nil`
# (after logging an error) when the rules cannot be parsed.
describe '.parse' do
  # By default, `rules` is serialized to JSON before being handed to `.parse`.
  subject(:parse) { described_class.parse(rules.to_json, rate_limit, default_sample_rate) }

  let(:rules) { [rule] }
  let(:rate_limit) { nil }
  let(:default_sample_rate) { nil }

  # The single parsed rule; also asserts that exactly one rule was produced.
  let(:actual_rule) do
    expect(parse.rules).to have(1).item
    parse.rules[0]
  end

  let(:actual_rules) { parse.rules }

  context 'with sample_rate' do
    let(:rule) { { sample_rate: 0.1 } }

    it 'parses as a match any' do
      expect(actual_rule.matcher.name).to eq(Datadog::Tracing::Sampling::SimpleMatcher::MATCH_ALL)
      expect(actual_rule.matcher.service).to eq(Datadog::Tracing::Sampling::SimpleMatcher::MATCH_ALL)
      expect(actual_rule.sampler.sample_rate).to eq(0.1)
    end

    context 'and name' do
      let(:rule) { { sample_rate: 0.1, name: 'test-name' } }

      it 'parses matching any service' do
        expect(actual_rule.matcher.name).to eq('test-name')
        expect(actual_rule.matcher.service).to eq(Datadog::Tracing::Sampling::SimpleMatcher::MATCH_ALL)
        expect(actual_rule.sampler.sample_rate).to eq(0.1)
      end
    end

    context 'and service' do
      let(:rule) { { sample_rate: 0.1, service: 'test-service' } }

      it 'parses matching any name' do
        expect(actual_rule.matcher.name).to eq(Datadog::Tracing::Sampling::SimpleMatcher::MATCH_ALL)
        expect(actual_rule.matcher.service).to eq('test-service')
        expect(actual_rule.sampler.sample_rate).to eq(0.1)
      end
    end

    context 'with multiple rules' do
      let(:rules) { [{ sample_rate: 0.1 }, { sample_rate: 0.2 }] }

      it 'parses all rules in order' do
        expect(actual_rules).to have(2).items
        expect(actual_rules[0].sampler.sample_rate).to eq(0.1)
        expect(actual_rules[1].sampler.sample_rate).to eq(0.2)
      end
    end
  end

  context 'with a non-float sample_rate' do
    let(:rule) { { sample_rate: 'oops' } }

    it 'does not accept rule with a non-float sample_rate' do
      expect(Datadog.logger).to receive(:error)
      is_expected.to be_nil
    end
  end

  context 'without a sample_rate' do
    let(:rule) { { name: 'test' } }

    it 'does not accept rule missing the mandatory sample_rate' do
      expect(Datadog.logger).to receive(:error)
      is_expected.to be_nil
    end

    context 'with multiple rules' do
      let(:rules) { [{ sample_rate: 0.1 }, { name: 'test' }] }

      it 'rejects all rules if one is missing the mandatory sample_rate' do
        expect(Datadog.logger).to receive(:error)
        is_expected.to be_nil
      end
    end
  end

  context 'without a valid JSON array' do
    context 'with malformed JSON' do
      # Bypass `rules.to_json`: it would encode the String as a *valid* JSON document,
      # and the JSON parsing failure would never be exercised.
      subject(:parse) { described_class.parse('not a json array', rate_limit, default_sample_rate) }

      it 'returns nil in case of parsing error' do
        expect(Datadog.logger).to receive(:error)
        is_expected.to be_nil
      end
    end

    context 'with valid JSON that is not an array' do
      # Serialized by the default subject as a JSON string, not a JSON array.
      let(:rules) { 'not a json array' }

      it 'returns nil when the document is not an array' do
        expect(Datadog.logger).to receive(:error)
        is_expected.to be_nil
      end
    end
  end
end

shared_context 'matching rule' do
let(:rules) { [rule] }
let(:rule) { instance_double(Datadog::Tracing::Sampling::Rule) }
Expand Down

0 comments on commit 8d3ef4f

Please sign in to comment.