Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix priority sampling dropping traces #686

Merged
merged 6 commits into from
Feb 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 93 additions & 27 deletions lib/ddtrace/sampler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,30 @@
module Datadog
# \Sampler performs client-side trace sampling.
class Sampler
def sample(_span)
raise NotImplementedError, 'samplers have to implement the sample() method'
def sample?(_span)
raise NotImplementedError, 'Samplers must implement the #sample? method'
end

def sample!(_span)
raise NotImplementedError, 'Samplers must implement the #sample! method'
end
end

# \AllSampler samples all the traces.
class AllSampler < Sampler
def sample(span)
def sample?(span)
true
end

def sample!(span)
span.sampled = true
end
end

# \RateSampler is based on a sample rate.
class RateSampler < Sampler
KNUTH_FACTOR = 1111111111111111111
SAMPLE_RATE_METRIC_KEY = '_sample_rate'.freeze()
SAMPLE_RATE_METRIC_KEY = '_sample_rate'.freeze

attr_reader :sample_rate

Expand All @@ -45,9 +53,14 @@ def sample_rate=(sample_rate)
@sampling_id_threshold = sample_rate * Span::MAX_ID
end

def sample(span)
span.set_metric(SAMPLE_RATE_METRIC_KEY, @sample_rate)
span.sampled = ((span.trace_id * KNUTH_FACTOR) % Datadog::Span::MAX_ID) <= @sampling_id_threshold
def sample?(span)
((span.trace_id * KNUTH_FACTOR) % Datadog::Span::MAX_ID) <= @sampling_id_threshold
end

def sample!(span)
(span.sampled = sample?(span)).tap do |sampled|
span.set_metric(SAMPLE_RATE_METRIC_KEY, @sample_rate) if sampled
end
end
end

Expand All @@ -62,11 +75,27 @@ def initialize(rate = 1.0, opts = {})
@sampler = { DEFAULT_KEY => @fallback }
end

def sample(span)
def sample?(span)
key = key_for(span)

@mutex.synchronize do
@sampler.fetch(key, @fallback).sample(span)
@sampler.fetch(key, @fallback).sample?(span)
end
end

def sample!(span)
key = key_for(span)

@mutex.synchronize do
@sampler.fetch(key, @fallback).sample!(span)
end
end

def sample_rate(span)
key = key_for(span)

@mutex.synchronize do
@sampler.fetch(key, @fallback).sample_rate
end
end

Expand All @@ -92,38 +121,75 @@ def key_for(span)
class PrioritySampler
extend Forwardable

SAMPLE_RATE_METRIC_KEY = '_sample_rate'.freeze

def initialize(opts = {})
@base_sampler = opts[:base_sampler] || RateSampler.new
@post_sampler = opts[:post_sampler] || RateByServiceSampler.new
@pre_sampler = opts[:base_sampler] || AllSampler.new
@priority_sampler = opts[:post_sampler] || RateByServiceSampler.new
end

def sample(span)
return perform_sampling(span) unless span.context
return sampled_by_upstream(span) if span.context.sampling_priority
def sample?(span)
@pre_sampler.sample?(span)
end

perform_sampling(span).tap do |sampled|
span.context.sampling_priority = if sampled
Datadog::Ext::Priority::AUTO_KEEP
else
Datadog::Ext::Priority::AUTO_REJECT
end
def sample!(span)
# If pre-sampling is configured, do it first. (By default, this will sample at 100%.)
# NOTE: Pre-sampling at rates < 100% may result in partial traces; not recommended.
span.sampled = pre_sample?(span) ? @pre_sampler.sample!(span) : true
brettlangdon marked this conversation as resolved.
Show resolved Hide resolved

if span.sampled
# If priority sampling has already been applied upstream, use that, otherwise...
unless priority_assigned_upstream?(span)
# Roll the dice and determine whether how we set the priority.
# NOTE: We'll want to leave `span.sampled = true` here; all spans for priority sampling must
# be sent to the agent. Otherwise metrics for traces will not be accurate, since the
# agent will have an incomplete dataset.
priority = priority_sample(span) ? Datadog::Ext::Priority::AUTO_KEEP : Datadog::Ext::Priority::AUTO_REJECT
assign_priority!(span, priority)
end
else
# If discarded by pre-sampling, set "reject" priority, so other
# services for the same trace don't sample needlessly.
assign_priority!(span, Datadog::Ext::Priority::AUTO_REJECT)
end

span.sampled
end

def_delegators :@post_sampler, :update
def_delegators :@priority_sampler, :update

private

def sampled_by_upstream(span)
span.sampled = priority_keep?(span.context.sampling_priority)
def pre_sample?(span)
case @pre_sampler
when RateSampler
@pre_sampler.sample_rate < 1.0
when RateByServiceSampler
@pre_sampler.sample_rate(span) < 1.0
else
true
end
end

def priority_assigned_upstream?(span)
span.context && !span.context.sampling_priority.nil?
end

def priority_keep?(sampling_priority)
sampling_priority == Datadog::Ext::Priority::USER_KEEP || sampling_priority == Datadog::Ext::Priority::AUTO_KEEP
def priority_sample(span)
@priority_sampler.sample?(span)
end

def perform_sampling(span)
@base_sampler.sample(span) && @post_sampler.sample(span)
def assign_priority!(span, priority)
if span.context
span.context.sampling_priority = priority
else
# Set the priority directly on the span instead, since otherwise
# it won't receive the appropriate tag.
span.set_metric(
Ext::DistributedTracing::SAMPLING_PRIORITY_KEY,
priority
)
end
end
end
end
2 changes: 1 addition & 1 deletion lib/ddtrace/tracer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def start_span(name, options = {})
span = Span.new(self, name, opts)
if parent.nil?
# root span
@sampler.sample(span)
@sampler.sample!(span)
span.set_tag('system.pid', Process.pid)
if ctx && ctx.trace_id && ctx.span_id
span.trace_id = ctx.trace_id
Expand Down
Loading