Skip to content

Commit

Permalink
rescue all exceptions for metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
orioldsm committed Sep 6, 2024
1 parent 0ef322c commit 0a1f02f
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 12 deletions.
12 changes: 9 additions & 3 deletions lib/sidekiq/instrument/middleware/server.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,21 @@ def call(worker, job, _queue, &block)
execution_time_ms = (Time.now - start_time) * 1000
Statter.statsd.measure(metric_name(worker, 'runtime'), execution_time_ms)
Statter.dogstatsd&.timing('sidekiq.runtime', execution_time_ms, worker_dog_options(worker))
rescue StandardError => e
rescue Exception => e
dd_options = worker_dog_options(worker)
dd_options[:tags] << "error:#{e.class.name}"

# if we have retries left, increment the enqueue.retry counter to indicate the job is going back on the queue
if max_retries(worker) > current_retries(job) + 1
WorkerMetrics.trace_workers_increment_counter(worker.class.to_s.underscore)
Statter.dogstatsd&.increment('sidekiq.enqueue.retry', worker_dog_options(worker))
Statter.dogstatsd&.increment('sidekiq.enqueue.retry', dd_options)
end

# categorize rate limit lock errors differently from actual errors
error_string = e.class.name.eql?("RedisRateLimit::Throttle::LockFailedError") ? 'sidekiq.error.redis_rate_lock' : 'sidekiq.error'
Statter.dogstatsd&.increment(error_string, dd_options)
Statter.statsd.increment(metric_name(worker, 'error'))
Statter.dogstatsd&.increment('sidekiq.error', worker_dog_options(worker))

raise e
ensure
WorkerMetrics.trace_workers_decrement_counter(worker.class.to_s.underscore)
Expand Down
2 changes: 1 addition & 1 deletion lib/sidekiq/instrument/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module Sidekiq
module Instrument
VERSION = '0.7.1'
VERSION = '0.7.2'
end
end
49 changes: 41 additions & 8 deletions spec/sidekiq-instrument/server_middleware_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
RSpec.describe Sidekiq::Instrument::ServerMiddleware do
describe '#call' do
let(:expected_dog_options) { { tags: ['queue:default', 'worker:my_worker'] } }
let(:expected_error_dog_options) { { tags: ['queue:default', 'worker:my_worker', 'error:RuntimeError'] } }

let(:worker_metric_name) do
'sidekiq_instrument_trace_workers::in_queue'
end
Expand Down Expand Up @@ -83,7 +85,7 @@
context 'when a retried job succeeds' do
before do
Sidekiq[:max_retries] = 1
allow_any_instance_of(MyWorker).to receive(:perform).and_raise('foo')
allow_any_instance_of(MyWorker).to receive(:perform).and_raise(RuntimeError.new('foo'))

# This makes the job look like a retry since we can't access the job argument
allow_any_instance_of(Sidekiq::Instrument::ServerMiddleware).to receive(:current_retries).and_return(0)
Expand Down Expand Up @@ -111,7 +113,7 @@
context 'when a job fails' do
before do
Sidekiq[:max_retries] = 0
allow_any_instance_of(MyWorker).to receive(:perform).and_raise('foo')
allow_any_instance_of(MyWorker).to receive(:perform).and_raise(RuntimeError.new('foo'))
end

it 'increments the StatsD error counter' do
Expand All @@ -129,7 +131,7 @@
expect(Sidekiq::Instrument::Statter.dogstatsd).not_to receive(:time)
expect(
Sidekiq::Instrument::Statter.dogstatsd
).to receive(:increment).with('sidekiq.error', expected_dog_options).once
).to receive(:increment).with('sidekiq.error', expected_error_dog_options).once

begin
MyWorker.perform_async
Expand All @@ -151,11 +153,11 @@
).to receive(:increment).with('sidekiq.dequeue', expected_dog_options).once
expect(
Sidekiq::Instrument::Statter.dogstatsd
).not_to receive(:increment).with('sidekiq.enqueue.retry', expected_dog_options)
).not_to receive(:increment).with('sidekiq.enqueue.retry', expected_error_dog_options)
expect(Sidekiq::Instrument::Statter.dogstatsd).not_to receive(:time)
expect(
Sidekiq::Instrument::Statter.dogstatsd
).to receive(:increment).with('sidekiq.error', expected_dog_options).once
).to receive(:increment).with('sidekiq.error', expected_error_dog_options).once

begin
MyWorker.perform_async
Expand Down Expand Up @@ -185,11 +187,11 @@
).to receive(:increment).with('sidekiq.dequeue', expected_dog_options).once
expect(
Sidekiq::Instrument::Statter.dogstatsd
).to receive(:increment).with('sidekiq.enqueue.retry', expected_dog_options).once
).to receive(:increment).with('sidekiq.enqueue.retry', expected_error_dog_options).once
expect(Sidekiq::Instrument::Statter.dogstatsd).not_to receive(:time)
expect(
Sidekiq::Instrument::Statter.dogstatsd
).to receive(:increment).with('sidekiq.error', expected_dog_options).once
).to receive(:increment).with('sidekiq.error', expected_error_dog_options).once

begin
MyWorker.perform_async
Expand Down Expand Up @@ -223,7 +225,38 @@
expect(Sidekiq::Instrument::Statter.dogstatsd).not_to receive(:time)
expect(
Sidekiq::Instrument::Statter.dogstatsd
).to receive(:increment).with('sidekiq.error', expected_dog_options).once
).to receive(:increment).with('sidekiq.error', expected_error_dog_options).once

begin
MyWorker.perform_async
rescue StandardError
nil
end
end
end

context 'when the error is RedisRateLimit::Throttle::LockFailedError' do
let(:expected_lock_error_dog_options) { { tags: ['queue:default', 'worker:my_worker', 'error:RedisRateLimit::Throttle::LockFailedError'] } }
let(:lock_error) { RuntimeError.new('foo') }

before do
# force the error's class name to be the RedisRateLimit error type string
# it's an external error class and we can't actually create an instance of the error
allow(lock_error).to receive_message_chain(:class, :name).and_return("RedisRateLimit::Throttle::LockFailedError")
allow_any_instance_of(MyWorker).to receive(:perform).and_raise(lock_error)
end

it 'increments the DogStatsD sidekiq.error.redis_rate_lock counter' do
expect(
Sidekiq::Instrument::Statter.dogstatsd
).to receive(:increment).with('sidekiq.dequeue', expected_dog_options).once
expect(
Sidekiq::Instrument::Statter.dogstatsd
).not_to receive(:increment).with('sidekiq.enqueue.retry', expected_lock_error_dog_options)
expect(Sidekiq::Instrument::Statter.dogstatsd).not_to receive(:time)
expect(
Sidekiq::Instrument::Statter.dogstatsd
).to receive(:increment).with('sidekiq.error.redis_rate_lock', expected_lock_error_dog_options).once

begin
MyWorker.perform_async
Expand Down

0 comments on commit 0a1f02f

Please sign in to comment.