Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce remote configuration error logging #3011

Merged
merged 4 commits into from
Jul 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lib/datadog/core/remote/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ module Core
module Remote
# Client communicates with the agent and sync remote configuration
class Client
class TransportError < StandardError; end
class SyncError < StandardError; end

attr_reader :transport, :repository, :id, :dispatcher
Expand Down Expand Up @@ -107,6 +108,8 @@ def sync
else
dispatcher.dispatch(changes, repository)
end
elsif response.internal_error?
raise TransportError, response.to_s
end
end
# rubocop:enable Metrics/AbcSize,Metrics/PerceivedComplexity,Metrics/MethodLength,Metrics/CyclomaticComplexity
Expand Down
5 changes: 5 additions & 0 deletions lib/datadog/core/remote/component.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ def initialize(settings, capabilities, agent_settings)
"remote worker client sync error: #{e.message} location: #{Array(e.backtrace).first}. skipping sync"
end
rescue StandardError => e
# In case of unexpected errors, reset the negotiation object
# given external conditions have changed and the negotiation
# negotiation object stores error logging state that should be reset.
negotiation = Negotiation.new(settings, agent_settings)

Datadog.logger.error do
"remote worker error: #{e.class.name} #{e.message} location: #{Array(e.backtrace).first}. "\
'reseting client state'
Expand Down
21 changes: 17 additions & 4 deletions lib/datadog/core/remote/negotiation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,44 @@ def initialize(_settings, agent_settings)
transport_options[:agent_settings] = agent_settings if agent_settings

@transport_root = Datadog::Core::Transport::HTTP.root(**transport_options.dup)
@logged = {}
end

def endpoint?(path)
res = @transport_root.send_info

if res.internal_error? && network_error?(res.error)
Datadog.logger.error { "agent unreachable: cannot negotiate #{path}" }
unless @logged[:agent_unreachable]
Datadog.logger.error { "agent unreachable: cannot negotiate #{path}" }
@logged[:agent_unreachable] = true
end

return false
end

if res.not_found?
Datadog.logger.error { "agent reachable but has no /info endpoint: cannot negotiate #{path}" }
unless @logged[:no_info_endpoint]
Datadog.logger.error { "agent reachable but has no /info endpoint: cannot negotiate #{path}" }
@logged[:no_info_endpoint] = true
end

return false
end

unless res.ok?
Datadog.logger.error { "agent reachable but unexpected response: cannot negotiate #{path}" }
unless @logged[:unexpected_response]
Datadog.logger.error { "agent reachable but unexpected response: cannot negotiate #{path}" }
@logged[:unexpected_response] = true
end

return false
end

unless res.endpoints.include?(path)
Datadog.logger.error { "agent reachable but does not report #{path}" }
unless @logged[:no_config_endpoint]
Datadog.logger.error { "agent reachable but does not report #{path}" }
@logged[:no_config_endpoint] = true
end

return false
end
Expand Down
2 changes: 1 addition & 1 deletion lib/datadog/core/transport/http/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def send_request(request, &block)
"Internal error during #{self.class.name} request. Cause: #{e.class.name} #{e.message} " \
"Location: #{Array(e.backtrace).first}"

Datadog.logger.error(message)
Datadog.logger.debug(message)

Datadog::Transport::InternalErrorResponse.new(e)
end
Expand Down
3 changes: 3 additions & 0 deletions sig/datadog/core/remote/client.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ module Datadog
module Core
module Remote
class Client
class TransportError < StandardError
end

class SyncError < StandardError
end

Expand Down
1 change: 1 addition & 0 deletions sig/datadog/core/remote/negotiation.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module Datadog
module Remote
class Negotiation
@transport_root: Datadog::Core::Transport::Negotiation::Transport
@logged: ::Hash[::Symbol, bool]

def initialize: (Datadog::Core::Configuration::Settings _settings, Datadog::Core::Configuration::AgentSettingsResolver::AgentSettings agent_settings) -> void

Expand Down
10 changes: 9 additions & 1 deletion spec/datadog/core/remote/client_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
allow(http_request).to receive(:body=)
allow(request_class).to receive(:new).and_return(http_request)

http_connection = instance_double(::Net::HTTP)
allow(::Net::HTTP).to receive(:new).and_return(http_connection)

allow(http_connection).to receive(:open_timeout=)
Expand All @@ -37,6 +36,7 @@
end
end

let(:http_connection) { instance_double(::Net::HTTP) }
let(:transport) { Datadog::Core::Transport::HTTP.v7(&proc { |_client| }) }
let(:roots) do
[
Expand Down Expand Up @@ -458,6 +458,14 @@
end
end
end

context 'with a network error' do
it 'raises a transport error' do
expect(http_connection).to receive(:request).and_raise(IOError)

expect { client.sync }.to raise_error(Datadog::Core::Remote::Client::TransportError)
end
end
end

describe '#payload' do
Expand Down
10 changes: 9 additions & 1 deletion spec/datadog/core/remote/component_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
before do
expect(Datadog::Core::Transport::HTTP).to receive(:v7).and_return(transport_v7)
expect(Datadog::Core::Remote::Client).to receive(:new).and_return(client)
expect(Datadog::Core::Remote::Negotiation).to receive(:new).and_return(negotiation)
allow(Datadog::Core::Remote::Negotiation).to receive(:new).and_return(negotiation)

expect(worker).to receive(:start).and_call_original
expect(worker).to receive(:stop).and_call_original
Expand Down Expand Up @@ -118,6 +118,14 @@

expect(component.client.object_id).to eql(second_client.object_id)
end

it 'resets the negotiation object' do
allow(Datadog::Core::Remote::Client).to receive(:new).and_return(second_client)

component.barrier(:once)

expect(Datadog::Core::Remote::Negotiation).to have_received(:new).twice
end
end

context 'Client::SyncError' do
Expand Down
54 changes: 45 additions & 9 deletions spec/datadog/core/remote/negotiation_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
describe '#endpoint?' do
include_context 'HTTP connection stub'

subject(:negotiation) { described_class.new(settings, agent_settings) }
subject(:endpoint?) { negotiation.endpoint?('/foo') }
let(:negotiation) { described_class.new(settings, agent_settings) }

context 'when /info exists' do
let(:response_code) { 200 }
Expand All @@ -50,7 +51,7 @@
it do
expect(Datadog.logger).to_not receive(:error)

expect(negotiation.endpoint?('/foo')).to be true
expect(endpoint?).to be true
end

it do
Expand All @@ -68,7 +69,14 @@
expect(Datadog.logger).to receive(:error).and_return(nil)
end

it { expect(negotiation.endpoint?('/foo')).to be false }
it { expect(endpoint?).to be false }

context 'on repeated errors' do
it 'only logs once' do
negotiation.endpoint?('/foo')
negotiation.endpoint?('/foo')
end
end
end

context 'when agent rejects request' do
Expand All @@ -79,7 +87,14 @@
expect(Datadog.logger).to receive(:error).and_return(nil)
end

it { expect(negotiation.endpoint?('/foo')).to be false }
it { expect(endpoint?).to be false }

context 'on repeated errors' do
it 'only logs once' do
negotiation.endpoint?('/foo')
negotiation.endpoint?('/foo')
end
end
end

context 'when agent is in error' do
Expand All @@ -90,7 +105,14 @@
expect(Datadog.logger).to receive(:error).and_return(nil)
end

it { expect(negotiation.endpoint?('/foo')).to be false }
it { expect(endpoint?).to be false }

context 'on repeated errors' do
it 'only logs once' do
negotiation.endpoint?('/foo')
negotiation.endpoint?('/foo')
end
end
end

context 'when agent causes an error' do
Expand All @@ -100,20 +122,34 @@
end

before do
expect(Datadog.logger).to receive(:error).twice.and_return(nil)
expect(Datadog.logger).to receive(:error).and_return(nil)
end

it { expect(negotiation.endpoint?('/foo')).to be false }
it { expect(endpoint?).to be false }

context 'on repeated errors' do
it 'only logs once' do
negotiation.endpoint?('/foo')
negotiation.endpoint?('/foo')
end
end
end

context 'when agent is unreachable' do
let(:request_exception) { Errno::ECONNREFUSED.new }

before do
expect(Datadog.logger).to receive(:error).twice.and_return(nil)
expect(Datadog.logger).to receive(:error).and_return(nil)
end

it { expect(negotiation.endpoint?('/foo')).to be false }
it { expect(endpoint?).to be false }

context 'on repeated errors' do
it 'only logs once' do
negotiation.endpoint?('/foo')
negotiation.endpoint?('/foo')
end
end
end
end
end
13 changes: 12 additions & 1 deletion spec/datadog/core/transport/http_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
allow(http_request).to receive(:body=)
allow(request_class).to receive(:new).and_return(http_request)

http_connection = instance_double(::Net::HTTP)
allow(::Net::HTTP).to receive(:new).and_return(http_connection)

allow(http_connection).to receive(:open_timeout=)
Expand All @@ -32,6 +31,8 @@
end
end

let(:http_connection) { instance_double(::Net::HTTP) }

describe '.root' do
subject(:transport) { described_class.root(&client_options) }

Expand Down Expand Up @@ -199,6 +200,16 @@
it { is_expected.to have_attributes(:roots => be_a(Array)) }
it { is_expected.to have_attributes(:targets => be_a(Hash)) }
it { is_expected.to have_attributes(:target_files => be_a(Array)) }

context 'with a network error' do
it 'raises a transport error' do
expect(http_connection).to receive(:request).and_raise(IOError)

expect(Datadog.logger).to receive(:debug).with(/IOError/)

expect(response).to have_attributes(internal_error?: true)
end
end
end
end
end