From 020bd8271129a55d2b3325153781614007586727 Mon Sep 17 00:00:00 2001 From: Loic Nageleisen Date: Fri, 28 Jul 2023 21:08:02 +0200 Subject: [PATCH 1/3] Reduce remote configuration error logging --- lib/datadog/core/remote/client.rb | 3 +++ lib/datadog/core/remote/component.rb | 3 +++ lib/datadog/core/remote/negotiation.rb | 21 +++++++++++++++++---- lib/datadog/core/transport/http/client.rb | 2 +- sig/datadog/core/remote/client.rbs | 3 +++ sig/datadog/core/remote/negotiation.rbs | 1 + 6 files changed, 28 insertions(+), 5 deletions(-) diff --git a/lib/datadog/core/remote/client.rb b/lib/datadog/core/remote/client.rb index 911011515fa..8c5c2767245 100644 --- a/lib/datadog/core/remote/client.rb +++ b/lib/datadog/core/remote/client.rb @@ -10,6 +10,7 @@ module Core module Remote # Client communicates with the agent and sync remote configuration class Client + class TransportError < StandardError; end class SyncError < StandardError; end attr_reader :transport, :repository, :id, :dispatcher @@ -107,6 +108,8 @@ def sync else dispatcher.dispatch(changes, repository) end + elsif response.internal_error? + raise TransportError, response.to_s end end # rubocop:enable Metrics/AbcSize,Metrics/PerceivedComplexity,Metrics/MethodLength,Metrics/CyclomaticComplexity diff --git a/lib/datadog/core/remote/component.rb b/lib/datadog/core/remote/component.rb index 34838c8d75f..b846e469745 100644 --- a/lib/datadog/core/remote/component.rb +++ b/lib/datadog/core/remote/component.rb @@ -45,6 +45,9 @@ def initialize(settings, capabilities, agent_settings) "remote worker client sync error: #{e.message} location: #{Array(e.backtrace).first}. skipping sync" end rescue StandardError => e + # reset negotiation to log errors again + negotiation = Negotiation.new(settings, agent_settings) + Datadog.logger.error do "remote worker error: #{e.class.name} #{e.message} location: #{Array(e.backtrace).first}. "\ 'reseting client state' diff --git a/lib/datadog/core/remote/negotiation.rb b/lib/datadog/core/remote/negotiation.rb index f9307dfe041..823d330f5f6 100644 --- a/lib/datadog/core/remote/negotiation.rb +++ b/lib/datadog/core/remote/negotiation.rb @@ -12,31 +12,44 @@ def initialize(_settings, agent_settings) transport_options[:agent_settings] = agent_settings if agent_settings @transport_root = Datadog::Core::Transport::HTTP.root(**transport_options.dup) + @logged = {} end def endpoint?(path) res = @transport_root.send_info if res.internal_error? && network_error?(res.error) - Datadog.logger.error { "agent unreachable: cannot negotiate #{path}" } + unless @logged[:agent_unreachable] + Datadog.logger.error { "agent unreachable: cannot negotiate #{path}" } + @logged[:agent_unreachable] = true + end return false end if res.not_found? - Datadog.logger.error { "agent reachable but has no /info endpoint: cannot negotiate #{path}" } + unless @logged[:no_info_endpoint] + Datadog.logger.error { "agent reachable but has no /info endpoint: cannot negotiate #{path}" } + @logged[:no_info_endpoint] = true + end return false end unless res.ok? - Datadog.logger.error { "agent reachable but unexpected response: cannot negotiate #{path}" } + unless @logged[:unexpected_response] + Datadog.logger.error { "agent reachable but unexpected response: cannot negotiate #{path}" } + @logged[:unexpected_response] = true + end return false end unless res.endpoints.include?(path) - Datadog.logger.error { "agent reachable but does not report #{path}" } + unless @logged[:no_config_endpoint] + Datadog.logger.error { "agent reachable but does not report #{path}" } + @logged[:no_config_endpoint] = true + end return false end diff --git a/lib/datadog/core/transport/http/client.rb b/lib/datadog/core/transport/http/client.rb index 09437936060..89d1325daa5 100644 --- a/lib/datadog/core/transport/http/client.rb +++ b/lib/datadog/core/transport/http/client.rb @@ -30,7 +30,7 @@ def send_request(request, &block) "Internal error during #{self.class.name} request. Cause: #{e.class.name} #{e.message} " \ "Location: #{Array(e.backtrace).first}" - Datadog.logger.error(message) + Datadog.logger.debug(message) Datadog::Transport::InternalErrorResponse.new(e) end diff --git a/sig/datadog/core/remote/client.rbs b/sig/datadog/core/remote/client.rbs index 1bcdf76d7bf..0521f3bb25b 100644 --- a/sig/datadog/core/remote/client.rbs +++ b/sig/datadog/core/remote/client.rbs @@ -2,6 +2,9 @@ module Datadog module Core module Remote class Client + class TransportError < StandardError + end + class SyncError < StandardError end diff --git a/sig/datadog/core/remote/negotiation.rbs b/sig/datadog/core/remote/negotiation.rbs index d892890f040..126cfc398de 100644 --- a/sig/datadog/core/remote/negotiation.rbs +++ b/sig/datadog/core/remote/negotiation.rbs @@ -3,6 +3,7 @@ module Datadog module Remote class Negotiation @transport_root: Datadog::Core::Transport::Negotiation::Transport + @logged: ::Hash[::Symbol, bool] def initialize: (Datadog::Core::Configuration::Settings _settings, Datadog::Core::Configuration::AgentSettingsResolver::AgentSettings agent_settings) -> void From e58d2fa68dddc95dca2dce50ae2156caf27e5592 Mon Sep 17 00:00:00 2001 From: Marco Costa Date: Fri, 28 Jul 2023 14:26:42 -0700 Subject: [PATCH 2/3] Add testing --- lib/datadog/core/remote/component.rb | 4 +- spec/datadog/core/remote/client_spec.rb | 10 +++- spec/datadog/core/remote/component_spec.rb | 10 +++- spec/datadog/core/remote/negotiation_spec.rb | 54 ++++++++++++++++---- spec/datadog/core/transport/http_spec.rb | 13 ++++- 5 files changed, 78 insertions(+), 13 deletions(-) diff --git a/lib/datadog/core/remote/component.rb b/lib/datadog/core/remote/component.rb index b846e469745..aea2ed828ca 100644 --- a/lib/datadog/core/remote/component.rb +++ b/lib/datadog/core/remote/component.rb @@ -45,7 +45,9 @@ def initialize(settings, capabilities, agent_settings) "remote worker client sync error: #{e.message} location: #{Array(e.backtrace).first}. skipping sync" end rescue StandardError => e - # reset negotiation to log errors again + # In case of unexpected errors, reset the negotiation object + # given external conditions have changed and the negotiation + # negotiation object stores error logging state that should be reset. negotiation = Negotiation.new(settings, agent_settings) Datadog.logger.error do diff --git a/spec/datadog/core/remote/client_spec.rb b/spec/datadog/core/remote/client_spec.rb index 830f66d911e..e4ee968652f 100644 --- a/spec/datadog/core/remote/client_spec.rb +++ b/spec/datadog/core/remote/client_spec.rb @@ -12,7 +12,6 @@ allow(http_request).to receive(:body=) allow(request_class).to receive(:new).and_return(http_request) - http_connection = instance_double(::Net::HTTP) allow(::Net::HTTP).to receive(:new).and_return(http_connection) allow(http_connection).to receive(:open_timeout=) @@ -37,6 +36,7 @@ end end + let(:http_connection) { instance_double(::Net::HTTP) } let(:transport) { Datadog::Core::Transport::HTTP.v7(&proc { |_client| }) } let(:roots) do [ @@ -458,6 +458,14 @@ end end end + + context 'with a network error' do + it 'it raises a transport error' do + expect(http_connection).to receive(:request).and_raise(IOError) + + expect { client.sync }.to raise_error(Datadog::Core::Remote::Client::TransportError) + end + end end describe '#payload' do diff --git a/spec/datadog/core/remote/component_spec.rb b/spec/datadog/core/remote/component_spec.rb index d0ff1dbc98f..ce99f0a7f63 100644 --- a/spec/datadog/core/remote/component_spec.rb +++ b/spec/datadog/core/remote/component_spec.rb @@ -61,7 +61,7 @@ before do expect(Datadog::Core::Transport::HTTP).to receive(:v7).and_return(transport_v7) expect(Datadog::Core::Remote::Client).to receive(:new).and_return(client) - expect(Datadog::Core::Remote::Negotiation).to receive(:new).and_return(negotiation) + allow(Datadog::Core::Remote::Negotiation).to receive(:new).and_return(negotiation) expect(worker).to receive(:start).and_call_original expect(worker).to receive(:stop).and_call_original @@ -118,6 +118,14 @@ expect(component.client.object_id).to eql(second_client.object_id) end + + it 'resets the negotiation object' do + allow(Datadog::Core::Remote::Client).to receive(:new).and_return(second_client) + + component.barrier(:once) + + expect(Datadog::Core::Remote::Negotiation).to have_received(:new).twice + end end context 'Client::SyncError' do diff --git a/spec/datadog/core/remote/negotiation_spec.rb b/spec/datadog/core/remote/negotiation_spec.rb index 556bdc2b252..ed1c503e38a 100644 --- a/spec/datadog/core/remote/negotiation_spec.rb +++ b/spec/datadog/core/remote/negotiation_spec.rb @@ -34,7 +34,8 @@ describe '#endpoint?' do include_context 'HTTP connection stub' - subject(:negotiation) { described_class.new(settings, agent_settings) } + subject(:endpoint?) { negotiation.endpoint?('/foo') } + let(:negotiation) { described_class.new(settings, agent_settings) } context 'when /info exists' do let(:response_code) { 200 } @@ -50,7 +51,7 @@ it do expect(Datadog.logger).to_not receive(:error) - expect(negotiation.endpoint?('/foo')).to be true + expect(endpoint?).to be true end it do @@ -68,7 +69,14 @@ expect(Datadog.logger).to receive(:error).and_return(nil) end - it { expect(negotiation.endpoint?('/foo')).to be false } + it { expect(endpoint?).to be false } + + context 'on repeated errors' do + it 'only logs once' do + negotiation.endpoint?('/foo') + negotiation.endpoint?('/foo') + end + end end context 'when agent rejects request' do @@ -79,7 +87,14 @@ expect(Datadog.logger).to receive(:error).and_return(nil) end - it { expect(negotiation.endpoint?('/foo')).to be false } + it { expect(endpoint?).to be false } + + context 'on repeated errors' do + it 'only logs once' do + negotiation.endpoint?('/foo') + negotiation.endpoint?('/foo') + end + end end context 'when agent is in error' do @@ -90,7 +105,14 @@ expect(Datadog.logger).to receive(:error).and_return(nil) end - it { expect(negotiation.endpoint?('/foo')).to be false } + it { expect(endpoint?).to be false } + + context 'on repeated errors' do + it 'only logs once' do + negotiation.endpoint?('/foo') + negotiation.endpoint?('/foo') + end + end end context 'when agent causes an error' do @@ -100,20 +122,34 @@ end before do - expect(Datadog.logger).to receive(:error).twice.and_return(nil) + expect(Datadog.logger).to receive(:error).and_return(nil) end - it { expect(negotiation.endpoint?('/foo')).to be false } + it { expect(endpoint?).to be false } + + context 'on repeated errors' do + it 'only logs once' do + negotiation.endpoint?('/foo') + negotiation.endpoint?('/foo') + end + end end context 'when agent is unreachable' do let(:request_exception) { Errno::ECONNREFUSED.new } before do - expect(Datadog.logger).to receive(:error).twice.and_return(nil) + expect(Datadog.logger).to receive(:error).and_return(nil) end - it { expect(negotiation.endpoint?('/foo')).to be false } + it { expect(endpoint?).to be false } + + context 'on repeated errors' do + it 'only logs once' do + negotiation.endpoint?('/foo') + negotiation.endpoint?('/foo') + end + end end end end diff --git a/spec/datadog/core/transport/http_spec.rb b/spec/datadog/core/transport/http_spec.rb index e89cbd64037..df57527e592 100644 --- a/spec/datadog/core/transport/http_spec.rb +++ b/spec/datadog/core/transport/http_spec.rb @@ -18,7 +18,6 @@ allow(http_request).to receive(:body=) allow(request_class).to receive(:new).and_return(http_request) - http_connection = instance_double(::Net::HTTP) allow(::Net::HTTP).to receive(:new).and_return(http_connection) allow(http_connection).to receive(:open_timeout=) @@ -31,6 +30,8 @@ allow(http_connection).to receive(:request).with(http_request).and_return(http_response) end end + + let(:http_connection) { instance_double(::Net::HTTP) } describe '.root' do subject(:transport) { described_class.root(&client_options) } @@ -199,6 +200,16 @@ it { is_expected.to have_attributes(:roots => be_a(Array)) } it { is_expected.to have_attributes(:targets => be_a(Hash)) } it { is_expected.to have_attributes(:target_files => be_a(Array)) } + + context 'with a network error' do + it 'it raises a transport error' do + expect(http_connection).to receive(:request).and_raise(IOError) + + expect(Datadog.logger).to receive(:debug).with(/IOError/) + + expect(response).to have_attributes(internal_error?: true) + end + end end end end From 88ce4d12fab83ffaf47f4a38b4b20a9bb9f34fd8 Mon Sep 17 00:00:00 2001 From: Marco Costa Date: Fri, 28 Jul 2023 14:47:12 -0700 Subject: [PATCH 3/3] Lint --- spec/datadog/core/remote/client_spec.rb | 2 +- spec/datadog/core/transport/http_spec.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/datadog/core/remote/client_spec.rb b/spec/datadog/core/remote/client_spec.rb index e4ee968652f..1db669f53a9 100644 --- a/spec/datadog/core/remote/client_spec.rb +++ b/spec/datadog/core/remote/client_spec.rb @@ -460,7 +460,7 @@ end context 'with a network error' do - it 'it raises a transport error' do + it 'raises a transport error' do expect(http_connection).to receive(:request).and_raise(IOError) expect { client.sync }.to raise_error(Datadog::Core::Remote::Client::TransportError) diff --git a/spec/datadog/core/transport/http_spec.rb b/spec/datadog/core/transport/http_spec.rb index df57527e592..f3b645fbf32 100644 --- a/spec/datadog/core/transport/http_spec.rb +++ b/spec/datadog/core/transport/http_spec.rb @@ -30,7 +30,7 @@ allow(http_connection).to receive(:request).with(http_request).and_return(http_response) end end - + let(:http_connection) { instance_double(::Net::HTTP) } describe '.root' do @@ -202,7 +202,7 @@ it { is_expected.to have_attributes(:target_files => be_a(Array)) } context 'with a network error' do - it 'it raises a transport error' do + it 'raises a transport error' do expect(http_connection).to receive(:request).and_raise(IOError) expect(Datadog.logger).to receive(:debug).with(/IOError/)