diff --git a/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/acs/session/session.go b/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/acs/session/session.go index 6a3f733cf99..e913613a50b 100644 --- a/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/acs/session/session.go +++ b/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/acs/session/session.go @@ -4,7 +4,7 @@ // not use this file except in compliance with the License. A copy of the // License is located at // -// http://aws.amazon.com/apache2.0/ +// http://aws.amazon.com/apache2.0/ // // or in the "license" file accompanying this file. This file is distributed // on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either @@ -99,6 +99,8 @@ type session struct { disconnectJitter time.Duration inactiveInstanceReconnectDelay time.Duration lastConnectedTime time.Time + firstACSConnectionTime time.Time + firstDiscoverPollEndpointTime time.Time } // NewSession creates a new Session. @@ -158,6 +160,8 @@ func NewSession(containerInstanceARN string, disconnectJitter: wsclient.DisconnectJitterMax, inactiveInstanceReconnectDelay: inactiveInstanceReconnectDelay, lastConnectedTime: time.Time{}, + firstACSConnectionTime: time.Time{}, + firstDiscoverPollEndpointTime: time.Time{}, } } @@ -234,7 +238,12 @@ func (s *session) Start(ctx context.Context) error { // startSessionOnce creates a session with ACS and handles requests using the passed // in arguments. func (s *session) startSessionOnce(ctx context.Context) error { + if s.GetFirstDiscoverPollEndpointTime().IsZero() { + s.firstDiscoverPollEndpointTime = time.Now() + } + acsEndpoint, err := s.ecsClient.DiscoverPollEndpoint(s.containerInstanceARN) + if err != nil { logger.Error("ACS: Unable to discover poll endpoint", logger.Fields{ "containerInstanceARN": s.containerInstanceARN, @@ -253,8 +262,10 @@ func (s *session) startSessionOnce(ctx context.Context) error { // Invoke Connect method as soon as we create client. 
This will ensure all the // request handlers to be associated with this client have a valid connection. + acsConnectionStartTime := time.Now() disconnectTimer, err := client.Connect(metrics.ACSDisconnectTimeoutMetricName, s.disconnectTimeout, s.disconnectJitter) + s.metricsFactory.New(metrics.ACSSessionCallName).Done(err) if err != nil { logger.Error("Failed to connect to ACS", logger.Fields{ "containerInstanceARN": s.containerInstanceARN, @@ -262,8 +273,13 @@ func (s *session) startSessionOnce(ctx context.Context) error { }) return err } + s.metricsFactory.New(metrics.ACSSessionCallDurationName).WithGauge(time.Since(acsConnectionStartTime)).Done(nil) defer disconnectTimer.Stop() + if s.GetFirstACSConnectionTime().IsZero() { + s.firstACSConnectionTime = time.Now() + } + // Record the timestamp of the last connection to ACS. s.lastConnectedTime = time.Now() @@ -475,3 +491,11 @@ func formatDockerVersion(dockerVersionValue string) string { func (s *session) GetLastConnectedTime() time.Time { return s.lastConnectedTime } + +func (s *session) GetFirstACSConnectionTime() time.Time { + return s.firstACSConnectionTime +} + +func (s *session) GetFirstDiscoverPollEndpointTime() time.Time { + return s.firstDiscoverPollEndpointTime +} diff --git a/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/api/ecs/client/ecs_client.go b/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/api/ecs/client/ecs_client.go index 6e57ba6b27e..5894c152d5e 100644 --- a/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/api/ecs/client/ecs_client.go +++ b/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/api/ecs/client/ecs_client.go @@ -39,6 +39,7 @@ import ( "github.com/aws/amazon-ecs-agent/ecs-agent/httpclient" "github.com/aws/amazon-ecs-agent/ecs-agent/logger" "github.com/aws/amazon-ecs-agent/ecs-agent/logger/field" + "github.com/aws/amazon-ecs-agent/ecs-agent/metrics" "github.com/aws/amazon-ecs-agent/ecs-agent/utils" "github.com/aws/amazon-ecs-agent/ecs-agent/utils/retry" ) @@ 
-77,6 +78,7 @@ type ecsClient struct { shouldExcludeIPv6PortBinding bool sascCustomRetryBackoff func(func() error) error stscAttachmentCustomRetryBackoff func(func() error) error + metricsFactory metrics.EntryFactory } // NewECSClient creates a new ECSClient interface object. @@ -112,6 +114,9 @@ func NewECSClient( if client.submitStateChangeClient == nil { client.submitStateChangeClient = newSubmitStateChangeClient(&ecsConfig) } + if client.metricsFactory == nil { + client.metricsFactory = metrics.NewNopEntryFactory() + } return client, nil } @@ -747,7 +752,7 @@ func (client *ecsClient) discoverPollEndpoint(containerInstanceArn string, } } } - + discoverPollEndpointStartTime := time.Now() // Cache miss or expired, invoke the ECS DiscoverPollEndpoint API. logger.Debug("Invoking DiscoverPollEndpoint", logger.Fields{ field.ContainerInstanceARN: containerInstanceArn, @@ -758,6 +763,7 @@ func (client *ecsClient) discoverPollEndpoint(containerInstanceArn string, Cluster: aws.String(client.configAccessor.Cluster()), ZoneId: aws.String(availabilityZone), }) + client.metricsFactory.New(metrics.DiscoverPollEndpointCallName).Done(err) if err != nil { // If we got an error calling the API, fallback to an expired cached endpoint if // we have it. @@ -776,7 +782,7 @@ func (client *ecsClient) discoverPollEndpoint(containerInstanceArn string, } return nil, err } - + client.metricsFactory.New(metrics.DiscoverPollEndpointDurationName).WithGauge(time.Since(discoverPollEndpointStartTime)).Done(nil) // Cache the response from ECS. 
client.pollEndpointCache.Set(containerInstanceArn, output) return output, nil diff --git a/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/api/ecs/client/ecs_client_option.go b/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/api/ecs/client/ecs_client_option.go index ee9ad5c8686..14ac8382b0a 100644 --- a/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/api/ecs/client/ecs_client_option.go +++ b/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/api/ecs/client/ecs_client_option.go @@ -16,6 +16,7 @@ package ecsclient import ( "github.com/aws/amazon-ecs-agent/ecs-agent/api/ecs" "github.com/aws/amazon-ecs-agent/ecs-agent/async" + "github.com/aws/amazon-ecs-agent/ecs-agent/metrics" ) // ECSClientOption allows for configuration of an ecsClient. @@ -87,3 +88,12 @@ func WithSubmitStateChangeClient(s ecs.ECSSubmitStateSDK) ECSClientOption { client.submitStateChangeClient = s } } + +// WithMetricsFactory is an ECSClientOption that configures +// ecsClient.metricsFactory with the value passed as a parameter. 
+// This is especially useful for emitting metrics in the ECS Client +func WithMetricsFactory(metricsFactory metrics.EntryFactory) ECSClientOption { + return func(client *ecsClient) { + client.metricsFactory = metricsFactory + } +} diff --git a/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/metrics/constants.go b/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/metrics/constants.go index 9537a58fd7a..0b4c1634f45 100644 --- a/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/metrics/constants.go +++ b/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/metrics/constants.go @@ -46,6 +46,16 @@ const ( ACSDisconnectTimeoutMetricName = agentAvailabilityNamespace + ".ACSDisconnectTimeout" TCSDisconnectTimeoutMetricName = agentAvailabilityNamespace + ".TCSDisconnectTimeout" + // ACS Session Metrics + acsSessionNamespace = "ACSSession" + ACSSessionCallName = acsSessionNamespace + ".ACSConnect" + ACSSessionCallDurationName = acsSessionNamespace + ".ACSConnectDuration" + + // ECS Client Metrics + ecsClientNamespace = "ECSClient" + DiscoverPollEndpointCallName = ecsClientNamespace + ".DiscoverPollEndpoint" + DiscoverPollEndpointDurationName = ecsClientNamespace + ".DiscoverPollEndpointDuration" + dbClientMetricNamespace = "Data" GetNetworkConfigurationByTaskMetricName = dbClientMetricNamespace + ".GetNetworkConfigurationByTask" SaveNetworkNamespaceMetricName = dbClientMetricNamespace + ".SaveNetworkNamespace" diff --git a/ecs-agent/acs/session/session.go b/ecs-agent/acs/session/session.go index 6a3f733cf99..e913613a50b 100644 --- a/ecs-agent/acs/session/session.go +++ b/ecs-agent/acs/session/session.go @@ -4,7 +4,7 @@ // not use this file except in compliance with the License. A copy of the // License is located at // -// http://aws.amazon.com/apache2.0/ +// http://aws.amazon.com/apache2.0/ // // or in the "license" file accompanying this file. 
This file is distributed // on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either @@ -99,6 +99,8 @@ type session struct { disconnectJitter time.Duration inactiveInstanceReconnectDelay time.Duration lastConnectedTime time.Time + firstACSConnectionTime time.Time + firstDiscoverPollEndpointTime time.Time } // NewSession creates a new Session. @@ -158,6 +160,8 @@ func NewSession(containerInstanceARN string, disconnectJitter: wsclient.DisconnectJitterMax, inactiveInstanceReconnectDelay: inactiveInstanceReconnectDelay, lastConnectedTime: time.Time{}, + firstACSConnectionTime: time.Time{}, + firstDiscoverPollEndpointTime: time.Time{}, } } @@ -234,7 +238,12 @@ func (s *session) Start(ctx context.Context) error { // startSessionOnce creates a session with ACS and handles requests using the passed // in arguments. func (s *session) startSessionOnce(ctx context.Context) error { + if s.GetFirstDiscoverPollEndpointTime().IsZero() { + s.firstDiscoverPollEndpointTime = time.Now() + } + acsEndpoint, err := s.ecsClient.DiscoverPollEndpoint(s.containerInstanceARN) + if err != nil { logger.Error("ACS: Unable to discover poll endpoint", logger.Fields{ "containerInstanceARN": s.containerInstanceARN, @@ -253,8 +262,10 @@ func (s *session) startSessionOnce(ctx context.Context) error { // Invoke Connect method as soon as we create client. This will ensure all the // request handlers to be associated with this client have a valid connection. 
+ acsConnectionStartTime := time.Now() disconnectTimer, err := client.Connect(metrics.ACSDisconnectTimeoutMetricName, s.disconnectTimeout, s.disconnectJitter) + s.metricsFactory.New(metrics.ACSSessionCallName).Done(err) if err != nil { logger.Error("Failed to connect to ACS", logger.Fields{ "containerInstanceARN": s.containerInstanceARN, @@ -262,8 +273,13 @@ func (s *session) startSessionOnce(ctx context.Context) error { }) return err } + s.metricsFactory.New(metrics.ACSSessionCallDurationName).WithGauge(time.Since(acsConnectionStartTime)).Done(nil) defer disconnectTimer.Stop() + if s.GetFirstACSConnectionTime().IsZero() { + s.firstACSConnectionTime = time.Now() + } + // Record the timestamp of the last connection to ACS. s.lastConnectedTime = time.Now() @@ -475,3 +491,11 @@ func formatDockerVersion(dockerVersionValue string) string { func (s *session) GetLastConnectedTime() time.Time { return s.lastConnectedTime } + +func (s *session) GetFirstACSConnectionTime() time.Time { + return s.firstACSConnectionTime +} + +func (s *session) GetFirstDiscoverPollEndpointTime() time.Time { + return s.firstDiscoverPollEndpointTime +} diff --git a/ecs-agent/acs/session/session_test.go b/ecs-agent/acs/session/session_test.go index 170cfccd898..1cd742773be 100644 --- a/ecs-agent/acs/session/session_test.go +++ b/ecs-agent/acs/session/session_test.go @@ -39,6 +39,7 @@ import ( "github.com/aws/amazon-ecs-agent/ecs-agent/doctor" "github.com/aws/amazon-ecs-agent/ecs-agent/eventstream" metricsfactory "github.com/aws/amazon-ecs-agent/ecs-agent/metrics" + mock_metrics "github.com/aws/amazon-ecs-agent/ecs-agent/metrics/mocks" "github.com/aws/amazon-ecs-agent/ecs-agent/utils/retry" mock_retry "github.com/aws/amazon-ecs-agent/ecs-agent/utils/retry/mock" "github.com/aws/amazon-ecs-agent/ecs-agent/wsclient" @@ -224,6 +225,16 @@ func TestSessionReconnectsOnConnectErrors(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() + mockMetricsFactory := 
mock_metrics.NewMockEntryFactory(ctrl) + + mockDiscoverPollEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockMetricsFactory.EXPECT().New("ACSStartSession.DiscoverPollEndpointDuration").Return(mockDiscoverPollEndpointEntry).AnyTimes() + mockDiscoverPollEndpointEntry.EXPECT().Done(gomock.Any()).AnyTimes() + + mockACSConnectEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockACSConnectEndpointEntry.EXPECT().Done(gomock.Any()).AnyTimes() + mockMetricsFactory.EXPECT().New("ACSStartSession.ACSConnectEndpointDuration").Return(mockACSConnectEndpointEntry).AnyTimes() + ecsClient := mock_ecs.NewMockECSClient(ctrl) ecsClient.EXPECT().DiscoverPollEndpoint(gomock.Any()).Return(acsURL, nil).AnyTimes() @@ -243,7 +254,7 @@ func TestSessionReconnectsOnConnectErrors(t *testing.T) { // Connect fails 10 times. mockWsClient.EXPECT().Connect(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, io.EOF).Times(10), // Cancel trying to connect to ACS on the 11th attempt. - // Failure to retry on Connect() errors should cause the test to time out as the context is never canceled. + // Failure to retry on Connect() errors should cause the test to time out as the context is never canceled. 
mockWsClient.EXPECT().Connect(gomock.Any(), gomock.Any(), gomock.Any()).Do(func(interface{}, interface{}, interface{}) { cancel() @@ -253,6 +264,7 @@ func TestSessionReconnectsOnConnectErrors(t *testing.T) { containerInstanceARN: testconst.ContainerInstanceARN, ecsClient: ecsClient, clientFactory: mockClientFactory, + metricsFactory: mockMetricsFactory, heartbeatTimeout: 20 * time.Millisecond, heartbeatJitter: 10 * time.Millisecond, disconnectTimeout: 30 * time.Millisecond, @@ -345,6 +357,16 @@ func TestSessionReconnectsWithoutBackoffOnEOFError(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() + mockMetricsFactory := mock_metrics.NewMockEntryFactory(ctrl) + + mockDiscoverPollEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockMetricsFactory.EXPECT().New("ACSStartSession.DiscoverPollEndpointDuration").Return(mockDiscoverPollEndpointEntry).AnyTimes() + mockDiscoverPollEndpointEntry.EXPECT().Done(gomock.Any()).AnyTimes() + + mockACSConnectEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockACSConnectEndpointEntry.EXPECT().Done(gomock.Any()).AnyTimes() + mockMetricsFactory.EXPECT().New("ACSStartSession.ACSConnectEndpointDuration").Return(mockACSConnectEndpointEntry).AnyTimes() + ecsClient := mock_ecs.NewMockECSClient(ctrl) ecsClient.EXPECT().DiscoverPollEndpoint(gomock.Any()).Return(acsURL, nil).AnyTimes() @@ -377,6 +399,7 @@ func TestSessionReconnectsWithoutBackoffOnEOFError(t *testing.T) { inactiveInstanceCB: noopFunc, backoff: mockBackoff, clientFactory: mockClientFactory, + metricsFactory: mockMetricsFactory, heartbeatTimeout: 20 * time.Millisecond, heartbeatJitter: 10 * time.Millisecond, disconnectTimeout: 30 * time.Millisecond, @@ -394,6 +417,16 @@ func TestSessionReconnectsWithBackoffOnNonEOFError(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() + mockMetricsFactory := mock_metrics.NewMockEntryFactory(ctrl) + + mockDiscoverPollEndpointEntry := mock_metrics.NewMockEntry(ctrl) + 
mockMetricsFactory.EXPECT().New("ACSStartSession.DiscoverPollEndpointDuration").Return(mockDiscoverPollEndpointEntry).AnyTimes() + mockDiscoverPollEndpointEntry.EXPECT().Done(gomock.Any()).AnyTimes() + + mockACSConnectEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockACSConnectEndpointEntry.EXPECT().Done(gomock.Any()).AnyTimes() + mockMetricsFactory.EXPECT().New("ACSStartSession.ACSConnectEndpointDuration").Return(mockACSConnectEndpointEntry).AnyTimes() + ecsClient := mock_ecs.NewMockECSClient(ctrl) ecsClient.EXPECT().DiscoverPollEndpoint(gomock.Any()).Return(acsURL, nil).AnyTimes() @@ -427,6 +460,7 @@ func TestSessionReconnectsWithBackoffOnNonEOFError(t *testing.T) { inactiveInstanceCB: noopFunc, backoff: mockBackoff, clientFactory: mockClientFactory, + metricsFactory: mockMetricsFactory, heartbeatTimeout: 20 * time.Millisecond, heartbeatJitter: 10 * time.Millisecond, disconnectTimeout: 30 * time.Millisecond, @@ -444,6 +478,16 @@ func TestSessionCallsInactiveInstanceCB(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() + mockMetricsFactory := mock_metrics.NewMockEntryFactory(ctrl) + + mockDiscoverPollEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockMetricsFactory.EXPECT().New("ACSStartSession.DiscoverPollEndpointDuration").Return(mockDiscoverPollEndpointEntry).AnyTimes() + mockDiscoverPollEndpointEntry.EXPECT().Done(gomock.Any()).AnyTimes() + + mockACSConnectEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockACSConnectEndpointEntry.EXPECT().Done(gomock.Any()).AnyTimes() + mockMetricsFactory.EXPECT().New("ACSStartSession.ACSConnectEndpointDuration").Return(mockACSConnectEndpointEntry).AnyTimes() + ecsClient := mock_ecs.NewMockECSClient(ctrl) ecsClient.EXPECT().DiscoverPollEndpoint(gomock.Any()).Return(acsURL, nil).AnyTimes() @@ -480,6 +524,7 @@ func TestSessionCallsInactiveInstanceCB(t *testing.T) { ecsClient: ecsClient, inactiveInstanceCB: inactiveInstanceCB, clientFactory: mockClientFactory, + metricsFactory: mockMetricsFactory, 
heartbeatTimeout: 20 * time.Millisecond, heartbeatJitter: 10 * time.Millisecond, disconnectTimeout: 30 * time.Millisecond, @@ -499,6 +544,16 @@ func TestSessionReconnectDelayForInactiveInstanceError(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() + mockMetricsFactory := mock_metrics.NewMockEntryFactory(ctrl) + + mockDiscoverPollEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockMetricsFactory.EXPECT().New("ACSStartSession.DiscoverPollEndpointDuration").Return(mockDiscoverPollEndpointEntry).AnyTimes() + mockDiscoverPollEndpointEntry.EXPECT().Done(gomock.Any()).AnyTimes() + + mockACSConnectEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockACSConnectEndpointEntry.EXPECT().Done(gomock.Any()).AnyTimes() + mockMetricsFactory.EXPECT().New("ACSStartSession.ACSConnectEndpointDuration").Return(mockACSConnectEndpointEntry).AnyTimes() + ecsClient := mock_ecs.NewMockECSClient(ctrl) ecsClient.EXPECT().DiscoverPollEndpoint(gomock.Any()).Return(acsURL, nil).AnyTimes() @@ -540,6 +595,7 @@ func TestSessionReconnectDelayForInactiveInstanceError(t *testing.T) { ecsClient: ecsClient, inactiveInstanceCB: noopFunc, clientFactory: mockClientFactory, + metricsFactory: mockMetricsFactory, heartbeatTimeout: 20 * time.Millisecond, heartbeatJitter: 10 * time.Millisecond, disconnectTimeout: 30 * time.Millisecond, @@ -559,6 +615,16 @@ func TestSessionReconnectsOnServeErrors(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() + mockMetricsFactory := mock_metrics.NewMockEntryFactory(ctrl) + + mockDiscoverPollEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockMetricsFactory.EXPECT().New("ACSStartSession.DiscoverPollEndpointDuration").Return(mockDiscoverPollEndpointEntry).AnyTimes() + mockDiscoverPollEndpointEntry.EXPECT().Done(gomock.Any()).AnyTimes() + + mockACSConnectEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockACSConnectEndpointEntry.EXPECT().Done(gomock.Any()).AnyTimes() + 
mockMetricsFactory.EXPECT().New("ACSStartSession.ACSConnectEndpointDuration").Return(mockACSConnectEndpointEntry).AnyTimes() + ecsClient := mock_ecs.NewMockECSClient(ctrl) ecsClient.EXPECT().DiscoverPollEndpoint(gomock.Any()).Return(acsURL, nil).AnyTimes() @@ -590,6 +656,7 @@ func TestSessionReconnectsOnServeErrors(t *testing.T) { ecsClient: ecsClient, inactiveInstanceCB: noopFunc, clientFactory: mockClientFactory, + metricsFactory: mockMetricsFactory, heartbeatTimeout: 20 * time.Millisecond, heartbeatJitter: 10 * time.Millisecond, disconnectTimeout: 30 * time.Millisecond, @@ -608,6 +675,16 @@ func TestSessionStopsWhenContextIsCanceled(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() + mockMetricsFactory := mock_metrics.NewMockEntryFactory(ctrl) + + mockDiscoverPollEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockMetricsFactory.EXPECT().New("ACSStartSession.DiscoverPollEndpointDuration").Return(mockDiscoverPollEndpointEntry).AnyTimes() + mockDiscoverPollEndpointEntry.EXPECT().Done(gomock.Any()).AnyTimes() + + mockACSConnectEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockACSConnectEndpointEntry.EXPECT().Done(gomock.Any()).AnyTimes() + mockMetricsFactory.EXPECT().New("ACSStartSession.ACSConnectEndpointDuration").Return(mockACSConnectEndpointEntry).AnyTimes() + ecsClient := mock_ecs.NewMockECSClient(ctrl) ecsClient.EXPECT().DiscoverPollEndpoint(gomock.Any()).Return(acsURL, nil).AnyTimes() @@ -635,6 +712,7 @@ func TestSessionStopsWhenContextIsCanceled(t *testing.T) { ecsClient: ecsClient, inactiveInstanceCB: noopFunc, clientFactory: mockClientFactory, + metricsFactory: mockMetricsFactory, heartbeatTimeout: 20 * time.Millisecond, heartbeatJitter: 10 * time.Millisecond, disconnectTimeout: 30 * time.Millisecond, @@ -653,6 +731,16 @@ func TestSessionStopsWhenContextIsErrorDueToTimeout(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() + mockMetricsFactory := mock_metrics.NewMockEntryFactory(ctrl) + + 
mockDiscoverPollEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockMetricsFactory.EXPECT().New("ACSStartSession.DiscoverPollEndpointDuration").Return(mockDiscoverPollEndpointEntry) + mockDiscoverPollEndpointEntry.EXPECT().Done(gomock.Any()) + + mockACSConnectEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockACSConnectEndpointEntry.EXPECT().Done(gomock.Any()) + mockMetricsFactory.EXPECT().New("ACSStartSession.ACSConnectEndpointDuration").Return(mockACSConnectEndpointEntry) + ecsClient := mock_ecs.NewMockECSClient(ctrl) ecsClient.EXPECT().DiscoverPollEndpoint(gomock.Any()).Return(acsURL, nil).AnyTimes() @@ -677,6 +765,7 @@ func TestSessionStopsWhenContextIsErrorDueToTimeout(t *testing.T) { ecsClient: ecsClient, inactiveInstanceCB: noopFunc, clientFactory: mockClientFactory, + metricsFactory: mockMetricsFactory, heartbeatTimeout: 20 * time.Millisecond, heartbeatJitter: 10 * time.Millisecond, inactiveInstanceReconnectDelay: 1 * time.Hour, @@ -694,6 +783,16 @@ func TestSessionReconnectsOnDiscoverPollEndpointError(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() + mockMetricsFactory := mock_metrics.NewMockEntryFactory(ctrl) + + mockDiscoverPollEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockMetricsFactory.EXPECT().New("ACSStartSession.DiscoverPollEndpointDuration").Return(mockDiscoverPollEndpointEntry).Times(2) + mockDiscoverPollEndpointEntry.EXPECT().Done(gomock.Any()).Times(2) + + mockACSConnectEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockACSConnectEndpointEntry.EXPECT().Done(gomock.Any()) + mockMetricsFactory.EXPECT().New("ACSStartSession.ACSConnectEndpointDuration").Return(mockACSConnectEndpointEntry) + ecsClient := mock_ecs.NewMockECSClient(ctrl) ctx, cancel := context.WithCancel(context.Background()) @@ -725,6 +824,7 @@ func TestSessionReconnectsOnDiscoverPollEndpointError(t *testing.T) { ecsClient: ecsClient, inactiveInstanceCB: noopFunc, clientFactory: mockClientFactory, + metricsFactory: mockMetricsFactory, 
heartbeatTimeout: 20 * time.Millisecond, heartbeatJitter: 10 * time.Millisecond, disconnectTimeout: 30 * time.Millisecond, @@ -756,6 +856,16 @@ func TestConnectionIsClosedOnIdle(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() + mockMetricsFactory := mock_metrics.NewMockEntryFactory(ctrl) + + mockDiscoverPollEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockMetricsFactory.EXPECT().New("ACSStartSession.DiscoverPollEndpointDuration").Return(mockDiscoverPollEndpointEntry) + mockDiscoverPollEndpointEntry.EXPECT().Done(gomock.Any()) + + mockACSConnectEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockMetricsFactory.EXPECT().New("ACSStartSession.ACSConnectEndpointDuration").Return(mockACSConnectEndpointEntry) + mockACSConnectEndpointEntry.EXPECT().Done(gomock.Any()) + ecsClient := mock_ecs.NewMockECSClient(ctrl) ecsClient.EXPECT().DiscoverPollEndpoint(gomock.Any()).Return(acsURL, nil).AnyTimes() ctx, cancel := context.WithCancel(context.Background()) @@ -783,6 +893,7 @@ func TestConnectionIsClosedOnIdle(t *testing.T) { ecsClient: ecsClient, inactiveInstanceCB: noopFunc, clientFactory: mockClientFactory, + metricsFactory: mockMetricsFactory, heartbeatTimeout: 20 * time.Millisecond, heartbeatJitter: 10 * time.Millisecond, disconnectTimeout: 30 * time.Millisecond, @@ -1003,6 +1114,16 @@ func TestSessionCorrectlySetsSendCredentials(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() + mockMetricsFactory := mock_metrics.NewMockEntryFactory(ctrl) + + mockDiscoverPollEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockMetricsFactory.EXPECT().New("ACSStartSession.DiscoverPollEndpointDuration").Return(mockDiscoverPollEndpointEntry).Times(10) + mockDiscoverPollEndpointEntry.EXPECT().Done(gomock.Any()).Times(10) + + mockACSConnectEndpointEntry := mock_metrics.NewMockEntry(ctrl) + mockMetricsFactory.EXPECT().New("ACSStartSession.ACSConnectEndpointDuration").Return(mockACSConnectEndpointEntry).Times(10) + 
mockACSConnectEndpointEntry.EXPECT().Done(gomock.Any()).Times(10) + const numInvocations = 10 ecsClient := mock_ecs.NewMockECSClient(ctrl) ecsClient.EXPECT().DiscoverPollEndpoint(gomock.Any()).Return(acsURL, nil).AnyTimes() @@ -1025,7 +1146,7 @@ func TestSessionCorrectlySetsSendCredentials(t *testing.T) { nil, noopFunc, mockClientFactory, - metricsfactory.NewNopEntryFactory(), + mockMetricsFactory, agentVersion, agentGitShortHash, dockerVersion, diff --git a/ecs-agent/api/ecs/client/ecs_client.go b/ecs-agent/api/ecs/client/ecs_client.go index 6e57ba6b27e..5894c152d5e 100644 --- a/ecs-agent/api/ecs/client/ecs_client.go +++ b/ecs-agent/api/ecs/client/ecs_client.go @@ -39,6 +39,7 @@ import ( "github.com/aws/amazon-ecs-agent/ecs-agent/httpclient" "github.com/aws/amazon-ecs-agent/ecs-agent/logger" "github.com/aws/amazon-ecs-agent/ecs-agent/logger/field" + "github.com/aws/amazon-ecs-agent/ecs-agent/metrics" "github.com/aws/amazon-ecs-agent/ecs-agent/utils" "github.com/aws/amazon-ecs-agent/ecs-agent/utils/retry" ) @@ -77,6 +78,7 @@ type ecsClient struct { shouldExcludeIPv6PortBinding bool sascCustomRetryBackoff func(func() error) error stscAttachmentCustomRetryBackoff func(func() error) error + metricsFactory metrics.EntryFactory } // NewECSClient creates a new ECSClient interface object. @@ -112,6 +114,9 @@ func NewECSClient( if client.submitStateChangeClient == nil { client.submitStateChangeClient = newSubmitStateChangeClient(&ecsConfig) } + if client.metricsFactory == nil { + client.metricsFactory = metrics.NewNopEntryFactory() + } return client, nil } @@ -747,7 +752,7 @@ func (client *ecsClient) discoverPollEndpoint(containerInstanceArn string, } } } - + discoverPollEndpointStartTime := time.Now() // Cache miss or expired, invoke the ECS DiscoverPollEndpoint API. 
logger.Debug("Invoking DiscoverPollEndpoint", logger.Fields{ field.ContainerInstanceARN: containerInstanceArn, @@ -758,6 +763,7 @@ func (client *ecsClient) discoverPollEndpoint(containerInstanceArn string, Cluster: aws.String(client.configAccessor.Cluster()), ZoneId: aws.String(availabilityZone), }) + client.metricsFactory.New(metrics.DiscoverPollEndpointCallName).Done(err) if err != nil { // If we got an error calling the API, fallback to an expired cached endpoint if // we have it. @@ -776,7 +782,7 @@ func (client *ecsClient) discoverPollEndpoint(containerInstanceArn string, } return nil, err } - + client.metricsFactory.New(metrics.DiscoverPollEndpointDurationName).WithGauge(time.Since(discoverPollEndpointStartTime)).Done(nil) // Cache the response from ECS. client.pollEndpointCache.Set(containerInstanceArn, output) return output, nil diff --git a/ecs-agent/api/ecs/client/ecs_client_option.go b/ecs-agent/api/ecs/client/ecs_client_option.go index ee9ad5c8686..14ac8382b0a 100644 --- a/ecs-agent/api/ecs/client/ecs_client_option.go +++ b/ecs-agent/api/ecs/client/ecs_client_option.go @@ -16,6 +16,7 @@ package ecsclient import ( "github.com/aws/amazon-ecs-agent/ecs-agent/api/ecs" "github.com/aws/amazon-ecs-agent/ecs-agent/async" + "github.com/aws/amazon-ecs-agent/ecs-agent/metrics" ) // ECSClientOption allows for configuration of an ecsClient. @@ -87,3 +88,12 @@ func WithSubmitStateChangeClient(s ecs.ECSSubmitStateSDK) ECSClientOption { client.submitStateChangeClient = s } } + +// WithMetricsFactory is an ECSClientOption that configures +// ecsClient.metricsFactory with the value passed as a parameter. 
+// This is especially useful for emitting metrics in the ECS Client +func WithMetricsFactory(metricsFactory metrics.EntryFactory) ECSClientOption { + return func(client *ecsClient) { + client.metricsFactory = metricsFactory + } +} diff --git a/ecs-agent/api/ecs/client/ecs_client_option_test.go b/ecs-agent/api/ecs/client/ecs_client_option_test.go index 390a2bcd2e8..23d476e8bd6 100644 --- a/ecs-agent/api/ecs/client/ecs_client_option_test.go +++ b/ecs-agent/api/ecs/client/ecs_client_option_test.go @@ -22,6 +22,7 @@ import ( mock_ecs "github.com/aws/amazon-ecs-agent/ecs-agent/api/ecs/mocks" "github.com/aws/amazon-ecs-agent/ecs-agent/async" + "github.com/aws/amazon-ecs-agent/ecs-agent/metrics" "github.com/stretchr/testify/assert" ) @@ -102,3 +103,10 @@ func TestWithSubmitStateChangeClient(t *testing.T) { option(client) assert.Equal(t, newSubmitStateChangeClient, client.submitStateChangeClient) } + +func TestWithMetricsFactory(t *testing.T) { + client := &ecsClient{} // client.metricsFactory is nil by default + option := WithMetricsFactory(metrics.NewNopEntryFactory()) + option(client) + assert.NotNil(t, client.metricsFactory) +} diff --git a/ecs-agent/metrics/constants.go b/ecs-agent/metrics/constants.go index 9537a58fd7a..0b4c1634f45 100644 --- a/ecs-agent/metrics/constants.go +++ b/ecs-agent/metrics/constants.go @@ -46,6 +46,16 @@ const ( ACSDisconnectTimeoutMetricName = agentAvailabilityNamespace + ".ACSDisconnectTimeout" TCSDisconnectTimeoutMetricName = agentAvailabilityNamespace + ".TCSDisconnectTimeout" + // ACS Session Metrics + acsSessionNamespace = "ACSSession" + ACSSessionCallName = acsSessionNamespace + ".ACSConnect" + ACSSessionCallDurationName = acsSessionNamespace + ".ACSConnectDuration" + + // ECS Client Metrics + ecsClientNamespace = "ECSClient" + DiscoverPollEndpointCallName = ecsClientNamespace + ".DiscoverPollEndpoint" + DiscoverPollEndpointDurationName = ecsClientNamespace + ".DiscoverPollEndpointDuration" + dbClientMetricNamespace = "Data" 
GetNetworkConfigurationByTaskMetricName = dbClientMetricNamespace + ".GetNetworkConfigurationByTask" SaveNetworkNamespaceMetricName = dbClientMetricNamespace + ".SaveNetworkNamespace" diff --git a/go.mod b/go.mod new file mode 100644 index 00000000000..bd833852a62 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module github.com/aws/amazon-ecs-agent/ecs-agent + +go 1.22.7