From 075af8250c8b337c1844b8028e895dc0f48f63cb Mon Sep 17 00:00:00 2001 From: David Lu Date: Mon, 7 Dec 2020 19:19:23 -0800 Subject: [PATCH 01/12] added metrics for current number of connected clients, individual device connect/disconnected to/from iot edge --- doc/BuiltInMetrics.md | 38 ++++++++++--------- .../Authenticator.cs | 3 +- .../ConnectionManager.cs | 21 ++++++++-- .../DeviceConnectionMetrics.cs | 33 ++++++++++++++++ 4 files changed, 73 insertions(+), 22 deletions(-) create mode 100644 edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs diff --git a/doc/BuiltInMetrics.md b/doc/BuiltInMetrics.md index 011245c880b..9c98b38a4cd 100644 --- a/doc/BuiltInMetrics.md +++ b/doc/BuiltInMetrics.md @@ -21,25 +21,27 @@ instance_number | A Guid representing the current runtime. On restart, all metri ### EdgeHub | Name | Dimensions | Description | Type | |-------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------| -| `edgehub_gettwin_total` | `source` (Operation source)
`id` (Module ID) | Total number of GetTwin calls | counter | -| `edgehub_messages_received_total` | `route_output` (Output that sent the message)
`id` (Module ID) | Total number of messages received from clients | counter | -| `edgehub_messages_sent_total` | `from` (Message source)
`to` (Message destination)
`from_route_output` (Output that sent the message)
`to_route_input` (Message destination input [empty when "to" is $upstream])
`priority` (message priority to destination) | Total number of messages sent to clients or upstream | counter | -| `edgehub_reported_properties_total` | `target`(Update target)
`id` (Module ID) | Total reported property updates calls | counter | -| `edgehub_message_size_bytes` | `id` (Module ID)
`quantile`(Percentile [50, 90, 95, 99, 99.9, 99.99]) | P50, P90, P95, P99, P99.9 and P99.99 message size from clients. Values may be reported as `NaN` if no new measurements are reported for a certain period of time (currently 10 minutes). As this is `summary` type, corresponding `_count` and `_sum` counters will be emitted. | summary | -| `edgehub_gettwin_duration_seconds` | `source` (Operation source)
`id` (Module ID)
`quantile`(Percentile [50, 90, 95, 99, 99.9, 99.99]) | P50, P90, P95, P99, P99.9 and P99.99 time taken for get twin operations. Values may be reported as `NaN` if no new measurements are reported for a certain period of time (currently 10 minutes). As this is `summary` type, corresponding `_count` and `_sum` counters will be emitted. | summary | +| `edgehub_gettwin_total` | `source` (Operation source)
`id` (Module ID) | Total number of GetTwin calls | counter | +| `edgehub_messages_received_total` | `route_output` (Output that sent the message)
`id` (Module ID) | Total number of messages received from clients | counter | +| `edgehub_messages_sent_total` | `from` (Message source)
`to` (Message destination)
`from_route_output` (Output that sent the message)
`to_route_input` (Message destination input [empty when "to" is $upstream])
`priority` (message priority to destination) | Total number of messages sent to clients or upstream | counter | +| `edgehub_reported_properties_total` | `target`(Update target)
`id` (Module ID) | Total reported property updates calls | counter | +| `edgehub_message_size_bytes` | `id` (Module ID)
`quantile`(Percentile [50, 90, 95, 99, 99.9, 99.99]) | P50, P90, P95, P99, P99.9 and P99.99 message size from clients. Values may be reported as `NaN` if no new measurements are reported for a certain period of time (currently 10 minutes). As this is `summary` type, corresponding `_count` and `_sum` counters will be emitted. | summary | +| `edgehub_gettwin_duration_seconds` | `source` (Operation source)
`id` (Module ID)
`quantile`(Percentile [50, 90, 95, 99, 99.9, 99.99]) | P50, P90, P95, P99, P99.9 and P99.99 time taken for get twin operations. Values may be reported as `NaN` if no new measurements are reported for a certain period of time (currently 10 minutes). As this is `summary` type, corresponding `_count` and `_sum` counters will be emitted. | summary | | `edgehub_message_send_duration_seconds` | `from` (Message source)
`to` (Message destination)
`from_route_output` (Output that sent the message)
`to_route_input` (Message destination input [empty when "to" is $upstream])
`quantile`(Percentile [50, 90, 95, 99, 99.9, 99.99]) | P50, P90, P95, P99, P99.9 and P99.99 time taken to send a message. Values may be reported as `NaN` if no new measurements are reported for a certain period of time (currently 10 minutes). As this is `summary` type, corresponding `_count` and `_sum` counters will be emitted. | summary | -| `edgehub_message_process_duration_seconds` | `from` (Message source)
`to` (Message destination)
`priority` (Message priority)
`quantile`(Percentile [50, 90, 95, 99, 99.9, 99.99]) | P50, P90, P95, P99, P99.9 and P99.99 time taken to process a message from the queue. Values may be reported as `NaN` if no new measurements are reported for a certain period of time (currently 10 minutes). As this is `summary` type, corresponding `_count` and `_sum` counters will be emitted. | summary | -| `edgehub_reported_properties_update_duration_seconds` | `target` (Operation target)
`id` (Module ID)
`quantile`(Percentile [50, 90, 95, 99, 99.9, 99.99]) | P50, P90, P95, P99, P99.9 and P99.99 time taken to update reported properties. Values may be reported as `NaN` if no new measurements are reported for a certain period of time (currently 10 minutes). As this is `summary` type, corresponding `_count` and `_sum` counters will be emitted. | summary | -| `edgehub_direct_method_duration_seconds` | `from` (Caller)
`to` (Reciever)
`quantile`(Percentile [50, 90, 95, 99, 99.9, 99.99]) | P50, P90, P95, P99, P99.9 and P99.99 time taken to resolve a direct message. Values may be reported as `NaN` if no new measurements are reported for a certain period of time (currently 10 minutes). As this is `summary` type, corresponding `_count` and `_sum` counters will be emitted. | summary | -| `edgehub_direct_methods_total` | `from` (Message source)
`to` (Message destination) | Total number of direct messages sent | counter | -| `edgehub_queue_length` | `endpoint` (Message source)
`priority` (queue priority) | Current length of edgeHub's queue for a given priority | gauge | -| `edgehub_messages_dropped_total` | `reason` (no_route, ttl_expiry)
`from` (Message source)
`from_route_output` (Output that sent the message)
| Total number of messages removed because of reason | counter | -| `edgehub_messages_unack_total` | `reason` (storage_failure)
`from` (Message source)
`from_route_output` (Output that sent the message)
| Total number of messages unack because storage failure | counter | -| `edgehub_offline_count_total` | `id` (Module ID)
| Total number of times edgeHub went offline | counter | -| `edgehub_offline_duration_seconds` | `id` (Module ID)
`quantile`(Percentile [50, 90, 95, 99, 99.9, 99.99]) | P50, P90, P95, P99, P99.9 and P99.99 time edge hub was offline. Values may be reported as `NaN` if no new measurements are reported for a certain period of time (currently 10 minutes). As this is `summary` type, corresponding `_count` and `_sum` counters will be emitted. | summary | -| `edgehub_operation_retry_total` | `id` (Module ID)
`operation` (Operation name) | Total number of times edgeHub operations were retried | counter | -| `edgehub_client_connect_failed_total` | `id` (Module ID)
`reason` (not authenticated)
| Total number of times clients failed to connect to edgeHub | counter | - +| `edgehub_message_process_duration_seconds` | `from` (Message source)
`to` (Message destination)
`priority` (Message priority)
`quantile`(Percentile [50, 90, 95, 99, 99.9, 99.99]) | P50, P90, P95, P99, P99.9 and P99.99 time taken to process a message from the queue. Values may be reported as `NaN` if no new measurements are reported for a certain period of time (currently 10 minutes). As this is `summary` type, corresponding `_count` and `_sum` counters will be emitted. | summary | +| `edgehub_reported_properties_update_duration_seconds` | `target` (Operation target)
`id` (Module ID)
`quantile`(Percentile [50, 90, 95, 99, 99.9, 99.99]) | P50, P90, P95, P99, P99.9 and P99.99 time taken to update reported properties. Values may be reported as `NaN` if no new measurements are reported for a certain period of time (currently 10 minutes). As this is `summary` type, corresponding `_count` and `_sum` counters will be emitted. | summary | +| `edgehub_direct_method_duration_seconds` | `from` (Caller)
`to` (Receiver)
`quantile`(Percentile [50, 90, 95, 99, 99.9, 99.99]) | P50, P90, P95, P99, P99.9 and P99.99 time taken to resolve a direct message. Values may be reported as `NaN` if no new measurements are reported for a certain period of time (currently 10 minutes). As this is `summary` type, corresponding `_count` and `_sum` counters will be emitted. | summary | +| `edgehub_direct_methods_total` | `from` (Message source)
`to` (Message destination) | Total number of direct messages sent | counter | +| `edgehub_queue_length` | `endpoint` (Message source)
`priority` (queue priority) | Current length of edgeHub's queue for a given priority | gauge | +| `edgehub_messages_dropped_total` | `reason` (no_route, ttl_expiry)
`from` (Message source)
`from_route_output` (Output that sent the message)
| Total number of messages removed because of reason | counter | +| `edgehub_messages_unack_total` | `reason` (storage_failure)
`from` (Message source)
`from_route_output` (Output that sent the message)
| Total number of messages unack because storage failure | counter | +| `edgehub_offline_count_total` | `id` (Module ID)
| Total number of times edgeHub went offline | counter | +| `edgehub_offline_duration_seconds` | `id` (Module ID)
`quantile`(Percentile [50, 90, 95, 99, 99.9, 99.99]) | P50, P90, P95, P99, P99.9 and P99.99 time edge hub was offline. Values may be reported as `NaN` if no new measurements are reported for a certain period of time (currently 10 minutes). As this is `summary` type, corresponding `_count` and `_sum` counters will be emitted. | summary | +| `edgehub_operation_retry_total` | `id` (Module ID)
`operation` (Operation name) | Total number of times edgeHub operations were retried | counter | +| `edgehub_client_connect_failed_total` | `id` (Device/Module ID)
`reason` (not authenticated)
| Total number of times individual client failed to connect to edgeHub | counter | +| `edgehub_connected_clients` | | Current number of connected clients to edge hub | gauge | +| `edgehub_client_connect_success_total` | `id` (Device/Module ID)
| Total number of times individual client successfully connect to edgeHub | counter | | counter | +| `edgehub_client_disconnect_total` | `id` (Device/Module ID)
| Total number of times individual client disconnected from edgeHub | counter | ### EdgeAgent diff --git a/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/Authenticator.cs b/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/Authenticator.cs index 3ecee1d2088..df66b23cf53 100644 --- a/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/Authenticator.cs +++ b/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/Authenticator.cs @@ -57,8 +57,9 @@ async Task AuthenticateAsync(IClientCredentials clientCredentials, bool re { await this.credentialsCache.Add(clientCredentials); } - else + else if (!reAuthenticating) { + // only report authentication failure on initial authentication Metrics.AddAuthenticationFailure(clientCredentials.Identity.Id); } diff --git a/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/ConnectionManager.cs b/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/ConnectionManager.cs index 9142629b006..7cb8eb3cb85 100644 --- a/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/ConnectionManager.cs +++ b/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/ConnectionManager.cs @@ -72,6 +72,7 @@ public async Task AddDeviceConnection(IIdentity identity, IDeviceProxy devicePro await currentDeviceConnection .Filter(dc => dc.IsActive) .ForEachAsync(dc => dc.CloseAsync(new MultipleConnectionsException($"Multiple connections detected for device {identity.Id}"))); + this.OnDeviceConnected(identity); this.DeviceConnected?.Invoke(this, identity); } @@ -181,9 +182,10 @@ static Try GetCloudProxyFromCloudConnection(Try c async Task RemoveDeviceConnection(ConnectedDevice device, bool removeCloudConnection) { - Events.RemovingDeviceConnection(device.Identity.Id, removeCloudConnection); + var id = device.Identity.Id; + Events.RemovingDeviceConnection(id, removeCloudConnection); await device.DeviceConnection.Filter(dp => dp.IsActive) - .ForEachAsync(dp => dp.CloseAsync(new EdgeHubConnectionException($"Connection closed for device {device.Identity.Id}."))); + 
.ForEachAsync(dp => dp.CloseAsync(new EdgeHubConnectionException($"Connection closed for device {id}."))); if (removeCloudConnection) { @@ -191,7 +193,8 @@ await device.CloudConnection.Filter(cp => cp.IsActive) .ForEachAsync(cp => cp.CloseAsync()); } - Events.RemoveDeviceConnection(device.Identity.Id); + Events.RemoveDeviceConnection(id); + this.OnDeviceDisconnected(device.Identity); this.DeviceDisconnected?.Invoke(this, device.Identity); } @@ -574,5 +577,17 @@ public static void SetConnectedClientCountGauge(ConnectionManager connectionMana Util.Metrics.MetricsV0.SetGauge(ConnectedClientGaugeOptions, connectedClients); } } + + void OnDeviceConnected(IIdentity identity) + { + DeviceConnectionMetrics.OnDeviceConnected(identity.ToString()); + DeviceConnectionMetrics.UpdateConnectedClients(this.GetConnectedClients().Count() - 1); + } + + void OnDeviceDisconnected(IIdentity identity) + { + DeviceConnectionMetrics.OnDeviceDisconnected(identity.ToString()); + DeviceConnectionMetrics.UpdateConnectedClients(this.GetConnectedClients().Count() - 1); + } } } diff --git a/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs b/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs new file mode 100644 index 00000000000..1cacce66a62 --- /dev/null +++ b/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft. All rights reserved. 
+namespace Microsoft.Azure.Devices.Edge.Hub.Core +{ + using System.Collections.Generic; + using Microsoft.Azure.Devices.Edge.Util.Metrics; + using EdgeMetrics = Util.Metrics.Metrics; + + public static class DeviceConnectionMetrics + { + static readonly List EmptyStringList = new List(); + static readonly string[] EmptyStringArray = new string[0]; + public static readonly IMetricsGauge ConnectedClientsGauge = EdgeMetrics.Instance.CreateGauge( + "connected_clients", + "Number of device client connected to edge hub", + EmptyStringList); + + public static readonly IMetricsCounter ClientsConnectCounter = EdgeMetrics.Instance.CreateCounter( + "client_connect_success", + "Device client successfully connected to edge hub", + new List() { "id" }); + + public static readonly IMetricsCounter ClientsDiscconnectCounter = EdgeMetrics.Instance.CreateCounter( + "client_disconnect", + "Device client disconnected from edge hub", + new List() { "id" }); + + public static void UpdateConnectedClients(int connectedClients) => ConnectedClientsGauge.Set(connectedClients, EmptyStringArray); + + public static void OnDeviceConnected(string deviceId) => ClientsConnectCounter.Increment(1, new string[] { deviceId }); + + public static void OnDeviceDisconnected(string deviceId) => ClientsDiscconnectCounter.Increment(1, new string[] { deviceId }); + } +} From c493d1c50e2cd30b0af9d4b9d90ed0f0f5bd7440 Mon Sep 17 00:00:00 2001 From: David Lu Date: Mon, 7 Dec 2020 19:30:00 -0800 Subject: [PATCH 02/12] fixed metrics description --- .../DeviceConnectionMetrics.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs b/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs index 1cacce66a62..8761b0a42d4 100644 --- a/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs +++ b/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs @@ -11,17 
+11,17 @@ public static class DeviceConnectionMetrics static readonly string[] EmptyStringArray = new string[0]; public static readonly IMetricsGauge ConnectedClientsGauge = EdgeMetrics.Instance.CreateGauge( "connected_clients", - "Number of device client connected to edge hub", + "Current number of connected clients to edge hub", EmptyStringList); public static readonly IMetricsCounter ClientsConnectCounter = EdgeMetrics.Instance.CreateCounter( "client_connect_success", - "Device client successfully connected to edge hub", + "Total number of times individual client successfully connect to edgeHub", new List() { "id" }); public static readonly IMetricsCounter ClientsDiscconnectCounter = EdgeMetrics.Instance.CreateCounter( "client_disconnect", - "Device client disconnected from edge hub", + "Total number of times individual client disconnected from edgeHub", new List() { "id" }); public static void UpdateConnectedClients(int connectedClients) => ConnectedClientsGauge.Set(connectedClients, EmptyStringArray); From 3a09c423c7dd50fff1a034a50d60f4e8db8d5cfa Mon Sep 17 00:00:00 2001 From: David Lu Date: Tue, 8 Dec 2020 14:00:35 -0800 Subject: [PATCH 03/12] fixed UT ConnectionManagerTest by adding missing mock which impacted by metrics change --- .../ConnectionManagerTest.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/edge-hub/test/Microsoft.Azure.Devices.Edge.Hub.Core.Test/ConnectionManagerTest.cs b/edge-hub/test/Microsoft.Azure.Devices.Edge.Hub.Core.Test/ConnectionManagerTest.cs index 5017d82ff1a..d4b09b513ff 100644 --- a/edge-hub/test/Microsoft.Azure.Devices.Edge.Hub.Core.Test/ConnectionManagerTest.cs +++ b/edge-hub/test/Microsoft.Azure.Devices.Edge.Hub.Core.Test/ConnectionManagerTest.cs @@ -462,6 +462,7 @@ public async Task CloudProxyCallbackTest2() .ReturnsAsync(Try.Success(cloudConnection as ICloudConnection)); var deviceProxy = new Mock(MockBehavior.Strict); + deviceProxy.Setup(dp => dp.IsActive).Returns(true); var credentialsCache = new 
Mock(MockBehavior.Strict); credentialsCache.Setup(c => c.Get(deviceIdentity)).ReturnsAsync(Option.Some((IClientCredentials)updatedDeviceCredentials)); From 3b5451daabe6c9368fa5ff35baef8482b924a6f5 Mon Sep 17 00:00:00 2001 From: davilu <45977130+davilu@users.noreply.github.com> Date: Tue, 8 Dec 2020 14:12:40 -0800 Subject: [PATCH 04/12] Update edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs Co-authored-by: Venkat Yalla --- .../DeviceConnectionMetrics.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs b/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs index 8761b0a42d4..cce9b071a97 100644 --- a/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs +++ b/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs @@ -16,7 +16,7 @@ public static class DeviceConnectionMetrics public static readonly IMetricsCounter ClientsConnectCounter = EdgeMetrics.Instance.CreateCounter( "client_connect_success", - "Total number of times individual client successfully connect to edgeHub", + "Total number of times each client successfully connected to edgeHub", new List() { "id" }); public static readonly IMetricsCounter ClientsDiscconnectCounter = EdgeMetrics.Instance.CreateCounter( From 247ad0d27b1ab0dac44a3632e24904142960f131 Mon Sep 17 00:00:00 2001 From: davilu <45977130+davilu@users.noreply.github.com> Date: Tue, 8 Dec 2020 14:12:47 -0800 Subject: [PATCH 05/12] Update edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs Co-authored-by: Venkat Yalla --- .../DeviceConnectionMetrics.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs b/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs index cce9b071a97..79d455d24ad 100644 --- 
a/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs +++ b/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs @@ -21,7 +21,7 @@ public static class DeviceConnectionMetrics public static readonly IMetricsCounter ClientsDiscconnectCounter = EdgeMetrics.Instance.CreateCounter( "client_disconnect", - "Total number of times individual client disconnected from edgeHub", + "Total number of times each client disconnected from edgeHub", new List() { "id" }); public static void UpdateConnectedClients(int connectedClients) => ConnectedClientsGauge.Set(connectedClients, EmptyStringArray); From fa76c748bf19de3acf7dffad9c70f2887a141e76 Mon Sep 17 00:00:00 2001 From: davilu <45977130+davilu@users.noreply.github.com> Date: Tue, 8 Dec 2020 14:12:55 -0800 Subject: [PATCH 06/12] Update edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs Co-authored-by: Venkat Yalla --- .../DeviceConnectionMetrics.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs b/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs index 79d455d24ad..1569b78b8ea 100644 --- a/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs +++ b/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs @@ -11,7 +11,7 @@ public static class DeviceConnectionMetrics static readonly string[] EmptyStringArray = new string[0]; public static readonly IMetricsGauge ConnectedClientsGauge = EdgeMetrics.Instance.CreateGauge( "connected_clients", - "Current number of connected clients to edge hub", + "Current number of clients connected to edgeHub", EmptyStringList); public static readonly IMetricsCounter ClientsConnectCounter = EdgeMetrics.Instance.CreateCounter( From 86acf28d470a901190f78992324431668a2163fc Mon Sep 17 00:00:00 2001 From: davilu <45977130+davilu@users.noreply.github.com> Date: 
Tue, 8 Dec 2020 14:13:07 -0800 Subject: [PATCH 07/12] Update doc/BuiltInMetrics.md Co-authored-by: Venkat Yalla --- doc/BuiltInMetrics.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/BuiltInMetrics.md b/doc/BuiltInMetrics.md index 9c98b38a4cd..7bee7686c70 100644 --- a/doc/BuiltInMetrics.md +++ b/doc/BuiltInMetrics.md @@ -39,7 +39,7 @@ instance_number | A Guid representing the current runtime. On restart, all metri | `edgehub_offline_duration_seconds` | `id` (Module ID)
`quantile`(Percentile [50, 90, 95, 99, 99.9, 99.99]) | P50, P90, P95, P99, P99.9 and P99.99 time edge hub was offline. Values may be reported as `NaN` if no new measurements are reported for a certain period of time (currently 10 minutes). As this is `summary` type, corresponding `_count` and `_sum` counters will be emitted. | summary | | `edgehub_operation_retry_total` | `id` (Module ID)
`operation` (Operation name) | Total number of times edgeHub operations were retried | counter | | `edgehub_client_connect_failed_total` | `id` (Device/Module ID)
`reason` (not authenticated)
| Total number of times individual client failed to connect to edgeHub | counter | -| `edgehub_connected_clients` | | Current number of connected clients to edge hub | gauge | +| `edgehub_connected_clients` | | Current number of clients connected to edgeHub | gauge | | `edgehub_client_connect_success_total` | `id` (Device/Module ID)
| Total number of times individual client successfully connect to edgeHub | counter | | counter | | `edgehub_client_disconnect_total` | `id` (Device/Module ID)
| Total number of times individual client disconnected from edgeHub | counter | @@ -86,4 +86,4 @@ For mapping to host, the port will need to be exposed from Edge Hub's `createOpt } } -``` \ No newline at end of file +``` From 5ffd1ad150ddfd10824d3aa49592bbefcb099dfd Mon Sep 17 00:00:00 2001 From: davilu <45977130+davilu@users.noreply.github.com> Date: Tue, 8 Dec 2020 14:14:27 -0800 Subject: [PATCH 08/12] Update doc/BuiltInMetrics.md Co-authored-by: Venkat Yalla --- doc/BuiltInMetrics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/BuiltInMetrics.md b/doc/BuiltInMetrics.md index 7bee7686c70..0d5fc7483d0 100644 --- a/doc/BuiltInMetrics.md +++ b/doc/BuiltInMetrics.md @@ -40,7 +40,7 @@ instance_number | A Guid representing the current runtime. On restart, all metri | `edgehub_operation_retry_total` | `id` (Module ID)
`operation` (Operation name) | Total number of times edgeHub operations were retried | counter | | `edgehub_client_connect_failed_total` | `id` (Device/Module ID)
`reason` (not authenticated)
| Total number of times individual client failed to connect to edgeHub | counter | | `edgehub_connected_clients` | | Current number of clients connected to edgeHub | gauge | -| `edgehub_client_connect_success_total` | `id` (Device/Module ID)
| Total number of times individual client successfully connect to edgeHub | counter | | counter | +| `edgehub_client_connect_success_total` | `id` (Device/Module ID)
| Total number of times each client successfully connected to edgeHub | counter | | counter | | `edgehub_client_disconnect_total` | `id` (Device/Module ID)
| Total number of times individual client disconnected from edgeHub | counter | From 88d29dde0099d4bf3f22f1eb93b0d5e543ded6ce Mon Sep 17 00:00:00 2001 From: davilu <45977130+davilu@users.noreply.github.com> Date: Tue, 8 Dec 2020 14:14:34 -0800 Subject: [PATCH 09/12] Update doc/BuiltInMetrics.md Co-authored-by: Venkat Yalla --- doc/BuiltInMetrics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/BuiltInMetrics.md b/doc/BuiltInMetrics.md index 0d5fc7483d0..d80f54e7da0 100644 --- a/doc/BuiltInMetrics.md +++ b/doc/BuiltInMetrics.md @@ -41,7 +41,7 @@ instance_number | A Guid representing the current runtime. On restart, all metri | `edgehub_client_connect_failed_total` | `id` (Device/Module ID)
`reason` (not authenticated)
| Total number of times individual client failed to connect to edgeHub | counter | | `edgehub_connected_clients` | | Current number of clients connected to edgeHub | gauge | | `edgehub_client_connect_success_total` | `id` (Device/Module ID)
| Total number of times each client successfully connected to edgeHub | counter | | counter | -| `edgehub_client_disconnect_total` | `id` (Device/Module ID)
| Total number of times individual client disconnected from edgeHub | counter | +| `edgehub_client_disconnect_total` | `id` (Device/Module ID)
| Total number of times each client disconnected from edgeHub | counter | ### EdgeAgent From 28c9105c24e71faa04d58288138f7c16392f0c5a Mon Sep 17 00:00:00 2001 From: davilu <45977130+davilu@users.noreply.github.com> Date: Tue, 8 Dec 2020 14:18:18 -0800 Subject: [PATCH 10/12] Update doc/BuiltInMetrics.md Co-authored-by: Venkat Yalla --- doc/BuiltInMetrics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/BuiltInMetrics.md b/doc/BuiltInMetrics.md index d80f54e7da0..7b44a32da27 100644 --- a/doc/BuiltInMetrics.md +++ b/doc/BuiltInMetrics.md @@ -38,7 +38,7 @@ instance_number | A Guid representing the current runtime. On restart, all metri | `edgehub_offline_count_total` | `id` (Module ID)
| Total number of times edgeHub went offline | counter | | `edgehub_offline_duration_seconds` | `id` (Module ID)
`quantile`(Percentile [50, 90, 95, 99, 99.9, 99.99]) | P50, P90, P95, P99, P99.9 and P99.99 time edge hub was offline. Values may be reported as `NaN` if no new measurements are reported for a certain period of time (currently 10 minutes). As this is `summary` type, corresponding `_count` and `_sum` counters will be emitted. | summary | | `edgehub_operation_retry_total` | `id` (Module ID)
`operation` (Operation name) | Total number of times edgeHub operations were retried | counter | -| `edgehub_client_connect_failed_total` | `id` (Device/Module ID)
`reason` (not authenticated)
| Total number of times individual client failed to connect to edgeHub | counter | +| `edgehub_client_connect_failed_total` | `id` (Device/Module ID)
`reason` (not authenticated)
| Total number of times each client failed to connect to edgeHub | counter | | `edgehub_connected_clients` | | Current number of clients connected to edgeHub | gauge | | `edgehub_client_connect_success_total` | `id` (Device/Module ID)
| Total number of times each client successfully connected to edgeHub | counter | | counter | | `edgehub_client_disconnect_total` | `id` (Device/Module ID)
| Total number of times each client disconnected from edgeHub | counter | From 9b6eb5dccf0212fb058cf92a19d0890cd9ee1d88 Mon Sep 17 00:00:00 2001 From: David Lu Date: Wed, 9 Dec 2020 11:11:10 -0800 Subject: [PATCH 11/12] modified to use Array.Empty --- .../DeviceConnectionMetrics.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs b/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs index 1569b78b8ea..f50c057afcd 100644 --- a/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs +++ b/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/DeviceConnectionMetrics.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. namespace Microsoft.Azure.Devices.Edge.Hub.Core { + using System; using System.Collections.Generic; using Microsoft.Azure.Devices.Edge.Util.Metrics; using EdgeMetrics = Util.Metrics.Metrics; @@ -8,7 +9,6 @@ namespace Microsoft.Azure.Devices.Edge.Hub.Core public static class DeviceConnectionMetrics { static readonly List EmptyStringList = new List(); - static readonly string[] EmptyStringArray = new string[0]; public static readonly IMetricsGauge ConnectedClientsGauge = EdgeMetrics.Instance.CreateGauge( "connected_clients", "Current number of clients connected to edgeHub", @@ -24,7 +24,7 @@ public static class DeviceConnectionMetrics "Total number of times each client disconnected from edgeHub", new List() { "id" }); - public static void UpdateConnectedClients(int connectedClients) => ConnectedClientsGauge.Set(connectedClients, EmptyStringArray); + public static void UpdateConnectedClients(int connectedClients) => ConnectedClientsGauge.Set(connectedClients, Array.Empty()); public static void OnDeviceConnected(string deviceId) => ClientsConnectCounter.Increment(1, new string[] { deviceId }); From 63a1f8cbcba9765a4ce2f48bcbd1c5fcdc40f8e9 Mon Sep 17 00:00:00 2001 From: David Lu Date: Wed, 9 Dec 2020 
13:59:13 -0800 Subject: [PATCH 12/12] modified failed connection metrics description to align with doc and added ignore disconnect in e2e test --- .../src/Microsoft.Azure.Devices.Edge.Hub.Core/Authenticator.cs | 2 +- .../MetricsValidator/src/tests/ValidateDocumentedMetrics.cs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/Authenticator.cs b/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/Authenticator.cs index df66b23cf53..fa623a121a2 100644 --- a/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/Authenticator.cs +++ b/edge-hub/src/Microsoft.Azure.Devices.Edge.Hub.Core/Authenticator.cs @@ -102,7 +102,7 @@ static class Metrics { static readonly IMetricsCounter AuthCounter = Util.Metrics.Metrics.Instance.CreateCounter( "client_connect_failed", - "Client connection failure", + "Total number of times each client failed to connect to edgeHub", new List { "id", "reason", MetricsConstants.MsTelemetry }); public static void AddAuthenticationFailure(string id) => AuthCounter.Increment(1, new[] { id, "not_authenticated", bool.TrueString }); diff --git a/test/modules/MetricsValidator/src/tests/ValidateDocumentedMetrics.cs b/test/modules/MetricsValidator/src/tests/ValidateDocumentedMetrics.cs index 948a227a20b..3695f225ac4 100644 --- a/test/modules/MetricsValidator/src/tests/ValidateDocumentedMetrics.cs +++ b/test/modules/MetricsValidator/src/tests/ValidateDocumentedMetrics.cs @@ -80,7 +80,8 @@ protected override async Task Test(CancellationToken cancellationToken) "edgehub_messages_dropped_total", "edgehub_messages_unack_total", "edgehub_offline_count_total", - "edgehub_operation_retry_total" + "edgehub_operation_retry_total", + "edgehub_client_disconnect_total" }; foreach (string skippingMetric in skippingMetrics)