Skip to content

Commit

Permalink
Direct 3.21.0: Adds new API changes (#2687)
Browse files Browse the repository at this point in the history
1. Adds new system usage telemetry
2. Adds new ClientSideRequestStats interface to fix the start and end time
3. Fixes lock on client telemetry logging
4. Adds optimization to DiagnosticsHandler
5. Removes duplicate CPU collector in HA layer
  • Loading branch information
j82w authored Sep 2, 2021
1 parent e2ea281 commit 43f7887
Show file tree
Hide file tree
Showing 24 changed files with 404 additions and 476 deletions.
2 changes: 1 addition & 1 deletion Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<ClientOfficialVersion>3.20.1</ClientOfficialVersion>
<ClientPreviewVersion>3.20.1</ClientPreviewVersion>
<ClientPreviewSuffixVersion>preview</ClientPreviewSuffixVersion>
<DirectVersion>3.20.0</DirectVersion>
<DirectVersion>3.21.0</DirectVersion>
<EncryptionVersion>1.0.0-previewV16</EncryptionVersion>
<HybridRowVersion>1.1.0-preview3</HybridRowVersion>
<AboveDirBuildProps>$([MSBuild]::GetPathOfFileAbove('Directory.Build.props', '$(MSBuildThisFileDirectory)../'))</AboveDirBuildProps>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ private void WalkTraceTreeForRegionsContated(ITrace currentTrace, HashSet<(strin
{
if (datums is ClientSideRequestStatisticsTraceDatum clientSideRequestStatisticsTraceDatum)
{
regionsContacted.UnionWith(clientSideRequestStatisticsTraceDatum.RegionsContactedWithName);
regionsContacted.UnionWith(clientSideRequestStatisticsTraceDatum.RegionsContacted);
return;
}
}
Expand Down
1 change: 0 additions & 1 deletion Microsoft.Azure.Cosmos/src/DocumentClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6518,7 +6518,6 @@ private void InitializeDirectConnectivity(IStoreClientFactory storeClientFactory
this.rntbdPortPoolBindAttempts,
receiveHangDetectionTimeSeconds: this.rntbdReceiveHangDetectionTimeSeconds,
sendHangDetectionTimeSeconds: this.rntbdSendHangDetectionTimeSeconds,
enableCpuMonitor: this.enableCpuMonitor,
retryWithConfiguration: this.ConnectionPolicy.RetryOptions?.GetRetryWithConfiguration(),
enableTcpConnectionEndpointRediscovery: this.ConnectionPolicy.EnableTcpConnectionEndpointRediscovery,
addressResolver: this.AddressResolver,
Expand Down
18 changes: 16 additions & 2 deletions Microsoft.Azure.Cosmos/src/Handler/ClientPipelineBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ public ClientPipelineBuilder(
this.PartitionKeyRangeHandler = new PartitionKeyRangeHandler(client);
Debug.Assert(this.PartitionKeyRangeHandler.InnerHandler == null, "The PartitionKeyRangeHandler.InnerHandler must be null to allow other handlers to be linked.");

// Disable system usage for internal builds. Cosmos DB owns the VMs and already logs
// the system information so no need to track it.
#if !INTERNAL
this.diagnosticsHandler = new DiagnosticsHandler();
Debug.Assert(this.diagnosticsHandler.InnerHandler == null, nameof(this.diagnosticsHandler));

Expand All @@ -48,6 +51,11 @@ public ClientPipelineBuilder(
this.telemetryHandler = new TelemetryHandler(telemetry);
Debug.Assert(this.telemetryHandler.InnerHandler == null, nameof(this.telemetryHandler));
}
#else
this.diagnosticsHandler = null;
this.telemetryHandler = null;
#endif

this.UseRetryPolicy();
this.AddCustomHandlers(customHandlers);
}
Expand Down Expand Up @@ -158,9 +166,15 @@ public RequestInvokerHandler Build()
}
}

// Public SDK should always have the diagnostics handler
#if !INTERNAL
Debug.Assert(this.diagnosticsHandler != null, nameof(this.diagnosticsHandler));
current.InnerHandler = this.diagnosticsHandler;
current = current.InnerHandler;
#endif
if (this.diagnosticsHandler != null)
{
current.InnerHandler = this.diagnosticsHandler;
current = current.InnerHandler;
}

Debug.Assert(this.retryHandler != null, nameof(this.retryHandler));
current.InnerHandler = this.retryHandler;
Expand Down
21 changes: 14 additions & 7 deletions Microsoft.Azure.Cosmos/src/Handler/DiagnosticsHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,9 @@

namespace Microsoft.Azure.Cosmos.Handlers
{
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using Handler;
using Microsoft.Azure.Cosmos.Tracing;
using Microsoft.Azure.Cosmos.Handler;
using Microsoft.Azure.Cosmos.Tracing.TraceData;
using Microsoft.Azure.Documents.Rntbd;

Expand All @@ -21,12 +18,22 @@ namespace Microsoft.Azure.Cosmos.Handlers
/// </summary>
internal class DiagnosticsHandler : RequestHandler
{
public override Task<ResponseMessage> SendAsync(
public override async Task<ResponseMessage> SendAsync(
RequestMessage request,
CancellationToken cancellationToken)
{
DiagnosticsHandlerHelper.Instance().RecordCpuDiagnostics(request, DiagnosticsHandlerHelper.Diagnostickey);
return base.SendAsync(request, cancellationToken);
ResponseMessage responseMessage = await base.SendAsync(request, cancellationToken);

// Record the diagnostics on the response to get the CPU of when the request was executing
SystemUsageHistory systemUsageHistory = DiagnosticsHandlerHelper.Instance.GetDiagnosticsSystemHistory();
if (systemUsageHistory != null)
{
request.Trace.AddDatum(
"CPU Load History",
new CpuHistoryTraceDatum(systemUsageHistory));
}

return responseMessage;
}
}
}
115 changes: 52 additions & 63 deletions Microsoft.Azure.Cosmos/src/Handler/DiagnosticsHandlerHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,65 +6,57 @@ namespace Microsoft.Azure.Cosmos.Handler
{
using System;
using System.Collections.Generic;
using System.Text;
using Documents.Rntbd;
using Microsoft.Azure.Cosmos.Core.Trace;
using Tracing.TraceData;

/// <summary>
/// This is a helper class that creates a single static instance to prevent each
/// client instance from creating a new CPU monitor.
/// </summary>
internal class DiagnosticsHandlerHelper
{
private static DiagnosticsHandlerHelper helper;
private readonly SystemUsageMonitorBase systemUsageMonitor = null;
public static readonly TimeSpan DiagnosticsRefreshInterval = TimeSpan.FromSeconds(10);
private readonly SystemUsageRecorder diagnosticSystemUsageRecorder = new SystemUsageRecorder(
identifier: Diagnostickey,
historyLength: 6,
refreshInterval: DiagnosticsHandlerHelper.DiagnosticsRefreshInterval);

internal const string Diagnostickey = "diagnostic";
private const int HistoryLengthForDiagnostics = 6;
private readonly TimeSpan refreshIntervalForDiagnostics = TimeSpan.FromSeconds(10);
private readonly SystemUsageRecorder telemetrySystemUsageRecorder = new SystemUsageRecorder(
identifier: Telemetrykey,
historyLength: 120,
refreshInterval: TimeSpan.FromSeconds(5));

internal const string Diagnostickey = "diagnostic";
internal const string Telemetrykey = "telemetry";
private const int HistoryLengthForTelemetry = 120;
private readonly TimeSpan refreshIntervalForTelemetry = TimeSpan.FromSeconds(5);

private bool isMonitoringEnabled = false;

private static readonly object staticLock = new object();

/// <summary>
/// Singleton to make sureonly one intsane of DiagnosticHandlerHelper is there
/// Singleton to make sure only one instance of DiagnosticHandlerHelper is there.
/// System usage collection is disabled for internal builds, so the instance is set to null there
/// to avoid spending compute if an instance is accidentally created or used.
/// </summary>
public static DiagnosticsHandlerHelper Instance()
{
lock (staticLock)
{
if (helper != null)
{
return helper;
}
return helper = new DiagnosticsHandlerHelper();
}
}
public static readonly DiagnosticsHandlerHelper Instance =
#if INTERNAL
null;
#else
new DiagnosticsHandlerHelper();
#endif

private DiagnosticsHandlerHelper()
{
this.isMonitoringEnabled = false;

// If the CPU monitor fails for some reason don't block the application
try
{
this.systemUsageMonitor = SystemUsageMonitorBase.Create(
new List<CpuAndMemoryUsageRecorder>
SystemUsageMonitor systemUsageMonitor = SystemUsageMonitor.CreateAndStart(
new List<SystemUsageRecorder>
{
new CpuAndMemoryUsageRecorder(Diagnostickey, HistoryLengthForDiagnostics, this.refreshIntervalForDiagnostics),
new CpuAndMemoryUsageRecorder(Telemetrykey, HistoryLengthForTelemetry, this.refreshIntervalForTelemetry)
this.diagnosticSystemUsageRecorder,
this.telemetrySystemUsageRecorder,
});

if (this.systemUsageMonitor is SystemUsageMonitorNoOps)
{
throw new Exception("Unsupported System Usage Monitor");
}

this.systemUsageMonitor.Start();
this.isMonitoringEnabled = true;
}
catch (Exception ex)
Expand All @@ -78,50 +70,47 @@ private DiagnosticsHandlerHelper()
/// The diagnostics should never block a request; collecting them is a best-effort attempt.
/// If reading the CPU load history fails, do not try again in the future.
/// </summary>
public void RecordCpuDiagnostics(RequestMessage request, string recorderKey)
public SystemUsageHistory GetDiagnosticsSystemHistory()
{
if (this.isMonitoringEnabled)
if (!this.isMonitoringEnabled)
{
try
{
CpuLoadHistory cpuHistory = this.systemUsageMonitor.GetRecorder(recorderKey).CpuUsage;
if (cpuHistory != null)
{
request.Trace.AddDatum(
"CPU Load History",
new CpuHistoryTraceDatum(cpuHistory));
}
}
catch (Exception ex)
{
DefaultTrace.TraceError(ex.Message);
this.isMonitoringEnabled = false;
}
return null;
}

try
{
return this.diagnosticSystemUsageRecorder.Data;
}
catch (Exception ex)
{
DefaultTrace.TraceError(ex.Message);
this.isMonitoringEnabled = false;
return null;
}
}

/// <summary>
/// This method will give CPU Usage(%) and Memory Usage(kb) for a given recorder,
/// Right now only 2 recorders are available : Diagnostic and Telemetry
/// </summary>
/// <param name="recorderKey"></param>
/// <returns> CpuAndMemoryUsageRecorder</returns>
public CpuAndMemoryUsageRecorder GetUsageRecorder(string recorderKey)
public SystemUsageHistory GetClientTelemtrySystemHistory()
{
if (this.isMonitoringEnabled)
if (!this.isMonitoringEnabled)
{
try
{
return this.systemUsageMonitor.GetRecorder(recorderKey);
}
catch (Exception ex)
{
DefaultTrace.TraceError(ex.Message);
this.isMonitoringEnabled = false;
}
return null;
}

return null;
try
{
return this.telemetrySystemUsageRecorder.Data;
}
catch (Exception ex)
{
DefaultTrace.TraceError(ex.Message);
this.isMonitoringEnabled = false;
return null;
}
}
}
}
10 changes: 10 additions & 0 deletions Microsoft.Azure.Cosmos/src/Regions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -319,5 +319,15 @@ public static class Regions
/// Name of the Azure US SLV region in the Azure Cosmos DB service.
/// </summary>
public const string EastUSSLV = "East US SLV";

/// <summary>
/// Name of the Azure Sweden Central region in the Azure Cosmos DB service.
/// </summary>
public const string SwedenCentral = "Sweden Central";

/// <summary>
/// Name of the Azure Sweden South region in the Azure Cosmos DB service.
/// </summary>
public const string SwedenSouth = "Sweden South";
}
}
2 changes: 1 addition & 1 deletion Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ internal static CosmosClientContext Create(
userAgent: connectionPolicy.UserAgentContainer.UserAgent,
connectionMode: connectionPolicy.ConnectionMode,
authorizationTokenProvider: cosmosClient.AuthorizationTokenProvider,
diagnosticsHelper: DiagnosticsHandlerHelper.Instance(),
diagnosticsHelper: DiagnosticsHandlerHelper.Instance,
preferredRegions: clientOptions.ApplicationPreferredRegions);
}
else
Expand Down
7 changes: 3 additions & 4 deletions Microsoft.Azure.Cosmos/src/Telemetry/ClientTelemetry.cs
Original file line number Diff line number Diff line change
Expand Up @@ -282,18 +282,17 @@ private void RecordSystemUtilization()
{
DefaultTrace.TraceVerbose("Started Recording System Usage for telemetry.");

CpuAndMemoryUsageRecorder systemUsageRecorder = this.diagnosticsHelper.GetUsageRecorder(DiagnosticsHandlerHelper.Telemetrykey);
SystemUsageHistory systemUsageRecorder = this.diagnosticsHelper.GetClientTelemtrySystemHistory();

if (systemUsageRecorder != null )
{
SystemInfo cpuUsagePayload = ClientTelemetryHelper.RecordCpuUsage(systemUsageRecorder);
(SystemInfo cpuUsagePayload, SystemInfo memoryUsagePayload) = ClientTelemetryHelper.RecordSystemUsage(systemUsageRecorder);
if (cpuUsagePayload != null)
{
this.clientTelemetryInfo.SystemInfo.Add(cpuUsagePayload);
DefaultTrace.TraceVerbose("Recorded CPU Usage for telemetry.");
}

SystemInfo memoryUsagePayload = ClientTelemetryHelper.RecordMemoryUsage(systemUsageRecorder);

if (memoryUsagePayload != null)
{
this.clientTelemetryInfo.SystemInfo.Add(memoryUsagePayload);
Expand Down
Loading

0 comments on commit 43f7887

Please sign in to comment.