diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index 328dc2f767..0bd962e5a6 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -21,6 +21,7 @@ internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy { private const int RetryIntervalInMS = 1000; // Once we detect failover wait for 1 second before retrying request. private const int MaxRetryCount = 120; + private const int MaxServiceUnavailableRetryCount = 1; private readonly IDocumentClientRetryPolicy throttlingRetry; private readonly GlobalEndpointManager globalEndpointManager; @@ -28,6 +29,7 @@ internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy private int failoverRetryCount; private int sessionTokenRetryCount; + private int serviceUnavailableRetryCount; private bool isReadRequest; private bool canUseMultipleWriteLocations; private Uri locationEndpoint; @@ -48,6 +50,7 @@ public ClientRetryPolicy( this.failoverRetryCount = 0; this.enableEndpointDiscovery = enableEndpointDiscovery; this.sessionTokenRetryCount = 0; + this.serviceUnavailableRetryCount = 0; this.canUseMultipleWriteLocations = false; } @@ -65,8 +68,7 @@ public async Task ShouldRetryAsync( this.retryContext = null; // Received Connection error (HttpRequestException), initiate the endpoint rediscovery - HttpRequestException httpException = exception as HttpRequestException; - if (httpException != null) + if (exception is HttpRequestException) { DefaultTrace.TraceWarning("Endpoint not reachable. 
Refresh cache and retry"); return await this.ShouldRetryOnEndpointFailureAsync(this.isReadRequest, false); @@ -187,6 +189,13 @@ private async Task ShouldRetryInternalAsync( return this.ShouldRetryOnSessionNotAvailable(); } + // Received 503.0 due to client connect timeout or Gateway + if (statusCode == HttpStatusCode.ServiceUnavailable + && subStatusCode == SubStatusCodes.Unknown) + { + return this.ShouldRetryOnServiceUnavailable(); + } + return null; } @@ -293,6 +302,47 @@ private ShouldRetryResult ShouldRetryOnSessionNotAvailable() } } + /// <summary> + /// For a ServiceUnavailable (503.0) we could be seeing either a local Direct/TCP timeout or a Gateway response with the same status because the endpoint is not yet available. + /// The request is retried only if there are other regions available. + /// </summary> + private ShouldRetryResult ShouldRetryOnServiceUnavailable() + { + if (this.serviceUnavailableRetryCount++ >= ClientRetryPolicy.MaxServiceUnavailableRetryCount) + { + DefaultTrace.TraceInformation($"ShouldRetryOnServiceUnavailable() Not retrying. Retry count = {this.serviceUnavailableRetryCount}."); + return ShouldRetryResult.NoRetry(); + } + + if (!this.canUseMultipleWriteLocations + && !this.isReadRequest) + { + // Write requests on single master cannot be retried, no other regions available + return ShouldRetryResult.NoRetry(); + } + + int availablePreferredLocations = this.globalEndpointManager.PreferredLocationCount; + + if (availablePreferredLocations <= 1) + { + // No other regions to retry on + DefaultTrace.TraceInformation($"ShouldRetryOnServiceUnavailable() Not retrying. No other regions available for the request. AvailablePreferredLocations = {availablePreferredLocations}."); + return ShouldRetryResult.NoRetry(); + } + + DefaultTrace.TraceInformation($"ShouldRetryOnServiceUnavailable() Retrying. 
Received on endpoint {this.locationEndpoint}, IsReadRequest = {this.isReadRequest}."); + + // Retrying on second PreferredLocations + // RetryCount is used as zero-based index + this.retryContext = new RetryContext() + { + RetryCount = this.serviceUnavailableRetryCount, + RetryRequestOnPreferredLocations = true + }; + + return ShouldRetryResult.RetryAfter(TimeSpan.Zero); + } + private sealed class RetryContext { public int RetryCount { get; set; } diff --git a/Microsoft.Azure.Cosmos/src/Routing/GlobalEndpointManager.cs b/Microsoft.Azure.Cosmos/src/Routing/GlobalEndpointManager.cs index f7c94a5f2a..ad8de1bbb5 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/GlobalEndpointManager.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/GlobalEndpointManager.cs @@ -91,6 +91,14 @@ public ReadOnlyCollection WriteEndpoints } } + public int PreferredLocationCount + { + get + { + return this.connectionPolicy.PreferredLocations != null ? this.connectionPolicy.PreferredLocations.Count : 0; + } + } + public static async Task GetDatabaseAccountFromAnyLocationsAsync( Uri defaultEndpoint, IList locations, Func> getDatabaseAccountFn) { diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs index 23cac2d2d1..953ab537ef 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs @@ -523,6 +523,91 @@ await this.ValidateLocationCacheAsync( } } + [DataTestMethod] + [DataRow(true, false, false, false, DisplayName = "Read request - Single master - no preferred locations - should NOT retry")] + [DataRow(false, false, false, false, DisplayName = "Write request - Single master - no preferred locations - should NOT retry")] + [DataRow(true, true, false, false, DisplayName = "Read request - Multi master - no preferred locations - should NOT retry")] + [DataRow(false, 
true, false, false, DisplayName = "Write request - Multi master - no preferred locations - should NOT retry")] + [DataRow(true, false, true, true, DisplayName = "Read request - Single master - with preferred locations - should retry")] + [DataRow(false, false, true, false, DisplayName = "Write request - Single master - with preferred locations - should NOT retry")] + [DataRow(true, true, true, true, DisplayName = "Read request - Multi master - with preferred locations - should retry")] + [DataRow(false, true, true, true, DisplayName = "Write request - Multi master - with preferred locations - should retry")] + public async Task ClientRetryPolicy_ValidateRetryOnServiceUnavailable( + bool isReadRequest, + bool useMultipleWriteLocations, + bool usesPreferredLocations, + bool shouldHaveRetried) + { + const bool enableEndpointDiscovery = true; + + this.Initialize( + useMultipleWriteLocations: useMultipleWriteLocations, + enableEndpointDiscovery: enableEndpointDiscovery, + isPreferredLocationsListEmpty: !usesPreferredLocations); + + await this.endpointManager.RefreshLocationAsync(this.databaseAccount); + ClientRetryPolicy retryPolicy = new ClientRetryPolicy(this.endpointManager, enableEndpointDiscovery, new RetryOptions()); + + using (DocumentServiceRequest request = this.CreateRequest(isReadRequest: isReadRequest, isMasterResourceType: false)) + { + int retryCount = 0; + + try + { + await BackoffRetryUtility.ExecuteAsync( + () => + { + retryPolicy.OnBeforeSendRequest(request); + + if (retryCount == 1) + { + Uri expectedEndpoint = null; + if (usesPreferredLocations) + { + expectedEndpoint = LocationCacheTests.EndpointByLocation[this.preferredLocations[1]]; + } + else + { + if (isReadRequest) + { + expectedEndpoint = new Uri(this.databaseAccount.ReadLocationsInternal[1].Endpoint); + } + else + { + expectedEndpoint = new Uri(this.databaseAccount.WriteLocationsInternal[1].Endpoint); + } + } + + Assert.AreEqual(expectedEndpoint, 
request.RequestContext.LocationEndpointToRoute); + } + else if (retryCount > 1) + { + Assert.Fail("Should retry once"); + } + + retryCount++; + + throw new ServiceUnavailableException(); + }, + retryPolicy); + + Assert.Fail(); + } + catch (ServiceUnavailableException) + { + DefaultTrace.TraceInformation("Received expected ServiceUnavailableException"); + if (shouldHaveRetried) + { + Assert.AreEqual(2, retryCount, $"Retry count {retryCount}, shouldHaveRetried {shouldHaveRetried} isReadRequest {isReadRequest} useMultipleWriteLocations {useMultipleWriteLocations} usesPreferredLocations {usesPreferredLocations}"); + } + else + { + Assert.AreEqual(1, retryCount, $"Retry count {retryCount}, shouldHaveRetried {shouldHaveRetried} isReadRequest {isReadRequest} useMultipleWriteLocations {useMultipleWriteLocations} usesPreferredLocations {usesPreferredLocations}"); + } + } + } + } + private static AccountProperties CreateDatabaseAccount(bool useMultipleWriteLocations) { AccountProperties databaseAccount = new AccountProperties()