From 147a01a5e2531638d7c9860dcce4e86d33ce9deb Mon Sep 17 00:00:00 2001 From: Matias Quaranta Date: Thu, 17 Dec 2020 07:59:49 -0800 Subject: [PATCH 1/4] applying fix --- .../src/ClientRetryPolicy.cs | 42 +++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index 9866c648ef..c2e763a5ae 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -68,10 +68,13 @@ public async Task ShouldRetryAsync( this.retryContext = null; // Received Connection error (HttpRequestException), initiate the endpoint rediscovery - if (exception is HttpRequestException httpException) + if (exception is HttpRequestException _) { DefaultTrace.TraceWarning("Endpoint not reachable. Refresh cache and retry"); - return await this.ShouldRetryOnEndpointFailureAsync(this.isReadRequest, false); + return await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: this.isReadRequest, + forceRefresh: false, + retryOnPreferredLocations: true); } DocumentClientException clientException = exception as DocumentClientException; @@ -146,7 +149,7 @@ public void OnBeforeSendRequest(DocumentServiceRequest request) if (this.retryContext != null) { // set location-based routing directive based on request retry context - request.RequestContext.RouteToLocation(this.retryContext.RetryCount, this.retryContext.RetryRequestOnPreferredLocations); + request.RequestContext.RouteToLocation(this.retryContext.RetryLocationIndex, this.retryContext.RetryRequestOnPreferredLocations); } // Resolve the endpoint for the request and pin the resolution to the resolved endpoint @@ -171,7 +174,10 @@ private async Task ShouldRetryInternalAsync( && subStatusCode == SubStatusCodes.WriteForbidden) { DefaultTrace.TraceWarning("Endpoint not writable. Refresh cache and retry"); - return await this.ShouldRetryOnEndpointFailureAsync(false, true); + return await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: false, + forceRefresh: true, + retryOnPreferredLocations: false); } // Regional endpoint is not available yet for reads (e.g. add/ online of region is in progress) @@ -180,7 +186,10 @@ private async Task ShouldRetryInternalAsync( && (this.isReadRequest || this.canUseMultipleWriteLocations)) { DefaultTrace.TraceWarning("Endpoint not available for reads. Refresh cache and retry"); - return await this.ShouldRetryOnEndpointFailureAsync(true, false); + return await this.ShouldRetryOnEndpointFailureAsync( + isReadRequest: this.isReadRequest, + forceRefresh: false, + retryOnPreferredLocations: false); } if (statusCode == HttpStatusCode.NotFound @@ -199,7 +208,10 @@ private async Task ShouldRetryInternalAsync( return null; } - private async Task ShouldRetryOnEndpointFailureAsync(bool isReadRequest, bool forceRefresh) + private async Task ShouldRetryOnEndpointFailureAsync( + bool isReadRequest, + bool forceRefresh, + bool retryOnPreferredLocations) { if (!this.enableEndpointDiscovery || this.failoverRetryCount > MaxRetryCount) { @@ -239,10 +251,16 @@ private async Task ShouldRetryOnEndpointFailureAsync(bool isR await this.globalEndpointManager.RefreshLocationAsync(null, forceRefresh); + int retryLocationIndex = this.failoverRetryCount; // Used to generate a round-robin effect + if (retryOnPreferredLocations) + { + retryLocationIndex = 0; // When the endpoint is marked as unavailable, it is moved to the bottom of the preferrence list + } + this.retryContext = new RetryContext { - RetryCount = this.failoverRetryCount, - RetryRequestOnPreferredLocations = false + RetryLocationIndex = retryLocationIndex, + RetryRequestOnPreferredLocations = retryOnPreferredLocations, }; return ShouldRetryResult.RetryAfter(retryDelay); @@ -273,7 +291,7 @@ private ShouldRetryResult ShouldRetryOnSessionNotAvailable() { this.retryContext = new RetryContext() { - RetryCount = this.sessionTokenRetryCount - 1, + RetryLocationIndex = this.sessionTokenRetryCount - 1, RetryRequestOnPreferredLocations = this.sessionTokenRetryCount > 1 }; @@ -292,7 +310,7 @@ private ShouldRetryResult ShouldRetryOnSessionNotAvailable() { this.retryContext = new RetryContext { - RetryCount = this.sessionTokenRetryCount - 1, + RetryLocationIndex = this.sessionTokenRetryCount - 1, RetryRequestOnPreferredLocations = false }; @@ -336,7 +354,7 @@ private ShouldRetryResult ShouldRetryOnServiceUnavailable() // RetryCount is used as zero-based index this.retryContext = new RetryContext() { - RetryCount = this.serviceUnavailableRetryCount, + RetryLocationIndex = this.serviceUnavailableRetryCount, RetryRequestOnPreferredLocations = true }; @@ -345,7 +363,7 @@ private ShouldRetryResult ShouldRetryOnServiceUnavailable() private sealed class RetryContext { - public int RetryCount { get; set; } + public int RetryLocationIndex { get; set; } public bool RetryRequestOnPreferredLocations { get; set; } } } From af09b69d1084c835cc70791ab1a2f48c8de40e9d Mon Sep 17 00:00:00 2001 From: Matias Quaranta Date: Thu, 17 Dec 2020 08:05:22 -0800 Subject: [PATCH 2/4] tests --- .../LocationCacheTests.cs | 188 +++++++++++++++--- 1 file changed, 156 insertions(+), 32 deletions(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs index fd11face74..e7a25aee82 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs @@ -8,6 +8,7 @@ namespace Microsoft.Azure.Cosmos.Client.Tests using System.Collections.ObjectModel; using System.Globalization; using System.Linq; + using System.Net.Http; using System.Threading; using System.Threading.Tasks; using Microsoft.Azure.Cosmos.Core.Trace; @@ -227,10 +228,16 @@ private async Task ValidateRetryOnReadSessionNotAvailabeWithEnableMultipleWriteL const bool useMultipleWriteLocations = true; bool enableEndpointDiscovery = true; + ReadOnlyCollection preferredList = new List() { + "location2", + "location1" + }.AsReadOnly(); + this.Initialize( useMultipleWriteLocations: useMultipleWriteLocations, enableEndpointDiscovery: enableEndpointDiscovery, - isPreferredLocationsListEmpty: false); + isPreferredLocationsListEmpty: false, + preferedRegionListOverride: preferredList); await this.endpointManager.RefreshLocationAsync(this.databaseAccount); ClientRetryPolicy retryPolicy = new ClientRetryPolicy(this.endpointManager, enableEndpointDiscovery, new RetryOptions()); @@ -248,7 +255,7 @@ await BackoffRetryUtility.ExecuteAsync( if (retryCount == 0) { - Uri expectedEndpoint = LocationCacheTests.EndpointByLocation[this.preferredLocations[0]]; + Uri expectedEndpoint = LocationCacheTests.EndpointByLocation[preferredList[0]]; Assert.AreEqual(expectedEndpoint, request.RequestContext.LocationEndpointToRoute); } @@ -262,7 +269,7 @@ await BackoffRetryUtility.ExecuteAsync( else if (retryCount == 2) { // Second request must go to first write endpoint - Uri expectedEndpoint = LocationCacheTests.EndpointByLocation[this.preferredLocations[1]]; + Uri expectedEndpoint = LocationCacheTests.EndpointByLocation[preferredList[1]]; Assert.AreEqual(expectedEndpoint, request.RequestContext.LocationEndpointToRoute); } else @@ -523,6 +530,99 @@ await this.ValidateLocationCacheAsync( } } + [TestMethod] + public async Task ValidateRetryOnHttpExceptionAsync() + { + await this.ValidateRetryOnHttpExceptionAsync(enableMultipleWriteLocations: false, isReadRequest: false); + await this.ValidateRetryOnHttpExceptionAsync(enableMultipleWriteLocations: false, isReadRequest: true); + await this.ValidateRetryOnHttpExceptionAsync(enableMultipleWriteLocations: true, isReadRequest: false); + await this.ValidateRetryOnHttpExceptionAsync(enableMultipleWriteLocations: true, isReadRequest: true); + } + + private async Task ValidateRetryOnHttpExceptionAsync(bool enableMultipleWriteLocations, bool isReadRequest) + { + ReadOnlyCollection preferredList = new List() { + "location2", + "location1" + }.AsReadOnly(); + + this.Initialize( + useMultipleWriteLocations: enableMultipleWriteLocations, + enableEndpointDiscovery: true, + isPreferredLocationsListEmpty: false, + preferedRegionListOverride: preferredList, + enforceSingleMasterSingleWriteLocation: true); + + await this.endpointManager.RefreshLocationAsync(this.databaseAccount); + ClientRetryPolicy retryPolicy = new ClientRetryPolicy(this.endpointManager, true, new RetryOptions()); + + using (DocumentServiceRequest request = this.CreateRequest(isReadRequest: isReadRequest, isMasterResourceType: false)) + { + int retryCount = 0; + + try + { + await BackoffRetryUtility.ExecuteAsync( + () => + { + retryCount++; + retryPolicy.OnBeforeSendRequest(request); + + if (retryCount == 1) + { + Uri expectedEndpoint = null; + if (enableMultipleWriteLocations + || isReadRequest) + { + // MultiMaster or Single Master Read can use preferred locations for first request + expectedEndpoint = LocationCacheTests.EndpointByLocation[preferredList[0]]; + } + else + { + // Single Master Write always goes to the only write region + expectedEndpoint = new Uri(this.databaseAccount.WriteLocationsInternal[0].Endpoint); + } + + Assert.AreEqual(expectedEndpoint, request.RequestContext.LocationEndpointToRoute); + + HttpRequestException httpException = new HttpRequestException(); + throw httpException; + } + else if (retryCount == 2) + { + Uri expectedEndpoint = null; + if (enableMultipleWriteLocations + || isReadRequest) + { + // Next request must go to next preferred endpoint + expectedEndpoint = LocationCacheTests.EndpointByLocation[preferredList[1]]; + } + else + { + // Single Master Write does not have anywhere else to go + expectedEndpoint = new Uri(this.databaseAccount.WriteLocationsInternal[0].Endpoint); + } + + Assert.AreEqual(expectedEndpoint, request.RequestContext.LocationEndpointToRoute); + + return Task.FromResult(true); + } + else + { + Assert.Fail(); + } + + return Task.FromResult(true); + }, + retryPolicy); + } + catch (ForbiddenException) + { + Assert.Fail(); + } + } + } + [DataTestMethod] [DataRow(true, false, false, false, DisplayName = "Read request - Single master - no preferred locations - should NOT retry")] [DataRow(false, false, false, false, DisplayName = "Write request - Single master - no preferred locations - should NOT retry")] @@ -540,10 +640,17 @@ public async Task ClientRetryPolicy_ValidateRetryOnServiceUnavailable( { const bool enableEndpointDiscovery = true; + ReadOnlyCollection preferredList = new List() { + "location2", + "location1" + }.AsReadOnly(); + this.Initialize( useMultipleWriteLocations: useMultipleWriteLocations, enableEndpointDiscovery: enableEndpointDiscovery, - isPreferredLocationsListEmpty: !usesPreferredLocations); + isPreferredLocationsListEmpty: !usesPreferredLocations, + preferedRegionListOverride: preferredList, + enforceSingleMasterSingleWriteLocation: true); await this.endpointManager.RefreshLocationAsync(this.databaseAccount); ClientRetryPolicy retryPolicy = new ClientRetryPolicy(this.endpointManager, enableEndpointDiscovery, new RetryOptions()); @@ -561,23 +668,13 @@ await BackoffRetryUtility.ExecuteAsync( if (retryCount == 1) { - Uri expectedEndpoint = null; - if (usesPreferredLocations) + if (!usesPreferredLocations) { - expectedEndpoint = LocationCacheTests.EndpointByLocation[this.preferredLocations[1]]; - } - else - { - if (isReadRequest) - { - expectedEndpoint = new Uri(this.databaseAccount.ReadLocationsInternal[1].Endpoint); - } - else - { - expectedEndpoint = new Uri(this.databaseAccount.WriteLocationsInternal[1].Endpoint); - } + Assert.Fail("Should not be retrying if preferredlocations is not being used"); } + Uri expectedEndpoint = LocationCacheTests.EndpointByLocation[preferredList[1]]; + Assert.AreEqual(expectedEndpoint, request.RequestContext.LocationEndpointToRoute); } else if (retryCount > 1) @@ -608,8 +705,28 @@ await BackoffRetryUtility.ExecuteAsync( } } - private static AccountProperties CreateDatabaseAccount(bool useMultipleWriteLocations) + private static AccountProperties CreateDatabaseAccount( + bool useMultipleWriteLocations, + bool enforceSingleMasterSingleWriteLocation) { + Collection writeLocations = new Collection() + { + { new AccountRegion() { Name = "location1", Endpoint = LocationCacheTests.Location1Endpoint.ToString() } }, + { new AccountRegion() { Name = "location2", Endpoint = LocationCacheTests.Location2Endpoint.ToString() } }, + { new AccountRegion() { Name = "location3", Endpoint = LocationCacheTests.Location3Endpoint.ToString() } }, + }; + + if (!useMultipleWriteLocations + && enforceSingleMasterSingleWriteLocation) + { + // Some pre-existing tests depend on the account having multiple write locations even on single master setup + // Newer tests can correctly define a single master account (single write region) without breaking existing tests + writeLocations = new Collection() + { + { new AccountRegion() { Name = "location1", Endpoint = LocationCacheTests.Location1Endpoint.ToString() } } + }; + } + AccountProperties databaseAccount = new AccountProperties() { EnableMultipleWriteLocations = useMultipleWriteLocations, @@ -619,12 +736,7 @@ private static AccountProperties CreateDatabaseAccount(bool useMultipleWriteLoca { new AccountRegion() { Name = "location2", Endpoint = LocationCacheTests.Location2Endpoint.ToString() } }, { new AccountRegion() { Name = "location4", Endpoint = LocationCacheTests.Location4Endpoint.ToString() } }, }, - WriteLocationsInternal = new Collection() - { - { new AccountRegion() { Name = "location1", Endpoint = LocationCacheTests.Location1Endpoint.ToString() } }, - { new AccountRegion() { Name = "location2", Endpoint = LocationCacheTests.Location2Endpoint.ToString() } }, - { new AccountRegion() { Name = "location3", Endpoint = LocationCacheTests.Location3Endpoint.ToString() } }, - } + WriteLocationsInternal = writeLocations }; return databaseAccount; @@ -633,16 +745,28 @@ private static AccountProperties CreateDatabaseAccount(bool useMultipleWriteLoca private void Initialize( bool useMultipleWriteLocations, bool enableEndpointDiscovery, - bool isPreferredLocationsListEmpty) + bool isPreferredLocationsListEmpty, + bool enforceSingleMasterSingleWriteLocation = false, // Some tests depend on the Initialize to create an account with multiple write locations, even when not multi master + ReadOnlyCollection preferedRegionListOverride = null) { - this.databaseAccount = LocationCacheTests.CreateDatabaseAccount(useMultipleWriteLocations); + this.databaseAccount = LocationCacheTests.CreateDatabaseAccount( + useMultipleWriteLocations, + enforceSingleMasterSingleWriteLocation); - this.preferredLocations = isPreferredLocationsListEmpty ? new List().AsReadOnly() : new List() + if (isPreferredLocationsListEmpty) { - "location1", - "location2", - "location3" - }.AsReadOnly(); + this.preferredLocations = new List().AsReadOnly(); + } + else + { + // Allow for override at the test method level if needed + this.preferredLocations = preferedRegionListOverride != null ? preferedRegionListOverride : new List() + { + "location1", + "location2", + "location3" + }.AsReadOnly(); + } this.cache = new LocationCache( this.preferredLocations, From 13fe03800cb68dc3f44b4593db2e74eb7f43fb6e Mon Sep 17 00:00:00 2001 From: Matias Quaranta Date: Thu, 17 Dec 2020 08:28:41 -0800 Subject: [PATCH 3/4] matching configuration --- .../tests/Microsoft.Azure.Cosmos.Tests/testhost.dll.config | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/testhost.dll.config b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/testhost.dll.config index be1b38ddcf..07817febd1 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/testhost.dll.config +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/testhost.dll.config @@ -1,9 +1,8 @@  - + - From 3b1296f4f5ce21959093e3a020d8272175509775 Mon Sep 17 00:00:00 2001 From: Matias Quaranta Date: Thu, 17 Dec 2020 08:52:53 -0800 Subject: [PATCH 4/4] tests depending on environment configuration are not wise --- .../Microsoft.Azure.Cosmos.Tests/GlobalEndpointManagerTest.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/GlobalEndpointManagerTest.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/GlobalEndpointManagerTest.cs index b17f9ab481..ff29a9bd4d 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/GlobalEndpointManagerTest.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/GlobalEndpointManagerTest.cs @@ -70,7 +70,7 @@ public async Task EndpointFailureMockTest() Assert.AreEqual(globalEndpointManager.ReadEndpoints[0], globalEndpointManager.WriteEndpoints[0]); //Sleep a second for the unavailable endpoint entry to expire and background refresh timer to kick in - Thread.Sleep(2000); + Thread.Sleep(3000); await globalEndpointManager.RefreshLocationAsync(null); Assert.AreEqual(globalEndpointManager.ReadEndpoints[0], new Uri(readLocation1.Endpoint)); }