Skip to content

Commit

Permalink
Automatic failover to other endpoints when possible #643
Browse files Browse the repository at this point in the history
Automatic failover to other endpoints when possible #643 - see
NuGet/Home#643
  • Loading branch information
maartenba committed May 29, 2015
1 parent 1396868 commit 72580c4
Show file tree
Hide file tree
Showing 12 changed files with 644 additions and 162 deletions.
10 changes: 8 additions & 2 deletions NuGetGallery.sln
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Frontend", "Frontend", "{05
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Operations", "Operations", "{2ECA1159-9B9D-4D65-95AF-F14337FD3DA6}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Backend", "Backend", "{B7092BA7-77A3-44AA-9C0A-D600F6C31182}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NuGetGallery", "src\NuGetGallery\NuGetGallery.csproj", "{1DACF781-5CD0-4123-8BAC-CD385D864BE5}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NuGetGallery.Facts", "tests\NuGetGallery.Facts\NuGetGallery.Facts.csproj", "{FDC76BEF-3360-45AC-A13E-AE8F14D343D5}"
Expand All @@ -26,14 +28,18 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NuGetGallery.Core.Facts", "
EndProject
Project("{CC5FD16D-436D-48AD-A40C-5A424C6E3E79}") = "NuGetGallery.Cloud", "src\NuGetGallery.Cloud\NuGetGallery.Cloud.ccproj", "{0041ACA0-30EC-4554-8C7C-0AF810F3086F}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Backend", "Backend", "{B7092BA7-77A3-44AA-9C0A-D600F6C31182}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NuGetGallery.Backend", "src\NuGetGallery.Backend\NuGetGallery.Backend.csproj", "{7F92C174-5310-4090-A6A3-3CC7C67C19D6}"
EndProject
Project("{CC5FD16D-436D-48AD-A40C-5A424C6E3E79}") = "NuGetGallery.Backend.Cloud", "src\NuGetGallery.Backend.Cloud\NuGetGallery.Backend.Cloud.ccproj", "{1D0164B6-92D4-455A-AC68-C30B61733748}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NuGet.Services.Search.Client", "src\NuGet.Services.Search.Client\NuGet.Services.Search.Client.csproj", "{CE0AC7E5-CDF7-4899-AD72-DCB64E402EBE}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{C0FBDA28-EA32-4011-9D3D-01E0EA3ED6DE}"
ProjectSection(SolutionItems) = preProject
Performance1.psess = Performance1.psess
Performance2.psess = Performance2.psess
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Collections.Concurrent;

namespace NuGet.Services.Search.Client
{
/// <summary>
/// In-memory <see cref="IEndpointHealthIndicatorStore"/> that keeps one health value per endpoint,
/// keyed by the endpoint's URL text with everything from the first '?' onwards removed.
/// </summary>
/// <remarks>
/// Health values move along a fixed ladder (100, 90, 75, ... 1), one rung per call.
/// An endpoint that has never been recorded is reported at the top rung.
/// </remarks>
public class BaseUrlHealthIndicatorStore : IEndpointHealthIndicatorStore
{
    // The ladder of health values, ordered from healthiest (index 0) to least healthy (last index).
    private static readonly int[] HealthIndicatorRange = { 100, 90, 75, 50, 25, 20, 15, 10, 5, 1 };

    // Current health per base URL; endpoints that were never touched have no entry.
    private readonly ConcurrentDictionary<string, int> _healthIndicators = new ConcurrentDictionary<string, int>();

    /// <summary>
    /// Returns the recorded health for <paramref name="endpoint"/>, or the top ladder value
    /// when nothing has been recorded for its base URL.
    /// </summary>
    public int GetHealth(Uri endpoint)
    {
        int recorded;
        return _healthIndicators.TryGetValue(GetBaseUrl(endpoint), out recorded)
            ? recorded
            : HealthIndicatorRange[0];
    }

    /// <summary>
    /// Moves the endpoint one rung down the ladder. An endpoint without an entry is stored
    /// at the second rung, i.e. one step below the implicit starting value.
    /// </summary>
    public void DecreaseHealth(Uri endpoint)
    {
        _healthIndicators.AddOrUpdate(
            GetBaseUrl(endpoint),
            HealthIndicatorRange[1],
            (baseUrl, current) => NextLowerLevel(current));
    }

    /// <summary>
    /// Moves the endpoint one rung up the ladder. An endpoint without an entry is stored
    /// at the top rung.
    /// </summary>
    public void IncreaseHealth(Uri endpoint)
    {
        _healthIndicators.AddOrUpdate(
            GetBaseUrl(endpoint),
            HealthIndicatorRange[0],
            (baseUrl, current) => NextHigherLevel(current));
    }

    // Largest ladder value strictly below 'current'; the bottom rung when there is none.
    private static int NextLowerLevel(int current)
    {
        // The ladder is sorted descending, so the first smaller value is the closest one.
        foreach (var level in HealthIndicatorRange)
        {
            if (level < current)
            {
                return level;
            }
        }

        return HealthIndicatorRange[HealthIndicatorRange.Length - 1];
    }

    // Smallest ladder value strictly above 'current'; the top rung when there is none.
    private static int NextHigherLevel(int current)
    {
        // Walk from the bottom rung upwards so the first larger value is the closest one.
        var index = HealthIndicatorRange.Length;
        while (--index >= 0)
        {
            if (HealthIndicatorRange[index] > current)
            {
                return HealthIndicatorRange[index];
            }
        }

        return HealthIndicatorRange[0];
    }

    // Dictionary key for an endpoint: its string form truncated at the first '?'.
    private static string GetBaseUrl(Uri uri)
    {
        var text = uri.ToString();
        var queryIndex = text.IndexOf('?');
        return queryIndex < 0 ? text : text.Substring(0, queryIndex);
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Collections.Generic;

namespace NuGet.Services.Search.Client
{
/// <summary>
/// Stores an integer health value per endpoint and lets callers nudge it up or down.
/// </summary>
/// <remarks>
/// The interface does not fix a value range. RetryingHttpClientWrapper sorts endpoints by this
/// value in descending order (with a random element), so a larger value is treated as healthier.
/// </remarks>
public interface IEndpointHealthIndicatorStore
{
/// <summary>Gets the current health value for <paramref name="endpoint"/>.</summary>
/// <param name="endpoint">The endpoint to look up.</param>
/// <returns>The health value the store currently associates with the endpoint.</returns>
int GetHealth(Uri endpoint);
/// <summary>Records that <paramref name="endpoint"/> should be considered less healthy.</summary>
/// <param name="endpoint">The endpoint whose health is lowered.</param>
void DecreaseHealth(Uri endpoint);
/// <summary>Records that <paramref name="endpoint"/> should be considered healthier.</summary>
/// <param name="endpoint">The endpoint whose health is raised.</param>
void IncreaseHealth(Uri endpoint);
}
}
212 changes: 212 additions & 0 deletions src/NuGet.Services.Search.Client/Client/RetryingHttpClientWrapper.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;

namespace NuGet.Services.Search.Client
{
/// <summary>
/// Issues a GET request against a set of alternative endpoints and returns the first usable response.
/// </summary>
/// <remarks>
/// Endpoints are ordered by their health (with a random element so less healthy endpoints still get
/// traffic occasionally). The first endpoint is requested immediately; every following endpoint is
/// started <c>PeriodToDelayAlternateRequest</c> milliseconds after the previous one. As soon as one
/// request yields a usable result the remaining ones are cancelled. Endpoint health is raised on
/// success and lowered on failures that justify trying another endpoint.
/// </remarks>
public sealed class RetryingHttpClientWrapper
{
    private readonly HttpClient _httpClient;
    private readonly IEndpointHealthIndicatorStore _endpointHealthIndicatorStore;

    // Parameterless constructor picks a time-dependent seed itself; the previous explicit
    // (int) cast of DateTime.UtcNow.Ticks would overflow in a checked build.
    private static readonly Random Random = new Random();

    // Milliseconds between the start of one endpoint's request and the start of the next one.
    private static readonly int PeriodToDelayAlternateRequest = 4000;

    private static readonly IComparer<int> HealthComparer;

    static RetryingHttpClientWrapper()
    {
        HealthComparer = new WeightedRandomComparer(Random);
    }

    /// <summary>
    /// Creates a wrapper that tracks endpoint health in a new <see cref="BaseUrlHealthIndicatorStore"/>.
    /// </summary>
    /// <param name="httpClient">The client used for every request.</param>
    /// <exception cref="ArgumentNullException"><paramref name="httpClient"/> is null.</exception>
    public RetryingHttpClientWrapper(HttpClient httpClient)
        : this(httpClient, new BaseUrlHealthIndicatorStore())
    {
    }

    /// <summary>
    /// Creates a wrapper that tracks endpoint health in the supplied store.
    /// </summary>
    /// <param name="httpClient">The client used for every request.</param>
    /// <param name="endpointHealthIndicatorStore">Store that is read to order endpoints and updated after each request.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public RetryingHttpClientWrapper(HttpClient httpClient, IEndpointHealthIndicatorStore endpointHealthIndicatorStore)
    {
        if (httpClient == null)
        {
            throw new ArgumentNullException("httpClient");
        }
        if (endpointHealthIndicatorStore == null)
        {
            throw new ArgumentNullException("endpointHealthIndicatorStore");
        }

        _httpClient = httpClient;
        _endpointHealthIndicatorStore = endpointHealthIndicatorStore;
    }

    /// <summary>
    /// Gets the response body as a string from the first endpoint that answers successfully.
    /// </summary>
    /// <param name="endpoints">Alternative endpoints for the same resource; must contain at least one.</param>
    /// <exception cref="AggregateException">No endpoint produced a result; contains the collected failures.</exception>
    public async Task<string> GetStringAsync(IEnumerable<Uri> endpoints)
    {
        // The GetStringAsync(Uri) overload takes no token, so an in-flight string request
        // is not aborted when another endpoint wins; only requests still waiting to start are.
        return await GetWithRetry(endpoints, (client, uri, cancellationToken) => client.GetStringAsync(uri)).ConfigureAwait(false);
    }

    /// <summary>
    /// Gets the response message from the first endpoint that answers with either a success status
    /// or a failure status that does not justify trying another endpoint.
    /// </summary>
    /// <param name="endpoints">Alternative endpoints for the same resource; must contain at least one.</param>
    /// <exception cref="AggregateException">No endpoint produced a result; contains the collected failures.</exception>
    public async Task<HttpResponseMessage> GetAsync(IEnumerable<Uri> endpoints)
    {
        return await GetWithRetry(endpoints, (client, uri, cancellationToken) => client.GetAsync(uri, cancellationToken)).ConfigureAwait(false);
    }

    // Orders the endpoints, starts the staggered requests and returns the first one that completes
    // without faulting or being cancelled.
    private async Task<TResponseType> GetWithRetry<TResponseType>(IEnumerable<Uri> endpoints, Func<HttpClient, Uri, CancellationToken, Task<TResponseType>> run)
    {
        if (endpoints == null)
        {
            throw new ArgumentNullException("endpoints");
        }

        // Build endpoints, ordered by health (with a chance of less health)
        var healthyEndpoints = endpoints.OrderByDescending(e => _endpointHealthIndicatorStore.GetHealth(e), HealthComparer).ToList();
        if (healthyEndpoints.Count == 0)
        {
            // Task.WhenAny rejects an empty list with the same exception type; fail with a clearer message.
            throw new ArgumentException("At least one endpoint is required.", "endpoints");
        }

        // Make all requests cancellable using this CancellationTokenSource.
        // It is deliberately not disposed: losing requests may still touch it after this method returns.
        var cancellationTokenSource = new CancellationTokenSource();

        // Create requests queue
        var pendingRequests = CreateRequestQueue(healthyEndpoints, run, cancellationTokenSource);

        // When the first successful task comes in, return it. If no successful task is found, throw an AggregateException.
        var exceptions = new List<Exception>();

        Task<TResponseType> completedTask;
        do
        {
            completedTask = await Task.WhenAny(pendingRequests).ConfigureAwait(false);
            pendingRequests.Remove(completedTask);

            if (completedTask.Exception != null)
            {
                exceptions.AddRange(completedTask.Exception.InnerExceptions);
            }
        } while ((completedTask.IsFaulted || completedTask.IsCanceled) && pendingRequests.Count > 0);

        // Stop everything that is still waiting or in flight.
        cancellationTokenSource.Cancel(false);

        if (completedTask.IsFaulted || completedTask.IsCanceled)
        {
            throw new AggregateException(exceptions);
        }
        return await completedTask.ConfigureAwait(false);
    }

    // Queue up a series of requests. Make each request wait a little longer than the previous one;
    // the first one has a zero delay.
    private List<Task<TResponseType>> CreateRequestQueue<TResponseType>(List<Uri> endpoints, Func<HttpClient, Uri, CancellationToken, Task<TResponseType>> run, CancellationTokenSource cancellationTokenSource)
    {
        var tasks = new List<Task<TResponseType>>(endpoints.Count);
        var token = cancellationTokenSource.Token;

        for (var i = 0; i < endpoints.Count; i++)
        {
            var endpoint = endpoints[i];

            // Passing the token to ContinueWith means a request whose delay was cancelled never starts.
            tasks.Add(Task.Delay(i * PeriodToDelayAlternateRequest, token)
                .ContinueWith(delayTask => ExecuteRequest(endpoint, run, cancellationTokenSource), token));
        }

        return tasks;
    }

    // Runs a single request and updates the endpoint's health exactly once based on the outcome.
    private TResponseType ExecuteRequest<TResponseType>(Uri endpoint, Func<HttpClient, Uri, CancellationToken, Task<TResponseType>> run, CancellationTokenSource cancellationTokenSource)
    {
        TResponseType response;
        try
        {
            // NOTE(review): this blocks a pool thread. It is kept because awaiting instead would turn
            // timeouts (TaskCanceledException) into Canceled tasks, which would drop them from the
            // AggregateException reported to the caller.
            response = run(_httpClient, endpoint, cancellationTokenSource.Token).Result;
        }
        catch (Exception ex)
        {
            if (cancellationTokenSource.IsCancellationRequested && IsCancellation(ex))
            {
                // We aborted this request ourselves (another endpoint answered or failed fatally);
                // that says nothing about this endpoint, so leave its health untouched.
            }
            else if (ShouldTryOther(ex))
            {
                _endpointHealthIndicatorStore.DecreaseHealth(endpoint);
            }
            else
            {
                // Not a failure another endpoint could fix: stop the remaining requests.
                cancellationTokenSource.Cancel();
            }
            throw;
        }

        var responseMessage = response as HttpResponseMessage;
        if (responseMessage != null && !responseMessage.IsSuccessStatusCode)
        {
            if (ShouldTryOther(responseMessage))
            {
                // Thrown outside the try block above so the health is lowered only once.
                _endpointHealthIndicatorStore.DecreaseHealth(endpoint);
                var reason = responseMessage.ReasonPhrase;
                responseMessage.Dispose();
                throw new HttpRequestException(reason);
            }

            // The endpoint answered, just not with a success status (e.g. a client error).
            // Other endpoints would answer the same, so hand this response to the caller.
            cancellationTokenSource.Cancel();
        }

        _endpointHealthIndicatorStore.IncreaseHealth(endpoint);

        return response;
    }

    // For an AggregateException returns its first inner exception (null when it has none);
    // any other exception is returned unchanged.
    private static Exception UnwrapAggregate(Exception ex)
    {
        var aggregate = ex as AggregateException;
        return aggregate != null ? aggregate.InnerExceptions.FirstOrDefault() : ex;
    }

    // True when the (unwrapped) exception reports a cancellation.
    private static bool IsCancellation(Exception ex)
    {
        return UnwrapAggregate(ex) is OperationCanceledException;
    }

    // Decides whether a failed request is worth retrying against another endpoint.
    private static bool ShouldTryOther(Exception ex)
    {
        ex = UnwrapAggregate(ex);
        if (ex == null)
        {
            // An AggregateException without inner exceptions carries nothing to judge by.
            return false;
        }

        var wex = ex as WebException ?? ex.InnerException as WebException;
        if (wex != null && (
            wex.Status == WebExceptionStatus.UnknownError
            || wex.Status == WebExceptionStatus.ConnectFailure
            || (int)wex.Status == 1 // NameResolutionFailure
            ))
        {
            return true;
        }

        return ex is HttpRequestException || ex is TaskCanceledException;
    }

    // Decides whether a non-success response indicates a server-side problem that another
    // endpoint might not have. Only called for responses without a success status.
    private static bool ShouldTryOther(HttpResponseMessage response)
    {
        switch (response.StatusCode)
        {
            case HttpStatusCode.BadGateway:
            case HttpStatusCode.GatewayTimeout:
            case HttpStatusCode.ServiceUnavailable:
            case HttpStatusCode.RequestTimeout:
            case HttpStatusCode.InternalServerError:
                return true;
            default:
                return false;
        }
    }

    // Comparer that ranks x above y with probability x / (x + y). It is intentionally
    // non-deterministic: sorting with it yields a health-weighted random order.
    private sealed class WeightedRandomComparer
        : IComparer<int>
    {
        private readonly Random _random;
        private readonly object _gate = new object();

        public WeightedRandomComparer(Random random)
        {
            _random = random;
        }

        public int Compare(int x, int y)
        {
            var totalWeight = x + y;
            if (totalWeight <= 0)
            {
                // No usable weights (Random.Next would reject a negative bound): compare plainly.
                return x.CompareTo(y);
            }

            int randomNumber;
            // System.Random is not thread-safe and this instance is shared by concurrent requests.
            lock (_gate)
            {
                randomNumber = _random.Next(0, totalWeight);
            }

            return randomNumber < x ? 1 : -1;
        }
    }
}
}
Loading

0 comments on commit 72580c4

Please sign in to comment.