From f2b1d715e4fe3386314439a42f0e880293a3d3cc Mon Sep 17 00:00:00 2001 From: Paul Hebble Date: Fri, 28 May 2021 17:32:56 -0500 Subject: [PATCH] Pass token for moved files on GitHub --- Core/Net/Net.cs | 89 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 30 deletions(-) diff --git a/Core/Net/Net.cs b/Core/Net/Net.cs index cc4366e021..7bd511fcc2 100644 --- a/Core/Net/Net.cs +++ b/Core/Net/Net.cs @@ -95,7 +95,7 @@ public static string Download(string url, out string etag, string filename = nul try { - var agent = MakeDefaultHttpClient(); + var agent = new RedirectingTimeoutWebClient(); agent.DownloadFile(url, filename); etag = agent.ResponseHeaders.Get("ETag")?.Replace("\"", ""); } @@ -207,7 +207,7 @@ public static string DownloadText(Uri url, string authToken = "", string mimeTyp { log.DebugFormat("About to download {0}", url.OriginalString); - WebClient agent = MakeDefaultHttpClient(timeout); + WebClient agent = new RedirectingTimeoutWebClient(timeout, mimeType); // Check whether to use an auth token for this host if (!string.IsNullOrEmpty(authToken) @@ -218,11 +218,6 @@ public static string DownloadText(Uri url, string authToken = "", string mimeTyp // Send our auth token to the GitHub API (or whoever else needs one) agent.Headers.Add("Authorization", $"token {authToken}"); } - if (!string.IsNullOrEmpty(mimeType)) - { - log.InfoFormat("Setting MIME type {0}", mimeType); - agent.Headers.Add("Accept", mimeType); - } for (int whichAttempt = 0; whichAttempt < MaxRetries + 1; ++whichAttempt) { @@ -361,41 +356,75 @@ public static Uri GetRawUri(Uri remoteUri) } /// - /// Create a WebClient with some CKAN-sepcific adjustments, like a user agent string. 
+ /// A WebClient with some CKAN-specific adjustments: + /// - A user agent string (required by GitHub API policy) + /// - Sets the Accept header to a given MIME type (needed to get raw files from GitHub API) + /// - Times out after a specified amount of time in milliseconds, 100 000 milliseconds (=100 seconds) by default (https://stackoverflow.com/a/3052637) + /// - Handles permanent redirects to the same host without clearing the Authorization header (needed to get files from renamed GitHub repositories via API) /// - /// Timeout for the request in milliseconds, defaulting to 100 000 (=100 seconds) - /// A custom WebClient - private static WebClient MakeDefaultHttpClient(int timeout = 100000) + private sealed class RedirectingTimeoutWebClient : WebClient { - var client = new TimeoutWebClient(timeout); - client.Headers.Add("User-Agent", UserAgentString); - return client; - } - - /// - /// A WebClient that times out after a specified amount of time in milliseconds, 100 000 milliseconds (=100 seconds) by default. - /// Taken from https://stackoverflow.com/a/3052637 - /// - private sealed class TimeoutWebClient : WebClient - { - public int Timeout { get; set; } - - public TimeoutWebClient() : this (100000) { } - - public TimeoutWebClient(int timeout) + /// + /// Initialize our special web client + /// + /// Timeout for the request in milliseconds, defaulting to 100 000 (=100 seconds) + /// A mime type sent with the "Accept" header + public RedirectingTimeoutWebClient(int timeout = 100000, string mimeType = "") { - Timeout = timeout; + this.timeout = timeout; + this.mimeType = mimeType; } protected override WebRequest GetWebRequest(Uri address) { + // Set user agent and MIME type for every request, 
including redirects + Headers.Add("User-Agent", UserAgentString); + if (!string.IsNullOrEmpty(mimeType)) + { + log.InfoFormat("Setting MIME type {0}", mimeType); + Headers.Add("Accept", mimeType); + } var request = base.GetWebRequest(address); - if (request != null) + if (request is HttpWebRequest hwr) { - request.Timeout = this.Timeout; + // GitHub API tokens cannot be passed via auto-redirect + hwr.AllowAutoRedirect = false; + hwr.Timeout = timeout; } return request; } + + protected override WebResponse GetWebResponse(WebRequest request) + { + if (request == null) + return null; + var response = base.GetWebResponse(request); + if (response == null) + return null; + + if (response is HttpWebResponse hwr) + { + int statusCode = (int)hwr.StatusCode; + var location = hwr.Headers["Location"]; + if (statusCode >= 300 && statusCode <= 399 && location != null) + { + log.InfoFormat("Redirecting to {0}", location); + hwr.Close(); + var redirUri = new Uri(request.RequestUri, location); + if (Headers.AllKeys.Contains("Authorization") + && request.RequestUri.Host != redirUri.Host) + { + log.InfoFormat("Host mismatch, purging token for redirect"); + Headers.Remove("Authorization"); + } + return GetWebResponse(GetWebRequest(redirUri)); + } + } + return response; + } + + private int timeout; + private string mimeType; } // HACK: The ancient WebClient doesn't support setting the request type to HEAD and WebRequest doesn't support