From a361f7f84ac627b34a611be89b52d9dc75293bbf Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Tue, 14 Jun 2022 08:21:36 -0400 Subject: [PATCH] Use IndexOf in WebUtility (#70700) The IndexOfHtmlDecodingChars method was iterating character by character looking for either a `&` or a surrogate, but the slow path taken when one of those is found doesn't special-case surrogates. So, we can just collapse this to a vectorized `IndexOf('&')`, which makes the fast path of detecting whether there's anything to decode much faster if there's any meaningful amount of input prior to a `&`. (I experimented with also using `IndexOf('&')` in the main routine, but it made cases with lots of entities slower, and so I'm not including that here.) --- .../src/System/Net/WebUtility.cs | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Net/WebUtility.cs b/src/libraries/System.Private.CoreLib/src/System/Net/WebUtility.cs index 398873d249a50..257b0e1e633ce 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Net/WebUtility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Net/WebUtility.cs @@ -185,7 +185,7 @@ private static void HtmlEncode(ReadOnlySpan<char> input, ref ValueStringBuilder ReadOnlySpan<char> valueSpan = value.AsSpan(); - int index = IndexOfHtmlDecodingChars(valueSpan); + int index = valueSpan.IndexOf('&'); if (index < 0) { return value; @@ -215,7 +215,7 @@ public static void HtmlDecode(string? value, TextWriter output) ReadOnlySpan<char> valueSpan = value.AsSpan(); - int index = IndexOfHtmlDecodingChars(valueSpan); + int index = valueSpan.IndexOf('&'); if (index == -1) { output.Write(value); @@ -701,21 +701,6 @@ private static bool ValidateUrlEncodingParameters(byte[]? 
bytes, int offset, int return true; } - private static int IndexOfHtmlDecodingChars(ReadOnlySpan<char> input) - { - // this string requires html decoding if it contains '&' or a surrogate character - for (int i = 0; i < input.Length; i++) - { - char c = input[i]; - if (c == '&' || char.IsSurrogate(c)) - { - return i; - } - } - - return -1; - } - #endregion // Internal struct to facilitate URL decoding -- keeps char buffer and byte buffer, allows appending of either chars or bytes