From 1d51ecb4966f497e8c6def1f90c75837ec0fba17 Mon Sep 17 00:00:00 2001 From: Traian Zaprianov Date: Fri, 31 May 2024 14:24:25 +0300 Subject: [PATCH 1/6] Optimize HttpUtility.JavaScriptStringEncode by using SearchValues for invalid JavaScript characters. --- .../src/System/Web/HttpUtility.cs | 8 +- .../src/System/Web/Util/HttpEncoder.cs | 145 ++++++++++-------- .../tests/HttpUtility/HttpUtilityTest.cs | 2 + 3 files changed, 81 insertions(+), 74 deletions(-) diff --git a/src/libraries/System.Web.HttpUtility/src/System/Web/HttpUtility.cs b/src/libraries/System.Web.HttpUtility/src/System/Web/HttpUtility.cs index 58dfcd39a9bdf0..270aedb275c946 100644 --- a/src/libraries/System.Web.HttpUtility/src/System/Web/HttpUtility.cs +++ b/src/libraries/System.Web.HttpUtility/src/System/Web/HttpUtility.cs @@ -237,12 +237,8 @@ public static NameValueCollection ParseQueryString(string query, Encoding encodi [return: NotNullIfNotNull(nameof(bytes))] public static byte[]? UrlDecodeToBytes(byte[]? bytes, int offset, int count) => HttpEncoder.UrlDecode(bytes, offset, count); - public static string JavaScriptStringEncode(string? value) => HttpEncoder.JavaScriptStringEncode(value); + public static string JavaScriptStringEncode(string? value) => HttpEncoder.JavaScriptStringEncode(value, false); - public static string JavaScriptStringEncode(string? value, bool addDoubleQuotes) - { - string encoded = HttpEncoder.JavaScriptStringEncode(value); - return addDoubleQuotes ? "\"" + encoded + "\"" : encoded; - } + public static string JavaScriptStringEncode(string? value, bool addDoubleQuotes) => HttpEncoder.JavaScriptStringEncode(value, addDoubleQuotes); } } diff --git a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs index 3c96060bd20a92..2aefee027390a6 100644 --- a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs +++ b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs @@ -25,17 +25,27 @@ private static void AppendCharAsUnicodeJavaScript(StringBuilder builder, char c) builder.Append($"\\u{(int)c:x4}"); } - private static bool CharRequiresJavaScriptEncoding(char c) => - c < 0x20 // control chars always have to be encoded - || c == '\"' // chars which must be encoded per JSON spec - || c == '\\' - || c == '\'' // HTML-sensitive chars encoded for safety - || c == '<' - || c == '>' - || (c == '&') - || c == '\u0085' // newline chars (see Unicode 6.2, Table 5-1 [http://www.unicode.org/versions/Unicode6.2.0/ch05.pdf]) have to be encoded - || c == '\u2028' - || c == '\u2029'; + private static string GetRequiresJavaScriptEncodingPattern() + { + StringBuilder sb = new StringBuilder(30); + // control chars always have to be encoded + for (int i = 0; i < 32; i++) + { + sb.Append((char)i); + } + + sb.Append('\"'); // chars which must be encoded per JSON spec + sb.Append('\\'); + sb.Append('\''); // HTML-sensitive chars encoded for safety + sb.Append('<'); + sb.Append('>'); + sb.Append('&'); + sb.Append('\u0085'); // newline chars (see Unicode 6.2, Table 5-1 [http://www.unicode.org/versions/Unicode6.2.0/ch05.pdf]) have to be encoded + sb.Append('\u2028'); + sb.Append('\u2029'); + + return sb.ToString(); + } [return: NotNullIfNotNull(nameof(value))] internal static string? HtmlAttributeEncode(string? value) @@ -137,79 +147,78 @@ private static int IndexOfHtmlAttributeEncodingChars(string s) => private static bool IsNonAsciiByte(byte b) => b >= 0x7F || b < 0x20; - internal static string JavaScriptStringEncode(string? value) + private static readonly SearchValues s_invalid_JavaScriptChars = SearchValues.Create(GetRequiresJavaScriptEncodingPattern()); + internal static string JavaScriptStringEncode(string? value, bool addDoubleQuotes) { if (string.IsNullOrEmpty(value)) { - return string.Empty; + return addDoubleQuotes ? @"""""" : string.Empty; } - StringBuilder? b = null; - int startIndex = 0; - int count = 0; - for (int i = 0; i < value.Length; i++) + int i = value.AsSpan().IndexOfAny(s_invalid_JavaScriptChars); + if (i < 0) { - char c = value[i]; - - // Append the unhandled characters (that do not require special treament) - // to the string builder when special characters are detected. - if (CharRequiresJavaScriptEncoding(c)) - { - b ??= new StringBuilder(value.Length + 5); - - if (count > 0) - { - b.Append(value, startIndex, count); - } - - startIndex = i + 1; - count = 0; + return addDoubleQuotes ? "\"" + value + "\"" : value; + } - switch (c) - { - case '\r': - b.Append("\\r"); - break; - case '\t': - b.Append("\\t"); - break; - case '\"': - b.Append("\\\""); - break; - case '\\': - b.Append("\\\\"); - break; - case '\n': - b.Append("\\n"); - break; - case '\b': - b.Append("\\b"); - break; - case '\f': - b.Append("\\f"); - break; - default: - AppendCharAsUnicodeJavaScript(b, c); - break; - } - } - else + StringBuilder sb; + if (addDoubleQuotes) + { + sb = new StringBuilder(value.Length + 7); + sb.Append('"'); + } + else + { + sb = new StringBuilder(value.Length + 5); + } + int startIndex = 0; + do + { + sb.Append(value, startIndex, i); + char c = value[startIndex + i]; + switch (c) { - count++; + case '\r': + sb.Append("\\r"); + break; + case '\t': + sb.Append("\\t"); + break; + case '\"': + sb.Append("\\\""); + break; + case '\\': + sb.Append("\\\\"); + break; + case '\n': + sb.Append("\\n"); + break; + case '\b': + sb.Append("\\b"); + break; + case '\f': + sb.Append("\\f"); + break; + default: + AppendCharAsUnicodeJavaScript(sb, c); + break; } - } - if (b == null) + startIndex += i + 1; + i = value.AsSpan(startIndex).IndexOfAny(s_invalid_JavaScriptChars); + } while (i != -1); + + if (startIndex < value.Length) { - return value; + sb.Append(value.AsSpan(startIndex)); } - if (count > 0) + if (addDoubleQuotes) { - b.Append(value, startIndex, count); + sb.Append('"'); } - return b.ToString(); + return sb.ToString(); } [return: NotNullIfNotNull(nameof(bytes))] diff --git a/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs b/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs index b95c663e932d4b..c1cfff139885f0 100644 --- a/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs +++ b/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs @@ -310,6 +310,8 @@ public static IEnumerable JavaScriptStringEncodeData yield return new object[] { "", "" }; yield return new object[] {"No escaping needed.", "No escaping needed."}; yield return new object[] {"The \t and \n will need to be escaped.", "The \\t and \\n will need to be escaped."}; + yield return new object[] { "The \t and \n will need to be escaped.", "The \\t and \\n will need to be escaped." }; + yield return new object[] { "The \t and \n will need to be escaped.>", "The \\t and \\n will need to be escaped.\\u003e" }; for (char c = char.MinValue; c < TestMaxChar; c++) { if (c >= 0 && c <= 7 || c == 11 || c >= 14 && c <= 31 || c == 38 || c == 39 || c == 60 || c == 62 || c == 133 || c == 8232 || c == 8233) From 9fba64e711b9e91f78292fbe3973c05bfdd4a657 Mon Sep 17 00:00:00 2001 From: Traian Zaprianov Date: Fri, 31 May 2024 14:41:44 +0300 Subject: [PATCH 2/6] remove duplicated test --- .../tests/HttpUtility/HttpUtilityTest.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs b/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs index c1cfff139885f0..27942d50051001 100644 --- a/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs +++ b/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs @@ -310,8 +310,7 @@ public static IEnumerable JavaScriptStringEncodeData yield return new object[] { "", "" }; yield return new object[] {"No escaping needed.", "No escaping needed."}; yield return new object[] {"The \t and \n will need to be escaped.", "The \\t and \\n will need to be escaped."}; - yield return new object[] { "The \t and \n will need to be escaped.", "The \\t and \\n will need to be escaped." }; - yield return new object[] { "The \t and \n will need to be escaped.>", "The \\t and \\n will need to be escaped.\\u003e" }; + yield return new object[] {"The \t and \n will need to be escaped.>", "The \\t and \\n will need to be escaped.\\u003e" }; for (char c = char.MinValue; c < TestMaxChar; c++) { if (c >= 0 && c <= 7 || c == 11 || c >= 14 && c <= 31 || c == 38 || c == 39 || c == 60 || c == 62 || c == 133 || c == 8232 || c == 8233) From eed137852024657754b1f241514172714be57a06 Mon Sep 17 00:00:00 2001 From: Traian Zaprianov Date: Fri, 31 May 2024 21:53:57 +0300 Subject: [PATCH 3/6] more suggestions --- .../src/System.Web.HttpUtility.csproj | 2 + .../src/System/Web/Util/HttpEncoder.cs | 84 +++++++------------ 2 files changed, 30 insertions(+), 56 deletions(-) diff --git a/src/libraries/System.Web.HttpUtility/src/System.Web.HttpUtility.csproj b/src/libraries/System.Web.HttpUtility/src/System.Web.HttpUtility.csproj index b7c44506200dd3..f62eca42ec0974 100644 --- a/src/libraries/System.Web.HttpUtility/src/System.Web.HttpUtility.csproj +++ b/src/libraries/System.Web.HttpUtility/src/System.Web.HttpUtility.csproj @@ -15,6 +15,8 @@ + diff --git a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs index 2aefee027390a6..eb4cc68ca8c24d 100644 --- a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs +++ b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs @@ -20,32 +20,13 @@ internal static class HttpEncoder private static readonly SearchValues s_urlSafeBytes = SearchValues.Create( "!()*-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"u8); - private static void AppendCharAsUnicodeJavaScript(StringBuilder builder, char c) - { - builder.Append($"\\u{(int)c:x4}"); - } - - private static string GetRequiresJavaScriptEncodingPattern() - { - StringBuilder sb = new StringBuilder(30); - // control chars always have to be encoded - for (int i = 0; i < 32; i++) - { - sb.Append((char)i); - } - - sb.Append('\"'); // chars which must be encoded per JSON spec - sb.Append('\\'); - sb.Append('\''); // HTML-sensitive chars encoded for safety - sb.Append('<'); - sb.Append('>'); - sb.Append('&'); - sb.Append('\u0085'); // newline chars (see Unicode 6.2, Table 5-1 [http://www.unicode.org/versions/Unicode6.2.0/ch05.pdf]) have to be encoded - sb.Append('\u2028'); - sb.Append('\u2029'); - - return sb.ToString(); - } + private static readonly SearchValues s_invalidJavaScriptChars = SearchValues.Create( + // Any Control, < 32 (' ') + "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" + + // Chars which must be encoded per JSON spec / HTML-sensitive chars encoded for safety + "\"&'<>\\" + + // newline chars (see Unicode 6.2, Table 5-1 [http://www.unicode.org/versions/Unicode6.2.0/ch05.pdf]) have to be encoded + "\u0085\u2028\u2029"); [return: NotNullIfNotNull(nameof(value))] internal static string? HtmlAttributeEncode(string? value) @@ -147,7 +128,6 @@ private static int IndexOfHtmlAttributeEncodingChars(string s) => private static bool IsNonAsciiByte(byte b) => b >= 0x7F || b < 0x20; - private static readonly SearchValues s_invalid_JavaScriptChars = SearchValues.Create(GetRequiresJavaScriptEncodingPattern()); internal static string JavaScriptStringEncode(string? value, bool addDoubleQuotes) { if (string.IsNullOrEmpty(value)) @@ -155,70 +135,62 @@ internal static string JavaScriptStringEncode(string? value, bool addDoubleQuote return addDoubleQuotes ? @"""""" : string.Empty; } - int i = value.AsSpan().IndexOfAny(s_invalid_JavaScriptChars); + int i = value.AsSpan().IndexOfAny(s_invalidJavaScriptChars); if (i < 0) { - return addDoubleQuotes ? "\"" + value + "\"" : value; + return addDoubleQuotes ? $"\"{value}\"" : value; } - StringBuilder sb; + var vsb = new ValueStringBuilder(stackalloc char[512]); if (addDoubleQuotes) { - sb = new StringBuilder(value.Length + 7); - sb.Append('"'); - } - else - { - sb = new StringBuilder(value.Length + 5); + vsb.Append('"'); } - int startIndex = 0; + ReadOnlySpan chars = value; do { - sb.Append(value, startIndex, i); - char c = value[startIndex + i]; + vsb.Append(chars.Slice(0, i)); + char c = chars[i]; + chars = chars.Slice(i + 1); switch (c) { case '\r': - sb.Append("\\r"); + vsb.Append("\\r"); break; case '\t': - sb.Append("\\t"); + vsb.Append("\\t"); break; case '\"': - sb.Append("\\\""); + vsb.Append("\\\""); break; case '\\': - sb.Append("\\\\"); + vsb.Append("\\\\"); break; case '\n': - sb.Append("\\n"); + vsb.Append("\\n"); break; case '\b': - sb.Append("\\b"); + vsb.Append("\\b"); break; case '\f': - sb.Append("\\f"); + vsb.Append("\\f"); break; default: - AppendCharAsUnicodeJavaScript(sb, c); + vsb.Append($"\\u{(int)c:x4}"); break; } - startIndex += i + 1; - i = value.AsSpan(startIndex).IndexOfAny(s_invalid_JavaScriptChars); - } while (i != -1); + i = chars.IndexOfAny(s_invalidJavaScriptChars); + } while (i >= 0); - if (startIndex < value.Length) - { - sb.Append(value.AsSpan(startIndex)); - } + vsb.Append(chars); if (addDoubleQuotes) { - sb.Append('"'); + vsb.Append('"'); } - return sb.ToString(); + return vsb.ToString(); } [return: NotNullIfNotNull(nameof(bytes))] From 583bab4c825a7ec7270c710fc61e4d4e76b07caf Mon Sep 17 00:00:00 2001 From: Traian Zaprianov Date: Mon, 3 Jun 2024 10:03:06 +0300 Subject: [PATCH 4/6] EncodeCore and AppendSpanFormattable --- .../src/System.Web.HttpUtility.csproj | 2 + .../src/System/Web/Util/HttpEncoder.cs | 93 ++++++++++--------- 2 files changed, 52 insertions(+), 43 deletions(-) diff --git a/src/libraries/System.Web.HttpUtility/src/System.Web.HttpUtility.csproj b/src/libraries/System.Web.HttpUtility/src/System.Web.HttpUtility.csproj index f62eca42ec0974..5da18b0fae6c82 100644 --- a/src/libraries/System.Web.HttpUtility/src/System.Web.HttpUtility.csproj +++ b/src/libraries/System.Web.HttpUtility/src/System.Web.HttpUtility.csproj @@ -17,6 +17,8 @@ Link="Common\System\HexConverter.cs" /> + diff --git a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs index eb4cc68ca8c24d..d03236dd9909c8 100644 --- a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs +++ b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs @@ -141,56 +141,63 @@ internal static string JavaScriptStringEncode(string? value, bool addDoubleQuote return addDoubleQuotes ? $"\"{value}\"" : value; } - var vsb = new ValueStringBuilder(stackalloc char[512]); - if (addDoubleQuotes) - { - vsb.Append('"'); - } - ReadOnlySpan chars = value; - do + return EncodeCore(value, i, addDoubleQuotes); + + static string EncodeCore(ReadOnlySpan value, int i, bool addDoubleQuotes) { - vsb.Append(chars.Slice(0, i)); - char c = chars[i]; - chars = chars.Slice(i + 1); - switch (c) + var vsb = new ValueStringBuilder(stackalloc char[512]); + if (addDoubleQuotes) { - case '\r': - vsb.Append("\\r"); - break; - case '\t': - vsb.Append("\\t"); - break; - case '\"': - vsb.Append("\\\""); - break; - case '\\': - vsb.Append("\\\\"); - break; - case '\n': - vsb.Append("\\n"); - break; - case '\b': - vsb.Append("\\b"); - break; - case '\f': - vsb.Append("\\f"); - break; - default: - vsb.Append($"\\u{(int)c:x4}"); - break; + vsb.Append('"'); } - i = chars.IndexOfAny(s_invalidJavaScriptChars); - } while (i >= 0); + ReadOnlySpan chars = value; + do + { + vsb.Append(chars.Slice(0, i)); + char c = chars[i]; + chars = chars.Slice(i + 1); + switch (c) + { + case '\r': + vsb.Append("\\r"); + break; + case '\t': + vsb.Append("\\t"); + break; + case '\"': + vsb.Append("\\\""); + break; + case '\\': + vsb.Append("\\\\"); + break; + case '\n': + vsb.Append("\\n"); + break; + case '\b': + vsb.Append("\\b"); + break; + case '\f': + vsb.Append("\\f"); + break; + default: + vsb.Append("\\u"); + vsb.AppendSpanFormattable((int)c, "x4"); + break; + } - vsb.Append(chars); + i = chars.IndexOfAny(s_invalidJavaScriptChars); + } while (i >= 0); - if (addDoubleQuotes) - { - vsb.Append('"'); - } + vsb.Append(chars); - return vsb.ToString(); + if (addDoubleQuotes) + { + vsb.Append('"'); + } + + return vsb.ToString(); + } } [return: NotNullIfNotNull(nameof(bytes))] From 5d498a4d7ac665d6e53a8ae9acbc3e03cf4cd065 Mon Sep 17 00:00:00 2001 From: Traian Zaprianov Date: Mon, 3 Jun 2024 10:06:38 +0300 Subject: [PATCH 5/6] Remove check for null --- .../src/System/Web/Util/HttpEncoder.cs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs index d03236dd9909c8..f1590c8080bb3e 100644 --- a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs +++ b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs @@ -130,15 +130,10 @@ private static int IndexOfHtmlAttributeEncodingChars(string s) => internal static string JavaScriptStringEncode(string? value, bool addDoubleQuotes) { - if (string.IsNullOrEmpty(value)) - { - return addDoubleQuotes ? @"""""" : string.Empty; - } - int i = value.AsSpan().IndexOfAny(s_invalidJavaScriptChars); if (i < 0) { - return addDoubleQuotes ? $"\"{value}\"" : value; + return addDoubleQuotes ? $"\"{value}\"" : value ?? string.Empty; } return EncodeCore(value, i, addDoubleQuotes); From 76e7e44b3b626f110a48370efac1bd07ef3d15e8 Mon Sep 17 00:00:00 2001 From: Traian Zaprianov Date: Tue, 4 Jun 2024 14:50:44 +0300 Subject: [PATCH 6/6] Use StackallocThreshold const --- .../System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs index f1590c8080bb3e..690645e0759147 100644 --- a/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs +++ b/src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs @@ -140,7 +140,7 @@ internal static string JavaScriptStringEncode(string? value, bool addDoubleQuote static string EncodeCore(ReadOnlySpan value, int i, bool addDoubleQuotes) { - var vsb = new ValueStringBuilder(stackalloc char[512]); + var vsb = new ValueStringBuilder(stackalloc char[StackallocThreshold]); if (addDoubleQuotes) { vsb.Append('"');