Skip to content

Commit ddd19a7

Browse files
Optimize HttpUtility.JavaScriptStringEncode by using SearchValues (#102917)
* Optimize HttpUtility.JavaScriptStringEncode by using SearchValues for invalid JavaScript characters.
* remove duplicated test
* more suggestions
* EncodeCore and AppendSpanFormattable
* Remove check for null
* Use StackallocThreshold const
1 parent b2601ed commit ddd19a7

File tree

4 files changed: +49 -65 lines changed

src/libraries/System.Web.HttpUtility/src/System.Web.HttpUtility.csproj

+4
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,10 @@
1515
<Compile Include="System\Web\Util\Utf16StringValidator.cs" />
1616
<Compile Include="$(CommonPath)System\HexConverter.cs"
1717
Link="Common\System\HexConverter.cs" />
18+
<Compile Include="$(CommonPath)System\Text\ValueStringBuilder.cs"
19+
Link="Common\System\Text\ValueStringBuilder.cs" />
20+
<Compile Include="$(CommonPath)System\Text\ValueStringBuilder.AppendSpanFormattable.cs"
21+
Link="Common\System\Text\ValueStringBuilder.AppendSpanFormattable.cs" />
1822
</ItemGroup>
1923

2024
<ItemGroup>

src/libraries/System.Web.HttpUtility/src/System/Web/HttpUtility.cs

+2 -6
Original file line number | Diff line number | Diff line change
@@ -237,12 +237,8 @@ public static NameValueCollection ParseQueryString(string query, Encoding encodi
237237
[return: NotNullIfNotNull(nameof(bytes))]
238238
public static byte[]? UrlDecodeToBytes(byte[]? bytes, int offset, int count) => HttpEncoder.UrlDecode(bytes, offset, count);
239239

240-
public static string JavaScriptStringEncode(string? value) => HttpEncoder.JavaScriptStringEncode(value);
240+
public static string JavaScriptStringEncode(string? value) => HttpEncoder.JavaScriptStringEncode(value, false);
241241

242-
public static string JavaScriptStringEncode(string? value, bool addDoubleQuotes)
243-
{
244-
string encoded = HttpEncoder.JavaScriptStringEncode(value);
245-
return addDoubleQuotes ? "\"" + encoded + "\"" : encoded;
246-
}
242+
public static string JavaScriptStringEncode(string? value, bool addDoubleQuotes) => HttpEncoder.JavaScriptStringEncode(value, addDoubleQuotes);
247243
}
248244
}

src/libraries/System.Web.HttpUtility/src/System/Web/Util/HttpEncoder.cs

+42 -59
Original file line number | Diff line number | Diff line change
@@ -20,22 +20,13 @@ internal static class HttpEncoder
2020
private static readonly SearchValues<byte> s_urlSafeBytes = SearchValues.Create(
2121
"!()*-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"u8);
2222

23-
private static void AppendCharAsUnicodeJavaScript(StringBuilder builder, char c)
24-
{
25-
builder.Append($"\\u{(int)c:x4}");
26-
}
27-
28-
private static bool CharRequiresJavaScriptEncoding(char c) =>
29-
c < 0x20 // control chars always have to be encoded
30-
|| c == '\"' // chars which must be encoded per JSON spec
31-
|| c == '\\'
32-
|| c == '\'' // HTML-sensitive chars encoded for safety
33-
|| c == '<'
34-
|| c == '>'
35-
|| (c == '&')
36-
|| c == '\u0085' // newline chars (see Unicode 6.2, Table 5-1 [http://www.unicode.org/versions/Unicode6.2.0/ch05.pdf]) have to be encoded
37-
|| c == '\u2028'
38-
|| c == '\u2029';
23+
private static readonly SearchValues<char> s_invalidJavaScriptChars = SearchValues.Create(
24+
// Any Control, < 32 (' ')
25+
"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" +
26+
// Chars which must be encoded per JSON spec / HTML-sensitive chars encoded for safety
27+
"\"&'<>\\" +
28+
// newline chars (see Unicode 6.2, Table 5-1 [http://www.unicode.org/versions/Unicode6.2.0/ch05.pdf]) have to be encoded
29+
"\u0085\u2028\u2029");
3930

4031
[return: NotNullIfNotNull(nameof(value))]
4132
internal static string? HtmlAttributeEncode(string? value)
@@ -137,79 +128,71 @@ private static int IndexOfHtmlAttributeEncodingChars(string s) =>
137128

138129
private static bool IsNonAsciiByte(byte b) => b >= 0x7F || b < 0x20;
139130

140-
internal static string JavaScriptStringEncode(string? value)
131+
internal static string JavaScriptStringEncode(string? value, bool addDoubleQuotes)
141132
{
142-
if (string.IsNullOrEmpty(value))
133+
int i = value.AsSpan().IndexOfAny(s_invalidJavaScriptChars);
134+
if (i < 0)
143135
{
144-
return string.Empty;
136+
return addDoubleQuotes ? $"\"{value}\"" : value ?? string.Empty;
145137
}
146138

147-
StringBuilder? b = null;
148-
int startIndex = 0;
149-
int count = 0;
150-
for (int i = 0; i < value.Length; i++)
151-
{
152-
char c = value[i];
139+
return EncodeCore(value, i, addDoubleQuotes);
153140

154-
// Append the unhandled characters (that do not require special treament)
155-
// to the string builder when special characters are detected.
156-
if (CharRequiresJavaScriptEncoding(c))
141+
static string EncodeCore(ReadOnlySpan<char> value, int i, bool addDoubleQuotes)
142+
{
143+
var vsb = new ValueStringBuilder(stackalloc char[StackallocThreshold]);
144+
if (addDoubleQuotes)
157145
{
158-
b ??= new StringBuilder(value.Length + 5);
159-
160-
if (count > 0)
161-
{
162-
b.Append(value, startIndex, count);
163-
}
164-
165-
startIndex = i + 1;
166-
count = 0;
146+
vsb.Append('"');
147+
}
167148

149+
ReadOnlySpan<char> chars = value;
150+
do
151+
{
152+
vsb.Append(chars.Slice(0, i));
153+
char c = chars[i];
154+
chars = chars.Slice(i + 1);
168155
switch (c)
169156
{
170157
case '\r':
171-
b.Append("\\r");
158+
vsb.Append("\\r");
172159
break;
173160
case '\t':
174-
b.Append("\\t");
161+
vsb.Append("\\t");
175162
break;
176163
case '\"':
177-
b.Append("\\\"");
164+
vsb.Append("\\\"");
178165
break;
179166
case '\\':
180-
b.Append("\\\\");
167+
vsb.Append("\\\\");
181168
break;
182169
case '\n':
183-
b.Append("\\n");
170+
vsb.Append("\\n");
184171
break;
185172
case '\b':
186-
b.Append("\\b");
173+
vsb.Append("\\b");
187174
break;
188175
case '\f':
189-
b.Append("\\f");
176+
vsb.Append("\\f");
190177
break;
191178
default:
192-
AppendCharAsUnicodeJavaScript(b, c);
179+
vsb.Append("\\u");
180+
vsb.AppendSpanFormattable((int)c, "x4");
193181
break;
194182
}
195-
}
196-
else
183+
184+
i = chars.IndexOfAny(s_invalidJavaScriptChars);
185+
} while (i >= 0);
186+
187+
vsb.Append(chars);
188+
189+
if (addDoubleQuotes)
197190
{
198-
count++;
191+
vsb.Append('"');
199192
}
200-
}
201193

202-
if (b == null)
203-
{
204-
return value;
205-
}
206-
207-
if (count > 0)
208-
{
209-
b.Append(value, startIndex, count);
194+
return vsb.ToString();
210195
}
211-
212-
return b.ToString();
213196
}
214197

215198
[return: NotNullIfNotNull(nameof(bytes))]

src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs

+1
Original file line number | Diff line number | Diff line change
@@ -310,6 +310,7 @@ public static IEnumerable<object[]> JavaScriptStringEncodeData
310310
yield return new object[] { "", "" };
311311
yield return new object[] {"No escaping needed.", "No escaping needed."};
312312
yield return new object[] {"The \t and \n will need to be escaped.", "The \\t and \\n will need to be escaped."};
313+
yield return new object[] {"The \t and \n will need to be escaped.>", "The \\t and \\n will need to be escaped.\\u003e" };
313314
for (char c = char.MinValue; c < TestMaxChar; c++)
314315
{
315316
if (c >= 0 && c <= 7 || c == 11 || c >= 14 && c <= 31 || c == 38 || c == 39 || c == 60 || c == 62 || c == 133 || c == 8232 || c == 8233)

0 commit comments

Comments
 (0)