Skip to content

Commit 48c8805

Browse files
buyaa-n, gfoidl, bartonjs, MihaZupan
authored
Add Base64url encoding/decoding (#102364)
* Base64Url encoding, validation implementation * Validation related updates * Try fix perf regression in vectorized methods * Add decoder implementation and unit tests * Share code in place decoding code * Add span<char> overloads with vectorization * Generalize Span char/byte implementations * Move ref update to runtime and other cleanup * Generalize the AdvSimd.Arm64 vectorization added recently * Apply suggestions from code review Co-authored-by: Günther Foidl <gue@korporal.at> * Apply some feedback * Try fix ARM failure * Use array pool whenever applicable * Handle '%' as url padding, add more tests and fix findings * Fix assertion failure, apply some feedback, try fix ARM failure * Update docs, small clean ups * Try fix ARM failure * Update src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Url/Base64UrlEncoder.cs Co-authored-by: Günther Foidl <gue@korporal.at> * Rename StoreToDestination overloads, reuse some duplicate code * Improve perf for Base.IsValid() overloads, exclude ARM vectorization for char(ushort) overload * Apply feedbacks * Revert Assert * Fix ARM vectorization failure for char overload * Apply suggestions from code review Co-authored-by: Jeremy Barton <jbarton@microsoft.com> * Apply suggestions from code review Co-authored-by: Günther Foidl <gue@korporal.at> * Apply more feedback * Apply suggestions from code review Co-authored-by: Günther Foidl <gue@korporal.at> * Apply review comment left overs * Apply suggestions from code review Co-authored-by: Miha Zupan <mihazupan.zupan1@gmail.com> * Apply remaining feedback * Apply suggestions from code review Co-authored-by: Jeremy Barton <jbarton@microsoft.com> * Apply the doc feedback for other API docs * Fix Base64Url fuzzer findings * Apply suggestions from code review Co-authored-by: Jeremy Barton <jbarton@microsoft.com> * Rename utf8 -> source/destination * Apply feedbacks * Apply feedback * Apply left out feedbacks --------- Co-authored-by: Günther Foidl <gue@korporal.at> 
Co-authored-by: Jeremy Barton <jbarton@microsoft.com> Co-authored-by: Miha Zupan <mihazupan.zupan1@gmail.com>
1 parent 39c4905 commit 48c8805

19 files changed

+4443
-392
lines changed

src/libraries/System.Memory/tests/Base64/Base64DecoderUnitTests.cs

+8-18
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4-
using System.Collections;
54
using System.Collections.Generic;
65
using System.Linq;
76
using System.Text;
@@ -273,6 +272,9 @@ public void BasicDecodingWithFinalBlockTrueKnownInputDone(string inputString, in
273272

274273
[Theory]
275274
[InlineData("A", 0, 0)]
275+
[InlineData("A===", 0, 0)]
276+
[InlineData("A==", 0, 0)]
277+
[InlineData("A=", 0, 0)]
276278
[InlineData("AQ", 0, 0)]
277279
[InlineData("AQI", 0, 0)]
278280
[InlineData("AQIDBA", 4, 3)]
@@ -285,16 +287,18 @@ public void BasicDecodingWithFinalBlockTrueKnownInputInvalid(string inputString,
285287
Assert.Equal(OperationStatus.InvalidData, Base64.DecodeFromUtf8(source, decodedBytes, out int consumed, out int decodedByteCount));
286288
Assert.Equal(expectedConsumed, consumed);
287289
Assert.Equal(expectedWritten, decodedByteCount); // expectedWritten == decodedBytes.Length
288-
Assert.True(Base64TestHelper.VerifyDecodingCorrectness(expectedConsumed, decodedBytes.Length, source, decodedBytes));
290+
Assert.True(Base64TestHelper.VerifyDecodingCorrectness(expectedConsumed, expectedWritten, source, decodedBytes));
289291
}
290292

291293
[Theory]
292294
[InlineData("\u00ecz/T", 0, 0)] // scalar code-path
293295
[InlineData("z/Ta123\u00ec", 4, 3)]
294296
[InlineData("\u00ecz/TpH7sqEkerqMweH1uSw==", 0, 0)] // Vector128 code-path
295-
[InlineData("z/TpH7sqEkerqMweH1uSw\u00ec==", 20, 15)]
296-
[InlineData("\u00ecz/TpH7sqEkerqMweH1uSw1a5ebaAF9xa8B0ze1wet4epo==", 0, 0)] // Vector256 / AVX code-path
297+
[InlineData("z/TpH7sqEkerqMweH1uSw\u5948==", 20, 15)]
298+
[InlineData("\u5948/TpH7sqEkerqMweH1uSw1a5ebaAF9xa8B0ze1wet4epo==", 0, 0)] // Vector256 / AVX code-path
297299
[InlineData("z/TpH7sqEkerqMweH1uSw1a5ebaAF9xa8B0ze1wet4epo\u00ec==", 44, 33)]
300+
[InlineData("\u5948z+T/H7sqEkerqMweH1uSw1a5ebaAF9xa8B0ze1wet4epo01234567890123456789012345678901234567890123456789==", 0, 0)] // Vector512 / Avx512Vbmi code-path
301+
[InlineData("z/T+H7sqEkerqMweH1uSw1a5ebaAF9xa8B0ze1wet4epo01234567890123456789012345678901234567890123456789\u5948==", 92, 69)]
298302
public void BasicDecodingNonAsciiInputInvalid(string inputString, int expectedConsumed, int expectedWritten)
299303
{
300304
Span<byte> source = Encoding.UTF8.GetBytes(inputString);
@@ -749,19 +753,5 @@ public void BasicDecodingWithExtraWhitespaceShouldBeCountedInConsumedBytes(strin
749753
Assert.Equal(expectedWritten, decodedByteCount);
750754
Assert.True(Base64TestHelper.VerifyDecodingCorrectness(expectedConsumed, expectedWritten, source, decodedBytes));
751755
}
752-
753-
public static IEnumerable<object[]> BasicDecodingWithExtraWhitespaceShouldBeCountedInConsumedBytes_MemberData()
754-
{
755-
var r = new Random(42);
756-
for (int i = 0; i < 5; i++)
757-
{
758-
yield return new object[] { "AQ==" + new string(r.GetItems<char>(" \n\t\r", i)), 4 + i, 1 };
759-
}
760-
761-
foreach (string s in new[] { "MTIz", "M TIz", "MT Iz", "MTI z", "MTIz ", "M TI z", "M T I Z " })
762-
{
763-
yield return new object[] { s + s + s + s, s.Length * 4, 12 };
764-
}
765-
}
766756
}
767757
}

src/libraries/System.Memory/tests/Base64/Base64TestBase.cs

+14
Original file line number | Diff line number | Diff line change
@@ -107,5 +107,19 @@ public static IEnumerable<object[]> StringsOnlyWithCharsToBeIgnored()
107107

108108
string GetRepeatedChar(char charToInsert, int numberOfTimesToInsert) => new string(charToInsert, numberOfTimesToInsert);
109109
}
110+
111+
public static IEnumerable<object[]> BasicDecodingWithExtraWhitespaceShouldBeCountedInConsumedBytes_MemberData()
112+
{
113+
var r = new Random(42);
114+
for (int i = 0; i < 5; i++)
115+
{
116+
yield return new object[] { "AQ==" + new string(r.GetItems<char>(" \n\t\r", i)), 4 + i, 1 };
117+
}
118+
119+
foreach (string s in new[] { "MTIz", "M TIz", "MT Iz", "MTI z", "MTIz ", "M TI z", "M T I Z " })
120+
{
121+
yield return new object[] { s + s + s + s, s.Length * 4, 12 };
122+
}
123+
}
110124
}
111125
}

src/libraries/System.Memory/tests/Base64/Base64TestHelper.cs

+84-4
Original file line number | Diff line number | Diff line change
@@ -24,12 +24,23 @@ public static class Base64TestHelper
2424
52, 53, 54, 55, 56, 57, 43, 47 //4..9, +, /
2525
};
2626

27+
public static readonly byte[] s_urlEncodingMap = {
28+
65, 66, 67, 68, 69, 70, 71, 72, //A..H
29+
73, 74, 75, 76, 77, 78, 79, 80, //I..P
30+
81, 82, 83, 84, 85, 86, 87, 88, //Q..X
31+
89, 90, 97, 98, 99, 100, 101, 102, //Y..Z, a..f
32+
103, 104, 105, 106, 107, 108, 109, 110, //g..n
33+
111, 112, 113, 114, 115, 116, 117, 118, //o..v
34+
119, 120, 121, 122, 48, 49, 50, 51, //w..z, 0..3
35+
52, 53, 54, 55, 56, 57, 45, 95 //4..9, -, _
36+
};
37+
2738
// Pre-computing this table using a custom string(s_characters) and GenerateDecodingMapAndVerify (found in tests)
2839
public static readonly sbyte[] s_decodingMap = {
2940
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
3041
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
3142
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, //62 is placed at index 43 (for +), 63 at index 47 (for /)
32-
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, //52-61 are placed at index 48-57 (for 0-9), 64 at index 61 (for =)
43+
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, //52-61 are placed at index 48-57 (for 0-9)
3344
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
3445
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, //0-25 are placed at index 65-90 (for A-Z)
3546
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
@@ -44,9 +55,29 @@ public static class Base64TestHelper
4455
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
4556
};
4657

58+
public static readonly sbyte[] s_urlDecodingMap = {
59+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
60+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
61+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, //62 is placed at index 45 (for -), 63 at index 95 (for _)
62+
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, //52-61 are placed at index 48-57 (for 0-9)
63+
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
64+
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, //0-25 are placed at index 65-90 (for A-Z)
65+
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
66+
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, //26-51 are placed at index 97-122 (for a-z)
67+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bytes over 122 ('z') are invalid and cannot be decoded
68+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Hence, padding the map with 255, which indicates invalid input
69+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
70+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
71+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
72+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
73+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
74+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
75+
};
76+
4777
public static bool IsByteToBeIgnored(byte charByte) => charByte is (byte)' ' or (byte)'\t' or (byte)'\r' or (byte)'\n';
4878

4979
public const byte EncodingPad = (byte)'='; // '=', for padding
80+
public const byte UrlEncodingPad = (byte)'%'; // '%', for url padding
5081
public const sbyte InvalidByte = -1; // Designating -1 for invalid bytes in the decoding map
5182

5283
public static byte[] InvalidBytes
@@ -60,6 +91,17 @@ public static byte[] InvalidBytes
6091
}
6192
}
6293

94+
public static byte[] UrlInvalidBytes
95+
{
96+
get
97+
{
98+
int[] indices = s_urlDecodingMap.FindAllIndexOf(InvalidByte);
99+
// Workaround for indices.Cast<byte>().ToArray() since it throws
100+
// InvalidCastException: Unable to cast object of type 'System.Int32' to type 'System.Byte'
101+
return indices.Select(i => (byte)i).ToArray();
102+
}
103+
}
104+
63105
internal static void InitializeBytes(Span<byte> bytes, int seed = 100)
64106
{
65107
var rnd = new Random(seed);
@@ -79,6 +121,26 @@ internal static void InitializeDecodableBytes(Span<byte> bytes, int seed = 100)
79121
}
80122
}
81123

124+
internal static void InitializeUrlDecodableChars(Span<char> bytes, int seed = 100)
125+
{
126+
var rnd = new Random(seed);
127+
for (int i = 0; i < bytes.Length; i++)
128+
{
129+
int index = (byte)rnd.Next(0, s_urlEncodingMap.Length);
130+
bytes[i] = (char)s_urlEncodingMap[index];
131+
}
132+
}
133+
134+
internal static void InitializeUrlDecodableBytes(Span<byte> bytes, int seed = 100)
135+
{
136+
var rnd = new Random(seed);
137+
for (int i = 0; i < bytes.Length; i++)
138+
{
139+
int index = (byte)rnd.Next(0, s_urlEncodingMap.Length);
140+
bytes[i] = s_urlEncodingMap[index];
141+
}
142+
}
143+
82144
[Fact]
83145
public static void GenerateEncodingMapAndVerify()
84146
{
@@ -112,16 +174,34 @@ public static int[] FindAllIndexOf<T>(this IEnumerable<T> values, T valueToFind)
112174

113175
public static bool VerifyEncodingCorrectness(int expectedConsumed, int expectedWritten, Span<byte> source, Span<byte> encodedBytes)
114176
{
115-
string expectedText = Convert.ToBase64String(source.Slice(0, expectedConsumed).ToArray());
116-
string encodedText = Encoding.ASCII.GetString(encodedBytes.Slice(0, expectedWritten).ToArray());
177+
string expectedText = Convert.ToBase64String(source.Slice(0, expectedConsumed));
178+
string encodedText = Encoding.ASCII.GetString(encodedBytes.Slice(0, expectedWritten));
179+
return expectedText.Equals(encodedText);
180+
}
181+
182+
public static bool VerifyUrlEncodingCorrectness(int expectedConsumed, int expectedWritten, Span<byte> source, Span<byte> encodedBytes)
183+
{
184+
string expectedText = Convert.ToBase64String(source.Slice(0, expectedConsumed))
185+
.Replace('+', '-').Replace('/', '_').TrimEnd('=');
186+
string encodedText = Encoding.ASCII.GetString(encodedBytes.Slice(0, expectedWritten));
117187
return expectedText.Equals(encodedText);
118188
}
119189

120190
public static bool VerifyDecodingCorrectness(int expectedConsumed, int expectedWritten, Span<byte> source, Span<byte> decodedBytes)
121191
{
122-
string sourceString = Encoding.ASCII.GetString(source.Slice(0, expectedConsumed).ToArray());
192+
string sourceString = Encoding.ASCII.GetString(source.Slice(0, expectedConsumed));
123193
byte[] expectedBytes = Convert.FromBase64String(sourceString);
124194
return expectedBytes.AsSpan().SequenceEqual(decodedBytes.Slice(0, expectedWritten));
125195
}
196+
197+
public static bool VerifyUrlDecodingCorrectness(int expectedConsumed, int expectedWritten, Span<byte> source, Span<byte> decodedBytes)
198+
{
199+
string sourceString = Encoding.ASCII.GetString(source.Slice(0, expectedConsumed));
200+
string padded = sourceString.Length % 4 == 0 ? sourceString :
201+
sourceString.PadRight(sourceString.Length + (4 - sourceString.Length % 4), '=');
202+
string base64 = padded.Replace('_', '/').Replace('-', '+').Replace('%', '=');
203+
byte[] expectedBytes = Convert.FromBase64String(base64);
204+
return expectedBytes.AsSpan().SequenceEqual(decodedBytes.Slice(0, expectedWritten));
205+
}
126206
}
127207
}

src/libraries/System.Memory/tests/Base64/Base64ValidationUnitTests.cs

+14-7
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,7 @@ public void BasicValidationInvalidInputLengthBytes()
6969
} while (numBytes % 4 == 0); // ensure we have a invalid length
7070

7171
Span<byte> source = new byte[numBytes];
72+
Base64TestHelper.InitializeDecodableBytes(source, numBytes);
7273

7374
Assert.False(Base64.IsValid(source));
7475
Assert.False(Base64.IsValid(source, out int decodedLength));
@@ -88,10 +89,16 @@ public void BasicValidationInvalidInputLengthChars()
8889
numBytes = rnd.Next(100, 1000 * 1000);
8990
} while (numBytes % 4 == 0); // ensure we have a invalid length
9091

91-
Span<char> source = new char[numBytes];
92+
Span<byte> source = new byte[numBytes];
93+
Base64TestHelper.InitializeDecodableBytes(source, numBytes);
94+
Span<char> chars = source
95+
.ToArray()
96+
.Select(Convert.ToChar)
97+
.ToArray()
98+
.AsSpan();
9299

93-
Assert.False(Base64.IsValid(source));
94-
Assert.False(Base64.IsValid(source, out int decodedLength));
100+
Assert.False(Base64.IsValid(chars));
101+
Assert.False(Base64.IsValid(chars, out int decodedLength));
95102
Assert.Equal(0, decodedLength);
96103
}
97104
}
@@ -267,7 +274,7 @@ public void InvalidSizeBytes(string utf8WithByteToBeIgnored)
267274
[InlineData("Y")]
268275
public void InvalidSizeChars(string utf8WithByteToBeIgnored)
269276
{
270-
byte[] utf8BytesWithByteToBeIgnored = UTF8Encoding.UTF8.GetBytes(utf8WithByteToBeIgnored);
277+
ReadOnlySpan<char> utf8BytesWithByteToBeIgnored = utf8WithByteToBeIgnored;
271278

272279
Assert.False(Base64.IsValid(utf8BytesWithByteToBeIgnored));
273280
Assert.False(Base64.IsValid(utf8BytesWithByteToBeIgnored, out int decodedLength));
@@ -329,10 +336,10 @@ public void InvalidBase64Bytes(string utf8WithByteToBeIgnored)
329336
[InlineData(" a ")]
330337
public void InvalidBase64Chars(string utf8WithByteToBeIgnored)
331338
{
332-
byte[] utf8BytesWithByteToBeIgnored = UTF8Encoding.UTF8.GetBytes(utf8WithByteToBeIgnored);
339+
ReadOnlySpan<char> utf8CharsWithCharToBeIgnored = utf8WithByteToBeIgnored;
333340

334-
Assert.False(Base64.IsValid(utf8BytesWithByteToBeIgnored));
335-
Assert.False(Base64.IsValid(utf8BytesWithByteToBeIgnored, out int decodedLength));
341+
Assert.False(Base64.IsValid(utf8CharsWithCharToBeIgnored));
342+
Assert.False(Base64.IsValid(utf8CharsWithCharToBeIgnored, out int decodedLength));
336343
Assert.Equal(0, decodedLength);
337344
}
338345
}

0 commit comments

Comments
 (0)