diff --git a/src/libraries/Fuzzing/DotnetFuzzing/Fuzzers/Base64Fuzzer.cs b/src/libraries/Fuzzing/DotnetFuzzing/Fuzzers/Base64Fuzzer.cs index 056874a30a57f3..a19008283ce34e 100644 --- a/src/libraries/Fuzzing/DotnetFuzzing/Fuzzers/Base64Fuzzer.cs +++ b/src/libraries/Fuzzing/DotnetFuzzing/Fuzzers/Base64Fuzzer.cs @@ -24,8 +24,9 @@ public void FuzzTarget(ReadOnlySpan bytes) } private void TestCases(Span input, PoisonPagePlacement poison) - { + { TestBase64(input, poison); + TestBase64Chars(input, poison); TestToStringToCharArray(input, Base64FormattingOptions.None); TestToStringToCharArray(input, Base64FormattingOptions.InsertLineBreaks); } @@ -129,6 +130,167 @@ private void TestBase64(Span input, PoisonPagePlacement poison) Assert.Equal(OperationStatus.InvalidData, Base64.DecodeFromUtf8InPlace(input, out int inPlaceDecoded)); } } + + { // Test new simplified UTF-8 APIs + // Test EncodeToUtf8 returning byte[] + byte[] encodedArray = Base64.EncodeToUtf8(input); + Assert.Equal(true, maxEncodedLength >= encodedArray.Length && maxEncodedLength - 2 <= encodedArray.Length); + + // Test EncodeToUtf8 returning int + encoderDest.Clear(); + int charsWritten = Base64.EncodeToUtf8(input, encoderDest); + Assert.SequenceEqual(encodedArray.AsSpan(), encoderDest.Slice(0, charsWritten)); + + // Test TryEncodeToUtf8 + encoderDest.Clear(); + Assert.Equal(true, Base64.TryEncodeToUtf8(input, encoderDest, out int tryCharsWritten)); + Assert.Equal(charsWritten, tryCharsWritten); + Assert.SequenceEqual(encodedArray.AsSpan(), encoderDest.Slice(0, tryCharsWritten)); + + // Test DecodeFromUtf8 returning byte[] + byte[] decodedArray = Base64.DecodeFromUtf8(encodedArray); + Assert.SequenceEqual(input, decodedArray.AsSpan()); + + // Test DecodeFromUtf8 returning int + decoderDest.Clear(); + int bytesWritten = Base64.DecodeFromUtf8(encodedArray, decoderDest); + Assert.Equal(input.Length, bytesWritten); + Assert.SequenceEqual(input, decoderDest.Slice(0, bytesWritten)); + + // Test TryDecodeFromUtf8 + 
decoderDest.Clear(); + Assert.Equal(true, Base64.TryDecodeFromUtf8(encodedArray, decoderDest, out int tryBytesWritten)); + Assert.Equal(input.Length, tryBytesWritten); + Assert.SequenceEqual(input, decoderDest.Slice(0, tryBytesWritten)); + + // Test TryEncodeToUtf8InPlace + using PooledBoundedMemory inPlaceBuffer = PooledBoundedMemory.Rent(maxEncodedLength, poison); + Span inPlaceDest = inPlaceBuffer.Span; + input.CopyTo(inPlaceDest); + Assert.Equal(true, Base64.TryEncodeToUtf8InPlace(inPlaceDest, input.Length, out int inPlaceWritten)); + Assert.SequenceEqual(encodedArray.AsSpan(), inPlaceDest.Slice(0, inPlaceWritten)); + + // Test GetEncodedLength matches GetMaxEncodedToUtf8Length + Assert.Equal(Base64.GetMaxEncodedToUtf8Length(input.Length), Base64.GetEncodedLength(input.Length)); + + // Test GetMaxDecodedLength matches GetMaxDecodedFromUtf8Length + Assert.Equal(Base64.GetMaxDecodedFromUtf8Length(maxEncodedLength), Base64.GetMaxDecodedLength(maxEncodedLength)); + } + } + + private static void TestBase64Chars(Span input, PoisonPagePlacement poison) + { + int encodedLength = Base64.GetEncodedLength(input.Length); + int maxDecodedLength = Base64.GetMaxDecodedLength(encodedLength); + + using PooledBoundedMemory destPoisoned = PooledBoundedMemory.Rent(encodedLength, poison); + using PooledBoundedMemory decoderDestPoisoned = PooledBoundedMemory.Rent(maxDecodedLength, poison); + + Span encoderDest = destPoisoned.Span; + Span decoderDest = decoderDestPoisoned.Span; + + { // IsFinalBlock = true + OperationStatus status = Base64.EncodeToChars(input, encoderDest, out int bytesConsumed, out int charsEncoded); + + Assert.Equal(OperationStatus.Done, status); + Assert.Equal(input.Length, bytesConsumed); + Assert.Equal(encodedLength, charsEncoded); + + string encodedString = Base64.EncodeToString(input); + Assert.Equal(encodedString, new string(encoderDest.Slice(0, charsEncoded))); + + status = Base64.DecodeFromChars(encoderDest.Slice(0, charsEncoded), decoderDest, out int 
charsRead, out int bytesDecoded); + + Assert.Equal(OperationStatus.Done, status); + Assert.Equal(input.Length, bytesDecoded); + Assert.Equal(charsEncoded, charsRead); + Assert.SequenceEqual(input, decoderDest.Slice(0, bytesDecoded)); + } + + { // IsFinalBlock = false + encoderDest.Clear(); + decoderDest.Clear(); + OperationStatus status = Base64.EncodeToChars(input, encoderDest, out int bytesConsumed, out int charsEncoded, isFinalBlock: false); + Span decodeInput = encoderDest.Slice(0, charsEncoded); + + if (input.Length % 3 == 0) + { + Assert.Equal(OperationStatus.Done, status); + Assert.Equal(input.Length, bytesConsumed); + Assert.Equal(encodedLength, charsEncoded); + + status = Base64.DecodeFromChars(decodeInput, decoderDest, out int charsRead, out int bytesDecoded, isFinalBlock: false); + + Assert.Equal(OperationStatus.Done, status); + Assert.Equal(input.Length, bytesDecoded); + Assert.Equal(charsEncoded, charsRead); + Assert.SequenceEqual(input, decoderDest.Slice(0, bytesDecoded)); + } + else + { + Assert.Equal(OperationStatus.NeedMoreData, status); + Assert.Equal(true, input.Length / 3 * 4 == charsEncoded); + + status = Base64.DecodeFromChars(decodeInput, decoderDest, out int charsRead, out int bytesDecoded, isFinalBlock: false); + + if (decodeInput.Length % 4 == 0) + { + Assert.Equal(OperationStatus.Done, status); + Assert.Equal(bytesConsumed, bytesDecoded); + Assert.Equal(charsEncoded, charsRead); + } + else + { + Assert.Equal(OperationStatus.NeedMoreData, status); + } + + Assert.SequenceEqual(input.Slice(0, bytesDecoded), decoderDest.Slice(0, bytesDecoded)); + } + } + + { // Test array-returning and int-returning overloads + char[] encodedChars = Base64.EncodeToChars(input); + Assert.Equal(encodedLength, encodedChars.Length); + + encoderDest.Clear(); + int charsWritten = Base64.EncodeToChars(input, encoderDest); + Assert.Equal(encodedLength, charsWritten); + Assert.SequenceEqual(encodedChars.AsSpan(), encoderDest.Slice(0, charsWritten)); + + byte[] 
decodedBytes = Base64.DecodeFromChars(encodedChars); + Assert.SequenceEqual(input, decodedBytes.AsSpan()); + + decoderDest.Clear(); + int bytesWritten = Base64.DecodeFromChars(encodedChars, decoderDest); + Assert.Equal(input.Length, bytesWritten); + Assert.SequenceEqual(input, decoderDest.Slice(0, bytesWritten)); + } + + { // Test Try* variants + encoderDest.Clear(); + Assert.Equal(true, Base64.TryEncodeToChars(input, encoderDest, out int charsWritten)); + Assert.Equal(encodedLength, charsWritten); + + decoderDest.Clear(); + Assert.Equal(true, Base64.TryDecodeFromChars(encoderDest.Slice(0, charsWritten), decoderDest, out int bytesWritten)); + Assert.Equal(input.Length, bytesWritten); + Assert.SequenceEqual(input, decoderDest.Slice(0, bytesWritten)); + } + + { // Decode the random chars directly (as chars, from the input bytes interpreted as UTF-16) + // Create a char span from the input bytes for testing decode with random data + if (input.Length >= 2) + { + ReadOnlySpan inputChars = System.Runtime.InteropServices.MemoryMarshal.Cast(input); + decoderDest.Clear(); + + // Try decoding - may succeed or fail depending on if input is valid base64 + OperationStatus status = Base64.DecodeFromChars(inputChars, decoderDest, out int charsConsumed, out int bytesDecoded); + // Just verify we don't crash - the result depends on input validity + Assert.Equal(true, status == OperationStatus.Done || status == OperationStatus.InvalidData || + status == OperationStatus.NeedMoreData || status == OperationStatus.DestinationTooSmall); + } + } } private static void TestToStringToCharArray(Span input, Base64FormattingOptions options) diff --git a/src/libraries/System.Memory/tests/Base64/Base64DecoderUnitTests.cs b/src/libraries/System.Memory/tests/Base64/Base64DecoderUnitTests.cs index 81f5be0688a6b2..cb95a6e03f77af 100644 --- a/src/libraries/System.Memory/tests/Base64/Base64DecoderUnitTests.cs +++ b/src/libraries/System.Memory/tests/Base64/Base64DecoderUnitTests.cs @@ -771,6 +771,134 @@ 
public void BasicDecodingWithExtraWhitespaceShouldBeCountedInConsumedBytes(strin Assert.True(Base64TestHelper.VerifyDecodingCorrectness(expectedConsumed, expectedWritten, source, decodedBytes)); } + [Fact] + public void DecodeFromCharsWithLargeSpan() + { + var rnd = new Random(42); + for (int i = 0; i < 5; i++) + { + int numBytes = rnd.Next(100, 1000 * 1000); + // Ensure we have a valid length (multiple of 4 for standard Base64) + numBytes = (numBytes / 4) * 4; + + Span source = new char[numBytes]; + Base64TestHelper.InitializeDecodableChars(source, numBytes); + + Span decodedBytes = new byte[Base64.GetMaxDecodedLength(source.Length)]; + Assert.Equal(OperationStatus.Done, Base64.DecodeFromChars(source, decodedBytes, out int consumed, out int decodedByteCount)); + Assert.Equal(source.Length, consumed); + + string sourceString = source.ToString(); + byte[] expectedBytes = Convert.FromBase64String(sourceString); + Assert.True(expectedBytes.AsSpan().SequenceEqual(decodedBytes.Slice(0, decodedByteCount))); + } + } + + [Theory] + [InlineData("\u5948cz/T", 0, 0)] // tests the scalar code-path with non-ASCII + [InlineData("z/Ta123\u5948", 4, 3)] + public void DecodeFromCharsNonAsciiInputInvalid(string inputString, int expectedConsumed, int expectedWritten) + { + Span source = inputString.ToArray(); + Span decodedBytes = new byte[Base64.GetMaxDecodedLength(source.Length)]; + + Assert.Equal(OperationStatus.InvalidData, Base64.DecodeFromChars(source, decodedBytes, out int consumed, out int decodedByteCount)); + Assert.Equal(expectedConsumed, consumed); + Assert.Equal(expectedWritten, decodedByteCount); + } + + [Fact] + public void DecodeFromUtf8_ArrayOverload() + { + byte[] utf8Input = Encoding.UTF8.GetBytes("dGVzdA=="); // "test" encoded + byte[] result = Base64.DecodeFromUtf8(utf8Input); + Assert.Equal(4, result.Length); + Assert.Equal("test", Encoding.UTF8.GetString(result)); + } + + [Fact] + public void DecodeFromUtf8_SpanOverload() + { + byte[] utf8Input = 
Encoding.UTF8.GetBytes("dGVzdA=="); // "test" encoded + Span destination = new byte[10]; + int bytesWritten = Base64.DecodeFromUtf8(utf8Input, destination); + Assert.Equal(4, bytesWritten); + Assert.Equal("test", Encoding.UTF8.GetString(destination.Slice(0, bytesWritten))); + } + + [Fact] + public void TryDecodeFromUtf8_Success() + { + byte[] utf8Input = Encoding.UTF8.GetBytes("dGVzdA=="); + Span destination = new byte[10]; + Assert.True(Base64.TryDecodeFromUtf8(utf8Input, destination, out int bytesWritten)); + Assert.Equal(4, bytesWritten); + Assert.Equal("test", Encoding.UTF8.GetString(destination.Slice(0, bytesWritten))); + } + + [Fact] + public void TryDecodeFromUtf8_DestinationTooSmall() + { + byte[] utf8Input = Encoding.UTF8.GetBytes("dGVzdA=="); + Span destination = new byte[2]; // Too small + Assert.False(Base64.TryDecodeFromUtf8(utf8Input, destination, out int bytesWritten)); + Assert.Equal(0, bytesWritten); + } + + [Fact] + public void DecodeFromChars_InvalidData() + { + string invalidInput = "@#$%"; + byte[] destination = new byte[10]; + Assert.Throws(() => Base64.DecodeFromChars(invalidInput, destination)); + Assert.Throws(() => Base64.DecodeFromChars(invalidInput.AsSpan())); + } + + [Fact] + public void DecodeFromChars_DestinationTooSmall() + { + string validInput = "dGVzdA=="; // "test" encoded + byte[] destination = new byte[2]; // Too small + Assert.Throws("destination", () => Base64.DecodeFromChars(validInput, destination)); + } + + [Fact] + public void TryDecodeFromChars_DestinationTooSmall() + { + string validInput = "dGVzdA=="; // "test" encoded + Span destination = new byte[2]; // Too small + Assert.False(Base64.TryDecodeFromChars(validInput, destination, out int bytesWritten)); + } + + [Fact] + public void DecodeFromChars_OperationStatus_DistinguishesBetweenInvalidAndDestinationTooSmall() + { + // This is the key use case from the issue - distinguishing between invalid data and destination too small + string validInput = "dGVzdA=="; // "test" 
encoded - produces 4 bytes + string invalidInput = "@#$%"; + Span smallDestination = new byte[2]; + + // With destination too small, we should get DestinationTooSmall + OperationStatus status1 = Base64.DecodeFromChars(validInput, smallDestination, out int consumed1, out int written1); + Assert.Equal(OperationStatus.DestinationTooSmall, status1); + Assert.Equal((0, 0), (consumed1, written1)); // A 2-byte destination cannot hold even the first decoded 3-byte group, so nothing is consumed or written + + // With invalid data, we should get InvalidData + OperationStatus status2 = Base64.DecodeFromChars(invalidInput, smallDestination, out int consumed2, out int written2); + Assert.Equal(OperationStatus.InvalidData, status2); + Assert.Equal(0, consumed2); + Assert.Equal(0, written2); + } + + [Fact] + public void GetMaxDecodedLength_Matches_GetMaxDecodedFromUtf8Length() + { + for (int i = 0; i < 100; i++) + { + Assert.Equal(Base64.GetMaxDecodedFromUtf8Length(i), Base64.GetMaxDecodedLength(i)); + } + } + [Fact] public void DecodingWithWhiteSpaceIntoSmallDestination() { diff --git a/src/libraries/System.Memory/tests/Base64/Base64EncoderUnitTests.cs b/src/libraries/System.Memory/tests/Base64/Base64EncoderUnitTests.cs index da2d1b9d38b7ad..a4ba6c0b1176a4 100644 --- a/src/libraries/System.Memory/tests/Base64/Base64EncoderUnitTests.cs +++ b/src/libraries/System.Memory/tests/Base64/Base64EncoderUnitTests.cs @@ -1,6 +1,8 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Collections.Generic; +using System.Linq; using System.SpanTests; using System.Text; using Xunit; @@ -297,5 +299,152 @@ public void EncodeInPlaceDataLengthTooLarge() Assert.Equal(OperationStatus.DestinationTooSmall, Base64.EncodeToUtf8InPlace(testBytes, testBytes.Length + 1, out int bytesWritten)); Assert.Equal(0, bytesWritten); } + + [Fact] + public void EncodeToCharsWithLargeSpan() + { + var rnd = new Random(42); + for (int i = 0; i < 5; i++) + { + int numBytes = rnd.Next(100, 1000 * 1000); + Span source = new byte[numBytes]; + Base64TestHelper.InitializeBytes(source, numBytes); + + Span encodedBytes = new char[Base64.GetEncodedLength(source.Length)]; + OperationStatus result = Base64.EncodeToChars(source, encodedBytes, out int consumed, out int encodedBytesCount); + Assert.Equal(OperationStatus.Done, result); + Assert.Equal(source.Length, consumed); + Assert.Equal(encodedBytes.Length, encodedBytesCount); + string expectedText = Convert.ToBase64String(source); + Assert.Equal(expectedText, encodedBytes.ToString()); + } + } + + public static IEnumerable EncodeToStringTests_TestData() + { + yield return new object[] { Enumerable.Range(0, 0).Select(i => (byte)i).ToArray(), "" }; + yield return new object[] { Enumerable.Range(0, 1).Select(i => (byte)i).ToArray(), "AA==" }; + yield return new object[] { Enumerable.Range(0, 2).Select(i => (byte)i).ToArray(), "AAE=" }; + yield return new object[] { Enumerable.Range(0, 3).Select(i => (byte)i).ToArray(), "AAEC" }; + yield return new object[] { Enumerable.Range(0, 4).Select(i => (byte)i).ToArray(), "AAECAw==" }; + yield return new object[] { Enumerable.Range(0, 5).Select(i => (byte)i).ToArray(), "AAECAwQ=" }; + yield return new object[] { Enumerable.Range(0, 6).Select(i => (byte)i).ToArray(), "AAECAwQF" }; + } + + [Theory] + [MemberData(nameof(EncodeToStringTests_TestData))] + public static void EncodeToStringTests(byte[] inputBytes, string expectedBase64) + { + Assert.Equal(expectedBase64, 
Base64.EncodeToString(inputBytes)); + Span chars = new char[Base64.GetEncodedLength(inputBytes.Length)]; + Assert.Equal(OperationStatus.Done, Base64.EncodeToChars(inputBytes, chars, out int _, out int charsWritten)); + Assert.Equal(expectedBase64, chars.Slice(0, charsWritten).ToString()); + } + + [Fact] + public void EncodeToCharsOutputTooSmall() + { + for (int numBytes = 4; numBytes < 20; numBytes++) + { + byte[] source = new byte[numBytes]; + Base64TestHelper.InitializeBytes(source, numBytes); + int expectedConsumed = 3; + char[] encodedBytes = new char[4]; + + Assert.Equal(OperationStatus.DestinationTooSmall, Base64.EncodeToChars(source, encodedBytes, out int consumed, out int written)); + Assert.Equal(expectedConsumed, consumed); + Assert.Equal(encodedBytes.Length, written); + + Assert.Throws("destination", () => Base64.EncodeToChars(source, encodedBytes)); + } + } + + [Fact] + public void EncodeToUtf8_ArrayOverload() + { + byte[] input = Encoding.UTF8.GetBytes("test"); + byte[] result = Base64.EncodeToUtf8(input); + Assert.Equal("dGVzdA==", Encoding.UTF8.GetString(result)); + } + + [Fact] + public void EncodeToUtf8_SpanOverload() + { + byte[] input = Encoding.UTF8.GetBytes("test"); + Span destination = new byte[20]; + int bytesWritten = Base64.EncodeToUtf8(input, destination); + Assert.Equal(8, bytesWritten); + Assert.Equal("dGVzdA==", Encoding.UTF8.GetString(destination.Slice(0, bytesWritten))); + } + + [Fact] + public void TryEncodeToUtf8_Success() + { + byte[] input = Encoding.UTF8.GetBytes("test"); + Span destination = new byte[20]; + Assert.True(Base64.TryEncodeToUtf8(input, destination, out int bytesWritten)); + Assert.Equal(8, bytesWritten); + Assert.Equal("dGVzdA==", Encoding.UTF8.GetString(destination.Slice(0, bytesWritten))); + } + + [Fact] + public void TryEncodeToUtf8_DestinationTooSmall() + { + byte[] input = Encoding.UTF8.GetBytes("test"); + Span destination = new byte[4]; // Too small + Assert.False(Base64.TryEncodeToUtf8(input, destination, out 
int bytesWritten)); + } + + [Fact] + public void TryEncodeToUtf8InPlace_Success() + { + byte[] buffer = new byte[20]; + buffer[0] = (byte)'t'; + buffer[1] = (byte)'e'; + buffer[2] = (byte)'s'; + buffer[3] = (byte)'t'; + + Assert.True(Base64.TryEncodeToUtf8InPlace(buffer, 4, out int bytesWritten)); + Assert.Equal(8, bytesWritten); + Assert.Equal("dGVzdA==", Encoding.UTF8.GetString(buffer.AsSpan(0, bytesWritten))); + } + + [Fact] + public void TryEncodeToUtf8InPlace_DestinationTooSmall() + { + byte[] buffer = new byte[4]; // Same size as input, which is too small for encoded output + buffer[0] = (byte)'t'; + buffer[1] = (byte)'e'; + buffer[2] = (byte)'s'; + buffer[3] = (byte)'t'; + + Assert.False(Base64.TryEncodeToUtf8InPlace(buffer, 4, out int bytesWritten)); + Assert.Equal(0, bytesWritten); + } + + [Fact] + public void EncodeToChars_DestinationTooSmall() + { + byte[] input = Encoding.UTF8.GetBytes("test"); + char[] destination = new char[4]; // Too small + Assert.Throws("destination", () => Base64.EncodeToChars(input, destination)); + } + + [Fact] + public void TryEncodeToChars_DestinationTooSmall() + { + byte[] input = Encoding.UTF8.GetBytes("test"); + Span destination = new char[4]; // Too small + Assert.False(Base64.TryEncodeToChars(input, destination, out int charsWritten)); + } + + [Fact] + public void GetEncodedLength_MatchesExisting() + { + for (int i = 0; i < 100; i++) + { + Assert.Equal(Base64.GetMaxEncodedToUtf8Length(i), Base64.GetEncodedLength(i)); + } + } } } diff --git a/src/libraries/System.Memory/tests/Base64/Base64TestHelper.cs b/src/libraries/System.Memory/tests/Base64/Base64TestHelper.cs index 95da69c0164c9b..c4dc9bf42308c8 100644 --- a/src/libraries/System.Memory/tests/Base64/Base64TestHelper.cs +++ b/src/libraries/System.Memory/tests/Base64/Base64TestHelper.cs @@ -121,6 +121,16 @@ internal static void InitializeDecodableBytes(Span bytes, int seed = 100) } } + internal static void InitializeDecodableChars(Span chars, int seed = 100) + { + var 
rnd = new Random(seed); + for (int i = 0; i < chars.Length; i++) + { + int index = (byte)rnd.Next(0, s_encodingMap.Length); + chars[i] = (char)s_encodingMap[index]; + } + } + internal static void InitializeUrlDecodableChars(Span bytes, int seed = 100) { var rnd = new Random(seed); diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index d30c305a7e6fae..ebf422b4b3c287 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -256,7 +256,6 @@ - diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Decoder.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Decoder.cs index c7277b40829b01..322b0c71a45ca2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Decoder.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Decoder.cs @@ -1,15 +1,48 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Diagnostics; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using static System.Buffers.Text.Base64Helper; namespace System.Buffers.Text { - // AVX2 version based on https://github.com/aklomp/base64/tree/e516d769a2a432c08404f1981e73b431566057be/lib/arch/avx2 - // Vector128 version based on https://github.com/aklomp/base64/tree/e516d769a2a432c08404f1981e73b431566057be/lib/arch/ssse3 public static partial class Base64 { + private const int MaxStackallocThreshold = 256; + + /// + /// Returns the maximum length (in bytes) of the result if you were to decode base 64 encoded text from a span of size . + /// + /// The length of the base64-encoded input. + /// The maximum number of bytes that decoding could produce. 
+ /// + /// is less than 0. + /// + /// + /// This method is equivalent to . + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int GetMaxDecodedLength(int base64Length) + { + ArgumentOutOfRangeException.ThrowIfNegative(base64Length); + + return (base64Length >> 2) * 3; + } + + /// + /// Returns the maximum length (in bytes) of the result if you were to decode base 64 encoded text within a byte span of size "length". + /// + /// + /// Thrown when the specified is less than 0. + /// + /// + /// This method is equivalent to . + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int GetMaxDecodedFromUtf8Length(int length) => GetMaxDecodedLength(length); + /// /// Decode the span of UTF-8 encoded text represented as base64 into binary data. /// If the input is not a multiple of 4, it will decode as much as it can, to the closest multiple of 4. @@ -33,17 +66,80 @@ public static OperationStatus DecodeFromUtf8(ReadOnlySpan utf8, Span DecodeFrom(default(Base64DecoderByte), utf8, bytes, out bytesConsumed, out bytesWritten, isFinalBlock, ignoreWhiteSpace: true); /// - /// Returns the maximum length (in bytes) of the result if you were to decode base 64 encoded text within a byte span of size "length". + /// Decodes the span of UTF-8 encoded text represented as Base64 into binary data. /// - /// - /// Thrown when the specified is less than 0. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int GetMaxDecodedFromUtf8Length(int length) + /// The input span which contains UTF-8 encoded text in Base64 that needs to be decoded. + /// The output span which contains the result of the operation, i.e. the decoded binary data. + /// The number of bytes written into . This can be used to slice the output for subsequent calls, if necessary. + /// The buffer in is too small to hold the decoded output. 
+ /// contains an invalid Base64 character, + /// more than two padding characters, or a non white space character among the padding characters. + public static int DecodeFromUtf8(ReadOnlySpan source, Span destination) + { + OperationStatus status = DecodeFromUtf8(source, destination, out _, out int bytesWritten); + + if (status == OperationStatus.Done) + { + return bytesWritten; + } + + if (status == OperationStatus.DestinationTooSmall) + { + throw new ArgumentException(SR.Argument_DestinationTooShort, nameof(destination)); + } + + Debug.Assert(status == OperationStatus.InvalidData); + throw new FormatException(SR.Format_BadBase64Char); + } + + /// + /// Decodes the span of UTF-8 encoded text represented as Base64 into binary data. + /// + /// The input span which contains UTF-8 encoded text in Base64 that needs to be decoded. + /// A byte array which contains the result of the decoding operation. + /// contains an invalid Base64 character, + /// more than two padding characters, or a non white space character among the padding characters. + public static byte[] DecodeFromUtf8(ReadOnlySpan source) + { + int upperBound = GetMaxDecodedLength(source.Length); + byte[]? rented = null; + + Span destination = (uint)upperBound <= MaxStackallocThreshold + ? stackalloc byte[MaxStackallocThreshold] + : (rented = ArrayPool.Shared.Rent(upperBound)); + + OperationStatus status = DecodeFromUtf8(source, destination, out _, out int bytesWritten); + Debug.Assert(status is OperationStatus.Done or OperationStatus.InvalidData); + byte[] result = destination.Slice(0, bytesWritten).ToArray(); + + if (rented is not null) + { + ArrayPool.Shared.Return(rented); + } + + return status == OperationStatus.Done ? result : throw new FormatException(SR.Format_BadBase64Char); + } + + /// + /// Decodes the span of UTF-8 encoded text represented as Base64 into binary data. + /// + /// The input span which contains UTF-8 encoded text in Base64 that needs to be decoded. 
+ /// The output span which contains the result of the operation, i.e. the decoded binary data. + /// When this method returns, contains the number of bytes written into the output span. This can be used to slice the output for subsequent calls, if necessary. This parameter is treated as uninitialized. + /// if bytes decoded successfully, otherwise . + /// contains an invalid Base64 character, + /// more than two padding characters, or a non white space character among the padding characters. + public static bool TryDecodeFromUtf8(ReadOnlySpan source, Span destination, out int bytesWritten) { - ArgumentOutOfRangeException.ThrowIfNegative(length); + OperationStatus status = DecodeFromUtf8(source, destination, out _, out bytesWritten); + + if (status == OperationStatus.InvalidData) + { + throw new FormatException(SR.Format_BadBase64Char); + } - return (length >> 2) * 3; + Debug.Assert(status is OperationStatus.Done or OperationStatus.DestinationTooSmall); + return status == OperationStatus.Done; } /// @@ -63,5 +159,97 @@ public static int GetMaxDecodedFromUtf8Length(int length) /// public static OperationStatus DecodeFromUtf8InPlace(Span buffer, out int bytesWritten) => Base64Helper.DecodeFromUtf8InPlace(default(Base64DecoderByte), buffer, out bytesWritten, ignoreWhiteSpace: true); + + /// + /// Decodes the span of unicode ASCII chars represented as Base64 into binary data. + /// + /// The input span which contains unicode ASCII chars in Base64 that needs to be decoded. + /// The output span which contains the result of the operation, i.e. the decoded binary data. + /// When this method returns, contains the number of input chars consumed during the operation. This can be used to slice the input for subsequent calls, if necessary. This parameter is treated as uninitialized. + /// When this method returns, contains the number of bytes written into the output span. This can be used to slice the output for subsequent calls, if necessary. 
This parameter is treated as uninitialized. + /// when the input span contains the entirety of data to decode; when more data may follow, + /// such as when calling in a loop. Calls with should be followed up with another call where this parameter is . The default is . + /// One of the enumeration values that indicates the success or failure of the operation. + public static OperationStatus DecodeFromChars(ReadOnlySpan source, Span destination, + out int charsConsumed, out int bytesWritten, bool isFinalBlock = true) => + DecodeFrom(default(Base64DecoderChar), MemoryMarshal.Cast(source), destination, + out charsConsumed, out bytesWritten, isFinalBlock, ignoreWhiteSpace: true); + + /// + /// Decodes the span of unicode ASCII chars represented as Base64 into binary data. + /// + /// The input span which contains ASCII chars in Base64 that needs to be decoded. + /// The output span which contains the result of the operation, i.e. the decoded binary data. + /// The number of bytes written into the output span. This can be used to slice the output for subsequent calls, if necessary. + /// The buffer in is too small to hold the decoded output. + /// contains an invalid Base64 character, + /// more than two padding characters, or a non white space character among the padding characters. + public static int DecodeFromChars(ReadOnlySpan source, Span destination) + { + OperationStatus status = DecodeFromChars(source, destination, out _, out int bytesWritten); + + if (status == OperationStatus.Done) + { + return bytesWritten; + } + + if (status == OperationStatus.DestinationTooSmall) + { + throw new ArgumentException(SR.Argument_DestinationTooShort, nameof(destination)); + } + + Debug.Assert(status == OperationStatus.InvalidData); + throw new FormatException(SR.Format_BadBase64Char); + } + + /// + /// Decodes the span of unicode ASCII chars represented as Base64 into binary data. + /// + /// The input span which contains ASCII chars in Base64 that needs to be decoded. 
+ /// The output span which contains the result of the operation, i.e. the decoded binary data. + /// When this method returns, contains the number of bytes written into the output span. This can be used to slice the output for subsequent calls, if necessary. This parameter is treated as uninitialized. + /// if bytes decoded successfully, otherwise . + /// contains an invalid Base64 character, + /// more than two padding characters, or a non white space character among the padding characters. + public static bool TryDecodeFromChars(ReadOnlySpan source, Span destination, out int bytesWritten) + { + OperationStatus status = DecodeFromChars(source, destination, out _, out bytesWritten); + + if (status == OperationStatus.InvalidData) + { + throw new FormatException(SR.Format_BadBase64Char); + } + + Debug.Assert(status is OperationStatus.Done or OperationStatus.DestinationTooSmall); + return status == OperationStatus.Done; + } + + /// + /// Decodes the span of unicode ASCII chars represented as Base64 into binary data. + /// + /// The input span which contains ASCII chars in Base64 that needs to be decoded. + /// A byte array which contains the result of the decoding operation. + /// contains an invalid Base64 character, + /// more than two padding characters, or a non white space character among the padding characters. + public static byte[] DecodeFromChars(ReadOnlySpan source) + { + int upperBound = GetMaxDecodedLength(source.Length); + byte[]? rented = null; + + Span destination = (uint)upperBound <= MaxStackallocThreshold + ? stackalloc byte[MaxStackallocThreshold] + : (rented = ArrayPool.Shared.Rent(upperBound)); + + OperationStatus status = DecodeFromChars(source, destination, out _, out int bytesWritten); + Debug.Assert(status is OperationStatus.Done or OperationStatus.InvalidData); + byte[] result = destination.Slice(0, bytesWritten).ToArray(); + + if (rented is not null) + { + ArrayPool.Shared.Return(rented); + } + + return status == OperationStatus.Done ? 
result : throw new FormatException(SR.Format_BadBase64Char); + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Encoder.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Encoder.cs index 13e5de5868594b..c599dc014da371 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Encoder.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Encoder.cs @@ -1,19 +1,49 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Diagnostics; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using static System.Buffers.Text.Base64Helper; namespace System.Buffers.Text { - // AVX2 version based on https://github.com/aklomp/base64/tree/e516d769a2a432c08404f1981e73b431566057be/lib/arch/avx2 - // Vector128 version based on https://github.com/aklomp/base64/tree/e516d769a2a432c08404f1981e73b431566057be/lib/arch/ssse3 - /// /// Convert between binary data and UTF-8 encoded text that is represented in base 64. /// public static partial class Base64 { + /// + /// Returns the length (in bytes) of the result if you were to encode binary data within a byte span of size . + /// + /// The number of bytes to encode. + /// The number of bytes that encoding will produce. + /// + /// is less than 0 or greater than 1610612733. + /// + /// + /// This method is equivalent to . The encoded length for base64 is exactly calculated, not an upper bound. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int GetEncodedLength(int bytesLength) + { + ArgumentOutOfRangeException.ThrowIfGreaterThan((uint)bytesLength, MaximumEncodeLength); + + return ((bytesLength + 2) / 3) * 4; + } + + /// + /// Returns the maximum length (in bytes) of the result if you were to encode binary data within a byte span of size "length". 
+ /// + /// + /// Thrown when the specified is less than 0 or larger than 1610612733 (since encode inflates the data by 4/3). + /// + /// + /// This method is equivalent to . The encoded length for base64 is exactly calculated, not an upper bound. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int GetMaxEncodedToUtf8Length(int length) => GetEncodedLength(length); + /// /// Encode the span of binary data into UTF-8 encoded text represented as base64. /// @@ -35,17 +65,51 @@ public static OperationStatus EncodeToUtf8(ReadOnlySpan bytes, Span EncodeTo(default(Base64EncoderByte), bytes, utf8, out bytesConsumed, out bytesWritten, isFinalBlock); /// - /// Returns the maximum length (in bytes) of the result if you were to encode binary data within a byte span of size "length". + /// Encodes the span of binary data into UTF-8 encoded text represented as Base64. /// - /// - /// Thrown when the specified is less than 0 or larger than 1610612733 (since encode inflates the data by 4/3). - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int GetMaxEncodedToUtf8Length(int length) + /// The input span which contains binary data that needs to be encoded. + /// The output span which contains the result of the operation, i.e. the UTF-8 encoded text in Base64. + /// The number of bytes written into the destination span. This can be used to slice the output for subsequent calls, if necessary. + /// The buffer in is too small to hold the encoded output. + public static int EncodeToUtf8(ReadOnlySpan source, Span destination) + { + OperationStatus status = EncodeToUtf8(source, destination, out _, out int bytesWritten); + + if (status == OperationStatus.Done) + { + return bytesWritten; + } + + Debug.Assert(status == OperationStatus.DestinationTooSmall); + throw new ArgumentException(SR.Argument_DestinationTooShort, nameof(destination)); + } + + /// + /// Encodes the span of binary data into UTF-8 encoded text represented as Base64. 
+ /// + /// The input span which contains binary data that needs to be encoded. + /// The output byte array which contains the result of the operation, i.e. the UTF-8 encoded text in Base64. + public static byte[] EncodeToUtf8(ReadOnlySpan source) + { + byte[] destination = new byte[GetEncodedLength(source.Length)]; + EncodeToUtf8(source, destination, out _, out int bytesWritten); + Debug.Assert(destination.Length == bytesWritten); + + return destination; + } + + /// + /// Encodes the span of binary data into UTF-8 encoded text represented as Base64. + /// + /// The input span which contains binary data that needs to be encoded. + /// The output span which contains the result of the operation, i.e. the UTF-8 encoded text in Base64. + /// When this method returns, contains the number of bytes written into the output span. This can be used to slice the output for subsequent calls, if necessary. This parameter is treated as uninitialized. + /// if bytes encoded successfully, otherwise . + public static bool TryEncodeToUtf8(ReadOnlySpan source, Span destination, out int bytesWritten) { - ArgumentOutOfRangeException.ThrowIfGreaterThan((uint)length, MaximumEncodeLength); + OperationStatus status = EncodeToUtf8(source, destination, out _, out bytesWritten); - return ((length + 2) / 3) * 4; + return status == OperationStatus.Done; } /// @@ -65,5 +129,96 @@ public static int GetMaxEncodedToUtf8Length(int length) /// public static OperationStatus EncodeToUtf8InPlace(Span buffer, int dataLength, out int bytesWritten) => Base64Helper.EncodeToUtf8InPlace(default(Base64EncoderByte), buffer, dataLength, out bytesWritten); + + /// + /// Encodes the span of binary data (in-place) into UTF-8 encoded text represented as base 64. + /// The encoded text output is larger than the binary data contained in the input (the operation inflates the data). + /// + /// The input span which contains binary data that needs to be encoded. 
+ /// It needs to be large enough to fit the result of the operation. + /// The amount of binary data contained within the buffer that needs to be encoded + /// (and needs to be smaller than the buffer length). + /// When this method returns, contains the number of bytes written into the buffer. This parameter is treated as uninitialized. + /// if bytes encoded successfully, otherwise . + public static bool TryEncodeToUtf8InPlace(Span buffer, int dataLength, out int bytesWritten) + { + OperationStatus status = EncodeToUtf8InPlace(buffer, dataLength, out bytesWritten); + + return status == OperationStatus.Done; + } + + /// + /// Encodes the span of binary data into unicode ASCII chars represented as Base64. + /// + /// The input span which contains binary data that needs to be encoded. + /// The output span which contains the result of the operation, i.e. the ASCII chars in Base64. + /// When this method returns, contains the number of input bytes consumed during the operation. This can be used to slice the input for subsequent calls, if necessary. This parameter is treated as uninitialized. + /// When this method returns, contains the number of chars written into the output span. This can be used to slice the output for subsequent calls, if necessary. This parameter is treated as uninitialized. + /// when the input span contains the entirety of data to encode; when more data may follow, + /// such as when calling in a loop, subsequent calls with should end with call. The default is . + /// One of the enumeration values that indicates the success or failure of the operation. + public static OperationStatus EncodeToChars(ReadOnlySpan source, Span destination, + out int bytesConsumed, out int charsWritten, bool isFinalBlock = true) => + EncodeTo(default(Base64EncoderChar), source, MemoryMarshal.Cast(destination), out bytesConsumed, out charsWritten, isFinalBlock); + + /// + /// Encodes the span of binary data into unicode ASCII chars represented as Base64. 
+ /// + /// The input span which contains binary data that needs to be encoded. + /// The output span which contains the result of the operation, i.e. the ASCII chars in Base64. + /// The number of chars written into the destination span. This can be used to slice the output for subsequent calls, if necessary. + /// The buffer in is too small to hold the encoded output. + public static int EncodeToChars(ReadOnlySpan source, Span destination) + { + OperationStatus status = EncodeToChars(source, destination, out _, out int charsWritten); + + if (status == OperationStatus.Done) + { + return charsWritten; + } + + Debug.Assert(status == OperationStatus.DestinationTooSmall); + throw new ArgumentException(SR.Argument_DestinationTooShort, nameof(destination)); + } + + /// + /// Encodes the span of binary data into unicode ASCII chars represented as Base64. + /// + /// The input span which contains binary data that needs to be encoded. + /// A char array which contains the result of the operation, i.e. the ASCII chars in Base64. + public static char[] EncodeToChars(ReadOnlySpan source) + { + char[] destination = new char[GetEncodedLength(source.Length)]; + EncodeToChars(source, destination, out _, out int charsWritten); + Debug.Assert(destination.Length == charsWritten); + + return destination; + } + + /// + /// Encodes the span of binary data into unicode string represented as Base64 ASCII chars. + /// + /// The input span which contains binary data that needs to be encoded. + /// A string which contains the result of the operation, i.e. the ASCII string in Base64. 
+ public static string EncodeToString(ReadOnlySpan source) => + string.Create(GetEncodedLength(source.Length), source, static (buffer, source) => + { + EncodeToChars(source, buffer, out _, out int charsWritten); + Debug.Assert(buffer.Length == charsWritten, $"The source length: {source.Length}, chars written: {charsWritten}"); + }); + + /// + /// Encodes the span of binary data into unicode ASCII chars represented as Base64. + /// + /// The input span which contains binary data that needs to be encoded. + /// The output span which contains the result of the operation, i.e. the ASCII chars in Base64. + /// When this method returns, contains the number of chars written into the output span. This can be used to slice the output for subsequent calls, if necessary. This parameter is treated as uninitialized. + /// if chars encoded successfully, otherwise . + public static bool TryEncodeToChars(ReadOnlySpan source, Span destination, out int charsWritten) + { + OperationStatus status = EncodeToChars(source, destination, out _, out charsWritten); + + return status == OperationStatus.Done; + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Helper/Base64DecoderHelper.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Helper/Base64DecoderHelper.cs index d63377faf110ec..33d875527ed789 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Helper/Base64DecoderHelper.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Helper/Base64DecoderHelper.cs @@ -4,6 +4,7 @@ using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Text; #if NET using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; @@ -12,6 +13,8 @@ namespace System.Buffers.Text { + // AVX2 version based on https://github.com/aklomp/base64/tree/e516d769a2a432c08404f1981e73b431566057be/lib/arch/avx2 + // Vector128 version based on 
https://github.com/aklomp/base64/tree/e516d769a2a432c08404f1981e73b431566057be/lib/arch/ssse3 internal static partial class Base64Helper { internal static unsafe OperationStatus DecodeFrom(TBase64Decoder decoder, ReadOnlySpan source, Span bytes, @@ -500,7 +503,7 @@ internal static OperationStatus DecodeWithWhiteSpaceBlockwise(TB // If this block contains padding and there's another block, then only whitespace may follow for being valid. if (hasAnotherBlock) { - int paddingCount = GetPaddingCount(decoder, ref buffer[BlockSize - 1]); + int paddingCount = GetPaddingCount(decoder, ref buffer[BlockSize - 1]); if (paddingCount > 0) { hasAnotherBlock = false; @@ -548,6 +551,103 @@ internal static OperationStatus DecodeWithWhiteSpaceBlockwise(TB return status; } + internal static OperationStatus DecodeWithWhiteSpaceBlockwise(TBase64Decoder decoder, ReadOnlySpan source, Span bytes, ref int bytesConsumed, ref int bytesWritten, bool isFinalBlock = true) + where TBase64Decoder : IBase64Decoder + { + const int BlockSize = 4; + Span buffer = stackalloc ushort[BlockSize]; + OperationStatus status = OperationStatus.Done; + + while (!source.IsEmpty) + { + int encodedIdx = 0; + int bufferIdx = 0; + int skipped = 0; + + for (; encodedIdx < source.Length && (uint)bufferIdx < (uint)buffer.Length; ++encodedIdx) + { + if (IsWhiteSpace(source[encodedIdx])) + { + skipped++; + } + else + { + buffer[bufferIdx] = source[encodedIdx]; + bufferIdx++; + } + } + + source = source.Slice(encodedIdx); + bytesConsumed += skipped; + + if (bufferIdx == 0) + { + continue; + } + + bool hasAnotherBlock; + + if (decoder is Base64DecoderByte) + { + hasAnotherBlock = source.Length >= BlockSize; + } + else + { + hasAnotherBlock = source.Length > 1; + } + + bool localIsFinalBlock = !hasAnotherBlock; + + // If this block contains padding and there's another block, then only whitespace may follow for being valid. 
+ if (hasAnotherBlock) + { + int paddingCount = GetPaddingCount(decoder, ref buffer[BlockSize - 1]); + if (paddingCount > 0) + { + hasAnotherBlock = false; + localIsFinalBlock = true; + } + } + + if (localIsFinalBlock && !isFinalBlock) + { + localIsFinalBlock = false; + } + + status = DecodeFrom(decoder, buffer.Slice(0, bufferIdx), bytes, out int localConsumed, out int localWritten, localIsFinalBlock, ignoreWhiteSpace: false); + bytesConsumed += localConsumed; + bytesWritten += localWritten; + + if (status != OperationStatus.Done) + { + return status; + } + + // The remaining data must all be whitespace in order to be valid. + if (!hasAnotherBlock) + { + for (int i = 0; i < source.Length; ++i) + { + if (!IsWhiteSpace(source[i])) + { + // Revert previous dest increment, since an invalid state followed. + bytesConsumed -= localConsumed; + bytesWritten -= localWritten; + + return OperationStatus.InvalidData; + } + } + + bytesConsumed += source.Length; + break; + } + + bytes = bytes.Slice(localWritten); + } + + return status; + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int GetPaddingCount(TBase64Decoder decoder, ref byte ptrToLastElement) where TBase64Decoder : IBase64Decoder @@ -567,6 +667,25 @@ private static int GetPaddingCount(TBase64Decoder decoder, ref b return padding; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int GetPaddingCount(TBase64Decoder decoder, ref ushort ptrToLastElement) + where TBase64Decoder : IBase64Decoder + { + int padding = 0; + + if (decoder.IsValidPadding(ptrToLastElement)) + { + padding++; + } + + if (decoder.IsValidPadding(Unsafe.Subtract(ref ptrToLastElement, 1))) + { + padding++; + } + + return padding; + } + private static OperationStatus DecodeWithWhiteSpaceFromUtf8InPlace(TBase64Decoder decoder, Span source, ref int destIndex, uint sourceIndex) where TBase64Decoder : IBase64Decoder { @@ -1453,5 +1572,220 @@ public OperationStatus DecodeWithWhiteSpaceBlockwiseWrapper(TBas where 
TBase64Decoder : IBase64Decoder => DecodeWithWhiteSpaceBlockwise(decoder, utf8, bytes, ref bytesConsumed, ref bytesWritten, isFinalBlock); } + + internal readonly struct Base64DecoderChar : IBase64Decoder + { + public ReadOnlySpan DecodingMap => default(Base64DecoderByte).DecodingMap; + + public ReadOnlySpan VbmiLookup0 => default(Base64DecoderByte).VbmiLookup0; + + public ReadOnlySpan VbmiLookup1 => default(Base64DecoderByte).VbmiLookup1; + + public ReadOnlySpan Avx2LutHigh => default(Base64DecoderByte).Avx2LutHigh; + + public ReadOnlySpan Avx2LutLow => default(Base64DecoderByte).Avx2LutLow; + + public ReadOnlySpan Avx2LutShift => default(Base64DecoderByte).Avx2LutShift; + + public byte MaskSlashOrUnderscore => default(Base64DecoderByte).MaskSlashOrUnderscore; + + public ReadOnlySpan Vector128LutHigh => default(Base64DecoderByte).Vector128LutHigh; + + public ReadOnlySpan Vector128LutLow => default(Base64DecoderByte).Vector128LutLow; + + public ReadOnlySpan Vector128LutShift => default(Base64DecoderByte).Vector128LutShift; + + public ReadOnlySpan AdvSimdLutOne3 => default(Base64DecoderByte).AdvSimdLutOne3; + + public uint AdvSimdLutTwo3Uint1 => default(Base64DecoderByte).AdvSimdLutTwo3Uint1; + + public int GetMaxDecodedLength(int sourceLength) => Base64.GetMaxDecodedFromUtf8Length(sourceLength); + + public bool IsInvalidLength(int bufferLength) => bufferLength % 4 != 0; + + public bool IsValidPadding(uint padChar) => padChar == EncodingPad; + + public int SrcLength(bool _, int sourceLength) => sourceLength & ~0x3; + +#if NET + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(Ssse3))] + public bool TryDecode128Core(Vector128 str, Vector128 hiNibbles, Vector128 maskSlashOrUnderscore, Vector128 mask8F, + Vector128 lutLow, Vector128 lutHigh, Vector128 lutShift, Vector128 shiftForUnderscore, out Vector128 result) => + default(Base64DecoderByte).TryDecode128Core(str, hiNibbles, 
maskSlashOrUnderscore, mask8F, lutLow, lutHigh, lutShift, shiftForUnderscore, out result); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Avx2))] + public bool TryDecode256Core(Vector256 str, Vector256 hiNibbles, Vector256 maskSlashOrUnderscore, Vector256 lutLow, + Vector256 lutHigh, Vector256 lutShift, Vector256 shiftForUnderscore, out Vector256 result) => + default(Base64DecoderByte).TryDecode256Core(str, hiNibbles, maskSlashOrUnderscore, lutLow, lutHigh, lutShift, shiftForUnderscore, out result); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe bool TryLoadVector512(ushort* src, ushort* srcStart, int sourceLength, out Vector512 str) + { + AssertRead>(src, srcStart, sourceLength); + Vector512 utf16VectorLower = Vector512.Load(src); + Vector512 utf16VectorUpper = Vector512.Load(src + 32); + if (Ascii.VectorContainsNonAsciiChar(utf16VectorLower | utf16VectorUpper)) + { + str = default; + return false; + } + + str = Ascii.ExtractAsciiVector(utf16VectorLower, utf16VectorUpper).AsSByte(); + return true; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Avx2))] + public unsafe bool TryLoadAvxVector256(ushort* src, ushort* srcStart, int sourceLength, out Vector256 str) + { + AssertRead>(src, srcStart, sourceLength); + Vector256 utf16VectorLower = Avx.LoadVector256(src); + Vector256 utf16VectorUpper = Avx.LoadVector256(src + 16); + + if (Ascii.VectorContainsNonAsciiChar(utf16VectorLower | utf16VectorUpper)) + { + str = default; + return false; + } + + str = Ascii.ExtractAsciiVector(utf16VectorLower, utf16VectorUpper).AsSByte(); + return true; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe bool TryLoadVector128(ushort* src, ushort* srcStart, int sourceLength, out Vector128 str) + { + AssertRead>(src, srcStart, sourceLength); + Vector128 utf16VectorLower = Vector128.LoadUnsafe(ref *src); + Vector128 utf16VectorUpper = Vector128.LoadUnsafe(ref 
*src, 8); + if (Ascii.VectorContainsNonAsciiChar(utf16VectorLower | utf16VectorUpper)) + { + str = default; + return false; + } + + str = Ascii.ExtractAsciiVector(utf16VectorLower, utf16VectorUpper); + return true; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + public unsafe bool TryLoadArmVector128x4(ushort* src, ushort* srcStart, int sourceLength, + out Vector128 str1, out Vector128 str2, out Vector128 str3, out Vector128 str4) + { + AssertRead>(src, srcStart, sourceLength); + var (s11, s12, s21, s22) = AdvSimd.Arm64.Load4xVector128AndUnzip(src); + var (s31, s32, s41, s42) = AdvSimd.Arm64.Load4xVector128AndUnzip(src + 32); + + if (Ascii.VectorContainsNonAsciiChar(s11 | s12 | s21 | s22 | s31 | s32 | s41 | s42)) + { + str1 = str2 = str3 = str4 = default; + return false; + } + + str1 = Ascii.ExtractAsciiVector(s11, s31); + str2 = Ascii.ExtractAsciiVector(s12, s32); + str3 = Ascii.ExtractAsciiVector(s21, s41); + str4 = Ascii.ExtractAsciiVector(s22, s42); + + return true; + } +#endif // NET + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe int DecodeFourElements(ushort* source, ref sbyte decodingMap) + { + // The 'source' span expected to have at least 4 elements, and the 'decodingMap' consists 256 sbytes + uint t0 = source[0]; + uint t1 = source[1]; + uint t2 = source[2]; + uint t3 = source[3]; + + if (((t0 | t1 | t2 | t3) & 0xffffff00) != 0) + { + return -1; // One or more chars falls outside the 00..ff range, invalid Base64 character. 
+ } + + int i0 = Unsafe.Add(ref decodingMap, (int)t0); + int i1 = Unsafe.Add(ref decodingMap, (int)t1); + int i2 = Unsafe.Add(ref decodingMap, (int)t2); + int i3 = Unsafe.Add(ref decodingMap, (int)t3); + + i0 <<= 18; + i1 <<= 12; + i2 <<= 6; + + i0 |= i3; + i1 |= i2; + + i0 |= i1; + return i0; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe int DecodeRemaining(ushort* srcEnd, ref sbyte decodingMap, long remaining, out uint t2, out uint t3) + { + uint t0; + uint t1; + t2 = EncodingPad; + t3 = EncodingPad; + switch (remaining) + { + case 2: + t0 = srcEnd[-2]; + t1 = srcEnd[-1]; + break; + case 3: + t0 = srcEnd[-3]; + t1 = srcEnd[-2]; + t2 = srcEnd[-1]; + break; + case 4: + t0 = srcEnd[-4]; + t1 = srcEnd[-3]; + t2 = srcEnd[-2]; + t3 = srcEnd[-1]; + break; + default: + return -1; + } + + if (((t0 | t1 | t2 | t3) & 0xffffff00) != 0) + { + return -1; + } + + int i0 = Unsafe.Add(ref decodingMap, (IntPtr)t0); + int i1 = Unsafe.Add(ref decodingMap, (IntPtr)t1); + + i0 <<= 18; + i1 <<= 12; + + i0 |= i1; + return i0; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int IndexOfAnyExceptWhiteSpace(ReadOnlySpan span) + { + for (int i = 0; i < span.Length; i++) + { + if (!IsWhiteSpace(span[i])) + { + return i; + } + } + + return -1; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public OperationStatus DecodeWithWhiteSpaceBlockwiseWrapper(TBase64Decoder decoder, ReadOnlySpan source, + Span bytes, ref int bytesConsumed, ref int bytesWritten, bool isFinalBlock = true) where TBase64Decoder : IBase64Decoder => + DecodeWithWhiteSpaceBlockwise(default(Base64DecoderChar), source, bytes, ref bytesConsumed, ref bytesWritten, isFinalBlock); + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Helper/Base64EncoderHelper.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Helper/Base64EncoderHelper.cs index 5a1d40f8cd4592..2afe15475c236b 100644 --- 
a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Helper/Base64EncoderHelper.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Helper/Base64EncoderHelper.cs @@ -11,6 +11,8 @@ namespace System.Buffers.Text { + // AVX2 version based on https://github.com/aklomp/base64/tree/e516d769a2a432c08404f1981e73b431566057be/lib/arch/avx2 + // Vector128 version based on https://github.com/aklomp/base64/tree/e516d769a2a432c08404f1981e73b431566057be/lib/arch/ssse3 internal static partial class Base64Helper { internal static unsafe OperationStatus EncodeTo(TBase64Encoder encoder, ReadOnlySpan source, @@ -656,6 +658,47 @@ private static uint ConstructResult(uint i0, uint i1, uint i2, uint i3) } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static unsafe void EncodeOneOptionallyPadTwo(byte* oneByte, ushort* dest, ref byte encodingMap) + { + uint t0 = oneByte[0]; + + uint i = t0 << 8; + + uint i0 = Unsafe.Add(ref encodingMap, (IntPtr)(i >> 10)); + uint i1 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 4) & 0x3F)); + + uint result; + + if (BitConverter.IsLittleEndian) + { + result = (i0 | (i1 << 16)); + } + else + { + result = ((i0 << 16) | i1); + } + + Unsafe.WriteUnaligned(dest, result); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static unsafe void EncodeTwoOptionallyPadOne(byte* twoBytes, ushort* dest, ref byte encodingMap) + { + uint t0 = twoBytes[0]; + uint t1 = twoBytes[1]; + + uint i = (t0 << 16) | (t1 << 8); + + ushort i0 = Unsafe.Add(ref encodingMap, (IntPtr)(i >> 18)); + ushort i1 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 12) & 0x3F)); + ushort i2 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 6) & 0x3F)); + + dest[0] = i0; + dest[1] = i1; + dest[2] = i2; + } + internal const uint EncodingPad = '='; // '=', for padding internal const int MaximumEncodeLength = (int.MaxValue / 4) * 3; // 1610612733 @@ -755,5 +798,114 @@ public unsafe void EncodeThreeAndWrite(byte* threeBytes, byte* 
destination, ref Unsafe.WriteUnaligned(destination, result); } } + + internal readonly struct Base64EncoderChar : IBase64Encoder + { + public ReadOnlySpan EncodingMap => default(Base64EncoderByte).EncodingMap; + + public sbyte Avx2LutChar62 => default(Base64EncoderByte).Avx2LutChar62; + + public sbyte Avx2LutChar63 => default(Base64EncoderByte).Avx2LutChar63; + + public ReadOnlySpan AdvSimdLut4 => default(Base64EncoderByte).AdvSimdLut4; + + public uint Ssse3AdvSimdLutE3 => default(Base64EncoderByte).Ssse3AdvSimdLutE3; + + public int IncrementPadTwo => default(Base64EncoderByte).IncrementPadTwo; + + public int IncrementPadOne => default(Base64EncoderByte).IncrementPadOne; + + public int GetMaxSrcLength(int srcLength, int destLength) => + default(Base64EncoderByte).GetMaxSrcLength(srcLength, destLength); + + public uint GetInPlaceDestinationLength(int encodedLength, int _) => 0; // not used for char encoding + + public int GetMaxEncodedLength(int _) => 0; // not used for char encoding + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe void EncodeOneOptionallyPadTwo(byte* oneByte, ushort* dest, ref byte encodingMap) + { + Base64Helper.EncodeOneOptionallyPadTwo(oneByte, dest, ref encodingMap); + dest[2] = (ushort)EncodingPad; + dest[3] = (ushort)EncodingPad; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe void EncodeTwoOptionallyPadOne(byte* twoBytes, ushort* dest, ref byte encodingMap) + { + Base64Helper.EncodeTwoOptionallyPadOne(twoBytes, dest, ref encodingMap); + dest[3] = (ushort)EncodingPad; + } + +#if NET + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe void StoreVector512ToDestination(ushort* dest, ushort* destStart, int destLength, Vector512 str) + { + AssertWrite>(dest, destStart, destLength); + (Vector512 utf16LowVector, Vector512 utf16HighVector) = Vector512.Widen(str); + utf16LowVector.Store(dest); + utf16HighVector.Store(dest + 32); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] 
+ public unsafe void StoreVector256ToDestination(ushort* dest, ushort* destStart, int destLength, Vector256 str) + { + AssertWrite>(dest, destStart, destLength); + (Vector256 utf16LowVector, Vector256 utf16HighVector) = Vector256.Widen(str); + utf16LowVector.Store(dest); + utf16HighVector.Store(dest + 16); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe void StoreVector128ToDestination(ushort* dest, ushort* destStart, int destLength, Vector128 str) + { + AssertWrite>(dest, destStart, destLength); + (Vector128 utf16LowVector, Vector128 utf16HighVector) = Vector128.Widen(str); + utf16LowVector.Store(dest); + utf16HighVector.Store(dest + 8); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + public unsafe void StoreArmVector128x4ToDestination(ushort* dest, ushort* destStart, int destLength, + Vector128 res1, Vector128 res2, Vector128 res3, Vector128 res4) + { + AssertWrite>(dest, destStart, destLength); + (Vector128 utf16LowVector1, Vector128 utf16HighVector1) = Vector128.Widen(res1); + (Vector128 utf16LowVector2, Vector128 utf16HighVector2) = Vector128.Widen(res2); + (Vector128 utf16LowVector3, Vector128 utf16HighVector3) = Vector128.Widen(res3); + (Vector128 utf16LowVector4, Vector128 utf16HighVector4) = Vector128.Widen(res4); + AdvSimd.Arm64.StoreVectorAndZip(dest, (utf16LowVector1, utf16LowVector2, utf16LowVector3, utf16LowVector4)); + AdvSimd.Arm64.StoreVectorAndZip(dest + 32, (utf16HighVector1, utf16HighVector2, utf16HighVector3, utf16HighVector4)); + } +#endif // NET + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe void EncodeThreeAndWrite(byte* threeBytes, ushort* destination, ref byte encodingMap) + { + uint t0 = threeBytes[0]; + uint t1 = threeBytes[1]; + uint t2 = threeBytes[2]; + + uint i = (t0 << 16) | (t1 << 8) | t2; + + ulong i0 = Unsafe.Add(ref encodingMap, (IntPtr)(i >> 18)); + ulong i1 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 12) & 0x3F)); + 
ulong i2 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 6) & 0x3F)); + ulong i3 = Unsafe.Add(ref encodingMap, (IntPtr)(i & 0x3F)); + + ulong result; + if (BitConverter.IsLittleEndian) + { + result = i0 | (i1 << 16) | (i2 << 32) | (i3 << 48); + } + else + { + result = (i0 << 48) | (i1 << 32) | (i2 << 16) | i3; + } + + Unsafe.WriteUnaligned(destination, result); + } + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Helper/Base64Helper.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Helper/Base64Helper.cs index cd3dc499689ad8..28429d9382b81e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Helper/Base64Helper.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Helper/Base64Helper.cs @@ -6,9 +6,6 @@ using System.Runtime.CompilerServices; #if NET using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.Arm; -using System.Runtime.Intrinsics.X86; -using System.Runtime.Intrinsics.Wasm; #endif namespace System.Buffers.Text diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Url/Base64UrlDecoder.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Url/Base64UrlDecoder.cs index ffa7d9716b8c37..7c2439c6e39a35 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Url/Base64UrlDecoder.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Url/Base64UrlDecoder.cs @@ -9,7 +9,6 @@ using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; #endif -using System.Text; using static System.Buffers.Text.Base64Helper; namespace System.Buffers.Text @@ -202,124 +201,6 @@ public static OperationStatus DecodeFromChars(ReadOnlySpan source, Span(source), destination, out charsConsumed, out bytesWritten, isFinalBlock, ignoreWhiteSpace: true); - private static OperationStatus DecodeWithWhiteSpaceBlockwise(TBase64Decoder decoder, - ReadOnlySpan source, Span bytes, ref 
int bytesConsumed, ref int bytesWritten, bool isFinalBlock = true) - where TBase64Decoder : IBase64Decoder - { - const int BlockSize = 4; - Span buffer = stackalloc ushort[BlockSize]; - OperationStatus status = OperationStatus.Done; - - while (!source.IsEmpty) - { - int encodedIdx = 0; - int bufferIdx = 0; - int skipped = 0; - - for (; encodedIdx < source.Length && (uint)bufferIdx < (uint)buffer.Length; ++encodedIdx) - { - if (IsWhiteSpace(source[encodedIdx])) - { - skipped++; - } - else - { - buffer[bufferIdx] = source[encodedIdx]; - bufferIdx++; - } - } - - source = source.Slice(encodedIdx); - bytesConsumed += skipped; - - if (bufferIdx == 0) - { - continue; - } - - bool hasAnotherBlock; - - if (decoder is Base64DecoderByte) - { - hasAnotherBlock = source.Length >= BlockSize; - } - else - { - hasAnotherBlock = source.Length > 1; - } - - bool localIsFinalBlock = !hasAnotherBlock; - - // If this block contains padding and there's another block, then only whitespace may follow for being valid. - if (hasAnotherBlock) - { - int paddingCount = GetPaddingCount(decoder, ref buffer[BlockSize - 1]); - if (paddingCount > 0) - { - hasAnotherBlock = false; - localIsFinalBlock = true; - } - } - - if (localIsFinalBlock && !isFinalBlock) - { - localIsFinalBlock = false; - } - - status = DecodeFrom(decoder, buffer.Slice(0, bufferIdx), bytes, out int localConsumed, out int localWritten, localIsFinalBlock, ignoreWhiteSpace: false); - bytesConsumed += localConsumed; - bytesWritten += localWritten; - - if (status != OperationStatus.Done) - { - return status; - } - - // The remaining data must all be whitespace in order to be valid. - if (!hasAnotherBlock) - { - for (int i = 0; i < source.Length; ++i) - { - if (!IsWhiteSpace(source[i])) - { - // Revert previous dest increment, since an invalid state followed. 
- bytesConsumed -= localConsumed; - bytesWritten -= localWritten; - - return OperationStatus.InvalidData; - } - - bytesConsumed++; - } - - break; - } - - bytes = bytes.Slice(localWritten); - } - - return status; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int GetPaddingCount(TBase64Decoder decoder, ref ushort ptrToLastElement) - where TBase64Decoder : IBase64Decoder - { - int padding = 0; - - if (decoder.IsValidPadding(ptrToLastElement)) - { - padding++; - } - - if (decoder.IsValidPadding(Unsafe.Subtract(ref ptrToLastElement, 1))) - { - padding++; - } - - return padding; - } - /// /// Decodes the span of unicode ASCII chars represented as Base64Url into binary data. /// @@ -596,7 +477,7 @@ public unsafe int DecodeRemaining(byte* srcEnd, ref sbyte decodingMap, long rema [MethodImpl(MethodImplOptions.AggressiveInlining)] public OperationStatus DecodeWithWhiteSpaceBlockwiseWrapper(TBase64Decoder decoder, ReadOnlySpan utf8, Span bytes, ref int bytesConsumed, ref int bytesWritten, bool isFinalBlock = true) where TBase64Decoder : IBase64Decoder => - Base64Helper.DecodeWithWhiteSpaceBlockwise(decoder, utf8, bytes, ref bytesConsumed, ref bytesWritten, isFinalBlock); + DecodeWithWhiteSpaceBlockwise(decoder, utf8, bytes, ref bytesConsumed, ref bytesWritten, isFinalBlock); } private readonly struct Base64UrlDecoderChar : IBase64Decoder @@ -648,165 +529,35 @@ public bool TryDecode256Core(Vector256 str, Vector256 hiNibbles, V default(Base64UrlDecoderByte).TryDecode256Core(str, hiNibbles, maskSlashOrUnderscore, lutLow, lutHigh, lutShift, shiftForUnderscore, out result); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public unsafe bool TryLoadVector512(ushort* src, ushort* srcStart, int sourceLength, out Vector512 str) - { - AssertRead>(src, srcStart, sourceLength); - Vector512 utf16VectorLower = Vector512.Load(src); - Vector512 utf16VectorUpper = Vector512.Load(src + 32); - if (Ascii.VectorContainsNonAsciiChar(utf16VectorLower | 
utf16VectorUpper)) - { - str = default; - return false; - } - - str = Ascii.ExtractAsciiVector(utf16VectorLower, utf16VectorUpper).AsSByte(); - return true; - } + public unsafe bool TryLoadVector512(ushort* src, ushort* srcStart, int sourceLength, out Vector512 str) => + default(Base64DecoderChar).TryLoadVector512(src, srcStart, sourceLength, out str); [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Avx2))] - public unsafe bool TryLoadAvxVector256(ushort* src, ushort* srcStart, int sourceLength, out Vector256 str) - { - AssertRead>(src, srcStart, sourceLength); - Vector256 utf16VectorLower = Avx.LoadVector256(src); - Vector256 utf16VectorUpper = Avx.LoadVector256(src + 16); - - if (Ascii.VectorContainsNonAsciiChar(utf16VectorLower | utf16VectorUpper)) - { - str = default; - return false; - } - - str = Ascii.ExtractAsciiVector(utf16VectorLower, utf16VectorUpper).AsSByte(); - return true; - } + public unsafe bool TryLoadAvxVector256(ushort* src, ushort* srcStart, int sourceLength, out Vector256 str) => + default(Base64DecoderChar).TryLoadAvxVector256(src, srcStart, sourceLength, out str); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public unsafe bool TryLoadVector128(ushort* src, ushort* srcStart, int sourceLength, out Vector128 str) - { - AssertRead>(src, srcStart, sourceLength); - Vector128 utf16VectorLower = Vector128.LoadUnsafe(ref *src); - Vector128 utf16VectorUpper = Vector128.LoadUnsafe(ref *src, 8); - if (Ascii.VectorContainsNonAsciiChar(utf16VectorLower | utf16VectorUpper)) - { - str = default; - return false; - } - - str = Ascii.ExtractAsciiVector(utf16VectorLower, utf16VectorUpper); - return true; - } + public unsafe bool TryLoadVector128(ushort* src, ushort* srcStart, int sourceLength, out Vector128 str) => + default(Base64DecoderChar).TryLoadVector128(src, srcStart, sourceLength, out str); [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] public unsafe bool 
TryLoadArmVector128x4(ushort* src, ushort* srcStart, int sourceLength, - out Vector128 str1, out Vector128 str2, out Vector128 str3, out Vector128 str4) - { - AssertRead>(src, srcStart, sourceLength); - var (s11, s12, s21, s22) = AdvSimd.Arm64.Load4xVector128AndUnzip(src); - var (s31, s32, s41, s42) = AdvSimd.Arm64.Load4xVector128AndUnzip(src + 32); - - if (Ascii.VectorContainsNonAsciiChar(s11 | s12 | s21 | s22 | s31 | s32 | s41 | s42)) - { - str1 = str2 = str3 = str4 = default; - return false; - } - - str1 = Ascii.ExtractAsciiVector(s11, s31); - str2 = Ascii.ExtractAsciiVector(s12, s32); - str3 = Ascii.ExtractAsciiVector(s21, s41); - str4 = Ascii.ExtractAsciiVector(s22, s42); - - return true; - } + out Vector128 str1, out Vector128 str2, out Vector128 str3, out Vector128 str4) => + default(Base64DecoderChar).TryLoadArmVector128x4(src, srcStart, sourceLength, out str1, out str2, out str3, out str4); #endif // NET [MethodImpl(MethodImplOptions.AggressiveInlining)] - public unsafe int DecodeFourElements(ushort* source, ref sbyte decodingMap) - { - // The 'source' span expected to have at least 4 elements, and the 'decodingMap' consists 256 sbytes - uint t0 = source[0]; - uint t1 = source[1]; - uint t2 = source[2]; - uint t3 = source[3]; - - if (((t0 | t1 | t2 | t3) & 0xffffff00) != 0) - { - return -1; // One or more chars falls outside the 00..ff range, invalid Base64Url character. 
- } - - int i0 = Unsafe.Add(ref decodingMap, (int)t0); - int i1 = Unsafe.Add(ref decodingMap, (int)t1); - int i2 = Unsafe.Add(ref decodingMap, (int)t2); - int i3 = Unsafe.Add(ref decodingMap, (int)t3); - - i0 <<= 18; - i1 <<= 12; - i2 <<= 6; - - i0 |= i3; - i1 |= i2; - - i0 |= i1; - return i0; - } + public unsafe int DecodeFourElements(ushort* source, ref sbyte decodingMap) => + default(Base64DecoderChar).DecodeFourElements(source, ref decodingMap); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public unsafe int DecodeRemaining(ushort* srcEnd, ref sbyte decodingMap, long remaining, out uint t2, out uint t3) - { - uint t0; - uint t1; - t2 = EncodingPad; - t3 = EncodingPad; - switch (remaining) - { - case 2: - t0 = srcEnd[-2]; - t1 = srcEnd[-1]; - break; - case 3: - t0 = srcEnd[-3]; - t1 = srcEnd[-2]; - t2 = srcEnd[-1]; - break; - case 4: - t0 = srcEnd[-4]; - t1 = srcEnd[-3]; - t2 = srcEnd[-2]; - t3 = srcEnd[-1]; - break; - default: - return -1; - } - - if (((t0 | t1 | t2 | t3) & 0xffffff00) != 0) - { - return -1; - } - - int i0 = Unsafe.Add(ref decodingMap, (IntPtr)t0); - int i1 = Unsafe.Add(ref decodingMap, (IntPtr)t1); - - i0 <<= 18; - i1 <<= 12; - - i0 |= i1; - return i0; - } + public unsafe int DecodeRemaining(ushort* srcEnd, ref sbyte decodingMap, long remaining, out uint t2, out uint t3) => + default(Base64DecoderChar).DecodeRemaining(srcEnd, ref decodingMap, remaining, out t2, out t3); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int IndexOfAnyExceptWhiteSpace(ReadOnlySpan span) - { - for (int i = 0; i < span.Length; i++) - { - if (!IsWhiteSpace(span[i])) - { - return i; - } - } - - return -1; - } + public int IndexOfAnyExceptWhiteSpace(ReadOnlySpan span) => default(Base64DecoderChar).IndexOfAnyExceptWhiteSpace(span); [MethodImpl(MethodImplOptions.AggressiveInlining)] public OperationStatus DecodeWithWhiteSpaceBlockwiseWrapper(TBase64Decoder decoder, ReadOnlySpan source, Span bytes, diff --git 
a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Url/Base64UrlEncoder.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Url/Base64UrlEncoder.cs index 663b91dca8ee8f..e0ae10a2953989 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Url/Base64UrlEncoder.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Url/Base64UrlEncoder.cs @@ -333,115 +333,36 @@ public int GetMaxSrcLength(int srcLength, int destLength) => public int GetMaxEncodedLength(int _) => 0; // not used for char encoding [MethodImpl(MethodImplOptions.AggressiveInlining)] - public unsafe void EncodeOneOptionallyPadTwo(byte* oneByte, ushort* dest, ref byte encodingMap) - { - uint t0 = oneByte[0]; - - uint i = t0 << 8; - - uint i0 = Unsafe.Add(ref encodingMap, (IntPtr)(i >> 10)); - uint i1 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 4) & 0x3F)); - - uint result; - - if (BitConverter.IsLittleEndian) - { - result = (i0 | (i1 << 16)); - } - else - { - result = ((i0 << 16) | i1); - } - - Unsafe.WriteUnaligned(dest, result); - } + public unsafe void EncodeOneOptionallyPadTwo(byte* oneByte, ushort* dest, ref byte encodingMap) => + Base64Helper.EncodeOneOptionallyPadTwo(oneByte, dest, ref encodingMap); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public unsafe void EncodeTwoOptionallyPadOne(byte* twoBytes, ushort* dest, ref byte encodingMap) - { - uint t0 = twoBytes[0]; - uint t1 = twoBytes[1]; - - uint i = (t0 << 16) | (t1 << 8); - - ushort i0 = Unsafe.Add(ref encodingMap, (IntPtr)(i >> 18)); - ushort i1 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 12) & 0x3F)); - ushort i2 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 6) & 0x3F)); - - dest[0] = i0; - dest[1] = i1; - dest[2] = i2; - } + public unsafe void EncodeTwoOptionallyPadOne(byte* twoBytes, ushort* dest, ref byte encodingMap) => + Base64Helper.EncodeTwoOptionallyPadOne(twoBytes, dest, ref encodingMap); #if NET 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public unsafe void StoreVector512ToDestination(ushort* dest, ushort* destStart, int destLength, Vector512 str) - { - AssertWrite>(dest, destStart, destLength); - (Vector512 utf16LowVector, Vector512 utf16HighVector) = Vector512.Widen(str); - utf16LowVector.Store(dest); - utf16HighVector.Store(dest + Vector512.Count); - } + public unsafe void StoreVector512ToDestination(ushort* dest, ushort* destStart, int destLength, Vector512 str) => + default(Base64EncoderChar).StoreVector512ToDestination(dest, destStart, destLength, str); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public unsafe void StoreVector256ToDestination(ushort* dest, ushort* destStart, int destLength, Vector256 str) - { - AssertWrite>(dest, destStart, destLength); - (Vector256 utf16LowVector, Vector256 utf16HighVector) = Vector256.Widen(str); - utf16LowVector.Store(dest); - utf16HighVector.Store(dest + Vector256.Count); - } + public unsafe void StoreVector256ToDestination(ushort* dest, ushort* destStart, int destLength, Vector256 str) => + default(Base64EncoderChar).StoreVector256ToDestination(dest, destStart, destLength, str); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public unsafe void StoreVector128ToDestination(ushort* dest, ushort* destStart, int destLength, Vector128 str) - { - AssertWrite>(dest, destStart, destLength); - (Vector128 utf16LowVector, Vector128 utf16HighVector) = Vector128.Widen(str); - utf16LowVector.Store(dest); - utf16HighVector.Store(dest + Vector128.Count); - } + public unsafe void StoreVector128ToDestination(ushort* dest, ushort* destStart, int destLength, Vector128 str) => + default(Base64EncoderChar).StoreVector128ToDestination(dest, destStart, destLength, str); [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] public unsafe void StoreArmVector128x4ToDestination(ushort* dest, ushort* destStart, int destLength, - Vector128 res1, Vector128 res2, Vector128 res3, 
Vector128 res4) - { - AssertWrite>(dest, destStart, destLength); - (Vector128 utf16LowVector1, Vector128 utf16HighVector1) = Vector128.Widen(res1); - (Vector128 utf16LowVector2, Vector128 utf16HighVector2) = Vector128.Widen(res2); - (Vector128 utf16LowVector3, Vector128 utf16HighVector3) = Vector128.Widen(res3); - (Vector128 utf16LowVector4, Vector128 utf16HighVector4) = Vector128.Widen(res4); - AdvSimd.Arm64.StoreVectorAndZip(dest, (utf16LowVector1, utf16LowVector2, utf16LowVector3, utf16LowVector4)); - AdvSimd.Arm64.StoreVectorAndZip(dest + 32, (utf16HighVector1, utf16HighVector2, utf16HighVector3, utf16HighVector4)); - } + Vector128 res1, Vector128 res2, Vector128 res3, Vector128 res4) => + default(Base64EncoderChar).StoreArmVector128x4ToDestination(dest, destStart, destLength, res1, res2, res3, res4); #endif // NET [MethodImpl(MethodImplOptions.AggressiveInlining)] - public unsafe void EncodeThreeAndWrite(byte* threeBytes, ushort* destination, ref byte encodingMap) - { - uint t0 = threeBytes[0]; - uint t1 = threeBytes[1]; - uint t2 = threeBytes[2]; - - uint i = (t0 << 16) | (t1 << 8) | t2; - - ulong i0 = Unsafe.Add(ref encodingMap, (IntPtr)(i >> 18)); - ulong i1 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 12) & 0x3F)); - ulong i2 = Unsafe.Add(ref encodingMap, (IntPtr)((i >> 6) & 0x3F)); - ulong i3 = Unsafe.Add(ref encodingMap, (IntPtr)(i & 0x3F)); - - ulong result; - if (BitConverter.IsLittleEndian) - { - result = i0 | (i1 << 16) | (i2 << 32) | (i3 << 48); - } - else - { - result = (i0 << 48) | (i1 << 32) | (i2 << 16) | i3; - } - - Unsafe.WriteUnaligned(destination, result); - } + public unsafe void EncodeThreeAndWrite(byte* threeBytes, ushort* destination, ref byte encodingMap) => + default(Base64EncoderChar).EncodeThreeAndWrite(threeBytes, destination, ref encodingMap); } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Convert.Base64.cs b/src/libraries/System.Private.CoreLib/src/System/Convert.Base64.cs deleted file mode 100644 index 
27573b1c42a9ea..00000000000000 --- a/src/libraries/System.Private.CoreLib/src/System/Convert.Base64.cs +++ /dev/null @@ -1,215 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Diagnostics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -namespace System -{ - public static partial class Convert - { - /// - /// Decode the span of UTF-16 encoded text represented as base 64 into binary data. - /// If the input is not a multiple of 4, or contains illegal characters, it will decode as much as it can, to the largest possible multiple of 4. - /// This invariant allows continuation of the parse with a slower, whitespace-tolerant algorithm. - /// - /// The input span which contains UTF-16 encoded text in base 64 that needs to be decoded. - /// The output span which contains the result of the operation, i.e. the decoded binary data. - /// The number of input bytes consumed during the operation. This can be used to slice the input for subsequent calls, if necessary. - /// The number of bytes written into the output span. This can be used to slice the output for subsequent calls, if necessary. - /// Returns: - /// - true - The entire input span was successfully parsed. - /// - false - Only a part of the input span was successfully parsed. Failure causes may include embedded or trailing whitespace, - /// other illegal Base64 characters, trailing characters after an encoding pad ('='), an input span whose length is not divisible by 4 - /// or a destination span that's too small. and are set so that - /// parsing can continue with a slower whitespace-tolerant algorithm. - /// - /// Note: This is a cut down version of the implementation of Base64.DecodeFromUtf8(), modified the accept UTF-16 chars and act as a fast-path - /// helper for the Convert routines when the input string contains no whitespace. 
- /// - private static bool TryDecodeFromUtf16(ReadOnlySpan utf16, Span bytes, out int consumed, out int written) - { - ref char srcChars = ref MemoryMarshal.GetReference(utf16); - ref byte destBytes = ref MemoryMarshal.GetReference(bytes); - - int srcLength = utf16.Length & ~0x3; // only decode input up to the closest multiple of 4. - int destLength = bytes.Length; - - int sourceIndex = 0; - int destIndex = 0; - - if (utf16.Length == 0) - goto DoneExit; - - ref sbyte decodingMap = ref MemoryMarshal.GetReference(DecodingMap); - - // Last bytes could have padding characters, so process them separately and treat them as valid. - const int skipLastChunk = 4; - - int maxSrcLength; - if (destLength >= (srcLength >> 2) * 3) - { - maxSrcLength = srcLength - skipLastChunk; - } - else - { - // This should never overflow since destLength here is less than int.MaxValue / 4 * 3 (i.e. 1610612733) - // Therefore, (destLength / 3) * 4 will always be less than 2147483641 - maxSrcLength = (destLength / 3) * 4; - } - - while (sourceIndex < maxSrcLength) - { - int result = Decode(ref Unsafe.Add(ref srcChars, sourceIndex), ref decodingMap); - if (result < 0) - goto InvalidExit; - WriteThreeLowOrderBytes(ref Unsafe.Add(ref destBytes, destIndex), result); - destIndex += 3; - sourceIndex += 4; - } - - if (maxSrcLength != srcLength - skipLastChunk) - goto InvalidExit; - - // If input is less than 4 bytes, srcLength == sourceIndex == 0 - // If input is not a multiple of 4, sourceIndex == srcLength != 0 - if (sourceIndex == srcLength) - { - goto InvalidExit; - } - - int i0 = Unsafe.Add(ref srcChars, srcLength - 4); - int i1 = Unsafe.Add(ref srcChars, srcLength - 3); - int i2 = Unsafe.Add(ref srcChars, srcLength - 2); - int i3 = Unsafe.Add(ref srcChars, srcLength - 1); - if (((i0 | i1 | i2 | i3) & 0xffffff00) != 0) - goto InvalidExit; - - i0 = Unsafe.Add(ref decodingMap, i0); - i1 = Unsafe.Add(ref decodingMap, i1); - - i0 <<= 18; - i1 <<= 12; - - i0 |= i1; - - if (i3 != EncodingPad) - { - i2 
= Unsafe.Add(ref decodingMap, i2); - i3 = Unsafe.Add(ref decodingMap, i3); - - i2 <<= 6; - - i0 |= i3; - i0 |= i2; - - if (i0 < 0) - goto InvalidExit; - if (destIndex > destLength - 3) - goto InvalidExit; - WriteThreeLowOrderBytes(ref Unsafe.Add(ref destBytes, destIndex), i0); - destIndex += 3; - } - else if (i2 != EncodingPad) - { - i2 = Unsafe.Add(ref decodingMap, i2); - - i2 <<= 6; - - i0 |= i2; - - if ((i0 & 0x800000c0) != 0) // if negative or 2 unused bits are not 0. - goto InvalidExit; - if (destIndex > destLength - 2) - goto InvalidExit; - Unsafe.Add(ref destBytes, destIndex) = (byte)(i0 >> 16); - Unsafe.Add(ref destBytes, destIndex + 1) = (byte)(i0 >> 8); - destIndex += 2; - } - else - { - if ((i0 & 0x8000F000) != 0) // if negative or 4 unused bits are not 0. - goto InvalidExit; - if (destIndex > destLength - 1) - goto InvalidExit; - Unsafe.Add(ref destBytes, destIndex) = (byte)(i0 >> 16); - destIndex++; - } - - sourceIndex += 4; - - if (srcLength != utf16.Length) - goto InvalidExit; - - DoneExit: - consumed = sourceIndex; - written = destIndex; - return true; - - InvalidExit: - consumed = sourceIndex; - written = destIndex; - Debug.Assert((consumed % 4) == 0); - return false; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int Decode(ref char encodedChars, ref sbyte decodingMap) - { - int i0 = encodedChars; - int i1 = Unsafe.Add(ref encodedChars, 1); - int i2 = Unsafe.Add(ref encodedChars, 2); - int i3 = Unsafe.Add(ref encodedChars, 3); - - if (((i0 | i1 | i2 | i3) & 0xffffff00) != 0) - return -1; // One or more chars falls outside the 00..ff range. This cannot be a valid Base64 character. 
- - i0 = Unsafe.Add(ref decodingMap, i0); - i1 = Unsafe.Add(ref decodingMap, i1); - i2 = Unsafe.Add(ref decodingMap, i2); - i3 = Unsafe.Add(ref decodingMap, i3); - - i0 <<= 18; - i1 <<= 12; - i2 <<= 6; - - i0 |= i3; - i1 |= i2; - - i0 |= i1; - return i0; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void WriteThreeLowOrderBytes(ref byte destination, int value) - { - destination = (byte)(value >> 16); - Unsafe.Add(ref destination, 1) = (byte)(value >> 8); - Unsafe.Add(ref destination, 2) = (byte)value; - } - - // Pre-computing this table using a custom string(s_characters) and GenerateDecodingMapAndVerify (found in tests) - private static ReadOnlySpan DecodingMap => - [ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, // 62 is placed at index 43 (for +), 63 at index 47 (for /) - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 52-61 are placed at index 48-57 (for 0-9), 64 at index 61 (for =) - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, // 0-25 are placed at index 65-90 (for A-Z) - -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, // 26-51 are placed at index 97-122 (for a-z) - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bytes over 122 ('z') are invalid and cannot be decoded - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Hence, padding the map with 255, which indicates invalid input - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - ]; - - private const byte EncodingPad = (byte)'='; // '=', for padding - } -} diff --git a/src/libraries/System.Private.CoreLib/src/System/Convert.cs b/src/libraries/System.Private.CoreLib/src/System/Convert.cs index 6674aebb8a843c..0be9677e877c56 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Convert.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Convert.cs @@ -75,7 +75,6 @@ public enum Base64FormattingOptions public static partial class Convert { private const int Base64LineBreakPosition = 76; - private const int Base64VectorizationLengthThreshold = 16; // Constant representing the database null value. This value is used in // database applications to indicate the absence of a known value. Note @@ -2314,7 +2313,7 @@ public static string ToBase64String(byte[] inArray) { ArgumentNullException.ThrowIfNull(inArray); - return ToBase64String(new ReadOnlySpan(inArray), Base64FormattingOptions.None); + return Base64.EncodeToString(inArray); } public static string ToBase64String(byte[] inArray, Base64FormattingOptions options) @@ -2353,27 +2352,19 @@ public static string ToBase64String(ReadOnlySpan bytes, Base64FormattingOp } bool insertLineBreaks = (options == Base64FormattingOptions.InsertLineBreaks); - int outputLength = ToBase64_CalculateAndValidateOutputLength(bytes.Length, insertLineBreaks); - string result = string.FastAllocateString(outputLength); - - if (Vector128.IsHardwareAccelerated && !insertLineBreaks && bytes.Length >= Base64VectorizationLengthThreshold) + if (!insertLineBreaks) { - ToBase64CharsLargeNoLineBreaks(bytes, new Span(ref result.GetRawStringData(), result.Length), result.Length); - } - else - { - unsafe - { - fixed (byte* bytesPtr = &MemoryMarshal.GetReference(bytes)) - fixed (char* charsPtr = result) - { - int charsWritten = ConvertToBase64Array(charsPtr, bytesPtr, 0, bytes.Length, insertLineBreaks); - 
Debug.Assert(result.Length == charsWritten, $"Expected {result.Length} == {charsWritten}"); - } - } + return Base64.EncodeToString(bytes); } + int outputLength = ToBase64_CalculateAndValidateOutputLength(bytes.Length, insertLineBreaks: true); + + string result = string.FastAllocateString(outputLength); + + int charsWritten = ConvertToBase64ArrayInsertLineBreaks(new Span(ref result.GetRawStringData(), result.Length), bytes); + Debug.Assert(result.Length == charsWritten, $"Expected {result.Length} == {charsWritten}"); + return result; } @@ -2382,7 +2373,7 @@ public static int ToBase64CharArray(byte[] inArray, int offsetIn, int length, ch return ToBase64CharArray(inArray, offsetIn, length, outArray, offsetOut, Base64FormattingOptions.None); } - public static unsafe int ToBase64CharArray(byte[] inArray, int offsetIn, int length, char[] outArray, int offsetOut, Base64FormattingOptions options) + public static int ToBase64CharArray(byte[] inArray, int offsetIn, int length, char[] outArray, int offsetOut, Base64FormattingOptions options) { ArgumentNullException.ThrowIfNull(inArray); ArgumentNullException.ThrowIfNull(outArray); @@ -2409,24 +2400,21 @@ public static unsafe int ToBase64CharArray(byte[] inArray, int offsetIn, int len ArgumentOutOfRangeException.ThrowIfGreaterThan(offsetOut, outArrayLength - charLengthRequired); - if (Vector128.IsHardwareAccelerated && !insertLineBreaks && length >= Base64VectorizationLengthThreshold) + if (!insertLineBreaks) { - ToBase64CharsLargeNoLineBreaks(new ReadOnlySpan(inArray, offsetIn, length), outArray.AsSpan(offsetOut), charLengthRequired); + int charsWritten = Base64.EncodeToChars(new ReadOnlySpan(inArray, offsetIn, length), outArray.AsSpan(offsetOut)); + Debug.Assert(charsWritten == charLengthRequired); } else { - fixed (char* outChars = &outArray[offsetOut]) - fixed (byte* inData = &inArray[0]) - { - int converted = ConvertToBase64Array(outChars, inData, offsetIn, length, insertLineBreaks); - Debug.Assert(converted == 
charLengthRequired); - } + int converted = ConvertToBase64ArrayInsertLineBreaks(outArray.AsSpan(offsetOut), new ReadOnlySpan(inArray, offsetIn, length)); + Debug.Assert(converted == charLengthRequired); } return charLengthRequired; } - public static unsafe bool TryToBase64Chars(ReadOnlySpan bytes, Span chars, out int charsWritten, Base64FormattingOptions options = Base64FormattingOptions.None) + public static bool TryToBase64Chars(ReadOnlySpan bytes, Span chars, out int charsWritten, Base64FormattingOptions options = Base64FormattingOptions.None) { if ((uint)options > (uint)Base64FormattingOptions.InsertLineBreaks) { @@ -2448,107 +2436,34 @@ public static unsafe bool TryToBase64Chars(ReadOnlySpan bytes, Span return false; } - if (Vector128.IsHardwareAccelerated && !insertLineBreaks && bytes.Length >= Base64VectorizationLengthThreshold) + if (!insertLineBreaks) { - ToBase64CharsLargeNoLineBreaks(bytes, chars, charLengthRequired); + int written = Base64.EncodeToChars(bytes, chars); + Debug.Assert(written == charLengthRequired); } else { - fixed (char* outChars = &MemoryMarshal.GetReference(chars)) - fixed (byte* inData = &MemoryMarshal.GetReference(bytes)) - { - int converted = ConvertToBase64Array(outChars, inData, 0, bytes.Length, insertLineBreaks); - Debug.Assert(converted == charLengthRequired); - } + int converted = ConvertToBase64ArrayInsertLineBreaks(chars, bytes); + Debug.Assert(converted == charLengthRequired); } charsWritten = charLengthRequired; return true; } - /// Base64 encodes the bytes from into . - /// The bytes to encode. - /// The destination buffer large enough to handle the encoded chars. - /// The pre-calculated, exact number of chars that will be written. - private static void ToBase64CharsLargeNoLineBreaks(ReadOnlySpan bytes, Span chars, int charLengthRequired) - { - // For large enough inputs, it's beneficial to use the vectorized UTF8-based Base64 encoding - // and then widen the resulting bytes into chars. 
- Debug.Assert(bytes.Length >= Base64VectorizationLengthThreshold); - Debug.Assert(chars.Length >= charLengthRequired); - Debug.Assert(charLengthRequired % 4 == 0); - - // Base64-encode the bytes directly into the destination char buffer (reinterpreted as a byte buffer). - OperationStatus status = Base64.EncodeToUtf8(bytes, MemoryMarshal.AsBytes(chars), out _, out int bytesWritten); - Debug.Assert(status == OperationStatus.Done && charLengthRequired == bytesWritten); - - // Now widen the ASCII bytes in-place to chars (if the vectorized Ascii.WidenAsciiToUtf16 is ever updated - // to support in-place updates, it should be used here instead). Since the base64 bytes are all valid ASCII, the byte - // data is guaranteed to be 1/2 as long as the char data, and we can widen in-place. - ref ushort dest = ref Unsafe.As(ref MemoryMarshal.GetReference(chars)); - ref byte src = ref Unsafe.As(ref dest); - ref byte srcBeginning = ref src; - - // We process the bytes/chars from right to left to avoid overwriting the remaining unprocessed data. - // The refs start out pointing just past the end of the data, and each iteration of a loop bumps - // the refs back the apropriate amount and performs the copy/widening. - dest = ref Unsafe.Add(ref dest, charLengthRequired); - src = ref Unsafe.Add(ref src, charLengthRequired); - - // Handle 32 bytes at a time. - if (Vector256.IsHardwareAccelerated) - { - ref byte srcBeginningPlus31 = ref Unsafe.Add(ref srcBeginning, 31); - while (Unsafe.IsAddressGreaterThan(ref src, ref srcBeginningPlus31)) - { - src = ref Unsafe.Subtract(ref src, 32); - dest = ref Unsafe.Subtract(ref dest, 32); - - (Vector256 utf16Lower, Vector256 utf16Upper) = Vector256.Widen(Vector256.LoadUnsafe(ref src)); - - utf16Lower.StoreUnsafe(ref dest); - utf16Upper.StoreUnsafe(ref dest, 16); - } - } - - // Handle 16 bytes at a time. 
- if (Vector128.IsHardwareAccelerated) - { - ref byte srcBeginningPlus15 = ref Unsafe.Add(ref srcBeginning, 15); - while (Unsafe.IsAddressGreaterThan(ref src, ref srcBeginningPlus15)) - { - src = ref Unsafe.Subtract(ref src, 16); - dest = ref Unsafe.Subtract(ref dest, 16); - - (Vector128 utf16Lower, Vector128 utf16Upper) = Vector128.Widen(Vector128.LoadUnsafe(ref src)); - - utf16Lower.StoreUnsafe(ref dest); - utf16Upper.StoreUnsafe(ref dest, 8); - } - } - - // Handle 4 bytes at a time. - ref byte srcBeginningPlus3 = ref Unsafe.Add(ref srcBeginning, 3); - while (Unsafe.IsAddressGreaterThan(ref src, ref srcBeginningPlus3)) + private static unsafe int ConvertToBase64ArrayInsertLineBreaks(Span outChars, ReadOnlySpan inData) + { + fixed (char* outCharsPtr = &MemoryMarshal.GetReference(outChars)) + fixed (byte* inDataPtr = &MemoryMarshal.GetReference(inData)) { - dest = ref Unsafe.Subtract(ref dest, 4); - src = ref Unsafe.Subtract(ref src, 4); - Ascii.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref Unsafe.As(ref dest), Unsafe.ReadUnaligned(ref src)); + return ConvertToBase64ArrayInsertLineBreaks(outCharsPtr, inDataPtr, inData.Length); } - - // The length produced by Base64 encoding is always a multiple of 4, so we don't need to handle - // 1 byte at a time as is common in other vectorized operations, as nothing will remain after - // the 4-byte loop. 
- - Debug.Assert(Unsafe.AreSame(ref srcBeginning, ref src)); - Debug.Assert(Unsafe.AreSame(ref srcBeginning, ref Unsafe.As(ref dest)), - "The two references should have ended up exactly at the beginning"); } - private static unsafe int ConvertToBase64Array(char* outChars, byte* inData, int offset, int length, bool insertLineBreaks) + private static unsafe int ConvertToBase64ArrayInsertLineBreaks(char* outChars, byte* inData, int length) { int lengthmod3 = length % 3; - int calcLength = offset + (length - lengthmod3); + int calcLength = length - lengthmod3; int j = 0; int charcount = 0; // Convert three bytes at a time to base64 notation. This will consume 4 chars. @@ -2556,18 +2471,15 @@ private static unsafe int ConvertToBase64Array(char* outChars, byte* inData, int // get a pointer to the base64 table to avoid unnecessary range checking ReadOnlySpan base64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="u8; - for (i = offset; i < calcLength; i += 3) + for (i = 0; i < calcLength; i += 3) { - if (insertLineBreaks) + if (charcount == Base64LineBreakPosition) { - if (charcount == Base64LineBreakPosition) - { - outChars[j++] = '\r'; - outChars[j++] = '\n'; - charcount = 0; - } - charcount += 4; + outChars[j++] = '\r'; + outChars[j++] = '\n'; + charcount = 0; } + charcount += 4; outChars[j] = (char)base64[(inData[i] & 0xfc) >> 2]; outChars[j + 1] = (char)base64[((inData[i] & 0x03) << 4) | ((inData[i + 1] & 0xf0) >> 4)]; outChars[j + 2] = (char)base64[((inData[i + 1] & 0x0f) << 2) | ((inData[i + 2] & 0xc0) >> 6)]; @@ -2578,7 +2490,7 @@ private static unsafe int ConvertToBase64Array(char* outChars, byte* inData, int // Where we left off before i = calcLength; - if (insertLineBreaks && (lengthmod3 != 0) && (charcount == Base64LineBreakPosition)) + if ((lengthmod3 != 0) && (charcount == Base64LineBreakPosition)) { outChars[j++] = '\r'; outChars[j++] = '\n'; @@ -2645,13 +2557,7 @@ public static byte[] FromBase64String(string s) 
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.s); } - unsafe - { - fixed (char* sPtr = s) - { - return FromBase64CharPtr(sPtr, s.Length); - } - } + return Base64.DecodeFromChars(s); } public static bool TryFromBase64String(string s, Span bytes, out int bytesWritten) @@ -2666,122 +2572,16 @@ public static bool TryFromBase64String(string s, Span bytes, out int bytes public static bool TryFromBase64Chars(ReadOnlySpan chars, Span bytes, out int bytesWritten) { - // This is actually local to one of the nested blocks but is being declared at the top as we don't want multiple stackallocs - // for each iteraton of the loop. - Span tempBuffer = stackalloc char[4]; // Note: The tempBuffer size could be made larger than 4 but the size must be a multiple of 4. - - bytesWritten = 0; - - while (chars.Length != 0) + OperationStatus status = Base64.DecodeFromChars(chars, bytes, out _, out bytesWritten); + if (status == OperationStatus.Done) { - // Attempt to decode a segment that doesn't contain whitespace. - bool complete = TryDecodeFromUtf16(chars, bytes, out int consumedInThisIteration, out int bytesWrittenInThisIteration); - bytesWritten += bytesWrittenInThisIteration; - if (complete) - return true; - - chars = chars.Slice(consumedInThisIteration); - bytes = bytes.Slice(bytesWrittenInThisIteration); - - Debug.Assert(chars.Length != 0); // If TryDecodeFromUtf16() consumed the entire buffer, it could not have returned false. - if (chars[0].IsSpace()) - { - // If we got here, the very first character not consumed was a whitespace. We can skip past any consecutive whitespace, then continue decoding. 
- - int indexOfFirstNonSpace = 1; - while (true) - { - if (indexOfFirstNonSpace == chars.Length) - break; - if (!chars[indexOfFirstNonSpace].IsSpace()) - break; - indexOfFirstNonSpace++; - } - - chars = chars.Slice(indexOfFirstNonSpace); - - if ((bytesWrittenInThisIteration % 3) != 0 && chars.Length != 0) - { - // If we got here, the last successfully decoded block encountered an end-marker, yet we have trailing non-whitespace characters. - // That is not allowed. - bytesWritten = default; - return false; - } - - // We now loop again to decode the next run of non-space characters. - } - else - { - Debug.Assert(chars.Length != 0 && !chars[0].IsSpace()); - - // If we got here, it is possible that there is whitespace that occurred in the middle of a 4-byte chunk. That is, we still have - // up to three Base64 characters that were left undecoded by the fast-path helper because they didn't form a complete 4-byte chunk. - // This is hopefully the rare case (multiline-formatted base64 message with a non-space character width that's not a multiple of 4.) - // We'll filter out whitespace and copy the remaining characters into a temporary buffer. - CopyToTempBufferWithoutWhiteSpace(chars, tempBuffer, out int consumedFromChars, out int charsWritten); - if ((charsWritten & 0x3) != 0) - { - // Even after stripping out whitespace, the number of characters is not divisible by 4. This cannot be a legal Base64 string. - bytesWritten = default; - return false; - } - - tempBuffer = tempBuffer.Slice(0, charsWritten); - if (!TryDecodeFromUtf16(tempBuffer, bytes, out int consumedFromTempBuffer, out int bytesWrittenFromTempBuffer)) - { - bytesWritten = default; - return false; - } - bytesWritten += bytesWrittenFromTempBuffer; - chars = chars.Slice(consumedFromChars); - bytes = bytes.Slice(bytesWrittenFromTempBuffer); - - if ((bytesWrittenFromTempBuffer % 3) != 0) - { - // If we got here, this decode contained one or more padding characters ('='). 
We can accept trailing whitespace after this - // but nothing else. - for (int i = 0; i < chars.Length; i++) - { - if (!chars[i].IsSpace()) - { - bytesWritten = default; - return false; - } - } - return true; - } - - // We now loop again to decode the next run of non-space characters. - } + return true; } - return true; - } - - private static void CopyToTempBufferWithoutWhiteSpace(ReadOnlySpan chars, Span tempBuffer, out int consumed, out int charsWritten) - { - Debug.Assert(tempBuffer.Length != 0); // We only bound-check after writing a character to the tempBuffer. - - charsWritten = 0; - for (int i = 0; i < chars.Length; i++) - { - char c = chars[i]; - if (!c.IsSpace()) - { - tempBuffer[charsWritten++] = c; - if (charsWritten == tempBuffer.Length) - { - consumed = i + 1; - return; - } - } - } - consumed = chars.Length; + bytesWritten = 0; + return false; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool IsSpace(this char c) => c == ' ' || c == '\t' || c == '\r' || c == '\n'; - /// /// Converts the specified range of a Char array, which encodes binary data as Base64 digits, to the equivalent byte array. 
/// @@ -2796,121 +2596,7 @@ public static byte[] FromBase64CharArray(char[] inArray, int offset, int length) ArgumentOutOfRangeException.ThrowIfNegative(offset); ArgumentOutOfRangeException.ThrowIfGreaterThan(offset, inArray.Length - length); - if (length == 0) - { - return Array.Empty(); - } - - unsafe - { - fixed (char* inArrayPtr = &inArray[0]) - { - return FromBase64CharPtr(inArrayPtr + offset, length); - } - } - } - - /// - /// Convert Base64 encoding characters to bytes: - /// - Compute result length exactly by actually walking the input; - /// - Allocate new result array based on computation; - /// - Decode input into the new array; - /// - /// Pointer to the first input char - /// Number of input chars - /// - private static unsafe byte[] FromBase64CharPtr(char* inputPtr, int inputLength) - { - // The validity of parameters much be checked by callers, thus we are Critical here. - - Debug.Assert(0 <= inputLength); - - // We need to get rid of any trailing white spaces. - // Otherwise we would be rejecting input such as "abc= ": - while (inputLength > 0) - { - int lastChar = inputPtr[inputLength - 1]; - if (lastChar != (int)' ' && lastChar != (int)'\n' && lastChar != (int)'\r' && lastChar != (int)'\t') - break; - inputLength--; - } - - // Compute the output length: - int resultLength = FromBase64_ComputeResultLength(inputPtr, inputLength); - - Debug.Assert(0 <= resultLength); - - // resultLength can be zero. We will still enter FromBase64_Decode and process the input. - // It may either simply write no bytes (e.g. input = " ") or throw (e.g. input = "ab"). - - // Create result byte blob: - byte[] decodedBytes = new byte[resultLength]; - - // Convert Base64 chars into bytes: - if (!TryFromBase64Chars(new ReadOnlySpan(inputPtr, inputLength), decodedBytes, out int _)) - throw new FormatException(SR.Format_BadBase64Char); - - // Note that the number of bytes written can differ from resultLength if the caller is modifying the array - // as it is being converted. 
Silently ignore the failure. - // Consider throwing exception in an non in-place release. - - // We are done: - return decodedBytes; - } - - /// - /// Compute the number of bytes encoded in the specified Base 64 char array: - /// Walk the entire input counting white spaces and padding chars, then compute result length - /// based on 3 bytes per 4 chars. - /// - private static unsafe int FromBase64_ComputeResultLength(char* inputPtr, int inputLength) - { - const uint intEq = (uint)'='; - const uint intSpace = (uint)' '; - - Debug.Assert(0 <= inputLength); - - char* inputEndPtr = inputPtr + inputLength; - int usefulInputLength = inputLength; - int padding = 0; - - while (inputPtr < inputEndPtr) - { - uint c = (uint)(*inputPtr); - inputPtr++; - - // We want to be as fast as possible and filter out spaces with as few comparisons as possible. - // We end up accepting a number of illegal chars as legal white-space chars. - // This is ok: as soon as we hit them during actual decode we will recognise them as illegal and throw. - if (c <= intSpace) - usefulInputLength--; - else if (c == intEq) - { - usefulInputLength--; - padding++; - } - } - - Debug.Assert(0 <= usefulInputLength); - - // For legal input, we can assume that 0 <= padding < 3. But it may be more for illegal input. - // We will notice it at decode when we see a '=' at the wrong place. - Debug.Assert(0 <= padding); - - // Perf: reuse the variable that stored the number of '=' to store the number of bytes encoded by the - // last group that contains the '=': - if (padding != 0) - { - if (padding == 1) - padding = 2; - else if (padding == 2) - padding = 1; - else - throw new FormatException(SR.Format_BadBase64Char); - } - - // Done: - return (usefulInputLength / 4) * 3 + padding; + return Base64.DecodeFromChars(new ReadOnlySpan(inArray, offset, length)); } /// Converts the specified string, which encodes binary data as hex characters, to an equivalent 8-bit unsigned integer array. 
diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs index ce6380fc645bf2..93e10104c7ae1e 100644 --- a/src/libraries/System.Runtime/ref/System.Runtime.cs +++ b/src/libraries/System.Runtime/ref/System.Runtime.cs @@ -7959,16 +7959,34 @@ namespace System.Buffers.Text { public static partial class Base64 { + public static byte[] DecodeFromChars(System.ReadOnlySpan source) { throw null; } + public static int DecodeFromChars(System.ReadOnlySpan source, System.Span destination) { throw null; } + public static System.Buffers.OperationStatus DecodeFromChars(System.ReadOnlySpan source, System.Span destination, out int charsConsumed, out int bytesWritten, bool isFinalBlock = true) { throw null; } + public static byte[] DecodeFromUtf8(System.ReadOnlySpan source) { throw null; } + public static int DecodeFromUtf8(System.ReadOnlySpan source, System.Span destination) { throw null; } public static System.Buffers.OperationStatus DecodeFromUtf8(System.ReadOnlySpan utf8, System.Span bytes, out int bytesConsumed, out int bytesWritten, bool isFinalBlock = true) { throw null; } public static System.Buffers.OperationStatus DecodeFromUtf8InPlace(System.Span buffer, out int bytesWritten) { throw null; } + public static char[] EncodeToChars(System.ReadOnlySpan source) { throw null; } + public static int EncodeToChars(System.ReadOnlySpan source, System.Span destination) { throw null; } + public static System.Buffers.OperationStatus EncodeToChars(System.ReadOnlySpan source, System.Span destination, out int bytesConsumed, out int charsWritten, bool isFinalBlock = true) { throw null; } + public static string EncodeToString(System.ReadOnlySpan source) { throw null; } + public static byte[] EncodeToUtf8(System.ReadOnlySpan source) { throw null; } + public static int EncodeToUtf8(System.ReadOnlySpan source, System.Span destination) { throw null; } public static System.Buffers.OperationStatus EncodeToUtf8(System.ReadOnlySpan bytes, 
System.Span utf8, out int bytesConsumed, out int bytesWritten, bool isFinalBlock = true) { throw null; } public static System.Buffers.OperationStatus EncodeToUtf8InPlace(System.Span buffer, int dataLength, out int bytesWritten) { throw null; } + public static int GetEncodedLength(int bytesLength) { throw null; } public static int GetMaxDecodedFromUtf8Length(int length) { throw null; } + public static int GetMaxDecodedLength(int base64Length) { throw null; } public static int GetMaxEncodedToUtf8Length(int length) { throw null; } public static bool IsValid(System.ReadOnlySpan base64TextUtf8) { throw null; } public static bool IsValid(System.ReadOnlySpan base64TextUtf8, out int decodedLength) { throw null; } public static bool IsValid(System.ReadOnlySpan base64Text) { throw null; } public static bool IsValid(System.ReadOnlySpan base64Text, out int decodedLength) { throw null; } + public static bool TryDecodeFromChars(System.ReadOnlySpan source, System.Span destination, out int bytesWritten) { throw null; } + public static bool TryDecodeFromUtf8(System.ReadOnlySpan source, System.Span destination, out int bytesWritten) { throw null; } + public static bool TryEncodeToChars(System.ReadOnlySpan source, System.Span destination, out int charsWritten) { throw null; } + public static bool TryEncodeToUtf8(System.ReadOnlySpan source, System.Span destination, out int bytesWritten) { throw null; } + public static bool TryEncodeToUtf8InPlace(System.Span buffer, int dataLength, out int bytesWritten) { throw null; } } public static partial class Base64Url {