From d935e45b5b0f688ef707b8bb9f46ddfd7217e3d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20M=C3=AD=C5=A1ek?=
Date: Thu, 15 Aug 2024 19:09:39 +0200
Subject: [PATCH] GetChars(Encoding,byte[],StringBuilder) helper - encode
 ReadOnlySpan<byte> without allocs into StringBuilder + tests

---
 .../Utilities/EncodingExtensions.cs          | 78 +++++++++++++++++++
 .../Peachpie.Runtime.Tests/UtilitiesTests.cs | 31 +++++++-
 2 files changed, 108 insertions(+), 1 deletion(-)
 create mode 100644 src/Peachpie.Runtime/Utilities/EncodingExtensions.cs

diff --git a/src/Peachpie.Runtime/Utilities/EncodingExtensions.cs b/src/Peachpie.Runtime/Utilities/EncodingExtensions.cs
new file mode 100644
index 0000000000..ae4e79400b
--- /dev/null
+++ b/src/Peachpie.Runtime/Utilities/EncodingExtensions.cs
@@ -0,0 +1,78 @@
+using System;
+using System.Buffers;
+using System.Collections.Generic;
+using System.Linq;
+using System.Runtime.InteropServices;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Pchp.Core.Utilities
+{
+    public static class EncodingExtensions
+    {
+        /// <summary>Largest number of bytes decoded in one step; longer inputs are decoded in chunks of this size.</summary>
+        const int MaxBytesAtOnce = 1 * 1024 * 1024; // 1M
+
+        /// <summary>
+        /// Decodes <paramref name="bytes"/> using <paramref name="encoding"/> and appends
+        /// the resulting characters to <paramref name="builder"/>.
+        /// </summary>
+        /// <param name="encoding">Encoding used to decode <paramref name="bytes"/>.</param>
+        /// <param name="bytes">Bytes to decode.</param>
+        /// <param name="builder">Builder the decoded characters are appended to.</param>
+        /// <returns>Number of characters appended to <paramref name="builder"/>.</returns>
+        public static int GetChars(this Encoding encoding, ReadOnlySpan<byte> bytes, StringBuilder builder)
+        {
+            ArgumentNullException.ThrowIfNull(encoding, nameof(encoding));
+            ArgumentNullException.ThrowIfNull(builder, nameof(builder));
+
+            if (bytes.IsEmpty)
+            {
+                return 0;
+            }
+
+            if (bytes.Length <= MaxBytesAtOnce)
+            {
+                // small input: decode everything in one step
+                var buffer = ArrayPool<char>.Shared.Rent(encoding.GetCharCount(bytes));
+                try
+                {
+                    var chars = encoding.GetChars(bytes, buffer.AsSpan());
+                    builder.Append(buffer.AsSpan(0, chars));
+                    return chars;
+                }
+                finally
+                {
+                    // hand the buffer back even when decoding or appending throws
+                    ArrayPool<char>.Shared.Return(buffer);
+                }
+            }
+
+            // large input: decode in chunks; the decoder keeps the state of
+            // byte sequences split across chunk boundaries
+            var decoder = encoding.GetDecoder();
+            int charsCount = 0;
+
+            while (bytes.Length > 0)
+            {
+                var segment = bytes.Slice(0, Math.Min(MaxBytesAtOnce, bytes.Length));
+                bytes = bytes.Slice(segment.Length);
+
+                var flush = bytes.IsEmpty; // last chunk: flush the decoder state
+                var buffer = ArrayPool<char>.Shared.Rent(decoder.GetCharCount(segment, flush: flush));
+                try
+                {
+                    var chars = decoder.GetChars(segment, buffer.AsSpan(), flush: flush);
+                    builder.Append(buffer.AsSpan(0, chars));
+                    charsCount += chars;
+                }
+                finally
+                {
+                    ArrayPool<char>.Shared.Return(buffer);
+                }
+            }
+
+            return charsCount;
+        }
+    }
+}
diff --git a/src/Tests/Peachpie.Runtime.Tests/UtilitiesTests.cs b/src/Tests/Peachpie.Runtime.Tests/UtilitiesTests.cs
index 56e57a698a..412511e45b 100644
--- a/src/Tests/Peachpie.Runtime.Tests/UtilitiesTests.cs
+++ b/src/Tests/Peachpie.Runtime.Tests/UtilitiesTests.cs
@@ -1,9 +1,10 @@
-using System;
+using System;
 using System.Text;
 using Microsoft.VisualStudio.TestTools.UnitTesting;
 using Pchp.Core;
 using Pchp.Core.Collections;
 using Pchp.Core.Text;
+using Pchp.Core.Utilities;
 
 namespace Peachpie.Runtime.Tests
 {
@@ -63,5 +64,33 @@ public void ValueListToBytesTest()
 
             Assert.AreEqual(Encoding.UTF8.GetString(list.ToArray()), "hello");
         }
+
+        [DataTestMethod]
+        [DataRow("")]
+        [DataRow("lorem ipsum")]
+        [DataRow("顧客は非常に重要です、顧客は顧客に続きます")]
+        public void GetCharsTest(string input)
+        {
+            string value = input;
+
+            for (int multiplier = 0; multiplier < 10; multiplier++)
+            {
+                var encoding = Encoding.UTF8;
+                var bytes = encoding.GetBytes(value);
+
+                var builder = new StringBuilder();
+                var count = Pchp.Core.Utilities.EncodingExtensions.GetChars(encoding, bytes, builder);
+
+                Assert.AreEqual(value.Length, count, "Length don't match");
+                Assert.AreEqual(value, builder.ToString(), "String don't match");
+
+                // add ~1M chars
+                for (int i = 0; i < 1_000_000 / (input.Length + 1); i++)
+                {
+                    builder.Append(input);
+                }
+                value = builder.ToString();
+            }
+        }
     }
 }