Skip to content

Commit

Permalink
GetChars(Encoding,byte[],StringBuilder) helper
Browse files Browse the repository at this point in the history
- encode ReadOnlySpan<byte> without allocs into StringBuilder
+ tests
  • Loading branch information
jakubmisek committed Aug 15, 2024
1 parent 9e77131 commit d935e45
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 1 deletion.
73 changes: 73 additions & 0 deletions src/Peachpie.Runtime/Utilities/EncodingExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
using System;
using System.Buffers;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;

namespace Pchp.Core.Utilities
{
/// <summary>
/// Extension helpers for <see cref="Encoding"/> that decode bytes straight into a <see cref="StringBuilder"/>.
/// </summary>
public static class EncodingExtensions
{
    /// <summary>
    /// Largest number of input bytes decoded in a single pass.
    /// Longer inputs are decoded in chunks of this size so the pooled scratch buffer stays bounded.
    /// </summary>
    const int MaxBytesAtOnce = 1 * 1024 * 1024; // 1M

    /// <summary>
    /// Decodes <paramref name="bytes"/> using <paramref name="encoding"/> and appends the
    /// resulting characters to <paramref name="builder"/>.
    /// </summary>
    /// <param name="encoding">Encoding used to interpret <paramref name="bytes"/>.</param>
    /// <param name="bytes">Encoded input. An empty span appends nothing.</param>
    /// <param name="builder">Destination the decoded characters are appended to.</param>
    /// <returns>Number of characters appended to <paramref name="builder"/>.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="encoding"/> or <paramref name="builder"/> is <c>null</c>.
    /// </exception>
    public static int GetChars(this Encoding encoding, ReadOnlySpan<byte> bytes, StringBuilder builder)
    {
        ArgumentNullException.ThrowIfNull(encoding, nameof(encoding));
        ArgumentNullException.ThrowIfNull(builder, nameof(builder));

        if (bytes.IsEmpty)
        {
            return 0;
        }

        if (bytes.Length <= MaxBytesAtOnce)
        {
            // Fast path: the whole input fits into one pass, no decoder state is needed.
            var buffer = ArrayPool<char>.Shared.Rent(encoding.GetCharCount(bytes));
            try
            {
                var chars = encoding.GetChars(bytes, buffer.AsSpan());
                builder.Append(buffer.AsSpan(0, chars));
                return chars;
            }
            finally
            {
                // Hand the buffer back even if decoding or appending throws.
                ArrayPool<char>.Shared.Return(buffer);
            }
        }

        // Slow path: decode in chunks. The decoder keeps state between chunks,
        // so a byte sequence split across a chunk boundary is still decoded as one character.
        var decoder = encoding.GetDecoder();
        int charsCount = 0;

        while (!bytes.IsEmpty)
        {
            var segment = bytes.Slice(0, Math.Min(MaxBytesAtOnce, bytes.Length));
            bytes = bytes.Slice(segment.Length);

            // Flush the decoder only with the final chunk.
            bool isLast = bytes.IsEmpty;

            var buffer = ArrayPool<char>.Shared.Rent(decoder.GetCharCount(segment, flush: isLast));
            try
            {
                var chars = decoder.GetChars(segment, buffer.AsSpan(), flush: isLast);
                builder.Append(buffer.AsSpan(0, chars));
                charsCount += chars;
            }
            finally
            {
                // Hand the buffer back even if decoding or appending throws.
                ArrayPool<char>.Shared.Return(buffer);
            }
        }

        return charsCount;
    }
}
}
31 changes: 30 additions & 1 deletion src/Tests/Peachpie.Runtime.Tests/UtilitiesTests.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
using System;
using System;
using System.Text;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using Pchp.Core;
using Pchp.Core.Collections;
using Pchp.Core.Text;
using Pchp.Core.Utilities;

namespace Peachpie.Runtime.Tests
{
Expand Down Expand Up @@ -63,5 +64,33 @@ public void ValueListToBytesTest()

Assert.AreEqual(Encoding.UTF8.GetString(list.ToArray()), "hello");
}

/// <summary>
/// Round-trips progressively longer strings through UTF-8 bytes and
/// <c>EncodingExtensions.GetChars</c>, checking both the returned count and the decoded text.
/// Each round extends the expected text by roughly one million characters of <paramref name="input"/>.
/// </summary>
[DataTestMethod]
[DataRow("")]
[DataRow("lorem ipsum")]
[DataRow("顧客は非常に重要です、顧客は顧客に続きます")]
public void GetCharsTest(string input)
{
    var utf8 = Encoding.UTF8;

    // number of copies of the input appended per round (~1M chars)
    int copiesPerRound = 1_000_000 / (input.Length + 1);

    string expected = input;

    for (int round = 0; round < 10; round++)
    {
        byte[] encoded = utf8.GetBytes(expected);

        var decoded = new StringBuilder();
        int decodedLength = Pchp.Core.Utilities.EncodingExtensions.GetChars(utf8, encoded, decoded);

        Assert.AreEqual(expected.Length, decodedLength, "Length don't match");
        Assert.AreEqual(expected, decoded.ToString(), "String don't match");

        // grow the decoded text in place; it becomes the expectation for the next round
        int remaining = copiesPerRound;
        while (remaining > 0)
        {
            decoded.Append(input);
            remaining--;
        }

        expected = decoded.ToString();
    }
}
}
}

0 comments on commit d935e45

Please sign in to comment.