Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize BigInteger.ToString for large decimal string #104676

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
199 changes: 160 additions & 39 deletions src/libraries/System.Runtime.Numerics/src/System/Number.BigInteger.cs
Original file line number Diff line number Diff line change
Expand Up @@ -748,9 +748,6 @@ internal static bool TryFormatBigInteger(BigInteger value, ReadOnlySpan<char> fo
{
Debug.Assert(formatString == null || formatString.Length == formatSpan.Length);

const uint TenPowMaxPartial = PowersOf1e9.TenPowMaxPartial;
const int MaxPartialDigits = PowersOf1e9.MaxPartialDigits;

int digits = 0;
char fmt = ParseFormatSpecifier(formatSpan, out digits);
if (fmt == 'x' || fmt == 'X')
Expand Down Expand Up @@ -783,44 +780,22 @@ internal static bool TryFormatBigInteger(BigInteger value, ReadOnlySpan<char> fo
}
}

// First convert to base 10^9.
int cuSrc = value._bits.Length;
// A quick conservative max length of base 10^9 representation
// A uint contributes to no more than 10/9 of 10^9 block, +1 for ceiling of division
int cuMax = cuSrc * (MaxPartialDigits + 1) / MaxPartialDigits + 1;
Debug.Assert((long)BigInteger.MaxLength * (MaxPartialDigits + 1) / MaxPartialDigits + 1 < (long)int.MaxValue); // won't overflow

uint[]? bufferToReturn = null;
Span<uint> base1E9Buffer = cuMax < BigIntegerCalculator.StackAllocThreshold ?
stackalloc uint[cuMax] :
(bufferToReturn = ArrayPool<uint>.Shared.Rent(cuMax));
// The Ratio is calculated as: log_{10^9}(2^32)
const double digitRatio = 1.0703288734719332;
Debug.Assert(BigInteger.MaxLength * digitRatio + 1 < Array.MaxLength); // won't overflow
Comment on lines +783 to +785
Copy link
Member

@huoyaoyuan huoyaoyuan Jul 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the length doesn't need to be exact, you can use integer estimation instead, similar to what I did in NumberToBigInteger.

Copy link
Contributor Author

@kzrnm kzrnm Jul 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which part are you referring to? NumberToBigInteger also used $\log_{2^{32}}(10^9)$.

// shrink buffer to the currently used portion.
// First, calculate the rough size of the buffer from the ratio that the number
// of digits follows. Then, shrink the size until there is no more space left.
// The Ratio is calculated as: log_{2^32}(10^9)
const double digitRatio = 0.934292276687070661;
currentBufferSize = Math.Min((int)(bufferSize * digitRatio) + 1, bufferSize);
Debug.Assert(buffer.Length == currentBufferSize || buffer[currentBufferSize] == 0);
while (0 < currentBufferSize && buffer[currentBufferSize - 1] == 0)
{
currentBufferSize--;
}
currentBuffer = buffer.Slice(0, currentBufferSize);

I rewrote it to use $\log_{2^{32}}(10)$ in pull request #97589, but it is essentially the same.

const double digitRatio = 0.10381025297; // log_{2^32}(10)
int resultLength = checked((int)(digitRatio * number.Scale) + 1 + 2);
uint[]? resultBufferFromPool = null;
Span<uint> resultBuffer = (
resultLength <= BigIntegerCalculator.StackAllocThreshold
? stackalloc uint[BigIntegerCalculator.StackAllocThreshold]
: resultBufferFromPool = ArrayPool<uint>.Shared.Rent(resultLength)).Slice(0, resultLength);
resultBuffer.Clear();


int cuDst = 0;

for (int iuSrc = cuSrc; --iuSrc >= 0;)
{
uint uCarry = value._bits[iuSrc];
for (int iuDst = 0; iuDst < cuDst; iuDst++)
{
Debug.Assert(base1E9Buffer[iuDst] < TenPowMaxPartial);
int base1E9BufferLength = (int)(value._bits.Length * digitRatio) + 1;
uint[]? base1E9BufferFromPool = null;
Span<uint> base1E9Buffer = base1E9BufferLength < BigIntegerCalculator.StackAllocThreshold ?
stackalloc uint[base1E9BufferLength] :
(base1E9BufferFromPool = ArrayPool<uint>.Shared.Rent(base1E9BufferLength));
base1E9Buffer.Clear();

// Use X86Base.DivRem when stable
ulong uuRes = NumericsHelpers.MakeUInt64(base1E9Buffer[iuDst], uCarry);
(ulong quo, ulong rem) = Math.DivRem(uuRes, TenPowMaxPartial);
uCarry = (uint)quo;
base1E9Buffer[iuDst] = (uint)rem;
}
if (uCarry != 0)
{
(uCarry, base1E9Buffer[cuDst++]) = Math.DivRem(uCarry, TenPowMaxPartial);
if (uCarry != 0)
base1E9Buffer[cuDst++] = uCarry;
}
}

ReadOnlySpan<uint> base1E9Value = base1E9Buffer[..cuDst];
BigIntegerToBase1E9(value._bits, base1E9Buffer, out int written);
ReadOnlySpan<uint> base1E9Value = base1E9Buffer[..written];

int valueDigits = (base1E9Value.Length - 1) * MaxPartialDigits + FormattingHelpers.CountDigits(base1E9Value[^1]);
int valueDigits = (base1E9Value.Length - 1) * PowersOf1e9.MaxPartialDigits + FormattingHelpers.CountDigits(base1E9Value[^1]);

string? strResult;

Expand Down Expand Up @@ -912,9 +887,9 @@ internal static bool TryFormatBigInteger(BigInteger value, ReadOnlySpan<char> fo
}
}

if (bufferToReturn != null)
if (base1E9BufferFromPool != null)
{
ArrayPool<uint>.Shared.Return(bufferToReturn);
ArrayPool<uint>.Shared.Return(base1E9BufferFromPool);
}

return strResult;
Expand All @@ -935,6 +910,132 @@ internal static bool TryFormatBigInteger(BigInteger value, ReadOnlySpan<char> fo
return UInt32ToDecChars(bufferEnd, base1E9Value[^1], digits);
}

// Largest input length, in 32-bit limbs, that BigIntegerToBase1E9 converts with the
// quadratic Naive loop; longer inputs are handed to DivideAndConquer.
// The declaration is split by #if so that DEBUG builds get a static field
// and all other builds get a compile-time constant.
#if DEBUG
// Mutable for unit testing...
public static
#else
public const
#endif
// TODO: https://github.com/dotnet/runtime/pull/96895
// int ToStringNaiveThreshold = BigIntegerCalculator.DivideBurnikelZieglerThreshold;
int ToStringNaiveThreshold = 16;
// Converts the magnitude in `bits` (32-bit limbs) into base-10^9 digits stored in
// `base1E9Buffer`, least significant digit first.
//
//   bits            - source limbs to convert.
//   base1E9Buffer   - destination for the base-10^9 digits.
//   leadingWritten  - receives the number of base-10^9 digits produced.
//
// Inputs of at most ToStringNaiveThreshold limbs use the quadratic Naive loop;
// larger inputs are split recursively by DivideAndConquer.
// NOTE(review): Naive reads destination slots only after writing them, but
// DivideAndConquer can leave slots between a short lower half and the upper half
// untouched, so the buffer is expected to arrive zeroed - confirm at every call site.
private static void BigIntegerToBase1E9(ReadOnlySpan<uint> bits, Span<uint> base1E9Buffer, out int leadingWritten)
{
Debug.Assert(ToStringNaiveThreshold >= 2);

if (bits.Length <= ToStringNaiveThreshold)
{
Naive(bits, base1E9Buffer, out leadingWritten);
return;
}

// Ask PowersOf1e9 how much scratch space its table needs for an input of this
// length, and which table index (mi) the recursion should start from.
PowersOf1e9.FloorBufferSize(bits.Length, out int powersOf1e9BufferLength, out int mi);
uint[]? powersOf1e9BufferFromPool = null;
// Small tables live on the stack (a fixed StackAllocThreshold-sized block, sliced
// down to the requested length); larger ones are rented from the shared pool.
Span<uint> powersOf1e9Buffer = (
powersOf1e9BufferLength <= BigIntegerCalculator.StackAllocThreshold
? stackalloc uint[BigIntegerCalculator.StackAllocThreshold]
: powersOf1e9BufferFromPool = ArrayPool<uint>.Shared.Rent(powersOf1e9BufferLength)).Slice(0, powersOf1e9BufferLength);
powersOf1e9Buffer.Clear();

PowersOf1e9 powersOf1e9 = new PowersOf1e9(powersOf1e9Buffer);

DivideAndConquer(powersOf1e9, mi, bits, base1E9Buffer, out leadingWritten);

// Only a rented array has to be handed back; the stackalloc case leaves this null.
if (powersOf1e9BufferFromPool != null)
{
ArrayPool<uint>.Shared.Return(powersOf1e9BufferFromPool);
}

// Recursive step: divides `bits` by a power of 10^9 taken from `powersOf1e9`,
// converts the remainder into the low part of `base1E9Buffer` and the quotient
// into the part starting at offset (1 << powersIndex).
// `leadingWritten` receives the total number of base-10^9 digits, counted from
// the start of `base1E9Buffer`.
static void DivideAndConquer(in PowersOf1e9 powersOf1e9, int powersIndex, ReadOnlySpan<uint> bits, Span<uint> base1E9Buffer, out int leadingWritten)
{
Debug.Assert(bits.Length == 0 || bits[^1] != 0);
Debug.Assert(powersIndex >= 0);

if (bits.Length <= ToStringNaiveThreshold)
{
Naive(bits, base1E9Buffer, out leadingWritten);
return;
}

// NOTE(review): powOfTen appears to be the stored limbs of 10^9^(1 << powersIndex)
// with `omittedLength` low limbs left out of the table - confirm against PowersOf1e9.
ReadOnlySpan<uint> powOfTen = powersOf1e9.GetSpan(powersIndex);
int omittedLength = PowersOf1e9.OmittedLength(powersIndex);

// Step down to a smaller power until the value is at least as large as the divisor,
// comparing only the limbs above the omitted low part.
while (bits.Length < powOfTen.Length + omittedLength || BigIntegerCalculator.Compare(bits.Slice(omittedLength), powOfTen) < 0)
{
--powersIndex;
powOfTen = powersOf1e9.GetSpan(powersIndex);
omittedLength = PowersOf1e9.OmittedLength(powersIndex);
}

// Scratch for the division results: `upper` is passed to Divide as the third
// argument and `lower` as the fourth (presumably quotient and remainder - verify
// against BigIntegerCalculator.Divide). Each uses the stack when small enough.
int upperLength = bits.Length - powOfTen.Length - omittedLength + 1;
uint[]? upperFromPool = null;
Span<uint> upper = ((uint)upperLength <= BigIntegerCalculator.StackAllocThreshold
? stackalloc uint[BigIntegerCalculator.StackAllocThreshold]
: upperFromPool = ArrayPool<uint>.Shared.Rent(upperLength)).Slice(0, upperLength);

int lowerLength = bits.Length;
uint[]? lowerFromPool = null;
Span<uint> lower = ((uint)lowerLength <= BigIntegerCalculator.StackAllocThreshold
? stackalloc uint[BigIntegerCalculator.StackAllocThreshold]
: lowerFromPool = ArrayPool<uint>.Shared.Rent(lowerLength)).Slice(0, lowerLength);

// The low `omittedLength` limbs take no part in the division, so they are copied
// straight into `lower`; only the limbs above them are divided by powOfTen.
bits.Slice(0, omittedLength).CopyTo(lower);
BigIntegerCalculator.Divide(bits.Slice(omittedLength), powOfTen, upper, lower.Slice(omittedLength));
Debug.Assert(!upper.Trim(0u).IsEmpty);

// The lower half is given exactly (1 << powersIndex) base-10^9 slots; the upper
// half is written immediately after them.
int lower1E9Length = 1 << powersIndex;
DivideAndConquer(
powersOf1e9,
powersIndex - 1,
lower.Slice(0, BigIntegerCalculator.ActualLength(lower)),
base1E9Buffer,
out int lowerWritten);
// `lower` is no longer needed once its digits are written, so release it before
// recursing into the upper half.
if (lowerFromPool != null)
ArrayPool<uint>.Shared.Return(lowerFromPool);
Debug.Assert(lower1E9Length >= lowerWritten);

DivideAndConquer(
powersOf1e9,
powersIndex - 1,
upper.Slice(0, BigIntegerCalculator.ActualLength(upper)),
base1E9Buffer.Slice(lower1E9Length),
out leadingWritten);
if (upperFromPool != null)
ArrayPool<uint>.Shared.Return(upperFromPool);

// The upper half reported its count relative to its own slice; rebase it onto
// the start of base1E9Buffer.
leadingWritten += lower1E9Length;
}

// Quadratic conversion: folds the limbs of `bits` into `base1E9Buffer` one at a
// time, most significant limb first, and reports the digit count in `leadingWritten`.
static void Naive(ReadOnlySpan<uint> bits, Span<uint> base1E9Buffer, out int leadingWritten)
{
// First convert to base 10^9.
int cuSrc = bits.Length;
// Number of base-10^9 digits produced so far.
int cuDst = 0;

for (int iuSrc = cuSrc; --iuSrc >= 0;)
{
uint uCarry = bits[iuSrc];
// Combine each existing digit with the running carry into a 64-bit value
// (presumably digit as the high half, carry as the low half - verify against
// NumericsHelpers.MakeUInt64), then split it by 10^9: the remainder stays in
// place and the quotient carries into the next digit.
for (int iuDst = 0; iuDst < cuDst; iuDst++)
{
Debug.Assert(base1E9Buffer[iuDst] < PowersOf1e9.TenPowMaxPartial);

// Use X86Base.DivRem when stable
ulong uuRes = NumericsHelpers.MakeUInt64(base1E9Buffer[iuDst], uCarry);
(ulong quo, ulong rem) = Math.DivRem(uuRes, PowersOf1e9.TenPowMaxPartial);
uCarry = (uint)quo;
base1E9Buffer[iuDst] = (uint)rem;
}
// A leftover carry is split by 10^9 once more, so it appends one new digit,
// or two when the quotient of that split is non-zero.
if (uCarry != 0)
{
(uCarry, base1E9Buffer[cuDst++]) = Math.DivRem(uCarry, PowersOf1e9.TenPowMaxPartial);
if (uCarry != 0)
base1E9Buffer[cuDst++] = uCarry;
}
}
leadingWritten = cuDst;
}
}

internal readonly ref struct PowersOf1e9
{
// Holds 1000000000^(1<<n).
Expand Down Expand Up @@ -1109,6 +1210,26 @@ public static int OmittedLength(int index)
return (MaxPartialDigits * (1 << index)) >> 5;
}

public static void FloorBufferSize(int size, out int bufferSize, out int maxIndex)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it required to calculate the exact buffer length? Can it be relaxed, letting the algorithm strip unnecessary zeros?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a concise way to calculate an inexact buffer length? The most concise approach would be to derive it from the predefined buffer lengths.

{
    Debug.Assert(size > 0);

    // Binary search over Indexes, narrowing the window [maxIndex, upper].
    // Each bound, once it has been moved, satisfies:
    //   size >= Indexes[maxIndex + 1] - Indexes[maxIndex]
    //   size <  Indexes[upper + 1] - Indexes[upper]
    int upper = Indexes.Length - 1;
    maxIndex = 0;
    while (upper - maxIndex > 1)
    {
        int mid = (upper + maxIndex) >> 1;
        int entryLength = Indexes[mid + 1] - Indexes[mid];
        if (size >= entryLength)
        {
            maxIndex = mid;
        }
        else
        {
            upper = mid;
        }
    }

    // Reported size is the end offset of the selected entry plus one extra element.
    bufferSize = Indexes[maxIndex + 1] + 1;
}

public void MultiplyPowerOfTen(ReadOnlySpan<uint> left, int trailingZeroCount, Span<uint> bits)
{
Debug.Assert(trailingZeroCount >= 0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
using System.Diagnostics;
using System.Globalization;
using System.Tests;
using Microsoft.DotNet.RemoteExecutor;
using Xunit;

namespace System.Numerics.Tests
Expand Down Expand Up @@ -497,7 +496,7 @@ public static void RunFormatScientificNotationToBigIntegerThrowsException(string
{
Assert.Throws<OverflowException>(() => BigInteger.Parse(testingValue, NumberStyles.AllowExponent));
}

[Fact]
public static void ToString_InvalidFormat_ThrowsFormatException()
{
Expand Down Expand Up @@ -539,6 +538,38 @@ public static void ToString_ValidLargeFormat()
b.ToString("G00000999999999"); // Should not throw
}

[Fact]
public static void RunPowerOf1E9ToStringTests()
{
    // Decimal lengths of 9 * 2^10 and 9 * 2^11 digits, i.e. 2^10 and 2^11
    // nine-digit groups; the inputs sit at or just beside those lengths.
    int halfDigits = 9 * (1 << 10);
    int fullDigits = 9 * (1 << 11);
    string halfNines = new string('9', halfDigits);

    string[] inputs =
    {
        halfNines + halfNines,
        "1" + new string('0', halfDigits) + halfNines,
        "1" + new string('0', halfDigits - 1) + "1" + halfNines,
        "1" + new string('0', fullDigits),
        "1" + new string('0', fullDigits - 1) + "1",
    };

    // Each value must round-trip: the same string is passed as input and expectation.
    for (int i = 0; i < inputs.Length; i++)
    {
        VerifyToString(inputs[i], inputs[i]);
    }
}

[Fact]
[OuterLoop]
public static void RunRepeatedCharsToStringTests()
{
    // For every length from 1 to 1299, round-trip a run of '1' digits and then a
    // run of '9' digits of that length.
    for (int length = 1; length < 1300; length++)
    {
        foreach (char digit in "19")
        {
            string repeated = new string(digit, length);
            VerifyToString(repeated, repeated);
        }
    }
}

private static void RunSimpleProviderToStringTests(Random random, string format, NumberFormatInfo provider, int precision, StringFormatter formatter)
{
string test;
Expand Down Expand Up @@ -2063,4 +2094,37 @@ private static NumberFormatInfo MarkUp(NumberFormatInfo nfi)
return nfi;
}
}


// Re-runs selected ToStringTest cases with Number.ToStringNaiveThreshold temporarily
// replaced by a small value through BigIntTools.Utils.RunWithFakeThreshold.
// The collection attribute places these tests in the DisableParallelization collection.
[Collection(nameof(DisableParallelization))]
public class ToStringTestThreshold
{
    [Fact]
    public static void RunSimpleToStringTests() =>
        BigIntTools.Utils.RunWithFakeThreshold(
            Number.ToStringNaiveThreshold,
            4,
            () => ToStringTest.RunSimpleToStringTests());

    [Fact]
    public void RunPowerOf1E9ToStringTests() =>
        BigIntTools.Utils.RunWithFakeThreshold(
            Number.ToStringNaiveThreshold,
            4,
            () => ToStringTest.RunPowerOf1E9ToStringTests());

    [Fact]
    [OuterLoop]
    public static void RunRepeatedCharsToStringTests() =>
        BigIntTools.Utils.RunWithFakeThreshold(
            Number.ToStringNaiveThreshold,
            4,
            () => ToStringTest.RunRepeatedCharsToStringTests());
}
}
Loading