From 3215e5eb172f74bcca47191b27662f09fcab4e69 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Sun, 23 Jun 2024 16:24:54 -0400 Subject: [PATCH 1/3] Minor optimizations --- src/UTF8.cs | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/src/UTF8.cs b/src/UTF8.cs index 0268317..bc567f2 100644 --- a/src/UTF8.cs +++ b/src/UTF8.cs @@ -68,6 +68,7 @@ private static (int utfAdjust, int scalarAdjust) GetFinalScalarUtfAdjustments(by // We scan the input from buf to len, possibly going back howFarBack bytes, to find the end of // a valid UTF-8 sequence. We return buf + len if the buffer is valid, otherwise we return the // pointer to the first invalid byte. + [MethodImpl(MethodImplOptions.AggressiveInlining)] private unsafe static byte* SimpleRewindAndValidateWithErrors(int howFarBack, byte* buf, int len) { int extraLen = 0; @@ -90,7 +91,6 @@ private static (int utfAdjust, int scalarAdjust) GetFinalScalarUtfAdjustments(by { return buf - howFarBack; } - int pos = 0; int nextPos; uint codePoint = 0; @@ -598,7 +598,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust } else { - invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3); + invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3); } if (invalidBytePointer < pInputBuffer + processedLength) { @@ -624,16 +624,17 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust // We may still have an error. 
- if (processedLength < inputLength || !Sse42.TestZ(prevIncomplete, prevIncomplete)) + bool hasIncompete = !Sse42.TestZ(prevIncomplete, prevIncomplete); + if (processedLength < inputLength || hasIncompete) { byte* invalidBytePointer; - if (processedLength == 0) + if (processedLength == 0 || !hasIncompete) { invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(0, pInputBuffer + processedLength, inputLength - processedLength); } else { - invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3); + invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3); } if (invalidBytePointer != pInputBuffer + inputLength) @@ -813,7 +814,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust if (!Avx2.TestZ(prevIncomplete, prevIncomplete)) { int off = processedLength >= 3 ? 
processedLength - 3 : processedLength; - byte* invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(16 - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3); + byte* invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(32 - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3); // So the code is correct up to invalidBytePointer if (invalidBytePointer < pInputBuffer + processedLength) { @@ -877,7 +878,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust } else { - invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3); + invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3); } if (invalidBytePointer < pInputBuffer + processedLength) { @@ -899,17 +900,17 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust } } // We may still have an error. 
- if (processedLength < inputLength || !Avx2.TestZ(prevIncomplete, prevIncomplete)) + bool hasIncompete = !Avx2.TestZ(prevIncomplete, prevIncomplete); + if (processedLength < inputLength || hasIncompete) { byte* invalidBytePointer; - if (processedLength == 0) + if (processedLength == 0 || !hasIncompete) { invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(0, pInputBuffer + processedLength, inputLength - processedLength); } else { - invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3); - + invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3); } if (invalidBytePointer != pInputBuffer + inputLength) { @@ -1215,7 +1216,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust } else { - invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3); + invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3); } if (invalidBytePointer < pInputBuffer + processedLength) { @@ -1237,16 +1238,17 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust } } // We may still have an error. 
- if (processedLength < inputLength || Avx512BW.CompareGreaterThan(prevIncomplete, Vector512.Zero).ExtractMostSignificantBits() != 0) + bool hasIncompete = Avx512BW.CompareGreaterThan(prevIncomplete, Vector512.Zero).ExtractMostSignificantBits() != 0; + if (processedLength < inputLength || hasIncompete) { byte* invalidBytePointer; - if (processedLength == 0) + if (processedLength == 0 || !hasIncompete) { invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(0, pInputBuffer + processedLength, inputLength - processedLength); } else { - invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3); + invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3); } if (invalidBytePointer != pInputBuffer + inputLength) @@ -1431,7 +1433,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust } else { - invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3); + invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3); } if (invalidBytePointer < pInputBuffer + processedLength) { @@ -1457,18 +1459,17 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust n4 += negn4add; } } - - // We may still have an error. 
- if (processedLength < inputLength || AdvSimd.Arm64.MaxAcross(prevIncomplete).ToScalar() != 0) + bool hasIncompete = AdvSimd.Arm64.MaxAcross(Vector128.AsUInt32(prevIncomplete)).ToScalar() != 0; + if (processedLength < inputLength || hasIncompete) { byte* invalidBytePointer; - if (processedLength == 0) + if (processedLength == 0 || !hasIncompete) { invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(0, pInputBuffer + processedLength, inputLength - processedLength); } else { - invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3); + invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3); } if (invalidBytePointer != pInputBuffer + inputLength) { @@ -1497,6 +1498,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust return GetPointerToFirstInvalidByteScalar(pInputBuffer + processedLength, inputLength - processedLength, out utf16CodeUnitCountAdjustment, out scalarCountAdjustment); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe void removeCounters(byte* start, byte* end, ref int n4, ref int contbytes) { for (byte* p = start; p < end; p++) @@ -1512,6 +1514,7 @@ private static unsafe void removeCounters(byte* start, byte* end, ref int n4, re } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe void addCounters(byte* start, byte* end, ref int n4, ref int contbytes) { for (byte* p = start; p < end; p++) From ff40b25cd9afa6296b8edf3122b9d1e07697999e Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Sun, 23 Jun 2024 17:18:30 -0400 Subject: [PATCH 2/3] minor reformat --- benchmark/Benchmark.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmark/Benchmark.cs b/benchmark/Benchmark.cs index ba44ba6..f1954cd 100644 --- a/benchmark/Benchmark.cs 
+++ b/benchmark/Benchmark.cs @@ -125,10 +125,10 @@ public Config() } // Parameters and variables for real data [Params(@"data/twitter.json", - @"data/Bogatov1069.utf8.txt", - @"data/Bogatov136.utf8.txt", - @"data/Bogatov286.utf8.txt", - @"data/Bogatov527.utf8.txt", + @"data/Bogatov1069.utf8.txt", + @"data/Bogatov136.utf8.txt", + @"data/Bogatov286.utf8.txt", + @"data/Bogatov527.utf8.txt", @"data/Arabic-Lipsum.utf8.txt", @"data/Hebrew-Lipsum.utf8.txt", @"data/Korean-Lipsum.utf8.txt", From 34171110bfda1bd65bb2704a00ed3184693b5f75 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sun, 23 Jun 2024 23:46:50 +0000 Subject: [PATCH 3/3] Optimizing for neoverse --- src/UTF8.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/UTF8.cs b/src/UTF8.cs index bc567f2..ebf37e7 100644 --- a/src/UTF8.cs +++ b/src/UTF8.cs @@ -1362,8 +1362,9 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust { Vector128<byte> currentBlock = AdvSimd.LoadVector128(pInputBuffer + processedLength); - - if (AdvSimd.Arm64.MaxAcross(currentBlock).ToScalar() <= 127) + if (AdvSimd.Arm64.MaxAcross(Vector128.AsUInt32(AdvSimd.And(currentBlock, v80))).ToScalar() == 0) + // We could do it with (AdvSimd.Arm64.MaxAcross(currentBlock).ToScalar() <= 127) but it is slower on some + // hardware. { // We have an ASCII block, no need to process it, but // we need to check if the previous block was incomplete.