Skip to content

Commit

Permalink
Optimize key encoding (#141)
Browse files Browse the repository at this point in the history
Optimize SSE key encoding, and remove the AVX key encoder. Per the discussion in this Stack Overflow question https://stackoverflow.com/questions/35663635/why-do-processors-with-only-avx-out-perform-avx2-processors-for-many-simd-algori
using AVX does not appear to make sense for this use case. In benchmarks, AVX
generally did not outperform SSE. Given the extra cost of using AVX registers and no additional
performance, SSE is the clear winner.

Also, fall back to the standard binary encoding (instead of the slow path) if SSE isn't available.
  • Loading branch information
xqrzd authored Feb 4, 2021
1 parent 7390efe commit 4fc23d7
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 169 deletions.
19 changes: 19 additions & 0 deletions src/Knet.Kudu.Client/Tablet/KeyEncoder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,25 @@ private static void EncodeBinary(
}
}

/// <summary>
/// Encodes <paramref name="source"/> into <paramref name="destination"/>
/// without hardware intrinsics. Input that contains no zero byte is
/// copied verbatim; any other input is handed to
/// <see cref="EncodeBinarySlow"/>.
/// </summary>
/// <param name="source">The bytes to encode.</param>
/// <param name="destination">The buffer that receives the encoded bytes.</param>
/// <returns>
/// <c>source.Length</c> when the input was copied verbatim, otherwise
/// the value returned by <see cref="EncodeBinarySlow"/>.
/// </returns>
private static int EncodeBinaryStandard(
    ReadOnlySpan<byte> source, Span<byte> destination)
{
    // Searching for a zero up front is faster, in the common
    // zero-free case, than copying one byte at a time.
    bool containsZero = source.IndexOf((byte)0) >= 0;

    if (containsZero)
    {
        return EncodeBinarySlow(source, destination);
    }

    // No zeros present: the data can be copied as-is.
    source.CopyTo(destination);
    return source.Length;
}

private static int EncodeBinarySlow(
ReadOnlySpan<byte> source, Span<byte> destination)
{
Expand Down
155 changes: 0 additions & 155 deletions src/Knet.Kudu.Client/Tablet/KeyEncoder.netcoreapp3.cs

This file was deleted.

15 changes: 1 addition & 14 deletions src/Knet.Kudu.Client/Tablet/KeyEncoder.netstandard.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,7 @@ public static partial class KeyEncoder
/// <summary>
/// Encodes <paramref name="source"/> into <paramref name="destination"/>
/// on targets where this file is compiled, using the shared
/// non-intrinsic implementation.
/// </summary>
/// <param name="source">The bytes to encode.</param>
/// <param name="destination">The buffer that receives the encoded bytes.</param>
/// <returns>The value returned by <see cref="EncodeBinaryStandard"/>.</returns>
private static int EncodeBinary(
    ReadOnlySpan<byte> source, Span<byte> destination)
{
    // EncodeBinaryStandard already performs the zero-byte scan, the
    // fast-path copy and the EncodeBinarySlow fallback, so delegate to
    // it rather than keeping a second copy of that logic here.
    return EncodeBinaryStandard(source, destination);
}
}
}
Expand Down
83 changes: 83 additions & 0 deletions src/Knet.Kudu.Client/Tablet/KeyEncoder.sse.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#if NETCOREAPP3_1 || NET5_0
using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

namespace Knet.Kudu.Client.Tablet
{
/// <summary>
/// Part of <c>KeyEncoder</c> that provides the SSE-accelerated binary
/// encoding path and the dispatcher that selects it.
/// </summary>
public static partial class KeyEncoder
{
    /// <summary>
    /// Encodes <paramref name="source"/> into <paramref name="destination"/>,
    /// using the SSE implementation when SSE4.1 is supported and
    /// <c>EncodeBinaryStandard</c> otherwise.
    /// </summary>
    /// <param name="source">The bytes to encode.</param>
    /// <param name="destination">The buffer that receives the encoded bytes.</param>
    /// <returns>The value returned by the selected encoder.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int EncodeBinary(
        ReadOnlySpan<byte> source, Span<byte> destination)
    {
        if (Sse41.IsSupported)
        {
            return EncodeBinarySse(source, destination);
        }

        return EncodeBinaryStandard(source, destination);
    }

    /// <summary>
    /// Encodes <paramref name="source"/> into <paramref name="destination"/>.
    /// Every zero byte is written as the two bytes <c>0x00 0x01</c>; all
    /// other bytes are copied unchanged. Leading 16-byte blocks that
    /// contain no zero byte are copied with a single vector load/store.
    /// </summary>
    /// <param name="source">The bytes to encode.</param>
    /// <param name="destination">
    /// The buffer that receives the encoded bytes. Must be at least twice
    /// as long as <paramref name="source"/>.
    /// </param>
    /// <returns>The number of bytes written to <paramref name="destination"/>.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="destination"/> is shorter than twice the length of
    /// <paramref name="source"/>.
    /// </exception>
    private static unsafe int EncodeBinarySse(
        ReadOnlySpan<byte> source, Span<byte> destination)
    {
        var length = (uint)source.Length;

        // Each source byte can expand to two output bytes, so requiring
        // double the space up front lets the loops below write without
        // further bounds checks. length is at most int.MaxValue, so the
        // multiplication cannot overflow a uint.
        if ((uint)destination.Length < length * 2)
        {
            ThrowException();
        }

        fixed (byte* src = source)
        fixed (byte* dest = destination)
        {
            var srcCurrent = src;
            var destCurrent = dest;

            var remainder = length % 16;
            var lastBlockIndex = length - remainder;
            var blockEnd = src + lastBlockIndex;
            var end = src + length;

            // Loop-invariant comparison operand.
            var zeros = Vector128<byte>.Zero;

            // Vector phase: copy whole 16-byte blocks for as long as
            // they contain no zero byte.
            while (srcCurrent < blockEnd)
            {
                var data = Sse2.LoadVector128(srcCurrent);

                // Lanes equal to zero become 0xFF, all others 0x00.
                var zeroBytes = Sse2.CompareEqual(data, zeros);

                // TestZ(mask, mask) is true when the mask has no bit
                // set, i.e. when no byte in this block is zero.
                bool blockHasNoZeros = Sse41.TestZ(zeroBytes, zeroBytes);

                if (!blockHasNoZeros)
                {
                    // This block needs escaping; the scalar loop
                    // handles it and everything after it.
                    break;
                }

                Sse2.Store(destCurrent, data);

                srcCurrent += 16;
                destCurrent += 16;
            }

            // Scalar phase: the remaining bytes, escaping each zero.
            while (srcCurrent < end)
            {
                byte value = *srcCurrent++;
                if (value == 0)
                {
                    *destCurrent++ = 0;
                    *destCurrent++ = 1;
                }
                else
                {
                    *destCurrent++ = value;
                }
            }

            var written = destCurrent - dest;
            return (int)written;
        }
    }

    /// <summary>
    /// Throws the exception that reports a destination buffer that is
    /// too small. Kept out of line so the throw does not sit in the
    /// body of the encoder.
    /// </summary>
    /// <exception cref="ArgumentException">Always thrown.</exception>
    [MethodImpl(MethodImplOptions.NoInlining)]
    private static void ThrowException() =>
        throw new ArgumentException("Destination must be at least double source");
}
}
#endif

0 comments on commit 4fc23d7

Please sign in to comment.