Skip to content

Commit

Permalink
Merge pull request #1517 from tkp1n/feature/vectorize-scale-to-dst-block
Browse files Browse the repository at this point in the history
Vectorize Scale16X16To8X8
  • Loading branch information
JimBobSquarePants authored Jan 21, 2021
2 parents 7eb5cc0 + 4fb1859 commit 954d233
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ public static class HwIntrinsics

/// <summary>
/// Gets 32 bytes that, read as eight little-endian 32-bit integers, hold the
/// indices 0, 2, 4, 6, 1, 3, 5, 7 (even positions first, then odd positions).
/// NOTE(review): presumably intended as the control operand of an 8x32-bit
/// permute such as <c>Avx2.PermuteVar8x32</c> - confirm against the call sites.
/// </summary>
public static ReadOnlySpan<byte> PermuteMaskEvenOdd8x32 => new byte[] { 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 };

/// <summary>
/// Gets 32 bytes that, read as eight little-endian 32-bit integers, hold the
/// indices 0, 1, 4, 5, 2, 3, 6, 7. Used as a permute control, this keeps
/// elements 0-1 and 6-7 in place and swaps the element pair 2-3 with the
/// element pair 4-5.
/// </summary>
public static ReadOnlySpan<byte> PermuteMaskSwitchInnerDWords8x32 => new byte[] { 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0 };

/// <summary>
/// Gets a 16-byte mask that lists source byte indices 0-11 in groups of three,
/// with 0x80 in every fourth position.
/// NOTE(review): presumably a byte-shuffle control that expands 3-byte groups
/// to 4 bytes, where 0x80 produces a zero byte - confirm against the call sites.
/// </summary>
private static ReadOnlySpan<byte> ShuffleMaskPad4Nx16 => new byte[] { 0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80 };

/// <summary>
/// Gets a 16-byte mask that lists the first three byte indices of each 4-byte
/// group (0-2, 4-6, 8-10, 12-14) followed by four 0x80 entries.
/// NOTE(review): presumably a byte-shuffle control that drops every fourth
/// byte and zero-fills the tail - confirm against the call sites.
/// </summary>
private static ReadOnlySpan<byte> ShuffleMaskSlice4Nx16 => new byte[] { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80 };
Expand Down
51 changes: 51 additions & 0 deletions src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,57 @@ public static unsafe void Quantize(
/// <param name="source">The source block.</param>
public static unsafe void Scale16X16To8X8(ref Block8x8F destination, ReadOnlySpan<Block8x8F> source)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    // Hardware intrinsics are compiled in: choose the implementation at runtime.
    if (!Avx2.IsSupported)
    {
        Scale16X16To8X8Scalar(ref destination, source);
        return;
    }

    Scale16X16To8X8Vectorized(ref destination, source);
#else
    // No intrinsics support in this build: the scalar routine is the only option.
    Scale16X16To8X8Scalar(ref destination, source);
#endif
}

/// <summary>
/// AVX2 implementation of the 16x16 to 8x8 downscale. Every destination value is
/// <c>(sum of a 2x2 source neighbourhood + 2) * 0.25</c>.
/// </summary>
/// <param name="destination">The block that receives the eight scaled rows.</param>
/// <param name="source">
/// The four source blocks. Blocks 0 and 1 feed destination rows 0-3 (left and right
/// halves), blocks 2 and 3 feed destination rows 4-7, matching the quadrant offsets
/// computed by the scalar implementation.
/// </param>
private static void Scale16X16To8X8Vectorized(ref Block8x8F destination, ReadOnlySpan<Block8x8F> source)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    Debug.Assert(Avx2.IsSupported, "AVX2 is required to execute this method");

    // The reads below are unchecked, so make the four-block requirement explicit.
    Debug.Assert(source.Length >= 4, "Four source blocks are required to execute this method");

    var f2 = Vector256.Create(2f);
    var f025 = Vector256.Create(0.25f);
    Vector256<int> switchInnerDoubleWords = Unsafe.As<byte, Vector256<int>>(ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskSwitchInnerDWords8x32));

    ref Block8x8F sourceRef = ref MemoryMarshal.GetReference(source);
    ref Vector256<float> destRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref destination);

    // Each pass handles one horizontal pair of source blocks: (0, 1) then (2, 3).
    // The references must be re-derived per pair; simply continuing to advance them
    // past the end of a block would pair block 1 with block 2 and never read block 3.
    for (int i = 0; i < 2; i++)
    {
        ref Vector256<float> in1 = ref Unsafe.As<Block8x8F, Vector256<float>>(ref Unsafe.Add(ref sourceRef, 2 * i));
        ref Vector256<float> in2 = ref Unsafe.As<Block8x8F, Vector256<float>>(ref Unsafe.Add(ref sourceRef, (2 * i) + 1));

        // Two source rows of each block collapse into one destination row.
        for (int j = 0; j < 8; j += 2)
        {
            Vector256<float> a = Unsafe.Add(ref in1, j);
            Vector256<float> b = Unsafe.Add(ref in1, j + 1);
            Vector256<float> c = Unsafe.Add(ref in2, j);
            Vector256<float> d = Unsafe.Add(ref in2, j + 1);

            // Per 128-bit lane: even-indexed (calc1/calc3) and odd-indexed (calc2/calc4)
            // elements of the left block in the low half, of the right block in the high half.
            Vector256<float> calc1 = Avx.Shuffle(a, c, 0b10_00_10_00);
            Vector256<float> calc2 = Avx.Shuffle(a, c, 0b11_01_11_01);
            Vector256<float> calc3 = Avx.Shuffle(b, d, 0b10_00_10_00);
            Vector256<float> calc4 = Avx.Shuffle(b, d, 0b11_01_11_01);

            Vector256<float> sum = Avx.Add(Avx.Add(calc1, calc2), Avx.Add(calc3, calc4));
            Vector256<float> add = Avx.Add(sum, f2);
            Vector256<float> res = Avx.Multiply(add, f025);

            // The lane-wise shuffle leaves the result as L0 L1 R0 R1 | L2 L3 R2 R3;
            // swapping the two inner element pairs yields L0 L1 L2 L3 R0 R1 R2 R3.
            destRef = Avx2.PermuteVar8x32(res, switchInnerDoubleWords);
            destRef = ref Unsafe.Add(ref destRef, 1);
        }
    }
#endif
}

private static unsafe void Scale16X16To8X8Scalar(ref Block8x8F destination, ReadOnlySpan<Block8x8F> source)
{
for (int i = 0; i < 4; i++)
{
int dstOff = ((i & 2) << 4) | ((i & 1) << 2);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
using System;
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg.Components;

namespace SixLabors.ImageSharp.Benchmarks.Format.Jpeg.Components
{
    /// <summary>
    /// Benchmarks <see cref="Block8x8F.Scale16X16To8X8"/>, which scales four source
    /// blocks into a single destination block.
    /// </summary>
    [Config(typeof(Config.HwIntrinsics_SSE_AVX))]
    public class Block8x8F_Scale16X16To8X8
    {
        // Fixed seed so every benchmark run operates on the same input values.
        private const int Seed = 42;

        // The four input blocks handed to the method under test.
        private readonly Block8x8F[] sourceBlocks = new Block8x8F[4];

        // The block the method under test writes into.
        private Block8x8F destination;

        /// <summary>
        /// Fills the input blocks and the destination block with the same 64
        /// pseudo-random values in the range [0, 1).
        /// </summary>
        [GlobalSetup]
        public void Setup()
        {
            var random = new Random(Seed);

            float[] values = new float[8 * 8];
            for (int i = 0; i < values.Length; i++)
            {
                values[i] = (float)random.NextDouble();
            }

            for (int i = 0; i < this.sourceBlocks.Length; i++)
            {
                this.sourceBlocks[i] = Block8x8F.Load(values);
            }

            this.destination = Block8x8F.Load(values);
        }

        /// <summary>
        /// Measures a single 16x16 to 8x8 downscale.
        /// </summary>
        [Benchmark]
        public void Scale16X16To8X8() => Block8x8F.Scale16X16To8X8(ref this.destination, this.sourceBlocks);
    }
}

0 comments on commit 954d233

Please sign in to comment.