SixLabors · antonfirsov · Oct 2, 2021 · Aug 17, 2021 · Aug 17, 2021 · Aug 18, 2021
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs
@@ -2,42 +2,67 @@
 // Licensed under the Apache License, Version 2.0.
 
 using System;
-using System.Diagnostics;
+using System.Numerics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
 using System.Text;
 
 namespace SixLabors.ImageSharp.Formats.Jpeg.Components
 {
  /// <summary>
- /// Represents a Jpeg block with <see cref="short"/> coefficients.
+ /// 8x8 matrix of <see cref="short"/> coefficients.
  /// </summary>
  // ReSharper disable once InconsistentNaming
+ [StructLayout(LayoutKind.Explicit)]
  internal unsafe struct Block8x8 : IEquatable<Block8x8>
  {
  /// <summary>
  /// A number of scalar coefficients in a <see cref="Block8x8F"/>
  /// </summary>
  public const int Size = 64;
 
+#pragma warning disable IDE0051 // Remove unused private member
  /// <summary>
- /// A fixed size buffer holding the values.
- /// See: <see>
- /// <cref>https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/unsafe-code-pointers/fixed-size-buffers</cref>
- /// </see>
+ /// A placeholder buffer so the actual struct occupies exactly 64 * 2 bytes.
  /// </summary>
+ /// <remarks>
+ /// This is not used directly in the code.
+ /// </remarks>
+ [FieldOffset(0)]
  private fixed short data[Size];
-
- /// <summary>
- /// Initializes a new instance of the <see cref="Block8x8"/> struct.
- /// </summary>
- /// <param name="coefficients">A <see cref="Span{T}"/> of coefficients</param>
- public Block8x8(Span<short> coefficients)
- {
- ref byte selfRef = ref Unsafe.As<Block8x8, byte>(ref this);
- ref byte sourceRef = ref Unsafe.As<short, byte>(ref MemoryMarshal.GetReference(coefficients));
- Unsafe.CopyBlock(ref selfRef, ref sourceRef, Size * sizeof(short));
- }
+#pragma warning restore IDE0051
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+ [FieldOffset(0)]
+ public Vector128<short> V0;
+ [FieldOffset(16)]
+ public Vector128<short> V1;
+ [FieldOffset(32)]
+ public Vector128<short> V2;
+ [FieldOffset(48)]
+ public Vector128<short> V3;
+ [FieldOffset(64)]
+ public Vector128<short> V4;
+ [FieldOffset(80)]
+ public Vector128<short> V5;
+ [FieldOffset(96)]
+ public Vector128<short> V6;
+ [FieldOffset(112)]
+ public Vector128<short> V7;
+
+ [FieldOffset(0)]
+ public Vector256<short> V01;
+ [FieldOffset(32)]
+ public Vector256<short> V23;
+ [FieldOffset(64)]
+ public Vector256<short> V45;
+ [FieldOffset(96)]
+ public Vector256<short> V67;
+#endif
 
  /// <summary>
  /// Gets or sets a <see cref="short"/> value at the given index
@@ -49,15 +74,17 @@ public short this[int idx]
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  get
  {
- GuardBlockIndex(idx);
+ DebugGuard.MustBeBetweenOrEqualTo(idx, 0, Size - 1, nameof(idx));
+
  ref short selfRef = ref Unsafe.As<Block8x8, short>(ref this);
  return Unsafe.Add(ref selfRef, idx);
  }
 
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  set
  {
- GuardBlockIndex(idx);
+ DebugGuard.MustBeBetweenOrEqualTo(idx, 0, Size - 1, nameof(idx));
+
  ref short selfRef = ref Unsafe.As<Block8x8, short>(ref this);
  Unsafe.Add(ref selfRef, idx) = value;
  }
@@ -75,15 +102,9 @@ public short this[int idx]
  set => this[(y * 8) + x] = value;
  }
 
- public static bool operator ==(Block8x8 left, Block8x8 right)
- {
- return left.Equals(right);
- }
+ public static bool operator ==(Block8x8 left, Block8x8 right) => left.Equals(right);
 
- public static bool operator !=(Block8x8 left, Block8x8 right)
- {
- return !left.Equals(right);
- }
+ public static bool operator !=(Block8x8 left, Block8x8 right) => !left.Equals(right);
 
  /// <summary>
  /// Multiply all elements by a given <see cref="int"/>
@@ -149,34 +170,11 @@ public short this[int idx]
  return result;
  }
 
- /// <summary>
- /// Pointer-based "Indexer" (getter part)
- /// </summary>
- /// <param name="blockPtr">Block pointer</param>
- /// <param name="idx">Index</param>
- /// <returns>The scaleVec value at the specified index</returns>
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static short GetScalarAt(Block8x8* blockPtr, int idx)
- {
- GuardBlockIndex(idx);
-
- short* fp = blockPtr->data;
- return fp[idx];
- }
-
- /// <summary>
- /// Pointer-based "Indexer" (setter part)
- /// </summary>
- /// <param name="blockPtr">Block pointer</param>
- /// <param name="idx">Index</param>
- /// <param name="value">Value</param>
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static void SetScalarAt(Block8x8* blockPtr, int idx, short value)
+ public static Block8x8 Load(Span<short> data)
  {
- GuardBlockIndex(idx);
-
- short* fp = blockPtr->data;
- fp[idx] = value;
+ Unsafe.SkipInit(out Block8x8 result);
+ result.LoadFrom(data);
+ return result;
  }
 
  /// <summary>
@@ -194,7 +192,7 @@ public Block8x8F AsFloatBlock()
  /// </summary>
  public short[] ToArray()
  {
- var result = new short[Size];
+ short[] result = new short[Size];
  this.CopyTo(result);
  return result;
  }
@@ -206,7 +204,7 @@ public void CopyTo(Span<short> destination)
  {
  ref byte selfRef = ref Unsafe.As<Block8x8, byte>(ref this);
  ref byte destRef = ref MemoryMarshal.GetReference(MemoryMarshal.Cast<short, byte>(destination));
- Unsafe.CopyBlock(ref destRef, ref selfRef, Size * sizeof(short));
+ Unsafe.CopyBlockUnaligned(ref destRef, ref selfRef, Size * sizeof(short));
  }
 
  /// <summary>
@@ -220,6 +218,19 @@ public void CopyTo(Span<int> destination)
  }
  }
 
+ /// <summary>
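+ /// Loads <see cref="Size"/> raw 16-bit integers from the beginning of the source span.
+ /// </summary>
+ /// <param name="source">The span to copy from. Its length is not checked here, so it must hold at least <see cref="Size"/> elements.</param>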
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void LoadFrom(Span<short> source)
+ {
+ ref byte sourceRef = ref Unsafe.As<short, byte>(ref MemoryMarshal.GetReference(source));
+ ref byte destRef = ref Unsafe.As<Block8x8, byte>(ref this);
+
+ Unsafe.CopyBlockUnaligned(ref destRef, ref sourceRef, Size * sizeof(short));
+ }
+
  /// <summary>
  /// Cast and copy <see cref="Size"/> <see cref="int"/>-s from the beginning of 'source' span.
  /// </summary>
@@ -231,13 +242,6 @@ public void LoadFrom(Span<int> source)
  }
  }
 
- [Conditional("DEBUG")]
- private static void GuardBlockIndex(int idx)
- {
- DebugGuard.MustBeLessThan(idx, Size, nameof(idx));
- DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx));
- }
-
  /// <inheritdoc />
  public override string ToString()
  {
@@ -271,15 +275,66 @@ public bool Equals(Block8x8 other)
  }
 
  /// <inheritdoc />
- public override bool Equals(object obj)
- {
- return obj is Block8x8 other && this.Equals(other);
- }
+ public override bool Equals(object obj) => obj is Block8x8 other && this.Equals(other);
 
  /// <inheritdoc />
- public override int GetHashCode()
+ public override int GetHashCode() => (this[0] * 31) + this[1];
+
+ /// <summary>
+ /// Returns the index of the last non-zero element in this block.
+ /// </summary>
+ /// <returns>
+ /// Index of the last non-zero element. Returns -1 if all elements are equal to zero.
+ /// </returns>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public nint GetLastNonZeroIndex()
  {
- return (this[0] * 31) + this[1];
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Avx2.IsSupported)
+ {
+ const int equalityMask = unchecked((int)0b1111_1111_1111_1111_1111_1111_1111_1111);
+
+ Vector256<short> zero16 = Vector256<short>.Zero;
+
+ ref Vector256<short> mcuStride = ref Unsafe.As<Block8x8, Vector256<short>>(ref this);
+
+ for (nint i = 3; i >= 0; i--)
+ {
+ int areEqual = Avx2.MoveMask(Avx2.CompareEqual(Unsafe.Add(ref mcuStride, i), zero16).AsByte());
+
+ if (areEqual != equalityMask)
+ {
+ // Every 2 bits of the mask hold the comparison result for one 2-byte element of the input vector:
+ // the least significant pair corresponds to the first element in the stride,
+ // the most significant pair corresponds to the last element in the stride.
+ // lzcnt is used to count the zero elements at the end of the stride
+
+ // The mask cannot be passed to lzcnt as is: its 1 bits mark zero elements and its 0 bits mark non-zero elements,
+ // so it has to be inverted first
+ int lzcnt = BitOperations.LeadingZeroCount(~(uint)areEqual);
+
+ // Each element occupies 2 bits of the mask, so the lzcnt result is divided by 2
+ // to get the number of trailing zero elements in the stride
+ int strideRelativeIndex = 15 - (lzcnt / 2);
+ return (i * 16) + strideRelativeIndex;
+ }
+ }
+
+ return -1;
+ }
+ else
+#endif
+ {
+ nint index = Size - 1;
+ ref short elemRef = ref Unsafe.As<Block8x8, short>(ref this);
+
+ while (index >= 0 && Unsafe.Add(ref elemRef, index) == 0)
+ {
+ index--;
+ }
+
+ return index;
+ }
  }
 
  /// <summary>