-
-
Notifications
You must be signed in to change notification settings - Fork 852
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improvements around fixed
#2418
Changes from 6 commits
a4ad7b0
4532194
a534328
528469f
f348d70
4b84aec
66b2f32
df4ae9e
f65122b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,25 +15,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components; | |
/// 8x8 matrix of <see cref="short"/> coefficients. | ||
/// </summary> | ||
// ReSharper disable once InconsistentNaming | ||
[StructLayout(LayoutKind.Explicit)] | ||
internal unsafe partial struct Block8x8 | ||
[StructLayout(LayoutKind.Explicit, Size = 2 * Size)] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The unused fixed sized buffer to specify the size of the struct isn't neede, as the size can be set directly. Codegen for creating the struct is also a bit better -- only zeroing the memory, instead of stack-cookie + zeroing. |
||
internal partial struct Block8x8 | ||
{ | ||
/// <summary> | ||
/// A number of scalar coefficients in a <see cref="Block8x8F"/> | ||
/// </summary> | ||
public const int Size = 64; | ||
|
||
#pragma warning disable IDE0051 // Remove unused private member | ||
/// <summary> | ||
/// A placeholder buffer so the actual struct occupies exactly 64 * 2 bytes. | ||
/// </summary> | ||
/// <remarks> | ||
/// This is not used directly in the code. | ||
/// </remarks> | ||
[FieldOffset(0)] | ||
private fixed short data[Size]; | ||
#pragma warning restore IDE0051 | ||
|
||
/// <summary> | ||
/// Gets or sets a <see cref="short"/> value at the given index | ||
/// </summary> | ||
|
@@ -74,9 +63,10 @@ public short this[int idx] | |
|
||
public static Block8x8 Load(Span<short> data) | ||
{ | ||
Unsafe.SkipInit(out Block8x8 result); | ||
result.LoadFrom(data); | ||
return result; | ||
DebugGuard.MustBeGreaterThanOrEqualTo(data.Length, Size, "data is too small"); | ||
|
||
ref byte src = ref Unsafe.As<short, byte>(ref MemoryMarshal.GetReference(data)); | ||
return Unsafe.ReadUnaligned<Block8x8>(ref src); | ||
} | ||
|
||
/// <summary> | ||
|
@@ -104,9 +94,10 @@ public short[] ToArray() | |
/// </summary> | ||
public void CopyTo(Span<short> destination) | ||
{ | ||
ref byte selfRef = ref Unsafe.As<Block8x8, byte>(ref this); | ||
ref byte destRef = ref MemoryMarshal.GetReference(MemoryMarshal.Cast<short, byte>(destination)); | ||
Unsafe.CopyBlockUnaligned(ref destRef, ref selfRef, Size * sizeof(short)); | ||
DebugGuard.MustBeGreaterThanOrEqualTo(destination.Length, Size, "destination is too small"); | ||
|
||
ref byte destRef = ref Unsafe.As<short, byte>(ref MemoryMarshal.GetReference(destination)); | ||
Unsafe.WriteUnaligned(ref destRef, this); | ||
} | ||
|
||
/// <summary> | ||
|
@@ -135,19 +126,6 @@ public void LoadFrom(ReadOnlySpan<byte> source) | |
} | ||
} | ||
|
||
/// <summary> | ||
/// Load raw 16bit integers from source. | ||
/// </summary> | ||
/// <param name="source">Source</param> | ||
[MethodImpl(InliningOptions.ShortMethod)] | ||
public void LoadFrom(Span<short> source) | ||
{ | ||
ref byte sourceRef = ref Unsafe.As<short, byte>(ref MemoryMarshal.GetReference(source)); | ||
ref byte destRef = ref Unsafe.As<Block8x8, byte>(ref this); | ||
|
||
Unsafe.CopyBlockUnaligned(ref destRef, ref sourceRef, Size * sizeof(short)); | ||
} | ||
|
||
/// <summary> | ||
/// Cast and copy <see cref="Size"/> <see cref="int"/>-s from the beginning of 'source' span. | ||
/// </summary> | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -101,24 +101,17 @@ internal float this[nuint idx] | |
set => this[((uint)y * 8) + (uint)x] = value; | ||
} | ||
|
||
public static Block8x8F Load(Span<float> data) | ||
{ | ||
Block8x8F result = default; | ||
result.LoadFrom(data); | ||
Comment on lines
-106
to
-107
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Going via instance method needs to copy two times, which can be avoided. In sharplab it's hard to see (no method names, just the address of the call), so here from the JIT directly: JIT assembly; Assembly listing for method Foo:Default(System.Span`1[float]):Block8x8F
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; Tier-1 compilation
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 4 single block inlinees; 0 inlinees without PGO data
G_M000_IG01: ;; offset=0000H
56 push rsi
4881EC00010000 sub rsp, 256
488BF1 mov rsi, rcx
G_M000_IG02: ;; offset=000BH
488B12 mov rdx, bword ptr [rdx]
488D0C24 lea rcx, bword ptr [rsp]
41B800010000 mov r8d, 256
E80330AF5F call CORINFO_HELP_MEMCPY
488BCE mov rcx, rsi
488D1424 lea rdx, bword ptr [rsp]
41B800010000 mov r8d, 256
E8F12FAF5F call CORINFO_HELP_MEMCPY
488BC6 mov rax, rsi
G_M000_IG03: ;; offset=0032H
4881C400010000 add rsp, 256
5E pop rsi
C3 ret
; Total bytes of code 59
; Assembly listing for method Foo:PR(System.Span`1[float]):Block8x8F_PR
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; Tier-1 compilation
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 3 single block inlinees; 0 inlinees without PGO data
G_M000_IG01: ;; offset=0000H
56 push rsi
488BF1 mov rsi, rcx
G_M000_IG02: ;; offset=0004H
488B12 mov rdx, bword ptr [rdx]
488BCE mov rcx, rsi
41B800010000 mov r8d, 256
E80B2FAF5F call CORINFO_HELP_MEMCPY
488BC6 mov rax, rsi
G_M000_IG03: ;; offset=0018H
5E pop rsi
C3 ret
; Total bytes of code 26 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just wondering: is it the same gain when you compare use cases on call sites, ie. Block8x8F b = Block8x8F.Load(data)`; after the PR change vs. typical usage on main: Block8x8F b = default;
b.LoadFrom(data); ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It depends what's done with the instance of Besides that from an API point of view having an instance method to load (or better "fill") the block with data is a bit strange for an value-type*. Here a static method seems the right fit, like * for reference types this is pretty much OK There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Note that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Checked it -- all usages are safe.
When I understand it right, then this is a simple example of it. Both variants work and produce the same machine-code. I'll see if the C# code here can be simplified. |
||
return result; | ||
} | ||
|
||
/// <summary> | ||
/// Load raw 32bit floating point data from source. | ||
/// </summary> | ||
/// <param name="source">Source</param> | ||
/// <param name="data">Source</param> | ||
[MethodImpl(InliningOptions.ShortMethod)] | ||
public void LoadFrom(Span<float> source) | ||
public static Block8x8F Load(Span<float> data) | ||
{ | ||
ref byte s = ref Unsafe.As<float, byte>(ref MemoryMarshal.GetReference(source)); | ||
ref byte d = ref Unsafe.As<Block8x8F, byte>(ref this); | ||
DebugGuard.MustBeGreaterThanOrEqualTo(data.Length, Size, "data is too small"); | ||
|
||
Unsafe.CopyBlock(ref d, ref s, Size * sizeof(float)); | ||
ref byte src = ref Unsafe.As<float, byte>(ref MemoryMarshal.GetReference(data)); | ||
return Unsafe.ReadUnaligned<Block8x8F>(ref src); | ||
} | ||
|
||
/// <summary> | ||
|
@@ -144,10 +137,10 @@ public unsafe void LoadFrom(Span<int> source) | |
[MethodImpl(InliningOptions.ShortMethod)] | ||
public unsafe void ScaledCopyTo(float[] dest) | ||
{ | ||
fixed (void* ptr = &this.V0L) | ||
{ | ||
Marshal.Copy((IntPtr)ptr, dest, 0, Size); | ||
} | ||
DebugGuard.MustBeGreaterThanOrEqualTo(dest.Length, Size, "dest is too small"); | ||
|
||
ref byte destRef = ref Unsafe.As<float, byte>(ref MemoryMarshal.GetArrayDataReference(dest)); | ||
Unsafe.WriteUnaligned(ref destRef, this); | ||
} | ||
|
||
public float[] ToArray() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,7 +25,7 @@ public static unsafe bool CheckNonOpaque(Span<Bgra32> row) | |
ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row); | ||
int i = 0; | ||
int length = (row.Length * 4) - 3; | ||
fixed (byte* src = rowBytes) | ||
fixed (byte* src = &MemoryMarshal.GetReference(rowBytes)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ups, I've forgotten this one. Couldn't see them in the local diffs. |
||
{ | ||
var alphaMaskVector256 = Vector256.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255); | ||
Vector256<byte> all0x80Vector256 = Vector256.Create((byte)0x80).AsByte(); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This avoids the bound-checks.
See sharplab for example.