Skip to content

Commit 484bd77

Browse files
authored
Merge branch 'master' into bp/webpalpha
2 parents b12ad75 + cb3896a commit 484bd77

File tree

7 files changed

+240
-348
lines changed

7 files changed

+240
-348
lines changed

src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -280,7 +280,7 @@ public void AddInPlace(float value)
280280
}
281281

282282
/// <summary>
283-
/// Quantize input block, apply zig-zag ordering and store result as 16bit integers.
283+
/// Quantize input block, transpose, apply zig-zag ordering and store as <see cref="Block8x8"/>.
284284
/// </summary>
285285
/// <param name="block">Source block.</param>
286286
/// <param name="dest">Destination block.</param>
@@ -291,19 +291,19 @@ public static void Quantize(ref Block8x8F block, ref Block8x8 dest, ref Block8x8
291291
if (Avx2.IsSupported)
292292
{
293293
MultiplyIntoInt16_Avx2(ref block, ref qt, ref dest);
294-
ZigZag.ApplyZigZagOrderingAvx2(ref dest);
294+
ZigZag.ApplyTransposingZigZagOrderingAvx2(ref dest);
295295
}
296296
else if (Ssse3.IsSupported)
297297
{
298298
MultiplyIntoInt16_Sse2(ref block, ref qt, ref dest);
299-
ZigZag.ApplyZigZagOrderingSsse3(ref dest);
299+
ZigZag.ApplyTransposingZigZagOrderingSsse3(ref dest);
300300
}
301301
else
302302
#endif
303303
{
304304
for (int i = 0; i < Size; i++)
305305
{
306-
int idx = ZigZag.ZigZagOrder[i];
306+
int idx = ZigZag.TransposingOrder[i];
307307
float quantizedVal = block[idx] * qt[idx];
308308
quantizedVal += quantizedVal < 0 ? -0.5f : 0.5f;
309309
dest[i] = (short)quantizedVal;

src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,11 +29,10 @@ private static void FDCT8x8_Avx(ref Block8x8F block)
2929
{
3030
DebugGuard.IsTrue(Avx.IsSupported, "Avx support is required to execute this operation.");
3131

32-
// First pass - process rows
33-
block.TransposeInplace();
32+
// First pass - process columns
3433
FDCT8x8_1D_Avx(ref block);
3534

36-
// Second pass - process columns
35+
// Second pass - process rows
3736
block.TransposeInplace();
3837
FDCT8x8_1D_Avx(ref block);
3938

src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs

Lines changed: 7 additions & 126 deletions
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,11 @@ public static void AdjustToFDCT(ref Block8x8F quantTable)
9292
tableRef = 0.125f / (tableRef * Unsafe.Add(ref multipliersRef, i));
9393
tableRef = ref Unsafe.Add(ref tableRef, 1);
9494
}
95+
96+
// Spectral macroblocks are not transposed before quantization
97+
// Transpose is done after quantization at zig-zag stage
98+
// so we must transpose quantization table
99+
quantTable.TransposeInplace();
95100
}
96101

97102
/// <summary>
@@ -133,14 +138,9 @@ public static void TransformFDCT(ref Block8x8F block)
133138
}
134139
else
135140
#endif
136-
if (Vector.IsHardwareAccelerated)
137141
{
138142
FDCT_Vector4(ref block);
139143
}
140-
else
141-
{
142-
FDCT_Scalar(ref block);
143-
}
144144
}
145145

146146
/// <summary>
@@ -217,136 +217,17 @@ static void IDCT8x4_Vector4(ref Vector4 vecRef)
217217
}
218218
}
219219

220-
/// <summary>
221-
/// Apply 2D floating point FDCT inplace using scalar operations.
222-
/// </summary>
223-
/// <remarks>
224-
/// Ported from libjpeg-turbo https://github.com/libjpeg-turbo/libjpeg-turbo/blob/main/jfdctflt.c.
225-
/// </remarks>
226-
/// <param name="block">Input block.</param>
227-
private static void FDCT_Scalar(ref Block8x8F block)
228-
{
229-
const int dctSize = 8;
230-
231-
float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
232-
float tmp10, tmp11, tmp12, tmp13;
233-
float z1, z2, z3, z4, z5, z11, z13;
234-
235-
// First pass - process rows
236-
ref float blockRef = ref Unsafe.As<Block8x8F, float>(ref block);
237-
for (int ctr = 7; ctr >= 0; ctr--)
238-
{
239-
tmp0 = Unsafe.Add(ref blockRef, 0) + Unsafe.Add(ref blockRef, 7);
240-
tmp7 = Unsafe.Add(ref blockRef, 0) - Unsafe.Add(ref blockRef, 7);
241-
tmp1 = Unsafe.Add(ref blockRef, 1) + Unsafe.Add(ref blockRef, 6);
242-
tmp6 = Unsafe.Add(ref blockRef, 1) - Unsafe.Add(ref blockRef, 6);
243-
tmp2 = Unsafe.Add(ref blockRef, 2) + Unsafe.Add(ref blockRef, 5);
244-
tmp5 = Unsafe.Add(ref blockRef, 2) - Unsafe.Add(ref blockRef, 5);
245-
tmp3 = Unsafe.Add(ref blockRef, 3) + Unsafe.Add(ref blockRef, 4);
246-
tmp4 = Unsafe.Add(ref blockRef, 3) - Unsafe.Add(ref blockRef, 4);
247-
248-
// Even part
249-
tmp10 = tmp0 + tmp3;
250-
tmp13 = tmp0 - tmp3;
251-
tmp11 = tmp1 + tmp2;
252-
tmp12 = tmp1 - tmp2;
253-
254-
Unsafe.Add(ref blockRef, 0) = tmp10 + tmp11;
255-
Unsafe.Add(ref blockRef, 4) = tmp10 - tmp11;
256-
257-
z1 = (tmp12 + tmp13) * 0.707106781f;
258-
Unsafe.Add(ref blockRef, 2) = tmp13 + z1;
259-
Unsafe.Add(ref blockRef, 6) = tmp13 - z1;
260-
261-
// Odd part
262-
tmp10 = tmp4 + tmp5;
263-
tmp11 = tmp5 + tmp6;
264-
tmp12 = tmp6 + tmp7;
265-
266-
z5 = (tmp10 - tmp12) * 0.382683433f;
267-
z2 = (0.541196100f * tmp10) + z5;
268-
z4 = (1.306562965f * tmp12) + z5;
269-
z3 = tmp11 * 0.707106781f;
270-
271-
z11 = tmp7 + z3;
272-
z13 = tmp7 - z3;
273-
274-
Unsafe.Add(ref blockRef, 5) = z13 + z2;
275-
Unsafe.Add(ref blockRef, 3) = z13 - z2;
276-
Unsafe.Add(ref blockRef, 1) = z11 + z4;
277-
Unsafe.Add(ref blockRef, 7) = z11 - z4;
278-
279-
blockRef = ref Unsafe.Add(ref blockRef, dctSize);
280-
}
281-
282-
// Second pass - process columns
283-
blockRef = ref Unsafe.As<Block8x8F, float>(ref block);
284-
for (int ctr = 7; ctr >= 0; ctr--)
285-
{
286-
tmp0 = Unsafe.Add(ref blockRef, dctSize * 0) + Unsafe.Add(ref blockRef, dctSize * 7);
287-
tmp7 = Unsafe.Add(ref blockRef, dctSize * 0) - Unsafe.Add(ref blockRef, dctSize * 7);
288-
tmp1 = Unsafe.Add(ref blockRef, dctSize * 1) + Unsafe.Add(ref blockRef, dctSize * 6);
289-
tmp6 = Unsafe.Add(ref blockRef, dctSize * 1) - Unsafe.Add(ref blockRef, dctSize * 6);
290-
tmp2 = Unsafe.Add(ref blockRef, dctSize * 2) + Unsafe.Add(ref blockRef, dctSize * 5);
291-
tmp5 = Unsafe.Add(ref blockRef, dctSize * 2) - Unsafe.Add(ref blockRef, dctSize * 5);
292-
tmp3 = Unsafe.Add(ref blockRef, dctSize * 3) + Unsafe.Add(ref blockRef, dctSize * 4);
293-
tmp4 = Unsafe.Add(ref blockRef, dctSize * 3) - Unsafe.Add(ref blockRef, dctSize * 4);
294-
295-
// Even part
296-
tmp10 = tmp0 + tmp3;
297-
tmp13 = tmp0 - tmp3;
298-
tmp11 = tmp1 + tmp2;
299-
tmp12 = tmp1 - tmp2;
300-
301-
Unsafe.Add(ref blockRef, dctSize * 0) = tmp10 + tmp11;
302-
Unsafe.Add(ref blockRef, dctSize * 4) = tmp10 - tmp11;
303-
304-
z1 = (tmp12 + tmp13) * 0.707106781f;
305-
Unsafe.Add(ref blockRef, dctSize * 2) = tmp13 + z1;
306-
Unsafe.Add(ref blockRef, dctSize * 6) = tmp13 - z1;
307-
308-
// Odd part
309-
tmp10 = tmp4 + tmp5;
310-
tmp11 = tmp5 + tmp6;
311-
tmp12 = tmp6 + tmp7;
312-
313-
z5 = (tmp10 - tmp12) * 0.382683433f;
314-
z2 = (0.541196100f * tmp10) + z5;
315-
z4 = (1.306562965f * tmp12) + z5;
316-
z3 = tmp11 * 0.707106781f;
317-
318-
z11 = tmp7 + z3;
319-
z13 = tmp7 - z3;
320-
321-
Unsafe.Add(ref blockRef, dctSize * 5) = z13 + z2;
322-
Unsafe.Add(ref blockRef, dctSize * 3) = z13 - z2;
323-
Unsafe.Add(ref blockRef, dctSize * 1) = z11 + z4;
324-
Unsafe.Add(ref blockRef, dctSize * 7) = z11 - z4;
325-
326-
blockRef = ref Unsafe.Add(ref blockRef, 1);
327-
}
328-
}
329-
330220
/// <summary>
331221
/// Apply floating point FDCT inplace using <see cref="Vector4"/> API.
332222
/// </summary>
333-
/// <remarks>
334-
/// This implementation must be called only if hardware supports 4
335-
/// floating point numbers vector. Otherwise explicit scalar
336-
/// implementation <see cref="FDCT_Scalar"/> is faster
337-
/// because it does not rely on block transposition.
338-
/// </remarks>
339223
/// <param name="block">Input block.</param>
340224
public static void FDCT_Vector4(ref Block8x8F block)
341225
{
342-
DebugGuard.IsTrue(Vector.IsHardwareAccelerated, "Scalar implementation should be called for non-accelerated hardware.");
343-
344-
// First pass - process rows
345-
block.TransposeInplace();
226+
// First pass - process columns
346227
FDCT8x4_Vector4(ref block.V0L);
347228
FDCT8x4_Vector4(ref block.V0R);
348229

349-
// Second pass - process columns
230+
// Second pass - process rows
350231
block.TransposeInplace();
351232
FDCT8x4_Vector4(ref block.V0L);
352233
FDCT8x4_Vector4(ref block.V0R);

0 commit comments

Comments
 (0)