SixLabors · JimBobSquarePants · Jul 17, 2022 · Mar 19, 2022 · Mar 19, 2022 · Mar 21, 2022
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ArithmeticScanDecoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ArithmeticScanDecoder.cs
@@ -247,8 +247,7 @@ public void ParseEntropyCodedData(int scanComponentCount)
 
             this.scanBuffer = new JpegBitReader(this.stream);
 
-            bool fullScan = this.frame.Progressive || this.frame.MultiScan;
-            this.frame.AllocateComponents(fullScan);
+            this.frame.AllocateComponents();
 
             if (this.frame.Progressive)
             {
@@ -326,11 +325,13 @@ private void ParseBaselineData()
 
             if (this.scanComponentCount != 1)
             {
+                this.spectralConverter.PrepareForDecoding();
                 this.ParseBaselineDataInterleaved();
                 this.spectralConverter.CommitConversion();
             }
             else if (this.frame.ComponentCount == 1)
             {
+                this.spectralConverter.PrepareForDecoding();
                 this.ParseBaselineDataSingleComponent();
                 this.spectralConverter.CommitConversion();
             }

diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverterAvx.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverterAvx.cs
@@ -1,6 +1,7 @@
 // Copyright (c) Six Labors.
 // Licensed under the Six Labors Split License.
 #if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
 
 namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
@@ -25,7 +26,9 @@ protected JpegColorConverterAvx(JpegColorSpace colorSpace, int precision)
             {
             }
 
-            public override bool IsAvailable => Avx.IsSupported;
+            public sealed override bool IsAvailable => Avx.IsSupported;
+
+            public sealed override int ElementsPerBatch => Vector256<float>.Count;
         }
     }
 }

diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverterBase.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverterBase.cs
@@ -35,6 +35,15 @@ protected JpegColorConverterBase(JpegColorSpace colorSpace, int precision)
         /// </summary>
         public abstract bool IsAvailable { get; }
 
+        /// <summary>
+        /// Gets a value indicating how many pixels are processed in a single batch.
+        /// </summary>
+        /// <remarks>
+        /// This generally should be equal to register size,
+        /// e.g. 1 for scalar implementation, 8 for AVX implementation and so on.
+        /// </remarks>
+        public abstract int ElementsPerBatch { get; }
+
         /// <summary>
         /// Gets the <see cref="JpegColorSpace"/> of this converter.
         /// </summary>
@@ -219,7 +228,7 @@ public ComponentValues(IReadOnlyList<Buffer2D<float>> componentBuffers, int row)
             /// </summary>
             /// <param name="processors">List of component color processors.</param>
             /// <param name="row">Row to convert</param>
-            public ComponentValues(IReadOnlyList<JpegComponentPostProcessor> processors, int row)
+            public ComponentValues(IReadOnlyList<ComponentProcessor> processors, int row)
             {
                 DebugGuard.MustBeGreaterThan(processors.Count, 0, nameof(processors));
 

diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverterScalar.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverterScalar.cs
@@ -16,7 +16,9 @@ protected JpegColorConverterScalar(JpegColorSpace colorSpace, int precision)
             {
             }
 
-            public override bool IsAvailable => true;
+            public sealed override bool IsAvailable => true;
+
+            public sealed override int ElementsPerBatch => 1;
         }
     }
 }
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverterVector.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverterVector.cs
@@ -17,7 +17,7 @@ internal abstract partial class JpegColorConverterBase
         /// Even though real life data is guaranteed to be of size
         /// divisible by 8 newer SIMD instructions like AVX512 won't work with
         /// such data out of the box. These converters have fallback code
-        /// for 'remainder' data.
+        /// for remainder data.
         /// </remarks>
         internal abstract class JpegColorConverterVector : JpegColorConverterBase
         {
@@ -28,7 +28,9 @@ protected JpegColorConverterVector(JpegColorSpace colorSpace, int precision)
 
             public sealed override bool IsAvailable => Vector.IsHardwareAccelerated && Vector<float>.Count % 4 == 0;
 
-            public override void ConvertToRgbInplace(in ComponentValues values)
+            public sealed override int ElementsPerBatch => Vector<float>.Count;
+
+            public sealed override void ConvertToRgbInplace(in ComponentValues values)
             {
                 DebugGuard.IsTrue(this.IsAvailable, $"{this.GetType().Name} converter is not supported on current hardware.");
 

diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/ComponentProcessor.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/ComponentProcessor.cs
@@ -0,0 +1,88 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System;
+using SixLabors.ImageSharp.Memory;
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
+{
+    /// <summary>
+    /// Base class for processing component spectral data and converting it to raw color data.
+    /// </summary>
+    internal abstract class ComponentProcessor : IDisposable
+    {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="ComponentProcessor"/> class.
+        /// </summary>
+        /// <param name="memoryAllocator">Allocator used to create the color buffer.</param>
+        /// <param name="frame">Frame exposed to derived classes via <see cref="Frame"/>.</param>
+        /// <param name="postProcessorBufferSize">Width and height of the color buffer to allocate.</param>
+        /// <param name="component">Component whose spectral blocks are processed.</param>
+        /// <param name="blockSize">Factor applied to the component subsampling divisors to obtain <see cref="BlockAreaSize"/>.</param>
+        protected ComponentProcessor(MemoryAllocator memoryAllocator, JpegFrame frame, Size postProcessorBufferSize, IJpegComponent component, int blockSize)
+        {
+            this.Frame = frame;
+            this.Component = component;
+
+            this.BlockAreaSize = component.SubSamplingDivisors * blockSize;
+            this.ColorBuffer = memoryAllocator.Allocate2DOveraligned<float>(
+                postProcessorBufferSize.Width,
+                postProcessorBufferSize.Height,
+                this.BlockAreaSize.Height);
+        }
+
+        /// <summary>
+        /// Gets the frame passed to the constructor.
+        /// </summary>
+        protected JpegFrame Frame { get; }
+
+        /// <summary>
+        /// Gets the component passed to the constructor.
+        /// </summary>
+        protected IJpegComponent Component { get; }
+
+        /// <summary>
+        /// Gets the buffer holding the converted color data.
+        /// </summary>
+        protected Buffer2D<float> ColorBuffer { get; }
+
+        /// <summary>
+        /// Gets the component subsampling divisors multiplied by the block size.
+        /// </summary>
+        protected Size BlockAreaSize { get; }
+
+        /// <summary>
+        /// Converts spectral data to color data accessible via <see cref="GetColorBufferRowSpan(int)"/>.
+        /// </summary>
+        /// <param name="row">Spectral row index to convert.</param>
+        public abstract void CopyBlocksToColorBuffer(int row);
+
+        /// <summary>
+        /// Clears spectral buffers.
+        /// </summary>
+        /// <remarks>
+        /// Should only be called during baseline interleaved decoding.
+        /// </remarks>
+        public void ClearSpectralBuffers()
+        {
+            Buffer2D<Block8x8> spectralBlocks = this.Component.SpectralBlocks;
+            for (int i = 0; i < spectralBlocks.Height; i++)
+            {
+                spectralBlocks.DangerousGetRowSpan(i).Clear();
+            }
+        }
+
+        /// <summary>
+        /// Gets converted color buffer row.
+        /// </summary>
+        /// <param name="row">Row index.</param>
+        /// <returns>Color buffer row.</returns>
+        public Span<float> GetColorBufferRowSpan(int row) =>
+            this.ColorBuffer.DangerousGetRowSpan(row);
+
+        /// <summary>
+        /// Releases the color buffer.
+        /// </summary>
+        public void Dispose() => this.ColorBuffer.Dispose();
+    }
+}
diff --git a/...mageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DirectComponentProcessor.cs b/...mageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DirectComponentProcessor.cs
@@ -0,0 +1,72 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System;
+using SixLabors.ImageSharp.Memory;
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
+{
+    /// <summary>
+    /// Processes component spectral data and converts it to color data in 1-to-1 scale.
+    /// </summary>
+    internal sealed class DirectComponentProcessor : ComponentProcessor
+    {
+        private Block8x8F dequantizationTable;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="DirectComponentProcessor"/> class.
+        /// </summary>
+        /// <param name="memoryAllocator">Allocator forwarded to the base class.</param>
+        /// <param name="frame">Frame forwarded to the base class.</param>
+        /// <param name="rawJpeg">Source of the quantization tables.</param>
+        /// <param name="postProcessorBufferSize">Color buffer size forwarded to the base class.</param>
+        /// <param name="component">Component to process; its index selects the quantization table.</param>
+        public DirectComponentProcessor(MemoryAllocator memoryAllocator, JpegFrame frame, IRawJpegData rawJpeg, Size postProcessorBufferSize, IJpegComponent component)
+            : base(memoryAllocator, frame, postProcessorBufferSize, component, blockSize: 8)
+        {
+            this.dequantizationTable = rawJpeg.QuantizationTables[component.QuantizationTableIndex];
+            FloatingPointDCT.AdjustToIDCT(ref this.dequantizationTable);
+        }
+
+        /// <inheritdoc/>
+        public override void CopyBlocksToColorBuffer(int spectralStep)
+        {
+            Buffer2D<Block8x8> blocks = this.Component.SpectralBlocks;
+            Size divisors = this.Component.SubSamplingDivisors;
+            int rowsPerStep = this.Component.SamplingFactors.Height;
+            int firstBlockRow = spectralStep * rowsPerStep;
+            int stride = this.ColorBuffer.Width;
+            float maxValue = this.Frame.MaxColorChannelValue;
+
+            Block8x8F block = default;
+
+            for (int rowOffset = 0; rowOffset < rowsPerStep; rowOffset++)
+            {
+                Span<float> targetRow = this.ColorBuffer.DangerousGetRowSpan(rowOffset * this.BlockAreaSize.Height);
+                Span<Block8x8> sourceRow = blocks.DangerousGetRowSpan(firstBlockRow + rowOffset);
+
+                for (int column = 0; column < blocks.Width; column++)
+                {
+                    // Integer to float, then dequantize.
+                    block.LoadFrom(ref sourceRow[column]);
+                    block.MultiplyInPlace(ref this.dequantizationTable);
+
+                    // Convert from spectral to color.
+                    FloatingPointDCT.TransformIDCT(ref block);
+
+                    // Rounding deliberately loses precision here: libjpeg stores blocks
+                    // as Int16 between all the operations, and emulating that keeps the
+                    // output closer to libjpeg's.
+                    block.NormalizeColorsAndRoundInPlace(maxValue);
+
+                    // Write to the color buffer according to the subsampling divisors.
+                    block.ScaledCopyTo(
+                        ref targetRow[column * this.BlockAreaSize.Width],
+                        stride,
+                        divisors.Width,
+                        divisors.Height);
+                }
+            }
+        }
+    }
+}
diff --git a/...arp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor2.cs b/...arp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor2.cs
@@ -0,0 +1,114 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System;
+using System.Runtime.CompilerServices;
+using SixLabors.ImageSharp.Memory;
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
+{
+    /// <summary>
+    /// Processes component spectral data and converts it to color data in 2-to-1 scale.
+    /// </summary>
+    internal sealed class DownScalingComponentProcessor2 : ComponentProcessor
+    {
+        private Block8x8F dequantizationTable;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="DownScalingComponentProcessor2"/> class.
+        /// </summary>
+        /// <param name="memoryAllocator">Allocator forwarded to the base class.</param>
+        /// <param name="frame">Frame forwarded to the base class.</param>
+        /// <param name="rawJpeg">Source of the quantization tables.</param>
+        /// <param name="postProcessorBufferSize">Color buffer size forwarded to the base class.</param>
+        /// <param name="component">Component to process; its index selects the quantization table.</param>
+        public DownScalingComponentProcessor2(MemoryAllocator memoryAllocator, JpegFrame frame, IRawJpegData rawJpeg, Size postProcessorBufferSize, IJpegComponent component)
+            : base(memoryAllocator, frame, postProcessorBufferSize, component, blockSize: 4)
+        {
+            this.dequantizationTable = rawJpeg.QuantizationTables[component.QuantizationTableIndex];
+            ScaledFloatingPointDCT.AdjustToIDCT(ref this.dequantizationTable);
+        }
+
+        /// <inheritdoc/>
+        public override void CopyBlocksToColorBuffer(int spectralStep)
+        {
+            Buffer2D<Block8x8> blocks = this.Component.SpectralBlocks;
+            Size divisors = this.Component.SubSamplingDivisors;
+            int rowsPerStep = this.Component.SamplingFactors.Height;
+            int firstBlockRow = spectralStep * rowsPerStep;
+            int stride = this.ColorBuffer.Width;
+
+            float maxValue = this.Frame.MaxColorChannelValue;
+            float halfRange = MathF.Ceiling(maxValue / 2);
+
+            Block8x8F block = default;
+
+            for (int rowOffset = 0; rowOffset < rowsPerStep; rowOffset++)
+            {
+                Span<float> targetRow = this.ColorBuffer.DangerousGetRowSpan(rowOffset * this.BlockAreaSize.Height);
+                Span<Block8x8> sourceRow = blocks.DangerousGetRowSpan(firstBlockRow + rowOffset);
+
+                for (int column = 0; column < blocks.Width; column++)
+                {
+                    // Integer to float.
+                    block.LoadFrom(ref sourceRow[column]);
+
+                    // IDCT, normalization and range handling; the dequantization table is passed in as well.
+                    ScaledFloatingPointDCT.TransformIDCT_4x4(ref block, ref this.dequantizationTable, halfRange, maxValue);
+
+                    // Store the result in the intermediate color buffer.
+                    ScaledCopyTo(
+                        ref block,
+                        ref targetRow[column * this.BlockAreaSize.Width],
+                        stride,
+                        divisors.Width,
+                        divisors.Height);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Copies the 16 values of <paramref name="block"/> at indices <c>(y * 8) + x</c>, x and y in 0..3,
+        /// to the destination, writing each value <paramref name="horizontalScale"/> times horizontally
+        /// and <paramref name="verticalScale"/> times vertically.
+        /// </summary>
+        /// <param name="block">Source block.</param>
+        /// <param name="destRef">Reference to the first destination element.</param>
+        /// <param name="destStrideWidth">Number of elements between the starts of consecutive destination rows.</param>
+        /// <param name="horizontalScale">Horizontal repeat count per value.</param>
+        /// <param name="verticalScale">Vertical repeat count per value.</param>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public static void ScaledCopyTo(ref Block8x8F block, ref float destRef, int destStrideWidth, int horizontalScale, int verticalScale)
+        {
+            // TODO: Optimize: implement all cases with scale-specific, loopless code!
+            CopyArbitraryScale(ref block, ref destRef, destStrideWidth, horizontalScale, verticalScale);
+
+            [MethodImpl(InliningOptions.ColdPath)]
+            static void CopyArbitraryScale(ref Block8x8F source, ref float origin, int stride, int scaleX, int scaleY)
+            {
+                for (int row = 0; row < 4; row++)
+                {
+                    int sourceRowStart = row * 8;
+                    int destRowStart = row * scaleY;
+
+                    for (int col = 0; col < 4; col++)
+                    {
+                        float sample = source[sourceRowStart + col];
+                        int destColStart = col * scaleX;
+
+                        for (int dy = 0; dy < scaleY; dy++)
+                        {
+                            int offset = ((destRowStart + dy) * stride) + destColStart;
+
+                            for (int dx = 0; dx < scaleX; dx++)
+                            {
+                                // Equivalent to area[destColStart + dx, destRowStart + dy] = sample.
+                                Unsafe.Add(ref origin, (nint)(uint)(offset + dx)) = sample;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}