From a6fd7e8d10aa24acf5f5bcf36e669a1609cf065b Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Mon, 27 Jan 2020 04:56:12 -0600
Subject: [PATCH 1/2] Remove unfortunate OffsetArrays specializations

---
 src/ImageFiltering.jl |  1 -
 src/border.jl         |  5 ---
 src/imfilter.jl       | 71 ++-----------------------------------------
 3 files changed, 2 insertions(+), 75 deletions(-)

diff --git a/src/ImageFiltering.jl b/src/ImageFiltering.jl
index 675a4df..7db6af7 100644
--- a/src/ImageFiltering.jl
+++ b/src/ImageFiltering.jl
@@ -19,7 +19,6 @@ export Kernel, KernelFactors,
 
 FixedColorant{T<:Normed} = Colorant{T}
 StaticOffsetArray{T,N,A<:StaticArray} = OffsetArray{T,N,A}
-OffsetVector{T} = OffsetArray{T,1}
 
 # Add a fix that should have been included in julia-1.0.3
 if isdefined(Broadcast, :_sametype) && !isdefined(Broadcast, :axistype)
diff --git a/src/border.jl b/src/border.jl
index 26926e6..a9d8795 100644
--- a/src/border.jl
+++ b/src/border.jl
@@ -681,11 +681,6 @@ function copydata!(dest, img, inds)
     dest
 end
 
-function copydata!(dest::OffsetArray, img, inds::Tuple{Vararg{OffsetArray}})
-    copydata!(parent(dest), img, map(parent, inds))
-    dest
-end
-
 Base.ndims(b::AbstractBorder) = ndims(typeof(b))
 Base.ndims(::Type{Pad{N}}) where {N} = N
 
diff --git a/src/imfilter.jl b/src/imfilter.jl
index a0632a2..cba472b 100644
--- a/src/imfilter.jl
+++ b/src/imfilter.jl
@@ -1006,30 +1006,6 @@ function __imfilter_inbounds!(r, out, A, kern, border, R, z)
     out
 end
 
-# This is unfortunate, but specializing this saves an add in the inner
-# loop and results in a modest performance improvement. It would be
-# nice if LLVM did this automatically. (@polly?)
-function __imfilter_inbounds!(r, out, A::OffsetArray, kern::OffsetArray, border, R, z)
-    off, k = CartesianIndex(kern.offsets), parent(kern)
-    o, O = safehead(off), safetail(off)
-    Rnew = CartesianIndices(map((x,y)->x.+y, R.indices, Tuple(off)))
-    Rk = CartesianIndices(axes(k))
-    offA, pA = CartesianIndex(A.offsets), parent(A)
-    oA, OA = safehead(offA), safetail(offA)
-    for I in safetail(Rnew)
-        IA = I-OA
-        for i in safehead(Rnew)
-            tmp = z
-            iA = i-oA
-            @inbounds for J in safetail(Rk), j in safehead(Rk)
-                tmp += safe_for_prod(pA[iA+j,IA+J], tmp)*k[j,J]
-            end
-            @inbounds out[i-o,I-O] = tmp
-        end
-    end
-    out
-end
-
 function _imfilter_inbounds!(r::AbstractResource, out, A::AbstractArray, kern::ReshapedOneD, border::NoPad, inds)
     Rpre, ind, Rpost = iterdims(inds, kern)
     k = kern.data
@@ -1042,56 +1018,14 @@ function _imfilter_inbounds!(r::AbstractResource, out, A::AbstractArray, kern::R
     _imfilter_inbounds!(r, z, out, A, k, Rpre, ind, Rpost)
 end
 
-# Many of the following are unfortunate specializations
-function _imfilter_inbounds!(r::AbstractResource, z, out, A::AbstractArray, k::OffsetVector, Rpre::CartesianIndices, ind, Rpost::CartesianIndices)
-    _imfilter_inbounds!(r, z, out, A, parent(k), Rpre, ind, Rpost, k.offsets[1])
-end
-
-function _imfilter_inbounds!(r::AbstractResource, z, out, A::AbstractArray, k::AbstractVector, Rpre::CartesianIndices, ind, Rpost::CartesianIndices, koffset=0)
-    indsk = axes(k, 1)
-    for Ipost in Rpost
-        for i in ind
-            ik = i+koffset
-            for Ipre in Rpre
-                tmp = z
-                for j in indsk
-                    @inbounds tmp += safe_for_prod(A[Ipre,ik+j,Ipost], tmp)*k[j]
-                end
-                @inbounds out[Ipre,i,Ipost] = tmp
-            end
-        end
-    end
-    out
-end
-
-function _imfilter_inbounds!(r::AbstractResource, out, A::OffsetArray, kern::ReshapedVector, border::NoPad, inds)
-    Rpre, ind, Rpost = iterdims(inds, kern)
-    k = kern.data
-    R, Rk = CartesianIndices(inds), CartesianIndices(axes(kern))
-    if isempty(R) || isempty(Rk)
-        return out
-    end
-    p = accumfilter(A[first(R)+first(Rk)], first(k))
-    z = zero(typeof(p+p))
-    Opre, o, Opost = KernelFactors.indexsplit(CartesianIndex(A.offsets), kern)
-    _imfilter_inbounds!(r, z, out, parent(A), k, Rpre, ind, Rpost, Opre, o, Opost)
-end
-
-function _imfilter_inbounds!(r::AbstractResource, z, out, A::AbstractArray, k::OffsetVector, Rpre::CartesianIndices, ind, Rpost::CartesianIndices, Opre, o, Opost)
-    _imfilter_inbounds!(r, z, out, A, parent(k), Rpre, ind, Rpost, Opre, o, Opost, k.offsets[1])
-end
-
-function _imfilter_inbounds!(r::AbstractResource, z, out, A::AbstractArray, k::AbstractVector, Rpre::CartesianIndices, ind, Rpost::CartesianIndices, Opre, o, Opost, koffset=0)
+function _imfilter_inbounds!(r::AbstractResource, z, out, A::AbstractArray, k::AbstractVector, Rpre::CartesianIndices, ind, Rpost::CartesianIndices)
     indsk = axes(k, 1)
     for Ipost in Rpost
-        IOpost = Ipost - Opost
         for i in ind
-            io = i-o+koffset
             for Ipre in Rpre
-                IOpre = Ipre - Opre
                 tmp = z
                 for j in indsk
-                    @inbounds tmp += safe_for_prod(A[IOpre,io+j,IOpost], tmp)*k[j]
+                    @inbounds tmp += safe_for_prod(A[Ipre,i+j,Ipost], tmp)*k[j]
                 end
                 @inbounds out[Ipre,i,Ipost] = tmp
             end
@@ -1099,7 +1033,6 @@ function _imfilter_inbounds!(r::AbstractResource, z, out, A::AbstractArray, k::A
     end
     out
 end
-# end unfortunate specializations
 
 ## commented out because "virtual padding" is commented out
 # function _imfilter_iter!(r::AbstractResource, out, padded, kernel::AbstractArray, iter)

From 7b500f6fbc422a7bbc41a8c265a2b4599b9cb3f5 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Mon, 30 Mar 2020 03:52:43 -0500
Subject: [PATCH 2/2] WIP: hoist kernel safehead/safetail out of the loop (might make it slower)

---
 src/imfilter.jl | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/imfilter.jl b/src/imfilter.jl
index cba472b..8226c6d 100644
--- a/src/imfilter.jl
+++ b/src/imfilter.jl
@@ -996,10 +996,13 @@ end
 
 function __imfilter_inbounds!(r, out, A, kern, border, R, z)
     Rk = CartesianIndices(axes(kern))
+    Rkhead, Rktail = safehead(Rk), safetail(Rk)
     for I in safetail(R), i in safehead(R)
         tmp = z
-        @inbounds for J in safetail(Rk), j in safehead(Rk)
-            tmp += safe_for_prod(A[i+j,I+J], tmp)*kern[j,J]
+        @inbounds for J in Rktail
+            for j in Rkhead
+                tmp += safe_for_prod(A[i+j,I+J], tmp)*kern[j,J]
+            end
         end
         @inbounds out[i,I] = tmp
     end