diff --git a/src/wenbo.jl b/src/wenbo.jl index 9c55a70..9ab7124 100644 --- a/src/wenbo.jl +++ b/src/wenbo.jl @@ -50,7 +50,7 @@ function _transform1!(f::AbstractVector) pointerA = 1 l = length(f) while pointerA <= l - while pointerA <= l && @inbounds f[pointerA] == 0 + while pointerA <= l && @inbounds f[pointerA] == 0f0 pointerA+=1 end pointerB = pointerA @@ -112,44 +112,82 @@ md""" """ # ╔═╡ 16991d8b-ec84-49d0-90a9-15a78f1668bb -function _encode(leftD, rightf) +function _encode(leftD::Float32, rightf::Float32) if rightf == 1f10 return -leftD end - idx = 0 - while rightf>1 - rightf /=10 - idx+=1 + idx = 0f0 + while rightf >= 1f0 + rightf /= 10f0 + idx += 1f0 end - return -leftD-idx/10-rightf/10 + return -leftD-idx/10f0-rightf/10f0 end # ╔═╡ e7dbc916-c5cb-4f86-8ea1-adbcb0bdf8ea -function _decode(curr) - curr *= -10 - temp = Int(floor(curr)) - curr -= temp - if curr == 0 +function _decode(curr::Float32) + curr *= -1f0 + curr -= floor(curr) + curr *= 10f0 + temp = floor(curr % 10f0) + if temp == 0f0 return 1f10 end - temp %= 10 - while temp > 0 - temp -= 1 - curr*=10 + curr -= temp + while temp > 0f0 + temp -= 1f0 + curr *= 10f0 end return round(curr) end # ╔═╡ 32a4bf03-98f8-4ed9-9c12-f45c09b0b0dd -function _transform2!(f::AbstractVector) +function _transform2!(f::AbstractVector, org::AbstractVector) l = length(f) pointerA = 1 - while pointerA<=l && @inbounds f[pointerA] <= 1 + while pointerA<=l && @inbounds f[pointerA] <= 1f0 pointerA += 1 end p = 0 while pointerA<=l @inbounds curr = f[pointerA] + # left + temp = min(pointerA-1, p+1) + p = 0 + while 0 < temp + @inbounds newDistance = muladd(temp, temp, org[pointerA-temp]) + if newDistance < curr + curr = newDistance + p = temp + end + temp -= 1 + end + # right + temp = 1 + templ = length(f) - pointerA + while temp <= templ && muladd(temp, temp, -curr) < 0 + @inbounds curr = min(curr, muladd(temp, temp, f[pointerA+temp])) + temp += 1 + end + @inbounds f[pointerA] = curr + pointerA+=1 + while pointerA<=l && @inbounds f[pointerA] <= 1f0 + pointerA += 1 + end + end +end + +# ╔═╡ 89fed2a6-b09e-47b1-a020-efed76ba57de +function _transform2_EN_DE!(f::AbstractVector) + l = length(f) + pointerA = 1 + while pointerA<=l && @inbounds f[pointerA] <= 1f0 + pointerA += 1 + end + p = 0 + while pointerA<=l + @inbounds curr = f[pointerA] + # left prev = curr temp = min(pointerA-1, p+1) p = 0 @@ -163,6 +201,7 @@ function _transform2!(f::AbstractVector) end temp -= 1 end + # right temp = 1 templ = length(f) - pointerA while temp <= templ && muladd(temp, temp, -curr) < 0 @@ -171,7 +210,7 @@ function _transform2!(f::AbstractVector) end @inbounds f[pointerA] = _encode(curr, prev) pointerA+=1 - while pointerA<=l && @inbounds f[pointerA] <= 1 + while pointerA<=l && @inbounds f[pointerA] <= 1f0 pointerA += 1 end end @@ -182,16 +221,6 @@ function _transform2!(f::AbstractVector) end end -# ╔═╡ 89fed2a6-b09e-47b1-a020-efed76ba57de -function _transform3!(f) - for i in axes(f, 1) - @inbounds _transform1!(@view(f[i, :])) - end - for j in axes(f, 2) - @inbounds _transform2!(@view(f[:,j])) - end -end - # ╔═╡ 423df2ac-b9a2-4d59-b5fc-8de0e8cc6691 """ ```julia @@ -205,8 +234,9 @@ function transform(f::AbstractMatrix, tfm::Wenbo) for i in axes(f, 1) @inbounds _transform1!(@view(f[i, :])) end + org = copy(f) for j in axes(f, 2) - @inbounds _transform2!(@view(f[:,j])) + @inbounds _transform2!(@view(f[:,j]), @view(org[:,j])) end return f end @@ -226,11 +256,14 @@ Applies a squared euclidean distance transform to an input 3D image using the We """ function transform(f::AbstractArray, tfm::Wenbo) f = boolean_indicator(f) - for i in axes(f, 3) - @inbounds _transform3!(@view(f[:, :, i])) + for i in CartesianIndices(f[1,:,:]) + @inbounds _transform1!(@view(f[:, i])) + end + for i in CartesianIndices(f[:,1,:]) + @inbounds _transform2_EN_DE!(@view(f[i[1], :, i[2]])) end - for j in CartesianIndices(f[:,:,1]) - @inbounds _transform2!(@view(f[j, :])) + for i in CartesianIndices(f[:,:,1]) + @inbounds _transform2_EN_DE!(@view(f[i, :])) end return f end @@ -241,14 +274,15 @@ md""" """ # ╔═╡ d663cf13-4a3a-4667-8971-ddb5c455d85c -function _transform4!(f) - Threads.@threads for i in axes(f, 1) - @inbounds _transform1!(@view(f[i, :])) - end - Threads.@threads for j in axes(f, 2) - @inbounds _transform2!(@view(f[:, j])) - end -end +# function _transform4!(f) +# Threads.@threads for i in axes(f, 1) +# @inbounds _transform1!(@view(f[i, :])) +# end +# org = copy(f) +# Threads.@threads for j in axes(f, 2) +# @inbounds _transform2!(@view(f[:,j]), @view(org[:,j])) +# end +# end # ╔═╡ 0f0675ad-899d-4808-9757-deaae19a58a5 md""" @@ -269,7 +303,7 @@ function transform(f::AbstractMatrix, tfm::Wenbo, nthreads::Number) @inbounds _transform1!(@view(f[i, :])) end Threads.@threads for j in axes(f, 2) - @inbounds _transform2!(@view(f[:, j])) + @inbounds _transform2_EN_DE!(@view(f[:,j])) end return f end @@ -289,11 +323,14 @@ Applies a squared euclidean distance transform to an input 3D image using the We """ function transform(f::AbstractArray, tfm::Wenbo, nthreads::Number) f = boolean_indicator(f) - Threads.@threads for i in axes(f, 3) - @inbounds _transform4!(@view(f[:, :, i])) + Threads.@threads for i in CartesianIndices(f[1,:,:]) + @inbounds _transform1!(@view(f[:, i])) end - Threads.@threads for j in CartesianIndices(f[:,:,1]) - @inbounds _transform2!(@view(f[j, :])) + Threads.@threads for i in CartesianIndices(f[:,1,:]) + @inbounds _transform2_EN_DE!(@view(f[i[1], :, i[2]])) + end + Threads.@threads for i in CartesianIndices(f[:,:,1]) + @inbounds _transform2_EN_DE!(@view(f[i, :])) end return f end @@ -316,8 +353,8 @@ function transform(is_batch::Bool, f::AbstractArray, tfm::Wenbo, _) num_channels, num_batchs = size(f)[n_dims-1], size(f)[n_dims] f_new = similar(f, Float32) for batch_idx = 1: num_batchs - Threads.@threads for channel_idx = 1:num_channels - @inbounds selectdim(selectdim(f_new, n_dims, batch_idx), n_dims-1, channel_idx)[:] = transform(selectdim(selectdim(f, n_dims, batch_idx), n_dims-1, channel_idx), tfm) + for channel_idx = 1:num_channels + @inbounds selectdim(selectdim(f_new, n_dims, batch_idx), n_dims-1, channel_idx)[:] = transform(selectdim(selectdim(f, n_dims, batch_idx), n_dims-1, channel_idx), tfm, 16) end end return f_new @@ -557,7 +594,7 @@ function _transform_batch(f::CuArray{T, 4}, tfm::Wenbo, kernels) where T for batch_idx = 1:batch_size for channel_idx = 1:num_channels @inbounds kernels[7](f_new, f, row_length, l, channel_idx, batch_idx; threads, blocks) - @inbounds kernels[8](deepcopy(f_new), f_new, row_length, col_length, l, channel_idx, batch_idx; threads, blocks) + @inbounds kernels[8](copy(f_new), f_new, row_length, col_length, l, channel_idx, batch_idx; threads, blocks) end end return f_new @@ -580,7 +617,7 @@ function transform(f::CuArray{T, 2}, tfm::Wenbo, kernels) where T blocks = cld(l, threads) # k1 = T<:Bool ? kernels[2] : kernels[1] @inbounds kernels[1](f_new, f, row_length, l; threads, blocks) - @inbounds kernels[2](deepcopy(f_new), f_new, row_length, col_length, l; threads, blocks) + @inbounds kernels[2](copy(f_new), f_new, row_length, col_length, l; threads, blocks) return f_new end @@ -928,8 +965,8 @@ function _transform_batch(f::CuArray{T, 5}, tfm::Wenbo, kernels) where T for batch_idx = 1:batch_size for channel_idx = 1:num_channels @inbounds kernels[9](f_new, f, d2, d3, l, channel_idx, batch_idx; threads, blocks) - @inbounds kernels[10](f_new, deepcopy(f_new), d1, d2, d3, l, channel_idx, batch_idx; threads, blocks) - @inbounds kernels[11](f_new, deepcopy(f_new), d2, d3, l, channel_idx, batch_idx; threads, blocks) + @inbounds kernels[10](f_new, copy(f_new), d1, d2, d3, l, channel_idx, batch_idx; threads, blocks) + @inbounds kernels[11](f_new, copy(f_new), d2, d3, l, channel_idx, batch_idx; threads, blocks) end end return f_new @@ -951,8 +988,8 @@ function transform(f::CuArray{T, 3}, tfm::Wenbo, kernels) where T blocks = cld(l, threads) # k1 = T<:Bool ? kernels[5] : kernels[4] @inbounds kernels[3](f_new, f, d2, d3, l; threads, blocks) - @inbounds kernels[4](f_new, deepcopy(f_new), d1, d2, d3, l; threads, blocks) - @inbounds kernels[5](f_new, deepcopy(f_new), d2, d3, l; threads, blocks) + @inbounds kernels[4](f_new, copy(f_new), d1, d2, d3, l; threads, blocks) + @inbounds kernels[5](f_new, copy(f_new), d2, d3, l; threads, blocks) return f_new end @@ -1010,7 +1047,7 @@ function transform(f::AbstractMatrix, tfm::Wenbo, ex) @inbounds _transform1!(@view(f[i, :])) end @floop ex for j in axes(f, 2) - @inbounds _transform2!(@view(f[:, j])) + @inbounds _transform2_EN_DE!(@view(f[:,j])) end return f end @@ -1030,11 +1067,14 @@ Applies a squared euclidean distance transform to an input 3D image using the We """ function transform(f::AbstractArray, tfm::Wenbo, ex) f = boolean_indicator(f) - @floop ex for k in axes(f, 3) - @inbounds _transform4!(@view(f[:, :, k])) + @floop ex for i in CartesianIndices(f[1,:,:]) + @inbounds _transform1!(@view(f[:, i])) + end + @floop ex for i in CartesianIndices(f[:,1,:]) + @inbounds _transform2_EN_DE!(@view(f[i[1], :, i[2]])) end @floop ex for i in CartesianIndices(f[:,:,1]) - @inbounds _transform2!(@view(f[i, :])) + @inbounds _transform2_EN_DE!(@view(f[i, :])) end return f end diff --git a/test/wenbo.jl b/test/wenbo.jl index 3e2c2a0..29bfe89 100644 --- a/test/wenbo.jl +++ b/test/wenbo.jl @@ -1,5 +1,5 @@ ### A Pluto.jl notebook ### -# v0.19.14 +# v0.19.22 using Markdown using InteractiveUtils @@ -286,7 +286,7 @@ if CUDA.has_cuda_gpu() 0 1 1 1 0 0 0 0 1 ] tfm = Wenbo() - test = transform(CuArray(img), tfm, ks) + test = transform(CuArray(Float32.(img)), tfm, ks) answer = CuArray([ 1.0 0.0 0.0 0.0 1.0 2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 2.0 1.0 0.0 @@ -308,7 +308,7 @@ if CUDA.has_cuda_gpu() img = rand([0, 1], 10, 10) img2 = copy(img) tfm = Wenbo() - test = transform(CUDA.CuArray(img), tfm, ks) + test = transform(CUDA.CuArray(Float32.(img)), tfm, ks) answer = transform(img2, tfm) @test test == CuArray(answer) end @@ -344,7 +344,7 @@ if CUDA.has_cuda_gpu() for i in 1:10 push!(container2, vol) end - vol_inv = CuArray(cat(container2..., dims=3)) + vol_inv = CuArray(Float32.(cat(container2..., dims=3))) tfm = Wenbo() test = transform(vol_inv, tfm, ks) a1 = img_inv @@ -367,7 +367,7 @@ if CUDA.has_cuda_gpu() img = rand([0, 1], 10, 10, 10) img2 = copy(img) tfm = Wenbo() - test = transform(CUDA.CuArray(img), tfm, ks) + test = transform(CUDA.CuArray(Float32.(img)), tfm, ks) answer = transform(img2, tfm) @test test == CuArray(answer) end