From a45d291ed79b279fcf17431399e43ceda8ae9adb Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Sun, 14 Aug 2016 23:59:06 +0200 Subject: [PATCH 01/13] Allow for AbstractVector --- REQUIRE | 3 +- src/NearestNeighbors.jl | 18 ++--- src/ball_tree.jl | 145 +++++++++++++++++++++++----------------- src/brute_tree.jl | 56 +++++++++------- src/datafreetree.jl | 26 ++++--- src/evaluation.jl | 58 ++++------------ src/hyperrectangles.jl | 49 +++++++------- src/hyperspheres.jl | 86 ++++++++++++------------ src/inrange.jl | 44 +++++++----- src/kd_tree.jl | 120 +++++++++++++++++++-------------- src/knn.jl | 40 ++++++----- src/tree_data.jl | 4 +- src/tree_ops.jl | 30 ++++----- src/utilities.jl | 23 ++++--- test/datafreetree.jl | 3 +- test/runtests.jl | 17 ++--- test/test_knn.jl | 12 ++-- 17 files changed, 385 insertions(+), 349 deletions(-) diff --git a/REQUIRE b/REQUIRE index 570e6be..0a4ac02 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,3 +1,4 @@ -julia 0.4 +julia 0.5- Distances +StaticArrays Compat 0.8.4 diff --git a/src/NearestNeighbors.jl b/src/NearestNeighbors.jl index 7ebc107..7daf112 100644 --- a/src/NearestNeighbors.jl +++ b/src/NearestNeighbors.jl @@ -1,11 +1,12 @@ -__precompile__() +# __precompile__() module NearestNeighbors using Distances - import Distances: Metric, result_type, eval_reduce, eval_end, eval_op, eval_start, evaluate +using StaticArrays + import Base.show import Compat.view @@ -22,14 +23,15 @@ export Euclidean, # Change this to enable debugging const DEBUG = false -abstract NNTree{T <: AbstractFloat, P <: Metric} +abstract NNTree{V <: AbstractVector, P <: Metric} + +typealias DistanceType Float64 +typealias MinkowskiMetric Union{Euclidean, Chebyshev, Cityblock, Minkowski} -function check_input(tree::NNTree, points::AbstractArray) - ndim_points = size(points,1) - ndim_tree = size(tree.data, 1) - if ndim_points != ndim_tree +function check_input{V1, V2}(tree::NNTree{V1}, points::Vector{V2}) + if length(V1) != length(V2) throw(ArgumentError( - "dimension of input points:$(ndim_points) and tree data:$(ndim_tree) must agree")) + "dimension of input points:$(length(V2)) and tree data:$(length(V1)) must agree")) end end diff --git a/src/ball_tree.jl b/src/ball_tree.jl index 0b25747..7a9cdb8 100644 --- a/src/ball_tree.jl +++ b/src/ball_tree.jl @@ -3,9 +3,9 @@ # which radius are determined from the given metric. # The tree uses the triangle inequality to prune the search space # when finding the neighbors to a point, -immutable BallTree{T <: AbstractFloat, M <: Metric} <: NNTree{T, M} - data::Matrix{T} # dim x n_points array with floats - hyper_spheres::Vector{HyperSphere{T}} # Each hyper sphere bounds its children +immutable BallTree{V <: AbstractVector, N, T, M <: Metric} <: NNTree{V, M} + data::Vector{V} + hyper_spheres::Vector{HyperSphere{N, T}} # Each hyper sphere bounds its children indices::Vector{Int} # Translates from tree index -> point index metric::M # Metric used for tree tree_data::TreeData # Some constants needed @@ -15,15 +15,17 @@ end # When we create the bounding spheres we need some temporary arrays. # We create a type to hold them to not allocate these arrays at every # function call and to reduce the number of parameters in the tree builder. -immutable ArrayBuffers{T <: AbstractFloat} - left::Vector{T} - right::Vector{T} - v12::Vector{T} - zerobuf::Vector{T} +immutable ArrayBuffers{N, T <: AbstractFloat} + left::MVector{N, T} + right::MVector{N, T} + v12::MVector{N, T} + zerobuf::MVector{N, T} + center::MVector{N, T} end -function ArrayBuffers(T, ndim) - ArrayBuffers{T}(zeros(T, ndim), zeros(T, ndim), zeros(T, ndim), zeros(T, ndim)) +function ArrayBuffers{N, T}(::Type{Val{N}}, ::Type{T}) + ArrayBuffers(zeros(MVector{N, T}), zeros(MVector{N, T}), zeros(MVector{N, T}), + zeros(MVector{N, T}), zeros(MVector{N, T})) end """ @@ -31,63 +33,83 @@ end Creates a `BallTree` from the data using the given `metric` and `leafsize`. """ -function BallTree{T <: AbstractFloat, M<:Metric}(data::Matrix{T}, - metric::M = Euclidean(); - leafsize::Int = 10, - reorder::Bool = true, - storedata::Bool = true, - reorderbuffer::Matrix{T} = Matrix{T}(), - indicesfor::Symbol = :data) +function BallTree{V <: AbstractArray, M <: Metric}(data::Vector{V}, + metric::M = Euclidean(); + leafsize::Int = 10, + reorder::Bool = true, + storedata::Bool = true, + reorderbuffer::Vector{V} = Vector{V}(), + indicesfor::Symbol = :data) reorder = !isempty(reorderbuffer) || (storedata ? reorder : false) tree_data = TreeData(data, leafsize) - n_d = size(data, 1) - n_p = size(data, 2) - array_buffs = ArrayBuffers(T, size(data, 1)) + n_d = length(V) + n_p = length(data) + + array_buffs = ArrayBuffers(Val{length(V)}, DistanceType) indices = collect(1:n_p) # Bottom up creation of hyper spheres so need spheres even for leafs - hyper_spheres = Array(HyperSphere{T}, tree_data.n_internal_nodes + tree_data.n_leafs) + hyper_spheres = Array(HyperSphere{length(V), eltype(V)}, tree_data.n_internal_nodes + tree_data.n_leafs) - if reorder + if reorder indices_reordered = Vector{Int}(n_p) if isempty(reorderbuffer) - data_reordered = Matrix{T}(n_d, n_p) + data_reordered = Vector{V}(n_p) else data_reordered = reorderbuffer end else # Dummy variables - indices_reordered = Vector{Int}(0) - data_reordered = Matrix{T}(0, 0) + indices_reordered = Vector{Int}() + data_reordered = Vector{V}() end # Call the recursive BallTree builder build_BallTree(1, data, data_reordered, hyper_spheres, metric, indices, indices_reordered, - 1, size(data,2), tree_data, array_buffs, reorder) + 1, length(data), tree_data, array_buffs, reorder) if reorder data = data_reordered indices = indicesfor == :data ? indices_reordered : collect(1:n_p) end - BallTree(storedata ? data : similar(data,0,0), hyper_spheres, indices, metric, tree_data, reorder) + BallTree(storedata ? data : similar(data,0), hyper_spheres, indices, metric, tree_data, reorder) +end + +function BallTree{T <: AbstractFloat, M <: Metric}(data::Matrix{T}, + metric::M = Euclidean(); + leafsize::Int = 10, + storedata::Bool = true, + reorder::Bool = true, + reorderbuffer::Matrix{T} = Matrix{T}(), + indicesfor::Symbol = :data) + dim = size(data, 1) + npoints = size(data, 2) + points = reinterpret(SVector{dim, T}, data, (length(data) ÷ dim, )) + if isempty(reorderbuffer) + reorderbuffer_points = Vector{SVector{dim, T}}() + else + reorderbuffer_points = reinterpret(SVector{dim, T}, reorderbuffer, (length(reorderbuffer) ÷ dim, )) + end + BallTree(points ,metric, leafsize = leafsize, storedata = storedata, reorder = reorder, + reorderbuffer = reorderbuffer_points, indicesfor = indicesfor) end # Recursive function to build the tree. -function build_BallTree{T <: AbstractFloat}(index::Int, - data::Matrix{T}, - data_reordered::Matrix{T}, - hyper_spheres::Vector{HyperSphere{T}}, - metric::Metric, - indices::Vector{Int}, - indices_reordered::Vector{Int}, - low::Int, - high::Int, - tree_data::TreeData, - array_buffs::ArrayBuffers{T}, - reorder::Bool) +function build_BallTree{V <: AbstractVector, N, T}(index::Int, + data::Vector{V}, + data_reordered::Vector{V}, + hyper_spheres::Vector{HyperSphere{N, T}}, + metric::Metric, + indices::Vector{Int}, + indices_reordered::Vector{Int}, + low::Int, + high::Int, + tree_data::TreeData, + array_buffs::ArrayBuffers{N, T}, + reorder::Bool) n_points = high - low + 1 # Points left if n_points <= tree_data.leafsize @@ -95,7 +117,7 @@ function build_BallTree{T <: AbstractFloat}(index::Int, reorder_data!(data_reordered, data, index, indices, indices_reordered, tree_data) end # Create bounding sphere of points in leaf node by brute force - hyper_spheres[index] = create_bsphere(data, metric, indices, low, high) + hyper_spheres[index] = create_bsphere(data, metric, indices, low, high, array_buffs) return end @@ -124,22 +146,23 @@ function build_BallTree{T <: AbstractFloat}(index::Int, array_buffs) end -function _knn{T}(tree::BallTree{T}, - point::AbstractVector{T}, - k::Int, - skip::Function) +function _knn(tree::BallTree, + point::AbstractVector, + k::Int, + skip::Function) best_idxs = [-1 for _ in 1:k] - best_dists = [typemax(T) for _ in 1:k] + best_dists = [typemax(DistanceType) for _ in 1:k] knn_kernel!(tree, 1, point, best_idxs, best_dists, skip) return best_idxs, best_dists end -function knn_kernel!{T, F}(tree::BallTree{T}, - index::Int, - point::AbstractArray{T}, - best_idxs ::Vector{Int}, - best_dists::Vector{T}, - skip::F) + +function knn_kernel!{V, F}(tree::BallTree{V}, + index::Int, + point::AbstractArray, + best_idxs ::Vector{Int}, + best_dists::Vector, + skip::F) @NODE 1 if isleaf(tree.tree_data.n_internal_nodes, index) add_points_knn!(best_dists, best_idxs, tree, index, point, true, skip) @@ -149,8 +172,8 @@ function knn_kernel!{T, F}(tree::BallTree{T}, left_sphere = tree.hyper_spheres[getleft(index)] right_sphere = tree.hyper_spheres[getright(index)] - left_dist = max(zero(T), evaluate(tree.metric, point, left_sphere.center) - left_sphere.r) - right_dist = max(zero(T), evaluate(tree.metric, point, right_sphere.center) - right_sphere.r) + left_dist = max(zero(eltype(V)), evaluate(tree.metric, point, left_sphere.center) - left_sphere.r) + right_dist = max(zero(eltype(V)), evaluate(tree.metric, point, right_sphere.center) - right_sphere.r) if left_dist <= best_dists[1] || right_dist <= best_dists[1] if left_dist < right_dist @@ -168,20 +191,20 @@ function knn_kernel!{T, F}(tree::BallTree{T}, return end -function _inrange{T}(tree::BallTree{T}, - point::AbstractVector{T}, +function _inrange{V}(tree::BallTree{V}, + point::AbstractVector, radius::Number) idx_in_ball = Int[] # List to hold the indices in range - ball = HyperSphere(point, radius) # The "query ball" + ball = HyperSphere(convert(V, point), convert(eltype(V), radius)) # The "query ball" inrange_kernel!(tree, 1, point, ball, idx_in_ball) # Call the recursive range finder return idx_in_ball end -function inrange_kernel!{T}(tree::BallTree{T}, - index::Int, - point::Vector{T}, - query_ball::HyperSphere{T}, - idx_in_ball::Vector{Int}) +function inrange_kernel!(tree::BallTree, + index::Int, + point::AbstractVector, + query_ball::HyperSphere, + idx_in_ball::Vector{Int}) @NODE 1 sphere = tree.hyper_spheres[index] diff --git a/src/brute_tree.jl b/src/brute_tree.jl index dd4af02..1a61b81 100644 --- a/src/brute_tree.jl +++ b/src/brute_tree.jl @@ -1,7 +1,6 @@ -immutable BruteTree{T <: AbstractFloat, M <: Metric} <: NNTree{T, M} - data::Matrix{T} +immutable BruteTree{V <: AbstractVector, M <: Metric} <: NNTree{V, M} + data::Vector{V} metric::M - leafsize::Int reordered::Bool end @@ -10,34 +9,41 @@ end Creates a `BruteTree` from the data using the given `metric`. """ -function BruteTree{T <: AbstractFloat}(data::Matrix{T}, metric::Metric=Euclidean(); - reorder::Bool=false, leafsize=0, storedata::Bool=true) - BruteTree(storedata ? data : similar(data,0,0), metric, 0, reorder) +function BruteTree{V <: AbstractVector}(data::Vector{V}, metric::Metric = Euclidean(); + reorder::Bool=false, leafsize::Int=0, storedata::Bool=true) + BruteTree(storedata ? data : Vector{V}(), metric, reorder) end -function _knn{T}(tree::BruteTree{T}, - point::AbstractVector{T}, +function BruteTree{T}(data::Matrix{T}, metric::Metric = Euclidean(); + reorder::Bool=false, leafsize::Int=0, storedata::Bool=true) + dim = size(data, 1) + npoints = size(data, 2) + BruteTree(reinterpret(SVector{dim, T}, data, (length(data) ÷ dim, )), + metric, reorder = reorder, leafsize = leafsize, storedata = storedata) +end + +function _knn{V}(tree::BruteTree{V}, + point::AbstractVector, k::Int, skip::Function) best_idxs = [-1 for _ in 1:k] - best_dists = [typemax(T) for _ in 1:k] + best_dists = [typemax(DistanceType) for _ in 1:k] knn_kernel!(tree, point, best_idxs, best_dists, skip) return best_idxs, best_dists end -function knn_kernel!{T, F}(tree::BruteTree{T}, - point::AbstractArray{T}, +function knn_kernel!{V, F}(tree::BruteTree{V}, + point::AbstractVector, best_idxs::Vector{Int}, - best_dists::Vector{T}, + best_dists::Vector{DistanceType}, skip::F) - - for i in 1:size(tree.data, 2) - if skip != always_false && skip(i) + for i in 1:length(tree.data) + if skip(i) continue end @POINT 1 - dist_d = evaluate(tree.metric, tree.data, point, i) + dist_d = evaluate(tree.metric, tree.data[i], point) if dist_d <= best_dists[1] best_dists[1] = dist_d best_idxs[1] = i @@ -46,22 +52,22 @@ function knn_kernel!{T, F}(tree::BruteTree{T}, end end -function _inrange{T}(tree::BruteTree{T}, - point::AbstractVector{T}, - radius::Number) +function _inrange(tree::BruteTree, + point::AbstractVector, + radius::Number) idx_in_ball = Int[] inrange_kernel!(tree, point, radius, idx_in_ball) return idx_in_ball end -function inrange_kernel!{T}(tree::BruteTree{T}, - point::Vector{T}, - r::Number, - idx_in_ball::Vector{Int}) - for i in 1:size(tree.data, 2) +function inrange_kernel!(tree::BruteTree, + point::AbstractVector, + r::Number, + idx_in_ball::Vector{Int}) + for i in 1:length(tree.data) @POINT 1 - d = evaluate(tree.metric, tree.data, point, i) + d = evaluate(tree.metric, tree.data[i], point) if d <= r push!(idx_in_ball, i) end diff --git a/src/datafreetree.jl b/src/datafreetree.jl index 391f8af..adc44a1 100644 --- a/src/datafreetree.jl +++ b/src/datafreetree.jl @@ -1,9 +1,8 @@ # A DataFreeTree wraps a descendant of NNTree # which does not contain a copy of the data -immutable DataFreeTree{T <: AbstractFloat, M <: Metric} - size::Tuple{Int,Int} - hash::UInt - tree::NNTree{T,M} +immutable DataFreeTree{N <: NNTree} + hash::UInt64 + tree::N end """ @@ -21,9 +20,10 @@ is provided, reordering is performed and the contents of `reorderbuffer` have to `indicesfor` controlls whether the indices returned by the query functions should refer to `data` or the `reorderbuffer`. Valid values are `:data` and `:reordered`. """ -function DataFreeTree{T<:NNTree}(::Type{T}, data, args...; reorderbuffer = data[:,1:0], kargs...) +function DataFreeTree{T<:NNTree}(::Type{T}, data, args...; reorderbuffer = data[:, 1:0], kargs...) tree = T(data, args...; storedata = false, reorderbuffer = reorderbuffer, kargs...) - DataFreeTree(size(data), hash(tree.reordered ? reorderbuffer : data), tree) + + DataFreeTree(hash(tree.reordered ? reorderbuffer : data), tree) end """ @@ -31,15 +31,23 @@ end Returns the `KDTree`/`BallTree` wrapped by `datafreetree`, set up to use `data` for the points data. """ -function injectdata{T,M}(datafreetree::DataFreeTree{T,M}, data::Matrix{T}) - if size(data) != datafreetree.size - throw(DimensionMismatch("NearestNeighbors:injectdata: The size of 'data' $(data) does not match the data array used to construct the tree $(datafreetree.size).")) +function injectdata{T}(datafreetree::DataFreeTree, data::Matrix{T}) + dim = size(data, 1) + npoints = size(data, 2) + if isbits(T) + new_data = reinterpret(SVector{dim, T}, data, (npoints, )) + else + new_data = SVector{dim, T}[SVector{dim, T}(data[:, i]) for i in 1:npoints] end + injectdata(datafreetree, new_data) +end +function injectdata(datafreetree::DataFreeTree, data::Vector) if hash(data) != datafreetree.hash throw(ArgumentError("NearestNeighbors:injectdata: The hash of 'data' does not match the hash of the data array used to construct the tree.")) end + typ = typeof(datafreetree.tree) fields = map(x-> getfield(datafreetree.tree, x), fieldnames(datafreetree.tree))[2:end] typ(data, fields...) diff --git a/src/evaluation.jl b/src/evaluation.jl index b094780..7364acb 100644 --- a/src/evaluation.jl +++ b/src/evaluation.jl @@ -1,16 +1,15 @@ -typealias MinkowskiMetric Union{Euclidean, Chebyshev, Cityblock, Minkowski} +@inline eval_pow(::MinkowskiMetric, s) = abs(s) +@inline eval_pow(::Euclidean, s) = abs2(s) +@inline eval_pow(d::Minkowski, s) = abs(s)^d.p -@inline eval_start(m::Metric, a::AbstractMatrix, b::AbstractArray, col::Int) = eval_start(m, a, b) -@inline eval_start(::Chebyshev, a::AbstractMatrix, b::AbstractArray, col::Int) = abs(a[1,col] - b[1]) -@inline function eval_start(::SpanNormDist, a::AbstractArray, b::AbstractArray, col::Int) - a[1,col] - b[1], a[1, col]- b[1] -end +@inline eval_diff(::MinkowskiMetric, a, b) = a - b +@inline eval_diff(::Chebyshev, a, b) = b -function Distances.evaluate(d::Distances.UnionMetrics, a::AbstractMatrix, - b::AbstractArray, col::Int, do_end::Bool=true) - s = eval_start(d, a, b, col) +function Distances.evaluate(d::Distances.UnionMetrics, a::AbstractVector, + b::AbstractVector, do_end::Bool=true) + s = eval_start(d, a, b) @simd for I in eachindex(b) - @inbounds ai = a[I, col] + @inbounds ai = a[I] @inbounds bi = b[I] s = eval_reduce(d, s, eval_op(d, ai, bi)) end @@ -21,40 +20,7 @@ function Distances.evaluate(d::Distances.UnionMetrics, a::AbstractMatrix, end end -# As above but stops evaluating after cumulutative sum gets larger than -# break_at -# TODO: Use this in the inrange methods -function Distances.evaluate(d::Distances.UnionMetrics, a::AbstractMatrix, - b::AbstractArray, col::Int, break_at::Number, do_end::Bool=true) - s = eval_start(d, a, b, col) - for I in eachindex(b) - @inbounds ai = a[I, col] - @inbounds bi = b[I] - s = eval_reduce(d, s, eval_op(d, ai, bi)) - if s > break_at - break - end - end - if do_end - return eval_end(d, s) - else - return s - end -end - -function Distances.evaluate(d::Distances.PreMetric, a::AbstractMatrix, - b::AbstractArray, col::Int, do_end::Bool=true) - evaluate(d, view(a, :, col), b) +function Distances.evaluate(d::Distances.PreMetric, a::AbstractVector, + b::AbstractArray, do_end::Bool=true) + evaluate(d, a, b) end - -function Distances.evaluate(d::Distances.PreMetric, a::AbstractMatrix, - b::AbstractArray, col::Int, break_at::Number, do_end::Bool=true) - evaluate(d, view(a, :, col), b) -end - -@inline eval_pow(::MinkowskiMetric, s) = abs(s) -@inline eval_pow(::Euclidean, s) = abs2(s) -@inline eval_pow(d::Minkowski, s) = abs(s)^d.p - -@inline eval_diff(::MinkowskiMetric, a, b) = a - b -@inline eval_diff(::Chebyshev, a, b) = b diff --git a/src/hyperrectangles.jl b/src/hyperrectangles.jl index d5fe5e4..2b565db 100644 --- a/src/hyperrectangles.jl +++ b/src/hyperrectangles.jl @@ -1,35 +1,35 @@ -immutable HyperRectangle{T <: AbstractFloat} +# abstract HyperRectangle{N, T} + +immutable HyperRectangle{T} mins::Vector{T} maxes::Vector{T} end -@inline max!(hr::HyperRectangle, dim::Int, val) = hr.maxes[dim] = val -@inline min!(hr::HyperRectangle, dim::Int, val) = hr.mins[dim] = val - # Computes a bounding box around a point cloud -function compute_bbox{T}(data::Matrix{T}) - n_d = size(data,1) - n_p = size(data,2) - maxes = Array(T, n_d) - mins = Array(T, n_d) - @inbounds for j in 1:n_d +function compute_bbox{V <: AbstractVector}(data::Vector{V}) + @assert length(data) != 0 + T = eltype(V) + n_dim = length(V) + maxes = Vector{T}(n_dim) + mins = Vector{T}(n_dim) + @inbounds for j in 1:length(V) dim_max = typemin(T) dim_min = typemax(T) - for k in 1:n_p - dim_max = max(data[j, k], dim_max) - dim_min = min(data[j, k], dim_min) + for k in 1:length(data) + dim_max = max(data[k][j], dim_max) + dim_min = min(data[k][j], dim_min) end maxes[j] = dim_max mins[j] = dim_min end - return HyperRectangle{T}(mins, maxes) + return HyperRectangle(mins, maxes) end # Splits a hyper rectangle into two rectangles by dividing the # rectangle at a specific value in a given dimension. -function split{T <: AbstractFloat}(hyper_rec::HyperRectangle{T}, - dim::Int, - value::T) +function Base.split(hyper_rec::HyperRectangle, + dim::Int, + value::Number) new_max = copy(hyper_rec.maxes) new_max[dim] = value @@ -40,9 +40,8 @@ function split{T <: AbstractFloat}(hyper_rec::HyperRectangle{T}, HyperRectangle(new_min, hyper_rec.maxes) end -function find_maxspread{T <: AbstractFloat}(hyper_rec::HyperRectangle{T}) +function find_maxspread{T}(hyper_rec::HyperRectangle{T}) # Find the dimension where we have the largest spread. - split_dim = 1 max_spread = zero(T) @@ -59,18 +58,18 @@ end ############################################ # Rectangle - Point functions ############################################ -@inline function get_min_dim{T <: AbstractFloat}(rec::HyperRectangle{T}, point::Vector{T}, dim::Int) +@inline function get_min_dim(rec::HyperRectangle, point::AbstractVector, dim::Int) @inbounds d = abs2(max(0, max(rec.mins[dim] - point[dim], point[dim] - rec.maxes[dim]))) d end -@inline function get_max_dim{T <: AbstractFloat}(rec::HyperRectangle{T}, point::Vector{T}, dim::Int) +@inline function get_max_dim(rec::HyperRectangle, point::AbstractVector, dim::Int) @inbounds d = abs2(max(rec.maxes[dim] - point[dim], point[dim] - rec.mins[dim])) d end # Max distance between rectangle and point -@inline function get_max_distance{T <: AbstractFloat}(rec::HyperRectangle{T}, point::Vector{T}) +@inline function get_max_distance{T}(rec::HyperRectangle, point::AbstractVector{T}) max_dist = zero(T) @inbounds @simd for dim in eachindex(point) max_dist += abs2(max(rec.maxes[dim] - point[dim], point[dim] - rec.mins[dim])) @@ -79,7 +78,7 @@ end end # Min distance between rectangle and point -@inline function get_min_distance{T <: AbstractFloat}(rec::HyperRectangle{T}, point::Vector{T}) +@inline function get_min_distance{T}(rec::HyperRectangle, point::AbstractVector{T}) min_dist = zero(T) @inbounds @simd for dim in eachindex(point) min_dist += abs2(max(0, max(rec.mins[dim] - point[dim], point[dim] - rec.maxes[dim]))) @@ -88,14 +87,14 @@ end end # (Min, Max) distance between rectangle and point -@inline function get_min_max_distance{T <: AbstractFloat}(rec::HyperRectangle{T}, point::Vector{T}) +@inline function get_min_max_distance{T}(rec::HyperRectangle, point::AbstractVector{T}) min_dist = get_min_distance(rec, point) max_dist = get_max_distance(rec, point) return min_dist, max_dist end # (Min, Max) distance between rectangle and point for a certain dim -@inline function get_min_max_dim{T <: AbstractFloat}(rec::HyperRectangle{T}, point::Vector{T}, dim::Int) +@inline function get_min_max_dim{T}(rec::HyperRectangle, point::AbstractVector{T}, dim::Int) min_dist_dim = get_min_dim(rec, point, dim) max_dist_dim = get_max_dim(rec, point, dim) return min_dist_dim, max_dist_dim diff --git a/src/hyperspheres.jl b/src/hyperspheres.jl index 3ef6fdf..728cf51 100644 --- a/src/hyperspheres.jl +++ b/src/hyperspheres.jl @@ -1,79 +1,75 @@ typealias NormMetric Union{Euclidean, Chebyshev, Cityblock, Minkowski, WeightedEuclidean, WeightedCityblock, WeightedMinkowski, Mahalanobis} -immutable HyperSphere{T <: AbstractFloat} - center::Vector{T} +immutable HyperSphere{N, T <: AbstractFloat} + center::SVector{N, T} r::T end -HyperSphere{T <: AbstractFloat}(center::Vector{T}, r) = HyperSphere(center, T(r)) - -@inline ndim(hs::HyperSphere) = length(hs.center) - -@inline function intersects{T <: AbstractFloat, M <: Metric}(m::M, - s1::HyperSphere{T}, - s2::HyperSphere{T}) +@inline function intersects{T <: AbstractFloat, N, M <: Metric}(m::M, + s1::HyperSphere{N, T}, + s2::HyperSphere{N, T}) evaluate(m, s1.center, s2.center) <= s1.r + s2.r end -@inline function encloses{T <: AbstractFloat, M <: Metric}(m::M, - s1::HyperSphere{T}, - s2::HyperSphere{T}) +@inline function encloses{T <: AbstractFloat, N, M <: Metric}(m::M, + s1::HyperSphere{N, T}, + s2::HyperSphere{N, T}) evaluate(m, s1.center, s2.center) + s1.r <= s2.r end -@inline function interpolate{T <: AbstractFloat, M <: NormMetric}(m::M, - c1::Vector{T}, - c2::Vector{T}, +@inline function interpolate{V <: AbstractVector, M <: NormMetric}(m::M, + c1::V, + c2::V, x, - d) + d, + ab) alpha = x / d @assert length(c1) == length(c2) - c = similar(c1) - @inbounds for i in eachindex(c) - c[i] = (1 - alpha) .* c1[i] + alpha .* c2[i] + @inbounds for i in eachindex(ab.center) + ab.center[i] = (1 - alpha) .* c1[i] + alpha .* c2[i] end - return c, true + return ab.center, true end -@inline function interpolate{T <: AbstractFloat, M <: Metric}(m::M, - c1::Vector{T}, - c2::Vector{T}, - x, - d) - return copy(c1), false +@inline function interpolate{V <: AbstractVector, M <: Metric}(m::M, + c1::V, + c2::V, + x, + d, + ab) + return c1, false end -function create_bsphere{T}(data::Matrix{T}, metric::Metric, indices::Vector{Int}, low, high) +function create_bsphere{V}(data::Vector{V}, metric::Metric, indices::Vector{Int}, low, high, ab) n_dim = size(data,1) n_points = high - low + 1 - # First find center of all points - center = zeros(T, n_dim) + fill!(ab.center, 0.0) for i in low:high - for j in 1:n_dim - center[j] += data[j, indices[i]] - end + for j in length(ab.center) + ab.center[j] += data[indices[i]][j] + end end - scale!(center, 1 / n_points) + scale!(ab.center, 1 / n_points) # Then find r - r = zero(T) + r = zero(DistanceType) for i in low:high - r = max(r, evaluate(metric, data, center, indices[i])) + r = max(r, evaluate(metric, data[indices[i]], ab.center)) end - r += eps(T) - return HyperSphere(center, r) + r += eps(DistanceType) + return HyperSphere(SVector{length(V), eltype(V)}(ab.center), r) end # Creates a bounding sphere from two other spheres -function create_bsphere{T <: AbstractFloat}(m::Metric, - s1::HyperSphere{T}, - s2::HyperSphere{T}, - ab) +function create_bsphere{N, T <: AbstractFloat}(m::Metric, + s1::HyperSphere{N, T}, + s2::HyperSphere{N, T}, + ab) if encloses(m, s1, s2) - return HyperSphere{T}(copy(s2.center), s2.r) + return HyperSphere(s2.center, s2.r) elseif encloses(m, s2, s1) - return HyperSphere{T}(copy(s1.center), s1.r) + return HyperSphere(s1.center, s1.r) end # Compute the distance x along a geodesic from s1.center to s2.center @@ -81,12 +77,12 @@ function create_bsphere{T <: AbstractFloat}(m::Metric, # neither s1 nor s2 contains the other) dist = evaluate(m, s1.center, s2.center) x = 0.5 * (s2.r - s1.r + dist) - center, is_exact_center = interpolate(m, s1.center, s2.center, x, dist) + center, is_exact_center = interpolate(m, s1.center, s2.center, x, dist, ab) if is_exact_center rad = 0.5 * (s2.r + s1.r + dist) else rad = max(s1.r + evaluate(m, s1.center, center), s2.r + evaluate(m, s2.center, center)) end - HyperSphere{T}(center, rad) + return HyperSphere(SVector{N, T}(center), rad) end diff --git a/src/inrange.jl b/src/inrange.jl index 29f7153..d4f779d 100644 --- a/src/inrange.jl +++ b/src/inrange.jl @@ -4,23 +4,20 @@ Find all the points in the tree which is closer than `radius` to `points`. If `sortres = true` the resulting indices are sorted. """ -function inrange{T <: AbstractFloat}(tree::NNTree{T}, - points::AbstractArray{T}, - radius::Number, - sortres=false) +function inrange{T <: AbstractVector}(tree::NNTree, + points::Vector{T}, + radius::Number, + sortres=false) check_input(tree, points) if radius < 0 throw(ArgumentError("the query radius r must be ≧ 0")) end - idxs = Array(Vector{Int}, size(points, 2)) - point = zeros(T, size(points, 1)) + idxs = Array(Vector{Int}, length(points)) - for i in 1:size(points, 2) - for j in 1:size(points, 1) - point[j] = points[j, i] - end + for i in 1:length(points) + point = points[i] idx_in_ball = _inrange(tree, point, radius) if tree.reordered @inbounds for j in 1:length(idx_in_ball) @@ -32,15 +29,26 @@ function inrange{T <: AbstractFloat}(tree::NNTree{T}, end idxs[i] = idx_in_ball end - return do_return_inrange(idxs, points) + return idxs end -do_return_inrange(idxs, ::AbstractVector) = idxs[1] -do_return_inrange(idxs, ::AbstractMatrix) = idxs +function inrange{V, T <: Number}(tree::NNTree{V}, point::AbstractVector{T}, radius::Number, sortres=false) + idxs = inrange(tree, Vector{T}[point], radius, sortres) + return idxs[1] +end -function inrange{T <: AbstractFloat, P <: Real}(tree::NNTree{T}, - points::AbstractArray{P}, - radius::Number, - sortres=false) - inrange(tree, map(T, points), radius, sortres) +function inrange{V, T <: Number}(tree::NNTree{V}, point::Vector{T}, radius::Number, sortres=false) + idxs = inrange(tree, [convert(SVector{length(point), T}, point)], radius, sortres) + return idxs[1] +end + +function inrange{V, T <: Number}(tree::NNTree{V}, point::Matrix{T}, radius::Number, sortres=false) + dim = size(point, 1) + npoints = size(point, 2) + if isbits(T) + new_data = reinterpret(SVector{dim, T}, point, (length(point) ÷ dim, )) + else + new_data = SVector{dim, T}[SVector{dim, T}(point[:, i]) for i in 1:npoints] + end + inrange(tree, new_data, radius, sortres) end diff --git a/src/kd_tree.jl b/src/kd_tree.jl index 4500c7f..fe19aa5 100644 --- a/src/kd_tree.jl +++ b/src/kd_tree.jl @@ -7,8 +7,8 @@ immutable KDNode{T} split_dim::Int # The dimension the hyper rectangle was split at end -immutable KDTree{T <: AbstractFloat, M <: MinkowskiMetric} <: NNTree{T, M} - data::Matrix{T} # dim x n_p array with floats +immutable KDTree{V <: AbstractVector, M <: MinkowskiMetric, T} <: NNTree{V, M} + data::Vector{V} hyper_rec::HyperRectangle{T} indices::Vector{Int} metric::M @@ -17,40 +17,40 @@ immutable KDTree{T <: AbstractFloat, M <: MinkowskiMetric} <: NNTree{T, M} reordered::Bool end + """ KDTree(data [, metric = Euclidean(), leafsize = 10]) -> kdtree Creates a `KDTree` from the data using the given `metric` and `leafsize`. The `metric` must be a `MinkowskiMetric`. """ -function KDTree{T <: AbstractFloat, M <: MinkowskiMetric}(data::Matrix{T}, +function KDTree{V <: AbstractArray, M <: MinkowskiMetric}(data::Vector{V}, metric::M = Euclidean(); leafsize::Int = 10, storedata::Bool = true, reorder::Bool = true, - reorderbuffer::Matrix{T} = Matrix{T}(), + reorderbuffer::Vector{V} = Vector{V}(), indicesfor::Symbol = :data) - reorder = !isempty(reorderbuffer) || (storedata ? reorder : false) tree_data = TreeData(data, leafsize) - n_d = size(data, 1) - n_p = size(data, 2) + n_d = length(V) + n_p = length(data) indices = collect(1:n_p) - nodes = Array(KDNode{T}, tree_data.n_internal_nodes) + nodes = Array(KDNode{eltype(V)}, tree_data.n_internal_nodes) if reorder indices_reordered = Vector{Int}(n_p) if isempty(reorderbuffer) - data_reordered = Matrix{T}(n_d, n_p) + data_reordered = Vector{V}(n_p) else data_reordered = reorderbuffer end else # Dummy variables - indices_reordered = Vector{Int}(0) - data_reordered = Matrix{T}(0, 0) + indices_reordered = Vector{Int}() + data_reordered = Vector{V}() end # Create first bounding hyper rectangle that bounds all the input points @@ -58,26 +58,45 @@ function KDTree{T <: AbstractFloat, M <: MinkowskiMetric}(data::Matrix{T}, # Call the recursive KDTree builder build_KDTree(1, data, data_reordered, hyper_rec, nodes, indices, indices_reordered, - 1, size(data,2), tree_data, reorder) + 1, length(data), tree_data, reorder) if reorder data = data_reordered indices = indicesfor == :data ? indices_reordered : collect(1:n_p) end - KDTree{T, M}(storedata ? data : similar(data,0,0), hyper_rec, indices, metric, nodes, tree_data, reorder) + KDTree(storedata ? data : similar(data,0), hyper_rec, indices, metric, nodes, tree_data, reorder) +end + +function KDTree{T <: AbstractFloat, M <: MinkowskiMetric}(data::Matrix{T}, + metric::M = Euclidean(); + leafsize::Int = 10, + storedata::Bool = true, + reorder::Bool = true, + reorderbuffer::Matrix{T} = Matrix{T}(), + indicesfor::Symbol = :data) + dim = size(data, 1) + npoints = size(data, 2) + points = reinterpret(SVector{dim, T}, data, (length(data) ÷ dim, )) + if isempty(reorderbuffer) + reorderbuffer_points = Vector{SVector{dim, T}}() + else + reorderbuffer_points = reinterpret(SVector{dim, T}, reorderbuffer, (length(reorderbuffer) ÷ dim, )) + end + KDTree(points ,metric, leafsize = leafsize, storedata = storedata, reorder = reorder, + reorderbuffer = reorderbuffer_points, indicesfor = indicesfor) end -function build_KDTree{T <: AbstractFloat}(index::Int, - data::Matrix{T}, - data_reordered::Matrix{T}, - hyper_rec::HyperRectangle{T}, - nodes::Vector{KDNode{T}}, - indices::Vector{Int}, - indices_reordered::Vector{Int}, - low::Int, - high::Int, - tree_data::TreeData, - reorder::Bool) +function build_KDTree{V <: AbstractVector, T}(index::Int, + data::Vector{V}, + data_reordered::Vector{V}, + hyper_rec::HyperRectangle, + nodes::Vector{KDNode{T}}, + indices::Vector{Int}, + indices_reordered::Vector{Int}, + low::Int, + high::Int, + tree_data::TreeData, + reorder::Bool) n_p = high - low + 1 # Points left if n_p <= tree_data.leafsize if reorder @@ -90,8 +109,8 @@ function build_KDTree{T <: AbstractFloat}(index::Int, split_dim = 1 max_spread = zero(T) - # Find dimension and and spread where the spread is maximal - for d in 1:size(data, 1) + # Find dimension and spread where the spread is maximal + for d in 1:length(V) spread = hyper_rec.maxes[d] - hyper_rec.mins[d] if spread > max_spread max_spread = spread @@ -101,7 +120,7 @@ function build_KDTree{T <: AbstractFloat}(index::Int, select_spec!(indices, mid_idx, low, high, data, split_dim) - split_val = data[split_dim, indices[mid_idx]] + split_val = data[indices[mid_idx]][split_dim] lo = hyper_rec.mins[split_dim] hi = hyper_rec.maxes[split_dim] @@ -123,12 +142,12 @@ function build_KDTree{T <: AbstractFloat}(index::Int, end -function _knn{T}(tree::KDTree{T}, - point::AbstractVector{T}, - k::Int, - skip::Function) +function _knn(tree::KDTree, + point::AbstractVector, + k::Int, + skip::Function) best_idxs = [-1 for _ in 1:k] - best_dists = [typemax(T) for _ in 1:k] + best_dists = [typemax(DistanceType) for _ in 1:k] init_min = get_min_distance(tree.hyper_rec, point) knn_kernel!(tree, 1, point, best_idxs, best_dists, init_min, skip) @simd for i in eachindex(best_dists) @@ -137,12 +156,12 @@ function _knn{T}(tree::KDTree{T}, return best_idxs, best_dists end -function knn_kernel!{T, F}(tree::KDTree{T}, +function knn_kernel!{V, F}(tree::KDTree{V}, index::Int, - point::Vector{T}, + point::AbstractVector, best_idxs ::Vector{Int}, - best_dists::Vector{T}, - min_dist::T, + best_dists::Vector, + min_dist::DistanceType, skip::F) @NODE 1 # At a leaf node. Go through all points in node and add those in range @@ -162,16 +181,15 @@ function knn_kernel!{T, F}(tree::KDTree{T}, if split_diff > 0 close = getright(index) far = getleft(index) - ddiff = max(zero(T), p_dim - hi) + ddiff = max(zero(eltype(V)), p_dim - hi) else close = getleft(index) far = getright(index) - ddiff = max(zero(T), lo - p_dim) + ddiff = max(zero(eltype(V)), lo - p_dim) end # Always call closer sub tree knn_kernel!(tree, close, point, best_idxs, best_dists, min_dist, skip) - split_diff_pow = eval_pow(M, split_diff) ddiff_pow = eval_pow(M, ddiff) diff_tot = eval_diff(M, split_diff_pow, ddiff_pow) @@ -182,23 +200,23 @@ function knn_kernel!{T, F}(tree::KDTree{T}, return end -function _inrange{T}(tree::KDTree{T}, - point::AbstractVector{T}, - radius::Number) +function _inrange(tree::KDTree, + point::AbstractVector, + radius::Number) idx_in_ball = Int[] init_min = get_min_distance(tree.hyper_rec, point) - inrange_kernel!(tree, 1, point, eval_op(tree.metric, radius, zero(T)), idx_in_ball, + inrange_kernel!(tree, 1, point, eval_op(tree.metric, radius, zero(DistanceType)), idx_in_ball, init_min) return idx_in_ball end # Explicitly check the distance between leaf node and point while traversing -function inrange_kernel!{T}(tree::KDTree{T}, - index::Int, - point::Vector{T}, - r::Number, - idx_in_ball::Vector{Int}, - min_dist::T) +function inrange_kernel!(tree::KDTree, + index::Int, + point::AbstractVector, + r::Number, + idx_in_ball::Vector{Int}, + min_dist) @NODE 1 # Point is outside hyper rectangle, skip the whole sub tree if min_dist > r @@ -222,11 +240,11 @@ function inrange_kernel!{T}(tree::KDTree{T}, if split_diff > 0 # Point is to the right of the split value close = getright(index) far = getleft(index) - ddiff = max(zero(T), p_dim - hi) + ddiff = max(zero(DistanceType), p_dim - hi) else # Point is to the left of the split value close = getleft(index) far = getright(index) - ddiff = max(zero(T), lo - p_dim) + ddiff = max(zero(DistanceType), lo - p_dim) end # Call closer sub tree inrange_kernel!(tree, close, point, r, idx_in_ball, min_dist) diff --git a/src/knn.jl b/src/knn.jl index 9ab9832..9ddbe7e 100644 --- a/src/knn.jl +++ b/src/knn.jl @@ -6,22 +6,19 @@ in the `tree`. If `sortres = true` the result is sorted such that the results ar in the order of increasing distance to the point. `skip` is an optional predicate to determine if a point that would be returned should be skipped. """ -function knn{T <: AbstractFloat}(tree::NNTree{T}, points::AbstractArray{T}, k::Int, sortres=false, skip::Function=always_false) +function knn{V, T <: AbstractVector}(tree::NNTree{V}, points::Vector{T}, k::Int, sortres=false, skip::Function=always_false) check_input(tree, points) - n_points = size(points, 2) - n_dim = size(points, 1) + n_points = length(points) + n_dim = length(V) - if k > size(tree.data, 2) || k <= 0 + if k > length(tree.data)|| k <= 0 throw(ArgumentError("k > number of points in tree or ≦ 0")) end - dists = Array(Vector{T}, n_points) + dists = Array(Vector{DistanceType}, n_points) idxs = Array(Vector{Int}, n_points) - point = zeros(T, n_dim) for i in 1:n_points - for j in 1:size(points, 1) - point[j] = points[j, i] - end + point = points[i] best_idxs, best_dists = _knn(tree, point, k, skip) if sortres heap_sort_inplace!(best_dists, best_idxs) @@ -34,13 +31,26 @@ function knn{T <: AbstractFloat}(tree::NNTree{T}, points::AbstractArray{T}, k::I end idxs[i] = best_idxs end - return do_return(idxs, dists, points) + return idxs, dists end -do_return(idxs, dists, ::AbstractVector) = idxs[1], dists[1] -do_return(idxs, dists, ::AbstractMatrix) = idxs, dists +function knn{V, T <: Number}(tree::NNTree{V}, point::AbstractVector{T}, k::Int, sortres=false, skip::Function=always_false) + idxs, dists = knn(tree, Vector{T}[point], k, sortres, skip) + return idxs[1], dists[1] +end -# Conversions for knn if input data is not floating points -function knn{T <: AbstractFloat, P <: Real}(tree::NNTree{T}, points::AbstractArray{P}, k::Int) - knn(tree, map(T, points), k) +function knn{V, T <: Number}(tree::NNTree{V}, point::Vector{T}, k::Int, sortres=false, skip::Function=always_false) + idxs, dists = knn(tree, [convert(SVector{length(point), T}, point)], k, sortres, skip) + return idxs[1], dists[1] +end + +function knn{V, T <: Number}(tree::NNTree{V}, point::Matrix{T}, k::Int, sortres=false, skip::Function=always_false) + dim = size(point, 1) + npoints = size(point, 2) + if isbits(T) + new_data = reinterpret(SVector{dim, T}, point, (length(point) ÷ dim, )) + else + new_data = SVector{dim, T}[SVector{dim, T}(point[:, i]) for i in 1:npoints] + end + knn(tree, new_data, k, sortres, skip) end diff --git a/src/tree_data.jl b/src/tree_data.jl index 387fa27..6bed9bc 100644 --- a/src/tree_data.jl +++ b/src/tree_data.jl @@ -10,8 +10,8 @@ immutable TreeData end -function TreeData(data, leafsize) - n_dim, n_p = size(data) +function TreeData{V}(data::Vector{V}, leafsize) + n_dim, n_p = length(V), length(data) # If number of points is zero n_p == 0 && return TreeData(0, 0, 0, 0, 0, 0, 0, 0) diff --git a/src/tree_ops.jl b/src/tree_ops.jl index 1667475..31a211c 100644 --- a/src/tree_ops.jl +++ b/src/tree_ops.jl @@ -4,10 +4,10 @@ @inline getparent(i::Int) = div(i, 2) @inline isleaf(n_internal_nodes::Int, idx::Int) = idx > n_internal_nodes -function show(io::IO, tree::NNTree) +function show{V}(io::IO, tree::NNTree{V}) println(io, typeof(tree)) - println(io, " Number of points: ", size(tree.data, 2)) - println(io, " Dimensions: ", size(tree.data, 1)) + println(io, " Number of points: ", length(tree.data)) + println(io, " Dimensions: ", length(V)) println(io, " Metric: ", tree.metric) print(io, " Reordered: ", tree.reordered) end @@ -77,14 +77,12 @@ end # Store all the points in a leaf node continuously in memory in data_reordered to improve cache locality. # Also stores the mapping to get the index into the original data from the reordered data. -function reorder_data!(data_reordered, data, index, indices, indices_reordered, tree_data) +function reorder_data!{V}(data_reordered::Vector{V}, data::Vector{V}, index::Int, + indices::Vector{Int}, indices_reordered::Vector{Int}, tree_data::TreeData) for i in get_leaf_range(tree_data, index) idx = indices[i] - for j in 1:size(data_reordered, 1) - data_reordered[j, i] = data[j, idx] - end - + data_reordered[i] = data[idx] # Saves the inverse n indices_reordered[i] = idx end @@ -92,15 +90,15 @@ end # Checks the distance function and add those points that are among the k best. # Uses a heap for fast insertion. -@inline function add_points_knn!{T}(best_dists::Vector{T}, best_idxs::Vector{Int}, - tree::NNTree{T}, index::Int, point::Vector{T}, - do_end::Bool, skip::Function) +@inline function add_points_knn!(best_dists::Vector, best_idxs::Vector{Int}, + tree::NNTree, index::Int, point::AbstractVector, + do_end::Bool, skip::Function) for z in get_leaf_range(tree.tree_data, index) @POINT 1 idx = tree.reordered ? z : tree.indices[z] - dist_d = evaluate(tree.metric, tree.data, point, idx, do_end) + dist_d = evaluate(tree.metric, tree.data[idx], point, do_end) if dist_d <= best_dists[1] - if skip != always_false && skip(tree.indices[z]) + if skip(tree.indices[z]) continue end @@ -117,12 +115,12 @@ end # stop computing the distance function as soon as we reach the desired radius. # This will probably prevent SIMD and other optimizations so some care is needed # to evaluate if it is worth it. -@inline function add_points_inrange!{T}(idx_in_ball::Vector{Int}, tree::NNTree{T}, - index::Int, point::Vector{T}, r::Number, do_end::Bool) +@inline function add_points_inrange!(idx_in_ball::Vector{Int}, tree::NNTree, + index::Int, point::AbstractVector, r::Number, do_end::Bool) for z in get_leaf_range(tree.tree_data, index) @POINT 1 idx = tree.reordered ? z : tree.indices[z] - dist_d = evaluate(tree.metric, tree.data, point, idx, do_end) + dist_d = evaluate(tree.metric, tree.data[idx], point, do_end) if dist_d <= r push!(idx_in_ball, idx) end diff --git a/src/utilities.jl b/src/utilities.jl index a99dfaa..8797679 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -1,7 +1,8 @@ # Find the dimension witht the largest spread. -function find_largest_spread{T}(data::Matrix{T}, indices, low, high) +function find_largest_spread{V}(data::Vector{V}, indices, low, high) + T = eltype(V) n_points = high - low + 1 - n_dim = size(data, 1) + n_dim = length(V) split_dim = 1 max_spread = zero(T) for dim in 1:n_dim @@ -9,8 +10,8 @@ function find_largest_spread{T}(data::Matrix{T}, indices, low, high) xmax = typemin(T) # Find max and min in this dim for coordinate in 1:n_points - xmin = min(xmin, data[dim, indices[coordinate + low - 1]]) - xmax = max(xmax, data[dim, indices[coordinate + low - 1]]) + xmin = min(xmin, data[indices[coordinate + low - 1]][dim]) + xmax = max(xmax, data[indices[coordinate + low - 1]][dim]) end if xmax - xmin > max_spread # Found new max_spread, update split dimension @@ -23,12 +24,12 @@ end # Taken from https://github.com/JuliaLang/julia/blob/v0.3.5/base/sort.jl # and modified to compare against a matrix -@inline function select_spec!{T <: AbstractFloat}(v::AbstractVector, k::Int, lo::Int, - hi::Int, data::Matrix{T}, dim::Int) +@inline function select_spec!(v::Vector{Int}, k::Int, lo::Int, + hi::Int, data::Vector, dim::Int) @inbounds lo <= k <= hi || error("select index $k is out of range $lo:$hi") while lo < hi if hi-lo == 1 - if data[dim, v[hi]] < data[dim, v[lo]] + if data[v[hi]][dim] < data[v[lo]][dim] v[lo], v[hi] = v[hi], v[lo] end return @@ -36,8 +37,8 @@ end pivot = v[(lo+hi)>>>1] i, j = lo, hi while true - while data[dim, v[i]] < data[dim, pivot]; i += 1; end - while data[dim, pivot] < data[dim, v[j]] ; j -= 1; end + while data[v[i]][dim] < data[pivot][dim]; i += 1; end + while data[pivot][dim] < data[v[j]][dim]; j -= 1; end i <= j || break v[i], v[j] = v[j], v[i] i += 1; j -= 1 @@ -87,6 +88,6 @@ end end # Default skip function, always false -function always_false(i::Int) +@inline function always_false(i::Int) false -end \ No newline at end of file +end diff --git a/test/datafreetree.jl b/test/datafreetree.jl index 281305e..3a419d8 100644 --- a/test/datafreetree.jl +++ b/test/datafreetree.jl @@ -3,8 +3,7 @@ data2 = rand(2,100) data3 = rand(3,100) t = DataFreeTree(KDTree, data) - @test_throws ArgumentError injectdata(t, data2) - @test_throws DimensionMismatch injectdata(t, data3) + @test_throws ArgumentError injectdata(t, data3) for typ in [KDTree, BallTree] dfilename = tempname() diff --git a/test/runtests.jl b/test/runtests.jl index b29c1cb..a522716 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,11 +10,12 @@ end import Distances: Metric, evaluate immutable CustomMetric1 <: Metric end evaluate(::CustomMetric1, a::AbstractVector, b::AbstractVector) = maximum(abs(a - b)) -function NearestNeighbors.interpolate{T <: AbstractFloat}(::CustomMetric1, - a::Vector{T}, - b::Vector{T}, - x, - d) +function NearestNeighbors.interpolate{V <: AbstractVector}(::CustomMetric1, + a::V, + b::V, + x, + d, + ab) idx = (abs(b - a) .>= d - x) c = copy(a) c[idx] = (1 - x / d) .* a[idx] + (x / d) .* b[idx] @@ -29,7 +30,7 @@ const fullmetrics = [metrics; Hamming(); CustomMetric1(); CustomMetric2()] const trees = [KDTree, BallTree] const trees_with_brute = [BruteTree; trees] -include("test_knn.jl") -include("test_inrange.jl") -include("test_monkey.jl") +#include("test_knn.jl") +#include("test_inrange.jl") +#include("test_monkey.jl") include("datafreetree.jl") diff --git a/test/test_knn.jl b/test/test_knn.jl index e54d2f4..56bbfa8 100644 --- a/test/test_knn.jl +++ b/test/test_knn.jl @@ -15,11 +15,11 @@ import Distances.evaluate @test idxs[1] == 8 # Should be closest to top right corner @test evaluate(metric, [0.2, 0.2], zeros(2)) ≈ dists[1] - idxs, dists = knn(tree, [0.1, 0.8], 3) - @test idxs == [5, 2, 3] + idxs, dists = knn(tree, [0.1, 0.8], 3, true) + @test idxs == [3, 2, 5] - idxs, dists = knn(tree, [1//10, 8//10], 3) - @test idxs == [5, 2, 3] + idxs, dists = knn(tree, [1//10, 8//10], 3, true) + @test idxs == [3, 2, 5] @test_throws ArgumentError knn(tree, [0.1, 0.8], 10) # k > n_points @test_throws ArgumentError knn(tree, [0.1], 10) # n_dim != trees dim @@ -32,11 +32,11 @@ end @testset "tree type" for TreeType in trees_with_brute data = rand(2, 1000) tree = TreeType(data) - + idxs, dists = knn(tree, data[:, 10], 2, true) first_idx = idxs[1] second_idx = idxs[2] - + idxs, dists = knn(tree, data[:, 10], 2, true, i -> i == first_idx) @test idxs[1] == second_idx end From a28642451c6db35a058531a66afecd9618f66f61 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 15 Aug 2016 19:14:02 +0200 Subject: [PATCH 02/13] better handling of single point --- src/NearestNeighbors.jl | 11 +++++++-- src/ball_tree.jl | 15 +++++++------ src/brute_tree.jl | 15 +++++++------ src/inrange.jl | 42 +++++++++++++++++----------------- src/kd_tree.jl | 15 +++++++------ src/knn.jl | 50 ++++++++++++++++++++--------------------- test/runtests.jl | 6 ++--- 7 files changed, 81 insertions(+), 73 deletions(-) diff --git a/src/NearestNeighbors.jl b/src/NearestNeighbors.jl index 7daf112..3285698 100644 --- a/src/NearestNeighbors.jl +++ b/src/NearestNeighbors.jl @@ -1,4 +1,4 @@ -# __precompile__() +__precompile__() module NearestNeighbors @@ -28,13 +28,20 @@ abstract NNTree{V <: AbstractVector, P <: Metric} typealias DistanceType Float64 typealias MinkowskiMetric Union{Euclidean, Chebyshev, Cityblock, Minkowski} -function check_input{V1, V2}(tree::NNTree{V1}, points::Vector{V2}) +function check_input{V1, V2 <: AbstractVector}(tree::NNTree{V1}, points::Vector{V2}) if length(V1) != length(V2) throw(ArgumentError( "dimension of input points:$(length(V2)) and tree data:$(length(V1)) must agree")) end end +function check_input{V1, V2 <: Number}(tree::NNTree{V1}, point::Vector{V2}) + if length(V1) != length(point) + throw(ArgumentError( + "dimension of input points:$(length(point)) and tree data:$(length(V1)) must agree")) + end +end + include("debugging.jl") include("evaluation.jl") include("tree_data.jl") diff --git a/src/ball_tree.jl b/src/ball_tree.jl index 7a9cdb8..5810568 100644 --- a/src/ball_tree.jl +++ b/src/ball_tree.jl @@ -149,11 +149,12 @@ end function _knn(tree::BallTree, point::AbstractVector, k::Int, - skip::Function) - best_idxs = [-1 for _ in 1:k] - best_dists = [typemax(DistanceType) for _ in 1:k] + skip::Function, + best_idxs::Vector{Int}, + best_dists::Vector) + knn_kernel!(tree, 1, point, best_idxs, best_dists, skip) - return best_idxs, best_dists + return end @@ -193,11 +194,11 @@ end function _inrange{V}(tree::BallTree{V}, point::AbstractVector, - radius::Number) - idx_in_ball = Int[] # List to hold the indices in range + radius::Number, + idx_in_ball::Vector{Int}) ball = HyperSphere(convert(V, point), convert(eltype(V), radius)) # The "query ball" inrange_kernel!(tree, 1, point, ball, idx_in_ball) # Call the recursive range finder - return idx_in_ball + return end function inrange_kernel!(tree::BallTree, diff --git a/src/brute_tree.jl b/src/brute_tree.jl index 1a61b81..b89fd6e 100644 --- a/src/brute_tree.jl +++ b/src/brute_tree.jl @@ -25,11 +25,12 @@ end function _knn{V}(tree::BruteTree{V}, point::AbstractVector, k::Int, - skip::Function) - best_idxs = [-1 for _ in 1:k] - best_dists = [typemax(DistanceType) for _ in 1:k] + skip::Function, + best_idxs::Vector{Int}, + best_dists::Vector) + knn_kernel!(tree, point, best_idxs, best_dists, skip) - return best_idxs, best_dists + return end function knn_kernel!{V, F}(tree::BruteTree{V}, @@ -54,10 +55,10 @@ end function _inrange(tree::BruteTree, point::AbstractVector, - radius::Number) - idx_in_ball = Int[] + radius::Number, + idx_in_ball::Vector{Int}) inrange_kernel!(tree, point, radius, idx_in_ball) - return idx_in_ball + return end diff --git a/src/inrange.jl b/src/inrange.jl index d4f779d..c69ba31 100644 --- a/src/inrange.jl +++ b/src/inrange.jl @@ -1,3 +1,5 @@ +check_radius(r) = r < 0 && throw(ArgumentError("the query radius r must be ≧ 0")) + """ inrange(tree::NNTree, points, radius [, sortres=false]) -> indices @@ -9,37 +11,33 @@ function inrange{T <: AbstractVector}(tree::NNTree, radius::Number, sortres=false) check_input(tree, points) + check_radius(radius) - if radius < 0 - throw(ArgumentError("the query radius r must be ≧ 0")) - end - - idxs = Array(Vector{Int}, length(points)) + idxs = [Vector{Int}() for _ in 1:length(points)] for i in 1:length(points) - point = points[i] - idx_in_ball = _inrange(tree, point, radius) - if tree.reordered - @inbounds for j in 1:length(idx_in_ball) - idx_in_ball[j] = tree.indices[idx_in_ball[j]] - end - end - if sortres - sort!(idx_in_ball) - end - idxs[i] = idx_in_ball + inrange_point!(tree, points[i], radius, sortres, idxs[i]) end return idxs end -function inrange{V, T <: Number}(tree::NNTree{V}, point::AbstractVector{T}, radius::Number, sortres=false) - idxs = inrange(tree, Vector{T}[point], radius, sortres) - return idxs[1] +function inrange_point!(tree, point, radius, sortres, idx) + _inrange(tree, point, radius, idx) + if tree.reordered + @inbounds for j in 1:length(idx) + idx[j] = tree.indices[idx[j]] + end + end + sortres && sort!(idx) + return end -function inrange{V, T <: Number}(tree::NNTree{V}, point::Vector{T}, radius::Number, sortres=false) - idxs = inrange(tree, [convert(SVector{length(point), T}, point)], radius, sortres) - return idxs[1] +function inrange{V, T <: Number}(tree::NNTree{V}, point::AbstractVector{T}, radius::Number, sortres=false) + check_input(tree, point) + check_radius(radius) + idx = Int[] + inrange_point!(tree, point, radius, sortres, idx) + return idx end function inrange{V, T <: Number}(tree::NNTree{V}, point::Matrix{T}, radius::Number, sortres=false) diff --git a/src/kd_tree.jl b/src/kd_tree.jl index fe19aa5..9c5b6c4 100644 --- a/src/kd_tree.jl +++ b/src/kd_tree.jl @@ -145,15 +145,16 @@ end function _knn(tree::KDTree, point::AbstractVector, k::Int, - skip::Function) - best_idxs = [-1 for _ in 1:k] - best_dists = [typemax(DistanceType) for _ in 1:k] + skip::Function, + best_idxs::Vector{Int}, + best_dists::Vector) + init_min = get_min_distance(tree.hyper_rec, point) knn_kernel!(tree, 1, point, best_idxs, best_dists, init_min, skip) @simd for i in eachindex(best_dists) @inbounds best_dists[i] = eval_end(tree.metric, best_dists[i]) end - return best_idxs, best_dists + return end function knn_kernel!{V, F}(tree::KDTree{V}, @@ -202,12 +203,12 @@ end function _inrange(tree::KDTree, point::AbstractVector, - radius::Number) - idx_in_ball = Int[] + radius::Number, + idx_in_ball = Int[]) init_min = get_min_distance(tree.hyper_rec, point) inrange_kernel!(tree, 1, point, eval_op(tree.metric, radius, zero(DistanceType)), idx_in_ball, init_min) - return idx_in_ball + return end # Explicitly check the distance between leaf node and point while traversing diff --git a/src/knn.jl b/src/knn.jl index 9ddbe7e..a4543bd 100644 --- a/src/knn.jl +++ b/src/knn.jl @@ -1,3 +1,5 @@ +check_k(tree, k) = (k > length(tree.data)|| k <= 0) && throw(ArgumentError("k > number of points in tree or ≦ 0")) + """ knn(tree::NNTree, points, k [, sortres=false]) -> indices, distances @@ -8,40 +10,38 @@ to determine if a point that would be returned should be skipped. """ function knn{V, T <: AbstractVector}(tree::NNTree{V}, points::Vector{T}, k::Int, sortres=false, skip::Function=always_false) check_input(tree, points) - n_points = length(points) - n_dim = length(V) + check_k(tree, k) - if k > length(tree.data)|| k <= 0 - throw(ArgumentError("k > number of points in tree or ≦ 0")) + n_points = length(points) + dists = [Vector{DistanceType}(k) for _ in 1:n_points] + idxs = [Vector{Int}(k) for _ in 1:n_points] + for i in 1:n_points + knn_point!(tree, point[i], k, sortres, dists[i], idxs[i], skip) end + return idxs, dists +end - dists = Array(Vector{DistanceType}, n_points) - idxs = Array(Vector{Int}, n_points) - for i in 1:n_points - point = points[i] - best_idxs, best_dists = _knn(tree, point, k, skip) - if sortres - heap_sort_inplace!(best_dists, best_idxs) - end - dists[i] = best_dists - if tree.reordered - for j in 1:k - @inbounds best_idxs[j] = tree.indices[best_idxs[j]] - end +function knn_point!{V, T <: Number}(tree::NNTree{V}, point::AbstractVector{T}, k::Int, sortres, skip, dist, idx) + fill!(idx, -1) + fill!(dist, typemax(DistanceType)) + _knn(tree, point, k, skip, idx, dist) + sortres && heap_sort_inplace!(dist, idx) + if tree.reordered + for j in 1:k + @inbounds idx[j] = tree.indices[idx[j]] end - idxs[i] = best_idxs end - return idxs, dists end function knn{V, T <: Number}(tree::NNTree{V}, point::AbstractVector{T}, k::Int, sortres=false, skip::Function=always_false) - idxs, dists = knn(tree, Vector{T}[point], k, sortres, skip) - return idxs[1], dists[1] -end + if k > length(tree.data)|| k <= 0 + throw(ArgumentError("k > number of points in tree or ≦ 0")) + end -function knn{V, T <: Number}(tree::NNTree{V}, point::Vector{T}, k::Int, sortres=false, skip::Function=always_false) - idxs, dists = knn(tree, [convert(SVector{length(point), T}, point)], k, sortres, skip) - return idxs[1], dists[1] + idx = Vector{Int}(k) + dist = Vector{DistanceType}(k) + knn_point!(tree, point, k, sortres, skip, dist, idx) + return idx, dist end function knn{V, T <: Number}(tree::NNTree{V}, point::Matrix{T}, k::Int, sortres=false, skip::Function=always_false) diff --git a/test/runtests.jl b/test/runtests.jl index a522716..bb765c0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -30,7 +30,7 @@ const fullmetrics = [metrics; Hamming(); CustomMetric1(); CustomMetric2()] const trees = [KDTree, BallTree] const trees_with_brute = [BruteTree; trees] -#include("test_knn.jl") -#include("test_inrange.jl") -#include("test_monkey.jl") +include("test_knn.jl") +include("test_inrange.jl") +include("test_monkey.jl") include("datafreetree.jl") From 880dd0473c4d161ed5d6981bc385583245a0afaf Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Tue, 23 Aug 2016 16:46:18 +0200 Subject: [PATCH 03/13] fix buggie --- src/knn.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/knn.jl b/src/knn.jl index a4543bd..a16270e 100644 --- a/src/knn.jl +++ b/src/knn.jl @@ -16,7 +16,7 @@ function knn{V, T <: AbstractVector}(tree::NNTree{V}, points::Vector{T}, k::Int, dists = [Vector{DistanceType}(k) for _ in 1:n_points] idxs = [Vector{Int}(k) for _ in 1:n_points] for i in 1:n_points - knn_point!(tree, point[i], k, sortres, dists[i], idxs[i], skip) + knn_point!(tree, points[i], k, sortres, dists[i], idxs[i], skip) end return idxs, dists end From 62143f4dfcee2bfd595060ab04e48baa4e2980d3 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 24 Aug 2016 08:42:30 +0200 Subject: [PATCH 04/13] fixes --- src/ball_tree.jl | 11 +++-------- src/brute_tree.jl | 4 ++-- src/kd_tree.jl | 4 ++-- src/knn.jl | 6 +++--- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/src/ball_tree.jl b/src/ball_tree.jl index 5810568..b89cecb 100644 --- a/src/ball_tree.jl +++ b/src/ball_tree.jl @@ -16,16 +16,11 @@ end # We create a type to hold them to not allocate these arrays at every # function call and to reduce the number of parameters in the tree builder. immutable ArrayBuffers{N, T <: AbstractFloat} - left::MVector{N, T} - right::MVector{N, T} - v12::MVector{N, T} - zerobuf::MVector{N, T} center::MVector{N, T} end function ArrayBuffers{N, T}(::Type{Val{N}}, ::Type{T}) - ArrayBuffers(zeros(MVector{N, T}), zeros(MVector{N, T}), zeros(MVector{N, T}), - zeros(MVector{N, T}), zeros(MVector{N, T})) + ArrayBuffers(zeros(MVector{N, T})) end """ @@ -149,9 +144,9 @@ end function _knn(tree::BallTree, point::AbstractVector, k::Int, - skip::Function, best_idxs::Vector{Int}, - best_dists::Vector) + best_dists::Vector, + skip::Function) knn_kernel!(tree, 1, point, best_idxs, best_dists, skip) return diff --git a/src/brute_tree.jl b/src/brute_tree.jl index b89fd6e..eaa767c 100644 --- a/src/brute_tree.jl +++ b/src/brute_tree.jl @@ -25,9 +25,9 @@ end function _knn{V}(tree::BruteTree{V}, point::AbstractVector, k::Int, - skip::Function, best_idxs::Vector{Int}, - best_dists::Vector) + best_dists::Vector, + skip::Function) knn_kernel!(tree, point, best_idxs, best_dists, skip) return diff --git a/src/kd_tree.jl b/src/kd_tree.jl index 9c5b6c4..06bfac8 100644 --- a/src/kd_tree.jl +++ b/src/kd_tree.jl @@ -145,9 +145,9 @@ end function _knn(tree::KDTree, point::AbstractVector, k::Int, - skip::Function, best_idxs::Vector{Int}, - best_dists::Vector) + best_dists::Vector, + skip::Function) init_min = get_min_distance(tree.hyper_rec, point) knn_kernel!(tree, 1, point, best_idxs, best_dists, init_min, skip) diff --git a/src/knn.jl b/src/knn.jl index a16270e..50c7ebc 100644 --- a/src/knn.jl +++ b/src/knn.jl @@ -21,10 +21,10 @@ function knn{V, T <: AbstractVector}(tree::NNTree{V}, points::Vector{T}, k::Int, return idxs, dists end -function knn_point!{V, T <: Number}(tree::NNTree{V}, point::AbstractVector{T}, k::Int, sortres, skip, dist, idx) +function knn_point!{V, T <: Number}(tree::NNTree{V}, point::AbstractVector{T}, k::Int, sortres, dist, idx, skip) fill!(idx, -1) fill!(dist, typemax(DistanceType)) - _knn(tree, point, k, skip, idx, dist) + _knn(tree, point, k, idx, dist, skip) sortres && heap_sort_inplace!(dist, idx) if tree.reordered for j in 1:k @@ -40,7 +40,7 @@ function knn{V, T <: Number}(tree::NNTree{V}, point::AbstractVector{T}, k::Int, idx = Vector{Int}(k) dist = Vector{DistanceType}(k) - knn_point!(tree, point, k, sortres, skip, dist, idx) + knn_point!(tree, point, k, sortres, dist, idx, skip) return idx, dist end From e3a061e498f26b08052641626de96e74a78348c9 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 24 Aug 2016 09:25:12 +0200 Subject: [PATCH 05/13] fix some lint stuff --- src/NearestNeighbors.jl | 4 ++-- src/ball_tree.jl | 1 - src/brute_tree.jl | 1 - src/evaluation.jl | 10 +++++----- src/hyperspheres.jl | 14 +++++++------- src/kd_tree.jl | 1 - src/knn.jl | 8 ++++---- src/tree_ops.jl | 4 ++-- src/utilities.jl | 2 +- 9 files changed, 21 insertions(+), 24 deletions(-) diff --git a/src/NearestNeighbors.jl b/src/NearestNeighbors.jl index 3285698..3b33b5d 100644 --- a/src/NearestNeighbors.jl +++ b/src/NearestNeighbors.jl @@ -28,14 +28,14 @@ abstract NNTree{V <: AbstractVector, P <: Metric} typealias DistanceType Float64 typealias MinkowskiMetric Union{Euclidean, Chebyshev, Cityblock, Minkowski} -function check_input{V1, V2 <: AbstractVector}(tree::NNTree{V1}, points::Vector{V2}) +function check_input{V1, V2 <: AbstractVector}(::NNTree{V1}, ::Vector{V2}) if length(V1) != length(V2) throw(ArgumentError( "dimension of input points:$(length(V2)) and tree data:$(length(V1)) must agree")) end end -function check_input{V1, V2 <: Number}(tree::NNTree{V1}, point::Vector{V2}) +function check_input{V1, V2 <: Number}(::NNTree{V1}, point::Vector{V2}) if length(V1) != length(point) throw(ArgumentError( "dimension of input points:$(length(point)) and tree data:$(length(V1)) must agree")) diff --git a/src/ball_tree.jl b/src/ball_tree.jl index b89cecb..0fed8e2 100644 --- a/src/ball_tree.jl +++ b/src/ball_tree.jl @@ -143,7 +143,6 @@ end function _knn(tree::BallTree, point::AbstractVector, - k::Int, best_idxs::Vector{Int}, best_dists::Vector, skip::Function) diff --git a/src/brute_tree.jl b/src/brute_tree.jl index eaa767c..5479c98 100644 --- a/src/brute_tree.jl +++ b/src/brute_tree.jl @@ -24,7 +24,6 @@ end function _knn{V}(tree::BruteTree{V}, point::AbstractVector, - k::Int, best_idxs::Vector{Int}, best_dists::Vector, skip::Function) diff --git a/src/evaluation.jl b/src/evaluation.jl index 7364acb..96bd8f6 100644 --- a/src/evaluation.jl +++ b/src/evaluation.jl @@ -3,14 +3,14 @@ @inline eval_pow(d::Minkowski, s) = abs(s)^d.p @inline eval_diff(::MinkowskiMetric, a, b) = a - b -@inline eval_diff(::Chebyshev, a, b) = b +@inline eval_diff(::Chebyshev, ::Any, b) = b function Distances.evaluate(d::Distances.UnionMetrics, a::AbstractVector, b::AbstractVector, do_end::Bool=true) s = eval_start(d, a, b) - @simd for I in eachindex(b) - @inbounds ai = a[I] - @inbounds bi = b[I] + @simd for i in eachindex(b) + @inbounds ai = a[i] + @inbounds bi = b[i] s = eval_reduce(d, s, eval_op(d, ai, bi)) end if do_end @@ -21,6 +21,6 @@ function Distances.evaluate(d::Distances.UnionMetrics, a::AbstractVector, end function Distances.evaluate(d::Distances.PreMetric, a::AbstractVector, - b::AbstractArray, do_end::Bool=true) + b::AbstractArray, ::Bool=true) evaluate(d, a, b) end diff --git a/src/hyperspheres.jl b/src/hyperspheres.jl index 728cf51..22b0e1d 100644 --- a/src/hyperspheres.jl +++ b/src/hyperspheres.jl @@ -17,7 +17,7 @@ end evaluate(m, s1.center, s2.center) + s1.r <= s2.r end -@inline function interpolate{V <: AbstractVector, M <: NormMetric}(m::M, +@inline function interpolate{V <: AbstractVector, M <: NormMetric}(::M, c1::V, c2::V, x, @@ -31,12 +31,12 @@ end return ab.center, true end -@inline function interpolate{V <: AbstractVector, M <: Metric}(m::M, +@inline function interpolate{V <: AbstractVector, M <: Metric}(::M, c1::V, - c2::V, - x, - d, - ab) + ::V, + ::Any, + ::Any, + ::Any) return c1, false end @@ -46,7 +46,7 @@ function create_bsphere{V}(data::Vector{V}, metric::Metric, indices::Vector{Int} # First find center of all points fill!(ab.center, 0.0) for i in low:high - for j in length(ab.center) + for j in 1:length(ab.center) ab.center[j] += data[indices[i]][j] end end diff --git a/src/kd_tree.jl b/src/kd_tree.jl index 06bfac8..eb868a4 100644 --- a/src/kd_tree.jl +++ b/src/kd_tree.jl @@ -144,7 +144,6 @@ end function _knn(tree::KDTree, point::AbstractVector, - k::Int, best_idxs::Vector{Int}, best_dists::Vector, skip::Function) diff --git a/src/knn.jl b/src/knn.jl index 50c7ebc..353e391 100644 --- a/src/knn.jl +++ b/src/knn.jl @@ -16,18 +16,18 @@ function knn{V, T <: AbstractVector}(tree::NNTree{V}, points::Vector{T}, k::Int, dists = [Vector{DistanceType}(k) for _ in 1:n_points] idxs = [Vector{Int}(k) for _ in 1:n_points] for i in 1:n_points - knn_point!(tree, points[i], k, sortres, dists[i], idxs[i], skip) + knn_point!(tree, points[i], sortres, dists[i], idxs[i], skip) end return idxs, dists end -function knn_point!{V, T <: Number}(tree::NNTree{V}, point::AbstractVector{T}, k::Int, sortres, dist, idx, skip) +function knn_point!{V, T <: Number}(tree::NNTree{V}, point::AbstractVector{T}, sortres, dist, idx, skip) fill!(idx, -1) fill!(dist, typemax(DistanceType)) - _knn(tree, point, k, idx, dist, skip) + _knn(tree, point, idx, dist, skip) sortres && heap_sort_inplace!(dist, idx) if tree.reordered - for j in 1:k + for j in eachindex(idx) @inbounds idx[j] = tree.indices[idx[j]] end end diff --git a/src/tree_ops.jl b/src/tree_ops.jl index 31a211c..19aa96c 100644 --- a/src/tree_ops.jl +++ b/src/tree_ops.jl @@ -90,9 +90,9 @@ end # Checks the distance function and add those points that are among the k best. # Uses a heap for fast insertion. -@inline function add_points_knn!(best_dists::Vector, best_idxs::Vector{Int}, +@inline function add_points_knn!{F}(best_dists::Vector, best_idxs::Vector{Int}, tree::NNTree, index::Int, point::AbstractVector, - do_end::Bool, skip::Function) + do_end::Bool, skip::F) for z in get_leaf_range(tree.tree_data, index) @POINT 1 idx = tree.reordered ? z : tree.indices[z] diff --git a/src/utilities.jl b/src/utilities.jl index 8797679..879d8a0 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -88,6 +88,6 @@ end end # Default skip function, always false -@inline function always_false(i::Int) +@inline function always_false(::Int) false end From 35edce3ae3c323f2d7b8b81b4e6d5b35efc03fc0 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 24 Aug 2016 09:41:05 +0200 Subject: [PATCH 06/13] fix extra k --- src/knn.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/knn.jl b/src/knn.jl index 353e391..995d3c4 100644 --- a/src/knn.jl +++ b/src/knn.jl @@ -40,7 +40,7 @@ function knn{V, T <: Number}(tree::NNTree{V}, point::AbstractVector{T}, k::Int, idx = Vector{Int}(k) dist = Vector{DistanceType}(k) - knn_point!(tree, point, k, sortres, dist, idx, skip) + knn_point!(tree, point, sortres, dist, idx, skip) return idx, dist end From 439eccd6ec966e64a2603be6c63e37bb06b73fca Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 24 Aug 2016 10:27:53 +0200 Subject: [PATCH 07/13] final updates --- benchmarks/runbenchmarks.jl | 7 +- src/NearestNeighbors.jl | 4 +- src/ball_tree.jl | 4 +- src/brute_tree.jl | 2 +- src/datafreetree.jl | 33 ++++- src/hyperspheres.jl | 6 +- src/kd_tree.jl | 8 +- src/knn.jl | 6 +- test/runtests.jl | 2 +- .../{datafreetree.jl => test_datafreetree.jl} | 4 +- test/test_monkey.jl | 126 +++++++++--------- 11 files changed, 114 insertions(+), 88 deletions(-) rename test/{datafreetree.jl => test_datafreetree.jl} (90%) diff --git a/benchmarks/runbenchmarks.jl b/benchmarks/runbenchmarks.jl index b735bee..444c02d 100644 --- a/benchmarks/runbenchmarks.jl +++ b/benchmarks/runbenchmarks.jl @@ -4,7 +4,7 @@ using JLD include("generate_report.jl") -const EXTENSIVE_BENCHMARK = true +const EXTENSIVE_BENCHMARK = false const SUITE = BenchmarkGroup() SUITE["build tree"] = BenchmarkGroup() @@ -20,7 +20,7 @@ for n_points in (EXTENSIVE_BENCHMARK ? (10^3, 10^5) : 10^5) (BallTree, "ball tree")) tree = tree_type(data; leafsize = leafsize, reorder = reorder) SUITE["build tree"]["$(tree_type.name.name) $dim × $n_points, ls = $leafsize"] = @benchmarkable $(tree_type)($data; leafsize = $leafsize, reorder = $reorder) - for input_size in (EXTENSIVE_BENCHMARK ? (1, 1000) : 1000) + for input_size in (1, 1000) input_data = rand(MersenneTwister(1), dim, input_size) for k in (EXTENSIVE_BENCHMARK ? (1, 10) : 10) SUITE["knn"]["$(tree_type.name.name) $dim × $n_points, ls = $leafsize, input_size = $input_size, k = $k"] = @benchmarkable knn($tree, $input_data, $k) @@ -66,4 +66,5 @@ end # run_benchmarks("primary") # generate_report("primary") # generate a report with stats about a run -# generate_report("primary", "secondary") # generate report comparing two runs +# run_benchmarks("secondary") +# generate_report("secondary", "primary") # generate report comparing two runs diff --git a/src/NearestNeighbors.jl b/src/NearestNeighbors.jl index 3b33b5d..eee6efb 100644 --- a/src/NearestNeighbors.jl +++ b/src/NearestNeighbors.jl @@ -25,7 +25,6 @@ const DEBUG = false abstract NNTree{V <: AbstractVector, P <: Metric} -typealias DistanceType Float64 typealias MinkowskiMetric Union{Euclidean, Chebyshev, Cityblock, Minkowski} function check_input{V1, V2 <: AbstractVector}(::NNTree{V1}, ::Vector{V2}) @@ -42,6 +41,9 @@ function check_input{V1, V2 <: Number}(::NNTree{V1}, point::Vector{V2}) end end +get_T{T <: AbstractFloat}(::Type{T}) = T +get_T{T}(::T) = Float64 + include("debugging.jl") include("evaluation.jl") include("tree_data.jl") diff --git a/src/ball_tree.jl b/src/ball_tree.jl index 0fed8e2..11d36b0 100644 --- a/src/ball_tree.jl +++ b/src/ball_tree.jl @@ -42,10 +42,10 @@ function BallTree{V <: AbstractArray, M <: Metric}(data::Vector{V}, n_d = length(V) n_p = length(data) - array_buffs = ArrayBuffers(Val{length(V)}, DistanceType) + array_buffs = ArrayBuffers(Val{length(V)}, get_T(eltype(V))) indices = collect(1:n_p) - # Bottom up creation of hyper spheres so need spheres even for leafs + # Bottom up creation of hyper spheres so need spheres even for leafs) hyper_spheres = Array(HyperSphere{length(V), eltype(V)}, tree_data.n_internal_nodes + tree_data.n_leafs) if reorder diff --git a/src/brute_tree.jl b/src/brute_tree.jl index 5479c98..4cba9b9 100644 --- a/src/brute_tree.jl +++ b/src/brute_tree.jl @@ -35,7 +35,7 @@ end function knn_kernel!{V, F}(tree::BruteTree{V}, point::AbstractVector, best_idxs::Vector{Int}, - best_dists::Vector{DistanceType}, + best_dists::Vector, skip::F) for i in 1:length(tree.data) if skip(i) diff --git a/src/datafreetree.jl b/src/datafreetree.jl index adc44a1..8f6354d 100644 --- a/src/datafreetree.jl +++ b/src/datafreetree.jl @@ -1,10 +1,24 @@ # A DataFreeTree wraps a descendant of NNTree # which does not contain a copy of the data immutable DataFreeTree{N <: NNTree} + size::Tuple{Int,Int} hash::UInt64 tree::N end +function get_points_dim(data) + if eltype(data) <: AbstractVector + ndim = eltype(eltype(data)) + npoints = length(data) + elseif typeof(data) <: Matrix + ndim = size(data, 1) + npoints = size(data, 2) + else + error("Unknown input data format") + end + return ndim, npoints +end + """ DataFreeTree(treetype, data[, reorderbufffer = similar(data), indicesfor = :data, kargs...]) -> datafreetree @@ -22,8 +36,8 @@ is provided, reordering is performed and the contents of `reorderbuffer` have to """ function DataFreeTree{T<:NNTree}(::Type{T}, data, args...; reorderbuffer = data[:, 1:0], kargs...) tree = T(data, args...; storedata = false, reorderbuffer = reorderbuffer, kargs...) - - DataFreeTree(hash(tree.reordered ? reorderbuffer : data), tree) + ndim, npoints = get_points_dim(data) + DataFreeTree((ndim, npoints), hash(tree.reordered ? reorderbuffer : data), tree) end """ @@ -39,17 +53,22 @@ function injectdata{T}(datafreetree::DataFreeTree, data::Matrix{T}) else new_data = SVector{dim, T}[SVector{dim, T}(data[:, i]) for i in 1:npoints] end - injectdata(datafreetree, new_data) + new_hash = hash(data) + injectdata(datafreetree, new_data, new_hash) end -function injectdata(datafreetree::DataFreeTree, data::Vector) - if hash(data) != datafreetree.hash +function injectdata{V <: AbstractVector}(datafreetree::DataFreeTree, data::Vector{V}, new_hash::UInt64=0) + if new_hash == 0 + new_hash = hash(data) + end + if length(V) != datafreetree.size[1] || length(data) != datafreetree.size[2] + throw(DimensionMismatch("NearestNeighbors:injectdata: The size of 'data' $(length(data)) × $(length(V)) does not match the data array used to construct the tree $(datafreetree.size).")) + end + if new_hash != datafreetree.hash throw(ArgumentError("NearestNeighbors:injectdata: The hash of 'data' does not match the hash of the data array used to construct the tree.")) end - typ = typeof(datafreetree.tree) fields = map(x-> getfield(datafreetree.tree, x), fieldnames(datafreetree.tree))[2:end] typ(data, fields...) end - diff --git a/src/hyperspheres.jl b/src/hyperspheres.jl index 22b0e1d..dd24fd5 100644 --- a/src/hyperspheres.jl +++ b/src/hyperspheres.jl @@ -5,6 +5,8 @@ immutable HyperSphere{N, T <: AbstractFloat} r::T end +HyperSphere{N, T1, T2}(center::SVector{N, T1}, r::T2) = HyperSphere(center, convert(T1, r)) + @inline function intersects{T <: AbstractFloat, N, M <: Metric}(m::M, s1::HyperSphere{N, T}, s2::HyperSphere{N, T}) @@ -53,11 +55,11 @@ function create_bsphere{V}(data::Vector{V}, metric::Metric, indices::Vector{Int} scale!(ab.center, 1 / n_points) # Then find r - r = zero(DistanceType) + r = zero(get_T(eltype(V))) for i in low:high r = max(r, evaluate(metric, data[indices[i]], ab.center)) end - r += eps(DistanceType) + r += eps(get_T(eltype(V))) return HyperSphere(SVector{length(V), eltype(V)}(ab.center), r) end diff --git a/src/kd_tree.jl b/src/kd_tree.jl index eb868a4..96f277d 100644 --- a/src/kd_tree.jl +++ b/src/kd_tree.jl @@ -161,7 +161,7 @@ function knn_kernel!{V, F}(tree::KDTree{V}, point::AbstractVector, best_idxs ::Vector{Int}, best_dists::Vector, - min_dist::DistanceType, + min_dist, skip::F) @NODE 1 # At a leaf node. Go through all points in node and add those in range @@ -205,7 +205,7 @@ function _inrange(tree::KDTree, radius::Number, idx_in_ball = Int[]) init_min = get_min_distance(tree.hyper_rec, point) - inrange_kernel!(tree, 1, point, eval_op(tree.metric, radius, zero(DistanceType)), idx_in_ball, + inrange_kernel!(tree, 1, point, eval_op(tree.metric, radius, zero(init_min)), idx_in_ball, init_min) return end @@ -240,11 +240,11 @@ function inrange_kernel!(tree::KDTree, if split_diff > 0 # Point is to the right of the split value close = getright(index) far = getleft(index) - ddiff = max(zero(DistanceType), p_dim - hi) + ddiff = max(zero(p_dim - hi), p_dim - hi) else # Point is to the left of the split value close = getleft(index) far = getright(index) - ddiff = max(zero(DistanceType), lo - p_dim) + ddiff = max(zero(lo - p_dim), lo - p_dim) end # Call closer sub tree inrange_kernel!(tree, close, point, r, idx_in_ball, min_dist) diff --git a/src/knn.jl b/src/knn.jl index 995d3c4..4c0e2c9 100644 --- a/src/knn.jl +++ b/src/knn.jl @@ -13,7 +13,7 @@ function knn{V, T <: AbstractVector}(tree::NNTree{V}, points::Vector{T}, k::Int, check_k(tree, k) n_points = length(points) - dists = [Vector{DistanceType}(k) for _ in 1:n_points] + dists = [Vector{get_T(eltype(V))}(k) for _ in 1:n_points] idxs = [Vector{Int}(k) for _ in 1:n_points] for i in 1:n_points knn_point!(tree, points[i], sortres, dists[i], idxs[i], skip) @@ -23,7 +23,7 @@ end function knn_point!{V, T <: Number}(tree::NNTree{V}, point::AbstractVector{T}, sortres, dist, idx, skip) fill!(idx, -1) - fill!(dist, typemax(DistanceType)) + fill!(dist, typemax(get_T(eltype(V)))) _knn(tree, point, idx, dist, skip) sortres && heap_sort_inplace!(dist, idx) if tree.reordered @@ -39,7 +39,7 @@ function knn{V, T <: Number}(tree::NNTree{V}, point::AbstractVector{T}, k::Int, end idx = Vector{Int}(k) - dist = Vector{DistanceType}(k) + dist = Vector{get_T(eltype(V))}(k) knn_point!(tree, point, sortres, dist, idx, skip) return idx, dist end diff --git a/test/runtests.jl b/test/runtests.jl index bb765c0..1b9d093 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -33,4 +33,4 @@ const trees_with_brute = [BruteTree; trees] include("test_knn.jl") include("test_inrange.jl") include("test_monkey.jl") -include("datafreetree.jl") +include("test_datafreetree.jl") diff --git a/test/datafreetree.jl b/test/test_datafreetree.jl similarity index 90% rename from test/datafreetree.jl rename to test/test_datafreetree.jl index 3a419d8..5f226e4 100644 --- a/test/datafreetree.jl +++ b/test/test_datafreetree.jl @@ -3,8 +3,8 @@ data2 = rand(2,100) data3 = rand(3,100) t = DataFreeTree(KDTree, data) - @test_throws ArgumentError injectdata(t, data3) - + @test_throws ArgumentError injectdata(t, data2) + @test_throws DimensionMismatch injectdata(t, data3) for typ in [KDTree, BallTree] dfilename = tempname() rfilename = tempname() diff --git a/test/test_monkey.jl b/test/test_monkey.jl index 572043a..abf54c2 100644 --- a/test/test_monkey.jl +++ b/test/test_monkey.jl @@ -5,77 +5,79 @@ import NearestNeighbors.MinkowskiMetric @testset "metric" for metric in fullmetrics @testset "tree type" for TreeType in trees_with_brute - @testset "knn monkey" begin - # Checks that we find existing point in the tree - # and that it is the closest - if TreeType == KDTree && !isa(metric, MinkowskiMetric) - continue - elseif TreeType == BallTree && isa(metric, Hamming) - continue - end - for i in 1:100 - dim_data = rand(1:4) - size_data = rand(1000:1300) - data = rand(dim_data, size_data) - for j = 1:5 + @testset "type" for T in (Float32, Float64) + @testset "knn monkey" begin + # Checks that we find existing point in the tree + # and that it is the closest + if TreeType == KDTree && !isa(metric, MinkowskiMetric) + continue + elseif TreeType == BallTree && isa(metric, Hamming) + continue + end + for i in 1:100 + dim_data = rand(1:4) + size_data = rand(1000:1300) + data = rand(T, dim_data, size_data) + for j = 1:5 + tree = TreeType(data, metric; leafsize = rand(1:15)) + n = rand(1:size_data) + idx, dist = knn(tree, data[:,n], rand(1:30), true) + @test issorted(dist) == true + @test n == idx[1] + end + end + + # Compares vs Brute Force + for i in 1:100 + dim_data = rand(1:5) + size_data = rand(100:151) + data = rand(T, dim_data, size_data) tree = TreeType(data, metric; leafsize = rand(1:15)) - n = rand(1:size_data) - idx, dist = knn(tree, data[:,n], rand(1:30), true) - @test issorted(dist) == true - @test n == idx[1] + btree = BruteTree(data, metric) + k = rand(1:12) + p = rand(dim_data) + idx, dist = knn(tree, p, k) + bidx, bdist = knn(tree, p, k) + @test idx == bidx + @test dist ≈ bdist end end - # Compares vs Brute Force - for i in 1:100 - dim_data = rand(1:5) - size_data = rand(100:151) - data = rand(dim_data, size_data) - tree = TreeType(data, metric; leafsize = rand(1:15)) - btree = BruteTree(data, metric) - k = rand(1:12) - p = rand(dim_data) - idx, dist = knn(tree, p, k) - bidx, bdist = knn(tree, p, k) - @test idx == bidx - @test dist ≈ bdist - end - end + @testset "inrange monkey" begin + # Test against brute force + for i in 1:100 + dim_data = rand(1:6) + size_data = rand(20:250) + data = rand(T, dim_data, size_data) + tree = TreeType(data, metric; leafsize = rand(1:8)) + btree = BruteTree(data, metric) + p = 0.5 * ones(dim_data) + r = 0.3 - @testset "inrange monkey" begin - # Test against brute force - for i in 1:100 - dim_data = rand(1:6) - size_data = rand(20:250) - data = rand(dim_data, size_data) - tree = TreeType(data, metric; leafsize = rand(1:8)) - btree = BruteTree(data, metric) - p = 0.5 * ones(dim_data) - r = 0.3 + idxs = inrange(tree, p, r, true) + bidxs = inrange(btree, p, r, true) - idxs = inrange(tree, p, r, true) - bidxs = inrange(btree, p, r, true) - - @test idxs == bidxs + @test idxs == bidxs + end end - end - @testset "coupled monkey" begin - for i in 1:500 - dim_data = rand(1:5) - size_data = rand(100:1000) - data = randn(dim_data, size_data) - tree = TreeType(data, metric; leafsize = rand(1:8)) - point = randn(dim_data) - idxs_ball = Int[] - r = 0.1 - while length(idxs_ball) < 10 - r *= 2.0 - idxs_ball = inrange(tree, point, r, true) - end - idxs_knn, dists = knn(tree, point, length(idxs_ball)) + @testset "coupled monkey" begin + for i in 1:500 + dim_data = rand(1:5) + size_data = rand(100:1000) + data = randn(T, dim_data, size_data) + tree = TreeType(data, metric; leafsize = rand(1:8)) + point = randn(dim_data) + idxs_ball = Int[] + r = 0.1 + while length(idxs_ball) < 10 + r *= 2.0 + idxs_ball = inrange(tree, point, r, true) + end + idxs_knn, dists = knn(tree, point, length(idxs_ball)) - @test sort(idxs_knn) == sort(idxs_ball) + @test sort(idxs_knn) == sort(idxs_ball) + end end end end From ac22211b07460dc490faa26097b57d0575a5f6e9 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 24 Aug 2016 10:36:13 +0200 Subject: [PATCH 08/13] update README and CI --- .travis.yml | 1 - README.md | 4 ++-- appveyor.yml | 2 -- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 760e726..72eed94 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,5 @@ language: julia julia: - - 0.4 - 0.5 - nightly notifications: diff --git a/README.md b/README.md index 25a799a..9ee181c 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ is created by the function: NNTree(data, metric; leafsize, reorder) ``` -* `data`: A matrix of size `nd × np` with the points to insert in the tree. `nd` is the dimensionality of the points, `np` is the number of points. +* `data`: This parameter represents the points to build up the tree from. It can either be a matrix of size `nd × np` with the points to insert in the tree where `nd` is the dimensionality of the points, `np` is the number of points or it can be a `Vector{V}` where `V` is itself a subtype of an `AbstractVector` and such that `eltype(V)` and `length(V)` is defined. * `metric`: The metric to use, defaults to `Euclidean`. This is one of the `Metric` types defined in the `Distances.jl` packages. * `leafsize` (keyword argument): Determines at what number of points to stop splitting the tree further. There is a trade-off between traversing the tree and having to evaluate the metric function for increasing number of points. * `reorder` (keyword argument): While building the tree this will put points close in distance close in memory since this helps with cache locality. In this case, a copy of the original data will be made so that the original data is left unmodified. This can have a significant impact on performance and is by default set to `true`. @@ -52,7 +52,7 @@ knn(tree, points, k, sortres = false, skip = always_false) -> idxs, dists ``` * `tree`: The tree instance -* `points`: A vector or matrix of points to find the `k` nearest neighbors to. If `points` is a vector then this represents a single point, if `points` is a matrix then the `k` nearest neighbors to each point (column) will be computed. +* `points`: A vector or matrix of points to find the `k` nearest neighbors to. If `points` is a vector of numbers then this represents a single point, if `points` is a matrix then the `k` nearest neighbors to each point (column) will be computed. `points` can also be a vector of other vectors where each element in the outer vector is considered a point. * `sortres` (optional): Determines if the results should be sorted before returning. In this case the results will be sorted in order of increasing distance to the point. * `skip` (optional): A predicate to determine if a given point should be skipped, for diff --git a/appveyor.yml b/appveyor.yml index 83a71ad..8c164c1 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -2,8 +2,6 @@ environment: matrix: - JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe" - JULIAVERSION: "julianightlies/bin/winnt/x64/julia-latest-win64.exe" - - JULIAVERSION: "julialang/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe" - - JULIAVERSION: "julialang/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe" branches: only: - master From 4956f8f2de919430a19c24389872c83e2dcf9fd5 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 24 Aug 2016 11:06:46 +0200 Subject: [PATCH 09/13] reduce monkey testing a bit --- test/test_datafreetree.jl | 4 ++-- test/test_monkey.jl | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test/test_datafreetree.jl b/test/test_datafreetree.jl index 5f226e4..bf16c73 100644 --- a/test/test_datafreetree.jl +++ b/test/test_datafreetree.jl @@ -4,12 +4,12 @@ data3 = rand(3,100) t = DataFreeTree(KDTree, data) @test_throws ArgumentError injectdata(t, data2) - @test_throws DimensionMismatch injectdata(t, data3) + @test_throws DimensionMismatch injectdata(t, data3) for typ in [KDTree, BallTree] dfilename = tempname() rfilename = tempname() d = 2 - n = 1000 + n = 100 data = Mmap.mmap(dfilename, Matrix{Float32}, (d, n)) data[:] = rand(Float32, d, n) reorderbuffer = Mmap.mmap(rfilename, Matrix{Float32}, (d, n)) diff --git a/test/test_monkey.jl b/test/test_monkey.jl index abf54c2..a3fdb0b 100644 --- a/test/test_monkey.jl +++ b/test/test_monkey.jl @@ -14,7 +14,7 @@ import NearestNeighbors.MinkowskiMetric elseif TreeType == BallTree && isa(metric, Hamming) continue end - for i in 1:100 + for i in 1:30 dim_data = rand(1:4) size_data = rand(1000:1300) data = rand(T, dim_data, size_data) @@ -28,7 +28,7 @@ import NearestNeighbors.MinkowskiMetric end # Compares vs Brute Force - for i in 1:100 + for i in 1:30 dim_data = rand(1:5) size_data = rand(100:151) data = rand(T, dim_data, size_data) @@ -45,7 +45,7 @@ import NearestNeighbors.MinkowskiMetric @testset "inrange monkey" begin # Test against brute force - for i in 1:100 + for i in 1:30 dim_data = rand(1:6) size_data = rand(20:250) data = rand(T, dim_data, size_data) @@ -62,7 +62,7 @@ import NearestNeighbors.MinkowskiMetric end @testset "coupled monkey" begin - for i in 1:500 + for i in 1:50 dim_data = rand(1:5) size_data = rand(100:1000) data = randn(T, dim_data, size_data) From 5d37c5c1b721bb29a077cb31f093b6885830a0c8 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 24 Aug 2016 11:14:31 +0200 Subject: [PATCH 10/13] add reqs to REQUIRE --- REQUIRE | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/REQUIRE b/REQUIRE index 0a4ac02..3622eb7 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,4 +1,4 @@ julia 0.5- -Distances -StaticArrays +Distances 0.3 0.4 +StaticArrays 0.04 Compat 0.8.4 From 4ddb74de1606d531d44bf363f946c50ecd24b712 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 24 Aug 2016 11:16:47 +0200 Subject: [PATCH 11/13] disable codecov comments --- .codecov.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 .codecov.yml diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 0000000..69cb760 --- /dev/null +++ b/.codecov.yml @@ -0,0 +1 @@ +comment: false From ab2c457880466ac794f3492bcc91a5d2b8f6407e Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 24 Aug 2016 11:19:58 +0200 Subject: [PATCH 12/13] fix --- REQUIRE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/REQUIRE b/REQUIRE index 3622eb7..bf001e2 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,4 +1,4 @@ julia 0.5- Distances 0.3 0.4 -StaticArrays 0.04 +StaticArrays 0.0.4 Compat 0.8.4 From 7043829175d9dd9928c15d46e6a813cb8a332533 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 24 Aug 2016 11:41:51 +0200 Subject: [PATCH 13/13] improve coverage --- src/hyperrectangles.jl | 43 ------------------------------------------ test/runtests.jl | 1 + test/test_inrange.jl | 8 ++++++++ test/test_knn.jl | 8 ++++++++ 4 files changed, 17 insertions(+), 43 deletions(-) diff --git a/src/hyperrectangles.jl b/src/hyperrectangles.jl index 2b565db..be49100 100644 --- a/src/hyperrectangles.jl +++ b/src/hyperrectangles.jl @@ -25,35 +25,6 @@ function compute_bbox{V <: AbstractVector}(data::Vector{V}) return HyperRectangle(mins, maxes) end -# Splits a hyper rectangle into two rectangles by dividing the -# rectangle at a specific value in a given dimension. -function Base.split(hyper_rec::HyperRectangle, - dim::Int, - value::Number) - new_max = copy(hyper_rec.maxes) - new_max[dim] = value - - new_min = copy(hyper_rec.mins) - new_min[dim] = value - - return HyperRectangle(hyper_rec.mins, new_max), - HyperRectangle(new_min, hyper_rec.maxes) -end - -function find_maxspread{T}(hyper_rec::HyperRectangle{T}) - # Find the dimension where we have the largest spread. - split_dim = 1 - max_spread = zero(T) - - for d in eachindex(hyper_rec.mins) - @inbounds spread = hyper_rec.maxes[d] - hyper_rec.mins[d] - if spread > max_spread - max_spread = spread - split_dim = d - end - end - return split_dim -end ############################################ # Rectangle - Point functions @@ -85,17 +56,3 @@ end end return min_dist end - -# (Min, Max) distance between rectangle and point -@inline function get_min_max_distance{T}(rec::HyperRectangle, point::AbstractVector{T}) - min_dist = get_min_distance(rec, point) - max_dist = get_max_distance(rec, point) - return min_dist, max_dist -end - -# (Min, Max) distance between rectangle and point for a certain dim -@inline function get_min_max_dim{T}(rec::HyperRectangle, point::AbstractVector{T}, dim::Int) - min_dist_dim = get_min_dim(rec, point, dim) - max_dist_dim = get_max_dim(rec, point, dim) - return min_dist_dim, max_dist_dim -end diff --git a/test/runtests.jl b/test/runtests.jl index 1b9d093..54a4fd1 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,5 @@ using NearestNeighbors +using StaticArrays if VERSION >= v"0.5-" using Base.Test diff --git a/test/test_inrange.jl b/test/test_inrange.jl index b112105..af3f1a2 100644 --- a/test/test_inrange.jl +++ b/test/test_inrange.jl @@ -18,6 +18,14 @@ idxs = inrange(tree, [0, 0, 0], 0.6, dosort) @test idxs == [1] + idxs = inrange(tree, [0.0 0.0; 0.0 0.0; 0.5 0.0], 0.6, dosort) + @test idxs[1] == [1,2] + @test idxs[2] == [1] + + idxs = inrange(tree, [SVector{3,Float64}(0.0, 0.0, 0.5), SVector{3,Float64}(0.0, 0.0, 0.0)], 0.6, dosort) + @test idxs[1] == [1,2] + @test idxs[2] == [1] + idxs = inrange(tree, [0.33333333333, 0.33333333333, 0.33333333333], 1, dosort) @test idxs == [1, 2, 3, 5] diff --git a/test/test_knn.jl b/test/test_knn.jl index 56bbfa8..e042779 100644 --- a/test/test_knn.jl +++ b/test/test_knn.jl @@ -18,6 +18,14 @@ import Distances.evaluate idxs, dists = knn(tree, [0.1, 0.8], 3, true) @test idxs == [3, 2, 5] + idxs, dists = knn(tree, [0.8 0.1; 0.8 0.8], 1, true) + @test idxs[1][1] == 8 + @test idxs[2][1] == 3 + + idxs, dists = knn(tree, [SVector{2, Float64}(0.8,0.8), SVector{2, Float64}(0.1,0.8)], 1, true) + @test idxs[1][1] == 8 + @test idxs[2][1] == 3 + idxs, dists = knn(tree, [1//10, 8//10], 3, true) @test idxs == [3, 2, 5]