Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow for AbstractVector #29

Merged
merged 13 commits into from
Aug 24, 2016
1 change: 1 addition & 0 deletions .codecov.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
comment: false
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
language: julia
julia:
- 0.4
- 0.5
- nightly
notifications:
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ is created by the function:
NNTree(data, metric; leafsize, reorder)
```

* `data`: A matrix of size `nd × np` with the points to insert in the tree. `nd` is the dimensionality of the points, `np` is the number of points.
* `data`: This parameter represents the points to build up the tree from. It can either be a matrix of size `nd × np` with the points to insert in the tree where `nd` is the dimensionality of the points, `np` is the number of points or it can be a `Vector{V}` where `V` is itself a subtype of an `AbstractVector` and such that `eltype(V)` and `length(V)` is defined.
* `metric`: The metric to use, defaults to `Euclidean`. This is one of the `Metric` types defined in the `Distances.jl` packages.
* `leafsize` (keyword argument): Determines at what number of points to stop splitting the tree further. There is a trade-off between traversing the tree and having to evaluate the metric function for increasing number of points.
* `reorder` (keyword argument): While building the tree this will put points close in distance close in memory since this helps with cache locality. In this case, a copy of the original data will be made so that the original data is left unmodified. This can have a significant impact on performance and is by default set to `true`.
Expand Down Expand Up @@ -52,7 +52,7 @@ knn(tree, points, k, sortres = false, skip = always_false) -> idxs, dists
```

* `tree`: The tree instance
* `points`: A vector or matrix of points to find the `k` nearest neighbors to. If `points` is a vector then this represents a single point, if `points` is a matrix then the `k` nearest neighbors to each point (column) will be computed.
* `points`: A vector or matrix of points to find the `k` nearest neighbors to. If `points` is a vector of numbers then this represents a single point, if `points` is a matrix then the `k` nearest neighbors to each point (column) will be computed. `points` can also be a vector of other vectors where each element in the outer vector is considered a point.
* `sortres` (optional): Determines if the results should be sorted before returning.
In this case the results will be sorted in order of increasing distance to the point.
* `skip` (optional): A predicate to determine if a given point should be skipped, for
Expand Down
5 changes: 3 additions & 2 deletions REQUIRE
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
julia 0.4
Distances
julia 0.5-
Distances 0.3 0.4
StaticArrays 0.0.4
Compat 0.8.4
2 changes: 0 additions & 2 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ environment:
matrix:
- JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe"
- JULIAVERSION: "julianightlies/bin/winnt/x64/julia-latest-win64.exe"
- JULIAVERSION: "julialang/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe"
- JULIAVERSION: "julialang/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe"
branches:
only:
- master
Expand Down
7 changes: 4 additions & 3 deletions benchmarks/runbenchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ using JLD

include("generate_report.jl")

const EXTENSIVE_BENCHMARK = true
const EXTENSIVE_BENCHMARK = false

const SUITE = BenchmarkGroup()
SUITE["build tree"] = BenchmarkGroup()
Expand All @@ -20,7 +20,7 @@ for n_points in (EXTENSIVE_BENCHMARK ? (10^3, 10^5) : 10^5)
(BallTree, "ball tree"))
tree = tree_type(data; leafsize = leafsize, reorder = reorder)
SUITE["build tree"]["$(tree_type.name.name) $dim × $n_points, ls = $leafsize"] = @benchmarkable $(tree_type)($data; leafsize = $leafsize, reorder = $reorder)
for input_size in (EXTENSIVE_BENCHMARK ? (1, 1000) : 1000)
for input_size in (1, 1000)
input_data = rand(MersenneTwister(1), dim, input_size)
for k in (EXTENSIVE_BENCHMARK ? (1, 10) : 10)
SUITE["knn"]["$(tree_type.name.name) $dim × $n_points, ls = $leafsize, input_size = $input_size, k = $k"] = @benchmarkable knn($tree, $input_data, $k)
Expand Down Expand Up @@ -66,4 +66,5 @@ end

# run_benchmarks("primary")
# generate_report("primary") # generate a report with stats about a run
# generate_report("primary", "secondary") # generate report comparing two runs
# run_benchmarks("secondary")
# generate_report("secondary", "primary") # generate report comparing two runs
25 changes: 18 additions & 7 deletions src/NearestNeighbors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ __precompile__()
module NearestNeighbors

using Distances

import Distances: Metric, result_type, eval_reduce, eval_end, eval_op, eval_start, evaluate

using StaticArrays

import Base.show
import Compat.view

Expand All @@ -22,17 +23,27 @@ export Euclidean,
# Change this to enable debugging
const DEBUG = false

abstract NNTree{T <: AbstractFloat, P <: Metric}
abstract NNTree{V <: AbstractVector, P <: Metric}

typealias MinkowskiMetric Union{Euclidean, Chebyshev, Cityblock, Minkowski}

function check_input{V1, V2 <: AbstractVector}(::NNTree{V1}, ::Vector{V2})
if length(V1) != length(V2)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here and elsewhere you are allowing V to be V <: AbstractVector, and then calling length() on the type. Do you think V <: StaticVector is more appropriate?

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really since someone might want to use a FixedSizeArray for example. I don't want to bind V too much, I rather specify an interface that V has to follow (like definining length and eltype).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, that's true! (Except FixedSizeArray isn't a subtype of AbstractArray).

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah hehe. But still :P

throw(ArgumentError(
"dimension of input points:$(length(V2)) and tree data:$(length(V1)) must agree"))
end
end

function check_input(tree::NNTree, points::AbstractArray)
ndim_points = size(points,1)
ndim_tree = size(tree.data, 1)
if ndim_points != ndim_tree
function check_input{V1, V2 <: Number}(::NNTree{V1}, point::Vector{V2})
if length(V1) != length(point)
throw(ArgumentError(
"dimension of input points:$(ndim_points) and tree data:$(ndim_tree) must agree"))
"dimension of input points:$(length(point)) and tree data:$(length(V1)) must agree"))
end
end

get_T{T <: AbstractFloat}(::Type{T}) = T
get_T{T}(::T) = Float64

include("debugging.jl")
include("evaluation.jl")
include("tree_data.jl")
Expand Down
152 changes: 85 additions & 67 deletions src/ball_tree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
# which radius are determined from the given metric.
# The tree uses the triangle inequality to prune the search space
# when finding the neighbors to a point,
immutable BallTree{T <: AbstractFloat, M <: Metric} <: NNTree{T, M}
data::Matrix{T} # dim x n_points array with floats
hyper_spheres::Vector{HyperSphere{T}} # Each hyper sphere bounds its children
immutable BallTree{V <: AbstractVector, N, T, M <: Metric} <: NNTree{V, M}
data::Vector{V}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm sure Vector{Vector{T}} is slower than Matrix{T} (as would be Vector{MVector{T}}), so people will need to be comfortable to use an immutable static array for the point type.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For isbits(T) yes. However, Matrix{T} with isbits(T) will be reinterpreted to a vector of static vectors so for any previous usage of this package the memory layout will be the same. I haven't written documentation yet but the requirement on V is that it defined both length and eltype which pretty much excludes anything other than fixed size arrays.

hyper_spheres::Vector{HyperSphere{N, T}} # Each hyper sphere bounds its children
indices::Vector{Int} # Translates from tree index -> point index
metric::M # Metric used for tree
tree_data::TreeData # Some constants needed
Expand All @@ -15,87 +15,104 @@ end
# When we create the bounding spheres we need some temporary arrays.
# We create a type to hold them to not allocate these arrays at every
# function call and to reduce the number of parameters in the tree builder.
immutable ArrayBuffers{T <: AbstractFloat}
left::Vector{T}
right::Vector{T}
v12::Vector{T}
zerobuf::Vector{T}
immutable ArrayBuffers{N, T <: AbstractFloat}
center::MVector{N, T}
end

function ArrayBuffers(T, ndim)
ArrayBuffers{T}(zeros(T, ndim), zeros(T, ndim), zeros(T, ndim), zeros(T, ndim))
function ArrayBuffers{N, T}(::Type{Val{N}}, ::Type{T})
ArrayBuffers(zeros(MVector{N, T}))
end

"""
BallTree(data [, metric = Euclidean(), leafsize = 10]) -> balltree

Creates a `BallTree` from the data using the given `metric` and `leafsize`.
"""
function BallTree{T <: AbstractFloat, M<:Metric}(data::Matrix{T},
metric::M = Euclidean();
leafsize::Int = 10,
reorder::Bool = true,
storedata::Bool = true,
reorderbuffer::Matrix{T} = Matrix{T}(),
indicesfor::Symbol = :data)
function BallTree{V <: AbstractArray, M <: Metric}(data::Vector{V},
metric::M = Euclidean();
leafsize::Int = 10,
reorder::Bool = true,
storedata::Bool = true,
reorderbuffer::Vector{V} = Vector{V}(),
indicesfor::Symbol = :data)

reorder = !isempty(reorderbuffer) || (storedata ? reorder : false)

tree_data = TreeData(data, leafsize)
n_d = size(data, 1)
n_p = size(data, 2)
array_buffs = ArrayBuffers(T, size(data, 1))
n_d = length(V)
n_p = length(data)

array_buffs = ArrayBuffers(Val{length(V)}, get_T(eltype(V)))
indices = collect(1:n_p)

# Bottom up creation of hyper spheres so need spheres even for leafs
hyper_spheres = Array(HyperSphere{T}, tree_data.n_internal_nodes + tree_data.n_leafs)
# Bottom up creation of hyper spheres so need spheres even for leafs)
hyper_spheres = Array(HyperSphere{length(V), eltype(V)}, tree_data.n_internal_nodes + tree_data.n_leafs)

if reorder
if reorder
indices_reordered = Vector{Int}(n_p)
if isempty(reorderbuffer)
data_reordered = Matrix{T}(n_d, n_p)
data_reordered = Vector{V}(n_p)
else
data_reordered = reorderbuffer
end
else
# Dummy variables
indices_reordered = Vector{Int}(0)
data_reordered = Matrix{T}(0, 0)
indices_reordered = Vector{Int}()
data_reordered = Vector{V}()
end

# Call the recursive BallTree builder
build_BallTree(1, data, data_reordered, hyper_spheres, metric, indices, indices_reordered,
1, size(data,2), tree_data, array_buffs, reorder)
1, length(data), tree_data, array_buffs, reorder)

if reorder
data = data_reordered
indices = indicesfor == :data ? indices_reordered : collect(1:n_p)
end

BallTree(storedata ? data : similar(data,0,0), hyper_spheres, indices, metric, tree_data, reorder)
BallTree(storedata ? data : similar(data,0), hyper_spheres, indices, metric, tree_data, reorder)
end

function BallTree{T <: AbstractFloat, M <: Metric}(data::Matrix{T},
metric::M = Euclidean();
leafsize::Int = 10,
storedata::Bool = true,
reorder::Bool = true,
reorderbuffer::Matrix{T} = Matrix{T}(),
indicesfor::Symbol = :data)
dim = size(data, 1)
npoints = size(data, 2)
points = reinterpret(SVector{dim, T}, data, (length(data) ÷ dim, ))
if isempty(reorderbuffer)
reorderbuffer_points = Vector{SVector{dim, T}}()
else
reorderbuffer_points = reinterpret(SVector{dim, T}, reorderbuffer, (length(reorderbuffer) ÷ dim, ))
end
BallTree(points ,metric, leafsize = leafsize, storedata = storedata, reorder = reorder,
reorderbuffer = reorderbuffer_points, indicesfor = indicesfor)
end

# Recursive function to build the tree.
function build_BallTree{T <: AbstractFloat}(index::Int,
data::Matrix{T},
data_reordered::Matrix{T},
hyper_spheres::Vector{HyperSphere{T}},
metric::Metric,
indices::Vector{Int},
indices_reordered::Vector{Int},
low::Int,
high::Int,
tree_data::TreeData,
array_buffs::ArrayBuffers{T},
reorder::Bool)
function build_BallTree{V <: AbstractVector, N, T}(index::Int,
data::Vector{V},
data_reordered::Vector{V},
hyper_spheres::Vector{HyperSphere{N, T}},
metric::Metric,
indices::Vector{Int},
indices_reordered::Vector{Int},
low::Int,
high::Int,
tree_data::TreeData,
array_buffs::ArrayBuffers{N, T},
reorder::Bool)

n_points = high - low + 1 # Points left
if n_points <= tree_data.leafsize
if reorder
reorder_data!(data_reordered, data, index, indices, indices_reordered, tree_data)
end
# Create bounding sphere of points in leaf node by brute force
hyper_spheres[index] = create_bsphere(data, metric, indices, low, high)
hyper_spheres[index] = create_bsphere(data, metric, indices, low, high, array_buffs)
return
end

Expand Down Expand Up @@ -124,22 +141,23 @@ function build_BallTree{T <: AbstractFloat}(index::Int,
array_buffs)
end

function _knn{T}(tree::BallTree{T},
point::AbstractVector{T},
k::Int,
skip::Function)
best_idxs = [-1 for _ in 1:k]
best_dists = [typemax(T) for _ in 1:k]
function _knn(tree::BallTree,
point::AbstractVector,
best_idxs::Vector{Int},
best_dists::Vector,
skip::Function)

knn_kernel!(tree, 1, point, best_idxs, best_dists, skip)
return best_idxs, best_dists
return
end

function knn_kernel!{T, F}(tree::BallTree{T},
index::Int,
point::AbstractArray{T},
best_idxs ::Vector{Int},
best_dists::Vector{T},
skip::F)

function knn_kernel!{V, F}(tree::BallTree{V},
index::Int,
point::AbstractArray,
best_idxs ::Vector{Int},
best_dists::Vector,
skip::F)
@NODE 1
if isleaf(tree.tree_data.n_internal_nodes, index)
add_points_knn!(best_dists, best_idxs, tree, index, point, true, skip)
Expand All @@ -149,8 +167,8 @@ function knn_kernel!{T, F}(tree::BallTree{T},
left_sphere = tree.hyper_spheres[getleft(index)]
right_sphere = tree.hyper_spheres[getright(index)]

left_dist = max(zero(T), evaluate(tree.metric, point, left_sphere.center) - left_sphere.r)
right_dist = max(zero(T), evaluate(tree.metric, point, right_sphere.center) - right_sphere.r)
left_dist = max(zero(eltype(V)), evaluate(tree.metric, point, left_sphere.center) - left_sphere.r)
right_dist = max(zero(eltype(V)), evaluate(tree.metric, point, right_sphere.center) - right_sphere.r)

if left_dist <= best_dists[1] || right_dist <= best_dists[1]
if left_dist < right_dist
Expand All @@ -168,20 +186,20 @@ function knn_kernel!{T, F}(tree::BallTree{T},
return
end

function _inrange{T}(tree::BallTree{T},
point::AbstractVector{T},
radius::Number)
idx_in_ball = Int[] # List to hold the indices in range
ball = HyperSphere(point, radius) # The "query ball"
function _inrange{V}(tree::BallTree{V},
point::AbstractVector,
radius::Number,
idx_in_ball::Vector{Int})
ball = HyperSphere(convert(V, point), convert(eltype(V), radius)) # The "query ball"
inrange_kernel!(tree, 1, point, ball, idx_in_ball) # Call the recursive range finder
return idx_in_ball
return
end

function inrange_kernel!{T}(tree::BallTree{T},
index::Int,
point::Vector{T},
query_ball::HyperSphere{T},
idx_in_ball::Vector{Int})
function inrange_kernel!(tree::BallTree,
index::Int,
point::AbstractVector,
query_ball::HyperSphere,
idx_in_ball::Vector{Int})
@NODE 1
sphere = tree.hyper_spheres[index]

Expand Down
Loading