-
Notifications
You must be signed in to change notification settings - Fork 47
/
Copy pathfastknnsearch.m
161 lines (147 loc) · 4.02 KB
/
fastknnsearch.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
function [idx,D]=fastknnsearch(varargin)
% KNNSEARCH Linear k-nearest neighbor (KNN) search
% IDX = knnsearch(Q,R,K) searches the reference data set R (n x d array
% representing n points in a d-dimensional space) to find the k-nearest
% neighbors of each query point represented by eahc row of Q (m x d array).
% The results are stored in the (m x K) index array, IDX.
%
% IDX = knnsearch(Q,R) takes the default value K=1.
%
% IDX = knnsearch(Q) or IDX = knnsearch(Q,[],K) does the search for R = Q.
%
% Rationality
% Linear KNN search is the simplest appraoch of KNN. The search is based on
% calculation of all distances. Therefore, it is normally believed only
% suitable for small data sets. However, other advanced approaches, such as
% kd-tree and delaunary become inefficient when d is large comparing to the
% number of data points. On the other hand, the linear search in MATLAB is
% relatively insensitive to d due to the vectorization. In this code, the
% efficiency of linear search is further improved by using the JIT
% aceeleration of MATLAB. Numerical example shows that its performance is
% comparable with kd-tree algorithm in mex.
%
% See also, kdtree, nnsearch, delaunary, dsearch
% By Yi Cao at Cranfield University on 25 March 2008
% Example 1: small data sets
%{
R=randn(100,2);
Q=randn(3,2);
idx=knnsearch(Q,R);
plot(R(:,1),R(:,2),'b.',Q(:,1),Q(:,2),'ro',R(idx,1),R(idx,2),'gx');
%}
% Example 2: ten nearest points to [0 0]
%{
R=rand(100,2);
Q=[0 0];
K=10;
idx=knnsearch(Q,R,10);
r=max(sqrt(sum(R(idx,:).^2,2)));
theta=0:0.01:pi/2;
x=r*cos(theta);
y=r*sin(theta);
plot(R(:,1),R(:,2),'b.',Q(:,1),Q(:,2),'co',R(idx,1),R(idx,2),'gx',x,y,'r-','linewidth',2);
%}
% Example 3: cputime comparion with delaunay+dsearch I, a few to look up
%{
R=randn(10000,4);
Q=randn(500,4);
t0=cputime;
idx=knnsearch(Q,R);
t1=cputime;
T=delaunayn(R);
idx1=dsearchn(R,T,Q);
t2=cputime;
fprintf('Are both indices the same? %d\n',isequal(idx,idx1));
fprintf('CPU time for knnsearch = %g\n',t1-t0);
fprintf('CPU time for delaunay = %g\n',t2-t1);
%}
% Example 4: cputime comparion with delaunay+dsearch II, lots to look up
%{
Q=randn(10000,4);
R=randn(500,4);
t0=cputime;
idx=knnsearch(Q,R);
t1=cputime;
T=delaunayn(R);
idx1=dsearchn(R,T,Q);
t2=cputime;
fprintf('Are both indices the same? %d\n',isequal(idx,idx1));
fprintf('CPU time for knnsearch = %g\n',t1-t0);
fprintf('CPU time for delaunay = %g\n',t2-t1);
%}
% Example 5: cputime comparion with kd-tree by Steven Michael (mex file)
% <a href="http://www.mathworks.com/matlabcentral/fileexchange/loadFile.do?objectId=7030&objectType=file">kd-tree by Steven Michael</a>
%{
Q=randn(10000,10);
R=randn(500,10);
t0=cputime;
idx=knnsearch(Q,R);
t1=cputime;
tree=kdtree(R);
idx1=kdtree_closestpoint(tree,Q);
t2=cputime;
fprintf('Are both indices the same? %d\n',isequal(idx,idx1));
fprintf('CPU time for knnsearch = %g\n',t1-t0);
fprintf('CPU time for delaunay = %g\n',t2-t1);
%}
% Check inputs
[Q,R,K,fident] = parseinputs(varargin{:});
% Check outputs
nargoutchk(0,2);
% C2 = sum(C.*C,2)';
[N,M] = size(Q);
L=size(R,1);
idx = zeros(N,K);
D = idx;
if K==1
% Loop for each query point
for k=1:N
d=zeros(L,1);
for t=1:M
d=d+(R(:,t)-Q(k,t)).^2;
end
if fident
d(k)=inf;
end
[D(k),idx(k)]=min(d);
end
else
for k=1:N
d=zeros(L,1);
for t=1:M
d=d+(R(:,t)-Q(k,t)).^2;
end
if fident
d(k)=inf;
end
[s,t]=sort(d);
idx(k,:)=t(1:K);
D(k,:)=s(1:K);
end
end
if nargout>1
D=sqrt(D);
end
function [Q,R,K,fident] = parseinputs(varargin)
% Check input and output
narginchk(1,3);
Q=varargin{1};
if nargin<2
R=Q;
fident = true;
else
fident = false;
R=varargin{2};
end
if isempty(R)
fident = true;
R=Q;
end
if ~fident
fident = isequal(Q,R);
end
if nargin<3
K=1;
else
K=varargin{3};
end