Open
Description
When using a metric such as Cosine similarity, the returned vector is almost never the one with the smallest distance value.
Example:
Dataset
var points = [
{ 'x': 61, 'y': 54, 'id': 'A' },
{ 'x': 7, 'y': 54, 'id': 'B' },
{ 'x': 62, 'y': 58, 'id': 'C' },
{ 'x': 75, 'y': 48, 'id': 'D' },
{ 'x': 76, 'y': 52, 'id': 'E' },
{ 'x': 41, 'y': 16, 'id': 'F' },
{ 'x': 35, 'y': 35, 'id': 'G' },
{ 'x': 34, 'y': 63, 'id': 'H' },
{ 'x': 22, 'y': 32, 'id': 'I' },
{ 'x': 0, 'y': 53, 'id': 'J' },
{ 'x': 13, 'y': 91, 'id': 'K' },
{ 'x': 87, 'y': 24, 'id': 'L' },
{ 'x': 63, 'y': 2, 'id': 'M' },
{ 'x': 10, 'y': 76, 'id': 'N' },
{ 'x': 85, 'y': 98, 'id': 'O' },
{ 'x': 94, 'y': 90, 'id': 'P' },
{ 'x': 52, 'y': 47, 'id': 'Q' },
{ 'x': 100, 'y': 38, 'id': 'R' },
{ 'x': 95, 'y': 47, 'id': 'S' },
{ 'x': 62, 'y': 15, 'id': 'T' },
{ 'x': 93, 'y': 66, 'id': 'U' },
{ 'x': 46, 'y': 54, 'id': 'V' },
{ 'x': 85, 'y': 99, 'id': 'W' },
{ 'x': 32, 'y': 53, 'id': 'X' },
{ 'x': 11, 'y': 37, 'id': 'Y' },
{ 'x': 0, 'y': 54, 'id': 'Z' },
{ 'x': 90, 'y': 100, 'id': 's' },
{ 'x': 84, 'y': 58, 'id': 't' },
{ 'x': 97, 'y': 35, 'id': 'u' },
{ 'x': 24, 'y': 34, 'id': 'v' },
{ 'x': 67, 'y': 70, 'id': 'w' },
{ 'x': 16, 'y': 7, 'id': 'x' },
{ 'x': 27, 'y': 73, 'id': 'a' },
{ 'x': 0, 'y': 35, 'id': 'b' },
{ 'x': 97, 'y': 47, 'id': 'c' },
{ 'x': 44, 'y': 56, 'id': 'd' },
{ 'x': 23, 'y': 11, 'id': 'e' },
{ 'x': 3, 'y': 80, 'id': 'f' },
{ 'x': 87, 'y': 27, 'id': 'g' },
{ 'x': 42, 'y': 29, 'id': 'h' },
{ 'x': 77, 'y': 72, 'id': 'i' },
{ 'x': 40, 'y': 10, 'id': 'j' },
{ 'x': 86, 'y': 91, 'id': 'k' },
{ 'x': 43, 'y': 23, 'id': 'l' },
{ 'x': 55, 'y': 13, 'id': 'm' },
{ 'x': 88, 'y': 14, 'id': 'n' },
{ 'x': 67, 'y': 22, 'id': 'o' },
{ 'x': 88, 'y': 91, 'id': 'p' },
{ 'x': 82, 'y': 33, 'id': 'q' },
{ 'x': 97, 'y': 29, 'id': 'r' }
]
const cosineSimilarity = (vec1, vec2) => {
let dotProduct = 0;
let magnitude1 = 0;
let magnitude2 = 0;
for (const dim of Object.keys(vec1)) {
if (typeof (vec1[dim]) != 'number') continue;
dotProduct += vec1[dim] * vec2[dim];
magnitude1 += Math.pow(vec1[dim], 2);
magnitude2 += Math.pow(vec2[dim], 2);
}
return -dotProduct / (Math.sqrt(magnitude1) * Math.sqrt(magnitude2));
};
Inefficient function the get the closest vectors from an array.
const closestVectors = (vec, vectors) => {
const distances = vectors.map(v => cosineSimilarity(vec, v));
const min = Math.min(...distances);
// Check if multiple with same distance
const minIndices = distances.reduce((a, e, i) => {
if (e === min) a.push(i);
return a;
}, []);
// Return list of closest vectors
return { vectors: minIndices.map(i => vectors[i]), distances: minIndices.map(i => distances[i]) };
};
Testing
const test = { x: 200, y: 123, label: "TEST" }
var tree = new kdTree(points, cosineSimilarity, ['x', 'y']);
const nearest = tree.nearest(test, 1);
const nearest2 = closestVectors(test, points);
console.log("K-D Tree")
console.log(nearest)
console.log("Custom")
console.log(nearest2)
// > K-D Tree
// > [ [ { x: 76, y: 52, id: 'E' }, -0.998815642613221 ] ]
// > Custom
// > {
// vectors: [ { x: 75, y: 48, id: 'D' } ],
// distances: [ -0.999839132267399 ]
// }
Clearly the tree returns a vector with a slightly larger distance (0.998 vs 0.999).
Metadata
Metadata
Assignees
Labels
No labels