Skip to content

Commit

Permalink
fix: elkan means impl for some vector types (#472)
Browse files (browse the repository at this point in the history)
Signed-off-by: usamoi <usamoi@outlook.com>
  • Loading branch information
usamoi authored May 8, 2024
1 parent d81473c commit 71b7a1a
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 173 deletions.
176 changes: 13 additions & 163 deletions crates/elkan_k_means/src/operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,7 @@ pub trait OperatorElkanKMeans: Operator {
type VectorNormalized: VectorOwned;

fn elkan_k_means_normalize(vector: &mut [Scalar<Self>]);
fn elkan_k_means_normalize2(vector: Borrowed<'_, Self>) -> Self::VectorNormalized;
fn elkan_k_means_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32;
fn elkan_k_means_distance2(
lhs: <Self::VectorNormalized as VectorOwned>::Borrowed<'_>,
rhs: &[Scalar<Self>],
) -> F32;
}

impl OperatorElkanKMeans for BVecf32Cos {
Expand All @@ -22,17 +17,9 @@ impl OperatorElkanKMeans for BVecf32Cos {
vecf32::l2_normalize(vector)
}

fn elkan_k_means_normalize2(vector: Borrowed<'_, Self>) -> Vecf32Owned {
bvecf32::l2_normalize(vector)
}

fn elkan_k_means_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32 {
vecf32::dot(lhs, rhs).acos()
}

fn elkan_k_means_distance2(lhs: Vecf32Borrowed<'_>, rhs: &[Scalar<Self>]) -> F32 {
vecf32::dot(lhs.slice(), rhs).acos()
}
}

impl OperatorElkanKMeans for BVecf32Dot {
Expand All @@ -42,17 +29,9 @@ impl OperatorElkanKMeans for BVecf32Dot {
vecf32::l2_normalize(vector)
}

fn elkan_k_means_normalize2(vector: Borrowed<'_, Self>) -> Vecf32Owned {
bvecf32::l2_normalize(vector)
}

fn elkan_k_means_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32 {
vecf32::dot(lhs, rhs).acos()
}

fn elkan_k_means_distance2(lhs: Vecf32Borrowed<'_>, rhs: &[Scalar<Self>]) -> F32 {
vecf32::dot(lhs.slice(), rhs).acos()
}
}

impl OperatorElkanKMeans for BVecf32Jaccard {
Expand All @@ -62,37 +41,19 @@ impl OperatorElkanKMeans for BVecf32Jaccard {
vecf32::l2_normalize(vector)
}

fn elkan_k_means_normalize2(vector: Borrowed<'_, Self>) -> Vecf32Owned {
Vecf32Owned::new(vector.to_vec())
}

fn elkan_k_means_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32 {
vecf32::sl2(lhs, rhs).sqrt()
}

fn elkan_k_means_distance2(lhs: Vecf32Borrowed<'_>, rhs: &[Scalar<Self>]) -> F32 {
vecf32::sl2(lhs.slice(), rhs).sqrt()
}
}

impl OperatorElkanKMeans for BVecf32L2 {
type VectorNormalized = Vecf32Owned;

fn elkan_k_means_normalize(vector: &mut [Scalar<Self>]) {
vecf32::l2_normalize(vector)
}

fn elkan_k_means_normalize2(vector: Borrowed<'_, Self>) -> Vecf32Owned {
Vecf32Owned::new(vector.to_vec())
}
fn elkan_k_means_normalize(_: &mut [Scalar<Self>]) {}

fn elkan_k_means_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32 {
vecf32::sl2(lhs, rhs).sqrt()
}

fn elkan_k_means_distance2(lhs: Vecf32Borrowed<'_>, rhs: &[Scalar<Self>]) -> F32 {
vecf32::sl2(lhs.slice(), rhs).sqrt()
}
}

impl OperatorElkanKMeans for SVecf32Cos {
Expand All @@ -102,19 +63,9 @@ impl OperatorElkanKMeans for SVecf32Cos {
vecf32::l2_normalize(vector)
}

fn elkan_k_means_normalize2(vector: Borrowed<'_, Self>) -> SVecf32Owned {
let mut vector = vector.for_own();
svecf32::l2_normalize(&mut vector);
vector
}

fn elkan_k_means_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32 {
vecf32::dot(lhs, rhs).acos()
}

fn elkan_k_means_distance2(lhs: Borrowed<'_, Self>, rhs: &[Scalar<Self>]) -> F32 {
svecf32::dot_2(lhs, rhs).acos()
}
}

impl OperatorElkanKMeans for SVecf32Dot {
Expand All @@ -124,161 +75,86 @@ impl OperatorElkanKMeans for SVecf32Dot {
vecf32::l2_normalize(vector)
}

fn elkan_k_means_normalize2(vector: Borrowed<'_, Self>) -> SVecf32Owned {
let mut vector = vector.for_own();
svecf32::l2_normalize(&mut vector);
vector
}

fn elkan_k_means_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32 {
vecf32::dot(lhs, rhs).acos()
}

fn elkan_k_means_distance2(lhs: Borrowed<'_, Self>, rhs: &[Scalar<Self>]) -> F32 {
svecf32::dot_2(lhs, rhs).acos()
}
}

impl OperatorElkanKMeans for SVecf32L2 {
type VectorNormalized = Self::VectorOwned;

fn elkan_k_means_normalize(_: &mut [Scalar<Self>]) {}

fn elkan_k_means_normalize2(vector: SVecf32Borrowed<'_>) -> SVecf32Owned {
vector.for_own()
}

fn elkan_k_means_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32 {
vecf32::sl2(lhs, rhs).sqrt()
}

fn elkan_k_means_distance2(lhs: SVecf32Borrowed<'_>, rhs: &[Scalar<Self>]) -> F32 {
svecf32::sl2_2(lhs, rhs).sqrt()
}
}

impl OperatorElkanKMeans for Vecf16Cos {
type VectorNormalized = Self::VectorOwned;

fn elkan_k_means_normalize(vector: &mut [F16]) {
fn elkan_k_means_normalize(vector: &mut [Scalar<Self>]) {
vecf16::l2_normalize(vector)
}

fn elkan_k_means_normalize2(vector: Vecf16Borrowed<'_>) -> Vecf16Owned {
let mut vector = vector.for_own();
vecf16::l2_normalize(vector.slice_mut());
vector
}

fn elkan_k_means_distance(lhs: &[F16], rhs: &[F16]) -> F32 {
fn elkan_k_means_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32 {
vecf16::dot(lhs, rhs).acos()
}

fn elkan_k_means_distance2(lhs: Vecf16Borrowed<'_>, rhs: &[F16]) -> F32 {
vecf16::dot(lhs.slice(), rhs).acos()
}
}

impl OperatorElkanKMeans for Vecf16Dot {
type VectorNormalized = Self::VectorOwned;

fn elkan_k_means_normalize(vector: &mut [F16]) {
fn elkan_k_means_normalize(vector: &mut [Scalar<Self>]) {
vecf16::l2_normalize(vector)
}

fn elkan_k_means_normalize2(vector: Vecf16Borrowed<'_>) -> Vecf16Owned {
let mut vector = vector.for_own();
vecf16::l2_normalize(vector.slice_mut());
vector
}

fn elkan_k_means_distance(lhs: &[F16], rhs: &[F16]) -> F32 {
fn elkan_k_means_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32 {
vecf16::dot(lhs, rhs).acos()
}

fn elkan_k_means_distance2(lhs: Vecf16Borrowed<'_>, rhs: &[F16]) -> F32 {
vecf16::dot(lhs.slice(), rhs).acos()
}
}

impl OperatorElkanKMeans for Vecf16L2 {
type VectorNormalized = Self::VectorOwned;

fn elkan_k_means_normalize(_: &mut [F16]) {}

fn elkan_k_means_normalize2(vector: Vecf16Borrowed<'_>) -> Vecf16Owned {
vector.for_own()
}
fn elkan_k_means_normalize(_: &mut [Scalar<Self>]) {}

fn elkan_k_means_distance(lhs: &[F16], rhs: &[F16]) -> F32 {
fn elkan_k_means_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32 {
vecf16::sl2(lhs, rhs).sqrt()
}

fn elkan_k_means_distance2(lhs: Vecf16Borrowed<'_>, rhs: &[F16]) -> F32 {
vecf16::sl2(lhs.slice(), rhs).sqrt()
}
}

impl OperatorElkanKMeans for Vecf32Cos {
type VectorNormalized = Self::VectorOwned;

fn elkan_k_means_normalize(vector: &mut [F32]) {
fn elkan_k_means_normalize(vector: &mut [Scalar<Self>]) {
vecf32::l2_normalize(vector)
}

fn elkan_k_means_normalize2(vector: Vecf32Borrowed<'_>) -> Vecf32Owned {
let mut vector = vector.for_own();
vecf32::l2_normalize(vector.slice_mut());
vector
}

fn elkan_k_means_distance(lhs: &[F32], rhs: &[F32]) -> F32 {
fn elkan_k_means_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32 {
vecf32::dot(lhs, rhs).acos()
}

fn elkan_k_means_distance2(lhs: Vecf32Borrowed<'_>, rhs: &[F32]) -> F32 {
vecf32::dot(lhs.slice(), rhs).acos()
}
}

impl OperatorElkanKMeans for Vecf32Dot {
type VectorNormalized = Self::VectorOwned;

fn elkan_k_means_normalize(vector: &mut [F32]) {
fn elkan_k_means_normalize(vector: &mut [Scalar<Self>]) {
vecf32::l2_normalize(vector)
}

fn elkan_k_means_normalize2(vector: Vecf32Borrowed<'_>) -> Vecf32Owned {
let mut vector = vector.for_own();
vecf32::l2_normalize(vector.slice_mut());
vector
}

fn elkan_k_means_distance(lhs: &[F32], rhs: &[F32]) -> F32 {
fn elkan_k_means_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32 {
vecf32::dot(lhs, rhs).acos()
}

fn elkan_k_means_distance2(lhs: Vecf32Borrowed<'_>, rhs: &[F32]) -> F32 {
vecf32::dot(lhs.slice(), rhs).acos()
}
}

impl OperatorElkanKMeans for Vecf32L2 {
type VectorNormalized = Self::VectorOwned;

fn elkan_k_means_normalize(_: &mut [F32]) {}

fn elkan_k_means_normalize2(vector: Vecf32Borrowed<'_>) -> Vecf32Owned {
vector.for_own()
}
fn elkan_k_means_normalize(_: &mut [Scalar<Self>]) {}

fn elkan_k_means_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32 {
vecf32::sl2(lhs, rhs).sqrt()
}

fn elkan_k_means_distance2(lhs: Vecf32Borrowed<'_>, rhs: &[Scalar<Self>]) -> F32 {
vecf32::sl2(lhs.slice(), rhs).sqrt()
}
}

impl OperatorElkanKMeans for Veci8Cos {
Expand All @@ -288,17 +164,9 @@ impl OperatorElkanKMeans for Veci8Cos {
vecf32::l2_normalize(vector)
}

fn elkan_k_means_normalize2(vector: Borrowed<'_, Self>) -> Veci8Owned {
vector.normalize()
}

fn elkan_k_means_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32 {
vecf32::dot(lhs, rhs).acos()
}

fn elkan_k_means_distance2(lhs: Borrowed<'_, Self>, rhs: &[Scalar<Self>]) -> F32 {
veci8::dot_2(lhs, rhs).acos()
}
}

impl OperatorElkanKMeans for Veci8Dot {
Expand All @@ -308,35 +176,17 @@ impl OperatorElkanKMeans for Veci8Dot {
vecf32::l2_normalize(vector)
}

fn elkan_k_means_normalize2(vector: Borrowed<'_, Self>) -> Veci8Owned {
vector.normalize()
}

fn elkan_k_means_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32 {
vecf32::dot(lhs, rhs).acos()
}

fn elkan_k_means_distance2(lhs: Borrowed<'_, Self>, rhs: &[Scalar<Self>]) -> F32 {
veci8::dot_2(lhs, rhs).acos()
}
}

impl OperatorElkanKMeans for Veci8L2 {
type VectorNormalized = Self::VectorOwned;

fn elkan_k_means_normalize(vector: &mut [Scalar<Self>]) {
vecf32::l2_normalize(vector)
}

fn elkan_k_means_normalize2(vector: Borrowed<'_, Self>) -> Veci8Owned {
vector.normalize()
}
fn elkan_k_means_normalize(_: &mut [Scalar<Self>]) {}

fn elkan_k_means_distance(lhs: &[Scalar<Self>], rhs: &[Scalar<Self>]) -> F32 {
vecf32::sl2(lhs, rhs).sqrt()
}

fn elkan_k_means_distance2(lhs: Borrowed<'_, Self>, rhs: &[Scalar<Self>]) -> F32 {
veci8::l2_2(lhs, rhs).sqrt()
}
}
16 changes: 9 additions & 7 deletions crates/ivf/src/ivf_naive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,11 @@ pub fn make<O: Op, S: Source<O>>(path: &Path, options: IndexOptions, source: &S)
let mut idx = vec![0usize; n as usize];
idx.par_iter_mut().enumerate().for_each(|(i, x)| {
rayon::check();
let vector = storage.vector(i as u32);
let vector = O::elkan_k_means_normalize2(vector);
let mut vector = storage.vector(i as u32).to_vec();
O::elkan_k_means_normalize(&mut vector);
let mut result = (F32::infinity(), 0);
for i in 0..nlist as usize {
let dis = O::elkan_k_means_distance2(vector.for_borrow(), &centroids[i]);
let dis = O::elkan_k_means_distance(&vector, &centroids[i]);
result = std::cmp::min(result, (dis, i));
}
*x = result.1;
Expand Down Expand Up @@ -242,11 +242,12 @@ pub fn basic<O: Op>(
nprobe: u32,
mut filter: impl Filter,
) -> BinaryHeap<Reverse<Element>> {
let target = O::elkan_k_means_normalize2(vector);
let mut target = vector.to_vec();
O::elkan_k_means_normalize(&mut target);
let mut lists = Vec::with_capacity(mmap.nlist as usize);
for i in 0..mmap.nlist {
let centroid = mmap.centroids(i);
let distance = O::elkan_k_means_distance2(target.for_borrow(), centroid);
let distance = O::elkan_k_means_distance(&target, centroid);
lists.push((distance, i));
}
if nprobe < mmap.nlist {
Expand Down Expand Up @@ -274,11 +275,12 @@ pub fn vbase<'a, O: Op>(
nprobe: u32,
mut filter: impl Filter + 'a,
) -> (Vec<Element>, Box<(dyn Iterator<Item = Element> + 'a)>) {
let target = O::elkan_k_means_normalize2(vector);
let mut target = vector.to_vec();
O::elkan_k_means_normalize(&mut target);
let mut lists = Vec::with_capacity(mmap.nlist as usize);
for i in 0..mmap.nlist {
let centroid = mmap.centroids(i);
let distance = O::elkan_k_means_distance2(target.for_borrow(), centroid);
let distance = O::elkan_k_means_distance(&target, centroid);
lists.push((distance, i));
}
if nprobe < mmap.nlist {
Expand Down
6 changes: 3 additions & 3 deletions crates/ivf/src/ivf_pq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,11 @@ pub fn make<O: Op, S: Source<O>>(path: &Path, options: IndexOptions, source: &S)
let mut idx = vec![0usize; n as usize];
idx.par_iter_mut().enumerate().for_each(|(i, x)| {
rayon::check();
let vector = storage.vector(i as u32);
let vector = O::elkan_k_means_normalize2(vector);
let mut vector = storage.vector(i as u32).to_vec();
O::elkan_k_means_normalize(&mut vector);
let mut result = (F32::infinity(), 0);
for i in 0..nlist as usize {
let dis = O::elkan_k_means_distance2(vector.for_borrow(), &centroids[i]);
let dis = O::elkan_k_means_distance(&vector, &centroids[i]);
result = std::cmp::min(result, (dis, i));
}
*x = result.1;
Expand Down

0 comments on commit 71b7a1a

Please sign in to comment.