forked from facebookresearch/faiss
-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support independent IVF coarse quantizer
Summary: In the IndexIVFIndependentQuantizer, the coarse quantizer is applied on the input vectors, but the encoding is performed on a vector-transformed version of the database elements. Reviewed By: alexanderguzhva Differential Revision: D45950970 fbshipit-source-id: 30f6cf46d44174b1d99a12384b7d5e2d475c1f88
- Loading branch information
1 parent
a3296f4
commit 6800ebe
Showing
14 changed files
with
448 additions
and
55 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
/** | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* This source code is licensed under the MIT license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
#include <faiss/IndexIVFIndependentQuantizer.h> | ||
#include <faiss/IndexIVFPQ.h> | ||
#include <faiss/impl/FaissAssert.h> | ||
#include <faiss/utils/utils.h> | ||
|
||
namespace faiss { | ||
|
||
/**
 * Assemble the index from a coarse quantizer, an IVF index and an optional
 * vector transform.
 *
 * The index dimension is taken from the quantizer and the metric from the
 * IVF index. The constructor checks that the dimensions of the components
 * chain together, copies ntotal from the IVF index and derives is_trained
 * from the state of the three components.
 *
 * @param quantizer  coarse quantizer, defines the input dimension d
 * @param index_ivf  IVF index, defines the metric and the number of lists
 * @param vt         optional transform; when given it must map d to
 *                   index_ivf->d, otherwise index_ivf->d must equal d
 */
IndexIVFIndependentQuantizer::IndexIVFIndependentQuantizer(
        Index* quantizer,
        IndexIVF* index_ivf,
        VectorTransform* vt)
        : Index(quantizer->d, index_ivf->metric_type),
          quantizer(quantizer),
          vt(vt),
          index_ivf(index_ivf) {
    // the dimensions of the components must be compatible
    if (!vt) {
        FAISS_THROW_IF_NOT_MSG(index_ivf->d == d, "invalid vector dimensions");
    } else {
        FAISS_THROW_IF_NOT_MSG(
                vt->d_in == d && vt->d_out == index_ivf->d,
                "invalid vector dimensions");
    }

    // a trained, non-empty quantizer must hold exactly one entry per list
    if (quantizer->is_trained && quantizer->ntotal != 0) {
        FAISS_THROW_IF_NOT(quantizer->ntotal == index_ivf->nlist);
    }
    // a trained IVF index cannot be combined with an untrained transform
    if (index_ivf->is_trained && vt) {
        FAISS_THROW_IF_NOT(vt->is_trained);
    }

    ntotal = index_ivf->ntotal;

    // the composite index is trained only if all of its parts are
    const bool quantizer_ready = quantizer->is_trained &&
            quantizer->ntotal == index_ivf->nlist;
    const bool transform_ready = !vt || vt->is_trained;
    is_trained = quantizer_ready && transform_ready && index_ivf->is_trained;

    // disable precomputed tables because they use the distances that are
    // provided by the coarse quantizer (that are out of sync with the IVFPQ)
    auto* as_ivfpq = dynamic_cast<IndexIVFPQ*>(index_ivf);
    if (as_ivfpq != nullptr) {
        as_ivfpq->use_precomputed_table = -1;
    }
}
|
||
/// Deletes the quantizer, the IVF index and the transform, but only when
/// own_fields is set; otherwise the components are left untouched.
IndexIVFIndependentQuantizer::~IndexIVFIndependentQuantizer() {
    if (!own_fields) {
        return;
    }
    delete quantizer;
    delete index_ivf;
    delete vt;
}
|
||
namespace { | ||
|
||
struct VTransformedVectors : TransformedVectors { | ||
VTransformedVectors(const VectorTransform* vt, idx_t n, const float* x) | ||
: TransformedVectors(x, vt ? vt->apply(n, x) : x) {} | ||
}; | ||
|
||
struct SubsampledVectors : TransformedVectors { | ||
SubsampledVectors(int d, idx_t* n, idx_t max_n, const float* x) | ||
: TransformedVectors( | ||
x, | ||
fvecs_maybe_subsample(d, (size_t*)n, max_n, x, true)) {} | ||
}; | ||
|
||
} // anonymous namespace | ||
|
||
/**
 * Add n vectors to the index.
 *
 * The inverted list of each vector is chosen by a 1-nearest-neighbor search
 * of the raw input in the coarse quantizer. The vectors handed to the IVF
 * index are the output of vt, or the raw input when no transform is set.
 *
 * @param n  number of vectors to add
 * @param x  input vectors (assumed to be n * d contiguous floats -- confirm
 *           against the Index interface)
 */
void IndexIVFIndependentQuantizer::add(idx_t n, const float* x) {
    // coarse assignment is computed on the untransformed vectors
    std::vector<float> coarse_dis(n);
    std::vector<idx_t> coarse_ids(n);
    quantizer->search(n, x, 1, coarse_dis.data(), coarse_ids.data());

    VTransformedVectors tv(vt, n, x);

    index_ivf->add_core(n, tv.x, nullptr, coarse_ids.data());

    // keep the vector count of this wrapper aligned with the IVF index, as
    // the constructor and reset() already do; without this ntotal stays
    // stale after an add
    ntotal = index_ivf->ntotal;
}
|
||
/**
 * Search the k nearest neighbors of n query vectors.
 *
 * The coarse quantizer is queried with the raw input to select
 * index_ivf->nprobe lists per query. The transformed queries are then
 * searched in those lists via IndexIVF::search_preassigned.
 *
 * @param n          number of queries
 * @param x          query vectors
 * @param k          number of results per query
 * @param distances  output distances, filled by search_preassigned
 * @param labels     output labels, filled by search_preassigned
 * @param params     must be null, search parameters are not supported
 */
void IndexIVFIndependentQuantizer::search(
        idx_t n,
        const float* x,
        idx_t k,
        float* distances,
        idx_t* labels,
        const SearchParameters* params) const {
    FAISS_THROW_IF_NOT_MSG(!params, "search parameters not supported");

    // coarse quantization on the untransformed queries
    int n_probe = index_ivf->nprobe;
    std::vector<float> coarse_dis(n * n_probe);
    std::vector<idx_t> coarse_ids(n * n_probe);
    quantizer->search(
            n, x, n_probe, coarse_dis.data(), coarse_ids.data());

    // the IVF index itself works on the transformed queries
    VTransformedVectors queries(vt, n, x);

    index_ivf->search_preassigned(
            n,
            queries.x,
            k,
            coarse_ids.data(),
            coarse_dis.data(),
            distances,
            labels,
            false);
}
|
||
/// Resets the wrapped IVF index and sets the vector count of this index to
/// zero. The coarse quantizer and the transform are not touched here.
void IndexIVFIndependentQuantizer::reset() {
    index_ivf->reset();
    ntotal = 0;
}
|
||
/**
 * Train all components of the index.
 *
 * Steps, in order:
 *  1. train the coarse quantizer on the raw input (via Level1Quantizer),
 *  2. train the VectorTransform if there is one and it is not trained yet,
 *  3. reconstruct the coarse centroids, transform them and add them to the
 *     quantizer of the IVF index,
 *  4. train the encoder of the IVF index on the (possibly subsampled)
 *     transformed vectors, or on their residuals when by_residual is set.
 *
 * @param n  number of training vectors
 * @param x  training vectors
 */
void IndexIVFIndependentQuantizer::train(idx_t n, const float* x) {
    // progress messages are printed only in verbose mode
    auto log_step = [this](const char* message) {
        if (verbose) {
            printf("%s", message);
        }
    };

    // quantizer training
    const size_t num_lists = index_ivf->nlist;
    Level1Quantizer coarse_trainer(quantizer, num_lists);
    coarse_trainer.train_q1(n, x, verbose, metric_type);

    // train the VectorTransform
    if (vt && !vt->is_trained) {
        log_step("IndexIVFIndependentQuantizer: train the VectorTransform\n");
        vt->train(n, x);
    }

    // get the centroids from the quantizer, transform them and
    // add them to the index_ivf's quantizer
    log_step(
            "IndexIVFIndependentQuantizer: extract the main quantizer centroids\n");
    std::vector<float> coarse_centroids(num_lists * d);
    quantizer->reconstruct_n(0, num_lists, coarse_centroids.data());
    VTransformedVectors transformed_centroids(
            vt, num_lists, coarse_centroids.data());

    log_step(
            "IndexIVFIndependentQuantizer: add centroids to the secondary quantizer\n");
    Index* secondary = index_ivf->quantizer;
    if (!secondary->is_trained) {
        secondary->train(num_lists, transformed_centroids.x);
    }
    secondary->add(num_lists, transformed_centroids.x);

    // train the payload

    // optional subsampling; a non-positive request falls back to 2^35
    const idx_t requested_nt = index_ivf->train_encoder_num_vectors();
    const idx_t max_nt = requested_nt > 0
            ? requested_nt
            : static_cast<idx_t>(static_cast<size_t>(1) << 35);
    SubsampledVectors sv(index_ivf->d, &n, max_nt, x);

    // transform subsampled vectors (n may have been reduced above)
    VTransformedVectors tv(vt, n, sv.x);

    log_step("IndexIVFIndependentQuantizer: train encoder\n");

    if (!index_ivf->by_residual) {
        index_ivf->train_encoder(n, tv.x, nullptr);
    } else {
        // assign with quantizer, using the untransformed vectors
        std::vector<idx_t> list_ids(n);
        quantizer->assign(n, sv.x, list_ids.data());

        // compute residual with IVF quantizer, in the transformed space
        std::vector<float> residuals(n * index_ivf->d);
        index_ivf->quantizer->compute_residual_n(
                n, tv.x, residuals.data(), list_ids.data());

        index_ivf->train_encoder(n, residuals.data(), list_ids.data());
    }

    index_ivf->is_trained = true;
    is_trained = true;
}
|
||
} // namespace faiss |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
/** | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* This source code is licensed under the MIT license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <faiss/IndexIVF.h> | ||
#include <faiss/VectorTransform.h> | ||
|
||
namespace faiss { | ||
|
||
/** An IVF index with a quantizer that has a different input dimension from the | ||
* payload size. The vectors to encode are obtained from the input vectors by a | ||
* VectorTransform. | ||
*/ | ||
struct IndexIVFIndependentQuantizer : Index { | ||
/// quantizer is fed directly with the input vectors | ||
Index* quantizer = nullptr; | ||
|
||
/// transform before the IVF vectors are applied | ||
VectorTransform* vt = nullptr; | ||
|
||
/// the IVF index, controls nlist and nprobe | ||
IndexIVF* index_ivf = nullptr; | ||
|
||
/// whether *this owns the 3 fields | ||
bool own_fields = false; | ||
|
||
IndexIVFIndependentQuantizer( | ||
Index* quantizer, | ||
IndexIVF* index_ivf, | ||
VectorTransform* vt = nullptr); | ||
|
||
IndexIVFIndependentQuantizer() {} | ||
|
||
void train(idx_t n, const float* x) override; | ||
|
||
void add(idx_t n, const float* x) override; | ||
|
||
void search( | ||
idx_t n, | ||
const float* x, | ||
idx_t k, | ||
float* distances, | ||
idx_t* labels, | ||
const SearchParameters* params = nullptr) const override; | ||
|
||
void reset() override; | ||
|
||
~IndexIVFIndependentQuantizer() override; | ||
}; | ||
|
||
} // namespace faiss |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.