Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge 0.6.2 to master #371

Merged
merged 7 commits into from
Feb 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ Header-only C++ HNSW implementation with python bindings.
**NEWS:**


**version 0.6.2**

* Fixed a bug in saving large pickles: pickles larger than 4GB could have been corrupted. Thanks to Kai Wohlfahrt for reporting.
* Thanks to ([@GuyAv46](https://github.com/GuyAv46)) hnswlib inner product is now more consistent across architectures (SSE, AVX, etc).
*

**version 0.6.1**

* Thanks to ([@tony-kuo](https://github.com/tony-kuo)) hnswlib AVX512 and AVX builds are not backwards-compatible with older SSE and non-AVX512 architectures.
Expand Down Expand Up @@ -235,6 +241,9 @@ or you can install via pip:


### For developers
Contributions are highly welcome!

Please make pull requests against the `develop` branch.

When making changes please run tests (and please add a test to `python_bindings/tests` in case there is new functionality):
```bash
Expand All @@ -259,10 +268,6 @@ https://github.com/dbaranchuk/ivf-hnsw
* .Net implementation: https://github.com/microsoft/HNSW.Net
* CUDA implementation: https://github.com/js1010/cuhnsw

### Contributing to the repository
Contributions are highly welcome!

Please make pull requests against the `develop` branch.

### 200M SIFT test reproduction
To download and extract the bigann dataset (from root directory):
Expand Down
84 changes: 61 additions & 23 deletions hnswlib/space_ip.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,15 @@ namespace hnswlib {
for (unsigned i = 0; i < qty; i++) {
res += ((float *) pVect1)[i] * ((float *) pVect2)[i];
}
return (1.0f - res);
return res;

}

// Distance form of the plain inner product: forwards all three arguments
// unchanged to InnerProduct and returns 1.0f minus the value it produces.
static float
InnerProductDistance(const void *pVect1, const void *pVect2, const void *qty_ptr) {
    const float dot = InnerProduct(pVect1, pVect2, qty_ptr);
    return 1.0f - dot;
}

#if defined(USE_AVX)

// Favor using AVX if available.
Expand Down Expand Up @@ -61,8 +66,13 @@ namespace hnswlib {

_mm_store_ps(TmpRes, sum_prod);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];;
return 1.0f - sum;
}
return sum;
}

// Distance wrapper around InnerProductSIMD4ExtAVX: the arguments are passed
// through untouched and the result is 1.0f minus the returned sum.
static float
InnerProductDistanceSIMD4ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    const float dot = InnerProductSIMD4ExtAVX(pVect1v, pVect2v, qty_ptr);
    return 1.0f - dot;
}

#endif

Expand Down Expand Up @@ -121,7 +131,12 @@ namespace hnswlib {
_mm_store_ps(TmpRes, sum_prod);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];

return 1.0f - sum;
return sum;
}

// Distance wrapper around InnerProductSIMD4ExtSSE: the arguments are passed
// through untouched and the result is 1.0f minus the returned sum.
static float
InnerProductDistanceSIMD4ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    const float dot = InnerProductSIMD4ExtSSE(pVect1v, pVect2v, qty_ptr);
    return 1.0f - dot;
}

#endif
Expand Down Expand Up @@ -156,7 +171,12 @@ namespace hnswlib {
_mm512_store_ps(TmpRes, sum512);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7] + TmpRes[8] + TmpRes[9] + TmpRes[10] + TmpRes[11] + TmpRes[12] + TmpRes[13] + TmpRes[14] + TmpRes[15];

return 1.0f - sum;
return sum;
}

// Distance wrapper around InnerProductSIMD16ExtAVX512: the arguments are
// passed through untouched and the result is 1.0f minus the returned sum.
static float
InnerProductDistanceSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    const float dot = InnerProductSIMD16ExtAVX512(pVect1v, pVect2v, qty_ptr);
    return 1.0f - dot;
}

#endif
Expand Down Expand Up @@ -196,15 +216,20 @@ namespace hnswlib {
_mm256_store_ps(TmpRes, sum256);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];

return 1.0f - sum;
return sum;
}

// Distance wrapper around InnerProductSIMD16ExtAVX: the arguments are passed
// through untouched and the result is 1.0f minus the returned sum.
static float
InnerProductDistanceSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    const float dot = InnerProductSIMD16ExtAVX(pVect1v, pVect2v, qty_ptr);
    return 1.0f - dot;
}

#endif

#if defined(USE_SSE)

static float
InnerProductSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
static float
InnerProductSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
float PORTABLE_ALIGN32 TmpRes[8];
float *pVect1 = (float *) pVect1v;
float *pVect2 = (float *) pVect2v;
Expand Down Expand Up @@ -245,17 +270,24 @@ namespace hnswlib {
_mm_store_ps(TmpRes, sum_prod);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];

return 1.0f - sum;
return sum;
}

// Distance wrapper around InnerProductSIMD16ExtSSE: the arguments are passed
// through untouched and the result is 1.0f minus the returned sum.
static float
InnerProductDistanceSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    const float dot = InnerProductSIMD16ExtSSE(pVect1v, pVect2v, qty_ptr);
    return 1.0f - dot;
}

#endif

#if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
DISTFUNC<float> InnerProductSIMD16Ext = InnerProductSIMD16ExtSSE;
DISTFUNC<float> InnerProductSIMD4Ext = InnerProductSIMD4ExtSSE;
DISTFUNC<float> InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtSSE;
DISTFUNC<float> InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtSSE;

static float
InnerProductSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
InnerProductDistanceSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
size_t qty = *((size_t *) qty_ptr);
size_t qty16 = qty >> 4 << 4;
float res = InnerProductSIMD16Ext(pVect1v, pVect2v, &qty16);
Expand All @@ -264,11 +296,11 @@ namespace hnswlib {

size_t qty_left = qty - qty16;
float res_tail = InnerProduct(pVect1, pVect2, &qty_left);
return res + res_tail - 1.0f;
return 1.0f - (res + res_tail);
}

static float
InnerProductSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
InnerProductDistanceSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
size_t qty = *((size_t *) qty_ptr);
size_t qty4 = qty >> 2 << 2;

Expand All @@ -279,7 +311,7 @@ namespace hnswlib {
float *pVect2 = (float *) pVect2v + qty4;
float res_tail = InnerProduct(pVect1, pVect2, &qty_left);

return res + res_tail - 1.0f;
return 1.0f - (res + res_tail);
}
#endif

Expand All @@ -290,30 +322,37 @@ namespace hnswlib {
size_t dim_;
public:
InnerProductSpace(size_t dim) {
fstdistfunc_ = InnerProduct;
fstdistfunc_ = InnerProductDistance;
#if defined(USE_AVX) || defined(USE_SSE) || defined(USE_AVX512)
#if defined(USE_AVX512)
if (AVX512Capable())
if (AVX512Capable()) {
InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX512;
else if (AVXCapable())
InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX512;
} else if (AVXCapable()) {
InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX;
}
#elif defined(USE_AVX)
if (AVXCapable())
if (AVXCapable()) {
InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX;
}
#endif
#if defined(USE_AVX)
if (AVXCapable())
if (AVXCapable()) {
InnerProductSIMD4Ext = InnerProductSIMD4ExtAVX;
InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtAVX;
}
#endif

if (dim % 16 == 0)
fstdistfunc_ = InnerProductSIMD16Ext;
fstdistfunc_ = InnerProductDistanceSIMD16Ext;
else if (dim % 4 == 0)
fstdistfunc_ = InnerProductSIMD4Ext;
fstdistfunc_ = InnerProductDistanceSIMD4Ext;
else if (dim > 16)
fstdistfunc_ = InnerProductSIMD16ExtResiduals;
fstdistfunc_ = InnerProductDistanceSIMD16ExtResiduals;
else if (dim > 4)
fstdistfunc_ = InnerProductSIMD4ExtResiduals;
fstdistfunc_ = InnerProductDistanceSIMD4ExtResiduals;
#endif
dim_ = dim;
data_size_ = dim * sizeof(float);
Expand All @@ -334,5 +373,4 @@ namespace hnswlib {
~InnerProductSpace() {}
};


}
18 changes: 9 additions & 9 deletions python_bindings/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,12 +292,12 @@ class Index {
py::dict getAnnData() const { /* WARNING: Index::getAnnData is not thread-safe with Index::addItems */
std::unique_lock <std::mutex> templock(appr_alg->global);

unsigned int level0_npy_size = appr_alg->cur_element_count * appr_alg->size_data_per_element_;
unsigned int link_npy_size = 0;
std::vector<unsigned int> link_npy_offsets(appr_alg->cur_element_count);
size_t level0_npy_size = appr_alg->cur_element_count * appr_alg->size_data_per_element_;
size_t link_npy_size = 0;
std::vector<size_t> link_npy_offsets(appr_alg->cur_element_count);

for (size_t i = 0; i < appr_alg->cur_element_count; i++){
unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
size_t linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
link_npy_offsets[i]=link_npy_size;
if (linkListSize)
link_npy_size += linkListSize;
Expand Down Expand Up @@ -326,7 +326,7 @@ class Index {
memcpy(element_levels_npy, appr_alg->element_levels_.data(), appr_alg->element_levels_.size() * sizeof(int));

for (size_t i = 0; i < appr_alg->cur_element_count; i++){
unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
size_t linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
if (linkListSize){
memcpy(link_list_npy+link_npy_offsets[i], appr_alg->linkLists_[i], linkListSize);
}
Expand Down Expand Up @@ -500,11 +500,11 @@ class Index {

memcpy(appr_alg->element_levels_.data(), element_levels_npy.data(), element_levels_npy.nbytes());

unsigned int link_npy_size = 0;
std::vector<unsigned int> link_npy_offsets(appr_alg->cur_element_count);
size_t link_npy_size = 0;
std::vector<size_t> link_npy_offsets(appr_alg->cur_element_count);

for (size_t i = 0; i < appr_alg->cur_element_count; i++){
unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
size_t linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
link_npy_offsets[i]=link_npy_size;
if (linkListSize)
link_npy_size += linkListSize;
Expand All @@ -513,7 +513,7 @@ class Index {
memcpy(appr_alg->data_level0_memory_, data_level0_npy.data(), data_level0_npy.nbytes());

for (size_t i = 0; i < appr_alg->max_elements_; i++) {
unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
size_t linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
if (linkListSize == 0) {
appr_alg->linkLists_[i] = nullptr;
} else {
Expand Down
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import sys
import platform

import numpy as np
import pybind11
Expand Down Expand Up @@ -86,6 +87,8 @@ class BuildExt(build_ext):
}

if sys.platform == 'darwin':
if platform.machine() == 'arm64':
c_opts['unix'].remove('-march=native')
c_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7']
link_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7']
else:
Expand Down