Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Faiss integration #1520

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Vendored Rust bindings for FAISS.
# The HTTPS URL lets the submodule be fetched without SSH credentials
# (anonymous clones, CI runners).
[submodule "faiss-rs"]
	path = faiss-rs
	url = https://github.com/Enet4/faiss-rs.git
102 changes: 102 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 13 additions & 9 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
[workspace]
members = [
"raphtory",
"raphtory-benchmark",
"examples/rust",
"examples/netflow",
"python",
"js-raphtory",
"raphtory-graphql",
"comparison-benchmark/rust/raphtory-rust-benchmark"
"raphtory",
"raphtory-benchmark",
"examples/rust",
"examples/netflow",
"python",
"js-raphtory",
"raphtory-graphql",
"comparison-benchmark/rust/raphtory-rust-benchmark",
"faiss-rs",
"faiss-rs/faiss-sys",
"disk-faiss",
"disk-test",
]
default-members = ["raphtory"]

Expand All @@ -25,4 +29,4 @@ edition = "2021"

[profile.release-with-debug]
inherits = "release"
debug = true
debug = true
21 changes: 21 additions & 0 deletions disk-faiss/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Manifest for the `disk-faiss` crate: Rust glue around FAISS on-disk index
# merging, implemented with C++ embedded through the `cpp` crate.
[package]
name = "disk-faiss"
# TODO(review): add a description for this crate; the earlier placeholder
# ("Raphtory GraphQL server") did not describe it.
# All remaining package metadata is inherited from the workspace manifest.
edition.workspace = true
rust-version.workspace = true
version.workspace = true
keywords.workspace = true
authors.workspace = true
documentation.workspace = true
repository.workspace = true
license.workspace = true
readme.workspace = true
homepage.workspace = true
# NOTE(review): `links` is left disabled; presumably it would clash with the
# `links` key of faiss-sys — confirm before enabling.
# links = "faiss_c"
# The build script emits the linker directives and compiles the embedded C++.
build = "build.rs"

[dependencies]
# Provides the `cpp!` macro used in src/lib.rs.
cpp = "0.5.4"

[build-dependencies]
# Build-time counterpart of `cpp`, invoked from build.rs on src/lib.rs.
cpp_build = "0.5.4"
42 changes: 42 additions & 0 deletions disk-faiss/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
use std::path::PathBuf;

extern crate cpp_build;

/// Build-script entry point.
///
/// Emits the linker search paths and libraries needed to link against FAISS,
/// then compiles the C++ embedded in `src/lib.rs` with `cpp_build`.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` is not set (Cargo always sets it when it
/// runs a build script) or if the target OS is neither Linux nor macOS.
fn main() {
    // Every non-empty entry of LD_LIBRARY_PATH becomes a linker search path.
    if let Ok(paths) = std::env::var("LD_LIBRARY_PATH") {
        for path in paths.split(':').filter(|path| !path.is_empty()) {
            println!("cargo:rustc-link-search={}", path);
        }
    }

    println!("cargo:rustc-link-search=/usr/local/lib");
    println!("cargo:rustc-link-search=/usr/lib");

    if get_os_type() == "macos" {
        println!("cargo:rustc-link-lib=omp");
        println!("cargo:rustc-link-lib=faiss");
    } else {
        println!("cargo:rustc-link-lib=static=faiss");
        println!("cargo:rustc-link-lib=gomp");
        println!("cargo:rustc-link-lib=blas");
        println!("cargo:rustc-link-lib=lapack");
    }

    // Locate the FAISS headers relative to this crate instead of relying on
    // an absolute path that only exists on one developer machine. The
    // `faiss-rs` submodule sits next to this crate in the workspace root.
    let manifest_dir = std::env::var("CARGO_MANIFEST_DIR")
        .expect("Cargo sets CARGO_MANIFEST_DIR when running build scripts");
    let faiss_include = PathBuf::from(manifest_dir)
        .join("..")
        .join("faiss-rs")
        .join("faiss-sys")
        .join("faiss");

    cpp_build::Config::new()
        .include(faiss_include)
        .build("src/lib.rs");
}

/// Returns the name of the operating system this script is compiled for.
///
/// Yields `"linux"` or `"macos"`.
///
/// # Panics
///
/// Panics on any other target operating system.
fn get_os_type() -> &'static str {
    let is_linux = cfg!(target_os = "linux");
    let is_macos = cfg!(target_os = "macos");

    match (is_linux, is_macos) {
        (true, _) => "linux",
        (false, true) => "macos",
        (false, false) => panic!("unknow os type"),
    }
}
96 changes: 96 additions & 0 deletions disk-faiss/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#![recursion_limit = "512"]
// #![cfg_attr(not(test), allow(dead_code, unused_imports))]
// #![allow(unused)]

// #[macro_use]
// extern crate cpp;

use cpp::cpp;

cpp! {{
    #include <stdio.h>

    #include <cassert>
    #include <cstddef>
    #include <cstdlib>
    #include <exception>
    #include <iostream>
    #include <vector>

    #include <faiss/IndexIVFFlat.h>
    #include <faiss/MetaIndexes.h>
    #include <faiss/index_factory.h>
    #include <faiss/index_io.h>
    #include <faiss/invlists/OnDiskInvertedLists.h>
}}

/// Merges the inverted lists of several FAISS IVF shard indexes into a single
/// index backed by on-disk inverted lists, and writes that index to `output`.
///
/// * `index` - path of an IVF index that holds no vectors yet; it is read,
///   given the merged inverted lists, and written back out to `output`.
/// * `shards` - paths of the shard indexes whose inverted lists are merged.
///   Each one is opened with `faiss::IO_FLAG_MMAP`.
/// * `ivfdata` - file name handed to `faiss::OnDiskInvertedLists` for the
///   merged inverted-list data.
/// * `output` - path the resulting index is written to.
///
/// # Panics
///
/// Panics if any of the given paths contains an interior NUL byte.
///
/// # Process exit
///
/// Terminates the process with status 1 if the index at `index` already
/// contains vectors.
pub fn merge_ondisk(index: &str, shards: Vec<&str>, ivfdata: &str, output: &str) {
    /// Converts a path into an owned, NUL-terminated C string.
    fn to_c_string(path: &str) -> std::ffi::CString {
        std::ffi::CString::new(path)
            .expect("paths passed to merge_ondisk must not contain NUL bytes")
    }

    // The owned C strings must stay alive for as long as the raw pointers
    // derived from them are in use, so owners and pointers get distinct
    // bindings.
    let index_c = to_c_string(index);
    let index_path = index_c.as_ptr();

    let shards_c: Vec<std::ffi::CString> =
        shards.iter().map(|shard| to_c_string(*shard)).collect();
    let shard_ptrs: Vec<*const std::os::raw::c_char> =
        shards_c.iter().map(|shard| shard.as_ptr()).collect();
    // A Rust `Vec` is not layout-compatible with `std::vector`, so the C++
    // side receives a plain pointer to the first element plus a length.
    let shards = shard_ptrs.as_ptr();
    let num_shards: usize = shard_ptrs.len();

    let ivfdata_c = to_c_string(ivfdata);
    let ivfdata = ivfdata_c.as_ptr();

    let output_c = to_c_string(output);
    let output = output_c.as_ptr();

    // SAFETY: every pointer captured below refers to a NUL-terminated buffer
    // (or, for `shards`, to an array of `num_shards` such pointers) owned by a
    // local that outlives this block, and the C++ code only reads from them.
    unsafe {
        cpp!([index_path as "const char *", shards as "const char * const *", num_shards as "size_t", ivfdata as "const char *", output as "const char *"] {
            try {
                // Shard indexes are kept so they can be released after the merge.
                std::vector<faiss::Index*> shard_indexes;
                std::vector<const faiss::InvertedLists*> ivfs;
                std::cout << "reading shards -> " << num_shards << std::endl;
                size_t ntotal = 0;
                for (size_t i = 0; i < num_shards; ++i) {
                    auto shard_index = faiss::read_index(shards[i], faiss::IO_FLAG_MMAP);
                    auto ivf = dynamic_cast<faiss::IndexIVF*>(shard_index);
                    assert(ivf);

                    shard_indexes.push_back(shard_index);
                    ivfs.push_back(ivf->invlists);
                    ntotal += ivf->ntotal;
                }

                auto index_raw = faiss::read_index(index_path);
                auto index = dynamic_cast<faiss::IndexIVF*>(index_raw);
                assert(index);

                // The target index must not hold any vectors yet.
                if (index->ntotal != 0) {
                    std::cerr << "merge_ondisk: target index is not empty" << std::endl;
                    std::exit(1);
                }

                auto il = new faiss::OnDiskInvertedLists(index->nlist, index->code_size, ivfdata);
                il->merge_from(ivfs.data(), ivfs.size());

                // The index takes ownership of the merged lists.
                index->replace_invlists(il, true);
                index->ntotal = ntotal;

                faiss::write_index(index, output);

                // Release everything that was read: the merged index (which
                // owns `il`) and each shard index.
                delete index;
                for (auto shard_index : shard_indexes) {
                    delete shard_index;
                }
            } catch (const std::exception &e) {
                std::cerr << "standard exception!" << std::endl;
                std::cerr << e.what() << std::endl;
                // Rethrow the original exception object without slicing it.
                throw;
            } catch (...) {
                std::cerr << "unknown exception!" << std::endl;
                throw;
            }
        })
    };
}
24 changes: 24 additions & 0 deletions disk-test/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Manifest for the `disk-test` crate, which depends on `disk-faiss` and the
# vendored `faiss` bindings and declares a benchmark target.
[package]
name = "disk-test"
# All remaining package metadata is inherited from the workspace manifest.
edition.workspace = true
rust-version.workspace = true
version.workspace = true
keywords.workspace = true
authors.workspace = true
documentation.workspace = true
repository.workspace = true
license.workspace = true
readme.workspace = true
homepage.workspace = true

[dependencies]
# Sibling workspace crates, referenced by relative path.
disk-faiss = { path = "../disk-faiss" }
faiss = { path = "../faiss-rs" }

[dev-dependencies]
criterion = "0.5.1"
rand = "0.8.5"

# Benchmark target `bench`; `harness = false` turns off the built-in libtest
# harness so the benchmark supplies its own entry point.
[[bench]]
name = "bench"
harness = false
Loading