-
Notifications
You must be signed in to change notification settings - Fork 300
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
bd2a034
commit 110a3b0
Showing
5 changed files
with
344 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
use wasm_bindgen::prelude::*; | ||
|
||
#[wasm_bindgen] | ||
extern "C" { | ||
#[wasm_bindgen(extends=js_sys::Object)] | ||
#[derive(Debug, Clone, PartialEq, Eq)] | ||
pub type VectorizeIndex; | ||
|
||
#[wasm_bindgen(method, catch)] | ||
pub fn insert( | ||
this: &VectorizeIndex, | ||
vectors: js_sys::Object, | ||
) -> Result<js_sys::Promise, JsValue>; | ||
|
||
#[wasm_bindgen(method, catch)] | ||
pub fn upsert( | ||
this: &VectorizeIndex, | ||
vectors: js_sys::Object, | ||
) -> Result<js_sys::Promise, JsValue>; | ||
|
||
#[wasm_bindgen(method, catch)] | ||
pub fn describe(this: &VectorizeIndex) -> Result<js_sys::Promise, JsValue>; | ||
|
||
#[wasm_bindgen(method, catch)] | ||
pub fn query( | ||
this: &VectorizeIndex, | ||
vector: &[f32], | ||
options: js_sys::Object, | ||
) -> Result<js_sys::Promise, JsValue>; | ||
|
||
#[wasm_bindgen(method, catch, js_name = "getByIds")] | ||
pub fn get_by_ids(this: &VectorizeIndex, ids: JsValue) -> Result<js_sys::Promise, JsValue>; | ||
|
||
#[wasm_bindgen(method, catch, js_name = "deleteByIds")] | ||
pub fn delete_by_ids(this: &VectorizeIndex, ids: JsValue) -> Result<js_sys::Promise, JsValue>; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,299 @@ | ||
use crate::{send::SendFuture, EnvBinding, Result}; | ||
use serde::{Deserialize, Serialize}; | ||
use wasm_bindgen::{JsCast, JsValue}; | ||
use wasm_bindgen_futures::JsFuture; | ||
use worker_sys::types::VectorizeIndex as VectorizeIndexSys; | ||
|
||
#[derive(Debug, Deserialize)] | ||
#[serde(rename_all = "kebab-case")] | ||
/// Supported distance metrics for an index. | ||
/// Distance metrics determine how other "similar" vectors are determined. | ||
pub enum VectorizeDistanceMetric { | ||
Euclidean, | ||
Cosine, | ||
DotProduct, | ||
} | ||
|
||
#[derive(Debug, Deserialize)] | ||
#[serde(untagged)] | ||
/// Information about the configuration of an index. | ||
pub enum VectorizeIndexConfig { | ||
Preset { | ||
preset: String, | ||
}, | ||
Custom { | ||
dimensions: u16, | ||
metric: VectorizeDistanceMetric, | ||
}, | ||
} | ||
|
||
#[derive(Debug, Deserialize)] | ||
#[serde(rename_all = "camelCase")] | ||
/// Metadata about an existing index. | ||
/// | ||
/// This type is exclusively for the Vectorize **beta** and will be deprecated once Vectorize RC is released. | ||
pub struct VectorizeIndexDetails { | ||
pub id: String, | ||
pub name: String, | ||
pub description: Option<String>, | ||
pub config: VectorizeIndexConfig, | ||
pub vectors_count: u64, | ||
} | ||
|
||
#[derive(Debug, Deserialize)] | ||
#[serde(rename_all = "camelCase")] | ||
/// Results of an operation that performed a mutation on a set of vectors. | ||
/// Here, `ids` is a list of vectors that were successfully processed. | ||
/// | ||
/// This type is exclusively for the Vectorize **beta** and will be deprecated once Vectorize RC is released. | ||
pub struct VectorizeVectorMutation { | ||
/// List of ids of vectors that were successfully processed. | ||
pub ids: Vec<String>, | ||
/// Total count of the number of processed vectors. | ||
pub count: u64, | ||
} | ||
|
||
#[derive(Debug, Serialize)] | ||
/// Represents a single vector value set along with its associated metadata. | ||
pub struct VectorizeVector<'a> { | ||
/// The ID for the vector. This can be user-defined, and must be unique. It should uniquely identify the object, and is best set based on the ID of what the vector represents. | ||
id: String, | ||
/// The vector values. | ||
values: &'a [f32], | ||
/// The namespace this vector belongs to. | ||
namespace: Option<String>, | ||
/// Metadata associated with the vector. Includes the values of other fields and potentially additional details. | ||
metadata: serde_json::Map<String, serde_json::Value>, | ||
} | ||
|
||
#[derive(Debug, Deserialize)] | ||
/// Represents a single vector value set along with its associated metadata. | ||
pub struct VectorizeVectorResult { | ||
/// The ID for the vector. This can be user-defined, and must be unique. It should uniquely identify the object, and is best set based on the ID of what the vector represents. | ||
pub id: String, | ||
/// The vector values. | ||
pub values: Vec<f32>, | ||
/// The namespace this vector belongs to. | ||
pub namespace: Option<String>, | ||
/// Metadata associated with the vector. Includes the values of other fields and potentially additional details. | ||
pub metadata: serde_json::Map<String, serde_json::Value>, | ||
} | ||
|
||
impl<'a> VectorizeVector<'a> { | ||
pub fn new(id: String, values: &'a [f32]) -> Self { | ||
Self { | ||
id, | ||
values, | ||
namespace: None, | ||
metadata: serde_json::Map::new(), | ||
} | ||
} | ||
|
||
pub fn with_namespace(mut self, namespace: String) -> Self { | ||
self.namespace = Some(namespace); | ||
self | ||
} | ||
|
||
pub fn with_metadata_entry<V: Serialize>(mut self, key: String, value: V) -> Result<Self> { | ||
self.metadata.insert(key, serde_json::to_value(value)?); | ||
Ok(self) | ||
} | ||
} | ||
|
||
#[derive(Debug, Serialize)] | ||
#[serde(rename_all = "kebab-case")] | ||
/// Metadata return levels for a Vectorize query. | ||
pub enum VectorizeMetadataRetrievalLevel { | ||
/// Full metadata for the vector return set, including all fields (including those un-indexed) without truncation. This is a more expensive retrieval, as it requires additional fetching & reading of un-indexed data. | ||
All, | ||
/// Return all metadata fields configured for indexing in the vector return set. This level of retrieval is "free" in that no additional overhead is incurred returning this data. However, note that indexed metadata is subject to truncation (especially for larger strings). | ||
Indexed, | ||
/// No indexed metadata will be returned. | ||
None, | ||
} | ||
|
||
#[derive(Debug, Serialize)] | ||
#[serde(rename_all = "camelCase")] | ||
pub struct VectorizeQueryOptions { | ||
// Default 3, max 20 | ||
top_k: u8, | ||
namespace: Option<String>, | ||
/// Return vector values. Default `false`. | ||
return_values: bool, | ||
/// Return vector metadata. Default `false`. | ||
return_metadata: bool, | ||
/// Default `none`. | ||
filter: VectorizeMetadataRetrievalLevel, | ||
} | ||
|
||
impl VectorizeQueryOptions { | ||
pub fn new() -> Self { | ||
Self::default() | ||
} | ||
|
||
pub fn with_top_k(mut self, top_k: u8) -> Self { | ||
self.top_k = top_k; | ||
self | ||
} | ||
|
||
pub fn with_namespace(mut self, namespace: &str) -> Self { | ||
self.namespace = Some(namespace.to_owned()); | ||
self | ||
} | ||
|
||
pub fn with_return_values(mut self, return_values: bool) -> Self { | ||
self.return_values = return_values; | ||
self | ||
} | ||
|
||
pub fn with_return_metadata(mut self, return_metadata: bool) -> Self { | ||
self.return_metadata = return_metadata; | ||
self | ||
} | ||
|
||
pub fn with_filter(mut self, filter: VectorizeMetadataRetrievalLevel) -> Self { | ||
self.filter = filter; | ||
self | ||
} | ||
} | ||
|
||
impl Default for VectorizeQueryOptions { | ||
fn default() -> Self { | ||
Self { | ||
top_k: 3, | ||
namespace: None, | ||
return_values: false, | ||
return_metadata: false, | ||
filter: VectorizeMetadataRetrievalLevel::None, | ||
} | ||
} | ||
} | ||
|
||
#[derive(Debug, Deserialize)] | ||
pub struct VectorizeMatchVector { | ||
#[serde(flatten)] | ||
pub vector: VectorizeVectorResult, | ||
/// The score or rank for similarity, when returned as a result | ||
pub score: f64, | ||
} | ||
|
||
#[derive(Debug, Deserialize)] | ||
/// A set of matching {@link VectorizeMatch} for a particular query. | ||
pub struct VectorizeMatches { | ||
pub matches: Vec<VectorizeMatchVector>, | ||
pub count: u64, | ||
} | ||
|
||
/// A Vectorize Vector Search Index for querying vectors/embeddings. | ||
/// | ||
/// This type is exclusively for the Vectorize **beta** and will be deprecated once Vectorize RC is released. | ||
pub struct VectorizeIndex(VectorizeIndexSys); | ||
|
||
unsafe impl Send for VectorizeIndex {} | ||
unsafe impl Sync for VectorizeIndex {} | ||
|
||
impl EnvBinding for VectorizeIndex { | ||
const TYPE_NAME: &'static str = "VectorizeIndex"; | ||
} | ||
|
||
impl VectorizeIndex { | ||
/// Get information about the currently bound index. | ||
pub async fn describe(&self) -> Result<VectorizeIndexDetails> { | ||
let promise = self.0.describe()?; | ||
let fut = SendFuture::new(JsFuture::from(promise)); | ||
let details = fut.await?; | ||
Ok(serde_wasm_bindgen::from_value(details)?) | ||
} | ||
|
||
/// Insert a list of vectors into the index dataset. If a provided id exists, an error will be thrown. | ||
pub async fn insert<'a>( | ||
&self, | ||
vectors: &[VectorizeVector<'a>], | ||
) -> Result<VectorizeVectorMutation> { | ||
let promise = self | ||
.0 | ||
.insert(serde_wasm_bindgen::to_value(&vectors)?.into())?; | ||
let fut = SendFuture::new(JsFuture::from(promise)); | ||
let mutation = fut.await?; | ||
Ok(serde_wasm_bindgen::from_value(mutation)?) | ||
} | ||
|
||
/// Upsert a list of vectors into the index dataset. If a provided id exists, it will be replaced with the new values. | ||
pub async fn upsert<'a>( | ||
&self, | ||
vectors: &[VectorizeVector<'a>], | ||
) -> Result<VectorizeVectorMutation> { | ||
let promise = self | ||
.0 | ||
.upsert(serde_wasm_bindgen::to_value(&vectors)?.into())?; | ||
let fut = SendFuture::new(JsFuture::from(promise)); | ||
let mutation = fut.await?; | ||
Ok(serde_wasm_bindgen::from_value(mutation)?) | ||
} | ||
|
||
/// Use the provided vector to perform a similarity search across the index. | ||
pub async fn query( | ||
&self, | ||
vector: &[f32], | ||
options: VectorizeQueryOptions, | ||
) -> Result<VectorizeMatches> { | ||
let opts = serde_wasm_bindgen::to_value(&options)?; | ||
let promise = self.0.query(vector, opts.into())?; | ||
let fut = SendFuture::new(JsFuture::from(promise)); | ||
let matches = fut.await?; | ||
Ok(serde_wasm_bindgen::from_value(matches)?) | ||
} | ||
|
||
/// Delete a list of vectors with a matching id. | ||
pub async fn delete_by_ids<'a, T>(&self, ids: T) -> Result<VectorizeVectorMutation> | ||
where | ||
T: IntoIterator<Item = &'a str>, | ||
{ | ||
// TODO: Can we avoid this allocation? | ||
let ids: Vec<String> = ids.into_iter().map(|id| id.to_string()).collect(); | ||
let arg = serde_wasm_bindgen::to_value(&ids)?; | ||
let promise = self.0.delete_by_ids(arg)?; | ||
let fut = SendFuture::new(JsFuture::from(promise)); | ||
let mutation = fut.await?; | ||
Ok(serde_wasm_bindgen::from_value(mutation)?) | ||
} | ||
|
||
/// Get a list of vectors with a matching id. | ||
pub async fn get_by_ids<'a, T>(&self, ids: T) -> Result<Vec<VectorizeVectorResult>> | ||
where | ||
T: IntoIterator<Item = &'a str>, | ||
{ | ||
let ids: Vec<String> = ids.into_iter().map(|id| id.to_string()).collect(); | ||
let arg = serde_wasm_bindgen::to_value(&ids)?; | ||
let promise = self.0.get_by_ids(arg)?; | ||
let fut = SendFuture::new(JsFuture::from(promise)); | ||
let vectors = fut.await?; | ||
Ok(serde_wasm_bindgen::from_value(vectors)?) | ||
} | ||
} | ||
|
||
impl JsCast for VectorizeIndex { | ||
fn instanceof(val: &JsValue) -> bool { | ||
val.is_instance_of::<VectorizeIndex>() | ||
} | ||
|
||
fn unchecked_from_js(val: JsValue) -> Self { | ||
Self(val.into()) | ||
} | ||
|
||
fn unchecked_from_js_ref(val: &JsValue) -> &Self { | ||
unsafe { &*(val as *const JsValue as *const Self) } | ||
} | ||
} | ||
|
||
impl From<VectorizeIndex> for JsValue { | ||
fn from(index: VectorizeIndex) -> Self { | ||
JsValue::from(index.0) | ||
} | ||
} | ||
|
||
impl AsRef<JsValue> for VectorizeIndex { | ||
fn as_ref(&self) -> &JsValue { | ||
&self.0 | ||
} | ||
} |