Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[VEC-34] Client changes for changes in index proto #2

Merged
merged 5 commits into from
Dec 26, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions proto/transact.proto
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ message VectorSearchRequest {

// The bin selector.
BinSelector binSelector = 4;

// Optional parameters to tune the search.
oneof searchParams {
HnswSearchParams hnswSearchParams = 5;
}
}

// Record transaction services.
Expand Down
86 changes: 78 additions & 8 deletions proto/types.proto
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ message RecordWithKey {
}

// Unique identifier for an index.
// TODO: Include optional set name?
message IndexId {
// The name of the index.
string name = 1;
Expand All @@ -147,26 +146,97 @@ enum IndexType {
HNSW = 0;
}

// Parameters for the HNSW index.
message HnswParams {
// Maximum number of bi-directional links per HNSW vertex. Greater values of
// 'm' in general provide better recall for data with high dimensionality,
// while lower values work well for data with lower dimensionality.
// The storage space required for the index increases proportionally with 'm'.
// The default value is 16.
optional uint32 m = 1;

// The number of candidate nearest neighbors shortlisted during index
// creation. Larger values provide better recall at the cost of longer index
// update times.
// The default value is 100.
optional uint32 efConstruction = 2;

// The default number of candidate nearest neighbors shortlisted during
// search. Larger values provide better recall at the cost of longer search
// times.
// The default value is 100.
optional uint32 ef = 3;

// Configures batching behaviour for batch-based index updates.
HnswBatchingParams batchingParams = 4;
}

// Parameters for a search against an HNSW index.
message HnswSearchParams {
// The number of candidate nearest neighbors shortlisted during search.
// Larger values provide better recall at the cost of longer search times.
// The default is the value set in HnswParams for the index.
optional uint32 ef = 1;
}

// Configures batching behaviour for batch-based index updates.
message HnswBatchingParams {
// Maximum number of records to fit in a batch.
// The default value is 10000.
optional uint32 maxRecords = 1;

// The maximum amount of time, in milliseconds, to wait before finalizing a
// batch.
// The default value is 10000.
optional uint32 interval = 2;

// Disables batching for index updates.
// The default is false, meaning batching is enabled.
optional bool disabled = 3;
}

// Configuration for storing an index in Aerospike.
message AerospikeIndexStorage {
// Optional Aerospike namespace where the index is stored.
// Defaults to the index namespace.
optional string namespace = 1;

// Optional Aerospike set where the index is stored.
// Defaults to the index name.
optional string set = 2;
}

// An index definition.
message IndexDefinition {
// The index identifier.
IndexId id = 1;

// The type of index.
IndexType type = 2;

VectorDistanceMetric vectorDistanceMetric = 3;
// Number of dimensions in data.
// Vectors not matching the dimension count will not be indexed.
uint32 dimensions = 3;

// Optional Aerospike set name.
optional string set = 4;
// Optional. The distance metric to use. Defaults to SQUARED_EUCLIDEAN.
VectorDistanceMetric vectorDistanceMetric = 4;

// Indexed vector bin.
string bin = 5;

// Number of dimensions in data.
// Vectors not matching the dimension count will not be indexed.
uint32 dimensions = 6;
// Optional filter on Aerospike set name from which records will be indexed.
// If not specified all sets in the index namespace will be indexed.
optional string setFilter = 6;

// Index parameters.
oneof params {
HnswParams hnswParams = 7;
}

// Index storage.
oneof storage {
AerospikeIndexStorage aerospikeStorage = 8;
}

map<string, Value> params = 7;
// Optional labels associated with the index.
map<string, string> labels = 9;
}

// List of index definitions.
Expand Down
13 changes: 9 additions & 4 deletions src/aerospike_vector/conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,25 @@ def toVectorDbValue(value: Any) -> types_pb2.Value:
if isinstance(value[0], float):
return types_pb2.Value(
vectorValue=types_pb2.Vector(
floatArray={"value": [float(x) for x in value]}))
floatData={"value": [float(x) for x in value]}))
elif isinstance(value[0], bool):
return types_pb2.Value(
vectorValue=types_pb2.Vector(
boolArray={"value": [True if x else False for x in value]}))
boolData={"value": [True if x else False for x in value]}))
else:
return types_pb2.Value(
listValue=types_pb2.List(entries = [toVectorDbValue(x) for x in value]))
listValue=types_pb2.List(
entries=[toVectorDbValue(x) for x in value]))
elif isinstance(value, dict):
d = types_pb2.Value(
mapValue=types_pb2.Map(entries = [types_pb2.MapEntry(key=toMapKey(k), value=toVectorDbValue(v)) for k,v in value.items()]))
mapValue=types_pb2.Map(entries=[
types_pb2.MapEntry(key=toMapKey(k), value=toVectorDbValue(v))
for k, v in value.items()]))
return d
else:
raise Exception("Invalid type " + str(type(value)))


def toMapKey(value):
if isinstance(value, str):
return types_pb2.MapKey(stringValue=value)
Expand All @@ -45,6 +49,7 @@ def toMapKey(value):
else:
raise Exception("Invalid map key type " + str(type(value)))


def fromVectorDbKey(key: types_pb2.Key) -> types.Key:
keyValue = None
if key.HasField("stringValue"):
Expand Down
12 changes: 6 additions & 6 deletions src/aerospike_vector/transact_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading