Skip to content

Commit 17c8263

Browse files
authored
CSHARP-5717: Typed builders for vector indexes (#1795)
* CSHARP-5717: Typed builders for vector indexes Replaces #1769 * Removed binary break. * Tweaks * Feedback.
1 parent de05452 commit 17c8263

File tree

7 files changed

+511
-34
lines changed

7 files changed

+511
-34
lines changed

evergreen/evergreen.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2226,7 +2226,7 @@ task_groups:
22262226
- "AWS_SESSION_TOKEN"
22272227
env:
22282228
CLUSTER_PREFIX: dbx-csharp-search-index
2229-
MONGODB_VERSION: "7.0"
2229+
MONGODB_VERSION: "8.0"
22302230
args:
22312231
- ${DRIVERS_TOOLS}/.evergreen/atlas/setup-atlas-cluster.sh
22322232
- command: expansions.update

src/MongoDB.Driver/CreateSearchIndexModel.cs

Lines changed: 51 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,45 +13,79 @@
1313
* limitations under the License.
1414
*/
1515

16+
using System;
1617
using MongoDB.Bson;
1718

1819
namespace MongoDB.Driver
1920
{
2021
/// <summary>
/// A search index model whose definition is supplied as a <see cref="BsonDocument"/>. To build vector indexes
/// without writing the BSON by hand, consider <see cref="CreateVectorSearchIndexModel{TDocument}"/>.
/// </summary>
public class CreateSearchIndexModel
{
    // Stays null when the instance was created through the protected constructor.
    private readonly BsonDocument _definition;

    /// <summary>Gets the index name.</summary>
    /// <value>The index name.</value>
    public string Name { get; }

    /// <summary>Gets the index type.</summary>
    /// <value>The index type.</value>
    public SearchIndexType? Type { get; }

    /// <summary>
    /// Gets the index definition when one was handed to a constructor of this class; throws otherwise.
    /// </summary>
    /// <value>The definition.</value>
    /// <exception cref="NotSupportedException">No definition document was supplied at construction.</exception>
    public BsonDocument Definition
    {
        get
        {
            if (_definition is null)
            {
                throw new NotSupportedException(
                    "This method should not be called on this subtype. Instead, call 'Render' to create a BSON document for the index model.");
            }

            return _definition;
        }
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="CreateSearchIndexModel"/> class from a
    /// <see cref="BsonDocument"/> describing the index. The index type is left unspecified.
    /// </summary>
    /// <remarks>
    /// Consider using <see cref="CreateVectorSearchIndexModel{TDocument}"/> to build vector indexes without
    /// specifying the BSON directly.
    /// </remarks>
    /// <param name="name">The index name.</param>
    /// <param name="definition">The index definition.</param>
    public CreateSearchIndexModel(string name, BsonDocument definition)
        : this(name, null, definition)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="CreateSearchIndexModel"/> class from a
    /// <see cref="BsonDocument"/> describing the index.
    /// </summary>
    /// <remarks>
    /// Consider using <see cref="CreateVectorSearchIndexModel{TDocument}"/> to build vector indexes without
    /// specifying the BSON directly.
    /// </remarks>
    /// <param name="name">The index name.</param>
    /// <param name="type">The index type.</param>
    /// <param name="definition">The index definition.</param>
    public CreateSearchIndexModel(string name, SearchIndexType? type, BsonDocument definition)
        : this(name, type)
    {
        _definition = definition;
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="CreateSearchIndexModel"/> class without a definition document.
    /// </summary>
    /// <param name="name">The index name.</param>
    /// <param name="type">The index type.</param>
    protected CreateSearchIndexModel(string name, SearchIndexType? type)
    {
        Name = name;
        Type = type;
    }
}
5791
}
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
/* Copyright 2010-present MongoDB Inc.
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
using System;
17+
using System.Collections.Generic;
18+
using System.Linq;
19+
using System.Linq.Expressions;
20+
using MongoDB.Bson;
21+
22+
namespace MongoDB.Driver;
23+
24+
/// <summary>
/// Defines a vector index model using strongly-typed C# APIs.
/// </summary>
/// <typeparam name="TDocument">The document type that the field definitions are expressed against.</typeparam>
public sealed class CreateVectorSearchIndexModel<TDocument> : CreateSearchIndexModel
{
    // Fallback values rendered into "hnswOptions" when only one of the two HNSW settings was supplied.
    private const int DefaultHnswMaxEdges = 16;
    private const int DefaultHnswNumEdgeCandidates = 100;

    /// <summary>
    /// The field containing the vectors to index.
    /// </summary>
    public FieldDefinition<TDocument> Field { get; }

    /// <summary>
    /// The <see cref="VectorSimilarity"/> to use to search for top K-nearest neighbors.
    /// </summary>
    public VectorSimilarity Similarity { get; }

    /// <summary>
    /// Number of vector dimensions that vector search enforces at index-time and query-time.
    /// </summary>
    public int Dimensions { get; }

    /// <summary>
    /// Fields that may be used as filters in the vector query. Never null; empty when no filter fields were given.
    /// </summary>
    public IReadOnlyList<FieldDefinition<TDocument>> FilterFields { get; }

    /// <summary>
    /// Type of automatic vector quantization for your vectors. When not set, no "quantization" entry is rendered.
    /// </summary>
    public VectorQuantization? Quantization { get; init; }

    /// <summary>
    /// Maximum number of edges (or connections) that a node can have in the Hierarchical Navigable Small Worlds graph.
    /// </summary>
    public int? HnswMaxEdges { get; init; }

    /// <summary>
    /// Analogous to numCandidates at query-time, this parameter controls the maximum number of nodes to evaluate
    /// to find the closest neighbors to connect to a new node.
    /// </summary>
    public int? HnswNumEdgeCandidates { get; init; }

    /// <summary>
    /// Initializes a new instance of the <see cref="CreateVectorSearchIndexModel{TDocument}"/> class, passing the
    /// required options for <see cref="VectorSimilarity"/> and the number of vector dimensions to the constructor.
    /// </summary>
    /// <param name="field">The field containing the vectors to index.</param>
    /// <param name="name">The index name.</param>
    /// <param name="similarity">The <see cref="VectorSimilarity"/> to use to search for top K-nearest neighbors.</param>
    /// <param name="dimensions">Number of vector dimensions that vector search enforces at index-time and query-time.</param>
    /// <param name="filterFields">Fields that may be used as filters in the vector query.</param>
    public CreateVectorSearchIndexModel(
        FieldDefinition<TDocument> field,
        string name,
        VectorSimilarity similarity,
        int dimensions,
        params FieldDefinition<TDocument>[] filterFields)
        : base(name, SearchIndexType.VectorSearch)
    {
        Field = field;
        Similarity = similarity;
        Dimensions = dimensions;

        // Copy the caller's array so later mutation of it cannot change this model; null becomes an empty list.
        FilterFields = filterFields?.ToList() ?? [];
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="CreateVectorSearchIndexModel{TDocument}"/> class, passing the
    /// required options for <see cref="VectorSimilarity"/> and the number of vector dimensions to the constructor.
    /// </summary>
    /// <param name="field">An expression pointing to the field containing the vectors to index.</param>
    /// <param name="name">The index name.</param>
    /// <param name="similarity">The <see cref="VectorSimilarity"/> to use to search for top K-nearest neighbors.</param>
    /// <param name="dimensions">Number of vector dimensions that vector search enforces at index-time and query-time.</param>
    /// <param name="filterFields">Expressions pointing to fields that may be used as filters in the vector query.</param>
    public CreateVectorSearchIndexModel(
        Expression<Func<TDocument, object>> field,
        string name,
        VectorSimilarity similarity,
        int dimensions,
        params Expression<Func<TDocument, object>>[] filterFields)
        : this(
            new ExpressionFieldDefinition<TDocument>(field),
            name,
            similarity,
            dimensions,
            filterFields?
                .Select(f => (FieldDefinition<TDocument>)new ExpressionFieldDefinition<TDocument>(f))
                .ToArray())
    {
    }

    /// <summary>
    /// Renders the index model to a <see cref="BsonDocument"/>.
    /// </summary>
    /// <param name="renderArgs">The render arguments.</param>
    /// <returns>
    /// A <see cref="BsonDocument" /> with a single "fields" array: the vector field first, followed by one
    /// "filter" entry per element of <see cref="FilterFields"/>.
    /// </returns>
    public BsonDocument Render(RenderArgs<TDocument> renderArgs)
    {
        // "dotProduct" is special-cased because neither "DotProduct" nor "dotproduct" is allowed,
        // so lower-casing the enum name would produce the wrong value.
        var similarityValue = Similarity == VectorSimilarity.DotProduct
            ? "dotProduct"
            : Similarity.ToString().ToLowerInvariant();

        var vectorField = new BsonDocument
        {
            { "type", BsonString.Create("vector") },
            { "path", Field.Render(renderArgs).FieldName },
            { "numDimensions", BsonInt32.Create(Dimensions) },
            { "similarity", BsonString.Create(similarityValue) },
        };

        if (Quantization.HasValue)
        {
            // Invariant lower-casing keeps the rendered value independent of the current culture,
            // consistent with how the similarity value is produced above.
            var quantizationValue = Quantization.Value.ToString().ToLowerInvariant();
            vectorField.Add("quantization", BsonString.Create(quantizationValue));
        }

        if (HnswMaxEdges != null || HnswNumEdgeCandidates != null)
        {
            // Both keys are always written together; whichever option was left unset gets its fallback value.
            var hnswDocument = new BsonDocument
            {
                { "maxEdges", BsonInt32.Create(HnswMaxEdges ?? DefaultHnswMaxEdges) },
                { "numEdgeCandidates", BsonInt32.Create(HnswNumEdgeCandidates ?? DefaultHnswNumEdgeCandidates) }
            };
            vectorField.Add("hnswOptions", hnswDocument);
        }

        var fieldDocuments = new List<BsonDocument> { vectorField };

        // FilterFields is assigned a non-null list by the constructor, so no null check is needed here.
        foreach (var filterPath in FilterFields)
        {
            fieldDocuments.Add(new BsonDocument
            {
                { "type", BsonString.Create("filter") },
                { "path", BsonString.Create(filterPath.Render(renderArgs).FieldName) }
            });
        }

        return new BsonDocument { { "fields", new BsonArray(fieldDocuments) } };
    }
}

src/MongoDB.Driver/MongoCollectionImpl.cs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1741,10 +1741,22 @@ private PipelineDefinition<TDocument, BsonDocument> CreateListIndexesStage(strin
17411741
return new BsonDocumentStagePipelineDefinition<TDocument, BsonDocument>(new[] { stage });
17421742
}
17431743

1744-
// Builds the create-search-indexes operation for the given models. Typed vector models for this
// collection's document type are rendered to BSON with the collection's render args; every other
// model contributes its own Definition document.
private CreateSearchIndexesOperation CreateCreateIndexesOperation(
    IEnumerable<CreateSearchIndexModel> models)
{
    var renderArgs = _collection.GetRenderArgs();

    // Projection stays lazy: requests are only produced when the operation enumerates them.
    var requests = models.Select(model => ToRequest(model));

    return new CreateSearchIndexesOperation(
        _collection._collectionNamespace,
        requests,
        _collection._messageEncoderSettings);

    CreateSearchIndexRequest ToRequest(CreateSearchIndexModel model) =>
        new CreateSearchIndexRequest(
            model.Name,
            model.Type,
            model switch
            {
                CreateVectorSearchIndexModel<TDocument> vectorModel => vectorModel.Render(renderArgs),
                _ => model.Definition
            });
}
17481760

17491761
private string[] GetIndexNames(BsonDocument createSearchIndexesResponse) =>
17501762
createSearchIndexesResponse["indexesCreated"]
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/* Copyright 2010-present MongoDB Inc.
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
namespace MongoDB.Driver;
17+
18+
/// <summary>
/// Selects the automatic quantization applied to your vectors. Only use this setting when your embeddings are
/// float or double vectors. For more information, see
/// <see href="https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-quantization/">Vector Quantization</see>.
/// </summary>
public enum VectorQuantization
{
    /// <summary>
    /// No automatic quantization is applied to the vector embeddings. Choose this when the vectors being ingested
    /// are already quantized. This is the default when the setting is omitted.
    /// </summary>
    None = 0,

    /// <summary>
    /// Scalar quantization: values are transformed to 1 byte integers.
    /// </summary>
    Scalar = 1,

    /// <summary>
    /// Binary quantization: values are transformed to a single bit.
    /// numDimensions must be a multiple of 8 to use this value.
    /// When precision is critical, prefer <see cref="None"/> or <see cref="Scalar"/> over <see cref="Binary"/>.
    /// </summary>
    Binary = 2,
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/* Copyright 2010-present MongoDB Inc.
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
namespace MongoDB.Driver;
17+
18+
/// <summary>
/// The vector similarity function used when searching for the top K-nearest neighbors. For more information, see
/// <see href="https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-type/">How to Index Fields for
/// Vector Search</see>.
/// </summary>
public enum VectorSimilarity
{
    /// <summary>
    /// Distance between the ends of the vectors.
    /// </summary>
    Euclidean = 0,

    /// <summary>
    /// Similarity based on the angle between the vectors.
    /// </summary>
    Cosine = 1,

    /// <summary>
    /// Similarity measured like cosine, but also taking the magnitude of the vector into account.
    /// </summary>
    DotProduct = 2,
}

0 commit comments

Comments
 (0)