Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion evergreen/evergreen.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2099,7 +2099,7 @@ task_groups:
- "AWS_SESSION_TOKEN"
env:
CLUSTER_PREFIX: dbx-csharp-search-index
MONGODB_VERSION: "7.0"
MONGODB_VERSION: "8.0"
args:
- ${DRIVERS_TOOLS}/.evergreen/atlas/setup-atlas-cluster.sh
- command: expansions.update
Expand Down
68 changes: 51 additions & 17 deletions src/MongoDB.Driver/CreateSearchIndexModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,45 +13,79 @@
* limitations under the License.
*/

using System;
using MongoDB.Bson;

namespace MongoDB.Driver
{
/// <summary>
/// Model for creating a search index.
/// Defines a search index model using a <see cref="BsonDocument"/> definition. Consider using
/// <see cref="CreateVectorIndexModel{TDocument}"/> to build vector indexes without specifying the BSON directly.
/// </summary>
public sealed class CreateSearchIndexModel
public class CreateSearchIndexModel
{
private readonly BsonDocument _definition;
private readonly SearchIndexType? _type;
private readonly string _name;

/// <summary>Gets the index name.</summary>
/// <value>The index name.</value>
public string Name { get; }
public string Name => _name;

/// <summary>Gets the index type.</summary>
/// <value>The index type.</value>
public SearchIndexType? Type { get; }
public SearchIndexType? Type => _type;

/// <summary>Gets the index definition.</summary>
/// <summary>
/// Gets the index definition, if one was passed to a constructor of this class, otherwise throws.
/// </summary>
/// <value>The definition.</value>
public BsonDocument Definition { get; }
public BsonDocument Definition
=> _definition ?? throw new NotSupportedException(
"This method should not be called on this subtype. Instead, call 'Render' to create a BSON document for the index model.");

/// <summary>
/// Initializes a new instance of the <see cref="CreateSearchIndexModel"/> class.
/// Initializes a new instance of the <see cref="CreateSearchIndexModel"/> class, passing the index
/// model as a <see cref="BsonDocument"/>.
/// </summary>
/// <param name="name">The name.</param>
/// <param name="definition">The definition.</param>
public CreateSearchIndexModel(string name, BsonDocument definition) : this(name, null, definition) { }
/// <remarks>
/// Consider using <see cref="CreateVectorIndexModel{TDocument}"/> to build vector indexes without specifying
/// the BSON directly.
/// </remarks>
/// <param name="name">The index name.</param>
/// <param name="definition">The index definition.</param>
public CreateSearchIndexModel(string name, BsonDocument definition)
: this(name, null, definition)
{
}

/// <summary>
/// Initializes a new instance of the <see cref="CreateSearchIndexModel"/> class.
/// Initializes a new instance of the <see cref="CreateSearchIndexModel"/> class, passing the index
/// model as a <see cref="BsonDocument"/>.
/// </summary>
/// <param name="name">The name.</param>
/// <param name="type">The type.</param>
/// <param name="definition">The definition.</param>
/// <remarks>
/// Consider using <see cref="CreateVectorIndexModel{TDocument}"/> to build vector indexes without specifying
/// the BSON directly.
/// </remarks>
/// <param name="name">The index name.</param>
/// <param name="type">The index type.</param>
/// <param name="definition">The index definition.</param>
public CreateSearchIndexModel(string name, SearchIndexType? type, BsonDocument definition)
{
Name = name;
Type = type;
Definition = definition;
_name = name;
_type = type;
_definition = definition;
}

/// <summary>
/// Initializes a new instance of the <see cref="CreateSearchIndexModel"/> class without a
/// <see cref="BsonDocument"/> definition.
/// </summary>
/// <remarks>
/// This overload stores only the name and type and leaves the definition unset, so reading
/// <see cref="Definition"/> on an instance created through it throws <see cref="NotSupportedException"/>.
/// </remarks>
/// <param name="name">The index name.</param>
/// <param name="type">The index type.</param>
protected CreateSearchIndexModel(string name, SearchIndexType? type)
{
_name = name;
_type = type;
}
}
}
165 changes: 165 additions & 0 deletions src/MongoDB.Driver/CreateVectorIndexModel.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
/* Copyright 2010-present MongoDB Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

using System;
using System.Collections.Generic;
using System.Linq;
using System.Linq.Expressions;
using MongoDB.Bson;

namespace MongoDB.Driver;

/// <summary>
/// Defines a vector index model using strongly-typed C# APIs.
/// </summary>
/// <typeparam name="TDocument">The document type that the field definitions are expressed against.</typeparam>
public sealed class CreateVectorIndexModel<TDocument> : CreateSearchIndexModel
{
    // Values written for whichever HNSW option was left unset when the other one was supplied;
    // "hnswOptions" is always rendered with both keys.
    private const int DefaultHnswMaxEdges = 16;
    private const int DefaultHnswNumEdgeCandidates = 100;

    /// <summary>
    /// The field containing the vectors to index.
    /// </summary>
    public FieldDefinition<TDocument> Field { get; }

    /// <summary>
    /// The <see cref="VectorSimilarity"/> to use to search for top K-nearest neighbors.
    /// </summary>
    public VectorSimilarity Similarity { get; }

    /// <summary>
    /// Number of vector dimensions that vector search enforces at index-time and query-time.
    /// </summary>
    public int Dimensions { get; }

    /// <summary>
    /// Fields that may be used as filters in the vector query. Never null; empty when no filter fields were given.
    /// </summary>
    public IReadOnlyList<FieldDefinition<TDocument>> FilterFields { get; }

    /// <summary>
    /// Type of automatic vector quantization for your vectors. When not set, no "quantization" entry is rendered.
    /// </summary>
    public VectorQuantization? Quantization { get; init; }

    /// <summary>
    /// Maximum number of edges (or connections) that a node can have in the Hierarchical Navigable Small Worlds graph.
    /// </summary>
    public int? HnswMaxEdges { get; init; }

    /// <summary>
    /// Analogous to numCandidates at query-time, this parameter controls the maximum number of nodes to evaluate
    /// to find the closest neighbors to connect to a new node.
    /// </summary>
    public int? HnswNumEdgeCandidates { get; init; }

    /// <summary>
    /// Initializes a new instance of the <see cref="CreateVectorIndexModel{TDocument}"/> class, passing the
    /// required options for <see cref="VectorSimilarity"/> and the number of vector dimensions to the constructor.
    /// </summary>
    /// <param name="field">The field containing the vectors to index.</param>
    /// <param name="name">The index name.</param>
    /// <param name="similarity">The <see cref="VectorSimilarity"/> to use to search for top K-nearest neighbors.</param>
    /// <param name="dimensions">Number of vector dimensions that vector search enforces at index-time and query-time.</param>
    /// <param name="filterFields">Fields that may be used as filters in the vector query.</param>
    /// <exception cref="ArgumentNullException"><paramref name="field"/> is null.</exception>
    public CreateVectorIndexModel(
        FieldDefinition<TDocument> field,
        string name,
        VectorSimilarity similarity,
        int dimensions,
        params FieldDefinition<TDocument>[] filterFields)
        : base(name, SearchIndexType.VectorSearch)
    {
        // Fail at construction instead of with a NullReferenceException when the model is rendered.
        Field = field ?? throw new ArgumentNullException(nameof(field));
        Similarity = similarity;
        Dimensions = dimensions;

        // Copy the params array so later changes to the caller's array do not affect the model.
        FilterFields = filterFields?.ToList() ?? [];
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="CreateVectorIndexModel{TDocument}"/> class, passing the
    /// required options for <see cref="VectorSimilarity"/> and the number of vector dimensions to the constructor.
    /// </summary>
    /// <param name="field">An expression pointing to the field containing the vectors to index.</param>
    /// <param name="name">The index name.</param>
    /// <param name="similarity">The <see cref="VectorSimilarity"/> to use to search for top K-nearest neighbors.</param>
    /// <param name="dimensions">Number of vector dimensions that vector search enforces at index-time and query-time.</param>
    /// <param name="filterFields">Expressions pointing to fields that may be used as filters in the vector query.</param>
    public CreateVectorIndexModel(
        Expression<Func<TDocument, object>> field,
        string name,
        VectorSimilarity similarity,
        int dimensions,
        params Expression<Func<TDocument, object>>[] filterFields)
        : this(
            new ExpressionFieldDefinition<TDocument>(field),
            name,
            similarity,
            dimensions,
            filterFields?
                .Select(f => (FieldDefinition<TDocument>)new ExpressionFieldDefinition<TDocument>(f))
                .ToArray())
    {
    }

    /// <summary>
    /// Renders the index model to a <see cref="BsonDocument"/>.
    /// </summary>
    /// <remarks>
    /// The result has a single "fields" array: first the vector field entry, then one "filter" entry per
    /// item in <see cref="FilterFields"/>.
    /// </remarks>
    /// <param name="renderArgs">The render arguments.</param>
    /// <returns>A <see cref="BsonDocument" />.</returns>
    public BsonDocument Render(RenderArgs<TDocument> renderArgs)
    {
        var similarityValue = Similarity == VectorSimilarity.DotProduct
            ? "dotProduct" // Because neither "DotProduct" nor "dotproduct" is allowed.
            : Similarity.ToString().ToLowerInvariant();

        var vectorField = new BsonDocument
        {
            { "type", BsonString.Create("vector") },
            { "path", Field.Render(renderArgs).FieldName },
            { "numDimensions", BsonInt32.Create(Dimensions) },
            { "similarity", BsonString.Create(similarityValue) },
        };

        if (Quantization is { } quantization)
        {
            // Invariant lowering keeps the rendered value independent of the current culture,
            // matching how the similarity value is produced above.
            vectorField.Add("quantization", BsonString.Create(quantization.ToString().ToLowerInvariant()));
        }

        if (HnswMaxEdges.HasValue || HnswNumEdgeCandidates.HasValue)
        {
            var hnswDocument = new BsonDocument
            {
                { "maxEdges", BsonInt32.Create(HnswMaxEdges ?? DefaultHnswMaxEdges) },
                { "numEdgeCandidates", BsonInt32.Create(HnswNumEdgeCandidates ?? DefaultHnswNumEdgeCandidates) }
            };
            vectorField.Add("hnswOptions", hnswDocument);
        }

        var fieldDocuments = new List<BsonDocument> { vectorField };

        // FilterFields is assigned a non-null list by the constructor, so no null check is needed here.
        foreach (var filterPath in FilterFields)
        {
            var fieldDocument = new BsonDocument
            {
                { "type", BsonString.Create("filter") },
                { "path", BsonString.Create(filterPath.Render(renderArgs).FieldName) }
            };

            fieldDocuments.Add(fieldDocument);
        }

        return new BsonDocument { { "fields", BsonArray.Create(fieldDocuments) } };
    }
}
18 changes: 15 additions & 3 deletions src/MongoDB.Driver/MongoCollectionImpl.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1741,10 +1741,22 @@ private PipelineDefinition<TDocument, BsonDocument> CreateListIndexesStage(strin
return new BsonDocumentStagePipelineDefinition<TDocument, BsonDocument>(new[] { stage });
}

private CreateSearchIndexesOperation CreateCreateIndexesOperation(IEnumerable<CreateSearchIndexModel> models) =>
new(_collection._collectionNamespace,
models.Select(m => new CreateSearchIndexRequest(m.Name, m.Type, m.Definition)),
private CreateSearchIndexesOperation CreateCreateIndexesOperation(
IEnumerable<CreateSearchIndexModel> models)
{
var renderArgs = _collection.GetRenderArgs();

return new CreateSearchIndexesOperation(
_collection._collectionNamespace,
models.Select(model
=> new CreateSearchIndexRequest(
model.Name,
model.Type,
model is CreateVectorIndexModel<TDocument> createVectorIndexModel
? createVectorIndexModel.Render(renderArgs)
: model.Definition)),
_collection._messageEncoderSettings);
}

private string[] GetIndexNames(BsonDocument createSearchIndexesResponse) =>
createSearchIndexesResponse["indexesCreated"]
Expand Down
42 changes: 42 additions & 0 deletions src/MongoDB.Driver/VectorQuantization.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/* Copyright 2010-present MongoDB Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

namespace MongoDB.Driver;

/// <summary>
/// Selects the automatic vector quantization applied to your vectors. This setting is only meant for
/// embeddings that are float or double vectors. For more information, see
/// <see href="https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-quantization/">Vector Quantization</see>.
/// </summary>
public enum VectorQuantization
{
    /// <summary>
    /// No automatic quantization is applied to the vector embeddings. Choose this when the vectors being
    /// ingested are already quantized. This is the default value when the setting is omitted.
    /// </summary>
    None = 0,

    /// <summary>
    /// Scalar quantization: values are transformed to 1 byte integers.
    /// </summary>
    Scalar = 1,

    /// <summary>
    /// Binary quantization: values are transformed to a single bit.
    /// Requires numDimensions to be a multiple of 8.
    /// When precision is critical, prefer <see cref="None"/> or <see cref="Scalar"/> over <see cref="Binary"/>.
    /// </summary>
    Binary = 2
}
39 changes: 39 additions & 0 deletions src/MongoDB.Driver/VectorSimilarity.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/* Copyright 2010-present MongoDB Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

namespace MongoDB.Driver;

/// <summary>
/// The vector similarity function used when searching for the top K-nearest neighbors. For more information, see
/// <see href="https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-type/">How to Index Fields for
/// Vector Search</see>.
/// </summary>
public enum VectorSimilarity
{
    /// <summary>
    /// Distance between the ends of the vectors.
    /// </summary>
    Euclidean = 0,

    /// <summary>
    /// Similarity based on the angle between the vectors.
    /// </summary>
    Cosine = 1,

    /// <summary>
    /// Similarity measured like <see cref="Cosine"/>, but also taking the magnitude of the vector into account.
    /// </summary>
    DotProduct = 2
}
Loading
Loading