Skip to content

Commit

Permalink
Support vector search on Cosmos DB
Browse files Browse the repository at this point in the history
 Fixes #33783

 This PR introduces:

 - `IsVector()` to configure a property to be configured as a vector (embedding) in the document.
   - The distance function and dimensions are specified.
   - The data type can be specified, or otherwise is inferred.
 - `HasIndex().ForVectors()` to configure a vector index over a vector property.
 - `VectorDistance()` which translates to the Cosmos `VectorDistance` function
   - The distance function and data type are taken from the property mapping, or can be overridden.
  • Loading branch information
ajcvickers committed Jul 31, 2024
1 parent 3d1d324 commit 91b866a
Show file tree
Hide file tree
Showing 35 changed files with 1,787 additions and 130 deletions.
1 change: 1 addition & 0 deletions src/EFCore.Analyzers/EFDiagnostics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ public static class EFDiagnostics
public const string PrecompiledQueryExperimental = "EF9100";
public const string MetricsExperimental = "EF9101";
public const string PagingExperimental = "EF9102";
public const string CosmosVectorSearchExperimental = "EF9103";
}
1 change: 1 addition & 0 deletions src/EFCore.Cosmos/EFCore.Cosmos.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
<ImplicitUsings>true</ImplicitUsings>
<NoWarn>$(NoWarn);EF9101</NoWarn> <!-- Metrics is experimental -->
<NoWarn>$(NoWarn);EF9102</NoWarn> <!-- Paging is experimental -->
<NoWarn>$(NoWarn);EF9102</NoWarn> <!-- Paging is experimental -->
</PropertyGroup>

<ItemGroup>
Expand Down
51 changes: 51 additions & 0 deletions src/EFCore.Cosmos/Extensions/CosmosDbFunctionsExtensions.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics.CodeAnalysis;

namespace Microsoft.EntityFrameworkCore.Cosmos.Extensions;

/// <summary>
Expand Down Expand Up @@ -47,4 +49,53 @@ public static T CoalesceUndefined<T>(
T expression1,
T expression2)
=> throw new InvalidOperationException(CoreStrings.FunctionOnClient(nameof(CoalesceUndefined)));

/// <summary>
/// Returns the distance between two vectors, using the distance function and data type defined using
/// <see cref="CosmosPropertyBuilderExtensions.IsVector(Microsoft.EntityFrameworkCore.Metadata.Builders.PropertyBuilder,Microsoft.Azure.Cosmos.DistanceFunction,int,System.Nullable{Microsoft.Azure.Cosmos.VectorDataType})"/>.
/// </summary>
/// <param name="_">The <see cref="DbFunctions" /> instance.</param>
/// <param name="vector1">The first vector.</param>
/// <param name="vector2">The second vector.</param>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static double VectorDistance<T>(this DbFunctions _, IEnumerable<T> vector1, IEnumerable<T> vector2)
=> throw new InvalidOperationException(CoreStrings.FunctionOnClient(nameof(VectorDistance)));

/// <summary>
/// Returns the distance between two vectors, given a distance function (aka similarity measure).
/// </summary>
/// <param name="_">The <see cref="DbFunctions" /> instance.</param>
/// <param name="vector1">The first vector.</param>
/// <param name="vector2">The second vector.</param>
/// <param name="useBruteForce">A <see langword="bool"/> specifying how the computed value is used in an ORDER BY
/// expression. If <see langword="true"/>, then brute force is used, otherwise any index defined on the vector
/// property is leveraged.</param>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static double VectorDistance<T>(
this DbFunctions _,
IEnumerable<T> vector1,
IEnumerable<T> vector2,
[NotParameterized] bool useBruteForce)
=> throw new InvalidOperationException(CoreStrings.FunctionOnClient(nameof(VectorDistance)));

/// <summary>
/// Returns the distance between two vectors, given a distance function (aka similarity measure).
/// </summary>
/// <param name="_">The <see cref="DbFunctions" /> instance.</param>
/// <param name="vector1">The first vector.</param>
/// <param name="vector2">The second vector.</param>
/// <param name="distanceFunction">The distance function to use.</param>
/// <param name="dataType">The vector data type to use.</param>
/// <param name="useBruteForce">A <see langword="bool"/> specifying how the computed value is used in an ORDER BY
/// expression. If <see langword="true"/>, then brute force is used, otherwise any index defined on the vector
/// property is leveraged.</param>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static double VectorDistance<T>(
this DbFunctions _,
IEnumerable<T> vector1,
IEnumerable<T> vector2,
[NotParameterized] bool useBruteForce,
[NotParameterized] DistanceFunction distanceFunction,
[NotParameterized] VectorDataType dataType)
=> throw new InvalidOperationException(CoreStrings.FunctionOnClient(nameof(VectorDistance)));
}
99 changes: 99 additions & 0 deletions src/EFCore.Cosmos/Extensions/CosmosIndexBuilderExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics.CodeAnalysis;
using Microsoft.EntityFrameworkCore.Cosmos.Metadata.Internal;

// ReSharper disable once CheckNamespace
namespace Microsoft.EntityFrameworkCore;

/// <summary>
/// Azure Cosmos DB-specific extension methods for <see cref="IndexBuilder"/>.
/// </summary>
/// <remarks>
/// See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
/// <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
/// </remarks>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static class CosmosIndexBuilderExtensions
{
/// <summary>
/// Configures the index as a vector index with the given vector index type, such as "flat", "diskANN", or "quantizedFlat".
/// See <see href="https://aka.ms/ef-cosmos-vectors">Vector Search in Azure Cosmos DB for NoSQL</see> for more information.
/// </summary>
/// <remarks>
/// See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
/// <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
/// </remarks>
/// <param name="indexBuilder">The builder for the index being configured.</param>
/// <param name="indexType">The type of vector index to create.</param>
/// <returns>A builder to further configure the index.</returns>
public static IndexBuilder ForVectors(this IndexBuilder indexBuilder, VectorIndexType? indexType)
{
indexBuilder.Metadata.SetVectorIndexType(indexType);

return indexBuilder;
}

/// <summary>
/// Configures whether the index as a vector index with the given vector index type, such as "flat", "diskANN", or "quantizedFlat".
/// See <see href="https://aka.ms/ef-cosmos-vectors">Vector Search in Azure Cosmos DB for NoSQL</see> for more information.
/// </summary>
/// <remarks>
/// See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
/// <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
/// </remarks>
/// <param name="indexBuilder">The builder for the index being configured.</param>
/// <param name="indexType">The type of vector index to create.</param>
/// <returns>A builder to further configure the index.</returns>
public static IndexBuilder<TEntity> ForVectors<TEntity>(
this IndexBuilder<TEntity> indexBuilder,
VectorIndexType? indexType)
=> (IndexBuilder<TEntity>)ForVectors((IndexBuilder)indexBuilder, indexType);

/// <summary>
/// Configures whether the index as a vector index with the given vector index type, such as "flat", "diskANN", or "quantizedFlat".
/// See <see href="https://aka.ms/ef-cosmos-vectors">Vector Search in Azure Cosmos DB for NoSQL</see> for more information.
/// </summary>
/// <remarks>
/// See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
/// <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
/// </remarks>
/// <param name="indexBuilder">The builder for the index being configured.</param>
/// <param name="indexType">The type of vector index to create.</param>
/// <param name="fromDataAnnotation">Indicates whether the configuration was specified using a data annotation.</param>
/// <returns>
/// The same builder instance if the configuration was applied,
/// <see langword="null" /> otherwise.
/// </returns>
public static IConventionIndexBuilder? ForVectors(
this IConventionIndexBuilder indexBuilder,
VectorIndexType? indexType,
bool fromDataAnnotation = false)
{
if (indexBuilder.CanSetVectorIndexType(indexType, fromDataAnnotation))
{
indexBuilder.Metadata.SetVectorIndexType(indexType, fromDataAnnotation);
return indexBuilder;
}

return null;
}

/// <summary>
/// Returns a value indicating whether the vector index can be configured for vectors.
/// </summary>
/// <remarks>
/// See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
/// <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
/// </remarks>
/// <param name="indexBuilder">The builder for the index being configured.</param>
/// <param name="indexType">The index type to use.</param>
/// <param name="fromDataAnnotation">Indicates whether the configuration was specified using a data annotation.</param>
/// <returns><see langword="true" /> if the index can be configured for vectors.</returns>
public static bool CanSetVectorIndexType(
this IConventionIndexBuilder indexBuilder,
VectorIndexType? indexType,
bool fromDataAnnotation = false)
=> indexBuilder.CanSetAnnotation(CosmosAnnotationNames.VectorIndexType, indexType, fromDataAnnotation);
}
64 changes: 64 additions & 0 deletions src/EFCore.Cosmos/Extensions/CosmosIndexExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics.CodeAnalysis;
using Microsoft.EntityFrameworkCore.Cosmos.Metadata.Internal;

// ReSharper disable once CheckNamespace
namespace Microsoft.EntityFrameworkCore;

/// <summary>
/// Index extension methods for Azure Cosmos DB-specific metadata.
/// </summary>
/// <remarks>
/// See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
/// <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
/// </remarks>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static class CosmosIndexExtensions
{
/// <summary>
/// Returns the vector index type to use, such as "flat", "diskANN", or "quantizedFlat".
/// See <see href="https://aka.ms/ef-cosmos-vectors">Vector Search in Azure Cosmos DB for NoSQL</see> for more information.
/// </summary>
/// <param name="index">The index.</param>
/// <returns>The index type to use, or <see langword="null" /> if none is set.</returns>
public static VectorIndexType? GetVectorIndexType(this IReadOnlyIndex index)
=> (index is RuntimeIndex)
? throw new InvalidOperationException(CoreStrings.RuntimeModelMissingData)
: (VectorIndexType?)index[CosmosAnnotationNames.VectorIndexType];

/// <summary>
/// Sets the vector index type to use, such as "flat", "diskANN", or "quantizedFlat".
/// See <see href="https://aka.ms/ef-cosmos-vectors">Vector Search in Azure Cosmos DB for NoSQL</see> for more information.
/// </summary>
/// <param name="index">The index.</param>
/// <param name="indexType">The index type to use.</param>
public static void SetVectorIndexType(this IMutableIndex index, VectorIndexType? indexType)
=> index.SetAnnotation(CosmosAnnotationNames.VectorIndexType, indexType);

/// <summary>
/// Sets the vector index type to use, such as "flat", "diskANN", or "quantizedFlat".
/// See <see href="https://aka.ms/ef-cosmos-vectors">Vector Search in Azure Cosmos DB for NoSQL</see> for more information.
/// </summary>
/// <param name="indexType">The index type to use.</param>
/// <param name="index">The index.</param>
/// <param name="fromDataAnnotation">Indicates whether the configuration was specified using a data annotation.</param>
/// <returns>The configured value.</returns>
public static string? SetVectorIndexType(
this IConventionIndex index,
VectorIndexType? indexType,
bool fromDataAnnotation = false)
=> (string?)index.SetAnnotation(
CosmosAnnotationNames.VectorIndexType,
indexType,
fromDataAnnotation)?.Value;

/// <summary>
/// Returns the <see cref="ConfigurationSource" /> for whether the <see cref="GetVectorIndexType"/>.
/// </summary>
/// <param name="property">The property.</param>
/// <returns>The <see cref="ConfigurationSource" /> for whether the index is clustered.</returns>
public static ConfigurationSource? GetVectorIndexTypeConfigurationSource(this IConventionIndex property)
=> property.FindAnnotation(CosmosAnnotationNames.VectorIndexType)?.GetConfigurationSource();
}
Loading

0 comments on commit 91b866a

Please sign in to comment.