From 702bb234be8a77848642a83c0b79f0d9816abe2d Mon Sep 17 00:00:00 2001 From: Marco Minerva Date: Mon, 29 Jan 2024 11:12:22 +0100 Subject: [PATCH 1/3] Add Dimensions parameter for embedding requests #143 --- samples/ChatGptApi/Program.cs | 2 +- samples/ChatGptApi/appsettings.json | 11 +++++++---- samples/ChatGptConsole/appsettings.json | 5 ++++- .../appsettings.json | 5 ++++- samples/ChatGptStreamConsole/appsettings.json | 5 ++++- src/ChatGptNet/ChatGptClient.cs | 11 ++++++----- src/ChatGptNet/ChatGptOptions.cs | 6 ++++++ src/ChatGptNet/ChatGptOptionsBuilder.cs | 9 ++++++++- src/ChatGptNet/Extensions/EmbeddingUtility.cs | 4 ++-- src/ChatGptNet/IChatGptClient.cs | 16 +++++++++------- .../Models/Embeddings/EmbeddingParameters.cs | 15 +++++++++++++++ .../Models/Embeddings/EmbeddingRequest.cs | 5 +++++ 12 files changed, 71 insertions(+), 23 deletions(-) create mode 100644 src/ChatGptNet/Models/Embeddings/EmbeddingParameters.cs diff --git a/samples/ChatGptApi/Program.cs b/samples/ChatGptApi/Program.cs index 325ffd5..6afc9cd 100644 --- a/samples/ChatGptApi/Program.cs +++ b/samples/ChatGptApi/Program.cs @@ -117,7 +117,7 @@ }) .WithOpenApi(); -app.MapPost("/api/embeddings/CosineSimilarity", async (CosineSimilarityRequest request, IChatGptClient chatGptClient) => +app.MapPost("/api/embeddings/cosine-similarity", async (CosineSimilarityRequest request, IChatGptClient chatGptClient) => { var firstEmbeddingResponse = await chatGptClient.GenerateEmbeddingAsync(request.FirstMessage); var secondEmbeddingResponse = await chatGptClient.GenerateEmbeddingAsync(request.SecondMessage); diff --git a/samples/ChatGptApi/appsettings.json b/samples/ChatGptApi/appsettings.json index ee2b578..2727aa9 100644 --- a/samples/ChatGptApi/appsettings.json +++ b/samples/ChatGptApi/appsettings.json @@ -1,17 +1,17 @@ { "ChatGPT": { - "Provider": "OpenAI", // Optional. Allowed values: OpenAI (default) or Azure + "Provider": "OpenAI", // Optional. 
Allowed values: OpenAI (default) or Azure "ApiKey": "", // Required - //"Organization": "", // Optional, used only by OpenAI + //"Organization": "", // Optional, used only by OpenAI "ResourceName": "", // Required when using Azure OpenAI Service - "ApiVersion": "2023-12-01-preview", // Optional, used only by Azure OpenAI Service (default: 2023-12-01-preview) + "ApiVersion": "2023-12-01-preview", // Optional, used only by Azure OpenAI Service (default: 2023-12-01-preview) "AuthenticationType": "ApiKey", // Optional, used only by Azure OpenAI Service. Allowed values: ApiKey (default) or ActiveDirectory "DefaultModel": "my-model", "DefaultEmbeddingModel": "text-embedding-ada-002", // Optional, set it if you want to use embeddings "MessageLimit": 20, "MessageExpiration": "00:30:00", - "ThrowExceptionOnError": true, // Optional, default: true + "ThrowExceptionOnError": true // Optional, default: true //"User": "UserName", //"DefaultParameters": { // "Temperature": 0.8, @@ -21,6 +21,9 @@ // "FrequencyPenalty": 0, // "ResponseFormat": { "Type": "text" }, // Allowed values for Type: text (default) or json_object // "Seed": 42 // Optional (any integer value) + //}, + //"DefaultEmbeddingParameters": { + // "Dimensions": 1536 //} }, "Logging": { diff --git a/samples/ChatGptConsole/appsettings.json b/samples/ChatGptConsole/appsettings.json index 66562ae..469407f 100644 --- a/samples/ChatGptConsole/appsettings.json +++ b/samples/ChatGptConsole/appsettings.json @@ -11,7 +11,7 @@ "DefaultEmbeddingModel": "text-embedding-ada-002", // Optional, it set if you want to use embeddings "MessageLimit": 20, "MessageExpiration": "00:30:00", - "ThrowExceptionOnError": true, + "ThrowExceptionOnError": true //"User": "UserName", //"DefaultParameters": { // "Temperature": 0.8, @@ -21,6 +21,9 @@ // "FrequencyPenalty": 0, // "ResponseFormat": { "Type": "text" }, // Allowed values for Type: text (default) or json_object // "Seed": 42 // Optional (any integer value) + //}, + 
//"DefaultEmbeddingParameters": { + // "Dimensions": 1536 //} }, "Logging": { diff --git a/samples/ChatGptFunctionCallingConsole/appsettings.json b/samples/ChatGptFunctionCallingConsole/appsettings.json index e24276f..fd15b9b 100644 --- a/samples/ChatGptFunctionCallingConsole/appsettings.json +++ b/samples/ChatGptFunctionCallingConsole/appsettings.json @@ -11,7 +11,7 @@ "DefaultEmbeddingModel": "text-embedding-ada-002", // Optional, it set if you want to use embeddings "MessageLimit": 20, "MessageExpiration": "00:30:00", - "ThrowExceptionOnError": true, // Optional, default: true + "ThrowExceptionOnError": true // Optional, default: true //"User": "UserName", //"DefaultParameters": { // "Temperature": 0.8, @@ -21,6 +21,9 @@ // "FrequencyPenalty": 0, // "ResponseFormat": { "Type": "text" }, // Allowed values for Type: text (default) or json_object // "Seed": 42 // Optional (any integer value) + //}, + //"DefaultEmbeddingParameters": { + // "Dimensions": 1536 //} }, "Logging": { diff --git a/samples/ChatGptStreamConsole/appsettings.json b/samples/ChatGptStreamConsole/appsettings.json index 1b87f57..d11773f 100644 --- a/samples/ChatGptStreamConsole/appsettings.json +++ b/samples/ChatGptStreamConsole/appsettings.json @@ -11,7 +11,7 @@ "DefaultEmbeddingModel": "text-embedding-ada-002", // Optional, set it if you want to use embeddings "MessageLimit": 20, "MessageExpiration": "00:30:00", - "ThrowExceptionOnError": true, // Optional, default: true + "ThrowExceptionOnError": true // Optional, default: true //"User": "UserName", //"DefaultParameters": { // "Temperature": 0.8, @@ -21,6 +21,9 @@ // "FrequencyPenalty": 0, // "ResponseFormat": { "Type": "text" }, // Allowed values for Type: text (default) or json_object // "Seed": 42 // Optional (any integer value) + //}, + //"DefaultEmbeddingParameters": { + // "Dimensions": 1536 //} }, "Logging": { diff --git a/src/ChatGptNet/ChatGptClient.cs b/src/ChatGptNet/ChatGptClient.cs index 591505d..64fad43 100644 --- 
a/src/ChatGptNet/ChatGptClient.cs +++ b/src/ChatGptNet/ChatGptClient.cs @@ -290,11 +290,11 @@ public async Task AddToolResponseAsync(Guid conversationId, string? toolId, stri await UpdateCacheAsync(conversationId, messages, cancellationToken); } - public async Task GenerateEmbeddingAsync(IEnumerable messages, string? model = null, CancellationToken cancellationToken = default) + public async Task GenerateEmbeddingAsync(IEnumerable texts, EmbeddingParameters? embeddingParameters = null, string? model = null, CancellationToken cancellationToken = default) { - ArgumentNullException.ThrowIfNull(messages); + ArgumentNullException.ThrowIfNull(texts); - var request = CreateEmbeddingRequest(messages, model); + var request = CreateEmbeddingRequest(texts, embeddingParameters, model); var requestUri = options.ServiceConfiguration.GetEmbeddingEndpoint(model ?? options.DefaultEmbeddingModel); using var httpResponse = await httpClient.PostAsJsonAsync(requestUri, request, jsonSerializerOptions, cancellationToken); @@ -360,11 +360,12 @@ private ChatGptRequest CreateChatGptRequest(IEnumerable messages ResponseFormat = parameters?.ResponseFormat ?? options.DefaultParameters.ResponseFormat }; - private EmbeddingRequest CreateEmbeddingRequest(IEnumerable messages, string? model = null) + private EmbeddingRequest CreateEmbeddingRequest(IEnumerable messages, EmbeddingParameters? parameters, string? model) => new() { Model = model ?? options.DefaultEmbeddingModel, - Input = messages + Input = messages, + Dimensions = parameters?.Dimensions ?? options.DefaultEmbeddingParameters.Dimensions, }; private async Task AddAssistantResponseAsync(Guid conversationId, IList messages, ChatGptMessage? 
message, CancellationToken cancellationToken = default) diff --git a/src/ChatGptNet/ChatGptOptions.cs b/src/ChatGptNet/ChatGptOptions.cs index f03ac04..7d291c5 100644 --- a/src/ChatGptNet/ChatGptOptions.cs +++ b/src/ChatGptNet/ChatGptOptions.cs @@ -55,6 +55,12 @@ public class ChatGptOptions /// public ChatGptParameters DefaultParameters { get; internal set; } = new(); + /// + /// Gets or sets the default parameters for embeddings. + /// + /// + public EmbeddingParameters DefaultEmbeddingParameters { get; internal set; } = new(); + /// /// Gets or sets the user identification for chat completion, which can help OpenAI to monitor and detect abuse. /// diff --git a/src/ChatGptNet/ChatGptOptionsBuilder.cs b/src/ChatGptNet/ChatGptOptionsBuilder.cs index cbca942..cdc061e 100644 --- a/src/ChatGptNet/ChatGptOptionsBuilder.cs +++ b/src/ChatGptNet/ChatGptOptionsBuilder.cs @@ -43,7 +43,7 @@ public class ChatGptOptionsBuilder public string? DefaultModel { get; set; } /// - /// Gets or sets the default model for embedding. (default: when the provider is OpenAI). + /// Gets or sets the default model for embeddings. (default: when the provider is OpenAI). /// /// /// @@ -55,6 +55,12 @@ public class ChatGptOptionsBuilder /// public ChatGptParameters? DefaultParameters { get; set; } = new(); + /// + /// Gets or sets the default parameters for embeddings. + /// + /// + public EmbeddingParameters DefaultEmbeddingParameters { get; internal set; } = new(); + /// /// Gets or sets the user identification for chat completion, which can help OpenAI to monitor and detect abuse. /// @@ -70,6 +76,7 @@ internal ChatGptOptions Build() DefaultModel = DefaultModel, DefaultEmbeddingModel = DefaultEmbeddingModel, DefaultParameters = DefaultParameters ?? new(), + DefaultEmbeddingParameters = DefaultEmbeddingParameters ?? 
new(), MessageExpiration = MessageExpiration, ThrowExceptionOnError = ThrowExceptionOnError, ServiceConfiguration = ServiceConfiguration, diff --git a/src/ChatGptNet/Extensions/EmbeddingUtility.cs b/src/ChatGptNet/Extensions/EmbeddingUtility.cs index 7cbe4cf..8fdfa9d 100644 --- a/src/ChatGptNet/Extensions/EmbeddingUtility.cs +++ b/src/ChatGptNet/Extensions/EmbeddingUtility.cs @@ -33,7 +33,7 @@ public static float CosineSimilarity(ReadOnlySpan x, ReadOnlySpan /// The embedding response. /// The other vector. /// The cosine similarity. - /// + /// /// public static float CosineSimilarity(this EmbeddingResponse embeddingResponse, ReadOnlySpan y) => CosineSimilarity(embeddingResponse.GetEmbedding() ?? [], y); @@ -44,7 +44,7 @@ public static float CosineSimilarity(this EmbeddingResponse embeddingResponse, R /// The first embedding response. /// The second embedding response. /// The cosine similarity. - /// + /// /// public static float CosineSimilarity(this EmbeddingResponse embeddingResponse, EmbeddingResponse otherResponse) => CosineSimilarity(embeddingResponse.GetEmbedding() ?? [], otherResponse.GetEmbedding() ?? []); diff --git a/src/ChatGptNet/IChatGptClient.cs b/src/ChatGptNet/IChatGptClient.cs index fb5f710..87c8e26 100644 --- a/src/ChatGptNet/IChatGptClient.cs +++ b/src/ChatGptNet/IChatGptClient.cs @@ -289,23 +289,25 @@ Task AddToolResponseAsync(Guid conversationId, ChatGptToolCall tool, string cont Task AddToolResponseAsync(Guid conversationId, string? toolId, string name, string content, CancellationToken cancellationToken = default); /// - /// Generates embeddings for a message. + /// Generates embeddings for a text. /// - /// The message to use for generating embeddings. + /// The text to use for generating embeddings. + /// An object used to override the default embedding parameters in the property. /// The name of the embedding model. If is , then the one specified in the property will be used. /// The token to monitor for cancellation requests. 
/// The embeddings for the provided message. /// An error occurred while calling the API and the is . - Task GenerateEmbeddingAsync(string message, string? model = null, CancellationToken cancellationToken = default) - => GenerateEmbeddingAsync([message], model, cancellationToken); + Task GenerateEmbeddingAsync(string text, EmbeddingParameters? parameters = null, string? model = null, CancellationToken cancellationToken = default) + => GenerateEmbeddingAsync([text], parameters, model, cancellationToken); /// - /// Generates embeddings for a list of messages. + /// Generates embeddings for a list of texts. /// - /// The messages to use for generating embeddings. + /// The texts to use for generating embeddings. + /// An object used to override the default embedding parameters in the property. /// The name of the embedding model. If is , then the one specified in the property will be used. /// The token to monitor for cancellation requests. /// The embeddings for the provided messages. /// An error occurred while calling the API and the is . - Task GenerateEmbeddingAsync(IEnumerable messages, string? model = null, CancellationToken cancellationToken = default); + Task GenerateEmbeddingAsync(IEnumerable texts, EmbeddingParameters? parameters = null, string? model = null, CancellationToken cancellationToken = default); } diff --git a/src/ChatGptNet/Models/Embeddings/EmbeddingParameters.cs b/src/ChatGptNet/Models/Embeddings/EmbeddingParameters.cs new file mode 100644 index 0000000..df3a4ed --- /dev/null +++ b/src/ChatGptNet/Models/Embeddings/EmbeddingParameters.cs @@ -0,0 +1,15 @@ +namespace ChatGptNet.Models.Embeddings; + +/// +/// Represents embeddings parameters. +/// +/// +/// See Create embeddings for more information. +/// +public class EmbeddingParameters +{ + /// + /// The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models. + /// + public int? 
Dimensions { get; set; } +} diff --git a/src/ChatGptNet/Models/Embeddings/EmbeddingRequest.cs b/src/ChatGptNet/Models/Embeddings/EmbeddingRequest.cs index 86f80b3..df2e01d 100644 --- a/src/ChatGptNet/Models/Embeddings/EmbeddingRequest.cs +++ b/src/ChatGptNet/Models/Embeddings/EmbeddingRequest.cs @@ -19,6 +19,11 @@ internal class EmbeddingRequest /// public IEnumerable Input { get; set; } = []; + /// + /// The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models. + /// + public int? Dimensions { get; set; } + /// /// Gets or sets the user identification for embedding request, which can help to monitor and detect abuse. /// From e33e2ac00aaa0316fd3cdd260bb633050162d68f Mon Sep 17 00:00:00 2001 From: Marco Minerva Date: Mon, 29 Jan 2024 11:22:57 +0100 Subject: [PATCH 2/3] Update documentation #143 --- .../EmbeddingParameters.md | 25 +++++++++++++++++++ .../EmbeddingParameters/Dimensions.md | 14 +++++++++++ .../EmbeddingParameters.md | 14 +++++++++++ .../OpenAIEmbeddingModels.md | 4 ++- .../TextEmbedding3Large.md | 14 +++++++++++ .../TextEmbedding3Small.md | 14 +++++++++++ .../TextEmbeddingAda002.md | 2 +- docs/ChatGptNet/ChatGptOptions.md | 1 + .../DefaultEmbeddingParameters.md | 15 +++++++++++ docs/ChatGptNet/ChatGptOptionsBuilder.md | 3 ++- .../DefaultEmbeddingModel.md | 2 +- .../DefaultEmbeddingParameters.md | 15 +++++++++++ docs/ChatGptNet/IChatGptClient.md | 2 +- .../IChatGptClient/GenerateEmbeddingAsync.md | 20 +++++++++------ docs/README.md | 1 + .../Embeddings/OpenAIEmbeddingModels.cs | 12 ++++++++- 16 files changed, 145 insertions(+), 13 deletions(-) create mode 100644 docs/ChatGptNet.Models.Embeddings/EmbeddingParameters.md create mode 100644 docs/ChatGptNet.Models.Embeddings/EmbeddingParameters/Dimensions.md create mode 100644 docs/ChatGptNet.Models.Embeddings/EmbeddingParameters/EmbeddingParameters.md create mode 100644 
docs/ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels/TextEmbedding3Large.md create mode 100644 docs/ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels/TextEmbedding3Small.md create mode 100644 docs/ChatGptNet/ChatGptOptions/DefaultEmbeddingParameters.md create mode 100644 docs/ChatGptNet/ChatGptOptionsBuilder/DefaultEmbeddingParameters.md diff --git a/docs/ChatGptNet.Models.Embeddings/EmbeddingParameters.md b/docs/ChatGptNet.Models.Embeddings/EmbeddingParameters.md new file mode 100644 index 0000000..c7e742c --- /dev/null +++ b/docs/ChatGptNet.Models.Embeddings/EmbeddingParameters.md @@ -0,0 +1,25 @@ +# EmbeddingParameters class + +Represents embeddings parameters. + +```csharp +public class EmbeddingParameters +``` + +## Public Members + +| name | description | +| --- | --- | +| [EmbeddingParameters](EmbeddingParameters/EmbeddingParameters.md)() | The default constructor. | +| [Dimensions](EmbeddingParameters/Dimensions.md) { get; set; } | The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` and later models. | + +## Remarks + +See [Create embeddings](https://platform.openai.com/docs/api-reference/embeddings/create) for more information. + +## See Also + +* namespace [ChatGptNet.Models.Embeddings](../ChatGptNet.md) +* [EmbeddingParameters.cs](https://github.com/marcominerva/ChatGptNet/tree/master/src/ChatGptNet/Models/Embeddings/EmbeddingParameters.cs) + + diff --git a/docs/ChatGptNet.Models.Embeddings/EmbeddingParameters/Dimensions.md b/docs/ChatGptNet.Models.Embeddings/EmbeddingParameters/Dimensions.md new file mode 100644 index 0000000..c8c5e20 --- /dev/null +++ b/docs/ChatGptNet.Models.Embeddings/EmbeddingParameters/Dimensions.md @@ -0,0 +1,14 @@ +# EmbeddingParameters.Dimensions property + +The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` and later models. + +```csharp +public int? 
Dimensions { get; set; } +``` + +## See Also + +* class [EmbeddingParameters](../EmbeddingParameters.md) +* namespace [ChatGptNet.Models.Embeddings](../../ChatGptNet.md) + + diff --git a/docs/ChatGptNet.Models.Embeddings/EmbeddingParameters/EmbeddingParameters.md b/docs/ChatGptNet.Models.Embeddings/EmbeddingParameters/EmbeddingParameters.md new file mode 100644 index 0000000..82fb89a --- /dev/null +++ b/docs/ChatGptNet.Models.Embeddings/EmbeddingParameters/EmbeddingParameters.md @@ -0,0 +1,14 @@ +# EmbeddingParameters constructor + +The default constructor. + +```csharp +public EmbeddingParameters() +``` + +## See Also + +* class [EmbeddingParameters](../EmbeddingParameters.md) +* namespace [ChatGptNet.Models.Embeddings](../../ChatGptNet.md) + + diff --git a/docs/ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels.md b/docs/ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels.md index 46cb076..8e38d02 100644 --- a/docs/ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels.md +++ b/docs/ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels.md @@ -10,7 +10,9 @@ public static class OpenAIEmbeddingModels | name | description | | --- | --- | -| const [TextEmbeddingAda002](OpenAIEmbeddingModels/TextEmbeddingAda002.md) | The second generation embedding model provided by OpenAI. | +| const [TextEmbedding3Large](OpenAIEmbeddingModels/TextEmbedding3Large.md) | Most capable embedding model for both english and non-english tasks. It uses a 3072 output dimension. | +| const [TextEmbedding3Small](OpenAIEmbeddingModels/TextEmbedding3Small.md) | Increased performance over 2nd generation ada embedding model. It uses a 1536 output dimension. | +| const [TextEmbeddingAda002](OpenAIEmbeddingModels/TextEmbeddingAda002.md) | The second generation embedding model provided by OpenAI. It uses a 1536 output dimension. 
| ## Remarks diff --git a/docs/ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels/TextEmbedding3Large.md b/docs/ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels/TextEmbedding3Large.md new file mode 100644 index 0000000..e24c5f2 --- /dev/null +++ b/docs/ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels/TextEmbedding3Large.md @@ -0,0 +1,14 @@ +# OpenAIEmbeddingModels.TextEmbedding3Large field + +Most capable embedding model for both english and non-english tasks. It uses a 3072 output dimension. + +```csharp +public const string TextEmbedding3Large; +``` + +## See Also + +* class [OpenAIEmbeddingModels](../OpenAIEmbeddingModels.md) +* namespace [ChatGptNet.Models.Embeddings](../../ChatGptNet.md) + + diff --git a/docs/ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels/TextEmbedding3Small.md b/docs/ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels/TextEmbedding3Small.md new file mode 100644 index 0000000..c417a5e --- /dev/null +++ b/docs/ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels/TextEmbedding3Small.md @@ -0,0 +1,14 @@ +# OpenAIEmbeddingModels.TextEmbedding3Small field + +Increased performance over 2nd generation ada embedding model. It uses a 1536 output dimension. + +```csharp +public const string TextEmbedding3Small; +``` + +## See Also + +* class [OpenAIEmbeddingModels](../OpenAIEmbeddingModels.md) +* namespace [ChatGptNet.Models.Embeddings](../../ChatGptNet.md) + + diff --git a/docs/ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels/TextEmbeddingAda002.md b/docs/ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels/TextEmbeddingAda002.md index 1480502..b1c3d0b 100644 --- a/docs/ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels/TextEmbeddingAda002.md +++ b/docs/ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels/TextEmbeddingAda002.md @@ -1,6 +1,6 @@ # OpenAIEmbeddingModels.TextEmbeddingAda002 field -The second generation embedding model provided by OpenAI. +The second generation embedding model provided by OpenAI. It uses a 1536 output dimension. 
```csharp public const string TextEmbeddingAda002; diff --git a/docs/ChatGptNet/ChatGptOptions.md b/docs/ChatGptNet/ChatGptOptions.md index e403302..eaa3a51 100644 --- a/docs/ChatGptNet/ChatGptOptions.md +++ b/docs/ChatGptNet/ChatGptOptions.md @@ -12,6 +12,7 @@ public class ChatGptOptions | --- | --- | | [ChatGptOptions](ChatGptOptions/ChatGptOptions.md)() | The default constructor. | | [DefaultEmbeddingModel](ChatGptOptions/DefaultEmbeddingModel.md) { get; set; } | Gets or sets the default model for embedding. (default: [`TextEmbeddingAda002`](../ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels/TextEmbeddingAda002.md) when the provider is OpenAI). | +| [DefaultEmbeddingParameters](ChatGptOptions/DefaultEmbeddingParameters.md) { get; } | Gets or sets the default parameters for embeddings. | | [DefaultModel](ChatGptOptions/DefaultModel.md) { get; set; } | Gets or sets the default model for chat completion. (default: [`Gpt35Turbo`](../ChatGptNet.Models/OpenAIChatGptModels/Gpt35Turbo.md) when the provider is OpenAI). | | [DefaultParameters](ChatGptOptions/DefaultParameters.md) { get; } | Gets or sets the default parameters for chat completion. | | [MessageExpiration](ChatGptOptions/MessageExpiration.md) { get; set; } | Gets or sets the expiration for cached conversation messages (default: 1 hour). | diff --git a/docs/ChatGptNet/ChatGptOptions/DefaultEmbeddingParameters.md b/docs/ChatGptNet/ChatGptOptions/DefaultEmbeddingParameters.md new file mode 100644 index 0000000..99b3d4d --- /dev/null +++ b/docs/ChatGptNet/ChatGptOptions/DefaultEmbeddingParameters.md @@ -0,0 +1,15 @@ +# ChatGptOptions.DefaultEmbeddingParameters property + +Gets or sets the default parameters for embeddings. 
+ +```csharp +public EmbeddingParameters DefaultEmbeddingParameters { get; } +``` + +## See Also + +* class [EmbeddingParameters](../../ChatGptNet.Models.Embeddings/EmbeddingParameters.md) +* class [ChatGptOptions](../ChatGptOptions.md) +* namespace [ChatGptNet](../../ChatGptNet.md) + + diff --git a/docs/ChatGptNet/ChatGptOptionsBuilder.md b/docs/ChatGptNet/ChatGptOptionsBuilder.md index d5fde27..935b69b 100644 --- a/docs/ChatGptNet/ChatGptOptionsBuilder.md +++ b/docs/ChatGptNet/ChatGptOptionsBuilder.md @@ -11,7 +11,8 @@ public class ChatGptOptionsBuilder | name | description | | --- | --- | | [ChatGptOptionsBuilder](ChatGptOptionsBuilder/ChatGptOptionsBuilder.md)() | The default constructor. | -| [DefaultEmbeddingModel](ChatGptOptionsBuilder/DefaultEmbeddingModel.md) { get; set; } | Gets or sets the default model for embedding. (default: [`TextEmbeddingAda002`](../ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels/TextEmbeddingAda002.md) when the provider is OpenAI). | +| [DefaultEmbeddingModel](ChatGptOptionsBuilder/DefaultEmbeddingModel.md) { get; set; } | Gets or sets the default model for embeddings. (default: [`TextEmbeddingAda002`](../ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels/TextEmbeddingAda002.md) when the provider is OpenAI). | +| [DefaultEmbeddingParameters](ChatGptOptionsBuilder/DefaultEmbeddingParameters.md) { get; } | Gets or sets the default parameters for embeddings. | | [DefaultModel](ChatGptOptionsBuilder/DefaultModel.md) { get; set; } | Gets or sets the default model for chat completion. (default: [`Gpt35Turbo`](../ChatGptNet.Models/OpenAIChatGptModels/Gpt35Turbo.md) when the provider is OpenAI). | | [DefaultParameters](ChatGptOptionsBuilder/DefaultParameters.md) { get; set; } | Gets or sets the default parameters for chat completion. | | [MessageExpiration](ChatGptOptionsBuilder/MessageExpiration.md) { get; set; } | Gets or sets the expiration for cached conversation messages (default: 1 hour). 
| diff --git a/docs/ChatGptNet/ChatGptOptionsBuilder/DefaultEmbeddingModel.md b/docs/ChatGptNet/ChatGptOptionsBuilder/DefaultEmbeddingModel.md index 0eedd46..f2bf872 100644 --- a/docs/ChatGptNet/ChatGptOptionsBuilder/DefaultEmbeddingModel.md +++ b/docs/ChatGptNet/ChatGptOptionsBuilder/DefaultEmbeddingModel.md @@ -1,6 +1,6 @@ # ChatGptOptionsBuilder.DefaultEmbeddingModel property -Gets or sets the default model for embedding. (default: [`TextEmbeddingAda002`](../../ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels/TextEmbeddingAda002.md) when the provider is OpenAI). +Gets or sets the default model for embeddings. (default: [`TextEmbeddingAda002`](../../ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels/TextEmbeddingAda002.md) when the provider is OpenAI). ```csharp public string? DefaultEmbeddingModel { get; set; } diff --git a/docs/ChatGptNet/ChatGptOptionsBuilder/DefaultEmbeddingParameters.md b/docs/ChatGptNet/ChatGptOptionsBuilder/DefaultEmbeddingParameters.md new file mode 100644 index 0000000..ce497e1 --- /dev/null +++ b/docs/ChatGptNet/ChatGptOptionsBuilder/DefaultEmbeddingParameters.md @@ -0,0 +1,15 @@ +# ChatGptOptionsBuilder.DefaultEmbeddingParameters property + +Gets or sets the default parameters for embeddings. + +```csharp +public EmbeddingParameters DefaultEmbeddingParameters { get; } +``` + +## See Also + +* class [EmbeddingParameters](../../ChatGptNet.Models.Embeddings/EmbeddingParameters.md) +* class [ChatGptOptionsBuilder](../ChatGptOptionsBuilder.md) +* namespace [ChatGptNet](../../ChatGptNet.md) + + diff --git a/docs/ChatGptNet/IChatGptClient.md b/docs/ChatGptNet/IChatGptClient.md index 640ed2b..8d80426 100644 --- a/docs/ChatGptNet/IChatGptClient.md +++ b/docs/ChatGptNet/IChatGptClient.md @@ -16,7 +16,7 @@ public interface IChatGptClient | [AskStreamAsync](IChatGptClient/AskStreamAsync.md)(…) | Requests a new chat interaction with streaming response, like in ChatGPT. 
(2 methods) | | [ConversationExistsAsync](IChatGptClient/ConversationExistsAsync.md)(…) | Checks if a chat conversation exists. | | [DeleteConversationAsync](IChatGptClient/DeleteConversationAsync.md)(…) | Deletes a chat conversation, clearing all the history. | -| [GenerateEmbeddingAsync](IChatGptClient/GenerateEmbeddingAsync.md)(…) | Generates embeddings for a message. (2 methods) | +| [GenerateEmbeddingAsync](IChatGptClient/GenerateEmbeddingAsync.md)(…) | Generates embeddings for a text. (2 methods) | | [GetConversationAsync](IChatGptClient/GetConversationAsync.md)(…) | Retrieves a chat conversation from the cache. | | [LoadConversationAsync](IChatGptClient/LoadConversationAsync.md)(…) | Loads messages into a new conversation. (2 methods) | | [SetupAsync](IChatGptClient/SetupAsync.md)(…) | Setups a new conversation with a system message, that is used to influence assistant behavior. (2 methods) | diff --git a/docs/ChatGptNet/IChatGptClient/GenerateEmbeddingAsync.md b/docs/ChatGptNet/IChatGptClient/GenerateEmbeddingAsync.md index b1e3aab..fa3536f 100644 --- a/docs/ChatGptNet/IChatGptClient/GenerateEmbeddingAsync.md +++ b/docs/ChatGptNet/IChatGptClient/GenerateEmbeddingAsync.md @@ -1,15 +1,17 @@ # IChatGptClient.GenerateEmbeddingAsync method (1 of 2) -Generates embeddings for a list of messages. +Generates embeddings for a list of texts. ```csharp -public Task GenerateEmbeddingAsync(IEnumerable messages, - string? model = null, CancellationToken cancellationToken = default) +public Task GenerateEmbeddingAsync(IEnumerable texts, + EmbeddingParameters? parameters = null, string? model = null, + CancellationToken cancellationToken = default) ``` | parameter | description | | --- | --- | -| messages | The messages to use for generating embeddings. | +| texts | The texts to use for generating embeddings. 
| +| parameters | An [`EmbeddingParameters`](../../ChatGptNet.Models.Embeddings/EmbeddingParameters.md) object used to override the default embedding parameters in the [`DefaultEmbeddingParameters`](../ChatGptOptions/DefaultEmbeddingParameters.md) property. | | model | The name of the embedding model. If *model* is `null`, then the one specified in the [`DefaultEmbeddingModel`](../ChatGptOptions/DefaultEmbeddingModel.md) property will be used. | | cancellationToken | The token to monitor for cancellation requests. | @@ -26,6 +28,7 @@ The embeddings for the provided messages. ## See Also * class [EmbeddingResponse](../../ChatGptNet.Models.Embeddings/EmbeddingResponse.md) +* class [EmbeddingParameters](../../ChatGptNet.Models.Embeddings/EmbeddingParameters.md) * interface [IChatGptClient](../IChatGptClient.md) * namespace [ChatGptNet](../../ChatGptNet.md) @@ -33,16 +36,18 @@ The embeddings for the provided messages. # IChatGptClient.GenerateEmbeddingAsync method (2 of 2) -Generates embeddings for a message. +Generates embeddings for a text. ```csharp -public Task GenerateEmbeddingAsync(string message, string? model = null, +public Task GenerateEmbeddingAsync(string text, + EmbeddingParameters? parameters = null, string? model = null, CancellationToken cancellationToken = default) ``` | parameter | description | | --- | --- | -| message | The message to use for generating embeddings. | +| text | The text to use for generating embeddings. | +| parameters | An [`EmbeddingParameters`](../../ChatGptNet.Models.Embeddings/EmbeddingParameters.md) object used to override the default embedding parameters in the [`DefaultEmbeddingParameters`](../ChatGptOptions/DefaultEmbeddingParameters.md) property. | | model | The name of the embedding model. If *model* is `null`, then the one specified in the [`DefaultEmbeddingModel`](../ChatGptOptions/DefaultEmbeddingModel.md) property will be used. | | cancellationToken | The token to monitor for cancellation requests. 
| @@ -59,6 +64,7 @@ The embeddings for the provided message. ## See Also * class [EmbeddingResponse](../../ChatGptNet.Models.Embeddings/EmbeddingResponse.md) +* class [EmbeddingParameters](../../ChatGptNet.Models.Embeddings/EmbeddingParameters.md) * interface [IChatGptClient](../IChatGptClient.md) * namespace [ChatGptNet](../../ChatGptNet.md) diff --git a/docs/README.md b/docs/README.md index e2a9ae3..6f45e14 100644 --- a/docs/README.md +++ b/docs/README.md @@ -71,6 +71,7 @@ | public type | description | | --- | --- | | class [EmbeddingData](./ChatGptNet.Models.Embeddings/EmbeddingData.md) | Represents an embedding. | +| class [EmbeddingParameters](./ChatGptNet.Models.Embeddings/EmbeddingParameters.md) | Represents embeddings parameters. | | class [EmbeddingResponse](./ChatGptNet.Models.Embeddings/EmbeddingResponse.md) | Represents an embedding response. | | static class [OpenAIEmbeddingModels](./ChatGptNet.Models.Embeddings/OpenAIEmbeddingModels.md) | Contains all the embedding models that are currently supported by OpenAI. | diff --git a/src/ChatGptNet/Models/Embeddings/OpenAIEmbeddingModels.cs b/src/ChatGptNet/Models/Embeddings/OpenAIEmbeddingModels.cs index 8a1102c..a8a7539 100644 --- a/src/ChatGptNet/Models/Embeddings/OpenAIEmbeddingModels.cs +++ b/src/ChatGptNet/Models/Embeddings/OpenAIEmbeddingModels.cs @@ -12,7 +12,17 @@ namespace ChatGptNet.Models.Embeddings; public static class OpenAIEmbeddingModels { /// - /// The second generation embedding model provided by OpenAI. + /// The second generation embedding model provided by OpenAI. It uses a 1536 output dimension. /// public const string TextEmbeddingAda002 = "text-embedding-ada-002"; + + /// + /// Increased performance over 2nd generation ada embedding model. It uses a 1536 output dimension. + /// + public const string TextEmbedding3Small = "text-embedding-3-small"; + + /// + /// Most capable embedding model for both english and non-english tasks. It uses a 3072 output dimension. 
+ /// + public const string TextEmbedding3Large = "text-embedding-3-large"; } \ No newline at end of file From 1e911dd82c3b4618b6371051f8d7c86926e5ba8e Mon Sep 17 00:00:00 2001 From: Marco Minerva Date: Mon, 29 Jan 2024 12:47:13 +0100 Subject: [PATCH 3/3] Documentation update #143 --- README.md | 52 ++++++++++++++++++++++++- src/ChatGptNet/ChatGptOptionsBuilder.cs | 2 +- src/ChatGptNet/version.json | 2 +- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 1f9d520..a18aa0f 100644 --- a/README.md +++ b/README.md @@ -136,7 +136,7 @@ We can also set ChatGPT parameters for chat completion at startup. Check the [of The configuration can be automatically read from [IConfiguration](https://learn.microsoft.com/en-us/dotnet/api/microsoft.extensions.configuration.iconfiguration), using for example a _ChatGPT_ section in the _appsettings.json_ file: -```yaml +``` "ChatGPT": { "Provider": "OpenAI", // Optional. Allowed values: OpenAI (default) or Azure "ApiKey": "", // Required @@ -159,6 +159,9 @@ The configuration can be automatically read from [IConfiguration](https://learn. // "FrequencyPenalty": 0, // "ResponseFormat": { "Type": "text" }, // Allowed values for Type: text (default) or json_object // "Seed": 42 // Optional (any integer value) + //}, + //"DefaultEmbeddingParameters": { + // "Dimensions": 1536 //} } ``` @@ -550,7 +553,52 @@ var response = await chatGptClient.GenerateEmbeddingAsync(message); var embeddings = response.GetEmbedding(); ``` -This code will give you a float array containing all the embeddings for the specified message. The length of the array depends on the model used. For example, if we use the _text-embedding-ada-002_ model, the array will contain 1536 elements. +This code will give you a float array containing all the embeddings for the specified message. 
The length of the array depends on the model used: + +| Model| Output dimension | +| - | - | +| text-embedding-ada-002 | 1536 | +| text-embedding-3-small | 1536 | +| text-embedding-3-large | 3072 | + +Newer models like _text-embedding-3-small_ and _text-embedding-3-large_ allow developers to trade off performance and cost of using embeddings. Specifically, developers can shorten embeddings without the embedding losing its concept-representing properties. + +As for ChatGPT, these settings can be configured in various ways: + +- Via code: + +```csharp +builder.Services.AddChatGpt(options => +{ + // ... + + options.DefaultEmbeddingParameters = new EmbeddingParameters + { + Dimensions = 256 + }; +}); +``` + +- Using the _appsettings.json_ file: + +``` +"ChatGPT": { + "DefaultEmbeddingParameters": { + "Dimensions": 256 + } +} +``` + +Then, if you want to change the dimension for a particular request, you can specify the *EmbeddingParameters* argument in the **GenerateEmbeddingAsync** invocation: + +```csharp +var response = await chatGptClient.GenerateEmbeddingAsync(request.Message, new EmbeddingParameters +{ + Dimensions = 512 +}); + +var embeddings = response.GetEmbedding(); // The length of the array is 512 +``` If you need to calculate the [cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) between two embeddings, you can use the **EmbeddingUtility.CosineSimilarity** method. diff --git a/src/ChatGptNet/ChatGptOptionsBuilder.cs b/src/ChatGptNet/ChatGptOptionsBuilder.cs index cdc061e..721a801 100644 --- a/src/ChatGptNet/ChatGptOptionsBuilder.cs +++ b/src/ChatGptNet/ChatGptOptionsBuilder.cs @@ -59,7 +59,7 @@ public class ChatGptOptionsBuilder /// Gets or sets the default parameters for embeddings. 
/// /// - public EmbeddingParameters DefaultEmbeddingParameters { get; internal set; } = new(); + public EmbeddingParameters DefaultEmbeddingParameters { get; set; } = new(); /// /// Gets or sets the user identification for chat completion, which can help OpenAI to monitor and detect abuse. diff --git a/src/ChatGptNet/version.json b/src/ChatGptNet/version.json index 3222e9d..9b3208d 100644 --- a/src/ChatGptNet/version.json +++ b/src/ChatGptNet/version.json @@ -1,6 +1,6 @@ { "$schema": "https://raw.githubusercontent.com/dotnet/Nerdbank.GitVersioning/master/src/NerdBank.GitVersioning/version.schema.json", - "version": "3.1", + "version": "3.2", "publicReleaseRefSpec": [ "^refs/heads/master$" // we release out of master ],