Azure: 2024-09-01-preview support #306

Draft · wants to merge 4 commits into base: main
10 changes: 10 additions & 0 deletions .dotnet.azure/sdk/openai/Azure.AI.OpenAI/CHANGELOG.md
@@ -1,5 +1,15 @@
# Release History

## 2.1.0-beta.2 (Unreleased)

This update brings compatibility with the Azure OpenAI `2024-09-01-preview` service API version as well as the `2.1.0-beta.2` release of the `OpenAI` library.

### Features Added

- `2024-09-01-preview` brings AOAI support for streaming token usage in chat completions; `Usage` is now automatically populated in `StreamingChatCompletionUpdate` instances, as sketched below.
  - Note 1: this feature is not yet available when using On Your Data (i.e., after calling the `.AddDataSource()` extension method on `ChatCompletionOptions`)
  - Note 2: this feature is not yet available when using image input (a `ChatMessageContentPart` of `Kind` `Image`)
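
A minimal sketch of observing the streamed usage against this `api-version`; the endpoint, key, and deployment name below are placeholders:

```csharp
using System;
using System.ClientModel;
using Azure.AI.OpenAI;
using OpenAI.Chat;

AzureOpenAIClient azureClient = new(
    new Uri("https://my-resource.openai.azure.com/"), // placeholder endpoint
    new ApiKeyCredential("my-api-key"));              // placeholder credential
ChatClient chatClient = azureClient.GetChatClient("my-gpt-4o-deployment"); // placeholder deployment

await foreach (StreamingChatCompletionUpdate update in
    chatClient.CompleteChatStreamingAsync([new UserChatMessage("Hello!")]))
{
    // With 2024-09-01-preview, the trailing update carries token usage.
    if (update.Usage is not null)
    {
        Console.WriteLine($"{update.Usage.InputTokenCount} input + "
            + $"{update.Usage.OutputTokenCount} output = "
            + $"{update.Usage.TotalTokenCount} total tokens");
    }
}
```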

## 2.1.0-beta.1 (2024-10-01)

Relative to the prior GA release, this update restores preview surfaces, retargeting them to the latest `2024-08-01-preview` service `api-version` label. It also brings early support for the newly-announced `/realtime` capabilities with `gpt-4o-realtime-preview`. You can read more about Azure OpenAI support for `/realtime` in the announcement post here: https://azure.microsoft.com/blog/announcing-new-products-and-features-for-azure-openai-service-including-gpt-4o-realtime-preview-with-audio-and-speech-capabilities/
2 changes: 1 addition & 1 deletion .dotnet.azure/sdk/openai/Azure.AI.OpenAI/assets.json
@@ -2,5 +2,5 @@
"AssetsRepo": "Azure/azure-sdk-assets",
"AssetsRepoPrefixPath": "net",
"TagPrefix": "dotnet.azure/openai/Azure.AI.OpenAI",
"Tag": "dotnet.azure/openai/Azure.AI.OpenAI_6934ac44f7"
"Tag": "dotnet.azure/openai/Azure.AI.OpenAI_5a90d184af"
}
@@ -56,6 +56,7 @@ public AzureOpenAIClientOptions(ServiceVersion version = LatestVersion)
{
#if !AZURE_OPENAI_GA
ServiceVersion.V2024_08_01_Preview => "2024-08-01-preview",
ServiceVersion.V2024_09_01_Preview => "2024-09-01-preview",
ServiceVersion.V2024_10_01_Preview => "2024-10-01-preview",
#endif
ServiceVersion.V2024_06_01 => "2024-06-01",
@@ -70,6 +71,7 @@ public enum ServiceVersion
V2024_06_01 = 0,
#if !AZURE_OPENAI_GA
V2024_08_01_Preview = 1,
V2024_09_01_Preview = 2,
V2024_10_01_Preview = 3,
#endif
}
@@ -103,7 +105,7 @@ protected override TimeSpan GetNextDelay(PipelineMessage message, int tryCount)
}

#if !AZURE_OPENAI_GA
- private const ServiceVersion LatestVersion = ServiceVersion.V2024_08_01_Preview;
+ private const ServiceVersion LatestVersion = ServiceVersion.V2024_09_01_Preview;
#else
private const ServiceVersion LatestVersion = ServiceVersion.V2024_06_01;
#endif
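
For illustration, a client can also pin the new label explicitly rather than relying on `LatestVersion`; the endpoint and key are placeholders:

```csharp
using System;
using System.ClientModel;
using Azure.AI.OpenAI;

AzureOpenAIClient azureClient = new(
    new Uri("https://my-resource.openai.azure.com/"), // placeholder endpoint
    new ApiKeyCredential("my-api-key"),               // placeholder credential
    new AzureOpenAIClientOptions(AzureOpenAIClientOptions.ServiceVersion.V2024_09_01_Preview));
```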
@@ -1,11 +1,14 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using Azure.AI.OpenAI.Internal;
using OpenAI.Chat;
using System.ClientModel;
using System.ClientModel.Primitives;
using System.Diagnostics.CodeAnalysis;

#pragma warning disable AOAI001
#pragma warning disable AZC0112

namespace Azure.AI.OpenAI.Chat;
@@ -63,33 +66,86 @@ public override CollectionResult<StreamingChatCompletionUpdate> CompleteChatStre
/// <inheritdoc/>
public override AsyncCollectionResult<StreamingChatCompletionUpdate> CompleteChatStreamingAsync(IEnumerable<ChatMessage> messages, ChatCompletionOptions options = null, CancellationToken cancellationToken = default)
{
- PostfixClearStreamOptions(ref options);
+ PostfixClearStreamOptions(messages, ref options);
PostfixSwapMaxTokens(ref options);
return base.CompleteChatStreamingAsync(messages, options, cancellationToken);
}

/// <inheritdoc/>
public override CollectionResult<StreamingChatCompletionUpdate> CompleteChatStreaming(IEnumerable<ChatMessage> messages, ChatCompletionOptions options = null, CancellationToken cancellationToken = default)
{
- PostfixClearStreamOptions(ref options);
+ PostfixClearStreamOptions(messages, ref options);
PostfixSwapMaxTokens(ref options);
return base.CompleteChatStreaming(messages, options, cancellationToken);
}

/**
 * As of 2024-09-01-preview, stream_options.include_usage (which reports token usage while streaming)
 * is only conditionally supported:
 * - When using On Your Data (a non-null data_sources collection), stream_options is not considered valid
 * - When using image input (any content part of "image" type), stream_options is not considered valid
 * - Otherwise, stream_options is enabled by default for parity with the OpenAI surface
 */
- private static void PostfixClearStreamOptions(ref ChatCompletionOptions options)
+ private static void PostfixClearStreamOptions(IEnumerable<ChatMessage> messages, ref ChatCompletionOptions options)
{
- options ??= new();
- options.StreamOptions = null;
if (AdditionalPropertyHelpers
.GetAdditionalListProperty<ChatDataSource>(options?.SerializedAdditionalRawData, "data_sources")?.Count > 0
|| messages?.Any(
message => message?.Content?.Any(
contentPart => contentPart?.Kind == ChatMessageContentPartKind.Image) == true)
== true)
{
options ??= new();
options.StreamOptions = null;
}
}

/**
 * As of 2024-09-01-preview, Azure OpenAI conditionally supports use of the new max_completion_tokens property:
 * - The o1-mini and o1-preview models accept max_completion_tokens and reject max_tokens
 * - All other models reject max_completion_tokens and accept max_tokens
 * To handle this, each request manipulates serialization overrides as follows:
 * - If no max token value is set, no action is taken
 * - If serialization of max_tokens has already been blocked (e.g. via the public extension method), no
 *   additional fixup is applied and ordinary serialization to max_completion_tokens occurs
 * - Otherwise, serialization of max_completion_tokens is blocked and an override serialization of the
 *   corresponding max_tokens value is established
 */
private static void PostfixSwapMaxTokens(ref ChatCompletionOptions options)
{
options ??= new();
- if (options.MaxOutputTokenCount is not null)
bool valueIsSet = options.MaxOutputTokenCount is not null;
bool oldPropertyBlocked = AdditionalPropertyHelpers.GetIsEmptySentinelValue(options.SerializedAdditionalRawData, "max_tokens");

if (valueIsSet)
{
if (!oldPropertyBlocked)
{
options.SerializedAdditionalRawData ??= new ChangeTrackingDictionary<string, BinaryData>();
AdditionalPropertyHelpers.SetEmptySentinelValue(options.SerializedAdditionalRawData, "max_completion_tokens");
options.SerializedAdditionalRawData["max_tokens"] = BinaryData.FromObjectAsJson(options.MaxOutputTokenCount);
}
else
{
// Allow standard serialization to the new property to occur; remove overrides
if (options.SerializedAdditionalRawData.ContainsKey("max_completion_tokens"))
{
options.SerializedAdditionalRawData.Remove("max_completion_tokens");
}
}
}
else
{
- options.SerializedAdditionalRawData ??= new Dictionary<string, BinaryData>();
- options.SerializedAdditionalRawData["max_completion_tokens"] = BinaryData.FromObjectAsJson("__EMPTY__");
- options.SerializedAdditionalRawData["max_tokens"] = BinaryData.FromObjectAsJson(options.MaxOutputTokenCount);
if (!AdditionalPropertyHelpers.GetIsEmptySentinelValue(options.SerializedAdditionalRawData, "max_tokens")
&& options.SerializedAdditionalRawData?.ContainsKey("max_tokens") == true)
{
options.SerializedAdditionalRawData.Remove("max_tokens");
}
if (!AdditionalPropertyHelpers.GetIsEmptySentinelValue(options.SerializedAdditionalRawData, "max_completion_tokens")
&& options.SerializedAdditionalRawData?.ContainsKey("max_completion_tokens") == true)
{
options.SerializedAdditionalRawData.Remove("max_completion_tokens");
}
}
}
}
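
For context, a hedged sketch of the caller-visible effect of `PostfixClearStreamOptions`: once a data source is attached, `StreamOptions` is cleared before the request goes out, so streamed updates are not expected to carry `Usage`. The search endpoint, index name, and key are placeholders:

```csharp
#pragma warning disable AOAI001 // On Your Data extensions are experimental

using System;
using Azure.AI.OpenAI.Chat;
using OpenAI.Chat;

ChatCompletionOptions options = new();
options.AddDataSource(new AzureSearchChatDataSource()
{
    Endpoint = new Uri("https://my-search.search.windows.net"),            // placeholder
    IndexName = "my-index",                                                // placeholder
    Authentication = DataSourceAuthentication.FromApiKey("my-search-key"), // placeholder
});
// During CompleteChatStreaming(Async), the fixup detects the data source and
// nulls out options.StreamOptions, so stream_options is omitted from the
// request and StreamingChatCompletionUpdate.Usage is expected to stay null.
```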
@@ -36,6 +36,26 @@ public static IReadOnlyList<ChatDataSource> GetDataSources(this ChatCompletionOp
"data_sources") as IReadOnlyList<ChatDataSource>;
}

[Experimental("AOAI001")]
public static void SetNewMaxCompletionTokensPropertyEnabled(this ChatCompletionOptions options, bool newPropertyEnabled = true)
{
if (newPropertyEnabled)
{
// Blocking serialization of max_tokens via the dictionary acts as a signal to skip the pre-serialization fixup
options.SerializedAdditionalRawData ??= new ChangeTrackingDictionary<string, BinaryData>();
AdditionalPropertyHelpers.SetEmptySentinelValue(options.SerializedAdditionalRawData, "max_tokens");
}
else
{
// In the absence of a dictionary serialization block to max_tokens, the newer property name will
// automatically be blocked and the older property name will be used via dictionary override
if (options?.SerializedAdditionalRawData?.ContainsKey("max_tokens") == true)
{
options?.SerializedAdditionalRawData?.Remove("max_tokens");
}
}
}


[Experimental("AOAI001")]
public static RequestContentFilterResult GetRequestContentFilterResult(this ChatCompletion chatCompletion)
{
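
Illustrative usage of the new `SetNewMaxCompletionTokensPropertyEnabled` extension with an o1-family deployment; the endpoint, key, and deployment name are placeholders:

```csharp
using System;
using System.ClientModel;
using Azure.AI.OpenAI;
using Azure.AI.OpenAI.Chat;
using OpenAI.Chat;

#pragma warning disable AOAI001 // the extension method is experimental

AzureOpenAIClient azureClient = new(
    new Uri("https://my-resource.openai.azure.com/"), // placeholder endpoint
    new ApiKeyCredential("my-api-key"));              // placeholder credential

ChatCompletionOptions options = new() { MaxOutputTokenCount = 1000 };
// o1-mini and o1-preview accept max_completion_tokens and reject max_tokens,
// so opt in to serializing the new property name for these models:
options.SetNewMaxCompletionTokensPropertyEnabled();

ChatClient client = azureClient.GetChatClient("my-o1-mini-deployment"); // placeholder
ChatCompletion completion = await client.CompleteChatAsync(
    [new UserChatMessage("Summarize this PR's max token handling.")], options);
```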
@@ -8,6 +8,8 @@ namespace Azure.AI.OpenAI.Internal;

internal static class AdditionalPropertyHelpers
{
private const string SARD_EMPTY_SENTINEL = "__EMPTY__";

internal static T GetAdditionalProperty<T>(IDictionary<string, BinaryData> additionalProperties, string key)
where T : class, IJsonModel<T>
{
@@ -45,4 +47,17 @@ internal static void SetAdditionalProperty<T>(IDictionary<string, BinaryData> ad
BinaryData binaryValue = BinaryData.FromStream(stream);
additionalProperties[key] = binaryValue;
}

internal static void SetEmptySentinelValue(IDictionary<string, BinaryData> additionalProperties, string key)
{
Argument.AssertNotNull(additionalProperties, nameof(additionalProperties));
additionalProperties[key] = BinaryData.FromObjectAsJson(SARD_EMPTY_SENTINEL);
}

internal static bool GetIsEmptySentinelValue(IDictionary<string, BinaryData> additionalProperties, string key)
{
return additionalProperties is not null
&& additionalProperties.TryGetValue(key, out BinaryData existingValue)
&& StringComparer.OrdinalIgnoreCase.Equals(existingValue.ToString(), $@"""{SARD_EMPTY_SENTINEL}""");
}
}
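
A quick sketch of the sentinel round-trip these internal helpers implement (shown only to illustrate the contract):

```csharp
using System;
using System.Collections.Generic;
using Azure.AI.OpenAI.Internal;

Dictionary<string, BinaryData> raw = new();

// SetEmptySentinelValue stores the JSON string "__EMPTY__" under the key...
AdditionalPropertyHelpers.SetEmptySentinelValue(raw, "max_tokens");

// ...and GetIsEmptySentinelValue matches that exact JSON text (quotes included,
// compared case-insensitively), distinguishing the sentinel from real values.
bool blocked = AdditionalPropertyHelpers.GetIsEmptySentinelValue(raw, "max_tokens");             // true
bool otherKey = AdditionalPropertyHelpers.GetIsEmptySentinelValue(raw, "max_completion_tokens"); // false

raw["max_tokens"] = BinaryData.FromObjectAsJson(42);
bool nowRealValue = AdditionalPropertyHelpers.GetIsEmptySentinelValue(raw, "max_tokens");        // false
```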
@@ -87,6 +87,7 @@ public async Task ChatWithImagesStreaming(bool useUri)
{
bool foundPromptFilter = false;
bool foundResponseFilter = false;
ChatTokenUsage? usage = null;
StringBuilder content = new();

ChatClient client = GetTestClient("vision");
@@ -123,9 +124,11 @@ public async Task ChatWithImagesStreaming(bool useUri)

await foreach (StreamingChatCompletionUpdate update in response)
{
- ValidateUpdate(update, content, ref foundPromptFilter, ref foundResponseFilter);
+ ValidateUpdate(update, content, ref foundPromptFilter, ref foundResponseFilter, ref usage);
}

// Assert.That(usage, Is.Not.Null);

// TODO FIXME: gpt-4o models seem to return inconsistent prompt filters to skip this for now
//Assert.That(foundPromptFilter, Is.True);

77 changes: 74 additions & 3 deletions .dotnet.azure/sdk/openai/Azure.AI.OpenAI/tests/ChatTests.cs
@@ -142,6 +142,69 @@ public void DataSourceSerializationWorks()
Assert.That(sourcesFromOptions[1], Is.InstanceOf<CosmosChatDataSource>());
}

#if !AZURE_OPENAI_GA
[Test]
[Category("Smoke")]
public async Task MaxTokensSerializationConfigurationWorks()
{
using MockHttpMessageHandler pipeline = new(MockHttpMessageHandler.ReturnEmptyJson);

Uri endpoint = new Uri("https://www.bing.com/");
string apiKey = "not-a-real-one";
string model = "ignore";

AzureOpenAIClient topLevel = new(
endpoint,
new ApiKeyCredential(apiKey),
new AzureOpenAIClientOptions()
{
Transport = pipeline.Transport
});

ChatClient client = topLevel.GetChatClient(model);

ChatCompletionOptions options = new();
bool GetSerializedOptionsContains(string value)
{
BinaryData serialized = ModelReaderWriter.Write(options);
return serialized.ToString().Contains(value);
}
async Task AssertExpectedSerializationAsync(bool hasOldMaxTokens, bool hasNewMaxCompletionTokens)
{
_ = await client.CompleteChatAsync(["Just mocking, no call here"], options);
Assert.That(GetSerializedOptionsContains("max_tokens"), Is.EqualTo(hasOldMaxTokens));
Assert.That(GetSerializedOptionsContains("max_completion_tokens"), Is.EqualTo(hasNewMaxCompletionTokens));
}

await AssertExpectedSerializationAsync(false, false);
await AssertExpectedSerializationAsync(false, false);

options.MaxOutputTokenCount = 42;
await AssertExpectedSerializationAsync(true, false);
await AssertExpectedSerializationAsync(true, false);
options.MaxOutputTokenCount = null;
await AssertExpectedSerializationAsync(false, false);
options.MaxOutputTokenCount = 42;
await AssertExpectedSerializationAsync(true, false);

options.SetNewMaxCompletionTokensPropertyEnabled();
await AssertExpectedSerializationAsync(false, true);
await AssertExpectedSerializationAsync(false, true);
options.MaxOutputTokenCount = null;
await AssertExpectedSerializationAsync(false, false);
options.MaxOutputTokenCount = 42;
await AssertExpectedSerializationAsync(false, true);

options.SetNewMaxCompletionTokensPropertyEnabled(false);
await AssertExpectedSerializationAsync(true, false);
await AssertExpectedSerializationAsync(true, false);
options.MaxOutputTokenCount = null;
await AssertExpectedSerializationAsync(false, false);
options.MaxOutputTokenCount = 42;
await AssertExpectedSerializationAsync(true, false);
}
#endif

[RecordedTest]
public async Task ChatCompletionBadKeyGivesHelpfulError()
{
@@ -492,6 +555,7 @@ public async Task ChatCompletionStreaming()
StringBuilder builder = new();
bool foundPromptFilter = false;
bool foundResponseFilter = false;
ChatTokenUsage? usage = null;

ChatClient chatClient = GetTestClient();

@@ -512,12 +576,14 @@

await foreach (StreamingChatCompletionUpdate update in streamingResults)
{
- ValidateUpdate(update, builder, ref foundPromptFilter, ref foundResponseFilter);
+ ValidateUpdate(update, builder, ref foundPromptFilter, ref foundResponseFilter, ref usage);
}

string allText = builder.ToString();
Assert.That(allText, Is.Not.Null.Or.Empty);

Assert.That(usage, Is.Not.Null);

Assert.That(foundPromptFilter, Is.True);
Assert.That(foundResponseFilter, Is.True);
}
Expand All @@ -528,6 +594,7 @@ public async Task SearchExtensionWorksStreaming()
StringBuilder builder = new();
bool foundPromptFilter = false;
bool foundResponseFilter = false;
ChatTokenUsage? usage = null;
List<ChatMessageContext> contexts = new();

var searchConfig = TestConfig.GetConfig("search")!;
@@ -555,7 +622,7 @@ public async Task SearchExtensionWorksStreaming()

await foreach (StreamingChatCompletionUpdate update in chatUpdates)
{
- ValidateUpdate(update, builder, ref foundPromptFilter, ref foundResponseFilter);
+ ValidateUpdate(update, builder, ref foundPromptFilter, ref foundResponseFilter, ref usage);

ChatMessageContext context = update.GetMessageContext();
if (context != null)
@@ -567,6 +634,8 @@
string allText = builder.ToString();
Assert.That(allText, Is.Not.Null.Or.Empty);

// Assert.That(usage, Is.Not.Null);

// TODO FIXME: When using data sources, the service does not appear to return request nor response filtering information
//Assert.That(foundPromptFilter, Is.True);
//Assert.That(foundResponseFilter, Is.True);
@@ -636,7 +705,7 @@ in client.CompleteChatStreamingAsync(
#endregion
#region Helper methods

- private void ValidateUpdate(StreamingChatCompletionUpdate update, StringBuilder builder, ref bool foundPromptFilter, ref bool foundResponseFilter)
+ private void ValidateUpdate(StreamingChatCompletionUpdate update, StringBuilder builder, ref bool foundPromptFilter, ref bool foundResponseFilter, ref ChatTokenUsage? usage)
{
if (update.CreatedAt == UNIX_EPOCH)
{
@@ -656,6 +725,8 @@ private void ValidateUpdate(StreamingChatCompletionUpdate update, StringBuilder
Assert.That(update.FinishReason, Is.Null.Or.EqualTo(ChatFinishReason.Stop));
if (update.Usage != null)
{
Assert.That(usage, Is.Null);
usage = update.Usage;
Assert.That(update.Usage.InputTokenCount, Is.GreaterThanOrEqualTo(0));
Assert.That(update.Usage.OutputTokenCount, Is.GreaterThanOrEqualTo(0));
Assert.That(update.Usage.TotalTokenCount, Is.GreaterThanOrEqualTo(0));
@@ -24,10 +24,16 @@

namespace Azure.AI.OpenAI.Tests;

[Category("FineTuning")]
public class FineTuningTests : AoaiTestBase<FineTuningClient>
{
public FineTuningTests(bool isAsync) : base(isAsync)
- { }
{
if (Mode == RecordedTestMode.Playback)
{
Assert.Inconclusive("Playback for fine-tuning temporarily disabled");
}
}

#if !AZURE_OPENAI_GA
[Test]
@@ -223,7 +229,7 @@ public async Task CreateAndDeleteFineTuning()
}
catch (ClientResultException e)
{
- if (e.Message.Contains("ResourceNotFound"))
+ if(e.Message.Contains("ResourceNotFound"))
{
// upload training data
uploadedFile = await UploadAndWaitForCompleteOrFail(fileClient, fineTuningFile.RelativePath);