Azure: 2024-09-01-preview support #306

Draft · wants to merge 4 commits into base: main
10 changes: 10 additions & 0 deletions .dotnet.azure/sdk/openai/Azure.AI.OpenAI/CHANGELOG.md
@@ -1,5 +1,15 @@
# Release History

## 2.1.0-beta.2 (Unreleased)

This update brings compatibility with the Azure OpenAI `2024-09-01-preview` service API version as well as the `2.1.0-beta.2` release of the `OpenAI` library.

### Features Added

- `2024-09-01-preview` brings AOAI support for streaming token usage in chat completions; `Usage` is now automatically populated in `StreamingChatCompletionUpdate` instances, as sketched below.
  - Note 1: this feature is not yet available when using On Your Data (i.e., after calling the `.AddDataSource()` extension method on `ChatCompletionOptions`)
  - Note 2: this feature is not yet available when using image input (a `ChatMessageContentPart` of `Kind` `Image`)
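
A minimal sketch of observing the streamed usage against this `api-version`; the endpoint, key, and deployment name below are placeholders:

```csharp
using System;
using System.ClientModel;
using Azure.AI.OpenAI;
using OpenAI.Chat;

AzureOpenAIClient azureClient = new(
    new Uri("https://my-resource.openai.azure.com/"), // placeholder endpoint
    new ApiKeyCredential("my-api-key"));              // placeholder credential
ChatClient chatClient = azureClient.GetChatClient("my-gpt-4o-deployment"); // placeholder deployment

await foreach (StreamingChatCompletionUpdate update in
    chatClient.CompleteChatStreamingAsync([new UserChatMessage("Hello!")]))
{
    // With 2024-09-01-preview, the trailing update carries token usage.
    if (update.Usage is not null)
    {
        Console.WriteLine($"{update.Usage.InputTokenCount} input + "
            + $"{update.Usage.OutputTokenCount} output = "
            + $"{update.Usage.TotalTokenCount} total tokens");
    }
}
```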

## 2.1.0-beta.1 (2024-10-01)

Relative to the prior GA release, this update restores preview surfaces, retargeting them to the latest `2024-08-01-preview` service `api-version` label. It also brings early support for the newly-announced `/realtime` capabilities with `gpt-4o-realtime-preview`. You can read more about Azure OpenAI support for `/realtime` in the announcement post here: https://azure.microsoft.com/blog/announcing-new-products-and-features-for-azure-openai-service-including-gpt-4o-realtime-preview-with-audio-and-speech-capabilities/
2 changes: 1 addition & 1 deletion .dotnet.azure/sdk/openai/Azure.AI.OpenAI/assets.json
@@ -2,5 +2,5 @@
"AssetsRepo": "Azure/azure-sdk-assets",
"AssetsRepoPrefixPath": "net",
"TagPrefix": "dotnet.azure/openai/Azure.AI.OpenAI",
"Tag": "dotnet.azure/openai/Azure.AI.OpenAI_6934ac44f7"
"Tag": "dotnet.azure/openai/Azure.AI.OpenAI_5a90d184af"
}
@@ -56,6 +56,7 @@ public AzureOpenAIClientOptions(ServiceVersion version = LatestVersion)
{
#if !AZURE_OPENAI_GA
ServiceVersion.V2024_08_01_Preview => "2024-08-01-preview",
ServiceVersion.V2024_09_01_Preview => "2024-09-01-preview",
ServiceVersion.V2024_10_01_Preview => "2024-10-01-preview",
#endif
ServiceVersion.V2024_06_01 => "2024-06-01",
@@ -70,6 +71,7 @@ public enum ServiceVersion
V2024_06_01 = 0,
#if !AZURE_OPENAI_GA
V2024_08_01_Preview = 1,
V2024_09_01_Preview = 2,
V2024_10_01_Preview = 3,
#endif
}
@@ -103,7 +105,7 @@ protected override TimeSpan GetNextDelay(PipelineMessage message, int tryCount)
}

#if !AZURE_OPENAI_GA
- private const ServiceVersion LatestVersion = ServiceVersion.V2024_08_01_Preview;
+ private const ServiceVersion LatestVersion = ServiceVersion.V2024_09_01_Preview;
#else
private const ServiceVersion LatestVersion = ServiceVersion.V2024_06_01;
#endif
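
For illustration, a client can also pin the new label explicitly rather than relying on `LatestVersion`; the endpoint and key are placeholders:

```csharp
using System;
using System.ClientModel;
using Azure.AI.OpenAI;

AzureOpenAIClient azureClient = new(
    new Uri("https://my-resource.openai.azure.com/"), // placeholder endpoint
    new ApiKeyCredential("my-api-key"),               // placeholder credential
    new AzureOpenAIClientOptions(AzureOpenAIClientOptions.ServiceVersion.V2024_09_01_Preview));
```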
@@ -1,11 +1,14 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using Azure.AI.OpenAI.Internal;
using OpenAI.Chat;
using System.ClientModel;
using System.ClientModel.Primitives;
using System.Diagnostics.CodeAnalysis;

#pragma warning disable AOAI001
#pragma warning disable AZC0112

namespace Azure.AI.OpenAI.Chat;
@@ -63,33 +66,86 @@ public override CollectionResult<StreamingChatCompletionUpdate> CompleteChatStre
/// <inheritdoc/>
public override AsyncCollectionResult<StreamingChatCompletionUpdate> CompleteChatStreamingAsync(IEnumerable<ChatMessage> messages, ChatCompletionOptions options = null, CancellationToken cancellationToken = default)
{
- PostfixClearStreamOptions(ref options);
+ PostfixClearStreamOptions(messages, ref options);
PostfixSwapMaxTokens(ref options);
return base.CompleteChatStreamingAsync(messages, options, cancellationToken);
}

/// <inheritdoc/>
public override CollectionResult<StreamingChatCompletionUpdate> CompleteChatStreaming(IEnumerable<ChatMessage> messages, ChatCompletionOptions options = null, CancellationToken cancellationToken = default)
{
- PostfixClearStreamOptions(ref options);
+ PostfixClearStreamOptions(messages, ref options);
PostfixSwapMaxTokens(ref options);
return base.CompleteChatStreaming(messages, options, cancellationToken);
}

/**
 * As of 2024-09-01-preview, stream_options.include_usage (which reports token usage while streaming)
 * is only conditionally supported:
 * - When using On Your Data (a non-null data_sources collection), stream_options is not considered valid
 * - When using image input (any content part of "image" type), stream_options is not considered valid
 * - Otherwise, stream_options is enabled by default for parity with the OpenAI surface
 */
- private static void PostfixClearStreamOptions(ref ChatCompletionOptions options)
+ private static void PostfixClearStreamOptions(IEnumerable<ChatMessage> messages, ref ChatCompletionOptions options)
{
- options ??= new();
- options.StreamOptions = null;
if (AdditionalPropertyHelpers
.GetAdditionalListProperty<ChatDataSource>(options?.SerializedAdditionalRawData, "data_sources")?.Count > 0
|| messages?.Any(
message => message?.Content?.Any(
contentPart => contentPart?.Kind == ChatMessageContentPartKind.Image) == true)
== true)
{
options ??= new();
options.StreamOptions = null;
}
}

/**
 * As of 2024-09-01-preview, Azure OpenAI conditionally supports use of the new max_completion_tokens property:
 * - The o1-mini and o1-preview models accept max_completion_tokens and reject max_tokens
 * - All other models reject max_completion_tokens and accept max_tokens
 * To handle this, each request manipulates serialization overrides as follows:
 * - If no max token value is set, no action is taken
 * - If serialization of max_tokens has already been blocked (e.g. via the public extension method), no
 *   additional fixup is applied and ordinary serialization to max_completion_tokens occurs
 * - Otherwise, serialization of max_completion_tokens is blocked and an override serialization of the
 *   corresponding max_tokens value is established
 */
private static void PostfixSwapMaxTokens(ref ChatCompletionOptions options)
{
options ??= new();
- if (options.MaxOutputTokenCount is not null)
bool valueIsSet = options.MaxOutputTokenCount is not null;
bool oldPropertyBlocked = AdditionalPropertyHelpers.GetIsEmptySentinelValue(options.SerializedAdditionalRawData, "max_tokens");

if (valueIsSet)
{
if (!oldPropertyBlocked)
{
options.SerializedAdditionalRawData ??= new ChangeTrackingDictionary<string, BinaryData>();
AdditionalPropertyHelpers.SetEmptySentinelValue(options.SerializedAdditionalRawData, "max_completion_tokens");
options.SerializedAdditionalRawData["max_tokens"] = BinaryData.FromObjectAsJson(options.MaxOutputTokenCount);
}
else
{
// Allow standard serialization to the new property to occur; remove overrides
if (options.SerializedAdditionalRawData.ContainsKey("max_completion_tokens"))
{
options.SerializedAdditionalRawData.Remove("max_completion_tokens");
}
}
}
else
{
- options.SerializedAdditionalRawData ??= new Dictionary<string, BinaryData>();
- options.SerializedAdditionalRawData["max_completion_tokens"] = BinaryData.FromObjectAsJson("__EMPTY__");
- options.SerializedAdditionalRawData["max_tokens"] = BinaryData.FromObjectAsJson(options.MaxOutputTokenCount);
if (!AdditionalPropertyHelpers.GetIsEmptySentinelValue(options.SerializedAdditionalRawData, "max_tokens")
&& options.SerializedAdditionalRawData?.ContainsKey("max_tokens") == true)
{
options.SerializedAdditionalRawData.Remove("max_tokens");
}
if (!AdditionalPropertyHelpers.GetIsEmptySentinelValue(options.SerializedAdditionalRawData, "max_completion_tokens")
&& options.SerializedAdditionalRawData?.ContainsKey("max_completion_tokens") == true)
{
options.SerializedAdditionalRawData.Remove("max_completion_tokens");
}
}
}
}
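
For context, a hedged sketch of the caller-visible effect of `PostfixClearStreamOptions`: once a data source is attached, `StreamOptions` is cleared before the request goes out, so streamed updates are not expected to carry `Usage`. The search endpoint, index name, and key are placeholders:

```csharp
#pragma warning disable AOAI001 // On Your Data extensions are experimental

using System;
using Azure.AI.OpenAI.Chat;
using OpenAI.Chat;

ChatCompletionOptions options = new();
options.AddDataSource(new AzureSearchChatDataSource()
{
    Endpoint = new Uri("https://my-search.search.windows.net"),            // placeholder
    IndexName = "my-index",                                                // placeholder
    Authentication = DataSourceAuthentication.FromApiKey("my-search-key"), // placeholder
});
// During CompleteChatStreaming(Async), the fixup detects the data source and
// nulls out options.StreamOptions, so stream_options is omitted from the
// request and StreamingChatCompletionUpdate.Usage is expected to stay null.
```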
@@ -36,6 +36,26 @@ public static IReadOnlyList<ChatDataSource> GetDataSources(this ChatCompletionOp
"data_sources") as IReadOnlyList<ChatDataSource>;
}

[Experimental("AOAI001")]
public static void SetNewMaxCompletionTokensPropertyEnabled(this ChatCompletionOptions options, bool newPropertyEnabled = true)
{
if (newPropertyEnabled)
{
// Blocking serialization of max_tokens via the dictionary acts as a signal to skip the pre-serialization fixup
options.SerializedAdditionalRawData ??= new ChangeTrackingDictionary<string, BinaryData>();
AdditionalPropertyHelpers.SetEmptySentinelValue(options.SerializedAdditionalRawData, "max_tokens");
}
else
{
// In the absence of a dictionary serialization block to max_tokens, the newer property name will
// automatically be blocked and the older property name will be used via dictionary override
if (options?.SerializedAdditionalRawData?.ContainsKey("max_tokens") == true)
{
options?.SerializedAdditionalRawData?.Remove("max_tokens");
}
}
}


[Experimental("AOAI001")]
public static RequestContentFilterResult GetRequestContentFilterResult(this ChatCompletion chatCompletion)
{
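
Illustrative usage of the new `SetNewMaxCompletionTokensPropertyEnabled` extension with an o1-family deployment; the endpoint, key, and deployment name are placeholders:

```csharp
using System;
using System.ClientModel;
using Azure.AI.OpenAI;
using Azure.AI.OpenAI.Chat;
using OpenAI.Chat;

#pragma warning disable AOAI001 // the extension method is experimental

AzureOpenAIClient azureClient = new(
    new Uri("https://my-resource.openai.azure.com/"), // placeholder endpoint
    new ApiKeyCredential("my-api-key"));              // placeholder credential

ChatCompletionOptions options = new() { MaxOutputTokenCount = 1000 };
// o1-mini and o1-preview accept max_completion_tokens and reject max_tokens,
// so opt in to serializing the new property name for these models:
options.SetNewMaxCompletionTokensPropertyEnabled();

ChatClient client = azureClient.GetChatClient("my-o1-mini-deployment"); // placeholder
ChatCompletion completion = await client.CompleteChatAsync(
    [new UserChatMessage("Summarize this PR's max token handling.")], options);
```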
@@ -8,6 +8,8 @@ namespace Azure.AI.OpenAI.Internal;

internal static class AdditionalPropertyHelpers
{
private const string SARD_EMPTY_SENTINEL = "__EMPTY__";

internal static T GetAdditionalProperty<T>(IDictionary<string, BinaryData> additionalProperties, string key)
where T : class, IJsonModel<T>
{
@@ -45,4 +47,17 @@ internal static void SetAdditionalProperty<T>(IDictionary<string, BinaryData> ad
BinaryData binaryValue = BinaryData.FromStream(stream);
additionalProperties[key] = binaryValue;
}

internal static void SetEmptySentinelValue(IDictionary<string, BinaryData> additionalProperties, string key)
{
Argument.AssertNotNull(additionalProperties, nameof(additionalProperties));
additionalProperties[key] = BinaryData.FromObjectAsJson(SARD_EMPTY_SENTINEL);
}

internal static bool GetIsEmptySentinelValue(IDictionary<string, BinaryData> additionalProperties, string key)
{
return additionalProperties is not null
&& additionalProperties.TryGetValue(key, out BinaryData existingValue)
&& StringComparer.OrdinalIgnoreCase.Equals(existingValue.ToString(), $@"""{SARD_EMPTY_SENTINEL}""");
}
}
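
A quick sketch of the sentinel round-trip these internal helpers implement (shown only to illustrate the contract):

```csharp
using System;
using System.Collections.Generic;
using Azure.AI.OpenAI.Internal;

Dictionary<string, BinaryData> raw = new();

// SetEmptySentinelValue stores the JSON string "__EMPTY__" under the key...
AdditionalPropertyHelpers.SetEmptySentinelValue(raw, "max_tokens");

// ...and GetIsEmptySentinelValue matches that exact JSON text (quotes included,
// compared case-insensitively), distinguishing the sentinel from real values.
bool blocked = AdditionalPropertyHelpers.GetIsEmptySentinelValue(raw, "max_tokens");             // true
bool otherKey = AdditionalPropertyHelpers.GetIsEmptySentinelValue(raw, "max_completion_tokens"); // false

raw["max_tokens"] = BinaryData.FromObjectAsJson(42);
bool nowRealValue = AdditionalPropertyHelpers.GetIsEmptySentinelValue(raw, "max_tokens");        // false
```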
@@ -87,6 +87,7 @@ public async Task ChatWithImagesStreaming(bool useUri)
{
bool foundPromptFilter = false;
bool foundResponseFilter = false;
ChatTokenUsage? usage = null;
StringBuilder content = new();

ChatClient client = GetTestClient("vision");
@@ -123,9 +124,11 @@ public async Task ChatWithImagesStreaming(bool useUri)

await foreach (StreamingChatCompletionUpdate update in response)
{
- ValidateUpdate(update, content, ref foundPromptFilter, ref foundResponseFilter);
+ ValidateUpdate(update, content, ref foundPromptFilter, ref foundResponseFilter, ref usage);
}

// Assert.That(usage, Is.Not.Null);

// TODO FIXME: gpt-4o models seem to return inconsistent prompt filters to skip this for now
//Assert.That(foundPromptFilter, Is.True);

77 changes: 74 additions & 3 deletions .dotnet.azure/sdk/openai/Azure.AI.OpenAI/tests/ChatTests.cs
@@ -142,6 +142,69 @@ public void DataSourceSerializationWorks()
Assert.That(sourcesFromOptions[1], Is.InstanceOf<CosmosChatDataSource>());
}

#if !AZURE_OPENAI_GA
[Test]
[Category("Smoke")]
public async Task MaxTokensSerializationConfigurationWorks()
{
using MockHttpMessageHandler pipeline = new(MockHttpMessageHandler.ReturnEmptyJson);

Uri endpoint = new Uri("https://www.bing.com/");
string apiKey = "not-a-real-one";
string model = "ignore";

AzureOpenAIClient topLevel = new(
endpoint,
new ApiKeyCredential(apiKey),
new AzureOpenAIClientOptions()
{
Transport = pipeline.Transport
});

ChatClient client = topLevel.GetChatClient(model);

ChatCompletionOptions options = new();
bool GetSerializedOptionsContains(string value)
{
BinaryData serialized = ModelReaderWriter.Write(options);
return serialized.ToString().Contains(value);
}
async Task AssertExpectedSerializationAsync(bool hasOldMaxTokens, bool hasNewMaxCompletionTokens)
{
_ = await client.CompleteChatAsync(["Just mocking, no call here"], options);
Assert.That(GetSerializedOptionsContains("max_tokens"), Is.EqualTo(hasOldMaxTokens));
Assert.That(GetSerializedOptionsContains("max_completion_tokens"), Is.EqualTo(hasNewMaxCompletionTokens));
}

await AssertExpectedSerializationAsync(false, false);
await AssertExpectedSerializationAsync(false, false);

options.MaxOutputTokenCount = 42;
await AssertExpectedSerializationAsync(true, false);
await AssertExpectedSerializationAsync(true, false);
options.MaxOutputTokenCount = null;
await AssertExpectedSerializationAsync(false, false);
options.MaxOutputTokenCount = 42;
await AssertExpectedSerializationAsync(true, false);

options.SetNewMaxCompletionTokensPropertyEnabled();
await AssertExpectedSerializationAsync(false, true);
await AssertExpectedSerializationAsync(false, true);
options.MaxOutputTokenCount = null;
await AssertExpectedSerializationAsync(false, false);
options.MaxOutputTokenCount = 42;
await AssertExpectedSerializationAsync(false, true);

options.SetNewMaxCompletionTokensPropertyEnabled(false);
await AssertExpectedSerializationAsync(true, false);
await AssertExpectedSerializationAsync(true, false);
options.MaxOutputTokenCount = null;
await AssertExpectedSerializationAsync(false, false);
options.MaxOutputTokenCount = 42;
await AssertExpectedSerializationAsync(true, false);
}
#endif

[RecordedTest]
public async Task ChatCompletionBadKeyGivesHelpfulError()
{
@@ -492,6 +555,7 @@ public async Task ChatCompletionStreaming()
StringBuilder builder = new();
bool foundPromptFilter = false;
bool foundResponseFilter = false;
ChatTokenUsage? usage = null;

ChatClient chatClient = GetTestClient();

@@ -512,12 +576,14 @@

await foreach (StreamingChatCompletionUpdate update in streamingResults)
{
- ValidateUpdate(update, builder, ref foundPromptFilter, ref foundResponseFilter);
+ ValidateUpdate(update, builder, ref foundPromptFilter, ref foundResponseFilter, ref usage);
}

string allText = builder.ToString();
Assert.That(allText, Is.Not.Null.Or.Empty);

Assert.That(usage, Is.Not.Null);

Assert.That(foundPromptFilter, Is.True);
Assert.That(foundResponseFilter, Is.True);
}
Expand All @@ -528,6 +594,7 @@ public async Task SearchExtensionWorksStreaming()
StringBuilder builder = new();
bool foundPromptFilter = false;
bool foundResponseFilter = false;
ChatTokenUsage? usage = null;
List<ChatMessageContext> contexts = new();

var searchConfig = TestConfig.GetConfig("search")!;
@@ -555,7 +622,7 @@ public async Task SearchExtensionWorksStreaming()

await foreach (StreamingChatCompletionUpdate update in chatUpdates)
{
- ValidateUpdate(update, builder, ref foundPromptFilter, ref foundResponseFilter);
+ ValidateUpdate(update, builder, ref foundPromptFilter, ref foundResponseFilter, ref usage);

ChatMessageContext context = update.GetMessageContext();
if (context != null)
@@ -567,6 +634,8 @@
string allText = builder.ToString();
Assert.That(allText, Is.Not.Null.Or.Empty);

// Assert.That(usage, Is.Not.Null);

// TODO FIXME: When using data sources, the service does not appear to return request nor response filtering information
//Assert.That(foundPromptFilter, Is.True);
//Assert.That(foundResponseFilter, Is.True);
@@ -636,7 +705,7 @@ in client.CompleteChatStreamingAsync(
#endregion
#region Helper methods

- private void ValidateUpdate(StreamingChatCompletionUpdate update, StringBuilder builder, ref bool foundPromptFilter, ref bool foundResponseFilter)
+ private void ValidateUpdate(StreamingChatCompletionUpdate update, StringBuilder builder, ref bool foundPromptFilter, ref bool foundResponseFilter, ref ChatTokenUsage? usage)
{
if (update.CreatedAt == UNIX_EPOCH)
{
@@ -656,6 +725,8 @@ private void ValidateUpdate(StreamingChatCompletionUpdate update, StringBuilder
Assert.That(update.FinishReason, Is.Null.Or.EqualTo(ChatFinishReason.Stop));
if (update.Usage != null)
{
Assert.That(usage, Is.Null);
usage = update.Usage;
Assert.That(update.Usage.InputTokenCount, Is.GreaterThanOrEqualTo(0));
Assert.That(update.Usage.OutputTokenCount, Is.GreaterThanOrEqualTo(0));
Assert.That(update.Usage.TotalTokenCount, Is.GreaterThanOrEqualTo(0));
@@ -24,10 +24,16 @@

namespace Azure.AI.OpenAI.Tests;

[Category("FineTuning")]
public class FineTuningTests : AoaiTestBase<FineTuningClient>
{
public FineTuningTests(bool isAsync) : base(isAsync)
- { }
{
if (Mode == RecordedTestMode.Playback)
{
Assert.Inconclusive("Playback for fine-tuning temporarily disabled");
}
}

#if !AZURE_OPENAI_GA
[Test]
@@ -223,7 +229,7 @@ public async Task CreateAndDeleteFineTuning()
}
catch (ClientResultException e)
{
- if (e.Message.Contains("ResourceNotFound"))
+ if(e.Message.Contains("ResourceNotFound"))
{
// upload training data
uploadedFile = await UploadAndWaitForCompleteOrFail(fileClient, fineTuningFile.RelativePath);