diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/LoggingSpeechToTextClientBuilderExtensions.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/LoggingSpeechToTextClientBuilderExtensions.cs
index 92a67189982..54ed411bd35 100644
--- a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/LoggingSpeechToTextClientBuilderExtensions.cs
+++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/LoggingSpeechToTextClientBuilderExtensions.cs
@@ -14,7 +14,7 @@ namespace Microsoft.Extensions.AI;
[Experimental("MEAI001")]
public static class LoggingSpeechToTextClientBuilderExtensions
{
- /// <summary>Adds logging to the audio transcription client pipeline.</summary>
+ /// <summary>Adds logging to the speech-to-text client pipeline.</summary>
/// <param name="builder">The <see cref="SpeechToTextClientBuilder"/>.</param>
/// <param name="loggerFactory">
/// An optional <see cref="ILoggerFactory"/> used to create a logger with which logging should be performed.
diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClient.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClient.cs
new file mode 100644
index 00000000000..40461ebe457
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClient.cs
@@ -0,0 +1,367 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using System.Diagnostics.Metrics;
+using System.IO;
+using System.Runtime.CompilerServices;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.Extensions.Logging;
+using Microsoft.Shared.Diagnostics;
+
+#pragma warning disable S3358 // Ternary operators should not be nested
+#pragma warning disable SA1111 // Closing parenthesis should be on line of last parameter
+#pragma warning disable SA1113 // Comma should be on the same line as previous parameter
+
+namespace Microsoft.Extensions.AI;
+
+/// <summary>Represents a delegating speech-to-text client that implements the OpenTelemetry Semantic Conventions for Generative AI systems.</summary>
+/// <remarks>
+/// This class provides an implementation of the Semantic Conventions for Generative AI systems v1.37, defined at <see href="https://opentelemetry.io/docs/specs/semconv/gen-ai/" />.
+/// The specification is still experimental and subject to change; as such, the telemetry output by this client is also subject to change.
+/// </remarks>
+[Experimental("MEAI001")]
+public sealed class OpenTelemetrySpeechToTextClient : DelegatingSpeechToTextClient
+{
+ private readonly ActivitySource _activitySource;
+ private readonly Meter _meter;
+
+ private readonly Histogram<int> _tokenUsageHistogram;
+ private readonly Histogram<double> _operationDurationHistogram;
+
+ private readonly string? _defaultModelId;
+ private readonly string? _providerName;
+ private readonly string? _serverAddress;
+ private readonly int _serverPort;
+
+ /// <summary>Initializes a new instance of the <see cref="OpenTelemetrySpeechToTextClient"/> class.</summary>
+ /// <param name="innerClient">The underlying <see cref="ISpeechToTextClient"/>.</param>
+ /// <param name="logger">The <see cref="ILogger"/> to use for emitting any logging data from the client.</param>
+ /// <param name="sourceName">An optional source name that will be used on the telemetry data.</param>
+#pragma warning disable IDE0060 // Remove unused parameter; it exists for consistency with IChatClient and future use
+ public OpenTelemetrySpeechToTextClient(ISpeechToTextClient innerClient, ILogger? logger = null, string? sourceName = null)
+#pragma warning restore IDE0060
+ : base(innerClient)
+ {
+ Debug.Assert(innerClient is not null, "Should have been validated by the base ctor");
+
+ if (innerClient!.GetService<SpeechToTextClientMetadata>() is SpeechToTextClientMetadata metadata)
+ {
+ _defaultModelId = metadata.DefaultModelId;
+ _providerName = metadata.ProviderName;
+ _serverAddress = metadata.ProviderUri?.Host;
+ _serverPort = metadata.ProviderUri?.Port ?? 0;
+ }
+
+ string name = string.IsNullOrEmpty(sourceName) ? OpenTelemetryConsts.DefaultSourceName : sourceName!;
+ _activitySource = new(name);
+ _meter = new(name);
+
+ _tokenUsageHistogram = _meter.CreateHistogram<int>(
+ OpenTelemetryConsts.GenAI.Client.TokenUsage.Name,
+ OpenTelemetryConsts.TokensUnit,
+ OpenTelemetryConsts.GenAI.Client.TokenUsage.Description
+#if NET9_0_OR_GREATER
+ , advice: new() { HistogramBucketBoundaries = OpenTelemetryConsts.GenAI.Client.TokenUsage.ExplicitBucketBoundaries }
+#endif
+ );
+
+ _operationDurationHistogram = _meter.CreateHistogram<double>(
+ OpenTelemetryConsts.GenAI.Client.OperationDuration.Name,
+ OpenTelemetryConsts.SecondsUnit,
+ OpenTelemetryConsts.GenAI.Client.OperationDuration.Description
+#if NET9_0_OR_GREATER
+ , advice: new() { HistogramBucketBoundaries = OpenTelemetryConsts.GenAI.Client.OperationDuration.ExplicitBucketBoundaries }
+#endif
+ );
+ }
+
+ /// <inheritdoc/>
+ protected override void Dispose(bool disposing)
+ {
+ if (disposing)
+ {
+ _activitySource.Dispose();
+ _meter.Dispose();
+ }
+
+ base.Dispose(disposing);
+ }
+
+ /// <summary>
+ /// Gets or sets a value indicating whether potentially sensitive information should be included in telemetry.
+ /// </summary>
+ /// <value>
+ /// <see langword="true"/> if potentially sensitive information should be included in telemetry;
+ /// <see langword="false"/> if telemetry shouldn't include raw inputs and outputs.
+ /// The default value is <see langword="false"/>, unless the OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT
+ /// environment variable is set to "true" (case-insensitive).
+ /// </value>
+ /// <remarks>
+ /// By default, telemetry includes metadata, such as token counts, but not raw inputs
+ /// and outputs, such as message content, function call arguments, and function call results.
+ /// The default value can be overridden by setting the OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT
+ /// environment variable to "true". Explicitly setting this property will override the environment variable.
+ /// </remarks>
+ public bool EnableSensitiveData { get; set; } = TelemetryHelpers.EnableSensitiveDataDefault;
+
+ /// <inheritdoc/>
+ public override object? GetService(Type serviceType, object? serviceKey = null) =>
+ serviceType == typeof(ActivitySource) ? _activitySource :
+ base.GetService(serviceType, serviceKey);
+
+ /// <inheritdoc/>
+ public override async Task<SpeechToTextResponse> GetTextAsync(Stream audioSpeechStream, SpeechToTextOptions? options = null, CancellationToken cancellationToken = default)
+ {
+ _ = Throw.IfNull(audioSpeechStream);
+
+ using Activity? activity = CreateAndConfigureActivity(options);
+ Stopwatch? stopwatch = _operationDurationHistogram.Enabled ? Stopwatch.StartNew() : null;
+ string? requestModelId = options?.ModelId ?? _defaultModelId;
+
+ SpeechToTextResponse? response = null;
+ Exception? error = null;
+ try
+ {
+ response = await base.GetTextAsync(audioSpeechStream, options, cancellationToken);
+ return response;
+ }
+ catch (Exception ex)
+ {
+ error = ex;
+ throw;
+ }
+ finally
+ {
+ TraceResponse(activity, requestModelId, response, error, stopwatch);
+ }
+ }
+
+ /// <inheritdoc/>
+ public override async IAsyncEnumerable<SpeechToTextResponseUpdate> GetStreamingTextAsync(
+ Stream audioSpeechStream, SpeechToTextOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+ {
+ _ = Throw.IfNull(audioSpeechStream);
+
+ using Activity? activity = CreateAndConfigureActivity(options);
+ Stopwatch? stopwatch = _operationDurationHistogram.Enabled ? Stopwatch.StartNew() : null;
+ string? requestModelId = options?.ModelId ?? _defaultModelId;
+
+ IAsyncEnumerable<SpeechToTextResponseUpdate> updates;
+ try
+ {
+ updates = base.GetStreamingTextAsync(audioSpeechStream, options, cancellationToken);
+ }
+ catch (Exception ex)
+ {
+ TraceResponse(activity, requestModelId, response: null, ex, stopwatch);
+ throw;
+ }
+
+ var responseEnumerator = updates.GetAsyncEnumerator(cancellationToken);
+ List<SpeechToTextResponseUpdate> trackedUpdates = [];
+ Exception? error = null;
+ try
+ {
+ while (true)
+ {
+ SpeechToTextResponseUpdate update;
+ try
+ {
+ if (!await responseEnumerator.MoveNextAsync())
+ {
+ break;
+ }
+
+ update = responseEnumerator.Current;
+ }
+ catch (Exception ex)
+ {
+ error = ex;
+ throw;
+ }
+
+ trackedUpdates.Add(update);
+ yield return update;
+ Activity.Current = activity; // workaround for https://github.com/dotnet/runtime/issues/47802
+ }
+ }
+ finally
+ {
+ TraceResponse(activity, requestModelId, trackedUpdates.ToSpeechToTextResponse(), error, stopwatch);
+
+ await responseEnumerator.DisposeAsync();
+ }
+ }
+
+ /// <summary>Creates an activity for a speech-to-text request, or returns <see langword="null"/> if not enabled.</summary>
+ private Activity? CreateAndConfigureActivity(SpeechToTextOptions? options)
+ {
+ Activity? activity = null;
+ if (_activitySource.HasListeners())
+ {
+ string? modelId = options?.ModelId ?? _defaultModelId;
+
+ activity = _activitySource.StartActivity(
+ string.IsNullOrWhiteSpace(modelId) ? OpenTelemetryConsts.GenAI.GenerateContentName : $"{OpenTelemetryConsts.GenAI.GenerateContentName} {modelId}",
+ ActivityKind.Client);
+
+ if (activity is { IsAllDataRequested: true })
+ {
+ _ = activity
+ .AddTag(OpenTelemetryConsts.GenAI.Operation.Name, OpenTelemetryConsts.GenAI.GenerateContentName)
+ .AddTag(OpenTelemetryConsts.GenAI.Request.Model, modelId)
+ .AddTag(OpenTelemetryConsts.GenAI.Provider.Name, _providerName)
+ .AddTag(OpenTelemetryConsts.GenAI.Output.Type, OpenTelemetryConsts.TypeText);
+
+ if (_serverAddress is not null)
+ {
+ _ = activity
+ .AddTag(OpenTelemetryConsts.Server.Address, _serverAddress)
+ .AddTag(OpenTelemetryConsts.Server.Port, _serverPort);
+ }
+
+ if (options is not null)
+ {
+ if (EnableSensitiveData)
+ {
+ // Log all additional request options as raw values on the span.
+ // Since AdditionalProperties has undefined meaning, we treat it as potentially sensitive data.
+ if (options.AdditionalProperties is { } props)
+ {
+ foreach (KeyValuePair<string, object?> prop in props)
+ {
+ _ = activity.AddTag(prop.Key, prop.Value);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return activity;
+ }
+
+ /// <summary>Adds speech-to-text response information to the activity.</summary>
+ private void TraceResponse(
+ Activity? activity,
+ string? requestModelId,
+ SpeechToTextResponse? response,
+ Exception? error,
+ Stopwatch? stopwatch)
+ {
+ if (_operationDurationHistogram.Enabled && stopwatch is not null)
+ {
+ TagList tags = default;
+
+ AddMetricTags(ref tags, requestModelId, response);
+ if (error is not null)
+ {
+ tags.Add(OpenTelemetryConsts.Error.Type, error.GetType().FullName);
+ }
+
+ _operationDurationHistogram.Record(stopwatch.Elapsed.TotalSeconds, tags);
+ }
+
+ if (_tokenUsageHistogram.Enabled && response?.Usage is { } usage)
+ {
+ if (usage.InputTokenCount is long inputTokens)
+ {
+ TagList tags = default;
+ tags.Add(OpenTelemetryConsts.GenAI.Token.Type, OpenTelemetryConsts.TokenTypeInput);
+ AddMetricTags(ref tags, requestModelId, response);
+ _tokenUsageHistogram.Record((int)inputTokens, tags);
+ }
+
+ if (usage.OutputTokenCount is long outputTokens)
+ {
+ TagList tags = default;
+ tags.Add(OpenTelemetryConsts.GenAI.Token.Type, OpenTelemetryConsts.TokenTypeOutput);
+ AddMetricTags(ref tags, requestModelId, response);
+ _tokenUsageHistogram.Record((int)outputTokens, tags);
+ }
+ }
+
+ if (error is not null)
+ {
+ _ = activity?
+ .AddTag(OpenTelemetryConsts.Error.Type, error.GetType().FullName)
+ .SetStatus(ActivityStatusCode.Error, error.Message);
+ }
+
+ if (response is not null)
+ {
+ AddOutputMessagesTags(response, activity);
+
+ if (activity is not null)
+ {
+ if (!string.IsNullOrWhiteSpace(response.ResponseId))
+ {
+ _ = activity.AddTag(OpenTelemetryConsts.GenAI.Response.Id, response.ResponseId);
+ }
+
+ if (response.ModelId is not null)
+ {
+ _ = activity.AddTag(OpenTelemetryConsts.GenAI.Response.Model, response.ModelId);
+ }
+
+ if (response.Usage?.InputTokenCount is long inputTokens)
+ {
+ _ = activity.AddTag(OpenTelemetryConsts.GenAI.Usage.InputTokens, (int)inputTokens);
+ }
+
+ if (response.Usage?.OutputTokenCount is long outputTokens)
+ {
+ _ = activity.AddTag(OpenTelemetryConsts.GenAI.Usage.OutputTokens, (int)outputTokens);
+ }
+
+ // Log all additional response properties as raw values on the span.
+ // Since AdditionalProperties has undefined meaning, we treat it as potentially sensitive data.
+ if (EnableSensitiveData && response.AdditionalProperties is { } props)
+ {
+ foreach (KeyValuePair<string, object?> prop in props)
+ {
+ _ = activity.AddTag(prop.Key, prop.Value);
+ }
+ }
+ }
+ }
+
+ void AddMetricTags(ref TagList tags, string? requestModelId, SpeechToTextResponse? response)
+ {
+ tags.Add(OpenTelemetryConsts.GenAI.Operation.Name, OpenTelemetryConsts.GenAI.GenerateContentName);
+
+ if (requestModelId is not null)
+ {
+ tags.Add(OpenTelemetryConsts.GenAI.Request.Model, requestModelId);
+ }
+
+ tags.Add(OpenTelemetryConsts.GenAI.Provider.Name, _providerName);
+
+ if (_serverAddress is string endpointAddress)
+ {
+ tags.Add(OpenTelemetryConsts.Server.Address, endpointAddress);
+ tags.Add(OpenTelemetryConsts.Server.Port, _serverPort);
+ }
+
+ if (response?.ModelId is string responseModel)
+ {
+ tags.Add(OpenTelemetryConsts.GenAI.Response.Model, responseModel);
+ }
+ }
+ }
+
+ private void AddOutputMessagesTags(SpeechToTextResponse response, Activity? activity)
+ {
+ if (EnableSensitiveData && activity is { IsAllDataRequested: true })
+ {
+ _ = activity.AddTag(
+ OpenTelemetryConsts.GenAI.Output.Messages,
+ OpenTelemetryChatClient.SerializeChatMessages([new(ChatRole.Assistant, response.Contents)]));
+ }
+ }
+}
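
As a review aid, here is a minimal, hypothetical sketch (not part of this change) of how the spans and metrics emitted by `OpenTelemetrySpeechToTextClient` could be exported with the OpenTelemetry SDK. The source name `"MyApp.SpeechToText"`, the console exporters, and the `existingClient` placeholder are assumptions for illustration only.

```csharp
using Microsoft.Extensions.AI;
using OpenTelemetry;
using OpenTelemetry.Metrics;
using OpenTelemetry.Trace;

string sourceName = "MyApp.SpeechToText";

// The client creates an ActivitySource and a Meter from the provided source name,
// so subscribe to both to observe its spans and its gen_ai.client.* histograms.
using TracerProvider tracerProvider = Sdk.CreateTracerProviderBuilder()
    .AddSource(sourceName)
    .AddConsoleExporter()
    .Build();

using MeterProvider meterProvider = Sdk.CreateMeterProviderBuilder()
    .AddMeter(sourceName)
    .AddConsoleExporter()
    .Build();

ISpeechToTextClient existingClient = null!; // placeholder for a real ISpeechToTextClient
using var client = new OpenTelemetrySpeechToTextClient(existingClient, sourceName: sourceName)
{
    // Raw transcript content is only attached to spans when explicitly opted in.
    EnableSensitiveData = false,
};
```
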
diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClientBuilderExtensions.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClientBuilderExtensions.cs
new file mode 100644
index 00000000000..5e23a41358e
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClientBuilderExtensions.cs
@@ -0,0 +1,42 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Diagnostics.CodeAnalysis;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Logging;
+using Microsoft.Shared.Diagnostics;
+
+namespace Microsoft.Extensions.AI;
+
+/// <summary>Provides extensions for configuring <see cref="OpenTelemetrySpeechToTextClient"/> instances.</summary>
+[Experimental("MEAI001")]
+public static class OpenTelemetrySpeechToTextClientBuilderExtensions
+{
+ /// <summary>
+ /// Adds OpenTelemetry support to the speech-to-text client pipeline, following the OpenTelemetry Semantic Conventions for Generative AI systems.
+ /// </summary>
+ /// <remarks>
+ /// The draft specification this follows is available at <see href="https://opentelemetry.io/docs/specs/semconv/gen-ai/" />.
+ /// The specification is still experimental and subject to change; as such, the telemetry output by this client is also subject to change.
+ /// </remarks>
+ /// <param name="builder">The <see cref="SpeechToTextClientBuilder"/>.</param>
+ /// <param name="loggerFactory">An optional <see cref="ILoggerFactory"/> to use to create a logger for logging events.</param>
+ /// <param name="sourceName">An optional source name that will be used on the telemetry data.</param>
+ /// <param name="configure">An optional callback that can be used to configure the <see cref="OpenTelemetrySpeechToTextClient"/> instance.</param>
+ /// <returns>The <paramref name="builder"/>.</returns>
+ public static SpeechToTextClientBuilder UseOpenTelemetry(
+ this SpeechToTextClientBuilder builder,
+ ILoggerFactory? loggerFactory = null,
+ string? sourceName = null,
+ Action<OpenTelemetrySpeechToTextClient>? configure = null) =>
+ Throw.IfNull(builder).Use((innerClient, services) =>
+ {
+ loggerFactory ??= services.GetService<ILoggerFactory>();
+
+ var client = new OpenTelemetrySpeechToTextClient(innerClient, loggerFactory?.CreateLogger(typeof(OpenTelemetrySpeechToTextClient)), sourceName);
+ configure?.Invoke(client);
+
+ return client;
+ });
+}
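
For completeness, a sketch of the builder-based registration this file enables, mirroring the pattern exercised by the new test below; `innerClient` and the source name stand in for application-specific values.

```csharp
using Microsoft.Extensions.AI;
using Microsoft.Extensions.Logging;

ISpeechToTextClient innerClient = null!;   // placeholder for a real client
ILoggerFactory? loggerFactory = null;      // resolved from DI when omitted

using ISpeechToTextClient client = innerClient
    .AsBuilder()
    .UseOpenTelemetry(loggerFactory, sourceName: "MyApp.SpeechToText", configure: c =>
    {
        // Opt in explicitly before raw transcripts are recorded on spans.
        c.EnableSensitiveData = false;
    })
    .Build();
```
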
diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilder.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilder.cs
index dae4224a94d..1945a140762 100644
--- a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilder.cs
+++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilder.cs
@@ -58,7 +58,7 @@ public ISpeechToTextClient Build(IServiceProvider? services = null)
return audioClient;
}
- /// <summary>Adds a factory for an intermediate audio transcription client to the audio transcription client pipeline.</summary>
+ /// <summary>Adds a factory for an intermediate speech-to-text client to the speech-to-text client pipeline.</summary>
/// <param name="clientFactory">The client factory function.</param>
/// <returns>The updated <see cref="SpeechToTextClientBuilder"/> instance.</returns>
public SpeechToTextClientBuilder Use(Func<ISpeechToTextClient, ISpeechToTextClient> clientFactory)
@@ -68,7 +68,7 @@ public SpeechToTextClientBuilder Use(Func<ISpeechToTextClient, ISpeechToTextClient> clientFactory)
return Use((innerClient, _) => clientFactory(innerClient));
}
- /// <summary>Adds a factory for an intermediate audio transcription client to the audio transcription client pipeline.</summary>
+ /// <summary>Adds a factory for an intermediate speech-to-text client to the speech-to-text client pipeline.</summary>
/// <param name="clientFactory">The client factory function.</param>
/// <returns>The updated <see cref="SpeechToTextClientBuilder"/> instance.</returns>
public SpeechToTextClientBuilder Use(Func<ISpeechToTextClient, IServiceProvider, ISpeechToTextClient> clientFactory)
diff --git a/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/OpenTelemetrySpeechToTextClientTests.cs b/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/OpenTelemetrySpeechToTextClientTests.cs
new file mode 100644
index 00000000000..c243bf2bf12
--- /dev/null
+++ b/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/OpenTelemetrySpeechToTextClientTests.cs
@@ -0,0 +1,150 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Text.RegularExpressions;
+using System.Threading;
+using System.Threading.Tasks;
+using OpenTelemetry.Trace;
+using Xunit;
+
+namespace Microsoft.Extensions.AI;
+
+public class OpenTelemetrySpeechToTextClientTests
+{
+ [Fact]
+ public void InvalidArgs_Throws()
+ {
+ Assert.Throws<ArgumentNullException>("innerClient", () => new OpenTelemetrySpeechToTextClient(null!));
+ }
+
+ [Theory]
+ [InlineData(false, false)]
+ [InlineData(false, true)]
+ [InlineData(true, false)]
+ [InlineData(true, true)]
+ public async Task ExpectedInformationLogged_Async(bool streaming, bool enableSensitiveData)
+ {
+ var sourceName = Guid.NewGuid().ToString();
+ var activities = new List<Activity>();
+ using var tracerProvider = OpenTelemetry.Sdk.CreateTracerProviderBuilder()
+ .AddSource(sourceName)
+ .AddInMemoryExporter(activities)
+ .Build();
+
+ using var innerClient = new TestSpeechToTextClient
+ {
+ GetTextAsyncCallback = async (request, options, cancellationToken) =>
+ {
+ await Task.Yield();
+ return new("This is the recognized text.")
+ {
+ Usage = new()
+ {
+ InputTokenCount = 10,
+ OutputTokenCount = 20,
+ TotalTokenCount = 30,
+ },
+ };
+ },
+
+ GetStreamingTextAsyncCallback = TestClientStreamAsync,
+
+ GetServiceCallback = (serviceType, serviceKey) =>
+ serviceType == typeof(SpeechToTextClientMetadata) ? new SpeechToTextClientMetadata("testservice", new Uri("http://localhost:12345/something"), "amazingmodel") :
+ null,
+ };
+
+ static async IAsyncEnumerable<SpeechToTextResponseUpdate> TestClientStreamAsync(
+ Stream request, SpeechToTextOptions? options, [EnumeratorCancellation] CancellationToken cancellationToken)
+ {
+ await Task.Yield();
+ yield return new("This is");
+ yield return new(" the recognized");
+ yield return new()
+ {
+ Contents =
+ [
+ new TextContent(" text."),
+ new UsageContent(new()
+ {
+ InputTokenCount = 10,
+ OutputTokenCount = 20,
+ TotalTokenCount = 30,
+ }),
+ ]
+ };
+ }
+
+ using var client = innerClient
+ .AsBuilder()
+ .UseOpenTelemetry(null, sourceName, configure: instance =>
+ {
+ instance.EnableSensitiveData = enableSensitiveData;
+ })
+ .Build();
+
+ SpeechToTextOptions options = new()
+ {
+ ModelId = "mycoolspeechmodel",
+ AdditionalProperties = new()
+ {
+ ["service_tier"] = "value1",
+ ["SomethingElse"] = "value2",
+ },
+ };
+
+ var response = streaming ?
+ await client.GetStreamingTextAsync(Stream.Null, options).ToSpeechToTextResponseAsync() :
+ await client.GetTextAsync(Stream.Null, options);
+
+ var activity = Assert.Single(activities);
+
+ Assert.NotNull(activity.Id);
+ Assert.NotEmpty(activity.Id);
+
+ Assert.Equal("localhost", activity.GetTagItem("server.address"));
+ Assert.Equal(12345, (int)activity.GetTagItem("server.port")!);
+
+ Assert.Equal("generate_content mycoolspeechmodel", activity.DisplayName);
+ Assert.Equal("testservice", activity.GetTagItem("gen_ai.provider.name"));
+
+ Assert.Equal("mycoolspeechmodel", activity.GetTagItem("gen_ai.request.model"));
+ Assert.Equal(enableSensitiveData ? "value1" : null, activity.GetTagItem("service_tier"));
+ Assert.Equal(enableSensitiveData ? "value2" : null, activity.GetTagItem("SomethingElse"));
+
+ Assert.Equal(10, activity.GetTagItem("gen_ai.usage.input_tokens"));
+ Assert.Equal(20, activity.GetTagItem("gen_ai.usage.output_tokens"));
+
+ Assert.True(activity.Duration.TotalMilliseconds > 0);
+
+ var tags = activity.Tags.ToDictionary(kvp => kvp.Key, kvp => kvp.Value);
+ if (enableSensitiveData)
+ {
+ Assert.Equal(ReplaceWhitespace("""
+ [
+ {
+ "role": "assistant",
+ "parts": [
+ {
+ "type": "text",
+ "content": "This is the recognized text."
+ }
+ ]
+ }
+ ]
+ """), ReplaceWhitespace(tags["gen_ai.output.messages"]));
+ }
+ else
+ {
+ Assert.False(tags.ContainsKey("gen_ai.output.messages"));
+ }
+
+ static string ReplaceWhitespace(string? input) => Regex.Replace(input ?? "", @"\s+", " ").Trim();
+ }
+}