diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/LoggingSpeechToTextClientBuilderExtensions.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/LoggingSpeechToTextClientBuilderExtensions.cs
index 92a67189982..54ed411bd35 100644
--- a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/LoggingSpeechToTextClientBuilderExtensions.cs
+++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/LoggingSpeechToTextClientBuilderExtensions.cs
@@ -14,7 +14,7 @@ namespace Microsoft.Extensions.AI;
 [Experimental("MEAI001")]
 public static class LoggingSpeechToTextClientBuilderExtensions
 {
-    /// <summary>Adds logging to the audio transcription client pipeline.</summary>
+    /// <summary>Adds logging to the speech-to-text client pipeline.</summary>
     /// <param name="builder">The <see cref="SpeechToTextClientBuilder"/>.</param>
     /// <param name="loggerFactory">
     /// An optional <see cref="ILoggerFactory"/> used to create a logger with which logging should be performed.
diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClient.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClient.cs
new file mode 100644
index 00000000000..40461ebe457
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClient.cs
@@ -0,0 +1,367 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using System.Diagnostics.Metrics;
+using System.IO;
+using System.Runtime.CompilerServices;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.Extensions.Logging;
+using Microsoft.Shared.Diagnostics;
+
+#pragma warning disable S3358 // Ternary operators should not be nested
+#pragma warning disable SA1111 // Closing parenthesis should be on line of last parameter
+#pragma warning disable SA1113 // Comma should be on the same line as previous parameter
+
+namespace Microsoft.Extensions.AI;
+
+/// <summary>Represents a delegating speech-to-text client that implements the OpenTelemetry Semantic Conventions for Generative AI systems.</summary>
+/// <remarks>
+/// This class provides an implementation of the Semantic Conventions for Generative AI systems v1.37, defined at <see href="https://opentelemetry.io/docs/specs/semconv/gen-ai/" />.
+/// The specification is still experimental and subject to change; as such, the telemetry output by this client is also subject to change.
+/// </remarks>
+[Experimental("MEAI001")]
+public sealed class OpenTelemetrySpeechToTextClient : DelegatingSpeechToTextClient
+{
+    private readonly ActivitySource _activitySource;
+    private readonly Meter _meter;
+
+    private readonly Histogram<int> _tokenUsageHistogram;
+    private readonly Histogram<double> _operationDurationHistogram;
+
+    private readonly string? _defaultModelId;
+    private readonly string? _providerName;
+    private readonly string? _serverAddress;
+    private readonly int _serverPort;
+
+    /// <summary>Initializes a new instance of the <see cref="OpenTelemetrySpeechToTextClient"/> class.</summary>
+    /// <param name="innerClient">The underlying <see cref="ISpeechToTextClient"/>.</param>
+    /// <param name="logger">The <see cref="ILogger"/> to use for emitting any logging data from the client.</param>
+    /// <param name="sourceName">An optional source name that will be used on the telemetry data.</param>
+#pragma warning disable IDE0060 // Remove unused parameter; it exists for consistency with IChatClient and future use
+    public OpenTelemetrySpeechToTextClient(ISpeechToTextClient innerClient, ILogger? logger = null, string? sourceName = null)
+#pragma warning restore IDE0060
+        : base(innerClient)
+    {
+        Debug.Assert(innerClient is not null, "Should have been validated by the base ctor");
+
+        if (innerClient!.GetService<SpeechToTextClientMetadata>() is SpeechToTextClientMetadata metadata)
+        {
+            _defaultModelId = metadata.DefaultModelId;
+            _providerName = metadata.ProviderName;
+            _serverAddress = metadata.ProviderUri?.Host;
+            _serverPort = metadata.ProviderUri?.Port ?? 0;
+        }
+
+        string name = string.IsNullOrEmpty(sourceName) ? OpenTelemetryConsts.DefaultSourceName : sourceName!;
+        _activitySource = new(name);
+        _meter = new(name);
+
+        _tokenUsageHistogram = _meter.CreateHistogram<int>(
+            OpenTelemetryConsts.GenAI.Client.TokenUsage.Name,
+            OpenTelemetryConsts.TokensUnit,
+            OpenTelemetryConsts.GenAI.Client.TokenUsage.Description
+#if NET9_0_OR_GREATER
+            , advice: new() { HistogramBucketBoundaries = OpenTelemetryConsts.GenAI.Client.TokenUsage.ExplicitBucketBoundaries }
+#endif
+            );
+
+        _operationDurationHistogram = _meter.CreateHistogram<double>(
+            OpenTelemetryConsts.GenAI.Client.OperationDuration.Name,
+            OpenTelemetryConsts.SecondsUnit,
+            OpenTelemetryConsts.GenAI.Client.OperationDuration.Description
+#if NET9_0_OR_GREATER
+            , advice: new() { HistogramBucketBoundaries = OpenTelemetryConsts.GenAI.Client.OperationDuration.ExplicitBucketBoundaries }
+#endif
+            );
+    }
+
+    /// <inheritdoc/>
+    protected override void Dispose(bool disposing)
+    {
+        if (disposing)
+        {
+            _activitySource.Dispose();
+            _meter.Dispose();
+        }
+
+        base.Dispose(disposing);
+    }
+
+    /// <summary>
+    /// Gets or sets a value indicating whether potentially sensitive information should be included in telemetry.
+    /// </summary>
+    /// <value>
+    /// <see langword="true"/> if potentially sensitive information should be included in telemetry;
+    /// <see langword="false"/> if telemetry shouldn't include raw inputs and outputs.
+    /// The default value is <see langword="false"/>, unless the OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT
+    /// environment variable is set to "true" (case-insensitive).
+    /// </value>
+    /// <remarks>
+    /// By default, telemetry includes metadata, such as token counts, but not raw inputs
+    /// and outputs, such as message content, function call arguments, and function call results.
+    /// The default value can be overridden by setting the OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT
+    /// environment variable to "true". Explicitly setting this property will override the environment variable.
+    /// </remarks>
+    public bool EnableSensitiveData { get; set; } = TelemetryHelpers.EnableSensitiveDataDefault;
+
+    /// <inheritdoc/>
+    public override object? GetService(Type serviceType, object? serviceKey = null) =>
+        serviceType == typeof(ActivitySource) ? _activitySource :
+        base.GetService(serviceType, serviceKey);
+
+    /// <inheritdoc/>
+    public override async Task<SpeechToTextResponse> GetTextAsync(Stream audioSpeechStream, SpeechToTextOptions? options = null, CancellationToken cancellationToken = default)
+    {
+        _ = Throw.IfNull(audioSpeechStream);
+
+        using Activity? activity = CreateAndConfigureActivity(options);
+        Stopwatch? stopwatch = _operationDurationHistogram.Enabled ? Stopwatch.StartNew() : null;
+        string? requestModelId = options?.ModelId ?? _defaultModelId;
+
+        SpeechToTextResponse? response = null;
+        Exception? error = null;
+        try
+        {
+            response = await base.GetTextAsync(audioSpeechStream, options, cancellationToken);
+            return response;
+        }
+        catch (Exception ex)
+        {
+            error = ex;
+            throw;
+        }
+        finally
+        {
+            TraceResponse(activity, requestModelId, response, error, stopwatch);
+        }
+    }
+
+    /// <inheritdoc/>
+    public override async IAsyncEnumerable<SpeechToTextResponseUpdate> GetStreamingTextAsync(
+        Stream audioSpeechStream, SpeechToTextOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        _ = Throw.IfNull(audioSpeechStream);
+
+        using Activity? activity = CreateAndConfigureActivity(options);
+        Stopwatch? stopwatch = _operationDurationHistogram.Enabled ? Stopwatch.StartNew() : null;
+        string? requestModelId = options?.ModelId ?? _defaultModelId;
+
+        IAsyncEnumerable<SpeechToTextResponseUpdate> updates;
+        try
+        {
+            updates = base.GetStreamingTextAsync(audioSpeechStream, options, cancellationToken);
+        }
+        catch (Exception ex)
+        {
+            TraceResponse(activity, requestModelId, response: null, ex, stopwatch);
+            throw;
+        }
+
+        var responseEnumerator = updates.GetAsyncEnumerator(cancellationToken);
+        List<SpeechToTextResponseUpdate> trackedUpdates = [];
+        Exception? error = null;
+        try
+        {
+            while (true)
+            {
+                SpeechToTextResponseUpdate update;
+                try
+                {
+                    if (!await responseEnumerator.MoveNextAsync())
+                    {
+                        break;
+                    }
+
+                    update = responseEnumerator.Current;
+                }
+                catch (Exception ex)
+                {
+                    error = ex;
+                    throw;
+                }
+
+                trackedUpdates.Add(update);
+                yield return update;
+                Activity.Current = activity; // workaround for https://github.com/dotnet/runtime/issues/47802
+            }
+        }
+        finally
+        {
+            TraceResponse(activity, requestModelId, trackedUpdates.ToSpeechToTextResponse(), error, stopwatch);
+
+            await responseEnumerator.DisposeAsync();
+        }
+    }
+
+    /// <summary>Creates an activity for a speech-to-text request, or returns <see langword="null"/> if not enabled.</summary>
+    private Activity? CreateAndConfigureActivity(SpeechToTextOptions? options)
+    {
+        Activity? activity = null;
+        if (_activitySource.HasListeners())
+        {
+            string? modelId = options?.ModelId ?? _defaultModelId;
+
+            activity = _activitySource.StartActivity(
+                string.IsNullOrWhiteSpace(modelId) ? OpenTelemetryConsts.GenAI.GenerateContentName : $"{OpenTelemetryConsts.GenAI.GenerateContentName} {modelId}",
+                ActivityKind.Client);
+
+            if (activity is { IsAllDataRequested: true })
+            {
+                _ = activity
+                    .AddTag(OpenTelemetryConsts.GenAI.Operation.Name, OpenTelemetryConsts.GenAI.GenerateContentName)
+                    .AddTag(OpenTelemetryConsts.GenAI.Request.Model, modelId)
+                    .AddTag(OpenTelemetryConsts.GenAI.Provider.Name, _providerName)
+                    .AddTag(OpenTelemetryConsts.GenAI.Output.Type, OpenTelemetryConsts.TypeText);
+
+                if (_serverAddress is not null)
+                {
+                    _ = activity
+                        .AddTag(OpenTelemetryConsts.Server.Address, _serverAddress)
+                        .AddTag(OpenTelemetryConsts.Server.Port, _serverPort);
+                }
+
+                if (options is not null)
+                {
+                    if (EnableSensitiveData)
+                    {
+                        // Log all additional request options as raw values on the span.
+                        // Since AdditionalProperties has undefined meaning, we treat it as potentially sensitive data.
+                        if (options.AdditionalProperties is { } props)
+                        {
+                            foreach (KeyValuePair<string, object?> prop in props)
+                            {
+                                _ = activity.AddTag(prop.Key, prop.Value);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return activity;
+    }
+
+    /// <summary>Adds speech-to-text response information to the activity.</summary>
+    private void TraceResponse(
+        Activity? activity,
+        string? requestModelId,
+        SpeechToTextResponse? response,
+        Exception? error,
+        Stopwatch? stopwatch)
+    {
+        if (_operationDurationHistogram.Enabled && stopwatch is not null)
+        {
+            TagList tags = default;
+
+            AddMetricTags(ref tags, requestModelId, response);
+            if (error is not null)
+            {
+                tags.Add(OpenTelemetryConsts.Error.Type, error.GetType().FullName);
+            }
+
+            _operationDurationHistogram.Record(stopwatch.Elapsed.TotalSeconds, tags);
+        }
+
+        if (_tokenUsageHistogram.Enabled && response?.Usage is { } usage)
+        {
+            if (usage.InputTokenCount is long inputTokens)
+            {
+                TagList tags = default;
+                tags.Add(OpenTelemetryConsts.GenAI.Token.Type, OpenTelemetryConsts.TokenTypeInput);
+                AddMetricTags(ref tags, requestModelId, response);
+                _tokenUsageHistogram.Record((int)inputTokens, tags);
+            }
+
+            if (usage.OutputTokenCount is long outputTokens)
+            {
+                TagList tags = default;
+                tags.Add(OpenTelemetryConsts.GenAI.Token.Type, OpenTelemetryConsts.TokenTypeOutput);
+                AddMetricTags(ref tags, requestModelId, response);
+                _tokenUsageHistogram.Record((int)outputTokens, tags);
+            }
+        }
+
+        if (error is not null)
+        {
+            _ = activity?
+                .AddTag(OpenTelemetryConsts.Error.Type, error.GetType().FullName)
+                .SetStatus(ActivityStatusCode.Error, error.Message);
+        }
+
+        if (response is not null)
+        {
+            AddOutputMessagesTags(response, activity);
+
+            if (activity is not null)
+            {
+                if (!string.IsNullOrWhiteSpace(response.ResponseId))
+                {
+                    _ = activity.AddTag(OpenTelemetryConsts.GenAI.Response.Id, response.ResponseId);
+                }
+
+                if (response.ModelId is not null)
+                {
+                    _ = activity.AddTag(OpenTelemetryConsts.GenAI.Response.Model, response.ModelId);
+                }
+
+                if (response.Usage?.InputTokenCount is long inputTokens)
+                {
+                    _ = activity.AddTag(OpenTelemetryConsts.GenAI.Usage.InputTokens, (int)inputTokens);
+                }
+
+                if (response.Usage?.OutputTokenCount is long outputTokens)
+                {
+                    _ = activity.AddTag(OpenTelemetryConsts.GenAI.Usage.OutputTokens, (int)outputTokens);
+                }
+
+                // Log all additional response properties as raw values on the span.
+                // Since AdditionalProperties has undefined meaning, we treat it as potentially sensitive data.
+                if (EnableSensitiveData && response.AdditionalProperties is { } props)
+                {
+                    foreach (KeyValuePair<string, object?> prop in props)
+                    {
+                        _ = activity.AddTag(prop.Key, prop.Value);
+                    }
+                }
+            }
+        }
+
+        void AddMetricTags(ref TagList tags, string? requestModelId, SpeechToTextResponse? response)
+        {
+            tags.Add(OpenTelemetryConsts.GenAI.Operation.Name, OpenTelemetryConsts.GenAI.GenerateContentName);
+
+            if (requestModelId is not null)
+            {
+                tags.Add(OpenTelemetryConsts.GenAI.Request.Model, requestModelId);
+            }
+
+            tags.Add(OpenTelemetryConsts.GenAI.Provider.Name, _providerName);
+
+            if (_serverAddress is string endpointAddress)
+            {
+                tags.Add(OpenTelemetryConsts.Server.Address, endpointAddress);
+                tags.Add(OpenTelemetryConsts.Server.Port, _serverPort);
+            }
+
+            if (response?.ModelId is string responseModel)
+            {
+                tags.Add(OpenTelemetryConsts.GenAI.Response.Model, responseModel);
+            }
+        }
+    }
+
+    private void AddOutputMessagesTags(SpeechToTextResponse response, Activity? activity)
+    {
+        if (EnableSensitiveData && activity is { IsAllDataRequested: true })
+        {
+            _ = activity.AddTag(
+                OpenTelemetryConsts.GenAI.Output.Messages,
+                OpenTelemetryChatClient.SerializeChatMessages([new(ChatRole.Assistant, response.Contents)]));
+        }
+    }
+}
diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClientBuilderExtensions.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClientBuilderExtensions.cs
new file mode 100644
index 00000000000..5e23a41358e
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClientBuilderExtensions.cs
@@ -0,0 +1,42 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Diagnostics.CodeAnalysis;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Logging;
+using Microsoft.Shared.Diagnostics;
+
+namespace Microsoft.Extensions.AI;
+
+/// <summary>Provides extensions for configuring <see cref="OpenTelemetrySpeechToTextClient"/> instances.</summary>
+[Experimental("MEAI001")]
+public static class OpenTelemetrySpeechToTextClientBuilderExtensions
+{
+    /// <summary>
+    /// Adds OpenTelemetry support to the speech-to-text client pipeline, following the OpenTelemetry Semantic Conventions for Generative AI systems.
+    /// </summary>
+    /// <remarks>
+    /// The draft specification this follows is available at <see href="https://opentelemetry.io/docs/specs/semconv/gen-ai/" />.
+    /// The specification is still experimental and subject to change; as such, the telemetry output by this client is also subject to change.
+    /// </remarks>
+    /// <param name="builder">The <see cref="SpeechToTextClientBuilder"/>.</param>
+    /// <param name="loggerFactory">An optional <see cref="ILoggerFactory"/> to use to create a logger for logging events.</param>
+    /// <param name="sourceName">An optional source name that will be used on the telemetry data.</param>
+    /// <param name="configure">An optional callback that can be used to configure the <see cref="OpenTelemetrySpeechToTextClient"/> instance.</param>
+    /// <returns>The <paramref name="builder"/>.</returns>
+    public static SpeechToTextClientBuilder UseOpenTelemetry(
+        this SpeechToTextClientBuilder builder,
+        ILoggerFactory? loggerFactory = null,
+        string? sourceName = null,
+        Action<OpenTelemetrySpeechToTextClient>? configure = null) =>
+        Throw.IfNull(builder).Use((innerClient, services) =>
+        {
+            loggerFactory ??= services.GetService<ILoggerFactory>();
+
+            var client = new OpenTelemetrySpeechToTextClient(innerClient, loggerFactory?.CreateLogger(typeof(OpenTelemetrySpeechToTextClient)), sourceName);
+            configure?.Invoke(client);
+
+            return client;
+        });
+}
diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilder.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilder.cs
index dae4224a94d..1945a140762 100644
--- a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilder.cs
+++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilder.cs
@@ -58,7 +58,7 @@ public ISpeechToTextClient Build(IServiceProvider? services = null)
         return audioClient;
     }
 
-    /// <summary>Adds a factory for an intermediate audio transcription client to the audio transcription client pipeline.</summary>
+    /// <summary>Adds a factory for an intermediate speech-to-text client to the speech-to-text client pipeline.</summary>
     /// <param name="clientFactory">The client factory function.</param>
     /// <returns>The updated <see cref="SpeechToTextClientBuilder"/> instance.</returns>
     public SpeechToTextClientBuilder Use(Func<ISpeechToTextClient, ISpeechToTextClient> clientFactory)
@@ -68,7 +68,7 @@ public SpeechToTextClientBuilder Use(Func<ISpeechToTextClient, ISpeechToTextClient> clientFactory)
         return Use((innerClient, _) => clientFactory(innerClient));
     }
 
-    /// <summary>Adds a factory for an intermediate audio transcription client to the audio transcription client pipeline.</summary>
+    /// <summary>Adds a factory for an intermediate speech-to-text client to the speech-to-text client pipeline.</summary>
     /// <param name="clientFactory">The client factory function.</param>
     /// <returns>The updated <see cref="SpeechToTextClientBuilder"/> instance.</returns>
     public SpeechToTextClientBuilder Use(Func<ISpeechToTextClient, IServiceProvider, ISpeechToTextClient> clientFactory)
diff --git a/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/OpenTelemetrySpeechToTextClientTests.cs b/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/OpenTelemetrySpeechToTextClientTests.cs
new file mode 100644
index 00000000000..c243bf2bf12
--- /dev/null
+++ b/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/OpenTelemetrySpeechToTextClientTests.cs
@@ -0,0 +1,150 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Text.RegularExpressions;
+using System.Threading;
+using System.Threading.Tasks;
+using OpenTelemetry.Trace;
+using Xunit;
+
+namespace Microsoft.Extensions.AI;
+
+public class OpenTelemetrySpeechToTextClientTests
+{
+    [Fact]
+    public void InvalidArgs_Throws()
+    {
+        Assert.Throws<ArgumentNullException>("innerClient", () => new OpenTelemetrySpeechToTextClient(null!));
+    }
+
+    [Theory]
+    [InlineData(false, false)]
+    [InlineData(false, true)]
+    [InlineData(true, false)]
+    [InlineData(true, true)]
+    public async Task ExpectedInformationLogged_Async(bool streaming, bool enableSensitiveData)
+    {
+        var sourceName = Guid.NewGuid().ToString();
+        var activities = new List<Activity>();
+        using var tracerProvider = OpenTelemetry.Sdk.CreateTracerProviderBuilder()
+            .AddSource(sourceName)
+            .AddInMemoryExporter(activities)
+            .Build();
+
+        using var innerClient = new TestSpeechToTextClient
+        {
+            GetTextAsyncCallback = async (request, options, cancellationToken) =>
+            {
+                await Task.Yield();
+                return new("This is the recognized text.")
+                {
+                    Usage = new()
+                    {
+                        InputTokenCount = 10,
+                        OutputTokenCount = 20,
+                        TotalTokenCount = 30,
+                    },
+                };
+            },
+
+            GetStreamingTextAsyncCallback = TestClientStreamAsync,
+
+            GetServiceCallback = (serviceType, serviceKey) =>
+                serviceType == typeof(SpeechToTextClientMetadata) ? new SpeechToTextClientMetadata("testservice", new Uri("http://localhost:12345/something"), "amazingmodel") :
+                null,
+        };
+
+        static async IAsyncEnumerable<SpeechToTextResponseUpdate> TestClientStreamAsync(
+            Stream request, SpeechToTextOptions? options, [EnumeratorCancellation] CancellationToken cancellationToken)
+        {
+            await Task.Yield();
+            yield return new("This is");
+            yield return new(" the recognized");
+            yield return new()
+            {
+                Contents =
+                [
+                    new TextContent(" text."),
+                    new UsageContent(new()
+                    {
+                        InputTokenCount = 10,
+                        OutputTokenCount = 20,
+                        TotalTokenCount = 30,
+                    }),
+                ]
+            };
+        }
+
+        using var client = innerClient
+            .AsBuilder()
+            .UseOpenTelemetry(null, sourceName, configure: instance =>
+            {
+                instance.EnableSensitiveData = enableSensitiveData;
+            })
+            .Build();
+
+        SpeechToTextOptions options = new()
+        {
+            ModelId = "mycoolspeechmodel",
+            AdditionalProperties = new()
+            {
+                ["service_tier"] = "value1",
+                ["SomethingElse"] = "value2",
+            },
+        };
+
+        var response = streaming ?
+            await client.GetStreamingTextAsync(Stream.Null, options).ToSpeechToTextResponseAsync() :
+            await client.GetTextAsync(Stream.Null, options);
+
+        var activity = Assert.Single(activities);
+
+        Assert.NotNull(activity.Id);
+        Assert.NotEmpty(activity.Id);
+
+        Assert.Equal("localhost", activity.GetTagItem("server.address"));
+        Assert.Equal(12345, (int)activity.GetTagItem("server.port")!);
+
+        Assert.Equal("generate_content mycoolspeechmodel", activity.DisplayName);
+        Assert.Equal("testservice", activity.GetTagItem("gen_ai.provider.name"));
+
+        Assert.Equal("mycoolspeechmodel", activity.GetTagItem("gen_ai.request.model"));
+        Assert.Equal(enableSensitiveData ? "value1" : null, activity.GetTagItem("service_tier"));
+        Assert.Equal(enableSensitiveData ? "value2" : null, activity.GetTagItem("SomethingElse"));
+
+        Assert.Equal(10, activity.GetTagItem("gen_ai.usage.input_tokens"));
+        Assert.Equal(20, activity.GetTagItem("gen_ai.usage.output_tokens"));
+
+        Assert.True(activity.Duration.TotalMilliseconds > 0);
+
+        var tags = activity.Tags.ToDictionary(kvp => kvp.Key, kvp => kvp.Value);
+        if (enableSensitiveData)
+        {
+            Assert.Equal(ReplaceWhitespace("""
+                [
+                  {
+                    "role": "assistant",
+                    "parts": [
+                      {
+                        "type": "text",
+                        "content": "This is the recognized text."
+                      }
+                    ]
+                  }
+                ]
+                """), ReplaceWhitespace(tags["gen_ai.output.messages"]));
+        }
+        else
+        {
+            Assert.False(tags.ContainsKey("gen_ai.output.messages"));
+        }
+
+        static string ReplaceWhitespace(string? input) => Regex.Replace(input ?? "", @"\s+", " ").Trim();
+    }
+}
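
Reviewer note (illustrative, not part of the diff): the sketch below shows one way the new UseOpenTelemetry extension could be consumed, mirroring the AsBuilder/UseOpenTelemetry/Build and AddSource calls exercised in the test above. The SpeechToTextTelemetryExample type, the "MyApp.SpeechToText" source name, and the console exporters (from the OpenTelemetry.Exporter.Console package) are assumptions made for the example, not part of this change.

#pragma warning disable MEAI001 // ISpeechToTextClient and OpenTelemetrySpeechToTextClient are experimental

using System.IO;
using System.Threading.Tasks;
using Microsoft.Extensions.AI;
using OpenTelemetry;
using OpenTelemetry.Metrics;
using OpenTelemetry.Trace;

internal static class SpeechToTextTelemetryExample
{
    // 'innerClient' stands in for any concrete ISpeechToTextClient supplied by a provider SDK.
    public static async Task<SpeechToTextResponse> TranscribeWithTelemetryAsync(ISpeechToTextClient innerClient, Stream audio)
    {
        const string sourceName = "MyApp.SpeechToText"; // arbitrary, but must match AddSource/AddMeter below

        // Collect the activities and metrics emitted by OpenTelemetrySpeechToTextClient.
        using var tracerProvider = Sdk.CreateTracerProviderBuilder()
            .AddSource(sourceName)
            .AddConsoleExporter()
            .Build();

        using var meterProvider = Sdk.CreateMeterProviderBuilder()
            .AddMeter(sourceName)
            .AddConsoleExporter()
            .Build();

        // Decorate the client with the OpenTelemetry layer added in this PR.
        ISpeechToTextClient client = innerClient
            .AsBuilder()
            .UseOpenTelemetry(sourceName: sourceName, configure: c =>
            {
                // Opt in to recording raw inputs/outputs such as gen_ai.output.messages.
                c.EnableSensitiveData = true;
            })
            .Build();

        // Each call produces a "generate_content {model}" activity plus token-usage and
        // operation-duration histogram measurements tagged with provider, model, and server info.
        return await client.GetTextAsync(audio);
    }
}

Note that EnableSensitiveData defaults to false (unless the OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT environment variable is set to "true"), so gen_ai.output.messages and raw AdditionalProperties values are recorded only when explicitly opted in, as in the configure callback above.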