Skip to content

Commit aaa1a25

Browse files
Add ISpeechToTextClient abstractions (#5838)
* Speech to text abstractions --------- Co-authored-by: Stephen Toub <stoub@microsoft.com>
1 parent f78418d commit aaa1a25

File tree

54 files changed

+3848
-56
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+3848
-56
lines changed

eng/spellchecking_exclusions.dic

18 Bytes
Binary file not shown.

src/Libraries/Microsoft.Extensions.AI.Abstractions/ChatCompletion/ChatResponseExtensions.cs

Lines changed: 49 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,55 @@ static async Task<ChatResponse> ToChatResponseAsync(
180180
}
181181
}
182182

183+
/// <summary>Coalesces sequential <see cref="TextContent"/> content elements.</summary>
/// <param name="contents">
/// The list to coalesce in place. Each run of two or more adjacent <see cref="TextContent"/> items is replaced
/// by a single <see cref="TextContent"/> holding the concatenated text; the absorbed items are removed.
/// </param>
184+
internal static void CoalesceTextContent(List<AIContent> contents)
185+
{
186+
// Allocated lazily on the first run found, then cleared and reused for every subsequent run.
StringBuilder? coalescedText = null;
187+
188+
// Iterate through all of the items in the list looking for contiguous items that can be coalesced.
// The loop stops one short of the end because every iteration reads contents[start + 1].
189+
int start = 0;
190+
while (start < contents.Count - 1)
191+
{
192+
// We need at least two TextContents in a row to be able to coalesce.
193+
if (contents[start] is not TextContent firstText)
194+
{
195+
start++;
196+
continue;
197+
}
198+
199+
if (contents[start + 1] is not TextContent secondText)
200+
{
201+
// The item at start + 1 is not text, so it can neither extend this run nor begin a new one: skip both.
start += 2;
202+
continue;
203+
}
204+
205+
// Append the text from those nodes and continue appending subsequent TextContents until we run out.
206+
// We null out nodes as their text is appended so that we can later remove them all in one O(N) operation.
207+
coalescedText ??= new();
208+
_ = coalescedText.Clear().Append(firstText.Text).Append(secondText.Text);
209+
contents[start + 1] = null!;
210+
int i = start + 2;
211+
for (; i < contents.Count && contents[i] is TextContent next; i++)
212+
{
213+
_ = coalescedText.Append(next.Text);
214+
contents[i] = null!;
215+
}
216+
217+
// Store the replacement node.
218+
contents[start] = new TextContent(coalescedText.ToString())
219+
{
220+
// We inherit the properties of the first text node. We don't currently propagate additional
221+
// properties from the subsequent nodes. If we ever need to, we can add that here.
222+
AdditionalProperties = firstText.AdditionalProperties?.Clone(),
223+
};
224+
225+
// Resume scanning at the first index that was not absorbed into this run.
start = i;
226+
}
227+
228+
// Remove all of the null slots left over from the coalescing process.
// Note that the predicate matches every null entry, so nulls already present in the list are removed too.
229+
_ = contents.RemoveAll(u => u is null);
230+
}
231+
183232
/// <summary>Finalizes the <paramref name="response"/> object.</summary>
184233
private static void FinalizeResponse(ChatResponse response)
185234
{
@@ -296,53 +345,4 @@ private static void ProcessUpdate(ChatResponseUpdate update, ChatResponse respon
296345
}
297346
}
298347
}
299-
300-
/// <summary>Coalesces sequential <see cref="TextContent"/> content elements.</summary>
301-
private static void CoalesceTextContent(List<AIContent> contents)
302-
{
303-
StringBuilder? coalescedText = null;
304-
305-
// Iterate through all of the items in the list looking for contiguous items that can be coalesced.
306-
int start = 0;
307-
while (start < contents.Count - 1)
308-
{
309-
// We need at least two TextContents in a row to be able to coalesce.
310-
if (contents[start] is not TextContent firstText)
311-
{
312-
start++;
313-
continue;
314-
}
315-
316-
if (contents[start + 1] is not TextContent secondText)
317-
{
318-
start += 2;
319-
continue;
320-
}
321-
322-
// Append the text from those nodes and continue appending subsequent TextContents until we run out.
323-
// We null out nodes as their text is appended so that we can later remove them all in one O(N) operation.
324-
coalescedText ??= new();
325-
_ = coalescedText.Clear().Append(firstText.Text).Append(secondText.Text);
326-
contents[start + 1] = null!;
327-
int i = start + 2;
328-
for (; i < contents.Count && contents[i] is TextContent next; i++)
329-
{
330-
_ = coalescedText.Append(next.Text);
331-
contents[i] = null!;
332-
}
333-
334-
// Store the replacement node.
335-
contents[start] = new TextContent(coalescedText.ToString())
336-
{
337-
// We inherit the properties of the first text node. We don't currently propagate additional
338-
// properties from the subsequent nodes. If we ever need to, we can add that here.
339-
AdditionalProperties = firstText.AdditionalProperties?.Clone(),
340-
};
341-
342-
start = i;
343-
}
344-
345-
// Remove all of the null slots left over from the coalescing process.
346-
_ = contents.RemoveAll(u => u is null);
347-
}
348348
}

src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/AIContent.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ namespace Microsoft.Extensions.AI;
88
/// <summary>Provides a base class for all content used with AI services.</summary>
99
[JsonPolymorphic(TypeDiscriminatorPropertyName = "$type")]
1010
[JsonDerivedType(typeof(DataContent), typeDiscriminator: "data")]
11+
[JsonDerivedType(typeof(ErrorContent), typeDiscriminator: "error")]
1112
[JsonDerivedType(typeof(FunctionCallContent), typeDiscriminator: "functionCall")]
1213
[JsonDerivedType(typeof(FunctionResultContent), typeDiscriminator: "functionResult")]
1314
[JsonDerivedType(typeof(TextContent), typeDiscriminator: "text")]
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Diagnostics;
5+
using System.Text.Json.Serialization;
6+
using Microsoft.Shared.Diagnostics;
7+
8+
namespace Microsoft.Extensions.AI;
9+
10+
/// <summary>Represents an error, carrying a required message plus an optional error code and details.</summary>
11+
/// <remarks>
12+
/// Typically, <see cref="ErrorContent"/> is used for non-fatal errors, where something went wrong
13+
/// as part of the operation but the operation was still able to continue.
14+
/// </remarks>
15+
[DebuggerDisplay("{DebuggerDisplay,nq}")]
16+
public class ErrorContent : AIContent
17+
{
18+
/// <summary>The error message; backing field for <see cref="Message"/>.</summary>
19+
private string _message;
20+
21+
/// <summary>Initializes a new instance of the <see cref="ErrorContent"/> class with the specified message.</summary>
22+
/// <param name="message">The message to store in this content.</param>
// NOTE(review): Throw.IfNull is a shared helper not visible here; presumably it rejects null with an
// ArgumentNullException and otherwise returns its argument. Confirm against Microsoft.Shared.Diagnostics.
23+
[JsonConstructor]
24+
public ErrorContent(string message)
25+
{
26+
_message = Throw.IfNull(message);
27+
}
28+
29+
/// <summary>Gets or sets the error message.</summary>
/// <remarks>The setter passes the value through the same <c>Throw.IfNull</c> check as the constructor.</remarks>
30+
public string Message
31+
{
32+
get => _message;
33+
set => _message = Throw.IfNull(value);
34+
}
35+
36+
/// <summary>Gets or sets the error code, or <see langword="null"/> if none was supplied.</summary>
37+
public string? ErrorCode { get; set; }
38+
39+
/// <summary>Gets or sets the error details, or <see langword="null"/> if none were supplied.</summary>
40+
public string? Details { get; set; }
41+
42+
/// <summary>Gets a string representing this instance to display in the debugger.</summary>
/// <remarks>
/// The text is "Error = " followed by the message, then the error code in parentheses and the details
/// after " - ", each appended only when non-null.
/// </remarks>
43+
[DebuggerBrowsable(DebuggerBrowsableState.Never)]
44+
private string DebuggerDisplay =>
45+
$"Error = {Message}" +
46+
(ErrorCode is not null ? $" ({ErrorCode})" : string.Empty) +
47+
(Details is not null ? $" - {Details}" : string.Empty);
48+
}

src/Libraries/Microsoft.Extensions.AI.Abstractions/Microsoft.Extensions.AI.Abstractions.csproj

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,18 @@
1616
<PropertyGroup>
1717
<TargetFrameworks>$(TargetFrameworks);netstandard2.0</TargetFrameworks>
1818
<NoWarn>$(NoWarn);CA2227;CA1034;SA1316;S3253</NoWarn>
19+
<NoWarn>$(NoWarn);MEAI001</NoWarn>
1920
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
2021
<DisableNETStandardCompatErrors>true</DisableNETStandardCompatErrors>
2122
</PropertyGroup>
2223

2324
<PropertyGroup>
25+
<InjectExperimentalAttributeOnLegacy>true</InjectExperimentalAttributeOnLegacy>
2426
<InjectJsonSchemaExporterOnLegacy>true</InjectJsonSchemaExporterOnLegacy>
27+
<InjectRequiredMemberOnLegacy>true</InjectRequiredMemberOnLegacy>
2528
<InjectSharedEmptyCollections>true</InjectSharedEmptyCollections>
2629
<InjectStringHashOnLegacy>true</InjectStringHashOnLegacy>
2730
<InjectStringSyntaxAttributeOnLegacy>true</InjectStringSyntaxAttributeOnLegacy>
28-
<InjectRequiredMemberOnLegacy>true</InjectRequiredMemberOnLegacy>
2931
</PropertyGroup>
3032

3133
<ItemGroup Condition="'$(TargetFrameworkIdentifier)' != '.NETCoreApp'">
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System;
5+
using System.Collections.Generic;
6+
using System.Diagnostics.CodeAnalysis;
7+
using System.IO;
8+
using System.Threading;
9+
using System.Threading.Tasks;
10+
using Microsoft.Shared.Diagnostics;
11+
12+
namespace Microsoft.Extensions.AI;
13+
14+
/// <summary>
15+
/// Provides an optional base class for an <see cref="ISpeechToTextClient"/> that passes through calls to another instance.
16+
/// </summary>
17+
/// <remarks>
18+
/// This is recommended as a base type when building clients that can be chained in any order around an underlying <see cref="ISpeechToTextClient"/>.
19+
/// The default implementation passes each call to the inner client instance, with two additions: <see cref="GetService"/>
/// returns this instance when it satisfies an unkeyed request, and disposing this instance disposes the inner client.
20+
/// </remarks>
21+
[Experimental("MEAI001")]
22+
public class DelegatingSpeechToTextClient : ISpeechToTextClient
23+
{
24+
/// <summary>
25+
/// Initializes a new instance of the <see cref="DelegatingSpeechToTextClient"/> class.
26+
/// </summary>
27+
/// <param name="innerClient">The wrapped client instance, stored in <see cref="InnerClient"/>.</param>
// NOTE(review): Throw.IfNull is a shared helper not visible here; presumably it throws
// ArgumentNullException when innerClient is null. Confirm against Microsoft.Shared.Diagnostics.
28+
protected DelegatingSpeechToTextClient(ISpeechToTextClient innerClient)
29+
{
30+
InnerClient = Throw.IfNull(innerClient);
31+
}
32+
33+
/// <inheritdoc />
34+
public void Dispose()
35+
{
36+
// Cleanup is routed through the virtual Dispose(bool) overload so derived types can extend it.
Dispose(disposing: true);
37+
GC.SuppressFinalize(this);
38+
}
39+
40+
/// <summary>Gets the inner <see cref="ISpeechToTextClient" />.</summary>
41+
protected ISpeechToTextClient InnerClient { get; }
42+
43+
/// <inheritdoc />
44+
public virtual Task<SpeechToTextResponse> GetTextAsync(
45+
Stream audioSpeechStream, SpeechToTextOptions? options = null, CancellationToken cancellationToken = default)
46+
{
47+
// Pure pass-through: all arguments are forwarded unchanged.
return InnerClient.GetTextAsync(audioSpeechStream, options, cancellationToken);
48+
}
49+
50+
/// <inheritdoc />
51+
public virtual IAsyncEnumerable<SpeechToTextResponseUpdate> GetStreamingTextAsync(
52+
Stream audioSpeechStream, SpeechToTextOptions? options = null, CancellationToken cancellationToken = default)
53+
{
54+
// Pure pass-through: all arguments are forwarded unchanged.
return InnerClient.GetStreamingTextAsync(audioSpeechStream, options, cancellationToken);
55+
}
56+
57+
/// <inheritdoc />
58+
public virtual object? GetService(Type serviceType, object? serviceKey = null)
59+
{
60+
_ = Throw.IfNull(serviceType);
61+
62+
// If the key is non-null, we don't know what it means so pass through to the inner service.
// Otherwise this instance answers the request itself when it is an instance of the requested type.
63+
return
64+
serviceKey is null && serviceType.IsInstanceOfType(this) ? this :
65+
InnerClient.GetService(serviceType, serviceKey);
66+
}
67+
68+
/// <summary>Releases the resources used by this instance; the default implementation disposes the inner client.</summary>
69+
/// <param name="disposing"><see langword="true"/> if being called from <see cref="Dispose()"/>; otherwise, <see langword="false"/>.</param>
70+
protected virtual void Dispose(bool disposing)
71+
{
72+
// The inner client is only disposed on the explicit Dispose() path.
if (disposing)
73+
{
74+
InnerClient.Dispose();
75+
}
76+
}
77+
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System;
5+
using System.Collections.Generic;
6+
using System.Diagnostics.CodeAnalysis;
7+
using System.IO;
8+
using System.Threading;
9+
using System.Threading.Tasks;
10+
11+
namespace Microsoft.Extensions.AI;
12+
13+
/// <summary>Represents a speech-to-text client.</summary>
14+
/// <remarks>
15+
/// <para>
16+
/// Unless otherwise specified, all members of <see cref="ISpeechToTextClient"/> are thread-safe for concurrent use.
17+
/// It is expected that all implementations of <see cref="ISpeechToTextClient"/> support being used by multiple requests concurrently.
18+
/// </para>
19+
/// <para>
20+
/// However, implementations of <see cref="ISpeechToTextClient"/> might mutate the arguments supplied to <see cref="GetTextAsync"/> and
21+
/// <see cref="GetStreamingTextAsync"/>, such as by configuring the options instance. Thus, consumers of the interface either should avoid
22+
/// using shared instances of these arguments for concurrent invocations or should otherwise ensure by construction that no
23+
/// <see cref="ISpeechToTextClient"/> instances are used which might employ such mutation. For example, the ConfigureOptions method can be
24+
/// provided with a callback that could mutate the supplied options argument, and that should be avoided if using a singleton options instance.
25+
/// The audio speech stream passed to these methods will not be closed or disposed by the implementation.
26+
/// </para>
27+
/// </remarks>
28+
[Experimental("MEAI001")]
29+
public interface ISpeechToTextClient : IDisposable
30+
{
31+
/// <summary>Sends audio speech content to the model and returns the generated text.</summary>
32+
/// <param name="audioSpeechStream">The audio speech stream to send.</param>
33+
/// <param name="options">The speech-to-text options used to configure the request, or <see langword="null"/> for none.</param>
34+
/// <param name="cancellationToken">The <see cref="CancellationToken"/> to monitor for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param>
35+
/// <returns>The text generated.</returns>
36+
Task<SpeechToTextResponse> GetTextAsync(
37+
Stream audioSpeechStream,
38+
SpeechToTextOptions? options = null,
39+
CancellationToken cancellationToken = default);
40+
41+
/// <summary>Sends audio speech content to the model and streams back the generated text.</summary>
42+
/// <param name="audioSpeechStream">The audio speech stream to send.</param>
43+
/// <param name="options">The speech-to-text options used to configure the request, or <see langword="null"/> for none.</param>
44+
/// <param name="cancellationToken">The <see cref="CancellationToken"/> to monitor for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param>
45+
/// <returns>The text updates representing the streamed output.</returns>
46+
IAsyncEnumerable<SpeechToTextResponseUpdate> GetStreamingTextAsync(
47+
Stream audioSpeechStream,
48+
SpeechToTextOptions? options = null,
49+
CancellationToken cancellationToken = default);
50+
51+
/// <summary>Asks the <see cref="ISpeechToTextClient"/> for an object of the specified type <paramref name="serviceType"/>.</summary>
52+
/// <param name="serviceType">The type of object being requested.</param>
53+
/// <param name="serviceKey">An optional key that can be used to help identify the target service.</param>
54+
/// <returns>The found object, otherwise <see langword="null"/>.</returns>
55+
/// <exception cref="ArgumentNullException"><paramref name="serviceType"/> is <see langword="null"/>.</exception>
56+
/// <remarks>
57+
/// The purpose of this method is to allow for the retrieval of strongly typed services that might be provided by the <see cref="ISpeechToTextClient"/>,
58+
/// including itself or any services it might be wrapping.
59+
/// </remarks>
60+
object? GetService(Type serviceType, object? serviceKey = null);
61+
}

0 commit comments

Comments
 (0)