Skip to content

Commit bd3f9b0

Browse files
Remove the abstraction for token counting from the main evaluation API
This change is being made because there is still some uncertainty about what a general-purpose token counting abstraction (one that supports all kinds of future models and all kinds of input modalities) should look like. We do not want to bake in an API that only supports text-based inputs for the models and use cases that are prevalent today, since changing this API after we release a stable version of the evaluation APIs would potentially be a breaking change. We can always reintroduce token counting support in a non-breaking fashion in the future, if and when there is more clarity on what a general-purpose token counting abstraction should look like, or if and when such an abstraction is introduced in a lower layer (Microsoft.Extensions.AI). In the meantime, callers can still use the Microsoft.ML.Tokenizers library directly to count tokens in text-based content and trim down the conversation history before calling EvaluateAsync() if needed.
1 parent 6abae3c commit bd3f9b0

File tree

21 files changed

+48
-314
lines changed

21 files changed

+48
-314
lines changed

src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/ChatConversationEvaluator.cs

Lines changed: 2 additions & 170 deletions
Original file line numberDiff line numberDiff line change
@@ -57,99 +57,6 @@ public virtual async ValueTask<EvaluationResult> EvaluateAsync(
5757

5858
(ChatMessage? userRequest, List<ChatMessage> history) = GetUserRequestAndHistory(messages);
5959

60-
int inputTokenLimit = 0;
61-
int ignoredMessagesCount = 0;
62-
63-
if (chatConfiguration.TokenCounter is not null)
64-
{
65-
IEvaluationTokenCounter tokenCounter = chatConfiguration.TokenCounter;
66-
inputTokenLimit = tokenCounter.InputTokenLimit;
67-
int tokenBudget = inputTokenLimit;
68-
69-
void OnTokenBudgetExceeded()
70-
{
71-
EvaluationDiagnostic tokenBudgetExceeded =
72-
EvaluationDiagnostic.Error(
73-
$"Evaluation failed because the specified limit of {inputTokenLimit} input tokens was exceeded.");
74-
75-
result.AddDiagnosticsToAllMetrics(tokenBudgetExceeded);
76-
}
77-
78-
if (!string.IsNullOrWhiteSpace(SystemPrompt))
79-
{
80-
tokenBudget -= tokenCounter.CountTokens(SystemPrompt!);
81-
if (tokenBudget < 0)
82-
{
83-
OnTokenBudgetExceeded();
84-
return result;
85-
}
86-
}
87-
88-
string baseEvaluationPrompt =
89-
await RenderEvaluationPromptAsync(
90-
userRequest,
91-
modelResponse,
92-
includedHistory: [],
93-
additionalContext,
94-
cancellationToken).ConfigureAwait(false);
95-
96-
tokenBudget -= tokenCounter.CountTokens(baseEvaluationPrompt);
97-
if (tokenBudget < 0)
98-
{
99-
OnTokenBudgetExceeded();
100-
return result;
101-
}
102-
103-
if (history.Count > 0 && !IgnoresHistory)
104-
{
105-
if (history.Count == 1)
106-
{
107-
(bool canRender, tokenBudget) =
108-
await CanRenderAsync(
109-
history[0],
110-
tokenBudget,
111-
chatConfiguration,
112-
cancellationToken).ConfigureAwait(false);
113-
114-
if (!canRender)
115-
{
116-
ignoredMessagesCount = 1;
117-
history = [];
118-
}
119-
}
120-
else
121-
{
122-
int totalMessagesCount = history.Count;
123-
int includedMessagesCount = 0;
124-
125-
history.Reverse();
126-
127-
foreach (ChatMessage message in history)
128-
{
129-
cancellationToken.ThrowIfCancellationRequested();
130-
131-
(bool canRender, tokenBudget) =
132-
await CanRenderAsync(
133-
message,
134-
tokenBudget,
135-
chatConfiguration,
136-
cancellationToken).ConfigureAwait(false);
137-
138-
if (!canRender)
139-
{
140-
ignoredMessagesCount = totalMessagesCount - includedMessagesCount;
141-
history.RemoveRange(index: includedMessagesCount, count: ignoredMessagesCount);
142-
break;
143-
}
144-
145-
includedMessagesCount++;
146-
}
147-
148-
history.Reverse();
149-
}
150-
}
151-
}
152-
15360
var evaluationMessages = new List<ChatMessage>();
15461
if (!string.IsNullOrWhiteSpace(SystemPrompt))
15562
{
@@ -172,84 +79,9 @@ await PerformEvaluationAsync(
17279
result,
17380
cancellationToken).ConfigureAwait(false);
17481

175-
if (inputTokenLimit > 0 && ignoredMessagesCount > 0)
176-
{
177-
#pragma warning disable S103 // Lines should not be too long
178-
result.AddDiagnosticsToAllMetrics(
179-
EvaluationDiagnostic.Warning(
180-
$"The evaluation may be inconclusive because the oldest {ignoredMessagesCount} messages in the supplied conversation history were ignored in order to stay under the specified limit of {inputTokenLimit} input tokens."));
181-
#pragma warning restore S103
182-
}
183-
18482
return result;
18583
}
18684

187-
/// <summary>
188-
/// Determines if there is sufficient <paramref name="tokenBudget"/> remaining to render the
189-
/// supplied <paramref name="message"/> as part of the evaluation prompt that this <see cref="IEvaluator"/> uses.
190-
/// </summary>
191-
/// <param name="message">
192-
/// A message that is part of the conversation history for the response being evaluated and that is to be rendered
193-
/// as part of the evaluation prompt.
194-
/// </param>
195-
/// <param name="tokenBudget">
196-
/// The number of tokens available for the rendering additional content as part of the evaluation prompt.
197-
/// </param>
198-
/// <param name="chatConfiguration">
199-
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
200-
/// <see cref="IEvaluationTokenCounter"/> that this <see cref="IEvaluator"/> uses to perform the evaluation.
201-
/// </param>
202-
/// <param name="cancellationToken">A <see cref="CancellationToken"/> that can cancel the operation.</param>
203-
/// <returns>
204-
/// A tuple containing a <see langword="bool"/> indicating whether there is sufficient
205-
/// <paramref name="tokenBudget"/> remaining to render the supplied <paramref name="message"/> as part of the
206-
/// evaluation prompt, and an <see langword="int"/> containing the remaining token budget that would be available
207-
/// once this <paramref name="message"/> is rendered.
208-
/// </returns>
209-
protected virtual ValueTask<(bool canRender, int remainingTokenBudget)> CanRenderAsync(
210-
ChatMessage message,
211-
int tokenBudget,
212-
ChatConfiguration chatConfiguration,
213-
CancellationToken cancellationToken)
214-
{
215-
_ = Throw.IfNull(message);
216-
_ = Throw.IfNull(chatConfiguration);
217-
218-
IEvaluationTokenCounter? tokenCounter = chatConfiguration.TokenCounter;
219-
if (tokenCounter is null)
220-
{
221-
return new ValueTask<(bool, int)>((true, tokenBudget));
222-
}
223-
224-
string? author = message.AuthorName;
225-
string role = message.Role.Value;
226-
string content = message.Text ?? string.Empty;
227-
228-
int tokenCount =
229-
string.IsNullOrWhiteSpace(author)
230-
? tokenCounter.CountTokens("[") +
231-
tokenCounter.CountTokens(role) +
232-
tokenCounter.CountTokens("] ") +
233-
tokenCounter.CountTokens(content) +
234-
tokenCounter.CountTokens("\n")
235-
: tokenCounter.CountTokens("[") +
236-
tokenCounter.CountTokens(author!) +
237-
tokenCounter.CountTokens(" (") +
238-
tokenCounter.CountTokens(role) +
239-
tokenCounter.CountTokens(")] ") +
240-
tokenCounter.CountTokens(content) +
241-
tokenCounter.CountTokens("\n");
242-
243-
if (tokenCount > tokenBudget)
244-
{
245-
return new ValueTask<(bool, int)>((false, tokenBudget));
246-
}
247-
else
248-
{
249-
return new ValueTask<(bool, int)>((true, tokenBudget - tokenCount));
250-
}
251-
}
252-
25385
/// <summary>
25486
/// Renders the supplied <paramref name="response"/> to a string that can be included as part of the evaluation
25587
/// prompt that this <see cref="IEvaluator"/> uses.
@@ -351,8 +183,8 @@ protected abstract ValueTask<string> RenderEvaluationPromptAsync(
351183
/// <see cref="EvaluationMetric"/>s in the supplied <paramref name="result"/>.
352184
/// </summary>
353185
/// <param name="chatConfiguration">
354-
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
355-
/// <see cref="IEvaluationTokenCounter"/> that this <see cref="IEvaluator"/> uses to perform the evaluation.
186+
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> that this <see cref="IEvaluator"/>
187+
/// uses to perform the evaluation.
356188
/// </param>
357189
/// <param name="evaluationMessages">
358190
/// The set of messages that are to be sent to the supplied <see cref="ChatConfiguration.ChatClient"/> to perform

src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/Utilities/JsonOutputFixer.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ internal static ReadOnlySpan<char> TrimMarkdownDelimiters(string json)
2424
// Trim 'json' marker from markdown if it exists.
2525
const string JsonMarker = "json";
2626
int markerLength = JsonMarker.Length;
27-
if (trimmed.Length > markerLength && trimmed[0..markerLength].SequenceEqual(JsonMarker.AsSpan()))
27+
if (trimmed.Length > markerLength && trimmed.Slice(0, markerLength).SequenceEqual(JsonMarker.AsSpan()))
2828
{
2929
trimmed = trimmed.Slice(markerLength);
3030
}

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Azure/Storage/AzureStorageReportingConfiguration.cs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,9 @@ public static class AzureStorageReportingConfiguration
2929
/// survive in the cache before they are considered expired and evicted.
3030
/// </param>
3131
/// <param name="chatConfiguration">
32-
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
33-
/// <see cref="IEvaluationTokenCounter"/> that are used by AI-based <paramref name="evaluators"/> included in the
34-
/// returned <see cref="ReportingConfiguration"/>. Can be omitted if none of the included
35-
/// <paramref name="evaluators"/> are AI-based.
32+
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> that is used by AI-based
33+
/// <paramref name="evaluators"/> included in the returned <see cref="ReportingConfiguration"/>. Can be omitted if
34+
/// none of the included <paramref name="evaluators"/> are AI-based.
3635
/// </param>
3736
/// <param name="enableResponseCaching">
3837
/// <see langword="true"/> to enable caching of AI responses; <see langword="false"/> otherwise.

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/CSharp/ReportingConfiguration.cs

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,8 @@ public sealed class ReportingConfiguration
3030
public IResultStore ResultStore { get; }
3131

3232
/// <summary>
33-
/// Gets a <see cref="Evaluation.ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
34-
/// <see cref="IEvaluationTokenCounter"/> that are used by AI-based <see cref="Evaluators"/> included in this
35-
/// <see cref="ReportingConfiguration"/>.
33+
/// Gets a <see cref="Evaluation.ChatConfiguration"/> that specifies the <see cref="IChatClient"/> that is used by
34+
/// AI-based <see cref="Evaluators"/> included in this <see cref="ReportingConfiguration"/>.
3635
/// </summary>
3736
public ChatConfiguration? ChatConfiguration { get; }
3837

@@ -103,10 +102,9 @@ public sealed class ReportingConfiguration
103102
/// The <see cref="IResultStore"/> that should be used to persist the <see cref="ScenarioRunResult"/>s.
104103
/// </param>
105104
/// <param name="chatConfiguration">
106-
/// A <see cref="Evaluation.ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
107-
/// <see cref="IEvaluationTokenCounter"/> that are used by AI-based <paramref name="evaluators"/> included in this
108-
/// <see cref="ReportingConfiguration"/>. Can be omitted if none of the included <paramref name="evaluators"/> are
109-
/// AI-based.
105+
/// A <see cref="Evaluation.ChatConfiguration"/> that specifies the <see cref="IChatClient"/> that is used by
106+
/// AI-based <paramref name="evaluators"/> included in this <see cref="ReportingConfiguration"/>. Can be omitted if
107+
/// none of the included <paramref name="evaluators"/> are AI-based.
110108
/// </param>
111109
/// <param name="responseCacheProvider">
112110
/// The <see cref="IResponseCacheProvider"/> that should be used to cache AI responses. If omitted, AI responses
@@ -246,7 +244,7 @@ await ResponseCacheProvider.GetCacheAsync(
246244
}
247245
#pragma warning restore CA2000
248246

249-
chatConfiguration = new ChatConfiguration(chatClient, chatConfiguration.TokenCounter);
247+
chatConfiguration = new ChatConfiguration(chatClient);
250248
}
251249

252250
return new ScenarioRun(

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/CSharp/ScenarioRun.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,9 @@ public sealed class ScenarioRun : IAsyncDisposable
8080
public string ExecutionName { get; }
8181

8282
/// <summary>
83-
/// Gets a <see cref="Evaluation.ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
84-
/// <see cref="IEvaluationTokenCounter"/> that are used by AI-based <see cref="IEvaluator"/>s that are invoked as
85-
/// part of the evaluation of this <see cref="ScenarioRun"/>.
83+
/// Gets a <see cref="Evaluation.ChatConfiguration"/> that specifies the <see cref="IChatClient"/> that is used by
84+
/// AI-based <see cref="IEvaluator"/>s that are invoked as part of the evaluation of this
85+
/// <see cref="ScenarioRun"/>.
8686
/// </summary>
8787
public ChatConfiguration? ChatConfiguration { get; }
8888

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/CSharp/Storage/DiskBasedReportingConfiguration.cs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,9 @@ public static class DiskBasedReportingConfiguration
2525
/// The set of <see cref="IEvaluator"/>s that should be invoked to evaluate AI responses.
2626
/// </param>
2727
/// <param name="chatConfiguration">
28-
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
29-
/// <see cref="IEvaluationTokenCounter"/> that are used by AI-based <paramref name="evaluators"/> included in the
30-
/// returned <see cref="ReportingConfiguration"/>. Can be omitted if none of the included
31-
/// <paramref name="evaluators"/> are AI-based.
28+
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> that is used by AI-based
29+
/// <paramref name="evaluators"/> included in the returned <see cref="ReportingConfiguration"/>. Can be omitted if
30+
/// none of the included <paramref name="evaluators"/> are AI-based.
3231
/// </param>
3332
/// <param name="enableResponseCaching">
3433
/// <see langword="true"/> to enable caching of AI responses; <see langword="false"/> otherwise.

src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/ContentSafetyServiceConfigurationExtensions.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ public static ChatConfiguration ToChatConfiguration(
4646
originalChatClient: originalChatConfiguration?.ChatClient);
4747
#pragma warning restore CA2000
4848

49-
return new ChatConfiguration(newChatClient, originalChatConfiguration?.TokenCounter);
49+
return new ChatConfiguration(newChatClient);
5050
}
5151

5252
/// <summary>

src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/Microsoft.Extensions.AI.Evaluation.Safety.csproj

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
<MinCodeCoverage>0</MinCodeCoverage>
1616
<MinMutationScore>0</MinMutationScore>
1717
</PropertyGroup>
18+
19+
<ItemGroup>
20+
<PackageReference Include="Microsoft.Bcl.HashCode" />
21+
</ItemGroup>
1822

1923
<ItemGroup>
2024
<ProjectReference Include="..\Microsoft.Extensions.AI.Evaluation\Microsoft.Extensions.AI.Evaluation.csproj" />

src/Libraries/Microsoft.Extensions.AI.Evaluation/ChatConfiguration.cs

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,27 +9,13 @@
99
namespace Microsoft.Extensions.AI.Evaluation;
1010

1111
/// <summary>
12-
/// Specifies the <see cref="IChatClient"/> and the <see cref="IEvaluationTokenCounter"/> that should be used when
13-
/// evaluation is performed using an AI model.
12+
/// Specifies the <see cref="IChatClient"/> that should be used when evaluation is performed using an AI model.
1413
/// </summary>
1514
/// <param name="chatClient">An <see cref="IChatClient"/> that can be used to communicate with an AI model.</param>
16-
/// <param name="tokenCounter">
17-
/// An <see cref="IEvaluationTokenCounter"/> that can be used to counts tokens present in evaluation prompts, or
18-
/// <see langword="null"/> if the AI model / deployment being used does not impose an input token limit.
19-
/// </param>
20-
public sealed class ChatConfiguration(IChatClient chatClient, IEvaluationTokenCounter? tokenCounter = null)
15+
public sealed class ChatConfiguration(IChatClient chatClient)
2116
{
2217
/// <summary>
2318
/// Gets an <see cref="IChatClient"/> that can be used to communicate with an AI model.
2419
/// </summary>
2520
public IChatClient ChatClient { get; } = chatClient;
26-
27-
/// <summary>
28-
/// Gets an <see cref="IEvaluationTokenCounter"/> that can be used to counts tokens present in evaluation prompts.
29-
/// </summary>
30-
/// <remarks>
31-
/// <see cref="TokenCounter"/> can be set to <see langword="null"/> if the AI model / deployment being used does
32-
/// not impose an input token limit.
33-
/// </remarks>
34-
public IEvaluationTokenCounter? TokenCounter { get; } = tokenCounter;
3521
}

src/Libraries/Microsoft.Extensions.AI.Evaluation/CompositeEvaluator.cs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,8 @@ public CompositeEvaluator(IEnumerable<IEvaluator> evaluators)
8888
/// </param>
8989
/// <param name="modelResponse">The response that is to be evaluated.</param>
9090
/// <param name="chatConfiguration">
91-
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
92-
/// <see cref="IEvaluationTokenCounter"/> that should be used if one or more composed <see cref="IEvaluator"/>s use
93-
/// an AI model to perform evaluation.
91+
/// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> that should be used if one or
92+
/// more composed <see cref="IEvaluator"/>s use an AI model to perform evaluation.
9493
/// </param>
9594
/// <param name="additionalContext">
9695
/// Additional contextual information (beyond that which is available in <paramref name="messages"/>) that composed

0 commit comments

Comments
 (0)