From 0285046e6b8c51e8979f02442d129bbb58b004d8 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Fri, 24 Oct 2025 12:27:55 +0200 Subject: [PATCH 01/10] move code as is --- .../Processors/ClassificationEnricher.cs | 77 ++++++++++++++++ .../ImageAlternativeTextEnricher.cs | 73 +++++++++++++++ .../Processors/KeywordEnricher.cs | 88 +++++++++++++++++++ .../Processors/SentimentEnricher.cs | 65 ++++++++++++++ .../Processors/SummaryEnricher.cs | 66 ++++++++++++++ 5 files changed, 369 insertions(+) create mode 100644 src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs create mode 100644 src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ImageAlternativeTextEnricher.cs create mode 100644 src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs create mode 100644 src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SentimentEnricher.cs create mode 100644 src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs new file mode 100644 index 00000000000..89902138947 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs @@ -0,0 +1,77 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Microsoft.Extensions.AI; +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.Extensions.DataIngestion; + +/// +/// Enriches document chunks with a classification label based on their content. +/// +/// This class uses a chat-based language model to analyze the content of document chunks and assign a +/// single, most relevant classification label. The classification is performed using a predefined set of classes, with +/// an optional fallback class for cases where no suitable classification can be determined. +public sealed class ClassificationEnricher : IngestionChunkProcessor +{ + private readonly IChatClient _chatClient; + private readonly ChatOptions? _chatOptions; + private readonly TextContent _request; + + public ClassificationEnricher(IChatClient chatClient, ReadOnlySpan predefinedClasses, + ChatOptions? chatOptions = null, string? fallbackClass = null) + { + if (predefinedClasses.Length == 0) + { + throw new ArgumentException("Predefined classes must be provided.", nameof(predefinedClasses)); + } + + _chatClient = chatClient ?? throw new ArgumentNullException(nameof(chatClient)); + _chatOptions = chatOptions; + _request = CreateLlmRequest(predefinedClasses, string.IsNullOrEmpty(fallbackClass) ? "Unknown" : fallbackClass!); + } + + public static string MetadataKey => "classification"; + + public override async IAsyncEnumerable> ProcessAsync(IAsyncEnumerable> chunks, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + cancellationToken.ThrowIfCancellationRequested(); + + if (chunks is null) + { + throw new ArgumentNullException(nameof(chunks)); + } + + await foreach (IngestionChunk chunk in chunks.WithCancellation(cancellationToken)) + { + var response = await _chatClient.GetResponseAsync( + [ + new(ChatRole.User, + [ + _request, + new TextContent(chunk.Content), + ]) + ], _chatOptions, cancellationToken: cancellationToken); + + chunk.Metadata[MetadataKey] = response.Text; + + yield return chunk; + } + } + + private static TextContent CreateLlmRequest(ReadOnlySpan predefinedClasses, string fallbackClass) + => new($"You are a classification expert. Analyze the given text and assign single, most relevant class. " + + $"Use only the following predefined classes: {Join(predefinedClasses)} and return {fallbackClass} when unable to classify."); + + private static string Join(ReadOnlySpan predefinedClasses) + => string.Join(", ", predefinedClasses! +#if !NET + .ToArray() +#endif + ); +} diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ImageAlternativeTextEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ImageAlternativeTextEnricher.cs new file mode 100644 index 00000000000..df3155f38f7 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ImageAlternativeTextEnricher.cs @@ -0,0 +1,73 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Microsoft.Extensions.AI; +using System; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.Extensions.DataIngestion; + +/// +/// Enriches elements with alternative text using an AI service, +/// so the generated embeddings can include the image content information. +/// +public sealed class ImageAlternativeTextEnricher : IngestionDocumentProcessor +{ + private readonly IChatClient _chatClient; + private readonly ChatOptions? _chatOptions; + + public ImageAlternativeTextEnricher(IChatClient chatClient, ChatOptions? chatOptions = null) + { + _chatClient = chatClient ?? throw new ArgumentNullException(nameof(chatClient)); + _chatOptions = chatOptions; + } + + public override async Task ProcessAsync(IngestionDocument document, CancellationToken cancellationToken = default) + { + cancellationToken.ThrowIfCancellationRequested(); + + if (document is null) + { + throw new ArgumentNullException(nameof(document)); + } + + foreach (var element in document.EnumerateContent()) + { + if (element is IngestionDocumentImage image) + { + await ProcessAsync(image, cancellationToken); + } + else if (element is IngestionDocumentTable table) + { + foreach (var cell in table.Cells) + { + if (cell is IngestionDocumentImage cellImage) + { + await ProcessAsync(cellImage, cancellationToken); + } + } + } + } + + return document; + } + + private async Task ProcessAsync(IngestionDocumentImage image, CancellationToken cancellationToken) + { + if (image.Content.HasValue && !string.IsNullOrEmpty(image.MediaType) + && string.IsNullOrEmpty(image.AlternativeText)) + { + var response = await _chatClient.GetResponseAsync( + [ + new(ChatRole.User, + [ + new TextContent("Write a detailed alternative text for this image with less than 50 words."), + new DataContent(image.Content.Value, image.MediaType!), + ]) + ], _chatOptions, cancellationToken: cancellationToken); + + image.AlternativeText = response.Text; + } + } +} diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs new file mode 100644 index 00000000000..3b5681d1e1d --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs @@ -0,0 +1,88 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Microsoft.Extensions.AI; +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Text; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.Extensions.DataIngestion; + +/// +/// Enriches chunks with keyword extraction using an AI chat model. +/// +/// +/// It adds "keywords" metadata to each chunk. It's an array of strings representing the extracted keywords. +/// +public sealed class KeywordEnricher : IngestionChunkProcessor +{ + private readonly IChatClient _chatClient; + private readonly ChatOptions? _chatOptions; + private readonly TextContent _request; + + // API design: predefinedKeywords needs to be provided in explicit way, so the user is encouraged to think about it. + // And for example provide a closed set, so the results are more predictable. + public KeywordEnricher(IChatClient chatClient, ReadOnlySpan predefinedKeywords, + ChatOptions? chatOptions = null, int? maxKeywords = null, double? confidenceThreshold = null) + { + if (confidenceThreshold.HasValue && (confidenceThreshold < 0.0 || confidenceThreshold > 1.0)) + { + throw new ArgumentOutOfRangeException(nameof(confidenceThreshold), "The confidence threshold must be between 0.0 and 1.0."); + } + + _chatClient = chatClient ?? throw new ArgumentNullException(nameof(chatClient)); + _chatOptions = chatOptions; + _request = CreateLlmRequest(maxKeywords ?? 5, predefinedKeywords, confidenceThreshold ?? 0.7); + } + + public static string MetadataKey => "keywords"; + + public override async IAsyncEnumerable> ProcessAsync(IAsyncEnumerable> chunks, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + cancellationToken.ThrowIfCancellationRequested(); + + if (chunks is null) + { + throw new ArgumentNullException(nameof(chunks)); + } + + await foreach (IngestionChunk chunk in chunks.WithCancellation(cancellationToken)) + { + ChatResponse response = await _chatClient.GetResponseAsync( + [ + new(ChatRole.User, + [ + _request, + new TextContent(chunk.Content), + ]) + ], _chatOptions, cancellationToken: cancellationToken); + + chunk.Metadata[MetadataKey] = response.Result; + + yield return chunk; + } + } + + private static TextContent CreateLlmRequest(int maxKeywords, ReadOnlySpan predefinedKeywords, double confidenceThreshold) + { + StringBuilder sb = new($"You are a keyword extraction expert. Analyze the given text and extract up to {maxKeywords} most relevant keywords."); + + if (predefinedKeywords.Length > 0) + { + string joined = string.Join(", ", predefinedKeywords! +#if !NET + .ToArray() +#endif + ); + sb.Append($" Focus on extracting keywords from the following predefined list: {joined}."); + } + + sb.Append($" Exclude keywords with confidence score below {confidenceThreshold}."); + + return new(sb.ToString()); + } +} diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SentimentEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SentimentEnricher.cs new file mode 100644 index 00000000000..76fcfbb9420 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SentimentEnricher.cs @@ -0,0 +1,65 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Microsoft.Extensions.AI; +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.Extensions.DataIngestion; + +/// +/// Enriches chunks with sentiment analysis using an AI chat model. +/// +/// +/// It adds "sentiment" metadata to each chunk. It can be Positive, Negative, Neutral or Unknown when confidence score is below the threshold. +/// +public sealed class SentimentEnricher : IngestionChunkProcessor +{ + private readonly IChatClient _chatClient; + private readonly ChatOptions? _chatOptions; + private readonly double _confidenceThreshold; + + public SentimentEnricher(IChatClient chatClient, ChatOptions? chatOptions = null, double? confidenceThreshold = 0.7) + { + if (confidenceThreshold.HasValue && (confidenceThreshold < 0.0 || confidenceThreshold > 1.0)) + { + throw new ArgumentOutOfRangeException(nameof(confidenceThreshold), "The confidence threshold must be between 0.0 and 1.0."); + } + + _chatClient = chatClient ?? throw new ArgumentNullException(nameof(chatClient)); + _chatOptions = chatOptions; + _confidenceThreshold = confidenceThreshold ?? 0.7; + } + + public static string MetadataKey => "sentiment"; + + public override async IAsyncEnumerable> ProcessAsync(IAsyncEnumerable> chunks, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + cancellationToken.ThrowIfCancellationRequested(); + + if (chunks is null) + { + throw new ArgumentNullException(nameof(chunks)); + } + + await foreach (var chunk in chunks.WithCancellation(cancellationToken)) + { + var response = await _chatClient.GetResponseAsync( + [ + new(ChatRole.User, + [ + new TextContent($"You are a sentiment analysis expert. Analyze the sentiment of the given text and return Positive/Negative/Neutral or Unknown when confidence score is below {_confidenceThreshold}. Return just the value of the sentiment."), + new TextContent(chunk.Content), + ]) + ], _chatOptions, cancellationToken: cancellationToken); + + chunk.Metadata[MetadataKey] = response.Text; + + yield return chunk; + } + } +} diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs new file mode 100644 index 00000000000..cbfd46b0546 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs @@ -0,0 +1,66 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Microsoft.Extensions.AI; +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.Extensions.DataIngestion; + +/// +/// Enriches chunks with summary text using an AI chat model. +/// +/// +/// It adds "summary" text metadata to each chunk. +/// +public sealed class SummaryEnricher : IngestionChunkProcessor +{ + private readonly IChatClient _chatClient; + private readonly ChatOptions? _chatOptions; + private readonly int _maxWordCount; + + public SummaryEnricher(IChatClient chatClient, ChatOptions? chatOptions = null, int? maxWordCount = null) + { + _chatClient = chatClient ?? throw new ArgumentNullException(nameof(chatClient)); + _chatOptions = chatOptions; + + if (maxWordCount.HasValue && maxWordCount.Value <= 0) + { + throw new ArgumentOutOfRangeException(nameof(maxWordCount), "Max word count must be greater than zero."); + } + + _maxWordCount = maxWordCount ?? 100; + } + + public static string MetadataKey => "summary"; + + public override async IAsyncEnumerable> ProcessAsync(IAsyncEnumerable> chunks, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + cancellationToken.ThrowIfCancellationRequested(); + + if (chunks is null) + { + throw new ArgumentNullException(nameof(chunks)); + } + + await foreach (var chunk in chunks.WithCancellation(cancellationToken)) + { + var response = await _chatClient.GetResponseAsync( + [ + new(ChatRole.User, + [ + new TextContent($"Write a summary text for this text with less than {_maxWordCount} words. Return just the summary."), + new TextContent(chunk.Content), + ]) + ], _chatOptions, cancellationToken: cancellationToken); + + chunk.Metadata[MetadataKey] = response.Text; + + yield return chunk; + } + } +} From 08a894d09e67a460cb7787c28d958677e9d22bc9 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Fri, 24 Oct 2025 12:44:11 +0200 Subject: [PATCH 02/10] solve the warnings --- .../Processors/ClassificationEnricher.cs | 54 ++++++++++----- .../ImageAlternativeTextEnricher.cs | 28 ++++---- .../Processors/KeywordEnricher.cs | 65 ++++++++++++------- .../Processors/SentimentEnricher.cs | 38 ++++++----- .../Processors/SummaryEnricher.cs | 36 +++++----- 5 files changed, 133 insertions(+), 88 deletions(-) diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs index 89902138947..642d4436f66 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs @@ -1,12 +1,14 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using Microsoft.Extensions.AI; using System; using System.Collections.Generic; using System.Runtime.CompilerServices; +using System.Text; using System.Threading; using System.Threading.Tasks; +using Microsoft.Extensions.AI; +using Microsoft.Shared.Diagnostics; namespace Microsoft.Extensions.DataIngestion; @@ -22,30 +24,36 @@ public sealed class ClassificationEnricher : IngestionChunkProcessor private readonly ChatOptions? _chatOptions; private readonly TextContent _request; + /// + /// Initializes a new instance of the class. + /// + /// The chat client used for classification. + /// The set of predefined classification classes. + /// Options for the chat client. + /// The fallback class to use when no suitable classification is found. public ClassificationEnricher(IChatClient chatClient, ReadOnlySpan predefinedClasses, ChatOptions? chatOptions = null, string? fallbackClass = null) { if (predefinedClasses.Length == 0) { - throw new ArgumentException("Predefined classes must be provided.", nameof(predefinedClasses)); + Throw.ArgumentException(nameof(predefinedClasses), "Predefined classes must be provided."); } - _chatClient = chatClient ?? throw new ArgumentNullException(nameof(chatClient)); + _chatClient = Throw.IfNull(chatClient); _chatOptions = chatOptions; _request = CreateLlmRequest(predefinedClasses, string.IsNullOrEmpty(fallbackClass) ? "Unknown" : fallbackClass!); } + /// + /// Gets the metadata key used to store the classification. + /// public static string MetadataKey => "classification"; + /// public override async IAsyncEnumerable> ProcessAsync(IAsyncEnumerable> chunks, [EnumeratorCancellation] CancellationToken cancellationToken = default) { - cancellationToken.ThrowIfCancellationRequested(); - - if (chunks is null) - { - throw new ArgumentNullException(nameof(chunks)); - } + _ = Throw.IfNull(chunks); await foreach (IngestionChunk chunk in chunks.WithCancellation(cancellationToken)) { @@ -56,7 +64,7 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy _request, new TextContent(chunk.Content), ]) - ], _chatOptions, cancellationToken: cancellationToken); + ], _chatOptions, cancellationToken: cancellationToken).ConfigureAwait(false); chunk.Metadata[MetadataKey] = response.Text; @@ -65,13 +73,23 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy } private static TextContent CreateLlmRequest(ReadOnlySpan predefinedClasses, string fallbackClass) - => new($"You are a classification expert. Analyze the given text and assign single, most relevant class. " + - $"Use only the following predefined classes: {Join(predefinedClasses)} and return {fallbackClass} when unable to classify."); + { + StringBuilder sb = new("You are a classification expert. Analyze the given text and assign single, most relevant class. "); - private static string Join(ReadOnlySpan predefinedClasses) - => string.Join(", ", predefinedClasses! -#if !NET - .ToArray() -#endif - ); +#pragma warning disable IDE0058 // Expression value is never used + sb.Append("Use only the following predefined classes: "); + for (int i = 0; i < predefinedClasses.Length; i++) + { + sb.Append(predefinedClasses[i]); + if (i < predefinedClasses.Length - 1) + { + sb.Append(", "); + } + } + + sb.Append(" and return ").Append(fallbackClass).Append(" when unable to classify."); +#pragma warning restore IDE0058 // Expression value is never used + + return new(sb.ToString()); + } } diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ImageAlternativeTextEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ImageAlternativeTextEnricher.cs index df3155f38f7..63cb4f5894a 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ImageAlternativeTextEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ImageAlternativeTextEnricher.cs @@ -1,10 +1,11 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using Microsoft.Extensions.AI; using System; using System.Threading; using System.Threading.Tasks; +using Microsoft.Extensions.AI; +using Microsoft.Shared.Diagnostics; namespace Microsoft.Extensions.DataIngestion; @@ -16,27 +17,30 @@ public sealed class ImageAlternativeTextEnricher : IngestionDocumentProcessor { private readonly IChatClient _chatClient; private readonly ChatOptions? _chatOptions; + private readonly TextContent _request; + /// + /// Initializes a new instance of the class. + /// + /// The chat client used to get responses for generating alternative text. + /// Options for the chat client. public ImageAlternativeTextEnricher(IChatClient chatClient, ChatOptions? chatOptions = null) { - _chatClient = chatClient ?? throw new ArgumentNullException(nameof(chatClient)); + _chatClient = Throw.IfNull(chatClient); _chatOptions = chatOptions; + _request = new("Write a detailed alternative text for this image with less than 50 words."); } + /// public override async Task ProcessAsync(IngestionDocument document, CancellationToken cancellationToken = default) { - cancellationToken.ThrowIfCancellationRequested(); - - if (document is null) - { - throw new ArgumentNullException(nameof(document)); - } + _ = Throw.IfNull(document); foreach (var element in document.EnumerateContent()) { if (element is IngestionDocumentImage image) { - await ProcessAsync(image, cancellationToken); + await ProcessAsync(image, cancellationToken).ConfigureAwait(false); } else if (element is IngestionDocumentTable table) { @@ -44,7 +48,7 @@ public override async Task ProcessAsync(IngestionDocument doc { if (cell is IngestionDocumentImage cellImage) { - await ProcessAsync(cellImage, cancellationToken); + await ProcessAsync(cellImage, cancellationToken).ConfigureAwait(false); } } } @@ -62,10 +66,10 @@ private async Task ProcessAsync(IngestionDocumentImage image, CancellationToken [ new(ChatRole.User, [ - new TextContent("Write a detailed alternative text for this image with less than 50 words."), + _request, new DataContent(image.Content.Value, image.MediaType!), ]) - ], _chatOptions, cancellationToken: cancellationToken); + ], _chatOptions, cancellationToken: cancellationToken).ConfigureAwait(false); image.AlternativeText = response.Text; } diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs index 3b5681d1e1d..9dcc18127c1 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs @@ -1,13 +1,14 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using Microsoft.Extensions.AI; using System; using System.Collections.Generic; using System.Runtime.CompilerServices; using System.Text; using System.Threading; using System.Threading.Tasks; +using Microsoft.Extensions.AI; +using Microsoft.Shared.Diagnostics; namespace Microsoft.Extensions.DataIngestion; @@ -19,36 +20,45 @@ namespace Microsoft.Extensions.DataIngestion; /// public sealed class KeywordEnricher : IngestionChunkProcessor { + private const int DefaultMaxKeywords = 5; private readonly IChatClient _chatClient; private readonly ChatOptions? _chatOptions; private readonly TextContent _request; - // API design: predefinedKeywords needs to be provided in explicit way, so the user is encouraged to think about it. - // And for example provide a closed set, so the results are more predictable. + /// + /// Initializes a new instance of the class. + /// + /// The chat client used for keyword extraction. + /// The set of predefined keywords for extraction. + /// Options for the chat client. + /// The maximum number of keywords to extract. + /// The confidence threshold for keyword inclusion. + /// + /// If no predefined keywords are provided, the model will extract keywords based on the content alone. + /// Such results may vary more significantly between different AI models. + /// public KeywordEnricher(IChatClient chatClient, ReadOnlySpan predefinedKeywords, ChatOptions? chatOptions = null, int? maxKeywords = null, double? confidenceThreshold = null) { - if (confidenceThreshold.HasValue && (confidenceThreshold < 0.0 || confidenceThreshold > 1.0)) - { - throw new ArgumentOutOfRangeException(nameof(confidenceThreshold), "The confidence threshold must be between 0.0 and 1.0."); - } + double threshold = confidenceThreshold.HasValue + ? Throw.IfOutOfRange(confidenceThreshold.Value, 0.0, 1.0) + : 0.7; - _chatClient = chatClient ?? throw new ArgumentNullException(nameof(chatClient)); + _chatClient = Throw.IfNull(chatClient); _chatOptions = chatOptions; - _request = CreateLlmRequest(maxKeywords ?? 5, predefinedKeywords, confidenceThreshold ?? 0.7); + _request = CreateLlmRequest(maxKeywords ?? DefaultMaxKeywords, predefinedKeywords, threshold); } + /// + /// Gets the metadata key used to store the keywords. + /// public static string MetadataKey => "keywords"; + /// public override async IAsyncEnumerable> ProcessAsync(IAsyncEnumerable> chunks, [EnumeratorCancellation] CancellationToken cancellationToken = default) { - cancellationToken.ThrowIfCancellationRequested(); - - if (chunks is null) - { - throw new ArgumentNullException(nameof(chunks)); - } + _ = Throw.IfNull(chunks); await foreach (IngestionChunk chunk in chunks.WithCancellation(cancellationToken)) { @@ -59,7 +69,7 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy _request, new TextContent(chunk.Content), ]) - ], _chatOptions, cancellationToken: cancellationToken); + ], _chatOptions, cancellationToken: cancellationToken).ConfigureAwait(false); chunk.Metadata[MetadataKey] = response.Result; @@ -69,19 +79,26 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy private static TextContent CreateLlmRequest(int maxKeywords, ReadOnlySpan predefinedKeywords, double confidenceThreshold) { - StringBuilder sb = new($"You are a keyword extraction expert. Analyze the given text and extract up to {maxKeywords} most relevant keywords."); + StringBuilder sb = new($"You are a keyword extraction expert. Analyze the given text and extract up to {maxKeywords} most relevant keywords. "); if (predefinedKeywords.Length > 0) { - string joined = string.Join(", ", predefinedKeywords! -#if !NET - .ToArray() -#endif - ); - sb.Append($" Focus on extracting keywords from the following predefined list: {joined}."); +#pragma warning disable IDE0058 // Expression value is never used + sb.Append("Focus on extracting keywords from the following predefined list: "); + for (int i = 0; i < predefinedKeywords.Length; i++) + { + sb.Append(predefinedKeywords[i]); + if (i < predefinedKeywords.Length - 1) + { + sb.Append(", "); + } + } + + sb.Append(". "); } - sb.Append($" Exclude keywords with confidence score below {confidenceThreshold}."); + sb.Append("Exclude keywords with confidence score below ").Append(confidenceThreshold).Append('.'); +#pragma warning restore IDE0058 // Expression value is never used return new(sb.ToString()); } diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SentimentEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SentimentEnricher.cs index 76fcfbb9420..b5047c3dae7 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SentimentEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SentimentEnricher.cs @@ -1,12 +1,13 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using Microsoft.Extensions.AI; using System; using System.Collections.Generic; using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; +using Microsoft.Extensions.AI; +using Microsoft.Shared.Diagnostics; namespace Microsoft.Extensions.DataIngestion; @@ -20,31 +21,34 @@ public sealed class SentimentEnricher : IngestionChunkProcessor { private readonly IChatClient _chatClient; private readonly ChatOptions? _chatOptions; - private readonly double _confidenceThreshold; + private readonly TextContent _request; + /// + /// Initializes a new instance of the class. + /// + /// The chat client used for sentiment analysis. + /// Options for the chat client. + /// The confidence threshold for sentiment determination. public SentimentEnricher(IChatClient chatClient, ChatOptions? chatOptions = null, double? confidenceThreshold = 0.7) { - if (confidenceThreshold.HasValue && (confidenceThreshold < 0.0 || confidenceThreshold > 1.0)) - { - throw new ArgumentOutOfRangeException(nameof(confidenceThreshold), "The confidence threshold must be between 0.0 and 1.0."); - } - - _chatClient = chatClient ?? throw new ArgumentNullException(nameof(chatClient)); + _chatClient = Throw.IfNull(chatClient); _chatOptions = chatOptions; - _confidenceThreshold = confidenceThreshold ?? 0.7; + + double threshold = confidenceThreshold.HasValue ? Throw.IfOutOfRange(confidenceThreshold.Value, 0.0, 1.0) : 0.7; + _request = new("You are a sentiment analysis expert. Analyze the sentiment of the given text and return Positive/Negative/Neutral or" + + $" Unknown when confidence score is below {threshold}. Return just the value of the sentiment."); } + /// + /// Gets the metadata key used to store the sentiment. + /// public static string MetadataKey => "sentiment"; + /// public override async IAsyncEnumerable> ProcessAsync(IAsyncEnumerable> chunks, [EnumeratorCancellation] CancellationToken cancellationToken = default) { - cancellationToken.ThrowIfCancellationRequested(); - - if (chunks is null) - { - throw new ArgumentNullException(nameof(chunks)); - } + _ = Throw.IfNull(chunks); await foreach (var chunk in chunks.WithCancellation(cancellationToken)) { @@ -52,10 +56,10 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy [ new(ChatRole.User, [ - new TextContent($"You are a sentiment analysis expert. Analyze the sentiment of the given text and return Positive/Negative/Neutral or Unknown when confidence score is below {_confidenceThreshold}. Return just the value of the sentiment."), + _request, new TextContent(chunk.Content), ]) - ], _chatOptions, cancellationToken: cancellationToken); + ], _chatOptions, cancellationToken: cancellationToken).ConfigureAwait(false); chunk.Metadata[MetadataKey] = response.Text; diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs index cbfd46b0546..c7decb35804 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs @@ -1,12 +1,13 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using Microsoft.Extensions.AI; using System; using System.Collections.Generic; using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; +using Microsoft.Extensions.AI; +using Microsoft.Shared.Diagnostics; namespace Microsoft.Extensions.DataIngestion; @@ -20,32 +21,33 @@ public sealed class SummaryEnricher : IngestionChunkProcessor { private readonly IChatClient _chatClient; private readonly ChatOptions? _chatOptions; - private readonly int _maxWordCount; + private readonly TextContent _request; + /// + /// Initializes a new instance of the class. + /// + /// The chat client used for summary generation. + /// Options for the chat client. + /// The maximum number of words for the summary. public SummaryEnricher(IChatClient chatClient, ChatOptions? chatOptions = null, int? maxWordCount = null) { - _chatClient = chatClient ?? throw new ArgumentNullException(nameof(chatClient)); + _chatClient = Throw.IfNull(chatClient); _chatOptions = chatOptions; - if (maxWordCount.HasValue && maxWordCount.Value <= 0) - { - throw new ArgumentOutOfRangeException(nameof(maxWordCount), "Max word count must be greater than zero."); - } - - _maxWordCount = maxWordCount ?? 100; + int wordCount = maxWordCount.HasValue ? Throw.IfLessThanOrEqual(maxWordCount.Value, 0) : 100; + _request = new($"Write a summary text for this text with less than {wordCount} words. Return just the summary."); } + /// + /// Gets the metadata key used to store the summary. + /// public static string MetadataKey => "summary"; + /// public override async IAsyncEnumerable> ProcessAsync(IAsyncEnumerable> chunks, [EnumeratorCancellation] CancellationToken cancellationToken = default) { - cancellationToken.ThrowIfCancellationRequested(); - - if (chunks is null) - { - throw new ArgumentNullException(nameof(chunks)); - } + _ = Throw.IfNull(chunks); await foreach (var chunk in chunks.WithCancellation(cancellationToken)) { @@ -53,10 +55,10 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy [ new(ChatRole.User, [ - new TextContent($"Write a summary text for this text with less than {_maxWordCount} words. Return just the summary."), + _request, new TextContent(chunk.Content), ]) - ], _chatOptions, cancellationToken: cancellationToken); + ], _chatOptions, cancellationToken: cancellationToken).ConfigureAwait(false); chunk.Metadata[MetadataKey] = response.Text; From 4ebb495c8b796b0d7d178e76adc6d264f516df58 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Fri, 24 Oct 2025 13:14:37 +0200 Subject: [PATCH 03/10] avoid the need of using structured input and dependency on MEAI (MEAI.Abstractions is fine) --- .../Processors/KeywordEnricher.cs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs index 9dcc18127c1..0d28945ac32 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs @@ -62,7 +62,8 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy await foreach (IngestionChunk chunk in chunks.WithCancellation(cancellationToken)) { - ChatResponse response = await _chatClient.GetResponseAsync( + // Structured response is not used here because it's not part of Microsoft.Extensions.AI.Abstractions. + var response = await _chatClient.GetResponseAsync( [ new(ChatRole.User, [ @@ -71,7 +72,9 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy ]) ], _chatOptions, cancellationToken: cancellationToken).ConfigureAwait(false); - chunk.Metadata[MetadataKey] = response.Result; +#pragma warning disable EA0009 // Use 'System.MemoryExtensions.Split' for improved performance + chunk.Metadata[MetadataKey] = response.Text.Split(';'); +#pragma warning restore EA0009 // Use 'System.MemoryExtensions.Split' for improved performance yield return chunk; } @@ -98,6 +101,7 @@ private static TextContent CreateLlmRequest(int maxKeywords, ReadOnlySpan Date: Fri, 24 Oct 2025 13:25:22 +0200 Subject: [PATCH 04/10] add tests --- .../Processors/KeywordEnricher.cs | 7 +- .../Processors/SentimentEnricher.cs | 2 +- .../Processors/SummaryEnricher.cs | 2 +- ...soft.Extensions.DataIngestion.Tests.csproj | 3 +- .../AlternativeTextEnricherTests.cs | 102 ++++++++++++++++++ .../Processors/ClassificationEnricherTests.cs | 73 +++++++++++++ .../Processors/KeywordEnricherTests.cs | 85 +++++++++++++++ .../Processors/SentimentEnricherTests.cs | 79 ++++++++++++++ .../Processors/SummaryEnricherTests.cs | 74 +++++++++++++ .../Utils/IAsyncEnumerableExtensions.cs | 11 ++ 10 files changed, 433 insertions(+), 5 deletions(-) create mode 100644 test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/AlternativeTextEnricherTests.cs create mode 100644 test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/ClassificationEnricherTests.cs create mode 100644 test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/KeywordEnricherTests.cs create mode 100644 test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SentimentEnricherTests.cs create mode 100644 test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SummaryEnricherTests.cs diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs index 0d28945ac32..c0ba9c51c0d 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs @@ -41,12 +41,15 @@ public KeywordEnricher(IChatClient chatClient, ReadOnlySpan predefinedKe ChatOptions? chatOptions = null, int? maxKeywords = null, double? confidenceThreshold = null) { double threshold = confidenceThreshold.HasValue - ? Throw.IfOutOfRange(confidenceThreshold.Value, 0.0, 1.0) + ? Throw.IfOutOfRange(confidenceThreshold.Value, 0.0, 1.0, nameof(confidenceThreshold)) : 0.7; + int keywordsCount = maxKeywords.HasValue + ? Throw.IfLessThanOrEqual(maxKeywords.Value, 0, nameof(maxKeywords)) + : DefaultMaxKeywords; _chatClient = Throw.IfNull(chatClient); _chatOptions = chatOptions; - _request = CreateLlmRequest(maxKeywords ?? DefaultMaxKeywords, predefinedKeywords, threshold); + _request = CreateLlmRequest(keywordsCount, predefinedKeywords, threshold); } /// diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SentimentEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SentimentEnricher.cs index b5047c3dae7..035d9ae63f5 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SentimentEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SentimentEnricher.cs @@ -34,7 +34,7 @@ public SentimentEnricher(IChatClient chatClient, ChatOptions? chatOptions = null _chatClient = Throw.IfNull(chatClient); _chatOptions = chatOptions; - double threshold = confidenceThreshold.HasValue ? Throw.IfOutOfRange(confidenceThreshold.Value, 0.0, 1.0) : 0.7; + double threshold = confidenceThreshold.HasValue ? Throw.IfOutOfRange(confidenceThreshold.Value, 0.0, 1.0, nameof(confidenceThreshold)) : 0.7; _request = new("You are a sentiment analysis expert. Analyze the sentiment of the given text and return Positive/Negative/Neutral or" + $" Unknown when confidence score is below {threshold}. Return just the value of the sentiment."); } diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs index c7decb35804..c7ecbe8ad61 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs @@ -34,7 +34,7 @@ public SummaryEnricher(IChatClient chatClient, ChatOptions? chatOptions = null, _chatClient = Throw.IfNull(chatClient); _chatOptions = chatOptions; - int wordCount = maxWordCount.HasValue ? Throw.IfLessThanOrEqual(maxWordCount.Value, 0) : 100; + int wordCount = maxWordCount.HasValue ? Throw.IfLessThanOrEqual(maxWordCount.Value, 0, nameof(maxWordCount)) : 100; _request = new($"Write a summary text for this text with less than {wordCount} words. Return just the summary."); } diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Microsoft.Extensions.DataIngestion.Tests.csproj b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Microsoft.Extensions.DataIngestion.Tests.csproj index d00b7b652e6..49e5b1e33ee 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Microsoft.Extensions.DataIngestion.Tests.csproj +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Microsoft.Extensions.DataIngestion.Tests.csproj @@ -20,8 +20,9 @@ + - + diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/AlternativeTextEnricherTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/AlternativeTextEnricherTests.cs new file mode 100644 index 00000000000..f21c4b71c3e --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/AlternativeTextEnricherTests.cs @@ -0,0 +1,102 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Threading.Tasks; +using Microsoft.Extensions.AI; +using Xunit; + +namespace Microsoft.Extensions.DataIngestion.Processors.Tests; + +public class AlternativeTextEnricherTests +{ + [Fact] + public void ThrowsOnNullChatClient() + => Assert.Throws(() => new ImageAlternativeTextEnricher(null!)); + + [Fact] + public async Task ThrowsOnNullDocument() + { + using TestChatClient chatClient = new(); + + ImageAlternativeTextEnricher sut = new(chatClient); + + await Assert.ThrowsAsync(async () => await sut.ProcessAsync(null!)); + } + + [Fact] + public async Task CanGenerateImageAltText() + { + const string PreExistingAltText = "Pre-existing alt text"; + ReadOnlyMemory imageContent = new byte[256]; + + int counter = 0; + string[] descriptions = { "First alt text", "Second alt text" }; + using TestChatClient chatClient = new() + { + GetResponseAsyncCallback = (messages, options, cancellationToken) => + { + var message = Assert.Single(messages); + DataContent dataContent = Assert.IsType(message.Contents[1]); + Assert.Equal("image/png", dataContent.MediaType); + Assert.Equal(imageContent.ToArray(), dataContent.Data.ToArray()); + + return Task.FromResult(new ChatResponse(new[] + { + new ChatMessage(ChatRole.Assistant, descriptions[counter++]) + })); + } + }; + ImageAlternativeTextEnricher sut = new(chatClient); + + IngestionDocumentImage documentImage = new($"![](nonExisting.png)") + { + AlternativeText = null, + Content = imageContent, + MediaType = "image/png" + }; + + IngestionDocumentImage tableCell = new($"![](another.png)") + { + AlternativeText = null, + Content = imageContent, + MediaType = "image/png" + }; + + IngestionDocumentImage imageWithAltText = new($"![](noChangesNeeded.png)") + { + AlternativeText = PreExistingAltText, + Content = imageContent, + MediaType = "image/png" + }; + + IngestionDocumentImage imageWithNoContent = new($"![](noImage.png)") + { + AlternativeText = null, + Content = default, + MediaType = "image/png" + }; + + IngestionDocument document = new("withImage") + { + Sections = + { + new IngestionDocumentSection + { + Elements = + { + documentImage, + new IngestionDocumentTable("nvm", new[,] { { tableCell } }) + } + } + } + }; + + await sut.ProcessAsync(document); + + Assert.Equal(descriptions[0], documentImage.AlternativeText); + Assert.Equal(descriptions[1], tableCell.AlternativeText); + Assert.Same(PreExistingAltText, imageWithAltText.AlternativeText); + Assert.Null(imageWithNoContent.AlternativeText); + } +} diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/ClassificationEnricherTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/ClassificationEnricherTests.cs new file mode 100644 index 00000000000..9167b8e3d1a --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/ClassificationEnricherTests.cs @@ -0,0 +1,73 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Threading.Tasks; +using Microsoft.Extensions.AI; +using Xunit; + +namespace Microsoft.Extensions.DataIngestion.Processors.Tests; + +public class ClassificationEnricherTests +{ + private static readonly IngestionDocument _document = new("test"); + + [Fact] + public void ThrowsOnNullChatClient() + => Assert.Throws(() => new ClassificationEnricher(null!, predefinedClasses: ["some"])); + + [Fact] + public void ThrowsOnEmptyPredefinedClasses() + => Assert.Throws(() => new ClassificationEnricher(new TestChatClient(), predefinedClasses: [])); + + [Fact] + public async Task ThrowsOnNullChunks() + { + using TestChatClient chatClient = new(); + ClassificationEnricher sut = new(chatClient, predefinedClasses: ["some"]); + + await Assert.ThrowsAsync(async () => + { + await foreach (var _ in sut.ProcessAsync(null!)) + { + // No-op + } + }); + } + + [Fact] + public async Task CanClassify() + { + int counter = 0; + string[] classes = ["AI", "Animals", "UFO"]; + using TestChatClient chatClient = new() + { + GetResponseAsyncCallback = (messages, options, cancellationToken) => + { + return Task.FromResult(new ChatResponse(new[] + { + new ChatMessage(ChatRole.Assistant, classes[counter++]) + })); + } + }; + ClassificationEnricher sut = new(chatClient, ["AI", "Animals", "Sports"], fallbackClass: "UFO"); + + IReadOnlyList> got = await sut.ProcessAsync(CreateChunks().ToAsyncEnumerable()).ToListAsync(); + + Assert.Equal(3, got.Count); + Assert.Equal("AI", got[0].Metadata[ClassificationEnricher.MetadataKey]); + Assert.Equal("Animals", got[1].Metadata[ClassificationEnricher.MetadataKey]); + Assert.Equal("UFO", got[2].Metadata[ClassificationEnricher.MetadataKey]); + } + + private static List> CreateChunks() => + [ + new(".NET developers need to integrate and interact with a growing variety of artificial intelligence (AI) services in their apps. " + + "The Microsoft.Extensions.AI libraries provide a unified approach for representing generative AI components, and enable seamless" + + " integration and interoperability with various AI services.", _document), + new ("Rabbits are small mammals in the family Leporidae of the order Lagomorpha (along with the hare and the pika)." + + "They are herbivorous animals and are known for their long ears, large hind legs, and short fluffy tails.", _document), + new("This text does not belong to any category.", _document), + ]; +} diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/KeywordEnricherTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/KeywordEnricherTests.cs new file mode 100644 index 00000000000..152c5c99190 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/KeywordEnricherTests.cs @@ -0,0 +1,85 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Threading.Tasks; +using Microsoft.Extensions.AI; +using Xunit; + +namespace Microsoft.Extensions.DataIngestion.Processors.Tests; + +public class KeywordEnricherTests +{ + private static readonly IngestionDocument _document = new("test"); + + [Fact] + public void ThrowsOnNullChatClient() + => Assert.Throws(() => new KeywordEnricher(null!, predefinedKeywords: null, confidenceThreshold: 0.5)); + + [Theory] + [InlineData(-0.1)] + [InlineData(1.1)] + public void ThrowsOnInvalidThreshold(double threshold) + { + var ex = Assert.Throws(() => new KeywordEnricher(new TestChatClient(), predefinedKeywords: null, confidenceThreshold: threshold)); + Assert.Equal("confidenceThreshold", ex.ParamName); + } + + [Theory] + [InlineData(0)] + [InlineData(-1)] + public void ThrowsOnInvalidMaxKeywords(int keywordCount) + { + var ex = Assert.Throws(() => new KeywordEnricher(new TestChatClient(), predefinedKeywords: null, maxKeywords: keywordCount)); + Assert.Equal("maxKeywords", ex.ParamName); + } + + [Fact] + public async Task ThrowsOnNullChunks() + { + using TestChatClient chatClient = new(); + KeywordEnricher sut = new(chatClient, predefinedKeywords: null, confidenceThreshold: 0.5); + + await Assert.ThrowsAsync(async () => + { + await foreach (var _ in sut.ProcessAsync(null!)) + { + // No-op + } + }); + } + + [Theory] + [InlineData] + [InlineData("AI", "MEAI", "Animals", "Rabbits")] + public async Task CanExtractKeywords(params string[] predefined) + { + int counter = 0; + string[] keywords = { "AI;MEAI", "Animals;Rabbits" }; + using TestChatClient chatClient = new() + { + GetResponseAsyncCallback = (messages, options, cancellationToken) => + { + return Task.FromResult(new ChatResponse(new[] + { + new ChatMessage(ChatRole.Assistant, keywords[counter++]) + })); + } + }; + + KeywordEnricher sut = new(chatClient, predefinedKeywords: predefined, confidenceThreshold: 0.5); + var chunks = CreateChunks().ToAsyncEnumerable(); + + IReadOnlyList> got = await sut.ProcessAsync(chunks).ToListAsync(); + + Assert.Equal(["AI", "MEAI"], (string[])got[0].Metadata[KeywordEnricher.MetadataKey]); + Assert.Equal(["Animals", "Rabbits"], (string[])got[1].Metadata[KeywordEnricher.MetadataKey]); + } + + private static List> CreateChunks() => + [ + new("The Microsoft.Extensions.AI libraries provide a unified approach for representing generative AI components", _document), + new("Rabbits are great pets. They are friendly and make excellent companions.", _document) + ]; +} diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SentimentEnricherTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SentimentEnricherTests.cs new file mode 100644 index 00000000000..095ab2a3fe6 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SentimentEnricherTests.cs @@ -0,0 +1,79 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Threading.Tasks; +using Microsoft.Extensions.AI; +using Xunit; + +namespace Microsoft.Extensions.DataIngestion.Processors.Tests; + +public class SentimentEnricherTests +{ + private static readonly IngestionDocument _document = new("test"); + + [Fact] + public void ThrowsOnNullChatClient() + => Assert.Throws(() => new SentimentEnricher(null!)); + + [Theory] + [InlineData(-0.1)] + [InlineData(1.1)] + public void ThrowsOnInvalidThreshold(double threshold) + { + var ex = Assert.Throws(() => new SentimentEnricher(new TestChatClient(), confidenceThreshold: threshold)); + Assert.Equal("confidenceThreshold", ex.ParamName); + } + + [Fact] + public async Task ThrowsOnNullChunks() + { + using TestChatClient chatClient = new(); + SentimentEnricher sut = new(chatClient); + + await Assert.ThrowsAsync(async () => + { + await foreach (var _ in sut.ProcessAsync(null!)) + { + // No-op + } + }); + } + + [Fact] + public async Task CanProvideSentiment() + { + int counter = 0; + string[] sentiments = { "Positive", "Negative", "Neutral", "Unknown" }; + using TestChatClient chatClient = new() + { + GetResponseAsyncCallback = (messages, options, cancellationToken) => + { + return Task.FromResult(new ChatResponse(new[] + { + new ChatMessage(ChatRole.Assistant, sentiments[counter++]) + })); + } + }; + SentimentEnricher sut = new(chatClient); + var input = CreateChunks().ToAsyncEnumerable(); + + var chunks = await sut.ProcessAsync(input).ToListAsync(); + + Assert.Equal(4, chunks.Count); + + Assert.Equal("Positive", chunks[0].Metadata[SentimentEnricher.MetadataKey]); + Assert.Equal("Negative", chunks[1].Metadata[SentimentEnricher.MetadataKey]); + Assert.Equal("Neutral", chunks[2].Metadata[SentimentEnricher.MetadataKey]); + Assert.Equal("Unknown", chunks[3].Metadata[SentimentEnricher.MetadataKey]); + } + + private static List> CreateChunks() => + [ + new("I love programming! It's so much fun and rewarding.", _document), + new("I hate bugs. They are so frustrating and time-consuming.", _document), + new("The weather is okay, not too bad but not great either.", _document), + new("I hate you. I am sorry, I actually don't. I am not sure myself what my feelings are.", _document) + ]; +} diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SummaryEnricherTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SummaryEnricherTests.cs new file mode 100644 index 00000000000..a3b698e1d9a --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SummaryEnricherTests.cs @@ -0,0 +1,74 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Threading.Tasks; +using Microsoft.Extensions.AI; +using Xunit; + +namespace Microsoft.Extensions.DataIngestion.Processors.Tests; + +public class SummaryEnricherTests +{ + private static readonly IngestionDocument _document = new("test"); + + [Fact] + public void ThrowsOnNullChatClient() + => Assert.Throws(() => new SummaryEnricher(null!)); + + [Theory] + [InlineData(0)] + [InlineData(-1)] + public void ThrowsOnInvalidMaxKeywords(int wordCount) + { + var ex = Assert.Throws(() => new SummaryEnricher(new TestChatClient(), maxWordCount: wordCount)); + Assert.Equal("maxWordCount", ex.ParamName); + } + + [Fact] + public async Task ThrowsOnNullChunks() + { + using TestChatClient chatClient = new(); + SummaryEnricher sut = new(chatClient); + + await Assert.ThrowsAsync(async () => + { + await foreach (var _ in sut.ProcessAsync(null!)) + { + // No-op + } + }); + } + + [Fact] + public async Task CanProvideSummary() + { + int counter = 0; + string[] summaries = { "First summary.", "Second summary." }; + using TestChatClient chatClient = new() + { + GetResponseAsyncCallback = (messages, options, cancellationToken) => + { + return Task.FromResult(new ChatResponse(new[] + { + new ChatMessage(ChatRole.Assistant, summaries[counter++]) + })); + } + }; + SummaryEnricher sut = new(chatClient); + var input = CreateChunks().ToAsyncEnumerable(); + + var chunks = await sut.ProcessAsync(input).ToListAsync(); + + Assert.Equal(2, chunks.Count); + Assert.Equal(summaries[0], (string)chunks[0].Metadata[SummaryEnricher.MetadataKey]!); + Assert.Equal(summaries[1], (string)chunks[1].Metadata[SummaryEnricher.MetadataKey]!); + } + + private static List> CreateChunks() => + [ + new("I love programming! It's so much fun and rewarding.", _document), + new("I hate bugs. They are so frustrating and time-consuming.", _document) + ]; +} diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Utils/IAsyncEnumerableExtensions.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Utils/IAsyncEnumerableExtensions.cs index 60120dded5d..bb30b585233 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Utils/IAsyncEnumerableExtensions.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Utils/IAsyncEnumerableExtensions.cs @@ -49,4 +49,15 @@ internal static async ValueTask SingleAsync(this IAsyncEnumerable sourc ? result : throw new InvalidOperationException(); } + + internal static async ValueTask> ToListAsync(this IAsyncEnumerable source) + { + List list = []; + await foreach (var item in source) + { + list.Add(item); + } + + return list; + } } From 6248fba0ec2ddf138ef679fecd6ae39af4ad4620 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Fri, 24 Oct 2025 16:23:57 +0200 Subject: [PATCH 05/10] add note about defaults --- .../Processors/ClassificationEnricher.cs | 2 +- .../Processors/KeywordEnricher.cs | 4 ++-- .../Processors/SentimentEnricher.cs | 4 ++-- .../Processors/SummaryEnricher.cs | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs index 642d4436f66..547a8c5bdaa 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs @@ -30,7 +30,7 @@ public sealed class ClassificationEnricher : IngestionChunkProcessor /// The chat client used for classification. /// The set of predefined classification classes. /// Options for the chat client. - /// The fallback class to use when no suitable classification is found. + /// The fallback class to use when no suitable classification is found. When not provided, it defaults to "Unknown". public ClassificationEnricher(IChatClient chatClient, ReadOnlySpan predefinedClasses, ChatOptions? chatOptions = null, string? fallbackClass = null) { diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs index c0ba9c51c0d..db0054828cf 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs @@ -31,8 +31,8 @@ public sealed class KeywordEnricher : IngestionChunkProcessor /// The chat client used for keyword extraction. /// The set of predefined keywords for extraction. /// Options for the chat client. - /// The maximum number of keywords to extract. - /// The confidence threshold for keyword inclusion. + /// The maximum number of keywords to extract. When not provided, it defaults to 5. + /// The confidence threshold for keyword inclusion. When not provided, it defaults to 0.7. /// /// If no predefined keywords are provided, the model will extract keywords based on the content alone. /// Such results may vary more significantly between different AI models. diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SentimentEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SentimentEnricher.cs index 035d9ae63f5..c9692bb3bba 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SentimentEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SentimentEnricher.cs @@ -28,8 +28,8 @@ public sealed class SentimentEnricher : IngestionChunkProcessor /// /// The chat client used for sentiment analysis. /// Options for the chat client. - /// The confidence threshold for sentiment determination. - public SentimentEnricher(IChatClient chatClient, ChatOptions? chatOptions = null, double? confidenceThreshold = 0.7) + /// The confidence threshold for sentiment determination. When not provided, it defaults to 0.7. + public SentimentEnricher(IChatClient chatClient, ChatOptions? chatOptions = null, double? confidenceThreshold = null) { _chatClient = Throw.IfNull(chatClient); _chatOptions = chatOptions; diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs index c7ecbe8ad61..44d19d59e62 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs @@ -28,7 +28,7 @@ public sealed class SummaryEnricher : IngestionChunkProcessor /// /// The chat client used for summary generation. /// Options for the chat client. - /// The maximum number of words for the summary. + /// The maximum number of words for the summary. When not provided, it defaults to 100. public SummaryEnricher(IChatClient chatClient, ChatOptions? chatOptions = null, int? maxWordCount = null) { _chatClient = Throw.IfNull(chatClient); From 991f649fb8b6fc4a131c2a9e0455eca6a626ff92 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Fri, 24 Oct 2025 16:26:30 +0200 Subject: [PATCH 06/10] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../Processors/ClassificationEnricher.cs | 2 +- .../Processors/KeywordEnricher.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs index 547a8c5bdaa..eda16cf917e 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs @@ -74,7 +74,7 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy private static TextContent CreateLlmRequest(ReadOnlySpan predefinedClasses, string fallbackClass) { - StringBuilder sb = new("You are a classification expert. Analyze the given text and assign single, most relevant class. "); + StringBuilder sb = new("You are a classification expert. Analyze the given text and assign a single, most relevant class. "); #pragma warning disable IDE0058 // Expression value is never used sb.Append("Use only the following predefined classes: "); diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs index db0054828cf..544d62b8fed 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs @@ -104,7 +104,7 @@ private static TextContent CreateLlmRequest(int maxKeywords, ReadOnlySpan Date: Fri, 24 Oct 2025 16:49:07 +0200 Subject: [PATCH 07/10] Add warning suppression for IDisposable implementation for the test project --- .../Microsoft.Extensions.DataIngestion.Tests.csproj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Microsoft.Extensions.DataIngestion.Tests.csproj b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Microsoft.Extensions.DataIngestion.Tests.csproj index 49e5b1e33ee..b5ff0659d57 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Microsoft.Extensions.DataIngestion.Tests.csproj +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Microsoft.Extensions.DataIngestion.Tests.csproj @@ -3,8 +3,8 @@ $(NoWarn);S3967 - - $(NoWarn);RT0002 + + $(NoWarn);CA1063 x64 From a13954f883f9755a086cd01e2fe1efbb579363e3 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Mon, 27 Oct 2025 11:08:08 +0100 Subject: [PATCH 08/10] address code review feedback: - use ChatOptions.Instructions - validate the responses --- .../Microsoft.Extensions.DataIngestion.csproj | 2 + .../Processors/ClassificationEnricher.cs | 55 +++++++++++++----- .../ImageAlternativeTextEnricher.cs | 13 ++--- .../Processors/KeywordEnricher.cs | 55 +++++++++++++----- .../Processors/SentimentEnricher.cs | 27 +++++---- .../Processors/SummaryEnricher.cs | 13 ++--- .../AlternativeTextEnricherTests.cs | 3 +- .../Processors/ClassificationEnricherTests.cs | 57 ++++++++++++++++++- .../Processors/KeywordEnricherTests.cs | 33 +++++++++++ .../Processors/SentimentEnricherTests.cs | 26 +++++++++ 10 files changed, 225 insertions(+), 59 deletions(-) diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Microsoft.Extensions.DataIngestion.csproj b/src/Libraries/Microsoft.Extensions.DataIngestion/Microsoft.Extensions.DataIngestion.csproj index a3e3b4e7a9a..1cae686f9d8 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Microsoft.Extensions.DataIngestion.csproj +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Microsoft.Extensions.DataIngestion.csproj @@ -15,9 +15,11 @@ + + diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs index eda16cf917e..946c06a68c6 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System; +using System.Collections.Frozen; using System.Collections.Generic; using System.Runtime.CompilerServices; using System.Text; @@ -21,8 +22,8 @@ namespace Microsoft.Extensions.DataIngestion; public sealed class ClassificationEnricher : IngestionChunkProcessor { private readonly IChatClient _chatClient; - private readonly ChatOptions? _chatOptions; - private readonly TextContent _request; + private readonly ChatOptions _chatOptions; + private readonly FrozenSet _predefinedClasses; /// /// Initializes a new instance of the class. @@ -34,14 +35,14 @@ public sealed class ClassificationEnricher : IngestionChunkProcessor public ClassificationEnricher(IChatClient chatClient, ReadOnlySpan predefinedClasses, ChatOptions? chatOptions = null, string? fallbackClass = null) { - if (predefinedClasses.Length == 0) + _chatClient = Throw.IfNull(chatClient); + if (string.IsNullOrWhiteSpace(fallbackClass)) { - Throw.ArgumentException(nameof(predefinedClasses), "Predefined classes must be provided."); + fallbackClass = "Unknown"; } - _chatClient = Throw.IfNull(chatClient); - _chatOptions = chatOptions; - _request = CreateLlmRequest(predefinedClasses, string.IsNullOrEmpty(fallbackClass) ? "Unknown" : fallbackClass!); + _predefinedClasses = CreatePredefinedSet(predefinedClasses, fallbackClass!); + _chatOptions = CreateChatOptions(predefinedClasses, chatOptions, fallbackClass!); } /// @@ -59,20 +60,42 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy { var response = await _chatClient.GetResponseAsync( [ - new(ChatRole.User, - [ - _request, - new TextContent(chunk.Content), - ]) + new(ChatRole.User, chunk.Content) ], _chatOptions, cancellationToken: cancellationToken).ConfigureAwait(false); - chunk.Metadata[MetadataKey] = response.Text; + chunk.Metadata[MetadataKey] = _predefinedClasses.Contains(response.Text) + ? response.Text + : throw new InvalidOperationException($"Classification returned an unexpected class: '{response.Text}'."); yield return chunk; } } - private static TextContent CreateLlmRequest(ReadOnlySpan predefinedClasses, string fallbackClass) + private static FrozenSet CreatePredefinedSet(ReadOnlySpan predefinedClasses, string fallbackClass) + { + if (predefinedClasses.Length == 0) + { + Throw.ArgumentException(nameof(predefinedClasses), "Predefined classes must be provided."); + } + + HashSet predefinedClassesSet = new(StringComparer.Ordinal) { fallbackClass }; + foreach (string predefinedClass in predefinedClasses) + { + if (!predefinedClassesSet.Add(predefinedClass)) + { + if (predefinedClass.Equals(fallbackClass, StringComparison.Ordinal)) + { + Throw.ArgumentException(nameof(predefinedClasses), $"Fallback class '{fallbackClass}' must not be one of the predefined classes."); + } + + Throw.ArgumentException(nameof(predefinedClasses), $"Duplicate class found: '{predefinedClass}'."); + } + } + + return predefinedClassesSet.ToFrozenSet(); + } + + private static ChatOptions CreateChatOptions(ReadOnlySpan predefinedClasses, ChatOptions? userProvided, string fallbackClass) { StringBuilder sb = new("You are a classification expert. Analyze the given text and assign a single, most relevant class. "); @@ -90,6 +113,8 @@ private static TextContent CreateLlmRequest(ReadOnlySpan predefinedClass sb.Append(" and return ").Append(fallbackClass).Append(" when unable to classify."); #pragma warning restore IDE0058 // Expression value is never used - return new(sb.ToString()); + ChatOptions result = userProvided?.Clone() ?? new(); + result.Instructions = sb.ToString(); + return result; } } diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ImageAlternativeTextEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ImageAlternativeTextEnricher.cs index 63cb4f5894a..4e5dae6be8c 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ImageAlternativeTextEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ImageAlternativeTextEnricher.cs @@ -16,8 +16,7 @@ namespace Microsoft.Extensions.DataIngestion; public sealed class ImageAlternativeTextEnricher : IngestionDocumentProcessor { private readonly IChatClient _chatClient; - private readonly ChatOptions? _chatOptions; - private readonly TextContent _request; + private readonly ChatOptions _chatOptions; /// /// Initializes a new instance of the class. @@ -27,8 +26,8 @@ public sealed class ImageAlternativeTextEnricher : IngestionDocumentProcessor public ImageAlternativeTextEnricher(IChatClient chatClient, ChatOptions? chatOptions = null) { _chatClient = Throw.IfNull(chatClient); - _chatOptions = chatOptions; - _request = new("Write a detailed alternative text for this image with less than 50 words."); + _chatOptions = chatOptions?.Clone() ?? new(); + _chatOptions.Instructions = "Write a detailed alternative text for this image with less than 50 words."; } /// @@ -64,11 +63,7 @@ private async Task ProcessAsync(IngestionDocumentImage image, CancellationToken { var response = await _chatClient.GetResponseAsync( [ - new(ChatRole.User, - [ - _request, - new DataContent(image.Content.Value, image.MediaType!), - ]) + new(ChatRole.User, [new DataContent(image.Content.Value, image.MediaType!)]) ], _chatOptions, cancellationToken: cancellationToken).ConfigureAwait(false); image.AlternativeText = response.Text; diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs index 544d62b8fed..65893433cbb 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System; +using System.Collections.Frozen; using System.Collections.Generic; using System.Runtime.CompilerServices; using System.Text; @@ -22,8 +23,8 @@ public sealed class KeywordEnricher : IngestionChunkProcessor { private const int DefaultMaxKeywords = 5; private readonly IChatClient _chatClient; - private readonly ChatOptions? _chatOptions; - private readonly TextContent _request; + private readonly ChatOptions _chatOptions; + private readonly FrozenSet? _predefinedKeywords; /// /// Initializes a new instance of the class. @@ -48,8 +49,8 @@ public KeywordEnricher(IChatClient chatClient, ReadOnlySpan predefinedKe : DefaultMaxKeywords; _chatClient = Throw.IfNull(chatClient); - _chatOptions = chatOptions; - _request = CreateLlmRequest(keywordsCount, predefinedKeywords, threshold); + _predefinedKeywords = CreatePredfinedKeywords(predefinedKeywords); + _chatOptions = CreateChatOptions(keywordsCount, predefinedKeywords, threshold, chatOptions); } /// @@ -68,22 +69,48 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy // Structured response is not used here because it's not part of Microsoft.Extensions.AI.Abstractions. var response = await _chatClient.GetResponseAsync( [ - new(ChatRole.User, - [ - _request, - new TextContent(chunk.Content), - ]) + new(ChatRole.User, chunk.Content) ], _chatOptions, cancellationToken: cancellationToken).ConfigureAwait(false); #pragma warning disable EA0009 // Use 'System.MemoryExtensions.Split' for improved performance - chunk.Metadata[MetadataKey] = response.Text.Split(';'); -#pragma warning restore EA0009 // Use 'System.MemoryExtensions.Split' for improved performance + string[] keywords = response.Text.Split(';'); + if (_predefinedKeywords is not null) + { + foreach (var keyword in keywords) + { + if (!_predefinedKeywords.Contains(keyword)) + { + throw new InvalidOperationException($"The extracted keyword '{keyword}' is not in the predefined keywords list."); + } + } + } + + chunk.Metadata[MetadataKey] = keywords; yield return chunk; } } - private static TextContent CreateLlmRequest(int maxKeywords, ReadOnlySpan predefinedKeywords, double confidenceThreshold) + private static FrozenSet? CreatePredfinedKeywords(ReadOnlySpan predefinedKeywords) + { + if (predefinedKeywords.Length == 0) + { + return null; + } + + HashSet result = new(StringComparer.Ordinal); + foreach (string keyword in predefinedKeywords) + { + if (!result.Add(keyword)) + { + Throw.ArgumentException(nameof(predefinedKeywords), $"Duplicate keyword found: '{keyword}'"); + } + } + + return result.ToFrozenSet(StringComparer.Ordinal); + } + + private static ChatOptions CreateChatOptions(int maxKeywords, ReadOnlySpan predefinedKeywords, double confidenceThreshold, ChatOptions? userProvided) { StringBuilder sb = new($"You are a keyword extraction expert. Analyze the given text and extract up to {maxKeywords} most relevant keywords. "); @@ -107,6 +134,8 @@ private static TextContent CreateLlmRequest(int maxKeywords, ReadOnlySpan { private readonly IChatClient _chatClient; - private readonly ChatOptions? _chatOptions; - private readonly TextContent _request; + private readonly ChatOptions _chatOptions; + private readonly FrozenSet _validSentiments = +#if NET9_0_OR_GREATER + FrozenSet.Create(StringComparer.Ordinal, "Positive", "Negative", "Neutral", "Unknown"); +#else + new string[] { "Positive", "Negative", "Neutral", "Unknown" }.ToFrozenSet(StringComparer.Ordinal); +#endif /// /// Initializes a new instance of the class. @@ -32,11 +38,11 @@ public sealed class SentimentEnricher : IngestionChunkProcessor public SentimentEnricher(IChatClient chatClient, ChatOptions? chatOptions = null, double? confidenceThreshold = null) { _chatClient = Throw.IfNull(chatClient); - _chatOptions = chatOptions; double threshold = confidenceThreshold.HasValue ? Throw.IfOutOfRange(confidenceThreshold.Value, 0.0, 1.0, nameof(confidenceThreshold)) : 0.7; - _request = new("You are a sentiment analysis expert. Analyze the sentiment of the given text and return Positive/Negative/Neutral or" + - $" Unknown when confidence score is below {threshold}. Return just the value of the sentiment."); + _chatOptions = chatOptions?.Clone() ?? new(); + _chatOptions.Instructions = "You are a sentiment analysis expert. Analyze the sentiment of the given text and return Positive/Negative/Neutral or" + + $" Unknown when confidence score is below {threshold}. Return just the value of the sentiment."; } /// @@ -54,13 +60,14 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy { var response = await _chatClient.GetResponseAsync( [ - new(ChatRole.User, - [ - _request, - new TextContent(chunk.Content), - ]) + new(ChatRole.User, chunk.Content) ], _chatOptions, cancellationToken: cancellationToken).ConfigureAwait(false); + if (!_validSentiments.Contains(response.Text)) + { + throw new InvalidOperationException($"Invalid sentiment response: '{response.Text}'."); + } + chunk.Metadata[MetadataKey] = response.Text; yield return chunk; diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs index 44d19d59e62..70e59318595 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs @@ -20,8 +20,7 @@ namespace Microsoft.Extensions.DataIngestion; public sealed class SummaryEnricher : IngestionChunkProcessor { private readonly IChatClient _chatClient; - private readonly ChatOptions? _chatOptions; - private readonly TextContent _request; + private readonly ChatOptions _chatOptions; /// /// Initializes a new instance of the class. @@ -32,10 +31,10 @@ public sealed class SummaryEnricher : IngestionChunkProcessor public SummaryEnricher(IChatClient chatClient, ChatOptions? chatOptions = null, int? maxWordCount = null) { _chatClient = Throw.IfNull(chatClient); - _chatOptions = chatOptions; int wordCount = maxWordCount.HasValue ? Throw.IfLessThanOrEqual(maxWordCount.Value, 0, nameof(maxWordCount)) : 100; - _request = new($"Write a summary text for this text with less than {wordCount} words. Return just the summary."); + _chatOptions = chatOptions?.Clone() ?? new(); + _chatOptions.Instructions = $"Write a summary text for this text with less than {wordCount} words. Return just the summary."; } /// @@ -53,11 +52,7 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy { var response = await _chatClient.GetResponseAsync( [ - new(ChatRole.User, - [ - _request, - new TextContent(chunk.Content), - ]) + new(ChatRole.User, chunk.Content) ], _chatOptions, cancellationToken: cancellationToken).ConfigureAwait(false); chunk.Metadata[MetadataKey] = response.Text; diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/AlternativeTextEnricherTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/AlternativeTextEnricherTests.cs index f21c4b71c3e..90cfbf383f3 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/AlternativeTextEnricherTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/AlternativeTextEnricherTests.cs @@ -37,7 +37,8 @@ public async Task CanGenerateImageAltText() GetResponseAsyncCallback = (messages, options, cancellationToken) => { var message = Assert.Single(messages); - DataContent dataContent = Assert.IsType(message.Contents[1]); + var content = Assert.Single(message.Contents); + DataContent dataContent = Assert.IsType(content); Assert.Equal("image/png", dataContent.MediaType); Assert.Equal(imageContent.ToArray(), dataContent.Data.ToArray()); diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/ClassificationEnricherTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/ClassificationEnricherTests.cs index 9167b8e3d1a..06c87561ec2 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/ClassificationEnricherTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/ClassificationEnricherTests.cs @@ -15,11 +15,38 @@ public class ClassificationEnricherTests [Fact] public void ThrowsOnNullChatClient() - => Assert.Throws(() => new ClassificationEnricher(null!, predefinedClasses: ["some"])); + { + var ex = Assert.Throws(() => new ClassificationEnricher(null!, predefinedClasses: ["some"])); + Assert.Equal("chatClient", ex.ParamName); + } [Fact] public void ThrowsOnEmptyPredefinedClasses() - => Assert.Throws(() => new ClassificationEnricher(new TestChatClient(), predefinedClasses: [])); + { + var ex = Assert.Throws(() => new ClassificationEnricher(new TestChatClient(), predefinedClasses: [])); + Assert.Equal("predefinedClasses", ex.ParamName); + } + + [Fact] + public void ThrowsOnDuplicatePredefinedClasses() + { + var ex = Assert.Throws(() => new ClassificationEnricher(new TestChatClient(), predefinedClasses: ["same", "same"])); + Assert.Equal("predefinedClasses", ex.ParamName); + } + + [Fact] + public void ThrowsOnPredefinedClassesContainingFallback() + { + var ex = Assert.Throws(() => new ClassificationEnricher(new TestChatClient(), predefinedClasses: ["same", "Unknown"])); + Assert.Equal("predefinedClasses", ex.ParamName); + } + + [Fact] + public void ThrowsOnFallbackInPredefinedClasses() + { + var ex = Assert.Throws(() => new ClassificationEnricher(new TestChatClient(), predefinedClasses: ["some"], fallbackClass: "some")); + Assert.Equal("predefinedClasses", ex.ParamName); + } [Fact] public async Task ThrowsOnNullChunks() @@ -61,6 +88,32 @@ public async Task CanClassify() Assert.Equal("UFO", got[2].Metadata[ClassificationEnricher.MetadataKey]); } + [Fact] + public async Task ThrowsOnInvalidResponse() + { + using TestChatClient chatClient = new() + { + GetResponseAsyncCallback = (messages, options, cancellationToken) => + { + return Task.FromResult(new ChatResponse(new[] + { + new ChatMessage(ChatRole.Assistant, "Unexpected result!") + })); + } + }; + + ClassificationEnricher sut = new(chatClient, ["AI", "Animals", "Sports"]); + var input = CreateChunks().ToAsyncEnumerable(); + + await Assert.ThrowsAsync(async () => + { + await foreach (var _ in sut.ProcessAsync(input)) + { + // No-op + } + }); + } + private static List> CreateChunks() => [ new(".NET developers need to integrate and interact with a growing variety of artificial intelligence (AI) services in their apps. " + diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/KeywordEnricherTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/KeywordEnricherTests.cs index 152c5c99190..1286a873c05 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/KeywordEnricherTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/KeywordEnricherTests.cs @@ -35,6 +35,13 @@ public void ThrowsOnInvalidMaxKeywords(int keywordCount) Assert.Equal("maxKeywords", ex.ParamName); } + [Fact] + public void ThrowsOnDuplicateKeywords() + { + var ex = Assert.Throws(() => new KeywordEnricher(new TestChatClient(), predefinedKeywords: ["same", "same"], confidenceThreshold: 0.5)); + Assert.Equal("predefinedKeywords", ex.ParamName); + } + [Fact] public async Task ThrowsOnNullChunks() { @@ -77,6 +84,32 @@ public async Task CanExtractKeywords(params string[] predefined) Assert.Equal(["Animals", "Rabbits"], (string[])got[1].Metadata[KeywordEnricher.MetadataKey]); } + [Fact] + public async Task ThrowsOnInvalidResponse() + { + using TestChatClient chatClient = new() + { + GetResponseAsyncCallback = (messages, options, cancellationToken) => + { + return Task.FromResult(new ChatResponse(new[] + { + new ChatMessage(ChatRole.Assistant, "Unexpected result!") + })); + } + }; + + KeywordEnricher sut = new(chatClient, ["some"]); + var input = CreateChunks().ToAsyncEnumerable(); + + await Assert.ThrowsAsync(async () => + { + await foreach (var _ in sut.ProcessAsync(input)) + { + // No-op + } + }); + } + private static List> CreateChunks() => [ new("The Microsoft.Extensions.AI libraries provide a unified approach for representing generative AI components", _document), diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SentimentEnricherTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SentimentEnricherTests.cs index 095ab2a3fe6..fe92024c5a3 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SentimentEnricherTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SentimentEnricherTests.cs @@ -69,6 +69,32 @@ public async Task CanProvideSentiment() Assert.Equal("Unknown", chunks[3].Metadata[SentimentEnricher.MetadataKey]); } + [Fact] + public async Task ThrowsOnInvalidResponse() + { + using TestChatClient chatClient = new() + { + GetResponseAsyncCallback = (messages, options, cancellationToken) => + { + return Task.FromResult(new ChatResponse(new[] + { + new ChatMessage(ChatRole.Assistant, "Unexpected result!") + })); + } + }; + + SentimentEnricher sut = new(chatClient); + var input = CreateChunks().ToAsyncEnumerable(); + + await Assert.ThrowsAsync(async () => + { + await foreach (var _ in sut.ProcessAsync(input)) + { + // No-op + } + }); + } + private static List> CreateChunks() => [ new("I love programming! It's so much fun and rewarding.", _document), From 8e6e3de58a68f0031cc2fe2bcf161345f0202bef Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Mon, 27 Oct 2025 15:14:47 +0100 Subject: [PATCH 09/10] address code review feedback: reject invalid keywords/classes, improve prompt message to better handle wordCount = 1 --- .../Processors/ClassificationEnricher.cs | 9 +++++++++ .../Processors/KeywordEnricher.cs | 14 ++++++++++++++ .../Processors/SummaryEnricher.cs | 2 +- .../Processors/ClassificationEnricherTests.cs | 7 +++++++ .../Processors/KeywordEnricherTests.cs | 9 +++++++++ 5 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs index 946c06a68c6..4132d238b86 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs @@ -81,6 +81,15 @@ private static FrozenSet CreatePredefinedSet(ReadOnlySpan predef HashSet predefinedClassesSet = new(StringComparer.Ordinal) { fallbackClass }; foreach (string predefinedClass in predefinedClasses) { +#if NET + if (predefinedClass.Contains(',', StringComparison.Ordinal)) +#else + if (predefinedClass.IndexOf(',') >= 0) +#endif + { + Throw.ArgumentException(nameof(predefinedClasses), $"Predefined class '{predefinedClass}' must not contain ',' character."); + } + if (!predefinedClassesSet.Add(predefinedClass)) { if (predefinedClass.Equals(fallbackClass, StringComparison.Ordinal)) diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs index 65893433cbb..108664fb456 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs @@ -22,6 +22,11 @@ namespace Microsoft.Extensions.DataIngestion; public sealed class KeywordEnricher : IngestionChunkProcessor { private const int DefaultMaxKeywords = 5; +#if NET + private static readonly System.Buffers.SearchValues _illegalCharacters = System.Buffers.SearchValues.Create([';', ',']); +#else + private static readonly char[] _illegalCharacters = [';', ',']; +#endif private readonly IChatClient _chatClient; private readonly ChatOptions _chatOptions; private readonly FrozenSet? _predefinedKeywords; @@ -101,6 +106,15 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy HashSet result = new(StringComparer.Ordinal); foreach (string keyword in predefinedKeywords) { +#if NET + if (keyword.AsSpan().ContainsAny(_illegalCharacters)) +#else + if (keyword.IndexOfAny(_illegalCharacters) >= 0) +#endif + { + Throw.ArgumentException(nameof(predefinedKeywords), $"Predefined keyword '{keyword}' contains an invalid character (';' or ',')."); + } + if (!result.Add(keyword)) { Throw.ArgumentException(nameof(predefinedKeywords), $"Duplicate keyword found: '{keyword}'"); diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs index 70e59318595..13959d82d71 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs @@ -34,7 +34,7 @@ public SummaryEnricher(IChatClient chatClient, ChatOptions? chatOptions = null, int wordCount = maxWordCount.HasValue ? Throw.IfLessThanOrEqual(maxWordCount.Value, 0, nameof(maxWordCount)) : 100; _chatOptions = chatOptions?.Clone() ?? new(); - _chatOptions.Instructions = $"Write a summary text for this text with less than {wordCount} words. Return just the summary."; + _chatOptions.Instructions = $"Write a summary text for this text with no more than {wordCount} words. Return just the summary."; } /// diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/ClassificationEnricherTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/ClassificationEnricherTests.cs index 06c87561ec2..28f493edf6d 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/ClassificationEnricherTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/ClassificationEnricherTests.cs @@ -48,6 +48,13 @@ public void ThrowsOnFallbackInPredefinedClasses() Assert.Equal("predefinedClasses", ex.ParamName); } + [Fact] + public void ThrowsOnPredefinedClassesContainingComma() + { + var ex = Assert.Throws(() => new ClassificationEnricher(new TestChatClient(), predefinedClasses: ["n,t"])); + Assert.Equal("predefinedClasses", ex.ParamName); + } + [Fact] public async Task ThrowsOnNullChunks() { diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/KeywordEnricherTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/KeywordEnricherTests.cs index 1286a873c05..2356ee18a15 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/KeywordEnricherTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/KeywordEnricherTests.cs @@ -42,6 +42,15 @@ public void ThrowsOnDuplicateKeywords() Assert.Equal("predefinedKeywords", ex.ParamName); } + [Theory] + [InlineData(',')] + [InlineData(';')] + public void ThrowsOnIllegalCharacters(char illegal) + { + var ex = Assert.Throws(() => new KeywordEnricher(new TestChatClient(), predefinedKeywords: [$"n{illegal}t"])); + Assert.Equal("predefinedKeywords", ex.ParamName); + } + [Fact] public async Task ThrowsOnNullChunks() { From 11ada2bf36bdcc43d3e7b9c7cd420626ba0e6991 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Thu, 30 Oct 2025 14:24:50 +0100 Subject: [PATCH 10/10] address code review feedback --- .../Microsoft.Extensions.DataIngestion.csproj | 1 - .../Processors/ClassificationEnricher.cs | 21 ++++++++------- .../ImageAlternativeTextEnricher.cs | 8 +++--- .../Processors/KeywordEnricher.cs | 23 +++++++++------- .../Processors/SentimentEnricher.cs | 14 +++++++--- .../Processors/SummaryEnricher.cs | 8 +++--- .../AlternativeTextEnricherTests.cs | 15 ++++++++--- .../Processors/ClassificationEnricherTests.cs | 27 ++++++++++--------- .../Processors/KeywordEnricherTests.cs | 25 ++++++++++------- .../Processors/SentimentEnricherTests.cs | 16 ++++++++--- .../Processors/SummaryEnricherTests.cs | 16 ++++++++--- 11 files changed, 110 insertions(+), 64 deletions(-) diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Microsoft.Extensions.DataIngestion.csproj b/src/Libraries/Microsoft.Extensions.DataIngestion/Microsoft.Extensions.DataIngestion.csproj index 1cae686f9d8..bab4b509a36 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Microsoft.Extensions.DataIngestion.csproj +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Microsoft.Extensions.DataIngestion.csproj @@ -15,7 +15,6 @@ - diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs index 4132d238b86..e1cb1ca7438 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ClassificationEnricher.cs @@ -22,8 +22,9 @@ namespace Microsoft.Extensions.DataIngestion; public sealed class ClassificationEnricher : IngestionChunkProcessor { private readonly IChatClient _chatClient; - private readonly ChatOptions _chatOptions; + private readonly ChatOptions? _chatOptions; private readonly FrozenSet _predefinedClasses; + private readonly ChatMessage _systemPrompt; /// /// Initializes a new instance of the class. @@ -36,13 +37,14 @@ public ClassificationEnricher(IChatClient chatClient, ReadOnlySpan prede ChatOptions? chatOptions = null, string? fallbackClass = null) { _chatClient = Throw.IfNull(chatClient); + _chatOptions = chatOptions; if (string.IsNullOrWhiteSpace(fallbackClass)) { fallbackClass = "Unknown"; } _predefinedClasses = CreatePredefinedSet(predefinedClasses, fallbackClass!); - _chatOptions = CreateChatOptions(predefinedClasses, chatOptions, fallbackClass!); + _systemPrompt = CreateSystemPrompt(predefinedClasses, fallbackClass!); } /// @@ -60,6 +62,7 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy { var response = await _chatClient.GetResponseAsync( [ + _systemPrompt, new(ChatRole.User, chunk.Content) ], _chatOptions, cancellationToken: cancellationToken).ConfigureAwait(false); @@ -104,12 +107,14 @@ private static FrozenSet CreatePredefinedSet(ReadOnlySpan predef return predefinedClassesSet.ToFrozenSet(); } - private static ChatOptions CreateChatOptions(ReadOnlySpan predefinedClasses, ChatOptions? userProvided, string fallbackClass) + private static ChatMessage CreateSystemPrompt(ReadOnlySpan predefinedClasses, string fallbackClass) { - StringBuilder sb = new("You are a classification expert. Analyze the given text and assign a single, most relevant class. "); + StringBuilder sb = new("You are a classification expert. Analyze the given text and assign a single, most relevant class. Use only the following predefined classes: "); +#if NET9_0_OR_GREATER + sb.AppendJoin(", ", predefinedClasses!); +#else #pragma warning disable IDE0058 // Expression value is never used - sb.Append("Use only the following predefined classes: "); for (int i = 0; i < predefinedClasses.Length; i++) { sb.Append(predefinedClasses[i]); @@ -118,12 +123,10 @@ private static ChatOptions CreateChatOptions(ReadOnlySpan predefinedClas sb.Append(", "); } } - +#endif sb.Append(" and return ").Append(fallbackClass).Append(" when unable to classify."); #pragma warning restore IDE0058 // Expression value is never used - ChatOptions result = userProvided?.Clone() ?? new(); - result.Instructions = sb.ToString(); - return result; + return new(ChatRole.System, sb.ToString()); } } diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ImageAlternativeTextEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ImageAlternativeTextEnricher.cs index 4e5dae6be8c..5f68552cc3f 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ImageAlternativeTextEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/ImageAlternativeTextEnricher.cs @@ -16,7 +16,8 @@ namespace Microsoft.Extensions.DataIngestion; public sealed class ImageAlternativeTextEnricher : IngestionDocumentProcessor { private readonly IChatClient _chatClient; - private readonly ChatOptions _chatOptions; + private readonly ChatOptions? _chatOptions; + private readonly ChatMessage _systemPrompt; /// /// Initializes a new instance of the class. @@ -26,8 +27,8 @@ public sealed class ImageAlternativeTextEnricher : IngestionDocumentProcessor public ImageAlternativeTextEnricher(IChatClient chatClient, ChatOptions? chatOptions = null) { _chatClient = Throw.IfNull(chatClient); - _chatOptions = chatOptions?.Clone() ?? new(); - _chatOptions.Instructions = "Write a detailed alternative text for this image with less than 50 words."; + _chatOptions = chatOptions; + _systemPrompt = new(ChatRole.System, "Write a detailed alternative text for this image with less than 50 words."); } /// @@ -63,6 +64,7 @@ private async Task ProcessAsync(IngestionDocumentImage image, CancellationToken { var response = await _chatClient.GetResponseAsync( [ + _systemPrompt, new(ChatRole.User, [new DataContent(image.Content.Value, image.MediaType!)]) ], _chatOptions, cancellationToken: cancellationToken).ConfigureAwait(false); diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs index 108664fb456..56a305e2a87 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/KeywordEnricher.cs @@ -28,8 +28,9 @@ public sealed class KeywordEnricher : IngestionChunkProcessor private static readonly char[] _illegalCharacters = [';', ',']; #endif private readonly IChatClient _chatClient; - private readonly ChatOptions _chatOptions; + private readonly ChatOptions? _chatOptions; private readonly FrozenSet? _predefinedKeywords; + private readonly ChatMessage _systemPrompt; /// /// Initializes a new instance of the class. @@ -46,16 +47,17 @@ public sealed class KeywordEnricher : IngestionChunkProcessor public KeywordEnricher(IChatClient chatClient, ReadOnlySpan predefinedKeywords, ChatOptions? chatOptions = null, int? maxKeywords = null, double? confidenceThreshold = null) { + _chatClient = Throw.IfNull(chatClient); + _chatOptions = chatOptions; + _predefinedKeywords = CreatePredfinedKeywords(predefinedKeywords); + double threshold = confidenceThreshold.HasValue ? Throw.IfOutOfRange(confidenceThreshold.Value, 0.0, 1.0, nameof(confidenceThreshold)) : 0.7; int keywordsCount = maxKeywords.HasValue ? Throw.IfLessThanOrEqual(maxKeywords.Value, 0, nameof(maxKeywords)) : DefaultMaxKeywords; - - _chatClient = Throw.IfNull(chatClient); - _predefinedKeywords = CreatePredfinedKeywords(predefinedKeywords); - _chatOptions = CreateChatOptions(keywordsCount, predefinedKeywords, threshold, chatOptions); + _systemPrompt = CreateSystemPrompt(keywordsCount, predefinedKeywords, threshold); } /// @@ -74,6 +76,7 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy // Structured response is not used here because it's not part of Microsoft.Extensions.AI.Abstractions. var response = await _chatClient.GetResponseAsync( [ + _systemPrompt, new(ChatRole.User, chunk.Content) ], _chatOptions, cancellationToken: cancellationToken).ConfigureAwait(false); @@ -124,7 +127,7 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy return result.ToFrozenSet(StringComparer.Ordinal); } - private static ChatOptions CreateChatOptions(int maxKeywords, ReadOnlySpan predefinedKeywords, double confidenceThreshold, ChatOptions? userProvided) + private static ChatMessage CreateSystemPrompt(int maxKeywords, ReadOnlySpan predefinedKeywords, double confidenceThreshold) { StringBuilder sb = new($"You are a keyword extraction expert. Analyze the given text and extract up to {maxKeywords} most relevant keywords. "); @@ -132,6 +135,9 @@ private static ChatOptions CreateChatOptions(int maxKeywords, ReadOnlySpan { private readonly IChatClient _chatClient; - private readonly ChatOptions _chatOptions; + private readonly ChatOptions? _chatOptions; private readonly FrozenSet _validSentiments = #if NET9_0_OR_GREATER FrozenSet.Create(StringComparer.Ordinal, "Positive", "Negative", "Neutral", "Unknown"); #else new string[] { "Positive", "Negative", "Neutral", "Unknown" }.ToFrozenSet(StringComparer.Ordinal); #endif + private readonly ChatMessage _systemPrompt; /// /// Initializes a new instance of the class. @@ -38,11 +39,15 @@ public sealed class SentimentEnricher : IngestionChunkProcessor public SentimentEnricher(IChatClient chatClient, ChatOptions? chatOptions = null, double? confidenceThreshold = null) { _chatClient = Throw.IfNull(chatClient); + _chatOptions = chatOptions; double threshold = confidenceThreshold.HasValue ? Throw.IfOutOfRange(confidenceThreshold.Value, 0.0, 1.0, nameof(confidenceThreshold)) : 0.7; - _chatOptions = chatOptions?.Clone() ?? new(); - _chatOptions.Instructions = "You are a sentiment analysis expert. Analyze the sentiment of the given text and return Positive/Negative/Neutral or" + - $" Unknown when confidence score is below {threshold}. Return just the value of the sentiment."; + + string prompt = $""" + You are a sentiment analysis expert. Analyze the sentiment of the given text and return Positive/Negative/Neutral or + Unknown when confidence score is below {threshold}. Return just the value of the sentiment. + """; + _systemPrompt = new(ChatRole.System, prompt); } /// @@ -60,6 +65,7 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy { var response = await _chatClient.GetResponseAsync( [ + _systemPrompt, new(ChatRole.User, chunk.Content) ], _chatOptions, cancellationToken: cancellationToken).ConfigureAwait(false); diff --git a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs index 13959d82d71..f91b9809b05 100644 --- a/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs +++ b/src/Libraries/Microsoft.Extensions.DataIngestion/Processors/SummaryEnricher.cs @@ -20,7 +20,8 @@ namespace Microsoft.Extensions.DataIngestion; public sealed class SummaryEnricher : IngestionChunkProcessor { private readonly IChatClient _chatClient; - private readonly ChatOptions _chatOptions; + private readonly ChatOptions? _chatOptions; + private readonly ChatMessage _systemPrompt; /// /// Initializes a new instance of the class. @@ -31,10 +32,10 @@ public sealed class SummaryEnricher : IngestionChunkProcessor public SummaryEnricher(IChatClient chatClient, ChatOptions? chatOptions = null, int? maxWordCount = null) { _chatClient = Throw.IfNull(chatClient); + _chatOptions = chatOptions; int wordCount = maxWordCount.HasValue ? Throw.IfLessThanOrEqual(maxWordCount.Value, 0, nameof(maxWordCount)) : 100; - _chatOptions = chatOptions?.Clone() ?? new(); - _chatOptions.Instructions = $"Write a summary text for this text with no more than {wordCount} words. Return just the summary."; + _systemPrompt = new(ChatRole.System, $"Write a summary text for this text with no more than {wordCount} words. Return just the summary."); } /// @@ -52,6 +53,7 @@ public override async IAsyncEnumerable> ProcessAsync(IAsy { var response = await _chatClient.GetResponseAsync( [ + _systemPrompt, new(ChatRole.User, chunk.Content) ], _chatOptions, cancellationToken: cancellationToken).ConfigureAwait(false); diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/AlternativeTextEnricherTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/AlternativeTextEnricherTests.cs index 90cfbf383f3..cc59db3f389 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/AlternativeTextEnricherTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/AlternativeTextEnricherTests.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System; +using System.Linq; using System.Threading.Tasks; using Microsoft.Extensions.AI; using Xunit; @@ -12,7 +13,9 @@ public class AlternativeTextEnricherTests { [Fact] public void ThrowsOnNullChatClient() - => Assert.Throws(() => new ImageAlternativeTextEnricher(null!)); + { + Assert.Throws("chatClient", () => new ImageAlternativeTextEnricher(null!)); + } [Fact] public async Task ThrowsOnNullDocument() @@ -21,7 +24,7 @@ public async Task ThrowsOnNullDocument() ImageAlternativeTextEnricher sut = new(chatClient); - await Assert.ThrowsAsync(async () => await sut.ProcessAsync(null!)); + await Assert.ThrowsAsync("document", async () => await sut.ProcessAsync(null!)); } [Fact] @@ -36,8 +39,12 @@ public async Task CanGenerateImageAltText() { GetResponseAsyncCallback = (messages, options, cancellationToken) => { - var message = Assert.Single(messages); - var content = Assert.Single(message.Contents); + var materializedMessages = messages.ToArray(); + + Assert.Equal(2, materializedMessages.Length); + Assert.Equal(ChatRole.System, materializedMessages[0].Role); + Assert.Equal(ChatRole.User, materializedMessages[1].Role); + var content = Assert.Single(materializedMessages[1].Contents); DataContent dataContent = Assert.IsType(content); Assert.Equal("image/png", dataContent.MediaType); Assert.Equal(imageContent.ToArray(), dataContent.Data.ToArray()); diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/ClassificationEnricherTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/ClassificationEnricherTests.cs index 28f493edf6d..3f890969262 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/ClassificationEnricherTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/ClassificationEnricherTests.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using System.Linq; using System.Threading.Tasks; using Microsoft.Extensions.AI; using Xunit; @@ -16,43 +17,37 @@ public class ClassificationEnricherTests [Fact] public void ThrowsOnNullChatClient() { - var ex = Assert.Throws(() => new ClassificationEnricher(null!, predefinedClasses: ["some"])); - Assert.Equal("chatClient", ex.ParamName); + Assert.Throws("chatClient", () => new ClassificationEnricher(null!, predefinedClasses: ["some"])); } [Fact] public void ThrowsOnEmptyPredefinedClasses() { - var ex = Assert.Throws(() => new ClassificationEnricher(new TestChatClient(), predefinedClasses: [])); - Assert.Equal("predefinedClasses", ex.ParamName); + Assert.Throws("predefinedClasses", () => new ClassificationEnricher(new TestChatClient(), predefinedClasses: [])); } [Fact] public void ThrowsOnDuplicatePredefinedClasses() { - var ex = Assert.Throws(() => new ClassificationEnricher(new TestChatClient(), predefinedClasses: ["same", "same"])); - Assert.Equal("predefinedClasses", ex.ParamName); + Assert.Throws("predefinedClasses", () => new ClassificationEnricher(new TestChatClient(), predefinedClasses: ["same", "same"])); } [Fact] public void ThrowsOnPredefinedClassesContainingFallback() { - var ex = Assert.Throws(() => new ClassificationEnricher(new TestChatClient(), predefinedClasses: ["same", "Unknown"])); - Assert.Equal("predefinedClasses", ex.ParamName); + Assert.Throws("predefinedClasses", () => new ClassificationEnricher(new TestChatClient(), predefinedClasses: ["same", "Unknown"])); } [Fact] public void ThrowsOnFallbackInPredefinedClasses() { - var ex = Assert.Throws(() => new ClassificationEnricher(new TestChatClient(), predefinedClasses: ["some"], fallbackClass: "some")); - Assert.Equal("predefinedClasses", ex.ParamName); + Assert.Throws("predefinedClasses", () => new ClassificationEnricher(new TestChatClient(), predefinedClasses: ["some"], fallbackClass: "some")); } [Fact] public void ThrowsOnPredefinedClassesContainingComma() { - var ex = Assert.Throws(() => new ClassificationEnricher(new TestChatClient(), predefinedClasses: ["n,t"])); - Assert.Equal("predefinedClasses", ex.ParamName); + Assert.Throws("predefinedClasses", () => new ClassificationEnricher(new TestChatClient(), predefinedClasses: ["n,t"])); } [Fact] @@ -61,7 +56,7 @@ public async Task ThrowsOnNullChunks() using TestChatClient chatClient = new(); ClassificationEnricher sut = new(chatClient, predefinedClasses: ["some"]); - await Assert.ThrowsAsync(async () => + await Assert.ThrowsAsync("chunks", async () => { await foreach (var _ in sut.ProcessAsync(null!)) { @@ -79,6 +74,12 @@ public async Task CanClassify() { GetResponseAsyncCallback = (messages, options, cancellationToken) => { + var materializedMessages = messages.ToArray(); + + Assert.Equal(2, materializedMessages.Length); + Assert.Equal(ChatRole.System, materializedMessages[0].Role); + Assert.Equal(ChatRole.User, materializedMessages[1].Role); + return Task.FromResult(new ChatResponse(new[] { new ChatMessage(ChatRole.Assistant, classes[counter++]) diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/KeywordEnricherTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/KeywordEnricherTests.cs index 2356ee18a15..0f11cd7d46b 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/KeywordEnricherTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/KeywordEnricherTests.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using System.Linq; using System.Threading.Tasks; using Microsoft.Extensions.AI; using Xunit; @@ -15,15 +16,16 @@ public class KeywordEnricherTests [Fact] public void ThrowsOnNullChatClient() - => Assert.Throws(() => new KeywordEnricher(null!, predefinedKeywords: null, confidenceThreshold: 0.5)); + { + Assert.Throws("chatClient", () => new KeywordEnricher(null!, predefinedKeywords: null, confidenceThreshold: 0.5)); + } [Theory] [InlineData(-0.1)] [InlineData(1.1)] public void ThrowsOnInvalidThreshold(double threshold) { - var ex = Assert.Throws(() => new KeywordEnricher(new TestChatClient(), predefinedKeywords: null, confidenceThreshold: threshold)); - Assert.Equal("confidenceThreshold", ex.ParamName); + Assert.Throws("confidenceThreshold", () => new KeywordEnricher(new TestChatClient(), predefinedKeywords: null, confidenceThreshold: threshold)); } [Theory] @@ -31,15 +33,13 @@ public void ThrowsOnInvalidThreshold(double threshold) [InlineData(-1)] public void ThrowsOnInvalidMaxKeywords(int keywordCount) { - var ex = Assert.Throws(() => new KeywordEnricher(new TestChatClient(), predefinedKeywords: null, maxKeywords: keywordCount)); - Assert.Equal("maxKeywords", ex.ParamName); + Assert.Throws("maxKeywords", () => new KeywordEnricher(new TestChatClient(), predefinedKeywords: null, maxKeywords: keywordCount)); } [Fact] public void ThrowsOnDuplicateKeywords() { - var ex = Assert.Throws(() => new KeywordEnricher(new TestChatClient(), predefinedKeywords: ["same", "same"], confidenceThreshold: 0.5)); - Assert.Equal("predefinedKeywords", ex.ParamName); + Assert.Throws("predefinedKeywords", () => new KeywordEnricher(new TestChatClient(), predefinedKeywords: ["same", "same"], confidenceThreshold: 0.5)); } [Theory] @@ -47,8 +47,7 @@ public void ThrowsOnDuplicateKeywords() [InlineData(';')] public void ThrowsOnIllegalCharacters(char illegal) { - var ex = Assert.Throws(() => new KeywordEnricher(new TestChatClient(), predefinedKeywords: [$"n{illegal}t"])); - Assert.Equal("predefinedKeywords", ex.ParamName); + Assert.Throws("predefinedKeywords", () => new KeywordEnricher(new TestChatClient(), predefinedKeywords: [$"n{illegal}t"])); } [Fact] @@ -57,7 +56,7 @@ public async Task ThrowsOnNullChunks() using TestChatClient chatClient = new(); KeywordEnricher sut = new(chatClient, predefinedKeywords: null, confidenceThreshold: 0.5); - await Assert.ThrowsAsync(async () => + await Assert.ThrowsAsync("chunks", async () => { await foreach (var _ in sut.ProcessAsync(null!)) { @@ -77,6 +76,12 @@ public async Task CanExtractKeywords(params string[] predefined) { GetResponseAsyncCallback = (messages, options, cancellationToken) => { + var materializedMessages = messages.ToArray(); + + Assert.Equal(2, materializedMessages.Length); + Assert.Equal(ChatRole.System, materializedMessages[0].Role); + Assert.Equal(ChatRole.User, materializedMessages[1].Role); + return Task.FromResult(new ChatResponse(new[] { new ChatMessage(ChatRole.Assistant, keywords[counter++]) diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SentimentEnricherTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SentimentEnricherTests.cs index fe92024c5a3..166b3c05959 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SentimentEnricherTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SentimentEnricherTests.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using System.Linq; using System.Threading.Tasks; using Microsoft.Extensions.AI; using Xunit; @@ -15,15 +16,16 @@ public class SentimentEnricherTests [Fact] public void ThrowsOnNullChatClient() - => Assert.Throws(() => new SentimentEnricher(null!)); + { + Assert.Throws("chatClient", () => new SentimentEnricher(null!)); + } [Theory] [InlineData(-0.1)] [InlineData(1.1)] public void ThrowsOnInvalidThreshold(double threshold) { - var ex = Assert.Throws(() => new SentimentEnricher(new TestChatClient(), confidenceThreshold: threshold)); - Assert.Equal("confidenceThreshold", ex.ParamName); + Assert.Throws("confidenceThreshold", () => new SentimentEnricher(new TestChatClient(), confidenceThreshold: threshold)); } [Fact] @@ -32,7 +34,7 @@ public async Task ThrowsOnNullChunks() using TestChatClient chatClient = new(); SentimentEnricher sut = new(chatClient); - await Assert.ThrowsAsync(async () => + await Assert.ThrowsAsync("chunks", async () => { await foreach (var _ in sut.ProcessAsync(null!)) { @@ -50,6 +52,12 @@ public async Task CanProvideSentiment() { GetResponseAsyncCallback = (messages, options, cancellationToken) => { + var materializedMessages = messages.ToArray(); + + Assert.Equal(2, materializedMessages.Length); + Assert.Equal(ChatRole.System, materializedMessages[0].Role); + Assert.Equal(ChatRole.User, materializedMessages[1].Role); + return Task.FromResult(new ChatResponse(new[] { new ChatMessage(ChatRole.Assistant, sentiments[counter++]) diff --git a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SummaryEnricherTests.cs b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SummaryEnricherTests.cs index a3b698e1d9a..6fda37004d3 100644 --- a/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SummaryEnricherTests.cs +++ b/test/Libraries/Microsoft.Extensions.DataIngestion.Tests/Processors/SummaryEnricherTests.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using System.Linq; using System.Threading.Tasks; using Microsoft.Extensions.AI; using Xunit; @@ -15,15 +16,16 @@ public class SummaryEnricherTests [Fact] public void ThrowsOnNullChatClient() - => Assert.Throws(() => new SummaryEnricher(null!)); + { + Assert.Throws("chatClient", () => new SummaryEnricher(null!)); + } [Theory] [InlineData(0)] [InlineData(-1)] public void ThrowsOnInvalidMaxKeywords(int wordCount) { - var ex = Assert.Throws(() => new SummaryEnricher(new TestChatClient(), maxWordCount: wordCount)); - Assert.Equal("maxWordCount", ex.ParamName); + Assert.Throws("maxWordCount", () => new SummaryEnricher(new TestChatClient(), maxWordCount: wordCount)); } [Fact] @@ -32,7 +34,7 @@ public async Task ThrowsOnNullChunks() using TestChatClient chatClient = new(); SummaryEnricher sut = new(chatClient); - await Assert.ThrowsAsync(async () => + await Assert.ThrowsAsync("chunks", async () => { await foreach (var _ in sut.ProcessAsync(null!)) { @@ -50,6 +52,12 @@ public async Task CanProvideSummary() { GetResponseAsyncCallback = (messages, options, cancellationToken) => { + var materializedMessages = messages.ToArray(); + + Assert.Equal(2, materializedMessages.Length); + Assert.Equal(ChatRole.System, materializedMessages[0].Role); + Assert.Equal(ChatRole.User, materializedMessages[1].Role); + return Task.FromResult(new ChatResponse(new[] { new ChatMessage(ChatRole.Assistant, summaries[counter++])