diff --git a/clients/dotnet/WebClient/MemoryWebClient.cs b/clients/dotnet/WebClient/MemoryWebClient.cs index a56aa3eb2..4303fd4de 100644 --- a/clients/dotnet/WebClient/MemoryWebClient.cs +++ b/clients/dotnet/WebClient/MemoryWebClient.cs @@ -7,6 +7,8 @@ using System.Linq; using System.Net; using System.Net.Http; +using System.Net.Http.Json; +using System.Runtime.CompilerServices; using System.Text; using System.Text.Json; using System.Threading; @@ -358,6 +360,48 @@ public async Task AskAsync( return JsonSerializer.Deserialize(json, s_caseInsensitiveJsonOptions) ?? new MemoryAnswer(); } + /// + public async IAsyncEnumerable AskStreamingAsync( + string question, + string? index = null, + MemoryFilter? filter = null, + ICollection? filters = null, + double minRelevance = 0, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + if (filter != null) + { + if (filters == null) { filters = new List(); } + + filters.Add(filter); + } + + MemoryQuery request = new() + { + Index = index, + Question = question, + Filters = (filters is { Count: > 0 }) ? filters.ToList() : new(), + MinRelevance = minRelevance + }; + using StringContent content = new(JsonSerializer.Serialize(request), Encoding.UTF8, "application/json"); + + using var httpRequest = new HttpRequestMessage(HttpMethod.Post, Constants.HttpAskStreamEndpoint); + httpRequest.Content = content; + + using HttpResponseMessage response = await this._client.SendAsync(httpRequest, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + response.EnsureSuccessStatusCode(); + + await foreach (var responsePart in response.Content.ReadFromJsonAsAsyncEnumerable(cancellationToken)) + { + if (responsePart is null) + { + continue; + } + + yield return responsePart; + } + } + #region private private static (string contentType, long contentLength, DateTimeOffset lastModified) GetFileDetails(HttpResponseMessage response) diff --git a/examples/001-dotnet-WebClient/Program.cs b/examples/001-dotnet-WebClient/Program.cs index 437dee4e4..72eb71ab1 100644 --- a/examples/001-dotnet-WebClient/Program.cs +++ b/examples/001-dotnet-WebClient/Program.cs @@ -55,6 +55,7 @@ public static async Task Main() // ======================= await AskSimpleQuestion(); + await AskSimpleQuestionWithStreamingAndShowSources(); await AskSimpleQuestionAndShowSources(); await AskQuestionAboutImageContent(); await AskQuestionUsingFilter(); @@ -272,6 +273,53 @@ due to the speed of light being a very large number when squared. This concept i */ } + private static async Task AskSimpleQuestionWithStreamingAndShowSources() + { + var question = "Any news from NASA about Orion?"; + Console.WriteLine($"Question: {question}"); + var answer = s_memory.AskStreamingAsync(question, filter: MemoryFilters.ByTag("user", "Taylor")); + Console.WriteLine("\nAnswer:\n"); + + List? citations = []; + bool isFirstPart = true; + await foreach (var answerPart in answer) + { + if (isFirstPart) + { + citations = answerPart.RelevantSources; + isFirstPart = false; + } + + Console.Write(answerPart.Result); + } + + Console.WriteLine("\n\nSources:\n"); + foreach (var x in citations) + { + Console.WriteLine(x.SourceUrl != null + ? $" - {x.SourceUrl} [{x.Partitions.First().LastUpdate:D}]" + : $" - {x.SourceName} - {x.Link} [{x.Partitions.First().LastUpdate:D}]"); + } + + Console.WriteLine("\n====================================\n"); + + /* OUTPUT + + Question: Any news from NASA about Orion? + + Answer: + Yes, NASA has invited media to see the new test version of the Orion spacecraft and the hardware teams will use to recover the capsule and astronauts upon their return from space during the Artemis II mission. + The event will take place at Naval Base San Diego on August 2. + Personnel involved in recovery operations from NASA, the U.S. Navy, and the U.S. Air Force will be available to speak with media. + Teams are currently conducting tests in the Pacific Ocean to demonstrate and evaluate the processes, procedures, and hardware for recovery operations for crewed Artemis missions. + The tests will help prepare the team for Artemis II, NASA's first crewed mission under Artemis that will send four astronauts in Orion around the Moon to checkout systems ahead of future lunar missions. + The Artemis II crew will participate in recovery testing at sea next year. + + Sources: + - /download?index=default&documentId=doc003&filename=file5-NASA-news.pdf [Friday, 17 May 2024] + */ + } + // Another question without filters and show sources private static async Task AskSimpleQuestionAndShowSources() { diff --git a/examples/002-dotnet-Serverless/Program.cs b/examples/002-dotnet-Serverless/Program.cs index 10bb354a5..33411b2db 100644 --- a/examples/002-dotnet-Serverless/Program.cs +++ b/examples/002-dotnet-Serverless/Program.cs @@ -91,6 +91,7 @@ public static async Task Main() // ======================= await AskSimpleQuestion(); + await AskSimpleQuestionWithStreamingAndShowSources(); await AskSimpleQuestionAndShowSources(); await AskQuestionAboutImageContent(); await AskQuestionUsingFilter(); @@ -310,6 +311,55 @@ due to the speed of light being a very large number when squared. This concept i */ } + // Question without filters and show sources, with streaming + private static async Task AskSimpleQuestionWithStreamingAndShowSources() + { + var question = "What's E = m*c^2?"; + Console.WriteLine($"Question: {question}"); + + var answer = s_memory.AskStreamingAsync(question, minRelevance: 0.76); + Console.WriteLine("\nAnswer:\n"); + + List? citations = []; + bool isFirstPart = true; + await foreach (var answerPart in answer) + { + if (isFirstPart) + { + citations = answerPart.RelevantSources; + isFirstPart = false; + } + + Console.Write(answerPart.Result); + } + + Console.WriteLine("\n\nSources:\n"); + foreach (var x in citations) + { + Console.WriteLine(x.SourceUrl != null + ? $" - {x.SourceUrl} [{x.Partitions.First().LastUpdate:D}]" + : $" - {x.SourceName} - {x.Link} [{x.Partitions.First().LastUpdate:D}]"); + } + + Console.WriteLine("\n====================================\n"); + + /* OUTPUT + + Question: What's E = m*c^2? + + Answer: E = m*c^2 is the formula representing the principle of mass-energy equivalence, which was introduced by Albert Einstein. In this equation, + E stands for energy, m represents mass, and c is the speed of light in a vacuum, which is approximately 299,792,458 meters per second (m/s). + The equation states that the energy (E) of a system in its rest frame is equal to its mass (m) multiplied by the square of the speed of light (c^2). + This implies that mass and energy are interchangeable; a small amount of mass can be converted into a large amount of energy and vice versa, + due to the speed of light being a very large number when squared. This concept is a fundamental principle in physics and has important implications + in various fields, including nuclear physics and cosmology. + + Sources: + - /download?index=default&documentId=doc003&filename=file5-NASA-news.pdf [Friday, 17 May 2024] + + */ + } + // Another question without filters and show sources private static async Task AskSimpleQuestionAndShowSources() { diff --git a/service/Abstractions/Constants.cs b/service/Abstractions/Constants.cs index 820e4af20..465af33d4 100644 --- a/service/Abstractions/Constants.cs +++ b/service/Abstractions/Constants.cs @@ -52,6 +52,7 @@ public static class Constants // Endpoints public const string HttpAskEndpoint = "/ask"; + public const string HttpAskStreamEndpoint = "/ask/stream"; public const string HttpSearchEndpoint = "/search"; public const string HttpDownloadEndpoint = "/download"; public const string HttpUploadEndpoint = "/upload"; diff --git a/service/Abstractions/IKernelMemory.cs b/service/Abstractions/IKernelMemory.cs index 00ce78632..6da82c9b6 100644 --- a/service/Abstractions/IKernelMemory.cs +++ b/service/Abstractions/IKernelMemory.cs @@ -211,4 +211,22 @@ public Task AskAsync( ICollection? filters = null, double minRelevance = 0, CancellationToken cancellationToken = default); + + /// + /// Search the given index for an answer to the given query. + /// + /// Question to answer + /// Optional index name + /// Filter to match + /// Filters to match (using inclusive OR logic). If 'filter' is provided too, the value is merged into this list. + /// Minimum Cosine Similarity required + /// Async task cancellation token + /// A stream that contains an answer to the query, or an empty list + public IAsyncEnumerable AskStreamingAsync( + string question, + string? index = null, + MemoryFilter? filter = null, + ICollection? filters = null, + double minRelevance = 0, + CancellationToken cancellationToken = default); } diff --git a/service/Abstractions/Search/ISearchClient.cs b/service/Abstractions/Search/ISearchClient.cs index 9925ba052..33c5724bd 100644 --- a/service/Abstractions/Search/ISearchClient.cs +++ b/service/Abstractions/Search/ISearchClient.cs @@ -45,6 +45,23 @@ Task AskAsync( double minRelevance = 0, CancellationToken cancellationToken = default); + /// + /// Answer the given question, if possible, grounding the response with relevant memories matching the given criteria. + /// First result in the stream contains metadata about the result, subsequent results only contain answer tokens. + /// + /// Index (aka collection) to search for grounding information + /// Question to answer + /// Filtering criteria to select memories to consider + /// Minimum relevance of the memories considered + /// Async task cancellation token + /// Answer to the given question + IAsyncEnumerable AskStreamingAsync( + string index, + string question, + ICollection? filters = null, + double minRelevance = 0, + CancellationToken cancellationToken = default); + /// /// List the available memory indexes (aka collections). /// diff --git a/service/Core/MemoryServerless.cs b/service/Core/MemoryServerless.cs index d8f9ff78b..5a3e4d05c 100644 --- a/service/Core/MemoryServerless.cs +++ b/service/Core/MemoryServerless.cs @@ -257,4 +257,29 @@ public Task AskAsync( minRelevance: minRelevance, cancellationToken: cancellationToken); } + + /// + public IAsyncEnumerable AskStreamingAsync( + string question, + string? index = null, + MemoryFilter? filter = null, + ICollection? filters = null, + double minRelevance = 0, + CancellationToken cancellationToken = default) + { + if (filter != null) + { + if (filters == null) { filters = new List(); } + + filters.Add(filter); + } + + index = IndexName.CleanName(index, this._defaultIndexName); + return this._searchClient.AskStreamingAsync( + index: index, + question: question, + filters: filters, + minRelevance: minRelevance, + cancellationToken: cancellationToken); + } } diff --git a/service/Core/MemoryService.cs b/service/Core/MemoryService.cs index 818e3d6c4..1370cb552 100644 --- a/service/Core/MemoryService.cs +++ b/service/Core/MemoryService.cs @@ -234,4 +234,29 @@ public Task AskAsync( minRelevance: minRelevance, cancellationToken: cancellationToken); } + + /// + public IAsyncEnumerable AskStreamingAsync( + string question, + string? index = null, + MemoryFilter? filter = null, + ICollection? filters = null, + double minRelevance = 0, + CancellationToken cancellationToken = default) + { + if (filter != null) + { + if (filters == null) { filters = new List(); } + + filters.Add(filter); + } + + index = IndexName.CleanName(index, this._defaultIndexName); + return this._searchClient.AskStreamingAsync( + index: index, + question: question, + filters: filters, + minRelevance: minRelevance, + cancellationToken: cancellationToken); + } } diff --git a/service/Core/Search/SearchClient.cs b/service/Core/Search/SearchClient.cs index 8a7eaf269..ad6027185 100644 --- a/service/Core/Search/SearchClient.cs +++ b/service/Core/Search/SearchClient.cs @@ -3,7 +3,7 @@ using System; using System.Collections.Generic; using System.Diagnostics; -using System.Linq; +using System.Runtime.CompilerServices; using System.Text; using System.Threading; using System.Threading.Tasks; @@ -118,15 +118,6 @@ public async Task SearchAsync( // Memories are sorted by relevance, starting from the most relevant foreach ((MemoryRecord memory, double relevance) in list) { - // Note: a document can be composed by multiple files - string documentId = memory.GetDocumentId(this._log); - - // Identify the file in case there are multiple files - string fileId = memory.GetFileId(this._log); - - // Note: this is not a URL and perhaps could be dropped. For now it acts as a unique identifier. See also SourceUrl. - string linkToFile = $"{index}/{documentId}/{fileId}"; - var partitionText = memory.GetPartitionText(this._log).Trim(); if (string.IsNullOrEmpty(partitionText)) { @@ -137,32 +128,7 @@ public async Task SearchAsync( // Relevance is `float.MinValue` when search uses only filters and no embeddings (see code above) if (relevance > float.MinValue) { this._log.LogTrace("Adding result with relevance {0}", relevance); } - // If the file is already in the list of citations, only add the partition - var citation = result.Results.FirstOrDefault(x => x.Link == linkToFile); - if (citation == null) - { - citation = new Citation(); - result.Results.Add(citation); - } - - // Add the partition to the list of citations - citation.Index = index; - citation.DocumentId = documentId; - citation.FileId = fileId; - citation.Link = linkToFile; - citation.SourceContentType = memory.GetFileContentType(this._log); - citation.SourceName = memory.GetFileName(this._log); - citation.SourceUrl = memory.GetWebPageUrl(index); - - citation.Partitions.Add(new Citation.Partition - { - Text = partitionText, - Relevance = (float)relevance, - PartitionNumber = memory.GetPartitionNumber(this._log), - SectionNumber = memory.GetSectionNumber(), - LastUpdate = memory.GetLastUpdate(), - Tags = memory.Tags, - }); + this.MapMatchToCitation(index, result.Results, memory, relevance); // In cases where a buggy storage connector is returning too many records if (result.Results.Count >= this._config.MaxMatchesCount) @@ -227,17 +193,7 @@ public async Task AskAsync( // Memories are sorted by relevance, starting from the most relevant await foreach ((MemoryRecord memory, double relevance) in matches.ConfigureAwait(false)) { - // Note: a document can be composed by multiple files - string documentId = memory.GetDocumentId(this._log); - - // Identify the file in case there are multiple files - string fileId = memory.GetFileId(this._log); - - // Note: this is not a URL and perhaps could be dropped. For now it acts as a unique identifier. See also SourceUrl. - string linkToFile = $"{index}/{documentId}/{fileId}"; - string fileName = memory.GetFileName(this._log); - string webPageUrl = memory.GetWebPageUrl(index); var partitionText = memory.GetPartitionText(this._log).Trim(); @@ -249,8 +205,7 @@ public async Task AskAsync( factsAvailableCount++; - // TODO: add file age in days, to push relevance of newer documents - var fact = $"==== [File:{(fileName == "content.url" ? webPageUrl : fileName)};Relevance:{relevance:P1}]:\n{partitionText}\n"; + var fact = GenerateFactString(fileName, webPageUrl, relevance, partitionText); // Use the partition/chunk only if there's room for it var size = this._textGenerator.CountTokens(fact); @@ -266,32 +221,7 @@ public async Task AskAsync( facts.Append(fact); tokensAvailable -= size; - // If the file is already in the list of citations, only add the partition - var citation = answer.RelevantSources.FirstOrDefault(x => x.Link == linkToFile); - if (citation == null) - { - citation = new Citation(); - answer.RelevantSources.Add(citation); - } - - // Add the partition to the list of citations - citation.Index = index; - citation.DocumentId = documentId; - citation.FileId = fileId; - citation.Link = linkToFile; - citation.SourceContentType = memory.GetFileContentType(this._log); - citation.SourceName = fileName; - citation.SourceUrl = memory.GetWebPageUrl(index); - - citation.Partitions.Add(new Citation.Partition - { - Text = partitionText, - Relevance = (float)relevance, - PartitionNumber = memory.GetPartitionNumber(this._log), - SectionNumber = memory.GetSectionNumber(), - LastUpdate = memory.GetLastUpdate(), - Tags = memory.Tags, - }); + this.MapMatchToCitation(index, answer.RelevantSources, memory, relevance); // In cases where a buggy storage connector is returning too many records if (factsUsedCount >= this._config.MaxMatchesCount) @@ -333,8 +263,9 @@ public async Task AskAsync( watch.Stop(); answer.Result = text.ToString(); - answer.NoResult = ValueIsEquivalentTo(answer.Result, this._config.EmptyAnswer); - if (answer.NoResult) + var noResult = ValueIsEquivalentTo(answer.Result, this._config.EmptyAnswer); + answer.NoResult = noResult; + if (noResult) { answer.NoResultReason = "No relevant memories found"; this._log.LogTrace("Answer generated in {0} msecs. No relevant memories found", watch.ElapsedMilliseconds); @@ -347,6 +278,169 @@ public async Task AskAsync( return answer; } + /// + public async IAsyncEnumerable AskStreamingAsync( + string index, + string question, + ICollection? filters = null, + double minRelevance = 0, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + var noAnswerFound = new MemoryAnswer + { + Question = question, + NoResult = true, + Result = this._config.EmptyAnswer, + }; + + if (string.IsNullOrEmpty(question)) + { + this._log.LogWarning("No question provided"); + noAnswerFound.NoResultReason = "No question provided"; + yield return noAnswerFound; + yield break; + } + + var facts = new StringBuilder(); + var maxTokens = this._config.MaxAskPromptSize > 0 + ? this._config.MaxAskPromptSize + : this._textGenerator.MaxTokenTotal; + var tokensAvailable = maxTokens + - this._textGenerator.CountTokens(this._answerPrompt) + - this._textGenerator.CountTokens(question) + - this._config.AnswerTokens; + + var factsUsedCount = 0; + var factsAvailableCount = 0; + var answer = noAnswerFound; + + this._log.LogTrace("Fetching relevant memories"); + IAsyncEnumerable<(MemoryRecord, double)> matches = this._memoryDb.GetSimilarListAsync( + index: index, + text: question, + filters: filters, + minRelevance: minRelevance, + limit: this._config.MaxMatchesCount, + withEmbeddings: false, + cancellationToken: cancellationToken); + + // Memories are sorted by relevance, starting from the most relevant + await foreach ((MemoryRecord memory, double relevance) in matches.ConfigureAwait(false)) + { + string fileName = memory.GetFileName(this._log); + string webPageUrl = memory.GetWebPageUrl(index); + + var partitionText = memory.GetPartitionText(this._log).Trim(); + if (string.IsNullOrEmpty(partitionText)) + { + this._log.LogError("The document partition is empty, doc: {0}", memory.Id); + continue; + } + + factsAvailableCount++; + + var fact = GenerateFactString(fileName, webPageUrl, relevance, partitionText); + + // Use the partition/chunk only if there's room for it + var size = this._textGenerator.CountTokens(fact); + if (size >= tokensAvailable) + { + // Stop after reaching the max number of tokens + break; + } + + factsUsedCount++; + this._log.LogTrace("Adding text {0} with relevance {1}", factsUsedCount, relevance); + + facts.Append(fact); + tokensAvailable -= size; + + this.MapMatchToCitation(index, answer.RelevantSources, memory, relevance); + + // In cases where a buggy storage connector is returning too many records + if (factsUsedCount >= this._config.MaxMatchesCount) + { + break; + } + } + + if (factsAvailableCount > 0 && factsUsedCount == 0) + { + this._log.LogError("Unable to inject memories in the prompt, not enough tokens available"); + noAnswerFound.NoResultReason = "Unable to use memories"; + yield return noAnswerFound; + yield break; + } + + if (factsUsedCount == 0) + { + this._log.LogWarning("No memories available"); + noAnswerFound.NoResultReason = "No memories available"; + yield return noAnswerFound; + yield break; + } + + StringBuilder bufferedAnswer = new(); + bool finishedRequiredBuffering = false; + var watch = Stopwatch.StartNew(); + await foreach (var token in this.GenerateAnswer(question, facts.ToString()).WithCancellation(cancellationToken).ConfigureAwait(false)) + { + if (token is null || token.Length == 0) + { + continue; + } + + bufferedAnswer.Append(token); + + int currentLength = bufferedAnswer.Length; + + if (!finishedRequiredBuffering) + { + // Adding 5 to the length to ensure that the extra tokens in ValueIsEquivalentTo can be checked (non-text tokens) + if (currentLength <= this._config.EmptyAnswer.Length + 5 && ValueIsEquivalentTo(bufferedAnswer.ToString(), this._config.EmptyAnswer)) + { + this._log.LogTrace("Answer generated in {0} msecs. No relevant memories found", watch.ElapsedMilliseconds); + noAnswerFound.NoResultReason = "No relevant memories found"; + yield return noAnswerFound; + yield break; + } + else if (currentLength > this._config.EmptyAnswer.Length) + { + finishedRequiredBuffering = true; + answer.NoResult = false; + answer.Result = bufferedAnswer.ToString(); + yield return answer; + } + } + else + { + yield return new MemoryAnswer + { + Result = token, + NoResult = false, + Question = "", + RelevantSources = [], + }; + } + + if (this._log.IsEnabled(LogLevel.Trace) && currentLength >= 30) + { + this._log.LogTrace("{0} chars generated", currentLength); + } + } + + //Edge case when the generated answer is shorter than the configured empty answer + if (!finishedRequiredBuffering) + { + answer.NoResult = false; + answer.Result = bufferedAnswer.ToString(); + yield return answer; + } + + watch.Stop(); + this._log.LogTrace("Answer generated in {0} msecs", watch.ElapsedMilliseconds); + } + private IAsyncEnumerable GenerateAnswer(string question, string facts) { var prompt = this._answerPrompt; @@ -385,4 +479,51 @@ private static bool ValueIsEquivalentTo(string value, string target) target = target.Trim().Trim('.', '"', '\'', '`', '~', '!', '?', '@', '#', '$', '%', '^', '+', '*', '_', '-', '=', '|', '\\', '/', '(', ')', '[', ']', '{', '}', '<', '>'); return string.Equals(value, target, StringComparison.OrdinalIgnoreCase); } + + private static string GenerateFactString(string fileName, string webPageUrl, double relevance, string partitionText) + { + // TODO: add file age in days, to push relevance of newer documents + return $"==== [File:{(fileName == "content.url" ? webPageUrl : fileName)};Relevance:{relevance:P1}]:\n{partitionText}\n"; + } + + private void MapMatchToCitation(string index, List citations, MemoryRecord memory, double relevance) + { + string partitionText = memory.GetPartitionText(this._log).Trim(); + + // Note: a document can be composed by multiple files + string documentId = memory.GetDocumentId(this._log); + + // Identify the file in case there are multiple files + string fileId = memory.GetFileId(this._log); + + // Note: this is not a URL and perhaps could be dropped. For now it acts as a unique identifier. See also SourceUrl. + string linkToFile = $"{index}/{documentId}/{fileId}"; + + // If the file is already in the list of citations, only add the partition + Citation? citation = citations.Find(x => x.Link == linkToFile); + if (citation == null) + { + citation = new Citation(); + citations.Add(citation); + } + + // Add the partition to the list of citations + citation.Index = index; + citation.DocumentId = documentId; + citation.FileId = fileId; + citation.Link = linkToFile; + citation.SourceContentType = memory.GetFileContentType(this._log); + citation.SourceName = memory.GetFileName(this._log); + citation.SourceUrl = memory.GetWebPageUrl(index); + + citation.Partitions.Add(new Citation.Partition + { + Text = partitionText, + Relevance = (float)relevance, + PartitionNumber = memory.GetPartitionNumber(this._log), + SectionNumber = memory.GetSectionNumber(), + LastUpdate = memory.GetLastUpdate(), + Tags = memory.Tags, + }); + } } diff --git a/service/Service.AspNetCore/WebAPIEndpoints.cs b/service/Service.AspNetCore/WebAPIEndpoints.cs index 0d6d5e5c6..e1eb77066 100644 --- a/service/Service.AspNetCore/WebAPIEndpoints.cs +++ b/service/Service.AspNetCore/WebAPIEndpoints.cs @@ -11,6 +11,8 @@ using Microsoft.Extensions.Logging; using Microsoft.KernelMemory.Service.AspNetCore.Models; using System.IO; +using System.Text.Json; +using System.Text.Json.Serialization; using Microsoft.AspNetCore.Http.HttpResults; using Microsoft.KernelMemory.DocumentStorage; @@ -28,6 +30,7 @@ public static IEndpointRouteBuilder AddKernelMemoryEndpoints( builder.AddDeleteIndexesEndpoint(apiPrefix, authFilter); builder.AddDeleteDocumentsEndpoint(apiPrefix, authFilter); builder.AddAskEndpoint(apiPrefix, authFilter); + builder.AddAskStreamEndpoint(apiPrefix, authFilter); builder.AddSearchEndpoint(apiPrefix, authFilter); builder.AddUploadStatusEndpoint(apiPrefix, authFilter); builder.AddGetDownloadEndpoint(apiPrefix, authFilter); @@ -224,6 +227,38 @@ async Task ( if (authFilter != null) { route.AddEndpointFilter(authFilter); } } + public static void AddAskStreamEndpoint( + this IEndpointRouteBuilder builder, string apiPrefix = "/", IEndpointFilter? authFilter = null) + { + RouteGroupBuilder group = builder.MapGroup(apiPrefix); + + // Ask streaming endpoint + var route = group.MapPost(Constants.HttpAskStreamEndpoint, IResult ( + MemoryQuery query, + IKernelMemory service, + ILogger log, + CancellationToken cancellationToken) => + { + log.LogTrace("New search request, index '{0}', minRelevance {1}", query.Index, query.MinRelevance); + return Results.Json( + service.AskStreamingAsync( + question: query.Question, + index: query.Index, + filters: query.Filters, + minRelevance: query.MinRelevance, + cancellationToken: cancellationToken), + new JsonSerializerOptions + { + DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull + }); + }) + .Produces>(StatusCodes.Status200OK) + .Produces(StatusCodes.Status401Unauthorized) + .Produces(StatusCodes.Status403Forbidden); + + if (authFilter != null) { route.AddEndpointFilter(authFilter); } + } + public static void AddSearchEndpoint( this IEndpointRouteBuilder builder, string apiPrefix = "/", IEndpointFilter? authFilter = null) {