diff --git a/dotnet/src/Plugins/Plugins.UnitTests/Web/Brave/BraveTextSearchTests.cs b/dotnet/src/Plugins/Plugins.UnitTests/Web/Brave/BraveTextSearchTests.cs index 0435df46a31d..84f7a3a478e9 100644 --- a/dotnet/src/Plugins/Plugins.UnitTests/Web/Brave/BraveTextSearchTests.cs +++ b/dotnet/src/Plugins/Plugins.UnitTests/Web/Brave/BraveTextSearchTests.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. #pragma warning disable CS0618 // ITextSearch is obsolete +#pragma warning disable CS8602 // Dereference of a possibly null reference - for LINQ expression properties using System; using System.IO; @@ -110,7 +111,7 @@ public async Task GetSearchResultsReturnsSuccessfullyAsync() var resultList = await result.Results.ToListAsync(); Assert.NotNull(resultList); Assert.Equal(10, resultList.Count); - foreach (BraveWebResult webPage in resultList) + foreach (BraveWebPage webPage in resultList.Cast()) { Assert.NotNull(webPage.Title); Assert.NotNull(webPage.Description); @@ -195,7 +196,7 @@ public async Task BuildsCorrectUriForEqualityFilterAsync(string paramName, objec // Act TextSearchOptions searchOptions = new() { Top = 5, Skip = 0, Filter = new TextSearchFilter().Equality(paramName, paramValue) }; - KernelSearchResults result = await textSearch.GetSearchResultsAsync("What is the Semantic Kernel?", searchOptions); + var result = await textSearch.GetSearchResultsAsync("What is the Semantic Kernel?", searchOptions); // Assert var requestUris = this._messageHandlerStub.RequestUris; @@ -243,6 +244,151 @@ public void Dispose() GC.SuppressFinalize(this); } + #region Generic ITextSearch Interface Tests + + [Fact] + public async Task LinqSearchAsyncReturnsResultsSuccessfullyAsync() + { + // Arrange + this._messageHandlerStub.AddJsonResponse(File.ReadAllText(WhatIsTheSkResponseJson)); + ITextSearch textSearch = new BraveTextSearch(apiKey: "ApiKey", options: new() { HttpClient = this._httpClient }); + + // Act + var searchOptions = new TextSearchOptions + { + Top = 4, + 
Skip = 0 + }; + KernelSearchResults result = await textSearch.SearchAsync("What is the Semantic Kernel?", searchOptions); + + // Assert - Verify basic generic interface functionality + Assert.NotNull(result); + Assert.NotNull(result.Results); + var resultList = await result.Results.ToListAsync(); + Assert.NotEmpty(resultList); + + // Verify the request was made correctly + var requestUris = this._messageHandlerStub.RequestUris; + Assert.Single(requestUris); + Assert.NotNull(requestUris[0]); + Assert.Contains("count=4", requestUris[0].AbsoluteUri); + } + + [Fact] + public async Task LinqGetSearchResultsAsyncReturnsResultsSuccessfullyAsync() + { + // Arrange + this._messageHandlerStub.AddJsonResponse(File.ReadAllText(WhatIsTheSkResponseJson)); + ITextSearch textSearch = new BraveTextSearch(apiKey: "ApiKey", options: new() { HttpClient = this._httpClient }); + + // Act + var searchOptions = new TextSearchOptions + { + Top = 3, + Skip = 0 + }; + KernelSearchResults result = await textSearch.GetSearchResultsAsync("What is the Semantic Kernel?", searchOptions); + + // Assert - Verify generic interface returns results + Assert.NotNull(result); + Assert.NotNull(result.Results); + var resultList = await result.Results.ToListAsync(); + Assert.NotEmpty(resultList); + // Results are now strongly typed as BraveWebPage + + // Verify the request was made correctly + var requestUris = this._messageHandlerStub.RequestUris; + Assert.Single(requestUris); + Assert.NotNull(requestUris[0]); + Assert.Contains("count=3", requestUris[0].AbsoluteUri); + } + + [Fact] + public async Task LinqGetTextSearchResultsAsyncReturnsResultsSuccessfullyAsync() + { + // Arrange + this._messageHandlerStub.AddJsonResponse(File.ReadAllText(WhatIsTheSkResponseJson)); + ITextSearch textSearch = new BraveTextSearch(apiKey: "ApiKey", options: new() { HttpClient = this._httpClient }); + + // Act + var searchOptions = new TextSearchOptions + { + Top = 5, + Skip = 0 + }; + KernelSearchResults result = await 
textSearch.GetTextSearchResultsAsync("What is the Semantic Kernel?", searchOptions); + + // Assert - Verify generic interface returns TextSearchResult objects + Assert.NotNull(result); + Assert.NotNull(result.Results); + var resultList = await result.Results.ToListAsync(); + Assert.NotEmpty(resultList); + Assert.All(resultList, item => Assert.IsType(item)); + + // Verify the request was made correctly + var requestUris = this._messageHandlerStub.RequestUris; + Assert.Single(requestUris); + Assert.NotNull(requestUris[0]); + Assert.Contains("count=5", requestUris[0].AbsoluteUri); + } + + [Fact] + public async Task CollectionContainsFilterThrowsNotSupportedExceptionAsync() + { + // Arrange - Tests both Enumerable.Contains (C# 13-) and MemoryExtensions.Contains (C# 14+) + // The same code array.Contains() resolves differently based on C# language version: + // - C# 13 and earlier: Enumerable.Contains (LINQ extension method) + // - C# 14 and later: MemoryExtensions.Contains (span-based optimization due to "first-class spans") + // Our implementation handles both identically since Brave API has limited query operators + this._messageHandlerStub.AddJsonResponse(File.ReadAllText(WhatIsTheSkResponseJson)); + ITextSearch textSearch = new BraveTextSearch(apiKey: "ApiKey", options: new() { HttpClient = this._httpClient }); + string[] sites = ["microsoft.com", "github.com"]; + + // Act & Assert - Verify that collection Contains pattern throws clear exception + var searchOptions = new TextSearchOptions + { + Top = 5, + Skip = 0, + Filter = page => sites.Contains(page.Url!.ToString()) // Enumerable.Contains (C# 13-) or MemoryExtensions.Contains (C# 14+) + }; + + var exception = await Assert.ThrowsAsync(async () => + { + await textSearch.SearchAsync("test", searchOptions); + }); + + // Assert - Verify error message explains the limitation clearly + Assert.Contains("Collection Contains filters", exception.Message); + Assert.Contains("not supported", exception.Message); + } + + 
[Fact] + public async Task StringContainsStillWorksWithLINQFiltersAsync() + { + // Arrange - Verify that String.Contains (instance method) still works + // String.Contains is NOT affected by C# 14 "first-class spans" - only arrays are + this._messageHandlerStub.AddJsonResponse(File.ReadAllText(WhatIsTheSkResponseJson)); + ITextSearch textSearch = new BraveTextSearch(apiKey: "ApiKey", options: new() { HttpClient = this._httpClient }); + + // Act - String.Contains should continue to work + var searchOptions = new TextSearchOptions + { + Top = 5, + Skip = 0, + Filter = page => page.Title.Contains("Kernel") // String.Contains - instance method + }; + KernelSearchResults result = await textSearch.SearchAsync("Semantic Kernel tutorial", searchOptions); + + // Assert - Verify String.Contains works correctly + var requestUris = this._messageHandlerStub.RequestUris; + Assert.Single(requestUris); + Assert.NotNull(requestUris[0]); + Assert.Contains("Kernel", requestUris[0].AbsoluteUri); + Assert.Contains("count=5", requestUris[0].AbsoluteUri); + } + + #endregion + #region private private const string WhatIsTheSkResponseJson = "./TestData/brave_what_is_the_semantic_kernel.json"; private const string SiteFilterSkResponseJson = "./TestData/brave_site_filter_what_is_the_semantic_kernel.json"; @@ -273,7 +419,7 @@ public TextSearchResult MapFromResultToTextSearchResult(object result) { if (result is not BraveWebResult webPage) { - throw new ArgumentException("Result must be a BraveWebPage", nameof(result)); + throw new ArgumentException("Result must be a BraveWebResult", nameof(result)); } return new TextSearchResult(webPage.Description?.ToUpperInvariant() ?? 
string.Empty) diff --git a/dotnet/src/Plugins/Plugins.UnitTests/Web/Tavily/TavilyTextSearchTests.cs b/dotnet/src/Plugins/Plugins.UnitTests/Web/Tavily/TavilyTextSearchTests.cs index f510d0555168..c51dbb769e34 100644 --- a/dotnet/src/Plugins/Plugins.UnitTests/Web/Tavily/TavilyTextSearchTests.cs +++ b/dotnet/src/Plugins/Plugins.UnitTests/Web/Tavily/TavilyTextSearchTests.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. #pragma warning disable CS0618 // ITextSearch is obsolete +#pragma warning disable CS8602 // Dereference of a possibly null reference - for LINQ expression properties using System; using System.IO; @@ -346,6 +347,156 @@ public void Dispose() GC.SuppressFinalize(this); } + #region Generic ITextSearch Interface Tests + + [Fact] + public async Task LinqSearchAsyncReturnsResultsSuccessfullyAsync() + { + // Arrange + this._messageHandlerStub.AddJsonResponse(File.ReadAllText(SiteFilterDevBlogsResponseJson)); + ITextSearch textSearch = new TavilyTextSearch(apiKey: "ApiKey", options: new() { HttpClient = this._httpClient }); + + // Act + var searchOptions = new TextSearchOptions + { + Top = 4, + Skip = 0 + }; + KernelSearchResults result = await textSearch.SearchAsync("What is the Semantic Kernel?", searchOptions); + + // Assert - Verify basic generic interface functionality + Assert.NotNull(result); + Assert.NotNull(result.Results); + var resultList = await result.Results.ToListAsync(); + Assert.NotEmpty(resultList); + + // Verify the request was made correctly + var requestContents = this._messageHandlerStub.RequestContents; + Assert.Single(requestContents); + Assert.NotNull(requestContents[0]); + var requestBodyJson = Encoding.UTF8.GetString(requestContents[0]!); + Assert.Contains("\"query\"", requestBodyJson); + Assert.Contains("\"max_results\":4", requestBodyJson); + } + + [Fact] + public async Task LinqGetSearchResultsAsyncReturnsResultsSuccessfullyAsync() + { + // Arrange + 
this._messageHandlerStub.AddJsonResponse(File.ReadAllText(SiteFilterDevBlogsResponseJson)); + ITextSearch textSearch = new TavilyTextSearch(apiKey: "ApiKey", options: new() { HttpClient = this._httpClient }); + + // Act + var searchOptions = new TextSearchOptions + { + Top = 3, + Skip = 0 + }; + KernelSearchResults result = await textSearch.GetSearchResultsAsync("What is the Semantic Kernel?", searchOptions); + + // Assert - Verify generic interface returns results + Assert.NotNull(result); + Assert.NotNull(result.Results); + var resultList = await result.Results.ToListAsync(); + Assert.NotEmpty(resultList); + // Results are now strongly typed as TavilyWebPage + + // Verify the request was made correctly + var requestContents = this._messageHandlerStub.RequestContents; + Assert.Single(requestContents); + Assert.NotNull(requestContents[0]); + var requestBodyJson = Encoding.UTF8.GetString(requestContents[0]!); + Assert.Contains("\"max_results\":3", requestBodyJson); + } + + [Fact] + public async Task LinqGetTextSearchResultsAsyncReturnsResultsSuccessfullyAsync() + { + // Arrange + this._messageHandlerStub.AddJsonResponse(File.ReadAllText(SiteFilterDevBlogsResponseJson)); + ITextSearch textSearch = new TavilyTextSearch(apiKey: "ApiKey", options: new() { HttpClient = this._httpClient }); + + // Act + var searchOptions = new TextSearchOptions + { + Top = 5, + Skip = 0 + }; + KernelSearchResults result = await textSearch.GetTextSearchResultsAsync("What is the Semantic Kernel?", searchOptions); + + // Assert - Verify generic interface returns TextSearchResult objects + Assert.NotNull(result); + Assert.NotNull(result.Results); + var resultList = await result.Results.ToListAsync(); + Assert.NotEmpty(resultList); + Assert.All(resultList, item => Assert.IsType(item)); + + // Verify the request was made correctly + var requestContents = this._messageHandlerStub.RequestContents; + Assert.Single(requestContents); + Assert.NotNull(requestContents[0]); + var requestBodyJson = 
Encoding.UTF8.GetString(requestContents[0]!); + Assert.Contains("\"max_results\":5", requestBodyJson); + } + + [Fact] + public async Task CollectionContainsFilterThrowsNotSupportedExceptionAsync() + { + // Arrange - Tests both Enumerable.Contains (C# 13-) and MemoryExtensions.Contains (C# 14+) + // The same code array.Contains() resolves differently based on C# language version: + // - C# 13 and earlier: Enumerable.Contains (LINQ extension method) + // - C# 14 and later: MemoryExtensions.Contains (span-based optimization due to "first-class spans") + // Our implementation handles both identically since Tavily API has limited query operators + this._messageHandlerStub.AddJsonResponse(File.ReadAllText(SiteFilterDevBlogsResponseJson)); + ITextSearch textSearch = new TavilyTextSearch(apiKey: "ApiKey", options: new() { HttpClient = this._httpClient }); + string[] domains = ["microsoft.com", "github.com"]; + + // Act & Assert - Verify that collection Contains pattern throws clear exception + var searchOptions = new TextSearchOptions + { + Top = 5, + Skip = 0, + Filter = page => domains.Contains(page.Url!.ToString()) // Enumerable.Contains (C# 13-) or MemoryExtensions.Contains (C# 14+) + }; + + var exception = await Assert.ThrowsAsync(async () => + { + await textSearch.SearchAsync("test", searchOptions); + }); + + // Assert - Verify error message explains the limitation clearly + Assert.Contains("Collection Contains filters", exception.Message); + Assert.Contains("not supported", exception.Message); + } + + [Fact] + public async Task StringContainsStillWorksWithLINQFiltersAsync() + { + // Arrange - Verify that String.Contains (instance method) still works + // String.Contains is NOT affected by C# 14 "first-class spans" - only arrays are + this._messageHandlerStub.AddJsonResponse(File.ReadAllText(SiteFilterDevBlogsResponseJson)); + ITextSearch textSearch = new TavilyTextSearch(apiKey: "ApiKey", options: new() { HttpClient = this._httpClient }); + + // Act - 
String.Contains should continue to work + var searchOptions = new TextSearchOptions + { + Top = 5, + Skip = 0, + Filter = page => page.Title.Contains("Kernel") // String.Contains - instance method + }; + KernelSearchResults result = await textSearch.SearchAsync("Semantic Kernel tutorial", searchOptions); + + // Assert - Verify String.Contains works correctly + var requestContents = this._messageHandlerStub.RequestContents; + Assert.Single(requestContents); + Assert.NotNull(requestContents[0]); + var requestBodyJson = Encoding.UTF8.GetString(requestContents[0]!); + Assert.Contains("Kernel", requestBodyJson); + Assert.Contains("\"max_results\":5", requestBodyJson); + } + + #endregion + #region private private const string WhatIsTheSKResponseJson = "./TestData/tavily_what_is_the_semantic_kernel.json"; private const string SiteFilterDevBlogsResponseJson = "./TestData/tavily_site_filter_devblogs_microsoft.com.json"; diff --git a/dotnet/src/Plugins/Plugins.Web/Brave/BraveTextSearch.cs b/dotnet/src/Plugins/Plugins.Web/Brave/BraveTextSearch.cs index af54b42f704c..e7b6eab6f780 100644 --- a/dotnet/src/Plugins/Plugins.Web/Brave/BraveTextSearch.cs +++ b/dotnet/src/Plugins/Plugins.Web/Brave/BraveTextSearch.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Linq.Expressions; using System.Net.Http; using System.Runtime.CompilerServices; using System.Text; @@ -21,7 +22,7 @@ namespace Microsoft.SemanticKernel.Plugins.Web.Brave; /// A Brave Text Search implementation that can be used to perform searches using the Brave Web Search API. /// #pragma warning disable CS0618 // ITextSearch is obsolete - this class provides backward compatibility -public sealed class BraveTextSearch : ITextSearch +public sealed class BraveTextSearch : ITextSearch, ITextSearch #pragma warning restore CS0618 { /// @@ -77,10 +78,438 @@ public async Task> GetSearchResultsAsync(string quer long? totalCount = searchOptions.IncludeTotalCount ? 
searchResponse?.Web?.Results.Count : null; - return new KernelSearchResults(this.GetResultsAsWebPageAsync(searchResponse, cancellationToken), totalCount, GetResultsMetadata(searchResponse)); + return new KernelSearchResults(this.GetResultsAsObjectAsync(searchResponse, cancellationToken), totalCount, GetResultsMetadata(searchResponse)); } - #region private + #region Generic ITextSearch Implementation + + /// + async Task> ITextSearch.SearchAsync(string query, TextSearchOptions? searchOptions, CancellationToken cancellationToken) + { + var (modifiedQuery, legacyOptions) = this.ConvertToLegacyOptionsWithQuery(query, searchOptions); + BraveSearchResponse? searchResponse = await this.ExecuteSearchAsync(modifiedQuery, legacyOptions, cancellationToken).ConfigureAwait(false); + + long? totalCount = legacyOptions.IncludeTotalCount ? searchResponse?.Web?.Results.Count : null; + + return new KernelSearchResults(this.GetResultsAsStringAsync(searchResponse, cancellationToken), totalCount, GetResultsMetadata(searchResponse)); + } + + /// + async Task> ITextSearch.GetTextSearchResultsAsync(string query, TextSearchOptions? searchOptions, CancellationToken cancellationToken) + { + var (modifiedQuery, legacyOptions) = this.ConvertToLegacyOptionsWithQuery(query, searchOptions); + BraveSearchResponse? searchResponse = await this.ExecuteSearchAsync(modifiedQuery, legacyOptions, cancellationToken).ConfigureAwait(false); + + long? totalCount = legacyOptions.IncludeTotalCount ? searchResponse?.Web?.Results.Count : null; + + return new KernelSearchResults(this.GetResultsAsTextSearchResultAsync(searchResponse, cancellationToken), totalCount, GetResultsMetadata(searchResponse)); + } + + /// + async Task> ITextSearch.GetSearchResultsAsync(string query, TextSearchOptions? searchOptions, CancellationToken cancellationToken) + { + var (modifiedQuery, legacyOptions) = this.ConvertToLegacyOptionsWithQuery(query, searchOptions); + BraveSearchResponse? 
searchResponse = await this.ExecuteSearchAsync(modifiedQuery, legacyOptions, cancellationToken).ConfigureAwait(false); + + long? totalCount = legacyOptions.IncludeTotalCount ? searchResponse?.Web?.Results.Count : null; + + return new KernelSearchResults(this.GetResultsAsBraveWebPageAsync(searchResponse, cancellationToken), totalCount, GetResultsMetadata(searchResponse)); + } + + #endregion + + #region LINQ-to-Brave Conversion Logic + + /// + /// Converts generic TextSearchOptions with LINQ filtering to legacy TextSearchOptions and extracts additional search terms. + /// + /// The original search query. + /// The generic search options with LINQ filter. + /// A tuple containing the modified query and legacy TextSearchOptions with converted filters. + private (string modifiedQuery, TextSearchOptions legacyOptions) ConvertToLegacyOptionsWithQuery(string query, TextSearchOptions? options) + { + var legacyOptions = this.ConvertToLegacyOptions(options); + + if (options?.Filter != null) + { + // Extract search terms from the LINQ expression + var additionalSearchTerms = ExtractSearchTermsFromLinqExpression(options.Filter); + if (additionalSearchTerms.Count > 0) + { + // Append additional search terms to the original query + var modifiedQuery = $"{query} {string.Join(" ", additionalSearchTerms)}".Trim(); + return (modifiedQuery, legacyOptions); + } + } + + return (query, legacyOptions); + } + + /// + /// Converts generic TextSearchOptions with LINQ filtering to legacy TextSearchOptions. + /// + /// The generic search options with LINQ filter. + /// Legacy TextSearchOptions with converted filters. + private TextSearchOptions ConvertToLegacyOptions(TextSearchOptions? 
options) + { + if (options == null) + { + return new TextSearchOptions(); + } + + if (options.Filter == null) + { + // No LINQ filter to translate: only the paging settings carry over + return new TextSearchOptions + { + Top = options.Top, + Skip = options.Skip, + IncludeTotalCount = options.IncludeTotalCount + }; + } + + // Unsupported LINQ patterns raise NotSupportedException inside the converter and propagate + // unchanged, so developers get explicit feedback about which patterns the Brave API accepts + return new TextSearchOptions + { + Top = options.Top, + Skip = options.Skip, + IncludeTotalCount = options.IncludeTotalCount, + Filter = ConvertLinqExpressionToBraveFilter(options.Filter) + }; + } + + /// + /// Extracts search terms that should be added to the search query from a LINQ expression. + /// + /// The LINQ expression to analyze. + /// A list of search terms to add to the query. + private static List ExtractSearchTermsFromLinqExpression(Expression> linqExpression) + { + var searchTerms = new List(); + var filterClauses = new List(); + + // Analyze the LINQ expression to get all filter clauses + AnalyzeExpression(linqExpression.Body, filterClauses); + + // Extract search terms from SearchQueryFilterClause instances + foreach (var clause in filterClauses) + { + if (clause is SearchQueryFilterClause searchQueryClause) + { + searchTerms.Add(searchQueryClause.SearchTerm); + } + } + + return searchTerms; + } + + /// + /// Converts a LINQ expression to Brave-compatible TextSearchFilter. + /// + /// The LINQ expression to convert. + /// A TextSearchFilter with Brave-compatible filter clauses. 
+ private static TextSearchFilter ConvertLinqExpressionToBraveFilter(Expression> linqExpression) + { + var filter = new TextSearchFilter(); + var filterClauses = new List(); + + // Analyze the LINQ expression and convert to filter clauses + AnalyzeExpression(linqExpression.Body, filterClauses); + + // Validate and add clauses that are supported by Brave + foreach (var clause in filterClauses) + { + if (clause is EqualToFilterClause equalityClause) + { + var mappedFieldName = MapPropertyToBraveFilter(equalityClause.FieldName); + if (mappedFieldName != null) + { + filter.Equality(mappedFieldName, equalityClause.Value); + } + else + { + throw new NotSupportedException( + $"Property '{equalityClause.FieldName}' cannot be mapped to Brave API filters. " + + $"Supported properties: {string.Join(", ", s_queryParameters)}. " + + "Example: page => page.Country == \"US\" && page.SafeSearch == \"moderate\""); + } + } + else if (clause is SearchQueryFilterClause) + { + // SearchQueryFilterClause is handled at the query level, not the filter level + // Skip it here as it's processed by ConvertToLegacyOptionsWithQuery + continue; + } + } + + return filter; + } + + /// + /// Maps BraveWebPage property names to Brave API filter parameter names. + /// + /// The property name from BraveWebPage. + /// The corresponding Brave API parameter name, or null if not mappable. + private static string? 
MapPropertyToBraveFilter(string propertyName) => + propertyName.ToUpperInvariant() switch + { + "COUNTRY" => BraveParamCountry, + "SEARCHLANG" => BraveParamSearchLang, + "UILANG" => BraveParamUiLang, + "SAFESEARCH" => BraveParamSafeSearch, + "TEXTDECORATIONS" => BraveParamTextDecorations, + "SPELLCHECK" => BraveParamSpellCheck, + "RESULTFILTER" => BraveParamResultFilter, + "UNITS" => BraveParamUnits, + "EXTRASNIPPETS" => BraveParamExtraSnippets, + _ => null // Property not mappable to Brave filters + }; + + // TODO: Consider extracting LINQ expression analysis logic to a shared utility class + // to reduce duplication across text search connectors (Brave, Tavily, etc.). + // See code review for details. + /// + /// Analyzes a LINQ expression and extracts filter clauses. + /// + /// The expression to analyze. + /// The list to add extracted filter clauses to. + private static void AnalyzeExpression(Expression expression, List filterClauses) + { + switch (expression) + { + case BinaryExpression binaryExpr: + if (binaryExpr.NodeType == ExpressionType.AndAlso) + { + // Handle AND expressions by recursively analyzing both sides + AnalyzeExpression(binaryExpr.Left, filterClauses); + AnalyzeExpression(binaryExpr.Right, filterClauses); + } + else if (binaryExpr.NodeType == ExpressionType.OrElse) + { + // OR operands are analyzed exactly like AND operands: both sides land in the same clause list + // NOTE(review): 'a || b' is therefore not distinguished from 'a && b' downstream - confirm this is intended + AnalyzeExpression(binaryExpr.Left, filterClauses); + AnalyzeExpression(binaryExpr.Right, filterClauses); + } + else if (binaryExpr.NodeType == ExpressionType.Equal) + { + // Handle equality expressions + ExtractEqualityClause(binaryExpr, filterClauses); + } + else if (binaryExpr.NodeType == ExpressionType.NotEqual) + { + // Inequality expressions (property != value) are always rejected: + // ExtractInequalityClause throws NotSupportedException on every path + ExtractInequalityClause(binaryExpr, filterClauses); + } + else + { + throw new NotSupportedException($"Binary expression type 
'{binaryExpr.NodeType}' is not supported. Supported operators: AndAlso (&&), OrElse (||), Equal (==)."); + } + break; + + case UnaryExpression unaryExpr when unaryExpr.NodeType == ExpressionType.Not: + // Handle NOT expressions (negation) + AnalyzeNotExpression(unaryExpr, filterClauses); + break; + + case MethodCallExpression methodCall: + // Handle method calls like Contains, StartsWith, etc. + ExtractMethodCallClause(methodCall, filterClauses); + break; + + default: + throw new NotSupportedException($"Expression type '{expression.NodeType}' is not supported in Brave search filters."); + } + } + + /// + /// Extracts an equality filter clause from a binary equality expression. + /// + /// The binary equality expression. + /// The list to add the extracted clause to. + private static void ExtractEqualityClause(BinaryExpression binaryExpr, List filterClauses) + { + string? propertyName = null; + object? value = null; + + // The property side is the member read directly off the lambda parameter (page.X). + // A captured variable is a MemberExpression too, so a bare type test would misread 'captured == page.X' + if (binaryExpr.Left is MemberExpression { Expression: ParameterExpression } leftMember) + { + propertyName = leftMember.Member.Name; + value = ExtractValue(binaryExpr.Right); + } + else if (binaryExpr.Right is MemberExpression { Expression: ParameterExpression } rightMember) + { + propertyName = rightMember.Member.Name; + value = ExtractValue(binaryExpr.Left); + } + + if (propertyName != null && value != null) + { + filterClauses.Add(new EqualToFilterClause(propertyName, value)); + } + else + { + throw new NotSupportedException("Unable to extract property name and value from equality expression."); + } + } + + /// + /// Rejects a binary not-equal expression; no filter clause is ever added because the method always throws. + /// + /// The binary not-equal expression. + /// The list to add the extracted clause to. 
+ private static void ExtractInequalityClause(BinaryExpression binaryExpr, List filterClauses) + { + // Inequality is never translated into a filter clause: this method always throws. + // The operands are inspected only so the error can name the offending property. + string? propertyName = null; + object? value = null; + + if (binaryExpr.Left is MemberExpression { Expression: ParameterExpression } leftMember) + { + propertyName = leftMember.Member.Name; + value = ExtractValue(binaryExpr.Right); + } + else if (binaryExpr.Right is MemberExpression { Expression: ParameterExpression } rightMember) + { + propertyName = rightMember.Member.Name; + value = ExtractValue(binaryExpr.Left); + } + + if (propertyName != null && value != null) + { + // No clause is recorded: Brave has no negative filtering, and the NOT form + // !(property == value) is rejected as well, so it is not offered as a workaround + throw new NotSupportedException($"Inequality operator (!=) is not supported for property '{propertyName}'. Brave's API does not support negative filtering; retrieve broader results and filter on the client side instead."); + } + + throw new NotSupportedException("Unable to extract property name and value from inequality expression."); + } + + /// + /// Analyzes a NOT (negation) expression. + /// + /// The unary NOT expression. + /// The list to add extracted filter clauses to. + private static void AnalyzeNotExpression(UnaryExpression unaryExpr, List filterClauses) + { + // Every NOT expression is rejected; the check below only selects + // a more specific message for the common !(property == value) shape + if (unaryExpr.Operand is BinaryExpression binaryExpr && binaryExpr.NodeType == ExpressionType.Equal) + { + // This is !(property == value), e.g. !(page.SafeSearch == "off") + throw new NotSupportedException("NOT operator (!) with equality is not directly supported. Most web search APIs don't support negative filtering."); + } + + throw new NotSupportedException("NOT operator (!) 
is not supported in Brave search filters. Most web search APIs don't support negative filtering."); + } + + /// + /// Extracts a filter clause from a Contains method call; any other method is rejected. + /// + /// The method call expression. + /// The list to add the extracted clause to. + private static void ExtractMethodCallClause(MethodCallExpression methodCall, List filterClauses) + { + if (methodCall.Method.Name == "Contains") + { + // Check if this is property.Contains(value) or array.Contains(property) + if (methodCall.Object is MemberExpression member) + { + // This is property.Contains(value) - e.g., page.ResultFilter.Contains("web") + var propertyName = member.Member.Name; + var value = ExtractValue(methodCall.Arguments[0]); + + if (value != null) + { + // For Contains, we'll map it to equality for certain properties + if (propertyName.Equals("ResultFilter", StringComparison.OrdinalIgnoreCase)) + { + filterClauses.Add(new EqualToFilterClause(propertyName, value)); + } + else if (propertyName.Equals("Title", StringComparison.OrdinalIgnoreCase)) + { + // For Title.Contains(), add the term to the search query itself + filterClauses.Add(new SearchQueryFilterClause(value.ToString() ?? string.Empty)); + } + else + { + throw new NotSupportedException($"Contains method is only supported for ResultFilter and Title properties, not '{propertyName}'."); + } + } + else + { + // A null argument used to drop the filter silently; fail loudly like ExtractEqualityClause does + throw new NotSupportedException($"Contains on property '{propertyName}' requires a non-null value."); + } + } + else if (methodCall.Object == null && methodCall.Arguments.Count == 2) + { + // This is array.Contains(property) - e.g., new[] { "US", "GB" }.Contains(page.Country) + // This pattern is not supported regardless of whether it's Enumerable.Contains (C# 13-) or MemoryExtensions.Contains (C# 14+) + // Both resolve to extension method calls with methodCall.Object == null + + // Provide detailed error message that covers both C# language versions + string errorMessage = "Collection Contains filters (e.g., array.Contains(page.Property)) are not supported by Brave Search API. " + + "Brave's API does not support OR logic across multiple values. 
"; + + if (IsMemoryExtensionsContains(methodCall)) + { + errorMessage += "Note: This occurs when using C# 14+ language features with span-based Contains methods (MemoryExtensions.Contains). "; + } + else + { + errorMessage += "Note: This occurs with standard LINQ extension methods (Enumerable.Contains). "; + } + + errorMessage += "Consider either: (1) performing multiple separate searches for each value, or " + + "(2) retrieving broader results and filtering on the client side."; + + throw new NotSupportedException(errorMessage); + } + else + { + throw new NotSupportedException("Unsupported Contains expression format."); + } + } + else + { + throw new NotSupportedException($"Method '{methodCall.Method.Name}' is not supported in Brave search filters. Only 'Contains' is supported."); + } + } + + /// + /// Extracts a constant value from an expression. + /// + /// The expression to extract the value from. + /// The extracted value, or null if extraction failed. + private static object? ExtractValue(Expression expression) + { + return expression switch + { + ConstantExpression constant => constant.Value, + MemberExpression member when member.Expression is ConstantExpression constantExpr => + member.Member switch + { + System.Reflection.FieldInfo field => field.GetValue(constantExpr.Value), + System.Reflection.PropertyInfo property => property.GetValue(constantExpr.Value), + _ => null + }, + _ => Expression.Lambda(expression).Compile().DynamicInvoke() + }; + } + + #endregion + + #region Private Methods private readonly ILogger _logger; private readonly HttpClient _httpClient; @@ -92,8 +521,19 @@ public async Task> GetSearchResultsAsync(string quer private static readonly ITextSearchStringMapper s_defaultStringMapper = new DefaultTextSearchStringMapper(); private static readonly ITextSearchResultMapper s_defaultResultMapper = new DefaultTextSearchResultMapper(); + // Constants for Brave API parameter names + private const string BraveParamCountry = "country"; + private 
const string BraveParamSearchLang = "search_lang"; + private const string BraveParamUiLang = "ui_lang"; + private const string BraveParamSafeSearch = "safesearch"; + private const string BraveParamTextDecorations = "text_decorations"; + private const string BraveParamSpellCheck = "spellcheck"; + private const string BraveParamResultFilter = "result_filter"; + private const string BraveParamUnits = "units"; + private const string BraveParamExtraSnippets = "extra_snippets"; + // See https://api-dashboard.search.brave.com/app/documentation/web-search/query#WebSearchAPIQueryParameters - private static readonly string[] s_queryParameters = ["country", "search_lang", "ui_lang", "safesearch", "text_decorations", "spellcheck", "result_filter", "units", "extra_snippets"]; + private static readonly string[] s_queryParameters = [BraveParamCountry, BraveParamSearchLang, BraveParamUiLang, BraveParamSafeSearch, BraveParamTextDecorations, BraveParamSpellCheck, BraveParamResultFilter, BraveParamUnits, BraveParamExtraSnippets]; private static readonly string[] s_safeSearch = ["off", "moderate", "strict"]; @@ -162,11 +602,36 @@ private async Task SendGetRequestAsync(string query, TextSe } /// - /// Return the search results as instances of . + /// Return the search results as instances of . + /// + /// Response containing the web pages matching the query. + /// Cancellation token + private async IAsyncEnumerable GetResultsAsObjectAsync(BraveSearchResponse? searchResponse, [EnumeratorCancellation] CancellationToken cancellationToken) + { + if (searchResponse?.Web?.Results is null) + { + yield break; + } + + foreach (var result in searchResponse.Web.Results) + { + yield return new BraveWebPage + { + Title = result.Title, + Url = string.IsNullOrWhiteSpace(result.Url) ? null : new Uri(result.Url), + Description = result.Description, + }; + + await Task.Yield(); + } + } + + /// + /// Return the search results as instances of . /// /// Response containing the web pages matching the query. 
/// Cancellation token - private async IAsyncEnumerable GetResultsAsWebPageAsync(BraveSearchResponse? searchResponse, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable GetResultsAsBraveWebPageAsync(BraveSearchResponse? searchResponse, [EnumeratorCancellation] CancellationToken cancellationToken) { if (searchResponse is null) { yield break; } @@ -174,7 +639,7 @@ private async IAsyncEnumerable GetResultsAsWebPageAsync(BraveSearchRespo { foreach (var webPage in webResults) { - yield return webPage; + yield return BraveWebPage.FromWebResult(webPage); await Task.Yield(); } } @@ -385,5 +850,42 @@ private static void CheckQueryValidation(string queryParam, object value) break; } } + + /// + /// Determines if a method call expression is a MemoryExtensions.Contains call (C# 14+ compatibility). + /// In C# 14+, array.Contains(property) may resolve to MemoryExtensions.Contains instead of Enumerable.Contains. + /// + /// The method call expression to check. + /// True if this is a MemoryExtensions.Contains call, false otherwise. + private static bool IsMemoryExtensionsContains(MethodCallExpression methodCall) + { + // Check if this is a static method call (Object is null) + if (methodCall.Object != null) + { + return false; + } + + // Check if it's MemoryExtensions.Contains + if (methodCall.Method.DeclaringType?.Name != "MemoryExtensions") + { + return false; + } + + // MemoryExtensions.Contains has 2-3 parameters: (ReadOnlySpan, T) or (ReadOnlySpan, T, IEqualityComparer) + if (methodCall.Arguments.Count < 2 || methodCall.Arguments.Count > 3) + { + return false; + } + + // For our text search scenarios, we don't support span comparers + if (methodCall.Arguments.Count == 3) + { + throw new NotSupportedException( + "MemoryExtensions.Contains with custom IEqualityComparer is not supported. 
" + + "Use simple array.Contains(property) expressions without custom comparers."); + } + + return true; + } #endregion } diff --git a/dotnet/src/Plugins/Plugins.Web/Brave/BraveWebPage.cs b/dotnet/src/Plugins/Plugins.Web/Brave/BraveWebPage.cs new file mode 100644 index 000000000000..c6938c7b0ef8 --- /dev/null +++ b/dotnet/src/Plugins/Plugins.Web/Brave/BraveWebPage.cs @@ -0,0 +1,145 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; + +namespace Microsoft.SemanticKernel.Plugins.Web.Brave; + +/// +/// Represents a type-safe web page result from Brave search for use with generic ITextSearch<TRecord> interface. +/// This class provides compile-time type safety and IntelliSense support for Brave search filtering. +/// +public sealed class BraveWebPage +{ + /// + /// Gets or sets the title of the web page. + /// + public string? Title { get; set; } + + /// + /// Gets or sets the URL of the web page. + /// + public Uri? Url { get; set; } + + /// + /// Gets or sets the description of the web page. + /// + public string? Description { get; set; } + + /// + /// Gets or sets the type of the search result. + /// + public string? Type { get; set; } + + /// + /// Gets or sets the age of the web search result. + /// + public string? Age { get; set; } + + /// + /// Gets or sets the page age timestamp. + /// + public DateTime? PageAge { get; set; } + + /// + /// Gets or sets the language of the web page. + /// + public string? Language { get; set; } + + /// + /// Gets or sets whether the web page is family friendly. + /// + public bool? FamilyFriendly { get; set; } + + /// + /// Gets or sets the country filter for search results. + /// Maps to Brave's 'country' parameter (e.g., "US", "GB", "CA"). + /// + public string? Country { get; set; } + + /// + /// Gets or sets the search language filter. + /// Maps to Brave's 'search_lang' parameter (e.g., "en", "es", "fr"). + /// + public string? SearchLang { get; set; } + + /// + /// Gets or sets the UI language filter. 
+ /// Maps to Brave's 'ui_lang' parameter (e.g., "en-US", "en-GB"). + /// + public string? UiLang { get; set; } + + /// + /// Gets or sets the safe search filter. + /// Maps to Brave's 'safesearch' parameter ("off", "moderate", "strict"). + /// + public string? SafeSearch { get; set; } + + /// + /// Gets or sets whether text decorations are enabled. + /// Maps to Brave's 'text_decorations' parameter. + /// + public bool? TextDecorations { get; set; } + + /// + /// Gets or sets whether spell check is enabled. + /// Maps to Brave's 'spellcheck' parameter. + /// + public bool? SpellCheck { get; set; } + + /// + /// Gets or sets the result filter for search types. + /// Maps to Brave's 'result_filter' parameter (e.g., "web", "news", "videos"). + /// + public string? ResultFilter { get; set; } + + /// + /// Gets or sets the units system for measurements. + /// Maps to Brave's 'units' parameter ("metric" or "imperial"). + /// + public string? Units { get; set; } + + /// + /// Gets or sets whether extra snippets are included. + /// Maps to Brave's 'extra_snippets' parameter. + /// + public bool? ExtraSnippets { get; set; } + + /// + /// Initializes a new instance of the class. + /// + public BraveWebPage() + { + } + + /// + /// Initializes a new instance of the class with specified values. + /// + /// The title of the web page. + /// The URL of the web page. + /// The description of the web page. + /// The type of the search result. + public BraveWebPage(string? title, Uri? url, string? description, string? type = null) + { + this.Title = title; + this.Url = url; + this.Description = description; + this.Type = type; + } + + /// + /// Creates a BraveWebPage from a BraveWebResult. + /// + /// The web result to convert. + /// A new BraveWebPage instance. + internal static BraveWebPage FromWebResult(BraveWebResult result) + { + Uri? url = string.IsNullOrWhiteSpace(result.Url) ? 
null : new Uri(result.Url); + return new BraveWebPage(result.Title, url, result.Description, result.Type) + { + Age = result.Age, + PageAge = result.PageAge, + Language = result.Language, + FamilyFriendly = result.FamilyFriendly + }; + } +} diff --git a/dotnet/src/Plugins/Plugins.Web/FilterClauses/SearchQueryFilterClause.cs b/dotnet/src/Plugins/Plugins.Web/FilterClauses/SearchQueryFilterClause.cs new file mode 100644 index 000000000000..9909da9579e6 --- /dev/null +++ b/dotnet/src/Plugins/Plugins.Web/FilterClauses/SearchQueryFilterClause.cs @@ -0,0 +1,38 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.VectorData; + +namespace Microsoft.SemanticKernel.Plugins.Web; + +/// +/// Represents a filter clause that adds terms to the search query itself for text search engines. +/// +/// +/// This filter clause is used when the underlying search service should add the specified +/// terms to the search query to help find matching results, rather than filtering results +/// after they are returned. +/// +/// Primary use case: Supporting Title.Contains("value") LINQ expressions for search engines +/// that don't have field-specific operators (e.g., Brave, Tavily). The implementation extracts +/// the search term and appends it to the base query for enhanced relevance. +/// +/// Example: Title.Contains("AI") → SearchQueryFilterClause("AI") → query + " AI" +/// +/// See ADR-TextSearch-Contains-Support.md for architectural context and cross-engine comparison. +/// +internal sealed class SearchQueryFilterClause : FilterClause +{ + /// + /// Initializes a new instance of the class. + /// + /// The term to add to the search query. + public SearchQueryFilterClause(string searchTerm) + { + this.SearchTerm = searchTerm; + } + + /// + /// Gets the search term to add to the query. 
+ /// + public string SearchTerm { get; private set; } +} diff --git a/dotnet/src/Plugins/Plugins.Web/Tavily/TavilyTextSearch.cs b/dotnet/src/Plugins/Plugins.Web/Tavily/TavilyTextSearch.cs index a7ddacab3469..ab06f08cb9ad 100644 --- a/dotnet/src/Plugins/Plugins.Web/Tavily/TavilyTextSearch.cs +++ b/dotnet/src/Plugins/Plugins.Web/Tavily/TavilyTextSearch.cs @@ -2,6 +2,7 @@ using System; using System.Collections.Generic; +using System.Linq.Expressions; using System.Net.Http; using System.Runtime.CompilerServices; using System.Text; @@ -21,7 +22,7 @@ namespace Microsoft.SemanticKernel.Plugins.Web.Tavily; /// A Tavily Text Search implementation that can be used to perform searches using the Tavily Web Search API. /// #pragma warning disable CS0618 // ITextSearch is obsolete - this class provides backward compatibility -public sealed class TavilyTextSearch : ITextSearch +public sealed class TavilyTextSearch : ITextSearch, ITextSearch #pragma warning restore CS0618 { /// @@ -77,7 +78,431 @@ public async Task> GetSearchResultsAsync(string quer return new KernelSearchResults(this.GetSearchResultsAsync(searchResponse, cancellationToken), totalCount, GetResultsMetadata(searchResponse)); } - #region private + #region Generic ITextSearch Implementation + + /// + async Task> ITextSearch.SearchAsync(string query, TextSearchOptions? searchOptions, CancellationToken cancellationToken) + { + var (modifiedQuery, legacyOptions) = this.ConvertToLegacyOptionsWithQuery(query, searchOptions); + TavilySearchResponse? searchResponse = await this.ExecuteSearchAsync(modifiedQuery, legacyOptions, cancellationToken).ConfigureAwait(false); + + long? totalCount = null; + + return new KernelSearchResults(this.GetResultsAsStringAsync(searchResponse, cancellationToken), totalCount, GetResultsMetadata(searchResponse)); + } + + /// + async Task> ITextSearch.GetTextSearchResultsAsync(string query, TextSearchOptions? 
searchOptions, CancellationToken cancellationToken) + { + var (modifiedQuery, legacyOptions) = this.ConvertToLegacyOptionsWithQuery(query, searchOptions); + TavilySearchResponse? searchResponse = await this.ExecuteSearchAsync(modifiedQuery, legacyOptions, cancellationToken).ConfigureAwait(false); + + long? totalCount = null; + + return new KernelSearchResults(this.GetResultsAsTextSearchResultAsync(searchResponse, cancellationToken), totalCount, GetResultsMetadata(searchResponse)); + } + + /// + async Task> ITextSearch.GetSearchResultsAsync(string query, TextSearchOptions? searchOptions, CancellationToken cancellationToken) + { + var (modifiedQuery, legacyOptions) = this.ConvertToLegacyOptionsWithQuery(query, searchOptions); + TavilySearchResponse? searchResponse = await this.ExecuteSearchAsync(modifiedQuery, legacyOptions, cancellationToken).ConfigureAwait(false); + + long? totalCount = null; + + return new KernelSearchResults(this.GetResultsAsWebPageAsync(searchResponse, cancellationToken), totalCount, GetResultsMetadata(searchResponse)); + } + + #endregion + + #region LINQ-to-Tavily Conversion Logic + + /// + /// Converts generic TextSearchOptions with LINQ filtering to legacy TextSearchOptions and extracts additional search terms. + /// + /// The original search query. + /// The generic search options with LINQ filter. + /// A tuple containing the modified query and legacy TextSearchOptions with converted filters. + private (string modifiedQuery, TextSearchOptions legacyOptions) ConvertToLegacyOptionsWithQuery(string query, TextSearchOptions? 
options) + { + var legacyOptions = this.ConvertToLegacyOptions(options); + + if (options?.Filter != null) + { + // Extract search terms from the LINQ expression + var additionalSearchTerms = ExtractSearchTermsFromLinqExpression(options.Filter); + if (additionalSearchTerms.Count > 0) + { + // Append additional search terms to the original query + var modifiedQuery = $"{query} {string.Join(" ", additionalSearchTerms)}".Trim(); + return (modifiedQuery, legacyOptions); + } + } + + return (query, legacyOptions); + } + + /// + /// Converts generic TextSearchOptions with LINQ filtering to legacy TextSearchOptions. + /// + /// The generic search options with LINQ filter. + /// Legacy TextSearchOptions with converted filters. + private TextSearchOptions ConvertToLegacyOptions(TextSearchOptions? options) + { + if (options == null) + { + return new TextSearchOptions(); + } + + var legacyOptions = new TextSearchOptions + { + Top = options.Top, + Skip = options.Skip, + IncludeTotalCount = options.IncludeTotalCount + }; + + // Convert LINQ expression to TextSearchFilter if present + if (options.Filter != null) + { + try + { + var convertedFilter = ConvertLinqExpressionToTavilyFilter(options.Filter); + legacyOptions = new TextSearchOptions + { + Top = options.Top, + Skip = options.Skip, + IncludeTotalCount = options.IncludeTotalCount, + Filter = convertedFilter + }; + } + catch (NotSupportedException) + { + // All unsupported LINQ patterns should fail explicitly to provide clear developer feedback + // This helps developers understand which patterns work with the Tavily API + throw; + } + } + + return legacyOptions; + } + + /// + /// Extracts search terms that should be added to the search query from a LINQ expression. + /// + /// The LINQ expression to analyze. + /// A list of search terms to add to the query. 
+ private static List ExtractSearchTermsFromLinqExpression(Expression> linqExpression) + { + var searchTerms = new List(); + var filterClauses = new List(); + + // Analyze the LINQ expression to get all filter clauses + AnalyzeExpression(linqExpression.Body, filterClauses); + + // Extract search terms from SearchQueryFilterClause instances + foreach (var clause in filterClauses) + { + if (clause is SearchQueryFilterClause searchQueryClause) + { + searchTerms.Add(searchQueryClause.SearchTerm); + } + } + + return searchTerms; + } + + /// + /// Converts a LINQ expression to Tavily-compatible TextSearchFilter. + /// + /// The LINQ expression to convert. + /// A TextSearchFilter with Tavily-compatible filter clauses. + private static TextSearchFilter ConvertLinqExpressionToTavilyFilter(Expression> linqExpression) + { + var filter = new TextSearchFilter(); + var filterClauses = new List(); + + // Analyze the LINQ expression and convert to filter clauses + AnalyzeExpression(linqExpression.Body, filterClauses); + + // Validate and add clauses that are supported by Tavily + foreach (var clause in filterClauses) + { + if (clause is EqualToFilterClause equalityClause) + { + var mappedFieldName = MapPropertyToTavilyFilter(equalityClause.FieldName); + if (mappedFieldName != null) + { + filter.Equality(mappedFieldName, equalityClause.Value); + } + else + { + throw new NotSupportedException( + $"Property '{equalityClause.FieldName}' cannot be mapped to Tavily API filters. " + + $"Supported properties: {string.Join(", ", s_validFieldNames)}. " + + "Example: page => page.Topic == \"general\" && page.TimeRange == \"week\""); + } + } + else if (clause is SearchQueryFilterClause) + { + // SearchQueryFilterClause is handled at the query level, not the filter level + // Skip it here as it's processed by ConvertToLegacyOptionsWithQuery + continue; + } + } + + return filter; + } + + /// + /// Maps TavilyWebPage property names to Tavily API filter parameter names. 
+ /// + /// The property name from TavilyWebPage. + /// The corresponding Tavily API parameter name, or null if not mappable. + private static string? MapPropertyToTavilyFilter(string propertyName) => + propertyName.ToUpperInvariant() switch + { + "TOPIC" => Topic, + "TIMERANGE" => TimeRange, + "DAYS" => Days, + "INCLUDEDOMAIN" => IncludeDomain, + "EXCLUDEDOMAIN" => ExcludeDomain, + _ => null // Property not mappable to Tavily filters + }; + + // TODO: Consider extracting LINQ expression analysis logic to a shared utility class + // to reduce duplication across text search connectors (Brave, Tavily, etc.). + // See code review for details. + /// + /// Analyzes a LINQ expression and extracts filter clauses. + /// + /// The expression to analyze. + /// The list to add extracted filter clauses to. + private static void AnalyzeExpression(Expression expression, List filterClauses) + { + switch (expression) + { + case BinaryExpression binaryExpr: + if (binaryExpr.NodeType == ExpressionType.AndAlso) + { + // Handle AND expressions by recursively analyzing both sides + AnalyzeExpression(binaryExpr.Left, filterClauses); + AnalyzeExpression(binaryExpr.Right, filterClauses); + } + else if (binaryExpr.NodeType == ExpressionType.OrElse) + { + // Handle OR expressions by recursively analyzing both sides + // Note: OR results in multiple filter values for the same property (especially for domains) + AnalyzeExpression(binaryExpr.Left, filterClauses); + AnalyzeExpression(binaryExpr.Right, filterClauses); + } + else if (binaryExpr.NodeType == ExpressionType.Equal) + { + // Handle equality expressions + ExtractEqualityClause(binaryExpr, filterClauses); + } + else if (binaryExpr.NodeType == ExpressionType.NotEqual) + { + // Handle inequality expressions (property != value) + // This is supported as a negation pattern + ExtractInequalityClause(binaryExpr, filterClauses); + } + else + { + throw new NotSupportedException($"Binary expression type '{binaryExpr.NodeType}' is not supported. 
Supported operators: AndAlso (&&), OrElse (||), Equal (==), NotEqual (!=)."); + } + break; + + case UnaryExpression unaryExpr when unaryExpr.NodeType == ExpressionType.Not: + // Handle NOT expressions (negation) + AnalyzeNotExpression(unaryExpr, filterClauses); + break; + + case MethodCallExpression methodCall: + // Handle method calls like Contains, StartsWith, etc. + ExtractMethodCallClause(methodCall, filterClauses); + break; + + default: + throw new NotSupportedException($"Expression type '{expression.NodeType}' is not supported in Tavily search filters."); + } + } + + /// + /// Extracts an equality filter clause from a binary equality expression. + /// + /// The binary equality expression. + /// The list to add the extracted clause to. + private static void ExtractEqualityClause(BinaryExpression binaryExpr, List filterClauses) + { + string? propertyName = null; + object? value = null; + + // Determine which side is the property and which is the value + if (binaryExpr.Left is MemberExpression leftMember) + { + propertyName = leftMember.Member.Name; + value = ExtractValue(binaryExpr.Right); + } + else if (binaryExpr.Right is MemberExpression rightMember) + { + propertyName = rightMember.Member.Name; + value = ExtractValue(binaryExpr.Left); + } + + if (propertyName != null && value != null) + { + filterClauses.Add(new EqualToFilterClause(propertyName, value)); + } + else + { + throw new NotSupportedException("Unable to extract property name and value from equality expression."); + } + } + + /// + /// Extracts an inequality filter clause from a binary not-equal expression. + /// + /// The binary not-equal expression. + /// The list to add the extracted clause to. + private static void ExtractInequalityClause(BinaryExpression binaryExpr, List filterClauses) + { + // Note: Inequality is tracked but handled differently depending on the property + // For now, we log a warning that inequality filtering may not work as expected + string? propertyName = null; + object? 
value = null; + + if (binaryExpr.Left is MemberExpression leftMember) + { + propertyName = leftMember.Member.Name; + value = ExtractValue(binaryExpr.Right); + } + else if (binaryExpr.Right is MemberExpression rightMember) + { + propertyName = rightMember.Member.Name; + value = ExtractValue(binaryExpr.Left); + } + + if (propertyName != null && value != null) + { + // Tavily has no negation filter, so fail loudly instead of silently dropping the clause. + // Do not suggest !(x == y) here: AnalyzeNotExpression rejects that pattern as well. + throw new NotSupportedException($"Inequality operator (!=) is not supported for property '{propertyName}'. Tavily's API does not support negative filtering; use equality (==) filters or filter the results on the client side."); + } + + throw new NotSupportedException("Unable to extract property name and value from inequality expression."); + } + + /// + /// Analyzes a NOT (negation) expression. Every NOT expression is rejected with a NotSupportedException. + /// + /// The unary NOT expression. + /// The list to add extracted filter clauses to. + private static void AnalyzeNotExpression(UnaryExpression unaryExpr, List filterClauses) + { + // Tavily exposes no negative filters, so every NOT expression is rejected. + // The !(property == value) case is split out only to give a more specific error message. + if (unaryExpr.Operand is BinaryExpression binaryExpr && binaryExpr.NodeType == ExpressionType.Equal) + { + // This is !(property == value): recognized, but still unsupported + throw new NotSupportedException("NOT operator (!) with equality is not directly supported. Most web search APIs don't support negative filtering."); + } + + throw new NotSupportedException("NOT operator (!) is not supported in Tavily search filters."); + } + + /// + /// Extracts a filter clause from a method call expression (e.g., Contains, StartsWith). + /// + /// The method call expression. + /// The list to add the extracted clause to. 
+ private static void ExtractMethodCallClause(MethodCallExpression methodCall, List filterClauses) + { + if (methodCall.Method.Name == "Contains") + { + // Check if this is property.Contains(value) or array.Contains(property) + if (methodCall.Object is MemberExpression member) + { + // This is property.Contains(value) - e.g., page.IncludeDomain.Contains("wikipedia.org") + var propertyName = member.Member.Name; + var value = ExtractValue(methodCall.Arguments[0]); + + if (value != null) + { + // For Contains, we'll map it to equality for domains (Tavily supports domain filtering) + if (propertyName.EndsWith("Domain", StringComparison.OrdinalIgnoreCase)) + { + filterClauses.Add(new EqualToFilterClause(propertyName, value)); + } + else if (propertyName.Equals("Title", StringComparison.OrdinalIgnoreCase)) + { + // For Title.Contains(), add the term to the search query itself + filterClauses.Add(new SearchQueryFilterClause(value.ToString() ?? string.Empty)); + } + else + { + throw new NotSupportedException($"Contains method is only supported for domain properties (IncludeDomain, ExcludeDomain) and Title, not '{propertyName}'."); + } + } + } + else if (methodCall.Object == null && methodCall.Arguments.Count == 2) + { + // This is array.Contains(property) - e.g., new[] { "general", "news" }.Contains(page.Topic) + // This pattern is not supported regardless of whether it's Enumerable.Contains (C# 13-) or MemoryExtensions.Contains (C# 14+) + // Both resolve to extension method calls with methodCall.Object == null + + // Provide detailed error message that covers both C# language versions + string errorMessage = "Collection Contains filters (e.g., array.Contains(page.Property)) are not supported by Tavily Search API. " + + "Tavily's API does not support OR logic across multiple values. "; + + if (IsMemoryExtensionsContains(methodCall)) + { + errorMessage += "Note: This occurs when using C# 14+ language features with span-based Contains methods (MemoryExtensions.Contains). 
"; + } + else + { + errorMessage += "Note: This occurs with standard LINQ extension methods (Enumerable.Contains). "; + } + + errorMessage += "Consider either: (1) performing multiple separate searches for each value, or " + + "(2) retrieving broader results and filtering on the client side."; + + throw new NotSupportedException(errorMessage); + } + else + { + throw new NotSupportedException("Unsupported Contains expression format."); + } + } + else + { + throw new NotSupportedException($"Method '{methodCall.Method.Name}' is not supported in Tavily search filters. Only 'Contains' is supported."); + } + } + + /// + /// Extracts a constant value from an expression. + /// + /// The expression to extract the value from. + /// The extracted value, or null if extraction failed. + private static object? ExtractValue(Expression expression) + { + return expression switch + { + ConstantExpression constant => constant.Value, + MemberExpression member when member.Expression is ConstantExpression constantExpr => + member.Member switch + { + System.Reflection.FieldInfo field => field.GetValue(constantExpr.Value), + System.Reflection.PropertyInfo property => property.GetValue(constantExpr.Value), + _ => null + }, + _ => Expression.Lambda(expression).Compile().DynamicInvoke() + }; + } + + #endregion + + #region Private Methods private readonly ILogger _logger; private readonly HttpClient _httpClient; @@ -177,6 +602,41 @@ private async IAsyncEnumerable GetSearchResultsAsync(TavilySearchRespons } } + /// + /// Return the search results as instances of . + /// + /// Response containing the web pages matching the query. + /// Cancellation token + private async IAsyncEnumerable GetResultsAsWebPageAsync(TavilySearchResponse? 
searchResponse, [EnumeratorCancellation] CancellationToken cancellationToken) + { + if (searchResponse is null || searchResponse.Results is null) + { + yield break; + } + + foreach (var result in searchResponse.Results) + { + yield return TavilyWebPage.FromSearchResult(result); + await Task.Yield(); + } + + if (this._searchOptions?.IncludeImages ?? false && searchResponse.Images is not null) + { + foreach (var image in searchResponse.Images!) + { + //For images, create a basic TavilyWebPage representation + Uri? imageUri = string.IsNullOrWhiteSpace(image.Url) ? null : new Uri(image.Url); + yield return new TavilyWebPage( + title: "Image Result", + url: imageUri, + content: image.Description ?? string.Empty, + score: 0.0 + ); + await Task.Yield(); + } + } + } + /// /// Return the search results as instances of . /// @@ -383,5 +843,40 @@ private TavilySearchRequest BuildRequestContent(string query, TextSearchOptions string strPayload = payload as string ?? JsonSerializer.Serialize(payload, s_jsonOptionsCache); return new(strPayload, Encoding.UTF8, "application/json"); } + + /// + /// Determines if a method call expression is a MemoryExtensions.Contains call (C# 14+ compatibility). + /// In C# 14+, array.Contains(property) may resolve to MemoryExtensions.Contains instead of Enumerable.Contains. + /// + /// The method call expression to check. + /// True if this is a MemoryExtensions.Contains call, false otherwise. 
+ private static bool IsMemoryExtensionsContains(MethodCallExpression methodCall) + { + // Check if this is a static method call (Object is null) + if (methodCall.Object != null) + { + return false; + } + + // Check if it's MemoryExtensions.Contains + if (methodCall.Method.DeclaringType?.Name != "MemoryExtensions") + { + return false; + } + + // MemoryExtensions.Contains has 2-3 parameters: (ReadOnlySpan, T) or (ReadOnlySpan, T, IEqualityComparer) + if (methodCall.Arguments.Count < 2 || methodCall.Arguments.Count > 3) + { + return false; + } // For our text search scenarios, we don't support span comparers + if (methodCall.Arguments.Count == 3) + { + throw new NotSupportedException( + "MemoryExtensions.Contains with custom IEqualityComparer is not supported. " + + "Use simple array.Contains(property) expressions without custom comparers."); + } + + return true; + } #endregion } diff --git a/dotnet/src/Plugins/Plugins.Web/Tavily/TavilyWebPage.cs b/dotnet/src/Plugins/Plugins.Web/Tavily/TavilyWebPage.cs new file mode 100644 index 000000000000..fddf338e1e06 --- /dev/null +++ b/dotnet/src/Plugins/Plugins.Web/Tavily/TavilyWebPage.cs @@ -0,0 +1,102 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; + +namespace Microsoft.SemanticKernel.Plugins.Web.Tavily; + +/// +/// Represents a type-safe web page result from Tavily search for use with generic ITextSearch<TRecord> interface. +/// This class provides compile-time type safety and IntelliSense support for Tavily search filtering. +/// +public sealed class TavilyWebPage +{ + /// + /// Gets or sets the title of the web page. + /// + public string? Title { get; set; } + + /// + /// Gets or sets the URL of the web page. + /// + public Uri? Url { get; set; } + + /// + /// Gets or sets the content/description of the web page. + /// + public string? Content { get; set; } + + /// + /// Gets or sets the raw content of the web page (if available). + /// + public string? 
RawContent { get; set; } + + /// + /// Gets or sets the relevance score of the search result. + /// + public double Score { get; set; } + + /// + /// Gets or sets the topic filter for search results. + /// Maps to Tavily's 'topic' parameter for focused search. + /// + public string? Topic { get; set; } + + /// + /// Gets or sets the time range filter for search results. + /// Maps to Tavily's 'time_range' parameter (e.g., "day", "week", "month", "year"). + /// + public string? TimeRange { get; set; } + + /// + /// Gets or sets the number of days for time-based filtering. + /// Maps to Tavily's 'days' parameter for custom date ranges. + /// + public int? Days { get; set; } + + /// + /// Gets or sets the domain to include in search results. + /// Maps to Tavily's 'include_domain' parameter. + /// + public string? IncludeDomain { get; set; } + + /// + /// Gets or sets the domain to exclude from search results. + /// Maps to Tavily's 'exclude_domain' parameter. + /// + public string? ExcludeDomain { get; set; } + + /// + /// Initializes a new instance of the class. + /// + public TavilyWebPage() + { + } + + /// + /// Initializes a new instance of the class with specified values. + /// + /// The title of the web page. + /// The URL of the web page. + /// The content/description of the web page. + /// The relevance score. + /// The raw content (optional). + public TavilyWebPage(string? title, Uri? url, string? content, double score, string? rawContent = null) + { + this.Title = title; + this.Url = url; + this.Content = content; + this.Score = score; + this.RawContent = rawContent; + } + + /// + /// Creates a TavilyWebPage from a TavilySearchResult. + /// + /// The search result to convert. + /// A new TavilyWebPage instance. + internal static TavilyWebPage FromSearchResult(TavilySearchResult result) + { + Uri? url = string.IsNullOrWhiteSpace(result.Url) ? 
null : new Uri(result.Url); + return new TavilyWebPage(result.Title, url, result.Content, result.Score, result.RawContent); + } +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/TextSearch/ITextSearch.cs b/dotnet/src/SemanticKernel.Abstractions/Data/TextSearch/ITextSearch.cs index 57da1a9ec677..e955af86bc6c 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/TextSearch/ITextSearch.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/TextSearch/ITextSearch.cs @@ -36,12 +36,12 @@ Task> GetTextSearchResultsAsync( CancellationToken cancellationToken = default); /// - /// Perform a search for content related to the specified query and return values representing the search results. + /// Perform a search for content related to the specified query and return strongly-typed values representing the search results. /// /// What to search for. /// Options used when executing a text search. /// The to monitor for cancellation requests. The default is . - Task> GetSearchResultsAsync( + Task> GetSearchResultsAsync( string query, TextSearchOptions? searchOptions = null, CancellationToken cancellationToken = default); diff --git a/dotnet/src/SemanticKernel.Core/Data/TextSearch/VectorStoreTextSearch.cs b/dotnet/src/SemanticKernel.Core/Data/TextSearch/VectorStoreTextSearch.cs index 121ff9b6c7bb..f1b18483c43a 100644 --- a/dotnet/src/SemanticKernel.Core/Data/TextSearch/VectorStoreTextSearch.cs +++ b/dotnet/src/SemanticKernel.Core/Data/TextSearch/VectorStoreTextSearch.cs @@ -213,11 +213,11 @@ Task> ITextSearch.GetTextSearchRe } /// - Task> ITextSearch.GetSearchResultsAsync(string query, TextSearchOptions? searchOptions, CancellationToken cancellationToken) + Task> ITextSearch.GetSearchResultsAsync(string query, TextSearchOptions? 
searchOptions, CancellationToken cancellationToken) { var searchResponse = this.ExecuteVectorSearchAsync(query, searchOptions, cancellationToken); - return Task.FromResult(new KernelSearchResults(this.GetResultsAsRecordAsync(searchResponse, cancellationToken))); + return Task.FromResult(new KernelSearchResults(this.GetResultsAsTRecordAsync(searchResponse, cancellationToken))); } #region private @@ -367,6 +367,28 @@ private async IAsyncEnumerable GetResultsAsRecordAsync(IAsyncEnumerable< } } + /// + /// Return the search results as strongly-typed instances. + /// + /// Response containing the records matching the query. + /// Cancellation token + private async IAsyncEnumerable GetResultsAsTRecordAsync(IAsyncEnumerable>? searchResponse, [EnumeratorCancellation] CancellationToken cancellationToken) + { + if (searchResponse is null) + { + yield break; + } + + await foreach (var result in searchResponse.WithCancellation(cancellationToken).ConfigureAwait(false)) + { + if (result.Record is not null) + { + yield return result.Record; + await Task.Yield(); + } + } + } + /// /// Return the search results as instances of . /// diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreTextSearchTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreTextSearchTests.cs index 8dd095710c06..75f4b090590e 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreTextSearchTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreTextSearchTests.cs @@ -78,12 +78,14 @@ public async Task CanGetSearchResultAsync() { // Arrange. var sut = await CreateVectorStoreTextSearchAsync(); + ITextSearch typeSafeInterface = sut; // Act. 
- KernelSearchResults searchResults = await sut.GetSearchResultsAsync("What is the Semantic Kernel?", new() { Top = 2, Skip = 0 }); + KernelSearchResults searchResults = await typeSafeInterface.GetSearchResultsAsync("What is the Semantic Kernel?", new TextSearchOptions { Top = 2, Skip = 0 }); var results = await searchResults.Results.ToListAsync(); Assert.Equal(2, results.Count); + Assert.All(results, result => Assert.IsType(result)); } [Fact] @@ -117,12 +119,14 @@ public async Task CanGetSearchResultsWithEmbeddingGeneratorAsync() { // Arrange. var sut = await CreateVectorStoreTextSearchWithEmbeddingGeneratorAsync(); + ITextSearch typeSafeInterface = sut; // Act. - KernelSearchResults searchResults = await sut.GetSearchResultsAsync("What is the Semantic Kernel?", new() { Top = 2, Skip = 0 }); + KernelSearchResults searchResults = await typeSafeInterface.GetSearchResultsAsync("What is the Semantic Kernel?", new TextSearchOptions { Top = 2, Skip = 0 }); var results = await searchResults.Results.ToListAsync(); Assert.Equal(2, results.Count); + Assert.All(results, result => Assert.IsType(result)); } #pragma warning disable CS0618 // VectorStoreTextSearch with ITextEmbeddingGenerationService is obsolete @@ -270,17 +274,16 @@ public async Task LinqGetSearchResultsAsync() Filter = r => r.Tag == "Even" }; - KernelSearchResults searchResults = await typeSafeInterface.GetSearchResultsAsync( + KernelSearchResults searchResults = await typeSafeInterface.GetSearchResultsAsync( "What is the Semantic Kernel?", searchOptions); var results = await searchResults.Results.ToListAsync(); - // Assert - Results should be DataModel objects with Tag == "Even" + // Assert - Results should be strongly-typed DataModel objects with Tag == "Even" Assert.NotEmpty(results); Assert.All(results, result => { - var dataModel = Assert.IsType(result); - Assert.Equal("Even", dataModel.Tag); + Assert.Equal("Even", result.Tag); // Direct property access - no cast needed! 
}); } diff --git a/dotnet/src/VectorData/VectorData.Abstractions/FilterClauses/FilterClause.cs b/dotnet/src/VectorData/VectorData.Abstractions/FilterClauses/FilterClause.cs index af0c1dac51b3..be72560ffc2f 100644 --- a/dotnet/src/VectorData/VectorData.Abstractions/FilterClauses/FilterClause.cs +++ b/dotnet/src/VectorData/VectorData.Abstractions/FilterClauses/FilterClause.cs @@ -11,7 +11,10 @@ namespace Microsoft.Extensions.VectorData; /// public abstract class FilterClause { - internal FilterClause() + /// + /// Initializes a new instance of the class. + /// + protected FilterClause() { } }