Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
7db0c37
Added very first spike for NOT filtering
alkampfergit Jul 9, 2024
79028ba
Added more test to verify not filter.
alkampfergit Jul 10, 2024
1eb67bf
Qdrant seems to work.
alkampfergit Jul 10, 2024
40a76ed
Not filter supported in SQL Server.
alkampfergit Jul 12, 2024
194a37e
Supported MongodB atlas.
alkampfergit Jul 12, 2024
3d1cb0f
Implemented Postgres.
alkampfergit Jul 12, 2024
efdf1b0
Fixed test in memory text db and simplevector db.
alkampfergit Jul 12, 2024
3c22929
AZure Ai filtering updated support for Not Filter.
alkampfergit Jul 12, 2024
b6a53cc
Not filter on elasticsearch.
alkampfergit Jul 15, 2024
f5611c0
Update service/Abstractions/Pipeline/MimeTypes.cs
dluc Aug 24, 2024
e434147
Update extensions/AzureAISearch/AzureAISearch/Internals/AzureAISearch…
dluc Aug 24, 2024
2eb631e
Update extensions/Qdrant/Qdrant/QdrantMemory.cs
dluc Aug 24, 2024
900295e
Fix spacing
dluc Aug 24, 2024
66af032
Fix build warning
dluc Aug 24, 2024
d931a5f
Update extensions/AzureAISearch/AzureAISearch.FunctionalTests/Default…
dluc Aug 24, 2024
4383148
Code style
dluc Aug 24, 2024
6fa77d2
Update service/Core/MemoryStorage/DevTools/SimpleTextDb.cs
dluc Aug 24, 2024
4fdea7d
Update service/Core/MemoryStorage/DevTools/SimpleTextDb.cs
dluc Aug 24, 2024
ead9dd4
Update service/Core/MemoryStorage/DevTools/SimpleTextDb.cs
dluc Aug 24, 2024
1cfe86e
Update service/Core/MemoryStorage/DevTools/SimpleTextDb.cs
dluc Aug 24, 2024
40a8dba
Apply suggestions from code review
dluc Aug 24, 2024
1d0c474
Not filters: Fix after PR Comments
alkampfergit Sep 3, 2024
5f5b820
Fixed Qdrant filtering with NOT Clause.
alkampfergit Sep 3, 2024
3d0e39a
Fixed test for empty key when azure is used
alkampfergit Sep 3, 2024
aa7311d
Removed UserSecrets, fixed Qdrant test
alkampfergit Sep 3, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,41 @@ public DefaultTests(IConfiguration cfg, ITestOutputHelper output) : base(cfg, ou
{
Assert.False(string.IsNullOrEmpty(this.AzureAiSearchConfig.Endpoint));
Assert.False(string.IsNullOrEmpty(this.AzureAiSearchConfig.APIKey));
Assert.False(string.IsNullOrEmpty(this.OpenAiConfig.APIKey));

this._memory = new KernelMemoryBuilder()
.With(new KernelMemoryConfig { DefaultIndexName = "default4tests" })
.WithSearchClientConfig(new SearchClientConfig { EmptyAnswer = NotFound })
.WithOpenAI(this.OpenAiConfig)
.WithAzureAISearchMemoryDb(this.AzureAiSearchConfig.Endpoint, this.AzureAiSearchConfig.APIKey)
.Build<MemoryServerless>();

if (cfg.GetValue<bool>("UseAzureOpenAI"))
{
// Azure can use managed identities, so an API key is required only when the auth type is APIKey.
if (this.AzureOpenAITextConfiguration.Auth == AzureOpenAIConfig.AuthTypes.APIKey)
{
//verify that we really have an api key.
Assert.False(string.IsNullOrEmpty(this.AzureOpenAITextConfiguration.APIKey));
}

if (this.AzureOpenAIEmbeddingConfiguration.Auth == AzureOpenAIConfig.AuthTypes.APIKey)
{
//verify that we really have an api key.
Assert.False(string.IsNullOrEmpty(this.AzureOpenAIEmbeddingConfiguration.APIKey));
}

this._memory = new KernelMemoryBuilder()
.With(new KernelMemoryConfig { DefaultIndexName = "default4tests" })
.WithSearchClientConfig(new SearchClientConfig { EmptyAnswer = NotFound })
.WithAzureOpenAITextGeneration(this.AzureOpenAITextConfiguration)
.WithAzureOpenAITextEmbeddingGeneration(this.AzureOpenAIEmbeddingConfiguration)
.WithAzureAISearchMemoryDb(this.AzureAiSearchConfig.Endpoint, this.AzureAiSearchConfig.APIKey)
.Build<MemoryServerless>();
}
else
{
Assert.False(string.IsNullOrEmpty(this.OpenAiConfig.APIKey));

this._memory = new KernelMemoryBuilder()
.With(new KernelMemoryConfig { DefaultIndexName = "default4tests" })
.WithSearchClientConfig(new SearchClientConfig { EmptyAnswer = NotFound })
.WithOpenAI(this.OpenAiConfig)
.WithAzureAISearchMemoryDb(this.AzureAiSearchConfig.Endpoint, this.AzureAiSearchConfig.APIKey)
.Build<MemoryServerless>();
}
}

[Fact]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) Microsoft. All rights reserved.
// Copyright (c) Microsoft. All rights reserved.

using Microsoft.KernelMemory;
using Microsoft.KernelMemory.MemoryDb.AzureAISearch;
Expand Down Expand Up @@ -225,7 +225,13 @@ public void ItHandlesEdgeCase3()

// Assert
Console.WriteLine($"Result: {result}");
Assert.Equal("(tags/any(s: s eq 'color:blue') and tags/any(s: s eq 'color:blue'))", result);

// Note: before the Not filter was introduced, this test expected the result
// (tags/any(s: s eq 'color:blue') and tags/any(s: s eq 'color:blue'))
// In my opinion a more concise result is better: the previous result joined
// two identical conditions with an `and`, which is equivalent to a single
// condition.
Assert.Equal("(tags/any(s: s eq 'color:blue'))", result);
}

[Fact]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,14 @@ internal static string BuildSearchFilter(IEnumerable<MemoryFilter> filters)
var filtersForSearchInQuery = filterList
// Filters with only one key, but not multiple values (i.e: excluding MemoryFilters.ByTag("department", "HR").ByTag("department", "Marketing") as here we want an `AND`)
.Where(filter => !filter.IsEmpty() && filter.Keys.Count == 1 && filter.Values.First().Count == 1)
.SelectMany(filter => filter.Pairs) // Flattening to pairs
.SelectMany(filter => filter.GetFilters()) // Flattening to pairs
.GroupBy(pair => pair.Key) // Grouping by the tag key
.Where(g => g.Count() > 1)
.Select(group => new
{
Key = group.Key,
Values = group.Select(pair => $"{pair.Key}:{pair.Value?.Replace("'", "''", StringComparison.Ordinal)}").ToList(),
EqualValues = group.OfType<EqualFilter>().Select(baseFilter => $"{baseFilter.Key}:{baseFilter.Value?.Replace("'", "''", StringComparison.Ordinal)}").ToList(),
NotEqualValues = group.OfType<NotEqualFilter>().Select(baseFilter => $"{baseFilter.Key}:{baseFilter.Value?.Replace("'", "''", StringComparison.Ordinal)}").ToList(),
SearchInDelimiter = s_searchInDelimitersAvailable.FirstOrDefault(specialChar =>
!group.Any(pair =>
(pair.Value != null && pair.Value.Contains(specialChar, StringComparison.Ordinal)) ||
Expand All @@ -54,7 +55,15 @@ internal static string BuildSearchFilter(IEnumerable<MemoryFilter> filters)
// The default value of this parameter is ' ,' which means that any values with spaces and/or commas between them will be separated.
// If you need to use separators other than spaces and commas because your values include those characters,
// you can specify alternate delimiters such as '|' in this parameter.
conditions.Add($"tags/any(s: search.in(s, '{string.Join(filterGroup.SearchInDelimiter, filterGroup.Values)}', '{filterGroup.SearchInDelimiter}'))");
if (filterGroup.EqualValues.Count != 0)
{
conditions.Add($"tags/any(s: search.in(s, '{string.Join(filterGroup.SearchInDelimiter, filterGroup.EqualValues)}', '{filterGroup.SearchInDelimiter}'))");
}

if (filterGroup.NotEqualValues.Count != 0)
{
conditions.Add($"not tags/any(s: search.in(s, '{string.Join(filterGroup.SearchInDelimiter, filterGroup.NotEqualValues)}', '{filterGroup.SearchInDelimiter}'))");
}
}

//Exclude filters that were grouped before in the search.in process
Expand All @@ -65,13 +74,25 @@ internal static string BuildSearchFilter(IEnumerable<MemoryFilter> filters)

// Note: empty filters would lead to a syntax error, so even if they are supposed
// to be removed upstream, we check again and remove them here too.
foreach (var filter in remainingFilters.Where(f => !f.IsEmpty()))
foreach (var filter in remainingFilters)
{
var filterConditions = filter.GetFilters()
.Select(keyValue =>
.Select(baseFilter =>
{
var fieldValue = keyValue.Value?.Replace("'", "''", StringComparison.Ordinal);
return $"tags/any(s: s eq '{keyValue.Key}{Constants.ReservedEqualsChar}{fieldValue}')";
if (baseFilter is EqualFilter eq)
{
var fieldValue = eq.Value?.Replace("'", "''", StringComparison.Ordinal);
return $"tags/any(s: s eq '{baseFilter.Key}{Constants.ReservedEqualsChar}{fieldValue}')";
}
else if (baseFilter is NotEqualFilter neq)
{
var fieldValue = neq.Value?.Replace("'", "''", StringComparison.Ordinal);
return $"not tags/any(s: s eq '{baseFilter.Key}{Constants.ReservedEqualsChar}{fieldValue}')";
}
else
{
throw new AzureAISearchMemoryException($"Filter type {baseFilter.GetType().Name} is not supported.");
}
})
.ToList();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public void BadIndexNamesAreRejected(string indexName, int errorCount)
$"" +
$"The expected number of errors was {errorCount}.");

Assert.True(errorCount == exception.Errors.Count(), $"The number of errprs expected is different than the number of errors found.");
Assert.True(errorCount == exception.Errors.Count(), $"The number of errors expected is different than the number of errors found.");
}

[Fact]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,40 @@ public class KernelMemoryTests : MemoryDbFunctionalTest
public KernelMemoryTests(IConfiguration cfg, ITestOutputHelper output)
: base(cfg, output)
{
this.KernelMemory = new KernelMemoryBuilder()
.With(new KernelMemoryConfig { DefaultIndexName = "default4tests" })
.WithSearchClientConfig(new SearchClientConfig { EmptyAnswer = NotFound })
.WithOpenAI(this.OpenAiConfig)
.WithElasticsearchMemoryDb(this.ElasticsearchConfig)
.Build<MemoryServerless>();
if (cfg.GetValue<bool>("UseAzureOpenAI"))
{
// Azure can use managed identities, so an API key is required only when the auth type is APIKey.
if (this.AzureOpenAITextConfiguration.Auth == AzureOpenAIConfig.AuthTypes.APIKey)
{
//verify that we really have an api key.
Assert.False(string.IsNullOrEmpty(this.AzureOpenAITextConfiguration.APIKey));
}

if (this.AzureOpenAIEmbeddingConfiguration.Auth == AzureOpenAIConfig.AuthTypes.APIKey)
{
//verify that we really have an api key.
Assert.False(string.IsNullOrEmpty(this.AzureOpenAIEmbeddingConfiguration.APIKey));
}

this.KernelMemory = new KernelMemoryBuilder()
.With(new KernelMemoryConfig { DefaultIndexName = "default4tests" })
.WithSearchClientConfig(new SearchClientConfig { EmptyAnswer = NotFound })
.WithAzureOpenAITextGeneration(this.AzureOpenAITextConfiguration)
.WithAzureOpenAITextEmbeddingGeneration(this.AzureOpenAIEmbeddingConfiguration)
.WithElasticsearchMemoryDb(this.ElasticsearchConfig)
.Build<MemoryServerless>();
}
else
{
Assert.False(string.IsNullOrEmpty(this.OpenAiConfig.APIKey));

this.KernelMemory = new KernelMemoryBuilder()
.With(new KernelMemoryConfig { DefaultIndexName = "default4tests" })
.WithSearchClientConfig(new SearchClientConfig { EmptyAnswer = NotFound })
.WithOpenAI(this.OpenAiConfig)
.WithElasticsearchMemoryDb(this.ElasticsearchConfig)
.Build<MemoryServerless>();
}
}

public IKernelMemory KernelMemory { get; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

using Elastic.Clients.Elasticsearch;
using Microsoft.KernelMemory.AI;
using Microsoft.KernelMemory.AI.AzureOpenAI;
using Microsoft.KernelMemory.AI.OpenAI;
using Microsoft.KernelMemory.MemoryDb.Elasticsearch;
using Microsoft.KernelMemory.MemoryDb.Elasticsearch.Internals;
Expand All @@ -24,12 +25,21 @@ protected MemoryDbFunctionalTest(IConfiguration cfg, ITestOutputHelper output)
: base(cfg, output)
{
this.Output = output ?? throw new ArgumentNullException(nameof(output));

#pragma warning disable KMEXP01 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
this.TextEmbeddingGenerator = new OpenAITextEmbeddingGenerator(
config: base.OpenAiConfig,
textTokenizer: default,
loggerFactory: default);
if (cfg.GetValue<bool>("UseAzureOpenAI"))
{
this.TextEmbeddingGenerator = new AzureOpenAITextEmbeddingGenerator(
config: base.AzureOpenAIEmbeddingConfiguration,
textTokenizer: default,
loggerFactory: default);
}
else
{
this.TextEmbeddingGenerator = new OpenAITextEmbeddingGenerator(
config: base.OpenAiConfig,
textTokenizer: default,
loggerFactory: default);
}
#pragma warning restore KMEXP01 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.

this.Client = new ElasticsearchClient(base.ElasticsearchConfig.ToElasticsearchClientSettings());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,42 @@ public class DefaultTests : BaseFunctionalTestCase

public DefaultTests(IConfiguration cfg, ITestOutputHelper output) : base(cfg, output)
{
Assert.False(string.IsNullOrEmpty(this.OpenAiConfig.APIKey));

this._esConfig = cfg.GetSection("KernelMemory:Services:Elasticsearch").Get<ElasticsearchConfig>()!;

this._memory = new KernelMemoryBuilder()
.With(new KernelMemoryConfig { DefaultIndexName = "default4tests" })
.WithSearchClientConfig(new SearchClientConfig { EmptyAnswer = NotFound })
.WithOpenAI(this.OpenAiConfig)
// .WithAzureOpenAITextGeneration(this.AzureOpenAITextConfiguration)
// .WithAzureOpenAITextEmbeddingGeneration(this.AzureOpenAIEmbeddingConfiguration)
.WithElasticsearchMemoryDb(this._esConfig)
.Build<MemoryServerless>();
if (cfg.GetValue<bool>("UseAzureOpenAI"))
{
// Azure can use managed identities, so an API key is required only when the auth type is APIKey.
if (this.AzureOpenAITextConfiguration.Auth == AzureOpenAIConfig.AuthTypes.APIKey)
{
//verify that we really have an api key.
Assert.False(string.IsNullOrEmpty(this.AzureOpenAITextConfiguration.APIKey));
}

if (this.AzureOpenAIEmbeddingConfiguration.Auth == AzureOpenAIConfig.AuthTypes.APIKey)
{
//verify that we really have an api key.
Assert.False(string.IsNullOrEmpty(this.AzureOpenAIEmbeddingConfiguration.APIKey));
}

this._memory = new KernelMemoryBuilder()
.With(new KernelMemoryConfig { DefaultIndexName = "default4tests" })
.WithSearchClientConfig(new SearchClientConfig { EmptyAnswer = NotFound })
.WithAzureOpenAITextGeneration(this.AzureOpenAITextConfiguration)
.WithAzureOpenAITextEmbeddingGeneration(this.AzureOpenAIEmbeddingConfiguration)
.WithElasticsearchMemoryDb(this._esConfig)
.Build<MemoryServerless>();
}
else
{
Assert.False(string.IsNullOrEmpty(this.OpenAiConfig.APIKey));

this._memory = new KernelMemoryBuilder()
.With(new KernelMemoryConfig { DefaultIndexName = "default4tests" })
.WithSearchClientConfig(new SearchClientConfig { EmptyAnswer = NotFound })
.WithOpenAI(this.OpenAiConfig)
.WithElasticsearchMemoryDb(this._esConfig)
.Build<MemoryServerless>();
}
}

[Fact]
Expand Down
35 changes: 20 additions & 15 deletions extensions/Elasticsearch/Elasticsearch/ElasticsearchMemory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -306,40 +306,45 @@ private QueryDescriptor<ElasticsearchMemoryRecord> ConvertTagFilters(
QueryDescriptor<ElasticsearchMemoryRecord> qd,
ICollection<MemoryFilter>? filters = null)
{
if ((filters == null) || (filters.Count == 0))
{
qd.MatchAll();
return qd;
}

filters = filters.Where(f => f.Keys.Count > 0)
.ToList(); // Remove empty filters

if (filters.Count == 0)
var hasOneNotEmptyFilter = filters != null && filters.Any(f => !f.IsEmpty());
if (!hasOneNotEmptyFilter)
{
qd.MatchAll();
return qd;
}

List<Query> super = new();

foreach (MemoryFilter filter in filters)
foreach (MemoryFilter filter in filters!)
{
List<Query> thisMust = new();

// Each filter is a list of key/value pairs.
foreach (var pair in filter.Pairs)
foreach (var baseFilter in filter.GetFilters())
{
Query newTagQuery = new TermQuery(ElasticsearchMemoryRecord.TagsName) { Value = pair.Key };
Query termQuery = new TermQuery(ElasticsearchMemoryRecord.TagsValue) { Value = pair.Value ?? string.Empty };
Query newTagQuery = new TermQuery(ElasticsearchMemoryRecord.TagsName) { Value = baseFilter.Key };
Query termQuery = new TermQuery(ElasticsearchMemoryRecord.TagsValue) { Value = baseFilter.Value ?? string.Empty };

newTagQuery &= termQuery;

var nestedQd = new NestedQuery();
nestedQd.Path = ElasticsearchMemoryRecord.TagsField;
nestedQd.Query = newTagQuery;

thisMust.Add(nestedQd);
if (baseFilter is EqualFilter eq)
{
thisMust.Add(nestedQd);
}
else if (baseFilter is NotEqualFilter neq)
{
var notQuery = new BoolQuery();
notQuery.MustNot = [nestedQd];
thisMust.Add(notQuery);
}
else
{
throw new ElasticsearchException($"Filter type {baseFilter.GetType().Name} is not supported.");
}
}

var filterQuery = new BoolQuery();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ public abstract class DefaultTests : BaseFunctionalTestCase

protected DefaultTests(IConfiguration cfg, ITestOutputHelper output, bool multiCollection) : base(cfg, output)
{
Assert.False(string.IsNullOrEmpty(this.OpenAiConfig.APIKey), "OpenAI API Key is empty");

if (multiCollection)
{
// this._config = this.MongoDbAtlasConfig;
Expand Down Expand Up @@ -62,13 +60,40 @@ protected DefaultTests(IConfiguration cfg, ITestOutputHelper output, bool multiC
ash.DropDatabaseAsync().Wait();
}

this._memory = new KernelMemoryBuilder()
.WithSearchClientConfig(new SearchClientConfig { EmptyAnswer = NotFound })
.WithOpenAI(this.OpenAiConfig)
// .WithAzureOpenAITextGeneration(this.AzureOpenAITextConfiguration)
// .WithAzureOpenAITextEmbeddingGeneration(this.AzureOpenAIEmbeddingConfiguration)
.WithMongoDbAtlasMemoryDb(this.MongoDbAtlasConfig)
.Build<MemoryServerless>();
if (cfg.GetValue<bool>("UseAzureOpenAI"))
{
// Azure can use managed identities, so an API key is required only when the auth type is APIKey.
if (this.AzureOpenAITextConfiguration.Auth == AzureOpenAIConfig.AuthTypes.APIKey)
{
//verify that we really have an api key.
Assert.False(string.IsNullOrEmpty(this.AzureOpenAITextConfiguration.APIKey));
}

if (this.AzureOpenAIEmbeddingConfiguration.Auth == AzureOpenAIConfig.AuthTypes.APIKey)
{
//verify that we really have an api key.
Assert.False(string.IsNullOrEmpty(this.AzureOpenAIEmbeddingConfiguration.APIKey));
}

this._memory = new KernelMemoryBuilder()
.With(new KernelMemoryConfig { DefaultIndexName = "default4tests" })
.WithSearchClientConfig(new SearchClientConfig { EmptyAnswer = NotFound })
.WithAzureOpenAITextGeneration(this.AzureOpenAITextConfiguration)
.WithAzureOpenAITextEmbeddingGeneration(this.AzureOpenAIEmbeddingConfiguration)
.WithMongoDbAtlasMemoryDb(this.MongoDbAtlasConfig)
.Build<MemoryServerless>();
}
else
{
Assert.False(string.IsNullOrEmpty(this.OpenAiConfig.APIKey));

this._memory = new KernelMemoryBuilder()
.With(new KernelMemoryConfig { DefaultIndexName = "default4tests" })
.WithSearchClientConfig(new SearchClientConfig { EmptyAnswer = NotFound })
.WithOpenAI(this.OpenAiConfig)
.WithMongoDbAtlasMemoryDb(this.MongoDbAtlasConfig)
.Build<MemoryServerless>();
}
}

[Fact]
Expand Down
Loading