Skip to content
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Flaky Test] Fix flaky test IngestFromKinesisIT.testAllActiveIngestion ([#19380](https://github.com/opensearch-project/OpenSearch/pull/19380))
- Fix lag metric for pull-based ingestion when streaming source is empty ([#19393](https://github.com/opensearch-project/OpenSearch/pull/19393))
- Fix ingestion state xcontent serialization in IndexMetadata and fail fast on mapping errors([#19320](https://github.com/opensearch-project/OpenSearch/pull/19320))
- Fix updated keyword field params leading to stale responses from request cache ([#19385](https://github.com/opensearch-project/OpenSearch/pull/19385))

### Dependencies
- Update to Gradle 9.1.0 ([#19329](https://github.com/opensearch-project/OpenSearch/pull/19329))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import org.opensearch.action.admin.indices.alias.Alias;
import org.opensearch.action.admin.indices.cache.clear.ClearIndicesCacheRequest;
import org.opensearch.action.admin.indices.forcemerge.ForceMergeResponse;
import org.opensearch.action.admin.indices.mapping.put.PutMappingRequest;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.action.search.SearchType;
import org.opensearch.cluster.ClusterState;
Expand Down Expand Up @@ -861,6 +862,115 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo
assertEquals(0, requestCacheStats.getMemorySizeInBytes());
}

public void testKeywordFieldUseSimilarityCacheability() throws Exception {
testKeywordFieldParameterCacheabilityCase("use_similarity");
}

public void testKeywordFieldSplitWhitespaceCacheability() throws Exception {
testKeywordFieldParameterCacheabilityCase("split_queries_on_whitespace");
}

private void testKeywordFieldParameterCacheabilityCase(String paramName) throws Exception {
// When keyword fields have non-default values for "use_similarity" or "split_queries_on_whitespace",
// queries on those fields shouldn't be cacheable. Default value is false for both parameters.
String node = internalCluster().startNode(
Settings.builder().put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, TimeValue.timeValueMillis(50))
);
Client client = client(node);
String index = "index";
String field1 = "field1";
String field2 = "field2";
assertAcked(
client.admin()
.indices()
.prepareCreate(index)
.setMapping(field1, "type=keyword," + paramName + "=false", field2, "type=keyword," + paramName + "=true")
.setSettings(
Settings.builder()
.put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true)
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
// Disable index refreshing to avoid cache being invalidated mid-test
.put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), TimeValue.timeValueMillis(-1))
)
.get()
);

indexRandom(true, client.prepareIndex(index).setSource(field1, "foo", field2, "foo"));
indexRandom(true, client.prepareIndex(index).setSource(field1, "foo2", field2, "foo2"));
ensureSearchable(index);
// Force merge the index to ensure there can be no background merges during the subsequent searches that would invalidate the cache
forceMerge(client, index);

// Show this behavior works for a nested query where the termQuery built by KeywordFieldMapper is not the outermost query
SearchResponse resp = client.prepareSearch(index)
.setRequestCache(true)
.setQuery(
QueryBuilders.boolQuery().should(QueryBuilders.termQuery(field1, "foo")).should(QueryBuilders.termQuery(field1, "foo2"))
)
.get();
assertSearchResponse(resp);
// This query should be cached
RequestCacheStats stats = getNodeCacheStats(client);
assertEquals(1, stats.getMissCount());
long cacheSize = stats.getMemorySizeInBytes();
assertTrue(cacheSize > 0);

// The same query but involving field2 at all should not be cacheable even if setRequestCache is true:
// no extra misses and no extra values in cache
assertQueryCausesCacheState(
client,
index,
QueryBuilders.boolQuery().should(QueryBuilders.termQuery(field1, "foo")).should(QueryBuilders.termQuery(field2, "foo2")),
0,
1,
cacheSize
);

// Now change the parameter value to true for field1, and check the same query again does NOT come from the cache (hits == 0)
// and also shouldn't add any extra entries to the cache
PutMappingRequest mappingRequest = new PutMappingRequest().indices(index).source(field1, "type=keyword," + paramName + "=true");
client.admin().indices().putMapping(mappingRequest).actionGet();
assertQueryCausesCacheState(
client,
index,
QueryBuilders.boolQuery().should(QueryBuilders.termQuery(field1, "foo")).should(QueryBuilders.termQuery(field1, "foo2")),
0,
1,
cacheSize
);

// Now test all query building methods on field1 and ensure none are cacheable

assertQueryCausesCacheState(client, index, QueryBuilders.termsQuery(field1, "foo", "foo2"), 0, 1, cacheSize);

assertQueryCausesCacheState(client, index, QueryBuilders.prefixQuery(field1, "fo"), 0, 1, cacheSize);

assertQueryCausesCacheState(client, index, QueryBuilders.regexpQuery(field1, "f*o"), 0, 1, cacheSize);

assertQueryCausesCacheState(client, index, QueryBuilders.rangeQuery(field1).gte("f").lte("g"), 0, 1, cacheSize);

assertQueryCausesCacheState(client, index, QueryBuilders.fuzzyQuery(field1, "foe"), 0, 1, cacheSize);

assertQueryCausesCacheState(client, index, QueryBuilders.wildcardQuery(field1, "f*"), 0, 1, cacheSize);
}

private void assertQueryCausesCacheState(
Client client,
String index,
QueryBuilder builder,
long expectedHits,
long expectedMisses,
long expectedMemory
) {
SearchResponse resp = client.prepareSearch(index).setRequestCache(true).setQuery(builder).get();
assertSearchResponse(resp);
RequestCacheStats stats = getNodeCacheStats(client);
assertEquals(expectedHits, stats.getHitCount());
assertEquals(expectedMisses, stats.getMissCount());
assertEquals(expectedMemory, stats.getMemorySizeInBytes());
}

private Path[] shardDirectory(String server, Index index, int shard) {
NodeEnvironment env = internalCluster().getInstance(NodeEnvironment.class, server);
final Path[] paths = env.availableShardPaths(new ShardId(index, shard));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ public static class KeywordFieldType extends StringFieldType {
private final int ignoreAbove;
private final String nullValue;
private final boolean useSimilarity;
private final boolean splitQueriesOnWhitespace;

public KeywordFieldType(String name, FieldType fieldType, NamedAnalyzer normalizer, NamedAnalyzer searchAnalyzer, Builder builder) {
super(
Expand All @@ -328,18 +329,31 @@ public KeywordFieldType(String name, FieldType fieldType, NamedAnalyzer normaliz
this.ignoreAbove = builder.ignoreAbove.getValue();
this.nullValue = builder.nullValue.getValue();
this.useSimilarity = builder.useSimilarity.getValue();
this.splitQueriesOnWhitespace = builder.splitQueriesOnWhitespace.getValue();
}

public KeywordFieldType(String name, boolean isSearchable, boolean hasDocValues, Map<String, String> meta) {
this(name, isSearchable, hasDocValues, false, meta);
}

public KeywordFieldType(String name, boolean isSearchable, boolean hasDocValues, boolean useSimilarity, Map<String, String> meta) {
this(name, isSearchable, hasDocValues, useSimilarity, false, meta);
}

public KeywordFieldType(
String name,
boolean isSearchable,
boolean hasDocValues,
boolean useSimilarity,
boolean splitQueriesOnWhitespace,
Map<String, String> meta
) {
super(name, isSearchable, false, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
this.ignoreAbove = Integer.MAX_VALUE;
this.nullValue = null;
this.useSimilarity = useSimilarity;
this.splitQueriesOnWhitespace = splitQueriesOnWhitespace;
}

public KeywordFieldType(String name) {
Expand All @@ -358,13 +372,15 @@ public KeywordFieldType(String name, FieldType fieldType) {
this.ignoreAbove = Integer.MAX_VALUE;
this.nullValue = null;
this.useSimilarity = false;
this.splitQueriesOnWhitespace = false;
}

public KeywordFieldType(String name, NamedAnalyzer analyzer) {
super(name, true, false, true, new TextSearchInfo(Defaults.FIELD_TYPE, null, analyzer, analyzer), Collections.emptyMap());
this.ignoreAbove = Integer.MAX_VALUE;
this.nullValue = null;
this.useSimilarity = false;
this.splitQueriesOnWhitespace = false;
}

@Override
Expand Down Expand Up @@ -447,6 +463,7 @@ protected Object rewriteForDocValue(Object value) {
@Override
public Query termQueryCaseInsensitive(Object value, QueryShardContext context) {
failIfNotIndexedAndNoDocValues();
checkToDisableCaching(context);
if (isSearchable()) {
return super.termQueryCaseInsensitive(value, context);
} else {
Expand All @@ -467,6 +484,7 @@ public Query termQueryCaseInsensitive(Object value, QueryShardContext context) {
@Override
public Query termQuery(Object value, QueryShardContext context) {
failIfNotIndexedAndNoDocValues();
checkToDisableCaching(context);
if (isSearchable()) {
Query query = super.termQuery(value, context);
if (!this.useSimilarity) {
Expand Down Expand Up @@ -494,6 +512,7 @@ public Query termQuery(Object value, QueryShardContext context) {
@Override
public Query termsQuery(List<?> values, QueryShardContext context) {
failIfNotIndexedAndNoDocValues();
checkToDisableCaching(context);
// has index and doc_values enabled
if (isSearchable() && hasDocValues()) {
if (!context.keywordFieldIndexOrDocValuesEnabled()) {
Expand Down Expand Up @@ -540,6 +559,7 @@ public Query prefixQuery(
);
}
failIfNotIndexedAndNoDocValues();
checkToDisableCaching(context);
if (isSearchable() && hasDocValues()) {
if (!context.keywordFieldIndexOrDocValuesEnabled()) {
return super.prefixQuery(value, method, caseInsensitive, context);
Expand Down Expand Up @@ -583,6 +603,7 @@ public Query regexpQuery(
);
}
failIfNotIndexedAndNoDocValues();
checkToDisableCaching(context);
if (isSearchable() && hasDocValues()) {
if (!context.keywordFieldIndexOrDocValuesEnabled()) {
return super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context);
Expand Down Expand Up @@ -621,6 +642,7 @@ public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower
);
}
failIfNotIndexedAndNoDocValues();
checkToDisableCaching(context);
if (isSearchable() && hasDocValues()) {
Query indexQuery = new TermRangeQuery(
name(),
Expand Down Expand Up @@ -669,6 +691,7 @@ public Query fuzzyQuery(
QueryShardContext context
) {
failIfNotIndexedAndNoDocValues();
checkToDisableCaching(context);
if (context.allowExpensiveQueries() == false) {
throw new OpenSearchException(
"[fuzzy] queries cannot be executed when '" + ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to " + "false."
Expand Down Expand Up @@ -716,6 +739,7 @@ public Query wildcardQuery(
);
}
failIfNotIndexedAndNoDocValues();
checkToDisableCaching(context);
// keyword field types are always normalized, so ignore case sensitivity and force normalize the
// wildcard
// query text
Expand Down Expand Up @@ -745,6 +769,13 @@ public Query wildcardQuery(
return super.wildcardQuery(value, method, caseInsensitive, true, context);
}

private void checkToDisableCaching(QueryShardContext context) {
// Mark the query as non-cacheable if the defaults for useSimilarity or splitQueriesOnWhitespace are not used.
if (useSimilarity || splitQueriesOnWhitespace) {
context.setIsCacheable(false);
}
}

}

private final boolean indexed;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,10 @@ public final boolean isCacheable() {
return cacheable;
}

public final void setIsCacheable(boolean isCacheable) {
this.cacheable = isCacheable;
}

/**
* Returns the shard ID this context was created for.
*/
Expand Down
Loading