Skip to content

Commit

Permalink
Fail span_multi queries that exceeds Boolean.maxclause term expansion
Browse files Browse the repository at this point in the history
By default span_multi query will limit term expansions = boolean max clause.
This will limit high heap usage in case of high cardinality term
expansions.This applies only if top_terms_N is not used in inner multi
query
  • Loading branch information
nirmalc committed Jun 6, 2018
1 parent 6fd4eb5 commit 816e37f
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 14 deletions.
13 changes: 6 additions & 7 deletions docs/reference/query-dsl/span-multi-term-query.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,9 @@ GET /_search
--------------------------------------------------
// CONSOLE

WARNING: By default `span_multi queries are rewritten to a `span_or` query
containing **all** the expanded terms. This can be expensive if the number of expanded
terms is large. To avoid an unbounded expansion you can set the
<<query-dsl-multi-term-rewrite,rewrite method>> of the multi term query to `top_terms_*`
rewrite. Or, if you use `span_multi` on `prefix` query only, you can
activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
rewrite any prefix query on the field to a a single term query that matches the indexed prefix.
WARNING: `span_multi` queries will hit too many clauses failure if the number of terms that match the query exceeds the
boolean query limit (defaults to 1024).To avoid an unbounded expansion you can set the <<query-dsl-multi-term-rewrite,
rewrite method>> of the multi term query to `top_terms_*` rewrite. Or, if you use `span_multi` on `prefix` query only,
you can activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
rewrite any prefix query on the field to a a single term query that matches the indexed prefix.

Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,25 @@
package org.elasticsearch.index.query;

import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.ScoringRewrite;
import org.apache.lucene.search.TopTermsRewrite;
import org.apache.lucene.search.spans.SpanBoostQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.elasticsearch.Version;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.io.stream.StreamInput;
Expand All @@ -42,19 +49,19 @@
import org.elasticsearch.index.query.support.QueryParsers;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

/**
* Query that allows wrapping a {@link MultiTermQueryBuilder} (one of wildcard, fuzzy, prefix, term, range or regexp query)
* as a {@link SpanQueryBuilder} so it can be nested.
*/
public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTermQueryBuilder>
implements SpanQueryBuilder {
implements SpanQueryBuilder {

public static final String NAME = "span_multi";

private static final ParseField MATCH_FIELD = new ParseField("match");

private final MultiTermQueryBuilder multiTermQueryBuilder;

public SpanMultiTermQueryBuilder(MultiTermQueryBuilder multiTermQueryBuilder) {
Expand Down Expand Up @@ -83,7 +90,7 @@ public MultiTermQueryBuilder innerQuery() {

@Override
protected void doXContent(XContentBuilder builder, Params params)
throws IOException {
throws IOException {
builder.startObject(NAME);
builder.field(MATCH_FIELD.getPreferredName());
multiTermQueryBuilder.toXContent(builder, params);
Expand All @@ -105,7 +112,7 @@ public static SpanMultiTermQueryBuilder fromXContent(XContentParser parser) thro
QueryBuilder query = parseInnerQueryBuilder(parser);
if (query instanceof MultiTermQueryBuilder == false) {
throw new ParsingException(parser.getTokenLocation(),
"[span_multi] [" + MATCH_FIELD.getPreferredName() + "] must be of type multi term query");
"[span_multi] [" + MATCH_FIELD.getPreferredName() + "] must be of type multi term query");
}
subQuery = (MultiTermQueryBuilder) query;
} else {
Expand All @@ -124,12 +131,55 @@ public static SpanMultiTermQueryBuilder fromXContent(XContentParser parser) thro

if (subQuery == null) {
throw new ParsingException(parser.getTokenLocation(),
"[span_multi] must have [" + MATCH_FIELD.getPreferredName() + "] multi term query clause");
"[span_multi] must have [" + MATCH_FIELD.getPreferredName() + "] multi term query clause");
}

return new SpanMultiTermQueryBuilder(subQuery).queryName(queryName).boost(boost);
}

public static class TopTermSpanBooleanQueryRewriteWithMaxClause extends SpanMultiTermQueryWrapper.SpanRewriteMethod {

private MultiTermQuery multiTermQuery;
private final long maxExpansions;

TopTermSpanBooleanQueryRewriteWithMaxClause(long max) {
maxExpansions = max;
}

@Override
public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
multiTermQuery = query;
return (SpanQuery) this.delegate.rewrite(reader, multiTermQuery);
}

final ScoringRewrite<List<SpanQuery>> delegate = new ScoringRewrite<List<SpanQuery>>() {

@Override
protected List<SpanQuery> getTopLevelBuilder() {
return new ArrayList();
}

@Override
protected Query build(List<SpanQuery> builder) {
return new SpanOrQuery((SpanQuery[]) builder.toArray(new SpanQuery[builder.size()]));
}

@Override
protected void checkMaxClauseCount(int count) {
if (count > maxExpansions) {
throw new ElasticsearchException("[" + multiTermQuery.toString() + " ] " +
"exceeds maxClauseCount [ Boolean maxClauseCount is set to " + BooleanQuery.getMaxClauseCount() + "]");
}
}

@Override
protected void addClause(List<SpanQuery> topLevel, Term term, int docCount, float boost, TermContext states) {
SpanTermQuery q = new SpanTermQuery(term, states);
topLevel.add(q);
}
};
}

@Override
protected Query doToQuery(QueryShardContext context) throws IOException {
Query subQuery = multiTermQueryBuilder.toQuery(context);
Expand Down Expand Up @@ -190,10 +240,15 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
+ MultiTermQuery.class.getName() + " but was " + subQuery.getClass().getName());
}
spanQuery = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery);
if (((MultiTermQuery) subQuery).getRewriteMethod() instanceof TopTermsRewrite == false) {
((SpanMultiTermQueryWrapper<MultiTermQuery>) spanQuery).setRewriteMethod(new
TopTermSpanBooleanQueryRewriteWithMaxClause(BooleanQuery.getMaxClauseCount()));
}
}
if (boost != AbstractQueryBuilder.DEFAULT_BOOST) {
return new SpanBoostQuery(spanQuery, boost);
}

return spanQuery;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.search.internal.SearchContext;
Expand Down Expand Up @@ -238,4 +237,38 @@ public void testFromJson() throws IOException {
assertEquals(json, "ki", ((PrefixQueryBuilder) parsed.innerQuery()).value());
assertEquals(json, 1.08, parsed.innerQuery().boost(), 0.0001);
}

public void testDefaultMaxRewriteBuilder() throws Exception {
Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b")).
toQuery(createShardContext());

if (query instanceof SpanBoostQuery) {
query = ((SpanBoostQuery)query).getQuery();
}

assertTrue(query instanceof SpanMultiTermQueryWrapper);
if (query instanceof SpanMultiTermQueryWrapper) {
MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod();
assertTrue(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause);
}

}

public void testTopNMultiTermsRewriteInsideSpan() throws Exception {

Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b").rewrite
("top_terms_boost_2000")).
toQuery(createShardContext());

if (query instanceof SpanBoostQuery) {
query = ((SpanBoostQuery)query).getQuery();
}

assertTrue(query instanceof SpanMultiTermQueryWrapper);
if (query instanceof SpanMultiTermQueryWrapper) {
MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod();
assertFalse(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause);
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@

package org.elasticsearch.search.query;

import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.util.English;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
Expand All @@ -33,8 +35,12 @@
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.Operator;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.index.query.SpanMultiTermQueryBuilder;
import org.elasticsearch.index.query.SpanNearQueryBuilder;
import org.elasticsearch.index.query.SpanTermQueryBuilder;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.index.query.WrapperQueryBuilder;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
Expand All @@ -52,6 +58,7 @@
import org.joda.time.format.ISODateTimeFormat;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Random;
Expand Down Expand Up @@ -1819,4 +1826,30 @@ public void testRangeQueryRangeFields_24744() throws Exception {
assertHitCount(searchResponse, 1);
}

public void testTermExpansionExceptionOnSpanFailure() throws ExecutionException, InterruptedException {
Settings.Builder builder = Settings.builder();
builder.put(SETTING_NUMBER_OF_SHARDS, 1).build();

createIndex("test", builder.build());
ArrayList<IndexRequestBuilder> reqs = new ArrayList<>();
int origBoolMaxClauseCount = BooleanQuery.getMaxClauseCount();
try {
BooleanQuery.setMaxClauseCount(2);
for (int i = 0; i < BooleanQuery.getMaxClauseCount() + 1; i++) {
reqs.add(client().prepareIndex("test", "_doc", Integer.toString(i)).setSource("body", "foo" +
Integer.toString(i) + " bar baz"));
}
indexRandom(true, false, reqs);

QueryBuilder queryBuilder = new SpanNearQueryBuilder(new SpanMultiTermQueryBuilder(QueryBuilders.wildcardQuery
("body", "f*")), 0).addClause(new SpanTermQueryBuilder("body", "bar"));

expectThrows(ElasticsearchException.class, () ->
client().prepareSearch().setIndices("test").setQuery(queryBuilder).get());
} finally {
BooleanQuery.setMaxClauseCount(origBoolMaxClauseCount);
}

}

}

0 comments on commit 816e37f

Please sign in to comment.