Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added max_expansion param to span_multi #30913

Merged
merged 1 commit into from
Jun 7, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions docs/reference/query-dsl/span-multi-term-query.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,9 @@ GET /_search
--------------------------------------------------
// CONSOLE

WARNING: By default `span_multi queries are rewritten to a `span_or` query
containing **all** the expanded terms. This can be expensive if the number of expanded
terms is large. To avoid an unbounded expansion you can set the
<<query-dsl-multi-term-rewrite,rewrite method>> of the multi term query to `top_terms_*`
rewrite. Or, if you use `span_multi` on `prefix` query only, you can
activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
rewrite any prefix query on the field to a a single term query that matches the indexed prefix.
WARNING: `span_multi` queries will hit too many clauses failure if the number of terms that match the query exceeds the
boolean query limit (defaults to 1024).To avoid an unbounded expansion you can set the <<query-dsl-multi-term-rewrite,
rewrite method>> of the multi term query to `top_terms_*` rewrite. Or, if you use `span_multi` on `prefix` query only,
you can activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
rewrite any prefix query on the field to a a single term query that matches the indexed prefix.

Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,25 @@
package org.elasticsearch.index.query;

import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.ScoringRewrite;
import org.apache.lucene.search.TopTermsRewrite;
import org.apache.lucene.search.spans.SpanBoostQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.elasticsearch.Version;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.io.stream.StreamInput;
Expand All @@ -42,19 +49,19 @@
import org.elasticsearch.index.query.support.QueryParsers;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

/**
* Query that allows wrapping a {@link MultiTermQueryBuilder} (one of wildcard, fuzzy, prefix, term, range or regexp query)
* as a {@link SpanQueryBuilder} so it can be nested.
*/
public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTermQueryBuilder>
implements SpanQueryBuilder {
implements SpanQueryBuilder {

public static final String NAME = "span_multi";

private static final ParseField MATCH_FIELD = new ParseField("match");

private final MultiTermQueryBuilder multiTermQueryBuilder;

public SpanMultiTermQueryBuilder(MultiTermQueryBuilder multiTermQueryBuilder) {
Expand Down Expand Up @@ -83,7 +90,7 @@ public MultiTermQueryBuilder innerQuery() {

@Override
protected void doXContent(XContentBuilder builder, Params params)
throws IOException {
throws IOException {
builder.startObject(NAME);
builder.field(MATCH_FIELD.getPreferredName());
multiTermQueryBuilder.toXContent(builder, params);
Expand All @@ -105,7 +112,7 @@ public static SpanMultiTermQueryBuilder fromXContent(XContentParser parser) thro
QueryBuilder query = parseInnerQueryBuilder(parser);
if (query instanceof MultiTermQueryBuilder == false) {
throw new ParsingException(parser.getTokenLocation(),
"[span_multi] [" + MATCH_FIELD.getPreferredName() + "] must be of type multi term query");
"[span_multi] [" + MATCH_FIELD.getPreferredName() + "] must be of type multi term query");
}
subQuery = (MultiTermQueryBuilder) query;
} else {
Expand All @@ -124,12 +131,55 @@ public static SpanMultiTermQueryBuilder fromXContent(XContentParser parser) thro

if (subQuery == null) {
throw new ParsingException(parser.getTokenLocation(),
"[span_multi] must have [" + MATCH_FIELD.getPreferredName() + "] multi term query clause");
"[span_multi] must have [" + MATCH_FIELD.getPreferredName() + "] multi term query clause");
}

return new SpanMultiTermQueryBuilder(subQuery).queryName(queryName).boost(boost);
}

public static class TopTermSpanBooleanQueryRewriteWithMaxClause extends SpanMultiTermQueryWrapper.SpanRewriteMethod {

private MultiTermQuery multiTermQuery;
private final long maxExpansions;

TopTermSpanBooleanQueryRewriteWithMaxClause(long max) {
maxExpansions = max;
}

@Override
public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
multiTermQuery = query;
return (SpanQuery) this.delegate.rewrite(reader, multiTermQuery);
}

final ScoringRewrite<List<SpanQuery>> delegate = new ScoringRewrite<List<SpanQuery>>() {

@Override
protected List<SpanQuery> getTopLevelBuilder() {
return new ArrayList();
}

@Override
protected Query build(List<SpanQuery> builder) {
return new SpanOrQuery((SpanQuery[]) builder.toArray(new SpanQuery[builder.size()]));
}

@Override
protected void checkMaxClauseCount(int count) {
if (count > maxExpansions) {
throw new ElasticsearchException("[" + multiTermQuery.toString() + " ] " +
"exceeds maxClauseCount [ Boolean maxClauseCount is set to " + BooleanQuery.getMaxClauseCount() + "]");
}
}

@Override
protected void addClause(List<SpanQuery> topLevel, Term term, int docCount, float boost, TermContext states) {
SpanTermQuery q = new SpanTermQuery(term, states);
topLevel.add(q);
}
};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would not throw an error, I would just ensure that we collect only up to maxExpansions queries. @jimczi what do you think?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks , I will look at checklist . I thought the plan was to throw exception if term expansion count is above max_expansions ( which is defaulted to 1024 ) . The other pattern can be done already by adding rewrite param to inner multi query

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's better to throw an exception, at least in master. I don't think we need a max_expansion option though, we can just honor BooleanQuery#getMaxClauseCount() and fails the query if there are more terms. The span scoring rewrite does not allow to limit the number of expansions, it is all or nothing and I think it's a good property since it will not confuse the users by returning partial responses on prefix queries that match more than 1024 terms. This is the main reason why I opened the issue in the first place, we should never build a disjunction on more than BooleanQuery.getMaxClauseCount().

}

@Override
protected Query doToQuery(QueryShardContext context) throws IOException {
Query subQuery = multiTermQueryBuilder.toQuery(context);
Expand Down Expand Up @@ -190,10 +240,15 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
+ MultiTermQuery.class.getName() + " but was " + subQuery.getClass().getName());
}
spanQuery = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery);
if (((MultiTermQuery) subQuery).getRewriteMethod() instanceof TopTermsRewrite == false) {
((SpanMultiTermQueryWrapper<MultiTermQuery>) spanQuery).setRewriteMethod(new
TopTermSpanBooleanQueryRewriteWithMaxClause(BooleanQuery.getMaxClauseCount()));
}
}
if (boost != AbstractQueryBuilder.DEFAULT_BOOST) {
return new SpanBoostQuery(spanQuery, boost);
}

return spanQuery;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.search.internal.SearchContext;
Expand Down Expand Up @@ -238,4 +237,38 @@ public void testFromJson() throws IOException {
assertEquals(json, "ki", ((PrefixQueryBuilder) parsed.innerQuery()).value());
assertEquals(json, 1.08, parsed.innerQuery().boost(), 0.0001);
}

public void testDefaultMaxRewriteBuilder() throws Exception {
Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b")).
toQuery(createShardContext());

if (query instanceof SpanBoostQuery) {
query = ((SpanBoostQuery)query).getQuery();
}

assertTrue(query instanceof SpanMultiTermQueryWrapper);
if (query instanceof SpanMultiTermQueryWrapper) {
MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod();
assertTrue(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause);
}

}

public void testTopNMultiTermsRewriteInsideSpan() throws Exception {

Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b").rewrite
("top_terms_boost_2000")).
toQuery(createShardContext());

if (query instanceof SpanBoostQuery) {
query = ((SpanBoostQuery)query).getQuery();
}

assertTrue(query instanceof SpanMultiTermQueryWrapper);
if (query instanceof SpanMultiTermQueryWrapper) {
MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod();
assertFalse(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause);
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@

package org.elasticsearch.search.query;

import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.util.English;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
Expand All @@ -33,8 +35,12 @@
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.Operator;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.index.query.SpanMultiTermQueryBuilder;
import org.elasticsearch.index.query.SpanNearQueryBuilder;
import org.elasticsearch.index.query.SpanTermQueryBuilder;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.index.query.WrapperQueryBuilder;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
Expand All @@ -52,6 +58,7 @@
import org.joda.time.format.ISODateTimeFormat;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Random;
Expand Down Expand Up @@ -1819,4 +1826,30 @@ public void testRangeQueryRangeFields_24744() throws Exception {
assertHitCount(searchResponse, 1);
}

public void testTermExpansionExceptionOnSpanFailure() throws ExecutionException, InterruptedException {
Settings.Builder builder = Settings.builder();
builder.put(SETTING_NUMBER_OF_SHARDS, 1).build();

createIndex("test", builder.build());
ArrayList<IndexRequestBuilder> reqs = new ArrayList<>();
int origBoolMaxClauseCount = BooleanQuery.getMaxClauseCount();
try {
BooleanQuery.setMaxClauseCount(2);
for (int i = 0; i < BooleanQuery.getMaxClauseCount() + 1; i++) {
reqs.add(client().prepareIndex("test", "_doc", Integer.toString(i)).setSource("body", "foo" +
Integer.toString(i) + " bar baz"));
}
indexRandom(true, false, reqs);

QueryBuilder queryBuilder = new SpanNearQueryBuilder(new SpanMultiTermQueryBuilder(QueryBuilders.wildcardQuery
("body", "f*")), 0).addClause(new SpanTermQueryBuilder("body", "bar"));

expectThrows(ElasticsearchException.class, () ->
client().prepareSearch().setIndices("test").setQuery(queryBuilder).get());
} finally {
BooleanQuery.setMaxClauseCount(origBoolMaxClauseCount);
}

}

}