Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix index prefixes to work with span_multi #31066

Merged
merged 3 commits into from
Jun 4, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/reference/query-dsl/span-multi-term-query.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,11 @@ GET /_search
}
--------------------------------------------------
// CONSOLE

WARNING: By default `span_multi queries are rewritten to a `span_or` query
containing **all** the expanded terms. This can be expensive if the number of expanded
terms is large. To avoid an unbounded expansion you can set the
<<query-dsl-multi-term-rewrite,rewrite method>> of the multi term query to `top_terms_*`
rewrite. Or, if you use `span_multi` on `prefix` query only, you can
activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
rewrite any prefix query on the field to a a single term query that matches the indexed prefix.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
---
"search with index prefixes":
setup:
- skip:
version: " - 6.99.99"
version: " - 6.2.99"
reason: index_prefixes is only available as of 6.3.0

- do:
indices.create:
index: test
Expand All @@ -27,6 +27,11 @@
indices.refresh:
index: [test]

---
"search with index prefixes":
- skip:
version: " - 6.2.99"
reason: index_prefixes is only available as of 6.3.0
- do:
search:
index: test
Expand Down Expand Up @@ -57,3 +62,23 @@

- match: {hits.total: 1}
- match: {hits.hits.0._score: 1}

---
"search index prefixes with span_multi":
- skip:
version: " - 6.99.99"
reason: span_multi throws an exception with prefix fields on < versions

- do:
search:
index: test
body:
query:
span_near:
clauses: [
{ "span_term": { "text": "short" } },
{ "span_multi": { "match": { "prefix": { "text": "word" } } } }
]

- match: {hits.total: 1}

Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.elasticsearch.Version;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.common.settings.Settings;
Expand Down Expand Up @@ -175,7 +176,11 @@ public TextFieldMapper build(BuilderContext context) {
if (fieldType().isSearchable() == false) {
throw new IllegalArgumentException("Cannot set index_prefixes on unindexed field [" + name() + "]");
}
if (fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
// Copy the index options of the main field to allow phrase queries on
// the prefix field.
if (context.indexCreatedVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this can already be set to 6_4 before the backport, can't it?

prefixFieldType.setIndexOptions(fieldType.indexOptions());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we map DOCS_AND_FREQS TO DOCS?

} else if (fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
prefixFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
}
if (fieldType.storeTermVectorOffsets()) {
Expand Down Expand Up @@ -339,7 +344,6 @@ static final class PrefixFieldType extends StringFieldType {
PrefixFieldType(String name, int minChars, int maxChars) {
setTokenized(true);
setOmitNorms(true);
setIndexOptions(IndexOptions.DOCS);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we still need this? What happens with a 6.3 index that doesn't store offsets?

setName(name);
this.minChars = minChars;
this.maxChars = maxChars;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,28 @@
*/
package org.elasticsearch.index.query;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanBoostQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.query.support.QueryParsers;

import java.io.IOException;
import java.util.Objects;
Expand Down Expand Up @@ -124,22 +134,61 @@ public static SpanMultiTermQueryBuilder fromXContent(XContentParser parser) thro
protected Query doToQuery(QueryShardContext context) throws IOException {
Query subQuery = multiTermQueryBuilder.toQuery(context);
float boost = AbstractQueryBuilder.DEFAULT_BOOST;
if (subQuery instanceof BoostQuery) {
if (subQuery instanceof ConstantScoreQuery) {
subQuery = ((ConstantScoreQuery) subQuery).getQuery();
} else if (subQuery instanceof BoostQuery) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we unwrap recursively to be more resilient to changes to the way queries are built?

BoostQuery boostQuery = (BoostQuery) subQuery;
subQuery = boostQuery.getQuery();
boost = boostQuery.getBoost();
}
//no MultiTermQuery extends SpanQuery, so SpanBoostQuery is not supported here
final SpanQuery spanQuery;
// no MultiTermQuery extends SpanQuery, so SpanBoostQuery is not supported here
assert subQuery instanceof SpanBoostQuery == false;
if (subQuery instanceof MultiTermQuery == false) {
throw new UnsupportedOperationException("unsupported inner query, should be " + MultiTermQuery.class.getName() +" but was "
+ subQuery.getClass().getName());
if (subQuery instanceof TermQuery) {
/**
* Text fields that index prefixes can rewrite prefix queries
* into term queries. See {@link TextFieldMapper.TextFieldType#prefixQuery}.
*/
if (multiTermQueryBuilder.getClass() != PrefixQueryBuilder.class) {
throw new UnsupportedOperationException("unsupported inner query, should be "
+ MultiTermQuery.class.getName() + " but was " + subQuery.getClass().getName());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's include the class of multiTermQueryBuilder to the error message as well?

}
if (context.getIndexSettings().getIndexVersionCreated().before(Version.V_7_0_0_alpha1)) {
/**
* Indices created in this version do not index positions on the prefix field
* so we cannot use it to match positional queries. Instead, we explicitly create the prefix
* query on the main field to avoid the rewrite.
*/
PrefixQueryBuilder prefixBuilder = (PrefixQueryBuilder) multiTermQueryBuilder;
PrefixQuery prefixQuery = new PrefixQuery(new Term(prefixBuilder.fieldName(), prefixBuilder.value()));
if (prefixBuilder.rewrite() != null) {
MultiTermQuery.RewriteMethod rewriteMethod =
QueryParsers.parseRewriteMethod(prefixBuilder.rewrite(), null, LoggingDeprecationHandler.INSTANCE);
prefixQuery.setRewriteMethod(rewriteMethod);
}
spanQuery = new SpanMultiTermQueryWrapper<>(prefixQuery);
} else {
String origFieldName = ((PrefixQueryBuilder) multiTermQueryBuilder).fieldName();
SpanTermQuery spanTermQuery = new SpanTermQuery(((TermQuery) subQuery).getTerm());
/**
* Prefixes are indexed in a different field so we mask the term query with the original field
* name. This is required because span_near and span_or queries don't work across different field.
* The masking is safe because the prefix field is indexed using the same content than the original field
* and the prefix analyzer preserves positions.
*/
spanQuery = new FieldMaskingSpanQuery(spanTermQuery, origFieldName);
}
} else {
if (subQuery instanceof MultiTermQuery == false) {
throw new UnsupportedOperationException("unsupported inner query, should be "
+ MultiTermQuery.class.getName() + " but was " + subQuery.getClass().getName());
}
spanQuery = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery);
}
SpanQuery wrapper = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery);
if (boost != AbstractQueryBuilder.DEFAULT_BOOST) {
wrapper = new SpanBoostQuery(wrapper, boost);
return new SpanBoostQuery(spanQuery, boost);
}
return wrapper;
return spanQuery;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,46 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanBoostQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.elasticsearch.Version;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.test.AbstractQueryTestCase;

import java.io.IOException;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.instanceOf;
import static org.hamcrest.CoreMatchers.either;

public class SpanMultiTermQueryBuilderTests extends AbstractQueryTestCase<SpanMultiTermQueryBuilder> {
@Override
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
XContentBuilder mapping = jsonBuilder().startObject().startObject("_doc").startObject("properties")
.startObject("prefix_field")
.field("type", "text")
.startObject("index_prefixes").endObject()
.endObject()
.endObject().endObject().endObject();

mapperService.merge("_doc",
new CompressedXContent(Strings.toString(mapping)), MapperService.MergeReason.MAPPING_UPDATE);
}

@Override
protected SpanMultiTermQueryBuilder doCreateTestQueryBuilder() {
MultiTermQueryBuilder multiTermQueryBuilder = RandomQueryBuilder.createMultiTermQuery(random());
Expand All @@ -62,8 +84,9 @@ protected void doAssertLuceneQuery(SpanMultiTermQueryBuilder queryBuilder, Query
BoostQuery boostQuery = (BoostQuery) multiTermQuery;
multiTermQuery = boostQuery.getQuery();
}
assertThat(multiTermQuery, instanceOf(MultiTermQuery.class));
assertThat(spanMultiTermQueryWrapper.getWrappedQuery(), equalTo(new SpanMultiTermQueryWrapper<>((MultiTermQuery)multiTermQuery).getWrappedQuery()));
assertThat(multiTermQuery, either(instanceOf(MultiTermQuery.class)).or(instanceOf(TermQuery.class)));
assertThat(spanMultiTermQueryWrapper.getWrappedQuery(),
equalTo(new SpanMultiTermQueryWrapper<>((MultiTermQuery)multiTermQuery).getWrappedQuery()));
}

public void testIllegalArgument() {
Expand Down Expand Up @@ -135,11 +158,61 @@ public void writeTo(StreamOutput out) throws IOException {
}

public void testToQueryInnerSpanMultiTerm() throws IOException {

Query query = new SpanOrQueryBuilder(createTestQueryBuilder()).toQuery(createShardContext());
//verify that the result is still a span query, despite the boost that might get set (SpanBoostQuery rather than BoostQuery)
assertThat(query, instanceOf(SpanQuery.class));
}

public void testToQueryInnerTermQuery() throws IOException {
final QueryShardContext context = createShardContext();
if (context.getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_7_0_0_alpha1)) {
Query query = new SpanMultiTermQueryBuilder(new PrefixQueryBuilder("prefix_field", "foo"))
.toQuery(context);
assertThat(query, instanceOf(FieldMaskingSpanQuery.class));
FieldMaskingSpanQuery fieldSpanQuery = (FieldMaskingSpanQuery) query;
assertThat(fieldSpanQuery.getField(), equalTo("prefix_field"));
assertThat(fieldSpanQuery.getMaskedQuery(), instanceOf(SpanTermQuery.class));
SpanTermQuery spanTermQuery = (SpanTermQuery) fieldSpanQuery.getMaskedQuery();
assertThat(spanTermQuery.getTerm().text(), equalTo("foo"));

query = new SpanMultiTermQueryBuilder(new PrefixQueryBuilder("prefix_field", "foo"))
.boost(2.0f)
.toQuery(context);
assertThat(query, instanceOf(SpanBoostQuery.class));
SpanBoostQuery boostQuery = (SpanBoostQuery) query;
assertThat(boostQuery.getBoost(), equalTo(2.0f));
assertThat(boostQuery.getQuery(), instanceOf(FieldMaskingSpanQuery.class));
fieldSpanQuery = (FieldMaskingSpanQuery) boostQuery.getQuery();
assertThat(fieldSpanQuery.getField(), equalTo("prefix_field"));
assertThat(fieldSpanQuery.getMaskedQuery(), instanceOf(SpanTermQuery.class));
spanTermQuery = (SpanTermQuery) fieldSpanQuery.getMaskedQuery();
assertThat(spanTermQuery.getTerm().text(), equalTo("foo"));
} else {
Query query = new SpanMultiTermQueryBuilder(new PrefixQueryBuilder("prefix_field", "foo"))
.toQuery(context);
assertThat(query, instanceOf(SpanMultiTermQueryWrapper.class));
SpanMultiTermQueryWrapper wrapper = (SpanMultiTermQueryWrapper) query;
assertThat(wrapper.getWrappedQuery(), instanceOf(PrefixQuery.class));
PrefixQuery prefixQuery = (PrefixQuery) wrapper.getWrappedQuery();
assertThat(prefixQuery.getField(), equalTo("prefix_field"));
assertThat(prefixQuery.getPrefix().text(), equalTo("foo"));

query = new SpanMultiTermQueryBuilder(new PrefixQueryBuilder("prefix_field", "foo"))
.boost(2.0f)
.toQuery(context);
assertThat(query, instanceOf(SpanBoostQuery.class));
SpanBoostQuery boostQuery = (SpanBoostQuery) query;
assertThat(boostQuery.getBoost(), equalTo(2.0f));
assertThat(boostQuery.getQuery(), instanceOf(SpanMultiTermQueryWrapper.class));
wrapper = (SpanMultiTermQueryWrapper) boostQuery.getQuery();
assertThat(wrapper.getWrappedQuery(), instanceOf(PrefixQuery.class));
prefixQuery = (PrefixQuery) wrapper.getWrappedQuery();
assertThat(prefixQuery.getField(), equalTo("prefix_field"));
assertThat(prefixQuery.getPrefix().text(), equalTo("foo"));
}
}

public void testFromJson() throws IOException {
String json =
"{\n" +
Expand Down