Skip to content

Commit

Permalink
Merge branch '6.x' into silent-batch-mode-migration
Browse files Browse the repository at this point in the history
* 6.x:
  Improve similarity integration. (elastic#29187)
  Fix some query extraction bugs. (elastic#29283)
  Fixed quote_field_suffix in query_string (elastic#29332)
  TEST: Update negative byte size setting error msg
  Fix bwc in GeoDistanceQuery serialization (elastic#29325)
  • Loading branch information
jasontedor committed Apr 4, 2018
2 parents cd3608a + 9e61f6d commit 8249317
Show file tree
Hide file tree
Showing 34 changed files with 750 additions and 855 deletions.
8 changes: 5 additions & 3 deletions docs/reference/index-modules/similarity.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ Type name: `BM25`
[[classic-similarity]]
==== Classic similarity

deprecated[6.3.0, The quality of the produced scores used to rely on coordination factors, which have been removed. It is advised to use BM25 instead.]

The classic similarity that is based on the TF/IDF model. This
similarity has the following option:

Expand All @@ -97,7 +99,7 @@ similarity has the following option:
Type name: `classic`

[float]
[[drf]]
[[dfr]]
==== DFR similarity

Similarity that implements the
Expand Down Expand Up @@ -510,7 +512,7 @@ PUT /index
"index": {
"similarity": {
"default": {
"type": "classic"
"type": "boolean"
}
}
}
Expand All @@ -532,7 +534,7 @@ PUT /index/_settings
"index": {
"similarity": {
"default": {
"type": "classic"
"type": "boolean"
}
}
}
Expand Down
9 changes: 2 additions & 7 deletions docs/reference/mapping/params/similarity.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,9 @@ PUT my_index
"default_field": { <1>
"type": "text"
},
"classic_field": {
"type": "text",
"similarity": "classic" <2>
},
"boolean_sim_field": {
"type": "text",
"similarity": "boolean" <3>
"similarity": "boolean" <2>
}
}
}
Expand All @@ -59,5 +55,4 @@ PUT my_index
--------------------------------------------------
// CONSOLE
<1> The `default_field` uses the `BM25` similarity.
<2> The `classic_field` uses the `classic` similarity (ie TF/IDF).
<3> The `boolean_sim_field` uses the `boolean` similarity.
<2> The `boolean_sim_field` uses the `boolean` similarity.
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ protected Settings indexSettings() {

@Override
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
similarity = randomFrom("classic", "BM25");
similarity = randomFrom("boolean", "BM25");
XContentBuilder mapping = jsonBuilder().startObject().startObject("_doc").startObject("properties")
.startObject("join_field")
.field("type", "join")
Expand Down Expand Up @@ -336,9 +336,7 @@ public void testNonDefaultSimilarity() throws Exception {
hasChildQuery(CHILD_DOC, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
Similarity expected = SimilarityService.BUILT_IN.get(similarity)
.create(similarity, Settings.EMPTY,
Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null)
.get();
.apply(Settings.EMPTY, Version.CURRENT, null);
assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass()));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ protected Collection<Class<? extends Plugin>> getPlugins() {

@Override
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
similarity = randomFrom("classic", "BM25");
similarity = randomFrom("boolean", "BM25");
// TODO: use a single type when inner hits have been changed to work with join field,
// this test randomly generates queries with inner hits
mapperService.merge(PARENT_TYPE, new CompressedXContent(Strings.toString(PutMappingRequest.buildFromSimplifiedDef(PARENT_TYPE,
Expand Down Expand Up @@ -323,9 +323,7 @@ public void testNonDefaultSimilarity() throws Exception {
hasChildQuery(CHILD_TYPE, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
Similarity expected = SimilarityService.BUILT_IN.get(similarity)
.create(similarity, Settings.EMPTY,
Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null)
.get();
.apply(Settings.EMPTY, Version.CURRENT, null);
assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass()));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ static Result analyze(Query query, Version indexVersion) {
}

private static BiFunction<Query, Version, Result> matchNoDocsQuery() {
return (query, version) -> new Result(true, Collections.emptySet(), 1);
return (query, version) -> new Result(true, Collections.emptySet(), 0);
}

private static BiFunction<Query, Version, Result> matchAllDocsQuery() {
Expand Down Expand Up @@ -179,36 +179,36 @@ private static BiFunction<Query, Version, Result> termInSetQuery() {
for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
terms.add(new QueryExtraction(new Term(iterator.field(), term)));
}
return new Result(true, terms, 1);
return new Result(true, terms, Math.min(1, terms.size()));
};
}

private static BiFunction<Query, Version, Result> synonymQuery() {
return (query, version) -> {
Set<QueryExtraction> terms = ((SynonymQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet());
return new Result(true, terms, 1);
return new Result(true, terms, Math.min(1, terms.size()));
};
}

private static BiFunction<Query, Version, Result> commonTermsQuery() {
return (query, version) -> {
Set<QueryExtraction> terms = ((CommonTermsQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet());
return new Result(false, terms, 1);
return new Result(false, terms, Math.min(1, terms.size()));
};
}

private static BiFunction<Query, Version, Result> blendedTermQuery() {
return (query, version) -> {
Set<QueryExtraction> terms = ((BlendedTermQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet());
return new Result(true, terms, 1);
return new Result(true, terms, Math.min(1, terms.size()));
};
}

private static BiFunction<Query, Version, Result> phraseQuery() {
return (query, version) -> {
Term[] terms = ((PhraseQuery) query).getTerms();
if (terms.length == 0) {
return new Result(true, Collections.emptySet(), 1);
return new Result(true, Collections.emptySet(), 0);
}

if (version.onOrAfter(Version.V_6_1_0)) {
Expand All @@ -232,7 +232,7 @@ private static BiFunction<Query, Version, Result> multiPhraseQuery() {
return (query, version) -> {
Term[][] terms = ((MultiPhraseQuery) query).getTermArrays();
if (terms.length == 0) {
return new Result(true, Collections.emptySet(), 1);
return new Result(true, Collections.emptySet(), 0);
}

if (version.onOrAfter(Version.V_6_1_0)) {
Expand Down Expand Up @@ -297,7 +297,7 @@ private static BiFunction<Query, Version, Result> spanOrQuery() {
for (SpanQuery clause : spanOrQuery.getClauses()) {
terms.addAll(analyze(clause, version).extractions);
}
return new Result(false, terms, 1);
return new Result(false, terms, Math.min(1, terms.size()));
};
}

Expand Down Expand Up @@ -334,6 +334,9 @@ private static BiFunction<Query, Version, Result> booleanQuery() {
numOptionalClauses++;
}
}
if (minimumShouldMatch > numOptionalClauses) {
return new Result(false, Collections.emptySet(), 0);
}
if (numRequiredClauses > 0) {
if (version.onOrAfter(Version.V_6_1_0)) {
UnsupportedQueryException uqe = null;
Expand All @@ -345,7 +348,12 @@ private static BiFunction<Query, Version, Result> booleanQuery() {
// since they are completely optional.

try {
results.add(analyze(clause.getQuery(), version));
Result subResult = analyze(clause.getQuery(), version);
if (subResult.matchAllDocs == false && subResult.extractions.isEmpty()) {
// doesn't match anything
return subResult;
}
results.add(subResult);
} catch (UnsupportedQueryException e) {
uqe = e;
}
Expand Down Expand Up @@ -399,7 +407,12 @@ private static BiFunction<Query, Version, Result> booleanQuery() {
}
}
msm += resultMsm;
verified &= result.verified;

if (result.verified == false
// If some inner extractions are optional, the result can't be verified
|| result.minimumShouldMatch < result.extractions.size()) {
verified = false;
}
matchAllDocs &= result.matchAllDocs;
extractions.addAll(result.extractions);
}
Expand Down Expand Up @@ -491,7 +504,7 @@ private static BiFunction<Query, Version, Result> pointRangeQuery() {
// Need to check whether upper is not smaller than lower, otherwise NumericUtils.subtract(...) fails IAE
// If upper is really smaller than lower then we deal with like MatchNoDocsQuery. (verified and no extractions)
if (new BytesRef(lowerPoint).compareTo(new BytesRef(upperPoint)) > 0) {
return new Result(true, Collections.emptySet(), 1);
return new Result(true, Collections.emptySet(), 0);
}

byte[] interval = new byte[16];
Expand Down Expand Up @@ -536,7 +549,15 @@ private static Result handleDisjunction(List<Query> disjunctions, int requiredSh
for (int i = 0; i < disjunctions.size(); i++) {
Query disjunct = disjunctions.get(i);
Result subResult = analyze(disjunct, version);
verified &= subResult.verified;
if (subResult.verified == false
// one of the sub queries requires more than one term to match, we can't
// verify it with a single top-level min_should_match
|| subResult.minimumShouldMatch > 1
// One of the inner clauses has multiple extractions, we won't be able to
// verify it with a single top-level min_should_match
|| (subResult.extractions.size() > 1 && requiredShouldClauses > 1)) {
verified = false;
}
if (subResult.matchAllDocs) {
numMatchAllClauses++;
}
Expand Down Expand Up @@ -682,6 +703,10 @@ static class Result {
final boolean matchAllDocs;

Result(boolean verified, Set<QueryExtraction> extractions, int minimumShouldMatch) {
if (minimumShouldMatch > extractions.size()) {
throw new IllegalArgumentException("minimumShouldMatch can't be greater than the number of extractions: "
+ minimumShouldMatch + " > " + extractions.size());
}
this.extractions = extractions;
this.verified = verified;
this.minimumShouldMatch = minimumShouldMatch;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,12 +210,13 @@ public void testDuel() throws Exception {
new BytesRef(randomFrom(stringContent.get(field1)))));
queryFunctions.add(() -> new TermInSetQuery(field2, new BytesRef(randomFrom(stringContent.get(field1))),
new BytesRef(randomFrom(stringContent.get(field1)))));
int numRandomBoolQueries = randomIntBetween(16, 32);
// many iterations with boolean queries, which are the most complex queries to deal with when nested
int numRandomBoolQueries = 1000;
for (int i = 0; i < numRandomBoolQueries; i++) {
queryFunctions.add(() -> createRandomBooleanQuery(1, stringFields, stringContent, intFieldType, intValues));
}
queryFunctions.add(() -> {
int numClauses = randomIntBetween(1, 16);
int numClauses = randomIntBetween(1, 1 << randomIntBetween(2, 4));
List<Query> clauses = new ArrayList<>();
for (int i = 0; i < numClauses; i++) {
String field = randomFrom(stringFields);
Expand Down Expand Up @@ -266,7 +267,7 @@ public void testDuel() throws Exception {
private BooleanQuery createRandomBooleanQuery(int depth, List<String> fields, Map<String, List<String>> content,
MappedFieldType intFieldType, List<Integer> intValues) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
int numClauses = randomIntBetween(1, 16);
int numClauses = randomIntBetween(1, 1 << randomIntBetween(2, 4)); // use low numbers of clauses more often
int numShouldClauses = 0;
boolean onlyShouldClauses = rarely();
for (int i = 0; i < numClauses; i++) {
Expand Down Expand Up @@ -313,7 +314,7 @@ private BooleanQuery createRandomBooleanQuery(int depth, List<String> fields, Ma
numShouldClauses++;
}
}
builder.setMinimumNumberShouldMatch(numShouldClauses);
builder.setMinimumNumberShouldMatch(randomIntBetween(0, numShouldClauses));
return builder.build();
}

Expand Down
Loading

0 comments on commit 8249317

Please sign in to comment.