diff --git a/buildSrc/src/main/resources/checkstyle_suppressions.xml b/buildSrc/src/main/resources/checkstyle_suppressions.xml index ccd8fea012035..678155c656170 100644 --- a/buildSrc/src/main/resources/checkstyle_suppressions.xml +++ b/buildSrc/src/main/resources/checkstyle_suppressions.xml @@ -16,7 +16,6 @@ - @@ -428,7 +427,6 @@ - diff --git a/core/src/main/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatter.java b/core/src/main/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatter.java deleted file mode 100644 index a33bf16dee4c7..0000000000000 --- a/core/src/main/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatter.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.lucene.search.postingshighlight; - -import org.apache.lucene.search.highlight.Snippet; -import org.apache.lucene.search.highlight.Encoder; -import org.elasticsearch.search.fetch.subphase.highlight.HighlightUtils; - -/** -Custom passage formatter that allows us to: -1) extract different snippets (instead of a single big string) together with their scores ({@link Snippet}) -2) use the {@link Encoder} implementations that are already used with the other highlighters - */ -public class CustomPassageFormatter extends PassageFormatter { - - private final String preTag; - private final String postTag; - private final Encoder encoder; - - public CustomPassageFormatter(String preTag, String postTag, Encoder encoder) { - this.preTag = preTag; - this.postTag = postTag; - this.encoder = encoder; - } - - @Override - public Snippet[] format(Passage[] passages, String content) { - Snippet[] snippets = new Snippet[passages.length]; - int pos; - for (int j = 0; j < passages.length; j++) { - Passage passage = passages[j]; - StringBuilder sb = new StringBuilder(); - pos = passage.getStartOffset(); - for (int i = 0; i < passage.getNumMatches(); i++) { - int start = passage.getMatchStarts()[i]; - int end = passage.getMatchEnds()[i]; - // its possible to have overlapping terms - if (start > pos) { - append(sb, content, pos, start); - } - if (end > pos) { - sb.append(preTag); - append(sb, content, Math.max(pos, start), end); - sb.append(postTag); - pos = end; - } - } - // its possible a "term" from the analyzer could span a sentence boundary. 
- append(sb, content, pos, Math.max(pos, passage.getEndOffset())); - //we remove the paragraph separator if present at the end of the snippet (we used it as separator between values) - if (sb.charAt(sb.length() - 1) == HighlightUtils.PARAGRAPH_SEPARATOR) { - sb.deleteCharAt(sb.length() - 1); - } else if (sb.charAt(sb.length() - 1) == HighlightUtils.NULL_SEPARATOR) { - sb.deleteCharAt(sb.length() - 1); - } - //and we trim the snippets too - snippets[j] = new Snippet(sb.toString().trim(), passage.getScore(), passage.getNumMatches() > 0); - } - return snippets; - } - - protected void append(StringBuilder dest, String content, int start, int end) { - dest.append(encoder.encodeText(content.substring(start, end))); - } -} diff --git a/core/src/main/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighter.java b/core/src/main/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighter.java deleted file mode 100644 index ac90a3e57aee7..0000000000000 --- a/core/src/main/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighter.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.lucene.search.postingshighlight; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.highlight.Snippet; - -import java.io.IOException; -import java.text.BreakIterator; -import java.util.Map; - -/** - * Subclass of the {@link PostingsHighlighter} that works for a single field in a single document. - * Uses a custom {@link PassageFormatter}. Accepts field content as a constructor argument, given that loading - * is custom and can be done reading from _source field. Supports using different {@link BreakIterator} to break - * the text into fragments. Considers every distinct field value as a discrete passage for highlighting (unless - * the whole content needs to be highlighted). Supports both returning empty snippets and non highlighted snippets - * when no highlighting can be performed. - * - * The use that we make of the postings highlighter is not optimal. It would be much better to highlight - * multiple docs in a single call, as we actually lose its sequential IO. That would require to - * refactor the elasticsearch highlight api which currently works per hit. 
- */ -public final class CustomPostingsHighlighter extends PostingsHighlighter { - - private static final Snippet[] EMPTY_SNIPPET = new Snippet[0]; - private static final Passage[] EMPTY_PASSAGE = new Passage[0]; - - private final Analyzer analyzer; - private final CustomPassageFormatter passageFormatter; - private final BreakIterator breakIterator; - private final boolean returnNonHighlightedSnippets; - private final String fieldValue; - - /** - * Creates a new instance of {@link CustomPostingsHighlighter} - * - * @param analyzer the analyzer used for the field at index time, used for multi term queries internally - * @param passageFormatter our own {@link PassageFormatter} which generates snippets in forms of {@link Snippet} objects - * @param fieldValue the original field values as constructor argument, loaded from te _source field or the relevant stored field. - * @param returnNonHighlightedSnippets whether non highlighted snippets should be returned rather than empty snippets when - * no highlighting can be performed - */ - public CustomPostingsHighlighter(Analyzer analyzer, CustomPassageFormatter passageFormatter, String fieldValue, boolean returnNonHighlightedSnippets) { - this(analyzer, passageFormatter, null, fieldValue, returnNonHighlightedSnippets); - } - - /** - * Creates a new instance of {@link CustomPostingsHighlighter} - * - * @param analyzer the analyzer used for the field at index time, used for multi term queries internally - * @param passageFormatter our own {@link PassageFormatter} which generates snippets in forms of {@link Snippet} objects - * @param breakIterator an instance {@link BreakIterator} selected depending on the highlighting options - * @param fieldValue the original field values as constructor argument, loaded from te _source field or the relevant stored field. 
- * @param returnNonHighlightedSnippets whether non highlighted snippets should be returned rather than empty snippets when - * no highlighting can be performed - */ - public CustomPostingsHighlighter(Analyzer analyzer, CustomPassageFormatter passageFormatter, BreakIterator breakIterator, String fieldValue, boolean returnNonHighlightedSnippets) { - this.analyzer = analyzer; - this.passageFormatter = passageFormatter; - this.breakIterator = breakIterator; - this.returnNonHighlightedSnippets = returnNonHighlightedSnippets; - this.fieldValue = fieldValue; - } - - /** - * Highlights terms extracted from the provided query within the content of the provided field name - */ - public Snippet[] highlightField(String field, Query query, IndexSearcher searcher, int docId, int maxPassages) throws IOException { - Map fieldsAsObjects = super.highlightFieldsAsObjects(new String[]{field}, query, searcher, new int[]{docId}, new int[]{maxPassages}); - Object[] snippetObjects = fieldsAsObjects.get(field); - if (snippetObjects != null) { - //one single document at a time - assert snippetObjects.length == 1; - Object snippetObject = snippetObjects[0]; - if (snippetObject != null && snippetObject instanceof Snippet[]) { - return (Snippet[]) snippetObject; - } - } - return EMPTY_SNIPPET; - } - - @Override - protected PassageFormatter getFormatter(String field) { - return passageFormatter; - } - - @Override - protected BreakIterator getBreakIterator(String field) { - if (breakIterator == null) { - return super.getBreakIterator(field); - } - return breakIterator; - } - - /* - By default the postings highlighter returns non highlighted snippet when there are no matches. 
- We want to return no snippets by default, unless no_match_size is greater than 0 - */ - @Override - protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) { - if (returnNonHighlightedSnippets) { - //we want to return the first sentence of the first snippet only - return super.getEmptyHighlight(fieldName, bi, 1); - } - return EMPTY_PASSAGE; - } - - @Override - protected Analyzer getIndexAnalyzer(String field) { - return analyzer; - } - - @Override - protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException { - //we only highlight one field, one document at a time - return new String[][]{new String[]{fieldValue}}; - } -} diff --git a/core/src/main/java/org/apache/lucene/search/uhighlight/CustomPassageFormatter.java b/core/src/main/java/org/apache/lucene/search/uhighlight/CustomPassageFormatter.java index 7a34a805db623..52eee559c6888 100644 --- a/core/src/main/java/org/apache/lucene/search/uhighlight/CustomPassageFormatter.java +++ b/core/src/main/java/org/apache/lucene/search/uhighlight/CustomPassageFormatter.java @@ -20,7 +20,6 @@ package org.apache.lucene.search.uhighlight; import org.apache.lucene.search.highlight.Encoder; -import org.apache.lucene.search.highlight.Snippet; import org.elasticsearch.search.fetch.subphase.highlight.HighlightUtils; /** diff --git a/core/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java b/core/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java index b6d6f1d1a4dae..ebc13298202a6 100644 --- a/core/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java +++ b/core/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java @@ -27,7 +27,6 @@ import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.highlight.Snippet; import 
org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanOrQuery; diff --git a/core/src/main/java/org/apache/lucene/search/highlight/Snippet.java b/core/src/main/java/org/apache/lucene/search/uhighlight/Snippet.java similarity index 90% rename from core/src/main/java/org/apache/lucene/search/highlight/Snippet.java rename to core/src/main/java/org/apache/lucene/search/uhighlight/Snippet.java index 81a3d406ea346..b7490c55feffa 100644 --- a/core/src/main/java/org/apache/lucene/search/highlight/Snippet.java +++ b/core/src/main/java/org/apache/lucene/search/uhighlight/Snippet.java @@ -17,11 +17,11 @@ * under the License. */ -package org.apache.lucene.search.highlight; +package org.apache.lucene.search.uhighlight; /** * Represents a scored highlighted snippet. - * It's our own arbitrary object that we get back from the postings highlighter when highlighting a document. + * It's our own arbitrary object that we get back from the unified highlighter when highlighting a document. * Every snippet contains its formatted text and its score. * The score is needed in case we want to sort snippets by score, they get sorted by position in the text by default. 
*/ diff --git a/core/src/main/java/org/elasticsearch/search/SearchModule.java b/core/src/main/java/org/elasticsearch/search/SearchModule.java index 16bd9dbe8b931..87d937c09954c 100644 --- a/core/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/core/src/main/java/org/elasticsearch/search/SearchModule.java @@ -229,7 +229,6 @@ import org.elasticsearch.search.fetch.subphase.highlight.HighlightPhase; import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import org.elasticsearch.search.fetch.subphase.highlight.PlainHighlighter; -import org.elasticsearch.search.fetch.subphase.highlight.PostingsHighlighter; import org.elasticsearch.search.fetch.subphase.highlight.UnifiedHighlighter; import org.elasticsearch.search.rescore.QueryRescorerBuilder; import org.elasticsearch.search.rescore.RescoreBuilder; @@ -574,7 +573,6 @@ private Map setupHighlighters(Settings settings, List highlighters = new NamedRegistry<>("highlighter"); highlighters.register("fvh", new FastVectorHighlighter(settings)); highlighters.register("plain", new PlainHighlighter()); - highlighters.register("postings", new PostingsHighlighter()); highlighters.register("unified", new UnifiedHighlighter()); highlighters.extractAndRegister(plugins, SearchPlugin::getHighlighters); diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AbstractHighlighterBuilder.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AbstractHighlighterBuilder.java index e5db6639ad82a..e6e50cc37e6fe 100644 --- a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AbstractHighlighterBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AbstractHighlighterBuilder.java @@ -262,8 +262,8 @@ public Integer numOfFragments() { /** * Set type of highlighter to use. Out of the box supported types - * are plain, fvh and postings. - * The default option selected is dependent on the mappings defined for your index. 
+ * are unified, plain and fvh. + * Defaults to unified. * Details of the different highlighter types are covered in the reference guide. */ @SuppressWarnings("unchecked") diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FastVectorHighlighter.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FastVectorHighlighter.java index b1d557e851a6c..c08eea2e58820 100644 --- a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FastVectorHighlighter.java +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FastVectorHighlighter.java @@ -50,7 +50,6 @@ import java.util.Map; public class FastVectorHighlighter implements Highlighter { - private static final BoundaryScanner DEFAULT_SIMPLE_BOUNDARY_SCANNER = new SimpleBoundaryScanner(); private static final BoundaryScanner DEFAULT_SENTENCE_BOUNDARY_SCANNER = new BreakIteratorBoundaryScanner(BreakIterator.getSentenceInstance(Locale.ROOT)); diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java index 701b981e0f053..6b9121b8f7b71 100644 --- a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java @@ -39,8 +39,6 @@ import java.util.Map; public class HighlightPhase extends AbstractComponent implements FetchSubPhase { - private static final List STANDARD_HIGHLIGHTERS_BY_PRECEDENCE = Arrays.asList("fvh", "postings", "plain"); - private final Map highlighters; public HighlightPhase(Settings settings, Map highlighters) { @@ -94,13 +92,7 @@ public void hitExecute(SearchContext context, HitContext hitContext) { } String highlighterType = field.fieldOptions().highlighterType(); if (highlighterType == null) { - for(String highlighterCandidate : STANDARD_HIGHLIGHTERS_BY_PRECEDENCE) { - if 
(highlighters.get(highlighterCandidate).canHighlight(fieldMapper)) { - highlighterType = highlighterCandidate; - break; - } - } - assert highlighterType != null; + highlighterType = "unified"; } Highlighter highlighter = highlighters.get(highlighterType); if (highlighter == null) { diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java index 4a6e991b9a356..b241a686a248f 100644 --- a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java @@ -35,7 +35,7 @@ public final class HighlightUtils { - //U+2029 PARAGRAPH SEPARATOR (PS): each value holds a discrete passage for highlighting (postings highlighter) + //U+2029 PARAGRAPH SEPARATOR (PS): each value holds a discrete passage for highlighting (unified highlighter) public static final char PARAGRAPH_SEPARATOR = 8233; public static final char NULL_SEPARATOR = '\u0000'; diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java index deb1464b703fc..c7943367d31b2 100644 --- a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java @@ -49,7 +49,6 @@ import java.util.Map; public class PlainHighlighter implements Highlighter { - private static final String CACHE_KEY = "highlight-plain"; @Override diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PostingsHighlighter.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PostingsHighlighter.java deleted file mode 100644 index 34997912febd0..0000000000000 --- 
a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PostingsHighlighter.java +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.elasticsearch.search.fetch.subphase.highlight; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.highlight.Encoder; -import org.apache.lucene.search.postingshighlight.CustomPassageFormatter; -import org.apache.lucene.search.postingshighlight.CustomPostingsHighlighter; -import org.apache.lucene.search.postingshighlight.CustomSeparatorBreakIterator; -import org.apache.lucene.search.highlight.Snippet; -import org.apache.lucene.util.CollectionUtil; -import org.elasticsearch.common.Strings; -import org.elasticsearch.common.text.Text; -import org.elasticsearch.index.mapper.FieldMapper; -import org.elasticsearch.search.fetch.FetchPhaseExecutionException; -import org.elasticsearch.search.fetch.FetchSubPhase; -import org.elasticsearch.search.internal.SearchContext; -import org.elasticsearch.search.fetch.subphase.highlight.HighlightUtils.Encoders; - -import java.io.IOException; -import java.text.BreakIterator; -import 
java.util.ArrayList; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; - -public class PostingsHighlighter implements Highlighter { - - private static final String CACHE_KEY = "highlight-postings"; - - @Override - public HighlightField highlight(HighlighterContext highlighterContext) { - - FieldMapper fieldMapper = highlighterContext.mapper; - SearchContextHighlight.Field field = highlighterContext.field; - if (canHighlight(fieldMapper) == false) { - throw new IllegalArgumentException("the field [" + highlighterContext.fieldName - + "] should be indexed with positions and offsets in the postings list to be used with postings highlighter"); - } - - SearchContext context = highlighterContext.context; - FetchSubPhase.HitContext hitContext = highlighterContext.hitContext; - - if (!hitContext.cache().containsKey(CACHE_KEY)) { - hitContext.cache().put(CACHE_KEY, new HighlighterEntry()); - } - - HighlighterEntry highlighterEntry = (HighlighterEntry) hitContext.cache().get(CACHE_KEY); - MapperHighlighterEntry mapperHighlighterEntry = highlighterEntry.mappers.get(fieldMapper); - - if (mapperHighlighterEntry == null) { - Encoder encoder = field.fieldOptions().encoder().equals("html") ? 
Encoders.HTML : Encoders.DEFAULT; - CustomPassageFormatter passageFormatter = new CustomPassageFormatter( - field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0], encoder); - mapperHighlighterEntry = new MapperHighlighterEntry(passageFormatter); - } - - List snippets = new ArrayList<>(); - int numberOfFragments; - try { - Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().getType()).mappers().indexAnalyzer(); - List fieldValues = HighlightUtils.loadFieldValues(field, fieldMapper, context, hitContext); - CustomPostingsHighlighter highlighter; - if (field.fieldOptions().numberOfFragments() == 0) { - //we use a control char to separate values, which is the only char that the custom break iterator breaks the text on, - //so we don't lose the distinction between the different values of a field and we get back a snippet per value - String fieldValue = mergeFieldValues(fieldValues, HighlightUtils.NULL_SEPARATOR); - CustomSeparatorBreakIterator breakIterator = new CustomSeparatorBreakIterator(HighlightUtils.NULL_SEPARATOR); - highlighter = new CustomPostingsHighlighter(analyzer, mapperHighlighterEntry.passageFormatter, breakIterator, - fieldValue, field.fieldOptions().noMatchSize() > 0); - numberOfFragments = fieldValues.size(); //we are highlighting the whole content, one snippet per value - } else { - //using paragraph separator we make sure that each field value holds a discrete passage for highlighting - String fieldValue = mergeFieldValues(fieldValues, HighlightUtils.PARAGRAPH_SEPARATOR); - highlighter = new CustomPostingsHighlighter(analyzer, mapperHighlighterEntry.passageFormatter, - fieldValue, field.fieldOptions().noMatchSize() > 0); - numberOfFragments = field.fieldOptions().numberOfFragments(); - } - - IndexSearcher searcher = new IndexSearcher(hitContext.reader()); - Snippet[] fieldSnippets = highlighter.highlightField(fieldMapper.fieldType().name(), highlighterContext.query, searcher, - hitContext.docId(), 
numberOfFragments); - for (Snippet fieldSnippet : fieldSnippets) { - if (Strings.hasText(fieldSnippet.getText())) { - snippets.add(fieldSnippet); - } - } - - } catch(IOException e) { - throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e); - } - - snippets = filterSnippets(snippets, field.fieldOptions().numberOfFragments()); - - if (field.fieldOptions().scoreOrdered()) { - //let's sort the snippets by score if needed - CollectionUtil.introSort(snippets, new Comparator() { - @Override - public int compare(Snippet o1, Snippet o2) { - return (int) Math.signum(o2.getScore() - o1.getScore()); - } - }); - } - - String[] fragments = new String[snippets.size()]; - for (int i = 0; i < fragments.length; i++) { - fragments[i] = snippets.get(i).getText(); - } - - if (fragments.length > 0) { - return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments)); - } - - return null; - } - - @Override - public boolean canHighlight(FieldMapper fieldMapper) { - return fieldMapper.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; - } - - static String mergeFieldValues(List fieldValues, char valuesSeparator) { - //postings highlighter accepts all values in a single string, as offsets etc. 
need to match with content - //loaded from stored fields, we merge all values using a proper separator - String rawValue = Strings.collectionToDelimitedString(fieldValues, String.valueOf(valuesSeparator)); - return rawValue.substring(0, Math.min(rawValue.length(), Integer.MAX_VALUE - 1)); - } - - static List filterSnippets(List snippets, int numberOfFragments) { - - //We need to filter the snippets as due to no_match_size we could have - //either highlighted snippets or non highlighted ones and we don't want to mix those up - List filteredSnippets = new ArrayList<>(snippets.size()); - for (Snippet snippet : snippets) { - if (snippet.isHighlighted()) { - filteredSnippets.add(snippet); - } - } - - //if there's at least one highlighted snippet, we return all the highlighted ones - //otherwise we return the first non highlighted one if available - if (filteredSnippets.size() == 0) { - if (snippets.size() > 0) { - Snippet snippet = snippets.get(0); - //if we tried highlighting the whole content using whole break iterator (as number_of_fragments was 0) - //we need to return the first sentence of the content rather than the whole content - if (numberOfFragments == 0) { - BreakIterator bi = BreakIterator.getSentenceInstance(Locale.ROOT); - String text = snippet.getText(); - bi.setText(text); - int next = bi.next(); - if (next != BreakIterator.DONE) { - String newText = text.substring(0, next).trim(); - snippet = new Snippet(newText, snippet.getScore(), snippet.isHighlighted()); - } - } - filteredSnippets.add(snippet); - } - } - - return filteredSnippets; - } - - static class HighlighterEntry { - Map mappers = new HashMap<>(); - } - - static class MapperHighlighterEntry { - final CustomPassageFormatter passageFormatter; - - private MapperHighlighterEntry(CustomPassageFormatter passageFormatter) { - this.passageFormatter = passageFormatter; - } - } -} diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java 
b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java index d3a94d0411b40..684c7ddbddd87 100644 --- a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.highlight.Encoder; -import org.apache.lucene.search.highlight.Snippet; +import org.apache.lucene.search.uhighlight.Snippet; import org.apache.lucene.search.uhighlight.BoundedBreakIteratorScanner; import org.apache.lucene.search.uhighlight.CustomPassageFormatter; import org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter; @@ -44,8 +44,6 @@ import java.util.stream.Collectors; import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR; -import static org.elasticsearch.search.fetch.subphase.highlight.PostingsHighlighter.filterSnippets; -import static org.elasticsearch.search.fetch.subphase.highlight.PostingsHighlighter.mergeFieldValues; public class UnifiedHighlighter implements Highlighter { private static final String CACHE_KEY = "highlight-unified"; @@ -174,6 +172,49 @@ private BreakIterator getBreakIterator(SearchContextHighlight.Field field) { } } + private static List filterSnippets(List snippets, int numberOfFragments) { + + //We need to filter the snippets as due to no_match_size we could have + //either highlighted snippets or non highlighted ones and we don't want to mix those up + List filteredSnippets = new ArrayList<>(snippets.size()); + for (Snippet snippet : snippets) { + if (snippet.isHighlighted()) { + filteredSnippets.add(snippet); + } + } + + //if there's at least one highlighted snippet, we return all the highlighted ones + //otherwise we return the first non highlighted one if available + if (filteredSnippets.size() == 0) { + if 
(snippets.size() > 0) { + Snippet snippet = snippets.get(0); + //if we tried highlighting the whole content using whole break iterator (as number_of_fragments was 0) + //we need to return the first sentence of the content rather than the whole content + if (numberOfFragments == 0) { + BreakIterator bi = BreakIterator.getSentenceInstance(Locale.ROOT); + String text = snippet.getText(); + bi.setText(text); + int next = bi.next(); + if (next != BreakIterator.DONE) { + String newText = text.substring(0, next).trim(); + snippet = new Snippet(newText, snippet.getScore(), snippet.isHighlighted()); + } + } + filteredSnippets.add(snippet); + } + } + + return filteredSnippets; + } + + private static String mergeFieldValues(List fieldValues, char valuesSeparator) { + //unified highlighter accepts all values in a single string, as offsets etc. need to match with content + //loaded from stored fields, we merge all values using a proper separator + String rawValue = Strings.collectionToDelimitedString(fieldValues, String.valueOf(valuesSeparator)); + return rawValue.substring(0, Math.min(rawValue.length(), Integer.MAX_VALUE - 1)); + } + + private static class HighlighterEntry { Map mappers = new HashMap<>(); } diff --git a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java deleted file mode 100644 index 2d43a1ca64ef4..0000000000000 --- a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.lucene.search.postingshighlight; - -import org.apache.lucene.search.highlight.Snippet; -import org.apache.lucene.search.highlight.DefaultEncoder; -import org.apache.lucene.search.highlight.SimpleHTMLEncoder; -import org.apache.lucene.util.BytesRef; -import org.elasticsearch.test.ESTestCase; - -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.notNullValue; - - -public class CustomPassageFormatterTests extends ESTestCase { - public void testSimpleFormat() { - String content = "This is a really cool highlighter. Postings highlighter gives nice snippets back. 
No matches here."; - - CustomPassageFormatter passageFormatter = new CustomPassageFormatter("", "", new DefaultEncoder()); - - Passage[] passages = new Passage[3]; - String match = "highlighter"; - BytesRef matchBytesRef = new BytesRef(match); - - Passage passage1 = new Passage(); - int start = content.indexOf(match); - int end = start + match.length(); - passage1.startOffset = 0; - passage1.endOffset = end + 2; //lets include the whitespace at the end to make sure we trim it - passage1.addMatch(start, end, matchBytesRef); - passages[0] = passage1; - - Passage passage2 = new Passage(); - start = content.lastIndexOf(match); - end = start + match.length(); - passage2.startOffset = passage1.endOffset; - passage2.endOffset = end + 26; - passage2.addMatch(start, end, matchBytesRef); - passages[1] = passage2; - - Passage passage3 = new Passage(); - passage3.startOffset = passage2.endOffset; - passage3.endOffset = content.length(); - passages[2] = passage3; - - Snippet[] fragments = passageFormatter.format(passages, content); - assertThat(fragments, notNullValue()); - assertThat(fragments.length, equalTo(3)); - assertThat(fragments[0].getText(), equalTo("This is a really cool highlighter.")); - assertThat(fragments[0].isHighlighted(), equalTo(true)); - assertThat(fragments[1].getText(), equalTo("Postings highlighter gives nice snippets back.")); - assertThat(fragments[1].isHighlighted(), equalTo(true)); - assertThat(fragments[2].getText(), equalTo("No matches here.")); - assertThat(fragments[2].isHighlighted(), equalTo(false)); - } - - public void testHtmlEncodeFormat() { - String content = "This is a really cool highlighter. 
Postings highlighter gives nice snippets back."; - - CustomPassageFormatter passageFormatter = new CustomPassageFormatter("", "", new SimpleHTMLEncoder()); - - Passage[] passages = new Passage[2]; - String match = "highlighter"; - BytesRef matchBytesRef = new BytesRef(match); - - Passage passage1 = new Passage(); - int start = content.indexOf(match); - int end = start + match.length(); - passage1.startOffset = 0; - passage1.endOffset = end + 6; //lets include the whitespace at the end to make sure we trim it - passage1.addMatch(start, end, matchBytesRef); - passages[0] = passage1; - - Passage passage2 = new Passage(); - start = content.lastIndexOf(match); - end = start + match.length(); - passage2.startOffset = passage1.endOffset; - passage2.endOffset = content.length(); - passage2.addMatch(start, end, matchBytesRef); - passages[1] = passage2; - - Snippet[] fragments = passageFormatter.format(passages, content); - assertThat(fragments, notNullValue()); - assertThat(fragments.length, equalTo(2)); - assertThat(fragments[0].getText(), equalTo("<b>This is a really cool highlighter.</b>")); - assertThat(fragments[1].getText(), equalTo("Postings highlighter gives nice snippets back.")); - } -} diff --git a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java deleted file mode 100644 index 315e38d12feb5..0000000000000 --- a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.lucene.search.postingshighlight; - -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.highlight.Snippet; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.highlight.DefaultEncoder; -import org.apache.lucene.store.Directory; -import org.elasticsearch.search.fetch.subphase.highlight.HighlightUtils; -import org.elasticsearch.test.ESTestCase; - -import static org.hamcrest.CoreMatchers.equalTo; - -public class CustomPostingsHighlighterTests extends ESTestCase { - public void testCustomPostingsHighlighter() throws Exception { - Directory dir = newDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); - iwc.setMergePolicy(newLogMergePolicy()); - RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); - - FieldType offsetsType = new FieldType(TextField.TYPE_STORED); - offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); - - 
//good position but only one match - final String firstValue = "This is a test. Just a test1 highlighting from postings highlighter."; - Field body = new Field("body", "", offsetsType); - Document doc = new Document(); - doc.add(body); - body.setStringValue(firstValue); - - //two matches, not the best snippet due to its length though - final String secondValue = "This is the second highlighting value to perform highlighting on a longer text that gets scored lower."; - Field body2 = new Field("body", "", offsetsType); - doc.add(body2); - body2.setStringValue(secondValue); - - //two matches and short, will be scored highest - final String thirdValue = "This is highlighting the third short highlighting value."; - Field body3 = new Field("body", "", offsetsType); - doc.add(body3); - body3.setStringValue(thirdValue); - - //one match, same as first but at the end, will be scored lower due to its position - final String fourthValue = "Just a test4 highlighting from postings highlighter."; - Field body4 = new Field("body", "", offsetsType); - doc.add(body4); - body4.setStringValue(fourthValue); - - iw.addDocument(doc); - - IndexReader ir = iw.getReader(); - iw.close(); - - String firstHlValue = "Just a test1 highlighting from postings highlighter."; - String secondHlValue = "This is the second highlighting value to perform highlighting on a longer text that gets scored lower."; - String thirdHlValue = "This is highlighting the third short highlighting value."; - String fourthHlValue = "Just a test4 highlighting from postings highlighter."; - - IndexSearcher searcher = newSearcher(ir); - Query query = new TermQuery(new Term("body", "highlighting")); - - TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); - assertThat(topDocs.totalHits, equalTo(1)); - - int docId = topDocs.scoreDocs[0].doc; - - String fieldValue = firstValue + HighlightUtils.PARAGRAPH_SEPARATOR + secondValue + HighlightUtils.PARAGRAPH_SEPARATOR + thirdValue + HighlightUtils.PARAGRAPH_SEPARATOR + 
fourthValue; - - CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(null, new CustomPassageFormatter("", "", new DefaultEncoder()), fieldValue, false); - Snippet[] snippets = highlighter.highlightField("body", query, searcher, docId, 5); - - assertThat(snippets.length, equalTo(4)); - - assertThat(snippets[0].getText(), equalTo(firstHlValue)); - assertThat(snippets[1].getText(), equalTo(secondHlValue)); - assertThat(snippets[2].getText(), equalTo(thirdHlValue)); - assertThat(snippets[3].getText(), equalTo(fourthHlValue)); - - ir.close(); - dir.close(); - } - - public void testNoMatchSize() throws Exception { - Directory dir = newDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); - iwc.setMergePolicy(newLogMergePolicy()); - RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); - - FieldType offsetsType = new FieldType(TextField.TYPE_STORED); - offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); - Field body = new Field("body", "", offsetsType); - Field none = new Field("none", "", offsetsType); - Document doc = new Document(); - doc.add(body); - doc.add(none); - - String firstValue = "This is a test. Just a test highlighting from postings. 
Feel free to ignore."; - body.setStringValue(firstValue); - none.setStringValue(firstValue); - iw.addDocument(doc); - - IndexReader ir = iw.getReader(); - iw.close(); - - Query query = new TermQuery(new Term("none", "highlighting")); - - IndexSearcher searcher = newSearcher(ir); - TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); - assertThat(topDocs.totalHits, equalTo(1)); - int docId = topDocs.scoreDocs[0].doc; - - CustomPassageFormatter passageFormatter = new CustomPassageFormatter("", "", new DefaultEncoder()); - - CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(null, passageFormatter, firstValue, false); - Snippet[] snippets = highlighter.highlightField("body", query, searcher, docId, 5); - assertThat(snippets.length, equalTo(0)); - - highlighter = new CustomPostingsHighlighter(null, passageFormatter, firstValue, true); - snippets = highlighter.highlightField("body", query, searcher, docId, 5); - assertThat(snippets.length, equalTo(1)); - assertThat(snippets[0].getText(), equalTo("This is a test.")); - - ir.close(); - dir.close(); - } -} diff --git a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomSeparatorBreakIteratorTests.java b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomSeparatorBreakIteratorTests.java deleted file mode 100644 index 17aeb869c1a04..0000000000000 --- a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomSeparatorBreakIteratorTests.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.lucene.search.postingshighlight; - -import org.elasticsearch.search.fetch.subphase.highlight.HighlightUtils; -import org.elasticsearch.test.ESTestCase; - -import java.text.BreakIterator; -import java.text.CharacterIterator; -import java.text.StringCharacterIterator; -import java.util.Locale; - -import static org.hamcrest.CoreMatchers.equalTo; - -public class CustomSeparatorBreakIteratorTests extends ESTestCase { - public void testBreakOnCustomSeparator() throws Exception { - Character separator = randomSeparator(); - BreakIterator bi = new CustomSeparatorBreakIterator(separator); - String source = "this" + separator + "is" + separator + "the" + separator + "first" + separator + "sentence"; - bi.setText(source); - assertThat(bi.current(), equalTo(0)); - assertThat(bi.first(), equalTo(0)); - assertThat(source.substring(bi.current(), bi.next()), equalTo("this" + separator)); - assertThat(source.substring(bi.current(), bi.next()), equalTo("is" + separator)); - assertThat(source.substring(bi.current(), bi.next()), equalTo("the" + separator)); - assertThat(source.substring(bi.current(), bi.next()), equalTo("first" + separator)); - assertThat(source.substring(bi.current(), bi.next()), equalTo("sentence")); - assertThat(bi.next(), equalTo(BreakIterator.DONE)); - - assertThat(bi.last(), equalTo(source.length())); - int current = bi.current(); - assertThat(source.substring(bi.previous(), current), equalTo("sentence")); - current = bi.current(); - assertThat(source.substring(bi.previous(), current), equalTo("first" + 
separator)); - current = bi.current(); - assertThat(source.substring(bi.previous(), current), equalTo("the" + separator)); - current = bi.current(); - assertThat(source.substring(bi.previous(), current), equalTo("is" + separator)); - current = bi.current(); - assertThat(source.substring(bi.previous(), current), equalTo("this" + separator)); - assertThat(bi.previous(), equalTo(BreakIterator.DONE)); - assertThat(bi.current(), equalTo(0)); - - assertThat(source.substring(0, bi.following(9)), equalTo("this" + separator + "is" + separator + "the" + separator)); - - assertThat(source.substring(0, bi.preceding(9)), equalTo("this" + separator + "is" + separator)); - - assertThat(bi.first(), equalTo(0)); - assertThat(source.substring(0, bi.next(3)), equalTo("this" + separator + "is" + separator + "the" + separator)); - } - - public void testSingleSentences() throws Exception { - BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT); - BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator()); - assertSameBreaks("a", expected, actual); - assertSameBreaks("ab", expected, actual); - assertSameBreaks("abc", expected, actual); - assertSameBreaks("", expected, actual); - } - - public void testSliceEnd() throws Exception { - BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT); - BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator()); - assertSameBreaks("a000", 0, 1, expected, actual); - assertSameBreaks("ab000", 0, 1, expected, actual); - assertSameBreaks("abc000", 0, 1, expected, actual); - assertSameBreaks("000", 0, 0, expected, actual); - } - - public void testSliceStart() throws Exception { - BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT); - BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator()); - assertSameBreaks("000a", 3, 1, expected, actual); - assertSameBreaks("000ab", 3, 2, expected, actual); - assertSameBreaks("000abc", 3, 3, expected, actual); - 
assertSameBreaks("000", 3, 0, expected, actual); - } - - public void testSliceMiddle() throws Exception { - BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT); - BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator()); - assertSameBreaks("000a000", 3, 1, expected, actual); - assertSameBreaks("000ab000", 3, 2, expected, actual); - assertSameBreaks("000abc000", 3, 3, expected, actual); - assertSameBreaks("000000", 3, 0, expected, actual); - } - - /** the current position must be ignored, initial position is always first() */ - public void testFirstPosition() throws Exception { - BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT); - BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator()); - assertSameBreaks("000ab000", 3, 2, 4, expected, actual); - } - - private static char randomSeparator() { - return randomFrom(' ', HighlightUtils.NULL_SEPARATOR, HighlightUtils.PARAGRAPH_SEPARATOR); - } - - private static void assertSameBreaks(String text, BreakIterator expected, BreakIterator actual) { - assertSameBreaks(new StringCharacterIterator(text), - new StringCharacterIterator(text), - expected, - actual); - } - - private static void assertSameBreaks(String text, int offset, int length, BreakIterator expected, BreakIterator actual) { - assertSameBreaks(text, offset, length, offset, expected, actual); - } - - private static void assertSameBreaks(String text, int offset, int length, int current, BreakIterator expected, BreakIterator actual) { - assertSameBreaks(new StringCharacterIterator(text, offset, offset + length, current), - new StringCharacterIterator(text, offset, offset + length, current), - expected, - actual); - } - - /** Asserts that two breakiterators break the text the same way */ - private static void assertSameBreaks(CharacterIterator one, CharacterIterator two, BreakIterator expected, BreakIterator actual) { - expected.setText(one); - actual.setText(two); - - 
assertEquals(expected.current(), actual.current()); - - // next() - int v = expected.current(); - while (v != BreakIterator.DONE) { - assertEquals(v = expected.next(), actual.next()); - assertEquals(expected.current(), actual.current()); - } - - // first() - assertEquals(expected.first(), actual.first()); - assertEquals(expected.current(), actual.current()); - // last() - assertEquals(expected.last(), actual.last()); - assertEquals(expected.current(), actual.current()); - - // previous() - v = expected.current(); - while (v != BreakIterator.DONE) { - assertEquals(v = expected.previous(), actual.previous()); - assertEquals(expected.current(), actual.current()); - } - - // following() - for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) { - expected.first(); - actual.first(); - assertEquals(expected.following(i), actual.following(i)); - assertEquals(expected.current(), actual.current()); - } - - // preceding() - for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) { - expected.last(); - actual.last(); - assertEquals(expected.preceding(i), actual.preceding(i)); - assertEquals(expected.current(), actual.current()); - } - } -} diff --git a/core/src/test/java/org/apache/lucene/search/uhighlight/CustomPassageFormatterTests.java b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomPassageFormatterTests.java index 4e664c3e24101..0b8bccb784f24 100644 --- a/core/src/test/java/org/apache/lucene/search/uhighlight/CustomPassageFormatterTests.java +++ b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomPassageFormatterTests.java @@ -19,7 +19,6 @@ package org.apache.lucene.search.uhighlight; -import org.apache.lucene.search.highlight.Snippet; import org.apache.lucene.search.highlight.DefaultEncoder; import org.apache.lucene.search.highlight.SimpleHTMLEncoder; import org.apache.lucene.util.BytesRef; diff --git a/core/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java 
b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java index 35cde8e02e050..eec611146a62a 100644 --- a/core/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java +++ b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java @@ -41,7 +41,6 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.DefaultEncoder; -import org.apache.lucene.search.highlight.Snippet; import org.apache.lucene.store.Directory; import org.elasticsearch.common.Strings; import org.elasticsearch.common.lucene.all.AllTermQuery; diff --git a/core/src/test/java/org/elasticsearch/search/SearchModuleTests.java b/core/src/test/java/org/elasticsearch/search/SearchModuleTests.java index 85b13974042e0..4b053b1968f8c 100644 --- a/core/src/test/java/org/elasticsearch/search/SearchModuleTests.java +++ b/core/src/test/java/org/elasticsearch/search/SearchModuleTests.java @@ -56,7 +56,7 @@ import org.elasticsearch.search.fetch.subphase.highlight.FastVectorHighlighter; import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import org.elasticsearch.search.fetch.subphase.highlight.PlainHighlighter; -import org.elasticsearch.search.fetch.subphase.highlight.PostingsHighlighter; +import org.elasticsearch.search.fetch.subphase.highlight.UnifiedHighlighter; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.suggest.CustomSuggesterSearchIT.CustomSuggestionBuilder; import org.elasticsearch.search.suggest.SuggestionBuilder; @@ -204,7 +204,7 @@ public Map getHighlighters() { Map highlighters = module.getHighlighters(); assertEquals(FastVectorHighlighter.class, highlighters.get("fvh").getClass()); assertEquals(PlainHighlighter.class, highlighters.get("plain").getClass()); - assertEquals(PostingsHighlighter.class, highlighters.get("postings").getClass()); + assertEquals(UnifiedHighlighter.class, 
highlighters.get("unified").getClass()); assertSame(highlighters.get("custom"), customHighlighter); } diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/TopHitsIT.java b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/TopHitsIT.java index 563fac1ba7df7..a90960c2ec944 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/TopHitsIT.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/TopHitsIT.java @@ -852,7 +852,7 @@ public void testTopHitsInSecondLayerNested() throws Exception { } public void testNestedFetchFeatures() { - String hlType = randomFrom("plain", "fvh", "postings"); + String hlType = randomFrom("plain", "fvh", "unified"); HighlightBuilder.Field hlField = new HighlightBuilder.Field("comments.message") .highlightQuery(matchQuery("comments.message", "comment")) .forceSource(randomBoolean()) // randomly from stored field or _source diff --git a/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java index f7f9bbaeb45ca..9cbd9fc5d75fd 100644 --- a/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java +++ b/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java @@ -102,8 +102,7 @@ public class HighlighterSearchIT extends ESIntegTestCase { // TODO as we move analyzers out of the core we need to move some of these into HighlighterWithAnalyzersTests - private static final String[] ALL_TYPES = new String[] {"plain", "postings", "fvh", "unified"}; - private static final String[] UNIFIED_AND_NULL = new String[] {null, "unified"}; + private static final String[] ALL_TYPES = new String[] {"plain", "fvh", "unified"}; @Override protected Collection> nodePlugins() { @@ -127,11 +126,9 @@ public void testHighlightingWithStoredKeyword() throws IOException { 
.setSource(jsonBuilder().startObject().field("text", "foo").endObject()) .get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - SearchResponse search = client().prepareSearch().setQuery(matchQuery("text", "foo")) - .highlighter(new HighlightBuilder().field(new Field("text")).highlighterType(type)).get(); - assertHighlight(search, 0, "text", 0, equalTo("foo")); - } + SearchResponse search = client().prepareSearch().setQuery(matchQuery("text", "foo")) + .highlighter(new HighlightBuilder().field(new Field("text"))).get(); + assertHighlight(search, 0, "text", 0, equalTo("foo")); } public void testHighlightingWithWildcardName() throws IOException { @@ -279,19 +276,19 @@ public void testEnsureNoNegativeOffsets() throws Exception { refresh(); SearchResponse search = client().prepareSearch() .setQuery(matchQuery("long_term", "thisisaverylongwordandmakessurethisfails foo highlighed")) - .highlighter(new HighlightBuilder().field("long_term", 18, 1)) + .highlighter(new HighlightBuilder().field("long_term", 18, 1).highlighterType("fvh")) .get(); assertHighlight(search, 0, "long_term", 0, 1, equalTo("thisisaverylongwordandmakessurethisfails")); search = client().prepareSearch() .setQuery(matchPhraseQuery("no_long_term", "test foo highlighed").slop(3)) - .highlighter(new HighlightBuilder().field("no_long_term", 18, 1).postTags("").preTags("")) + .highlighter(new HighlightBuilder().field("no_long_term", 18, 1).highlighterType("fvh").postTags("").preTags("")) .get(); assertNotHighlighted(search, 0, "no_long_term"); search = client().prepareSearch() .setQuery(matchPhraseQuery("no_long_term", "test foo highlighed").slop(3)) - .highlighter(new HighlightBuilder().field("no_long_term", 30, 1).postTags("").preTags("")) + .highlighter(new HighlightBuilder().field("no_long_term", 30, 1).highlighterType("fvh").postTags("").preTags("")) .get(); assertHighlight(search, 0, "no_long_term", 0, 1, equalTo("a test where foo is highlighed and")); @@ -326,26 +323,25 @@ public void 
testSourceLookupHighlightingUsingPlainHighlighter() throws Exception } indexRandom(true, indexRequestBuilders); - for (String type : UNIFIED_AND_NULL) { - SearchResponse search = client().prepareSearch() - .setQuery(matchQuery("title", "bug")) - .highlighter(new HighlightBuilder().field("title", -1, 0).highlighterType(type)) - .get(); + SearchResponse search = client().prepareSearch() + .setQuery(matchQuery("title", "bug")) + .highlighter(new HighlightBuilder().field("title", -1, 0)) + .get(); - for (int i = 0; i < indexRequestBuilders.length; i++) { - assertHighlight(search, i, "title", 0, equalTo("This is a test on the highlighting bug present in elasticsearch")); - } + for (int i = 0; i < indexRequestBuilders.length; i++) { + assertHighlight(search, i, "title", 0, equalTo("This is a test on the highlighting bug present in elasticsearch")); + } - search = client().prepareSearch() - .setQuery(matchQuery("attachments.body", "attachment")) - .highlighter(new HighlightBuilder().field("attachments.body", -1, 0).highlighterType(type)) - .get(); + search = client().prepareSearch() + .setQuery(matchQuery("attachments.body", "attachment")) + .highlighter(new HighlightBuilder().field("attachments.body", -1, 0)) + .get(); - for (int i = 0; i < indexRequestBuilders.length; i++) { - assertHighlight(search, i, "attachments.body", 0, equalTo("attachment 1")); - assertHighlight(search, i, "attachments.body", 1, equalTo("attachment 2")); - } + for (int i = 0; i < indexRequestBuilders.length; i++) { + assertHighlight(search, i, "attachments.body", 0, equalTo("attachment 1")); + assertHighlight(search, i, "attachments.body", 1, equalTo("attachment 2")); } + } public void testSourceLookupHighlightingUsingFastVectorHighlighter() throws Exception { @@ -380,25 +376,23 @@ public void testSourceLookupHighlightingUsingFastVectorHighlighter() throws Exce } indexRandom(true, indexRequestBuilders); - for (String type : UNIFIED_AND_NULL) { - SearchResponse search = client().prepareSearch() - 
.setQuery(matchQuery("title", "bug")) - .highlighter(new HighlightBuilder().field("title", -1, 0).highlighterType(type)) - .get(); + SearchResponse search = client().prepareSearch() + .setQuery(matchQuery("title", "bug")) + .highlighter(new HighlightBuilder().field("title", -1, 0)) + .get(); - for (int i = 0; i < indexRequestBuilders.length; i++) { - assertHighlight(search, i, "title", 0, equalTo("This is a test on the highlighting bug present in elasticsearch")); - } + for (int i = 0; i < indexRequestBuilders.length; i++) { + assertHighlight(search, i, "title", 0, equalTo("This is a test on the highlighting bug present in elasticsearch")); + } - search = client().prepareSearch() - .setQuery(matchQuery("attachments.body", "attachment")) - .highlighter(new HighlightBuilder().field("attachments.body", -1, 2).highlighterType(type)) - .execute().get(); + search = client().prepareSearch() + .setQuery(matchQuery("attachments.body", "attachment")) + .highlighter(new HighlightBuilder().field("attachments.body", -1, 2)) + .execute().get(); - for (int i = 0; i < 5; i++) { - assertHighlight(search, i, "attachments.body", 0, equalTo("attachment 1")); - assertHighlight(search, i, "attachments.body", 1, equalTo("attachment 2")); - } + for (int i = 0; i < 5; i++) { + assertHighlight(search, i, "attachments.body", 0, equalTo("attachment 1")); + assertHighlight(search, i, "attachments.body", 1, equalTo("attachment 2")); } } @@ -446,28 +440,26 @@ public void testSourceLookupHighlightingUsingPostingsHighlighter() throws Except assertHighlight(search, i, "title", 1, 2, equalTo("This is the second bug to perform highlighting on.")); } - for (String type : UNIFIED_AND_NULL) { - search = client().prepareSearch() - .setQuery(matchQuery("title", "bug")) - //sentences will be generated out of each value - .highlighter(new HighlightBuilder().field("title").highlighterType(type)).get(); - - for (int i = 0; i < indexRequestBuilders.length; i++) { - assertHighlight(search, i, "title", 0, - 
equalTo("This is a test on the highlighting bug present in elasticsearch.")); - assertHighlight(search, i, "title", 1, 2, - equalTo("This is the second bug to perform highlighting on.")); - } + search = client().prepareSearch() + .setQuery(matchQuery("title", "bug")) + //sentences will be generated out of each value + .highlighter(new HighlightBuilder().field("title")).get(); - search = client().prepareSearch() - .setQuery(matchQuery("attachments.body", "attachment")) - .highlighter(new HighlightBuilder().field("attachments.body", -1, 2).highlighterType(type)) - .get(); + for (int i = 0; i < indexRequestBuilders.length; i++) { + assertHighlight(search, i, "title", 0, + equalTo("This is a test on the highlighting bug present in elasticsearch.")); + assertHighlight(search, i, "title", 1, 2, + equalTo("This is the second bug to perform highlighting on.")); + } - for (int i = 0; i < indexRequestBuilders.length; i++) { - assertHighlight(search, i, "attachments.body", 0, equalTo("attachment for this test")); - assertHighlight(search, i, "attachments.body", 1, 2, equalTo("attachment 2")); - } + search = client().prepareSearch() + .setQuery(matchQuery("attachments.body", "attachment")) + .highlighter(new HighlightBuilder().field("attachments.body", -1, 2)) + .get(); + + for (int i = 0; i < indexRequestBuilders.length; i++) { + assertHighlight(search, i, "attachments.body", 0, equalTo("attachment for this test")); + assertHighlight(search, i, "attachments.body", 1, 2, equalTo("attachment 2")); } } @@ -521,9 +513,9 @@ public void testGlobalHighlightingSettingsOverriddenAtFieldLevel() { SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field1", 0, 2, equalTo(" test")); - assertHighlight(searchResponse, 0, "field1", 1, 2, equalTo(" test")); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("this is another test")); + assertHighlight(searchResponse, 0, "field1", 0, 2, equalTo("test")); + 
assertHighlight(searchResponse, 0, "field1", 1, 2, equalTo("test")); + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("yet another test")); } // Issue #5175 @@ -570,34 +562,31 @@ public void testForceSourceWithSourceDisabled() throws Exception { .setSource("field1", "The quick brown fox jumps over the lazy dog", "field2", "second field content").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - //works using stored field - SearchResponse searchResponse = client().prepareSearch("test") - .setQuery(termQuery("field1", "quick")) - .highlighter(new HighlightBuilder().field(new Field("field1").preTags("").postTags("").highlighterType(type))) - .get(); - assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); + //works using stored field + SearchResponse searchResponse = client().prepareSearch("test") + .setQuery(termQuery("field1", "quick")) + .highlighter(new HighlightBuilder().field(new Field("field1").preTags("").postTags(""))) + .get(); + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); - assertFailures(client().prepareSearch("test") - .setQuery(termQuery("field1", "quick")) - .highlighter( - new HighlightBuilder().field(new Field("field1").preTags("").postTags("") - .highlighterType(type).forceSource(true))), - RestStatus.BAD_REQUEST, - containsString("source is forced for fields [field1] but type [type1] has disabled _source")); + assertFailures(client().prepareSearch("test") + .setQuery(termQuery("field1", "quick")) + .highlighter( + new HighlightBuilder().field(new Field("field1").preTags("").postTags("").forceSource(true))), + RestStatus.BAD_REQUEST, + containsString("source is forced for fields [field1] but type [type1] has disabled _source")); - SearchSourceBuilder searchSource = SearchSourceBuilder.searchSource().query(termQuery("field1", "quick")) - 
.highlighter(highlight().forceSource(true).field("field1").highlighterType(type)); - assertFailures(client().prepareSearch("test").setSource(searchSource), - RestStatus.BAD_REQUEST, - containsString("source is forced for fields [field1] but type [type1] has disabled _source")); + SearchSourceBuilder searchSource = SearchSourceBuilder.searchSource().query(termQuery("field1", "quick")) + .highlighter(highlight().forceSource(true).field("field1")); + assertFailures(client().prepareSearch("test").setSource(searchSource), + RestStatus.BAD_REQUEST, + containsString("source is forced for fields [field1] but type [type1] has disabled _source")); - searchSource = SearchSourceBuilder.searchSource().query(termQuery("field1", "quick")) - .highlighter(highlight().forceSource(true).field("field*").highlighterType(type)); - assertFailures(client().prepareSearch("test").setSource(searchSource), - RestStatus.BAD_REQUEST, - matches("source is forced for fields \\[field\\d, field\\d\\] but type \\[type1\\] has disabled _source")); - } + searchSource = SearchSourceBuilder.searchSource().query(termQuery("field1", "quick")) + .highlighter(highlight().forceSource(true).field("field*")); + assertFailures(client().prepareSearch("test").setSource(searchSource), + RestStatus.BAD_REQUEST, + matches("source is forced for fields \\[field\\d, field\\d\\] but type \\[type1\\] has disabled _source")); } public void testPlainHighlighter() throws Exception { @@ -1015,16 +1004,14 @@ public void testSameContent() throws Exception { } indexRandom(true, indexRequestBuilders); - for (String type : UNIFIED_AND_NULL) { - SearchResponse search = client().prepareSearch() - .setQuery(matchQuery("title", "bug")) - .highlighter(new HighlightBuilder().field("title", -1, 0).highlighterType(type)) - .get(); + SearchResponse search = client().prepareSearch() + .setQuery(matchQuery("title", "bug")) + .highlighter(new HighlightBuilder().field("title", -1, 0)) + .get(); - for (int i = 0; i < 5; i++) { - 
assertHighlight(search, i, "title", 0, 1, equalTo("This is a test on the highlighting bug " + - "present in elasticsearch")); - } + for (int i = 0; i < 5; i++) { + assertHighlight(search, i, "title", 0, 1, equalTo("This is a test on the highlighting bug " + + "present in elasticsearch")); } } @@ -1041,7 +1028,7 @@ public void testFastVectorHighlighterOffsetParameter() throws Exception { SearchResponse search = client().prepareSearch() .setQuery(matchQuery("title", "bug")) - .highlighter(new HighlightBuilder().field("title", 30, 1, 10)) + .highlighter(new HighlightBuilder().field("title", 30, 1, 10).highlighterType("fvh")) .get(); for (int i = 0; i < 5; i++) { @@ -1061,16 +1048,14 @@ public void testEscapeHtml() throws Exception { } indexRandom(true, indexRequestBuilders); - for (String type : UNIFIED_AND_NULL) { - SearchResponse search = client().prepareSearch() - .setQuery(matchQuery("title", "test")) - .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1, 10).highlighterType(type)) - .get(); + SearchResponse search = client().prepareSearch() + .setQuery(matchQuery("title", "test")) + .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1, 10)) + .get(); - for (int i = 0; i < indexRequestBuilders.length; i++) { - assertHighlight(search, i, "title", 0, 1, - startsWith("This is a html escaping highlighting test for *&?")); - } + for (int i = 0; i < indexRequestBuilders.length; i++) { + assertHighlight(search, i, "title", 0, 1, + startsWith("This is a html escaping highlighting test for *&?")); } } @@ -1087,11 +1072,11 @@ public void testEscapeHtmlVector() throws Exception { SearchResponse search = client().prepareSearch() .setQuery(matchQuery("title", "test")) - .highlighter(new HighlightBuilder().encoder("html").field("title", 30, 1, 10)) + .highlighter(new HighlightBuilder().encoder("html").field("title", 30, 1, 10).highlighterType("plain")) .get(); for (int i = 0; i < 5; i++) { - assertHighlight(search, i, "title", 0, 1, 
equalTo("highlighting test for *&? elasticsearch")); + assertHighlight(search, i, "title", 0, 1, equalTo(" highlighting test for *&? elasticsearch")); } } @@ -1116,23 +1101,21 @@ public void testMultiMapperVectorWithStore() throws Exception { client().prepareIndex("test", "type1", "1").setSource("title", "this is a test").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - // simple search on body with standard analyzer with a simple field query - SearchResponse search = client().prepareSearch() - .setQuery(matchQuery("title", "this is a test")) - .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1).highlighterType(type)) - .get(); + // simple search on body with standard analyzer with a simple field query + SearchResponse search = client().prepareSearch() + .setQuery(matchQuery("title", "this is a test")) + .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1)) + .get(); - assertHighlight(search, 0, "title", 0, 1, equalTo("this is a test")); + assertHighlight(search, 0, "title", 0, 1, equalTo("this is a test")); - // search on title.key and highlight on title - search = client().prepareSearch() - .setQuery(matchQuery("title.key", "this is a test")) - .highlighter(new HighlightBuilder().encoder("html").field("title.key", 50, 1).highlighterType(type)) - .get(); + // search on title.key and highlight on title + search = client().prepareSearch() + .setQuery(matchQuery("title.key", "this is a test")) + .highlighter(new HighlightBuilder().encoder("html").field("title.key", 50, 1)) + .get(); - assertHighlight(search, 0, "title.key", 0, 1, equalTo("this is a test")); - } + assertHighlight(search, 0, "title.key", 0, 1, equalTo("this is a test")); } public void testMultiMapperVectorFromSource() throws Exception { @@ -1157,23 +1140,21 @@ public void testMultiMapperVectorFromSource() throws Exception { client().prepareIndex("test", "type1", "1").setSource("title", "this is a test").get(); refresh(); - for (String type : 
UNIFIED_AND_NULL) { - // simple search on body with standard analyzer with a simple field query - SearchResponse search = client().prepareSearch() - .setQuery(matchQuery("title", "this is a test")) - .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1).highlighterType(type)) - .get(); + // simple search on body with standard analyzer with a simple field query + SearchResponse search = client().prepareSearch() + .setQuery(matchQuery("title", "this is a test")) + .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1)) + .get(); - assertHighlight(search, 0, "title", 0, 1, equalTo("this is a test")); + assertHighlight(search, 0, "title", 0, 1, equalTo("this is a test")); - // search on title.key and highlight on title.key - search = client().prepareSearch() - .setQuery(matchQuery("title.key", "this is a test")) - .highlighter(new HighlightBuilder().encoder("html").field("title.key", 50, 1).highlighterType(type)) - .get(); + // search on title.key and highlight on title.key + search = client().prepareSearch() + .setQuery(matchQuery("title.key", "this is a test")) + .highlighter(new HighlightBuilder().encoder("html").field("title.key", 50, 1)) + .get(); - assertHighlight(search, 0, "title.key", 0, 1, equalTo("this is a test")); - } + assertHighlight(search, 0, "title.key", 0, 1, equalTo("this is a test")); } public void testMultiMapperNoVectorWithStore() throws Exception { @@ -1200,23 +1181,21 @@ public void testMultiMapperNoVectorWithStore() throws Exception { refresh(); - for (String type : UNIFIED_AND_NULL) { - // simple search on body with standard analyzer with a simple field query - SearchResponse search = client().prepareSearch() - .setQuery(matchQuery("title", "this is a test")) - .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1).highlighterType(type)) - .get(); + // simple search on body with standard analyzer with a simple field query + SearchResponse search = client().prepareSearch() + 
.setQuery(matchQuery("title", "this is a test")) + .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1)) + .get(); - assertHighlight(search, 0, "title", 0, 1, equalTo("this is a test")); + assertHighlight(search, 0, "title", 0, 1, equalTo("this is a test")); - // search on title.key and highlight on title - search = client().prepareSearch() - .setQuery(matchQuery("title.key", "this is a test")) - .highlighter(new HighlightBuilder().encoder("html").field("title.key", 50, 1).highlighterType(type)) - .get(); + // search on title.key and highlight on title + search = client().prepareSearch() + .setQuery(matchQuery("title.key", "this is a test")) + .highlighter(new HighlightBuilder().encoder("html").field("title.key", 50, 1)) + .get(); - assertHighlight(search, 0, "title.key", 0, 1, equalTo("this is a test")); - } + assertHighlight(search, 0, "title.key", 0, 1, equalTo("this is a test")); } public void testMultiMapperNoVectorFromSource() throws Exception { @@ -1241,23 +1220,21 @@ public void testMultiMapperNoVectorFromSource() throws Exception { client().prepareIndex("test", "type1", "1").setSource("title", "this is a test").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - // simple search on body with standard analyzer with a simple field query - SearchResponse search = client().prepareSearch() - .setQuery(matchQuery("title", "this is a test")) - .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1).highlighterType(type)) - .get(); + // simple search on body with standard analyzer with a simple field query + SearchResponse search = client().prepareSearch() + .setQuery(matchQuery("title", "this is a test")) + .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1)) + .get(); - assertHighlight(search, 0, "title", 0, 1, equalTo("this is a test")); + assertHighlight(search, 0, "title", 0, 1, equalTo("this is a test")); - // search on title.key and highlight on title.key - search = 
client().prepareSearch() - .setQuery(matchQuery("title.key", "this is a test")) - .highlighter(new HighlightBuilder().encoder("html").field("title.key", 50, 1).highlighterType(type)) - .get(); + // search on title.key and highlight on title.key + search = client().prepareSearch() + .setQuery(matchQuery("title.key", "this is a test")) + .highlighter(new HighlightBuilder().encoder("html").field("title.key", 50, 1)) + .get(); - assertHighlight(search, 0, "title.key", 0, 1, equalTo("this is a test")); - } + assertHighlight(search, 0, "title.key", 0, 1, equalTo("this is a test")); } public void testFastVectorHighlighterShouldFailIfNoTermVectors() throws Exception { @@ -1305,7 +1282,7 @@ public void testDisableFastVectorHighlighter() throws Exception { SearchResponse search = client().prepareSearch() .setQuery(matchPhraseQuery("title", "test for the workaround")) - .highlighter(new HighlightBuilder().field("title", 50, 1, 10)) + .highlighter(new HighlightBuilder().field("title", 50, 1, 10).highlighterType("fvh")) .get(); for (int i = 0; i < indexRequestBuilders.length; i++) { @@ -1350,7 +1327,7 @@ public void testFSHHighlightAllMvFragments() throws Exception { SearchResponse response = client().prepareSearch("test") .setQuery(QueryBuilders.matchQuery("tags", "tag")) - .highlighter(new HighlightBuilder().field("tags", -1, 0)).get(); + .highlighter(new HighlightBuilder().field("tags", -1, 0).highlighterType("fvh")).get(); assertHighlight(response, 0, "tags", 0, equalTo("this is a really long tag i would like to highlight")); assertHighlight(response, 0, "tags", 1, 2, @@ -1364,16 +1341,14 @@ public void testBoostingQuery() { .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource() - .query(boostingQuery(termQuery("field2", "brown"), termQuery("field2", 
"foobar")).negativeBoost(0.5f)) - .highlighter(highlight().field("field2").order("score").preTags("").postTags("").highlighterType(type)); + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource() + .query(boostingQuery(termQuery("field2", "brown"), termQuery("field2", "foobar")).negativeBoost(0.5f)) + .highlighter(highlight().field("field2").order("score").preTags("").postTags("")); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); - } + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); } public void testBoostingQueryTermVector() throws IOException { @@ -1404,14 +1379,12 @@ public void testCommonTermsQuery() { refresh(); logger.info("--> highlighting and searching on field1"); - for (String type : UNIFIED_AND_NULL) { - SearchSourceBuilder source = searchSource() - .query(commonTermsQuery("field2", "quick brown").cutoffFrequency(100)) - .highlighter(highlight().field("field2").order("score").preTags("").postTags("").highlighterType(type)); + SearchSourceBuilder source = searchSource() + .query(commonTermsQuery("field2", "quick brown").cutoffFrequency(100)) + .highlighter(highlight().field("field2").order("score").preTags("").postTags("")); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); - } + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); } public void testCommonTermsTermVector() throws IOException { @@ -1453,98 +1426,86 @@ 
public void testPhrasePrefix() throws IOException { refresh(); logger.info("--> highlighting and searching on field0"); - for (String type : UNIFIED_AND_NULL) { - SearchSourceBuilder source = searchSource() + SearchSourceBuilder source = searchSource() .query(matchPhrasePrefixQuery("field0", "bro")) - .highlighter(highlight().field("field0").order("score").preTags("").postTags("").highlighterType(type)); - - SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + .highlighter(highlight().field("field0").order("score").preTags("").postTags("")); + SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); + assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); - source = searchSource() - .query(matchPhrasePrefixQuery("field0", "quick bro")) - .highlighter(highlight().field("field0").order("score").preTags("").postTags("").highlighterType(type)); - - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + source = searchSource() + .query(matchPhrasePrefixQuery("field0", "quick bro")) + .highlighter(highlight().field("field0").order("score").preTags("").postTags("")); - assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); - logger.info("--> highlighting and searching on field1"); - source = searchSource() - .query(boolQuery() - .should(matchPhrasePrefixQuery("field1", "test")) - .should(matchPhrasePrefixQuery("field1", "bro")) - ) - .highlighter(highlight().field("field1").order("score").preTags("").postTags("").highlighterType(type)); + 
logger.info("--> highlighting and searching on field1"); + source = searchSource() + .query(boolQuery() + .should(matchPhrasePrefixQuery("field1", "test")) + .should(matchPhrasePrefixQuery("field1", "bro")) + ) + .highlighter(highlight().field("field1").order("score").preTags("").postTags("")); - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertThat(searchResponse.getHits().totalHits, equalTo(2L)); - for (int i = 0; i < 2; i++) { - assertHighlight(searchResponse, i, "field1", 0, 1, anyOf( - equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog"))); - } + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + assertThat(searchResponse.getHits().totalHits, equalTo(2L)); + for (int i = 0; i < 2; i++) { + assertHighlight(searchResponse, i, "field1", 0, 1, anyOf( + equalTo("The quick browse button is a fancy thing, right bro?"), + equalTo("The quick brown fox jumps over the lazy dog"))); + } - source = searchSource() - .query(matchPhrasePrefixQuery("field1", "quick bro")) - .highlighter(highlight().field("field1").order("score").preTags("").postTags("").highlighterType(type)); + source = searchSource() + .query(matchPhrasePrefixQuery("field1", "quick bro")) + .highlighter(highlight().field("field1").order("score").preTags("").postTags("")); + + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + + assertHighlight(searchResponse, 0, "field1", 0, 1, anyOf( + equalTo("The quick browse button is a fancy thing, right bro?"), + equalTo("The quick brown fox jumps over the lazy dog"))); + assertHighlight(searchResponse, 1, "field1", 0, 1, anyOf( + equalTo("The quick browse button is a fancy thing, right bro?"), + equalTo("The quick brown fox jumps over the lazy dog"))); + + // with synonyms + client().prepareIndex("test", "type2", "0").setSource( + "field4", "The quick brown fox jumps over the lazy dog", + 
"field3", "The quick brown fox jumps over the lazy dog").get(); + client().prepareIndex("test", "type2", "1").setSource( + "field4", "The quick browse button is a fancy thing, right bro?").get(); + client().prepareIndex("test", "type2", "2").setSource( + "field4", "a quick fast blue car").get(); + refresh(); - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + source = searchSource().postFilter(typeQuery("type2")).query(matchPhrasePrefixQuery("field3", "fast bro")) + .highlighter(highlight().field("field3").order("score").preTags("").postTags("")); - assertHighlight(searchResponse, 0, "field1", 0, 1, anyOf( - equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog"), - equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog"))); - assertHighlight(searchResponse, 1, "field1", 0, 1, anyOf( - equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog"), - equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog"))); - - // with synonyms - client().prepareIndex("test", "type2", "0").setSource( - "field4", "The quick brown fox jumps over the lazy dog", - "field3", "The quick brown fox jumps over the lazy dog").get(); - client().prepareIndex("test", "type2", "1").setSource( - "field4", "The quick browse button is a fancy thing, right bro?").get(); - client().prepareIndex("test", "type2", "2").setSource( - "field4", "a quick fast blue car").get(); - refresh(); - - source = searchSource().postFilter(typeQuery("type2")).query(matchPhrasePrefixQuery("field3", "fast bro")) - .highlighter(highlight().field("field3").order("score").preTags("").postTags("").highlighterType(type)); + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - searchResponse = 
client().search(searchRequest("test").source(source)).actionGet(); + assertHighlight(searchResponse, 0, "field3", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); - assertHighlight(searchResponse, 0, "field3", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); + logger.info("--> highlighting and searching on field4"); + source = searchSource().postFilter(typeQuery("type2")).query(matchPhrasePrefixQuery("field4", "the fast bro")) + .highlighter(highlight().field("field4").order("score").preTags("").postTags("")); + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - logger.info("--> highlighting and searching on field4"); - source = searchSource().postFilter(typeQuery("type2")).query(matchPhrasePrefixQuery("field4", "the fast bro")) - .highlighter(highlight().field("field4").order("score").preTags("").postTags("").highlighterType(type)); - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + assertHighlight(searchResponse, 0, "field4", 0, 1, anyOf( + equalTo("The quick browse button is a fancy thing, right bro?"), + equalTo("The quick brown fox jumps over the lazy dog"))); + assertHighlight(searchResponse, 1, "field4", 0, 1, anyOf( + equalTo("The quick browse button is a fancy thing, right bro?"), + equalTo("The quick brown fox jumps over the lazy dog"))); - assertHighlight(searchResponse, 0, "field4", 0, 1, anyOf( - equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog"), - equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog"))); - assertHighlight(searchResponse, 1, "field4", 0, 1, anyOf( - equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog"), - equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog"))); - - 
logger.info("--> highlighting and searching on field4"); - source = searchSource().postFilter(typeQuery("type2")).query(matchPhrasePrefixQuery("field4", "a fast quick blue ca")) - .highlighter(highlight().field("field4").order("score").preTags("").postTags("").highlighterType(type)); - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + logger.info("--> highlighting and searching on field4"); + source = searchSource().postFilter(typeQuery("type2")).query(matchPhrasePrefixQuery("field4", "a fast quick blue ca")) + .highlighter(highlight().field("field4").order("score").preTags("").postTags("")); + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field4", 0, 1, - anyOf(equalTo("a quick fast blue car"), - equalTo("a quick fast blue car"))); - } + assertHighlight(searchResponse, 0, "field4", 0, 1, + anyOf(equalTo("a quick fast blue car"), + equalTo("a quick fast blue car"))); } public void testPlainHighlightDifferentFragmenter() throws Exception { @@ -1560,8 +1521,9 @@ public void testPlainHighlightDifferentFragmenter() throws Exception { SearchResponse response = client().prepareSearch("test") .setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQuery.Type.PHRASE)) .highlighter( - new HighlightBuilder().field(new HighlightBuilder.Field("tags").fragmentSize(-1).numOfFragments(2) - .fragmenter("simple"))).get(); + new HighlightBuilder().field(new HighlightBuilder.Field("tags") + .highlighterType("plain").fragmentSize(-1).numOfFragments(2).fragmenter("simple"))) + .get(); assertHighlight(response, 0, "tags", 0, equalTo("this is a really long tag i would like to highlight")); assertHighlight(response, 0, "tags", 1, 2, @@ -1570,7 +1532,7 @@ public void testPlainHighlightDifferentFragmenter() throws Exception { response = client().prepareSearch("test") .setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQuery.Type.PHRASE)) .highlighter( - new 
HighlightBuilder().field(new HighlightBuilder.Field("tags").fragmentSize(-1).numOfFragments(2) + new HighlightBuilder().field(new Field("tags").highlighterType("plain").fragmentSize(-1).numOfFragments(2) .fragmenter("span"))).get(); assertHighlight(response, 0, "tags", 0, @@ -1581,7 +1543,7 @@ public void testPlainHighlightDifferentFragmenter() throws Exception { assertFailures(client().prepareSearch("test") .setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQuery.Type.PHRASE)) .highlighter( - new HighlightBuilder().field(new HighlightBuilder.Field("tags").fragmentSize(-1).numOfFragments(2) + new HighlightBuilder().field(new Field("tags").highlighterType("plain").fragmentSize(-1).numOfFragments(2) .fragmenter("invalid"))), RestStatus.BAD_REQUEST, containsString("unknown fragmenter option [invalid] for the field [tags]")); @@ -1635,15 +1597,13 @@ public void testMissingStoredField() throws Exception { .endObject()).get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - // This query used to fail when the field to highlight was absent - SearchResponse response = client().prepareSearch("test") - .setQuery(QueryBuilders.matchQuery("field", "highlight").type(MatchQuery.Type.BOOLEAN)) - .highlighter( - new HighlightBuilder().field(new HighlightBuilder.Field("highlight_field").fragmentSize(-1).numOfFragments(1) - .fragmenter("simple")).highlighterType(type)).get(); - assertThat(response.getHits().getHits()[0].getHighlightFields().isEmpty(), equalTo(true)); - } + // This query used to fail when the field to highlight was absent + SearchResponse response = client().prepareSearch("test") + .setQuery(QueryBuilders.matchQuery("field", "highlight").type(MatchQuery.Type.BOOLEAN)) + .highlighter( + new HighlightBuilder().field(new HighlightBuilder.Field("highlight_field").fragmentSize(-1).numOfFragments(1) + .fragmenter("simple"))).get(); + assertThat(response.getHits().getHits()[0].getHighlightFields().isEmpty(), equalTo(true)); } // Issue #3211 @@ -1688,13 
+1648,11 @@ public void testResetTwice() throws Exception { .setSource("text", "elasticsearch test").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - SearchResponse response = client().prepareSearch("test") - .setQuery(QueryBuilders.matchQuery("text", "test").type(MatchQuery.Type.BOOLEAN)) - .highlighter(new HighlightBuilder().field("text").highlighterType(type)).execute().actionGet(); - // PatternAnalyzer will throw an exception if it is resetted twice - assertHitCount(response, 1L); - } + SearchResponse response = client().prepareSearch("test") + .setQuery(QueryBuilders.matchQuery("text", "test").type(MatchQuery.Type.BOOLEAN)) + .highlighter(new HighlightBuilder().field("text")).execute().actionGet(); + // PatternAnalyzer will throw an exception if it is resetted twice + assertHitCount(response, 1L); } public void testHighlightUsesHighlightQuery() throws IOException { @@ -1759,10 +1717,6 @@ public void testHighlightNoMatchSize() throws IOException { response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertNotHighlighted(response, 0, "text"); - field.highlighterType("postings"); - response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); - assertNotHighlighted(response, 0, "text"); - field.highlighterType("unified"); response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertNotHighlighted(response, 0, "text"); @@ -1776,10 +1730,6 @@ public void testHighlightNoMatchSize() throws IOException { response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertNotHighlighted(response, 0, "text"); - field.highlighterType("postings"); - response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); - assertNotHighlighted(response, 0, "text"); - field.highlighterType("unified"); response = client().prepareSearch("test").highlighter(new 
HighlightBuilder().field(field)).get(); assertNotHighlighted(response, 0, "text"); @@ -1799,11 +1749,6 @@ public void testHighlightNoMatchSize() throws IOException { response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some")); - // Postings hl also works but the fragment is the whole first sentence (size ignored) - field.highlighterType("postings"); - response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); - assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some of me should get cut off.")); - // We can also ask for a fragment longer than the input string and get the whole string field.highlighterType("plain").noMatchSize(text.length() * 2); response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); @@ -1817,11 +1762,6 @@ public void testHighlightNoMatchSize() throws IOException { response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertHighlight(response, 0, "text", 0, 1, equalTo(text)); - //no difference using postings hl as the noMatchSize is ignored (just needs to be greater than 0) - field.highlighterType("postings"); - response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); - assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some of me should get cut off.")); - // We can also ask for a fragment exactly the size of the input field and get the whole field field.highlighterType("plain").noMatchSize(text.length()); response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); @@ -1836,11 +1776,6 @@ public void testHighlightNoMatchSize() throws IOException { response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertHighlight(response, 0, "text", 0, 1, 
equalTo(text)); - //no difference using postings hl as the noMatchSize is ignored (just needs to be greater than 0) - field.highlighterType("postings"); - response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); - assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some of me should get cut off.")); - // You can set noMatchSize globally in the highlighter as well field.highlighterType("plain").noMatchSize(null); response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field).noMatchSize(21)).get(); @@ -1854,10 +1789,6 @@ public void testHighlightNoMatchSize() throws IOException { response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field).noMatchSize(21)).get(); assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some")); - field.highlighterType("postings"); - response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field).noMatchSize(21)).get(); - assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some of me should get cut off.")); - // We don't break if noMatchSize is less than zero though field.highlighterType("plain").noMatchSize(randomIntBetween(Integer.MIN_VALUE, -1)); response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); @@ -1867,10 +1798,6 @@ public void testHighlightNoMatchSize() throws IOException { response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertNotHighlighted(response, 0, "text"); - field.highlighterType("postings"); - response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); - assertNotHighlighted(response, 0, "text"); - field.highlighterType("unified"); response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertNotHighlighted(response, 0, "text"); @@ -1903,11 +1830,6 @@ public void 
testHighlightNoMatchSizeWithMultivaluedFields() throws IOException { response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some")); - // Postings hl also works but the fragment is the whole first sentence (size ignored) - field.highlighterType("postings"); - response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); - assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some of me should get cut off.")); - // And noMatchSize returns nothing when the first entry is empty string! index("test", "type1", "2", "text", new String[] {"", text2}); refresh(); @@ -1925,12 +1847,6 @@ public void testHighlightNoMatchSizeWithMultivaluedFields() throws IOException { .highlighter(new HighlightBuilder().field(field)).get(); assertNotHighlighted(response, 0, "text"); - field.highlighterType("postings"); - response = client().prepareSearch("test") - .setQuery(idsQueryBuilder) - .highlighter(new HighlightBuilder().field(field)).get(); - assertNotHighlighted(response, 0, "text"); - // except for the unified highlighter which starts from the first string with actual content field.highlighterType("unified"); response = client().prepareSearch("test") @@ -1954,12 +1870,6 @@ public void testHighlightNoMatchSizeWithMultivaluedFields() throws IOException { .highlighter(new HighlightBuilder().field(field)).get(); assertNotHighlighted(response, 0, "text"); - field.highlighterType("postings"); - response = client().prepareSearch("test") - .setQuery(idsQueryBuilder) - .highlighter(new HighlightBuilder().field(field)).get(); - assertNotHighlighted(response, 0, "text"); - field.highlighterType("unified"); response = client().prepareSearch("test") .setQuery(idsQueryBuilder) @@ -2000,10 +1910,6 @@ public void testHighlightNoMatchSizeWithMultivaluedFields() throws IOException { response = 
client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertNotHighlighted(response, 0, "text"); - field.highlighterType("postings"); - response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); - assertNotHighlighted(response, 0, "text"); - field.highlighterType("unified"); response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertNotHighlighted(response, 0, "text"); @@ -2038,11 +1944,6 @@ public void testHighlightNoMatchSizeNumberOfFragments() throws IOException { assertHighlight(response, 0, "text", 0, 1, equalTo("This is the first sentence")); - // Postings hl also works but the fragment is the whole first sentence (size ignored) - field.highlighterType("postings"); - response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); - assertHighlight(response, 0, "text", 0, 1, equalTo("This is the first sentence.")); - //if there's a match we only return the values with matches (whole value as number_of_fragments == 0) MatchQueryBuilder queryBuilder = QueryBuilders.matchQuery("text", "third fifth"); field.highlighterType("plain"); @@ -2055,11 +1956,6 @@ public void testHighlightNoMatchSizeNumberOfFragments() throws IOException { assertHighlight(response, 0, "text", 0, 2, equalTo("This is the third sentence. This is the fourth sentence.")); assertHighlight(response, 0, "text", 1, 2, equalTo("This is the fifth sentence")); - field.highlighterType("postings"); - response = client().prepareSearch("test").setQuery(queryBuilder).highlighter(new HighlightBuilder().field(field)).get(); - assertHighlight(response, 0, "text", 0, 2, equalTo("This is the third sentence. 
This is the fourth sentence.")); - assertHighlight(response, 0, "text", 1, 2, equalTo("This is the fifth sentence")); - field.highlighterType("unified"); response = client().prepareSearch("test").setQuery(queryBuilder).highlighter(new HighlightBuilder().field(field)).get(); assertHighlight(response, 0, "text", 0, 2, equalTo("This is the third sentence. This is the fourth sentence.")); @@ -2074,49 +1970,42 @@ public void testPostingsHighlighter() throws Exception { .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy quick dog").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource() - .query(termQuery("field1", "test")) - .highlighter(highlight().field("field1").preTags("").postTags("").highlighterType(type)); - SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource() + .query(termQuery("field1", "test")) + .highlighter(highlight().field("field1").preTags("").postTags("")); + SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("this is a test")); + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("this is a test")); - logger.info("--> searching on field1, highlighting on field1"); - source = searchSource() - .query(termQuery("field1", "test")) - .highlighter(highlight().field("field1").preTags("").postTags("").highlighterType(type)); + logger.info("--> searching on field1, highlighting on field1"); + source = searchSource() + .query(termQuery("field1", "test")) + .highlighter(highlight().field("field1").preTags("").postTags("")); - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + searchResponse = 
client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("this is a test")); + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("this is a test")); - logger.info("--> searching on field2, highlighting on field2"); - source = searchSource() - .query(termQuery("field2", "quick")) - .highlighter(highlight().field("field2").order("score").preTags("").postTags("").highlighterType(type)); + logger.info("--> searching on field2, highlighting on field2"); + source = searchSource() + .query(termQuery("field2", "quick")) + .highlighter(highlight().field("field2").order("score").preTags("").postTags("")); - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field2", 0, 1, - equalTo("The quick brown fox jumps over the lazy quick dog")); + assertHighlight(searchResponse, 0, "field2", 0, 1, + equalTo("The quick brown fox jumps over the lazy quick dog")); - logger.info("--> searching on field2, highlighting on field2"); - source = searchSource() - .query(matchPhraseQuery("field2", "quick brown")) - .highlighter(highlight().field("field2").preTags("").postTags("").highlighterType(type)); + logger.info("--> searching on field2, highlighting on field2"); + source = searchSource() + .query(matchPhraseQuery("field2", "quick brown")) + .highlighter(highlight().field("field2").preTags("").postTags("")); - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - if (type == null) { - //phrase query results in highlighting all different terms regardless of their positions - assertHighlight(searchResponse, 0, "field2", 0, 1, - equalTo("The quick brown fox jumps over the lazy quick dog")); - } else { - assertHighlight(searchResponse, 0, 
"field2", 0, 1, - equalTo("The quick brown fox jumps over the lazy quick dog")); - } + assertHighlight(searchResponse, 0, "field2", 0, 1, + equalTo("The quick brown fox jumps over the lazy quick dog")); //lets fall back to the standard highlighter then, what people would do to highlight query matches logger.info("--> searching on field2, highlighting on field2, falling back to the plain highlighter"); @@ -2125,11 +2014,10 @@ public void testPostingsHighlighter() throws Exception { .highlighter(highlight() .field("field2").preTags("").postTags("").highlighterType("plain").requireFieldMatch(false)); - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field2", 0, 1, - equalTo("The quick brown fox jumps over the lazy quick dog")); - } + assertHighlight(searchResponse, 0, "field2", 0, 1, + equalTo("The quick brown fox jumps over the lazy quick dog")); } public void testPostingsHighlighterMultipleFields() throws Exception { @@ -2141,15 +2029,13 @@ public void testPostingsHighlighterMultipleFields() throws Exception { "field2", "The slow brown fox. 
Second sentence."); refresh(); - for (String type : UNIFIED_AND_NULL) { - SearchResponse response = client().prepareSearch("test") - .setQuery(QueryBuilders.matchQuery("field1", "fox")) - .highlighter( - new HighlightBuilder().field(new Field("field1").preTags("<1>").postTags("") - .requireFieldMatch(true).highlighterType(type))) - .get(); - assertHighlight(response, 0, "field1", 0, 1, equalTo("The quick brown <1>fox.")); - } + SearchResponse response = client().prepareSearch("test") + .setQuery(QueryBuilders.matchQuery("field1", "fox")) + .highlighter( + new HighlightBuilder().field(new Field("field1").preTags("<1>").postTags("") + .requireFieldMatch(true))) + .get(); + assertHighlight(response, 0, "field1", 0, 1, equalTo("The quick brown <1>fox.")); } public void testPostingsHighlighterNumberOfFragments() throws Exception { @@ -2164,53 +2050,50 @@ public void testPostingsHighlighterNumberOfFragments() throws Exception { refresh(); - for (String type : UNIFIED_AND_NULL) { - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource() - .query(termQuery("field1", "fox")) - .highlighter(highlight() - .field(new Field("field1").numOfFragments(5).preTags("").postTags("").highlighterType(type))); - - SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource() + .query(termQuery("field1", "fox")) + .highlighter(highlight() + .field(new Field("field1").numOfFragments(5).preTags("").postTags(""))); - assertHighlight(searchResponse, 0, "field1", 0, equalTo("The quick brown fox jumps over the lazy dog.")); - assertHighlight(searchResponse, 0, "field1", 1, equalTo("The lazy red fox jumps over the quick dog.")); - assertHighlight(searchResponse, 0, "field1", 2, 3, equalTo("The quick brown dog jumps over the lazy fox.")); + SearchResponse searchResponse = 
client().search(searchRequest("test").source(source)).actionGet(); - client().prepareIndex("test", "type1", "2") - .setSource("field1", new String[]{ - "The quick brown fox jumps over the lazy dog. Second sentence not finished", - "The lazy red fox jumps over the quick dog.", - "The quick brown dog jumps over the lazy fox."}).get(); - refresh(); + assertHighlight(searchResponse, 0, "field1", 0, equalTo("The quick brown fox jumps over the lazy dog.")); + assertHighlight(searchResponse, 0, "field1", 1, equalTo("The lazy red fox jumps over the quick dog.")); + assertHighlight(searchResponse, 0, "field1", 2, 3, equalTo("The quick brown dog jumps over the lazy fox.")); - source = searchSource() - .query(termQuery("field1", "fox")) - .highlighter(highlight() - .field(new Field("field1").numOfFragments(0).preTags("").postTags("").highlighterType(type))); + client().prepareIndex("test", "type1", "2") + .setSource("field1", new String[]{ + "The quick brown fox jumps over the lazy dog. Second sentence not finished", + "The lazy red fox jumps over the quick dog.", + "The quick brown dog jumps over the lazy fox."}).get(); + refresh(); - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHitCount(searchResponse, 2L); - - for (SearchHit searchHit : searchResponse.getHits()) { - if ("1".equals(searchHit.getId())) { - assertHighlight(searchHit, "field1", 0, 1, equalTo("The quick brown fox jumps over the lazy dog. " - + "The lazy red fox jumps over the quick dog. " - + "The quick brown dog jumps over the lazy fox.")); - } else if ("2".equals(searchHit.getId())) { - assertHighlight(searchHit, "field1", 0, 3, - equalTo("The quick brown fox jumps over the lazy dog. 
Second sentence not finished")); - assertHighlight(searchHit, "field1", 1, 3, equalTo("The lazy red fox jumps over the quick dog.")); - assertHighlight(searchHit, "field1", 2, 3, equalTo("The quick brown dog jumps over the lazy fox.")); - } else { - fail("Only hits with id 1 and 2 are returned"); - } + source = searchSource() + .query(termQuery("field1", "fox")) + .highlighter(highlight() + .field(new Field("field1").numOfFragments(0).preTags("").postTags(""))); + + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + assertHitCount(searchResponse, 2L); + + for (SearchHit searchHit : searchResponse.getHits()) { + if ("1".equals(searchHit.getId())) { + assertHighlight(searchHit, "field1", 0, 1, equalTo("The quick brown fox jumps over the lazy dog. " + + "The lazy red fox jumps over the quick dog. " + + "The quick brown dog jumps over the lazy fox.")); + } else if ("2".equals(searchHit.getId())) { + assertHighlight(searchHit, "field1", 0, 3, + equalTo("The quick brown fox jumps over the lazy dog. Second sentence not finished")); + assertHighlight(searchHit, "field1", 1, 3, equalTo("The lazy red fox jumps over the quick dog.")); + assertHighlight(searchHit, "field1", 2, 3, equalTo("The quick brown dog jumps over the lazy fox.")); + } else { + fail("Only hits with id 1 and 2 are returned"); } } } public void testMultiMatchQueryHighlight() throws IOException { - String[] highlighterTypes = new String[] {"fvh", "plain", "postings", "unified"}; XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1") .startObject("properties") .startObject("field1") @@ -2233,23 +2116,10 @@ public void testMultiMatchQueryHighlight() throws IOException { refresh(); final int iters = scaledRandomIntBetween(20, 30); for (int i = 0; i < iters; i++) { - String highlighterType = rarely() ? 
null : RandomPicks.randomFrom(random(), highlighterTypes); - MultiMatchQueryBuilder.Type[] supportedQueryTypes; - if ("postings".equals(highlighterType)) { - /* - * phrase_prefix is not supported by postings highlighter, as it rewrites against an empty reader, the prefix will never - * match any term - */ - supportedQueryTypes = new MultiMatchQueryBuilder.Type[]{ - MultiMatchQueryBuilder.Type.BEST_FIELDS, - MultiMatchQueryBuilder.Type.CROSS_FIELDS, - MultiMatchQueryBuilder.Type.MOST_FIELDS, - MultiMatchQueryBuilder.Type.PHRASE}; - } else { - supportedQueryTypes = MultiMatchQueryBuilder.Type.values(); - } - MultiMatchQueryBuilder.Type matchQueryType = RandomPicks.randomFrom(random(), supportedQueryTypes); - MultiMatchQueryBuilder multiMatchQueryBuilder = multiMatchQuery("the quick brown fox", "field1", "field2").type(matchQueryType); + String highlighterType = rarely() ? null : RandomPicks.randomFrom(random(), ALL_TYPES); + MultiMatchQueryBuilder.Type matchQueryType = RandomPicks.randomFrom(random(), MultiMatchQueryBuilder.Type.values()); + MultiMatchQueryBuilder multiMatchQueryBuilder = multiMatchQuery("the quick brown fox", "field1", "field2") + .type(matchQueryType); SearchSourceBuilder source = searchSource() .query(multiMatchQueryBuilder) @@ -2278,26 +2148,24 @@ public void testPostingsHighlighterOrderByScore() throws Exception { + "This one contains no matches."}).get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource() - .query(termQuery("field1", "sentence")) - .highlighter(highlight().field("field1").order("score").highlighterType(type)); + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource() + .query(termQuery("field1", "sentence")) + .highlighter(highlight().field("field1").order("score")); - SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + 
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - Map highlightFieldMap = searchResponse.getHits().getAt(0).getHighlightFields(); - assertThat(highlightFieldMap.size(), equalTo(1)); - HighlightField field1 = highlightFieldMap.get("field1"); - assertThat(field1.fragments().length, equalTo(5)); - assertThat(field1.fragments()[0].string(), - equalTo("This sentence contains three sentence occurrences (sentence).")); - assertThat(field1.fragments()[1].string(), equalTo("This sentence contains two sentence matches.")); - assertThat(field1.fragments()[2].string(), equalTo("This is the second value's first sentence.")); - assertThat(field1.fragments()[3].string(), equalTo("This sentence contains one match, not that short.")); - assertThat(field1.fragments()[4].string(), - equalTo("One sentence match here and scored lower since the text is quite long, not that appealing.")); - } + Map highlightFieldMap = searchResponse.getHits().getAt(0).getHighlightFields(); + assertThat(highlightFieldMap.size(), equalTo(1)); + HighlightField field1 = highlightFieldMap.get("field1"); + assertThat(field1.fragments().length, equalTo(5)); + assertThat(field1.fragments()[0].string(), + equalTo("This sentence contains three sentence occurrences (sentence).")); + assertThat(field1.fragments()[1].string(), equalTo("This sentence contains two sentence matches.")); + assertThat(field1.fragments()[2].string(), equalTo("This is the second value's first sentence.")); + assertThat(field1.fragments()[3].string(), equalTo("This sentence contains one match, not that short.")); + assertThat(field1.fragments()[4].string(), + equalTo("One sentence match here and scored lower since the text is quite long, not that appealing.")); } public void testPostingsHighlighterEscapeHtml() throws Exception { @@ -2311,15 +2179,13 @@ public void testPostingsHighlighterEscapeHtml() throws Exception { } indexRandom(true, indexRequestBuilders); - for (String type : 
UNIFIED_AND_NULL) { - SearchResponse searchResponse = client().prepareSearch() - .setQuery(matchQuery("title", "test")) - .highlighter(new HighlightBuilder().field("title").encoder("html").highlighterType(type)).get(); + SearchResponse searchResponse = client().prepareSearch() + .setQuery(matchQuery("title", "test")) + .highlighter(new HighlightBuilder().field("title").encoder("html")).get(); - for (int i = 0; i < indexRequestBuilders.length; i++) { - assertHighlight(searchResponse, i, "title", 0, 1, - equalTo("This is a html escaping highlighting test for *&?")); - } + for (int i = 0; i < indexRequestBuilders.length; i++) { + assertHighlight(searchResponse, i, "title", 0, 1, + equalTo("This is a html escaping highlighting test for *&?")); } } @@ -2344,28 +2210,26 @@ public void testPostingsHighlighterMultiMapperWithStore() throws Exception { client().prepareIndex("test", "type1", "1").setSource("title", "this is a test . Second sentence.").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - // simple search on body with standard analyzer with a simple field query - SearchResponse searchResponse = client().prepareSearch() - //lets make sure we analyze the query and we highlight the resulting terms - .setQuery(matchQuery("title", "This is a Test")) - .highlighter(new HighlightBuilder().field("title").highlighterType(type)).get(); + // simple search on body with standard analyzer with a simple field query + SearchResponse searchResponse = client().prepareSearch() + //lets make sure we analyze the query and we highlight the resulting terms + .setQuery(matchQuery("title", "This is a Test")) + .highlighter(new HighlightBuilder().field("title")).get(); + + assertHitCount(searchResponse, 1L); + SearchHit hit = searchResponse.getHits().getAt(0); + //stopwords are not highlighted since not indexed + assertHighlight(hit, "title", 0, 1, equalTo("this is a test .")); - assertHitCount(searchResponse, 1L); - SearchHit hit = searchResponse.getHits().getAt(0); - 
//stopwords are not highlighted since not indexed - assertHighlight(hit, "title", 0, 1, equalTo("this is a test .")); - - // search on title.key and highlight on title - searchResponse = client().prepareSearch() - .setQuery(matchQuery("title.key", "this is a test")) - .highlighter(new HighlightBuilder().field("title.key")).get(); - assertHitCount(searchResponse, 1L); + // search on title.key and highlight on title + searchResponse = client().prepareSearch() + .setQuery(matchQuery("title.key", "this is a test")) + .highlighter(new HighlightBuilder().field("title.key")).get(); + assertHitCount(searchResponse, 1L); - //stopwords are now highlighted since we used only whitespace analyzer here - assertHighlight(searchResponse, 0, "title.key", 0, 1, - equalTo("this is a test .")); - } + //stopwords are now highlighted since we used only whitespace analyzer here + assertHighlight(searchResponse, 0, "title.key", 0, 1, + equalTo("this is a test .")); } public void testPostingsHighlighterMultiMapperFromSource() throws Exception { @@ -2390,22 +2254,20 @@ public void testPostingsHighlighterMultiMapperFromSource() throws Exception { client().prepareIndex("test", "type1", "1").setSource("title", "this is a test").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - // simple search on body with standard analyzer with a simple field query - SearchResponse searchResponse = client().prepareSearch() - .setQuery(matchQuery("title", "this is a test")) - .highlighter(new HighlightBuilder().field("title")) - .get(); + // simple search on body with standard analyzer with a simple field query + SearchResponse searchResponse = client().prepareSearch() + .setQuery(matchQuery("title", "this is a test")) + .highlighter(new HighlightBuilder().field("title")) + .get(); - assertHighlight(searchResponse, 0, "title", 0, 1, equalTo("this is a test")); + assertHighlight(searchResponse, 0, "title", 0, 1, equalTo("this is a test")); - // search on title.key and highlight on title.key - 
searchResponse = client().prepareSearch() - .setQuery(matchQuery("title.key", "this is a test")) - .highlighter(new HighlightBuilder().field("title.key").highlighterType(type)).get(); + // search on title.key and highlight on title.key + searchResponse = client().prepareSearch() + .setQuery(matchQuery("title.key", "this is a test")) + .highlighter(new HighlightBuilder().field("title.key")).get(); - assertHighlight(searchResponse, 0, "title.key", 0, 1, equalTo("this is a test")); - } + assertHighlight(searchResponse, 0, "title.key", 0, 1, equalTo("this is a test")); } public void testPostingsHighlighterShouldFailIfNoOffsets() throws Exception { @@ -2427,26 +2289,6 @@ public void testPostingsHighlighterShouldFailIfNoOffsets() throws Exception { .highlighter(new HighlightBuilder().field("title")) .get(); assertNoFailures(search); - - assertFailures(client().prepareSearch() - .setQuery(matchQuery("title", "this is a test")) - .highlighter(new HighlightBuilder().field("title").highlighterType("postings")), - RestStatus.BAD_REQUEST, - containsString("the field [title] should be indexed with positions and offsets in the " - + "postings list to be used with postings highlighter")); - - - assertFailures(client().prepareSearch() - .setQuery(matchQuery("title", "this is a test")) - .highlighter(new HighlightBuilder().field("title").highlighterType("postings")), - RestStatus.BAD_REQUEST, - containsString("the field [title] should be indexed with positions and offsets in the " - + "postings list to be used with postings highlighter")); - - //should not fail if there is a wildcard - assertNoFailures(client().prepareSearch() - .setQuery(matchQuery("title", "this is a test")) - .highlighter(new HighlightBuilder().field("tit*").highlighterType("postings")).get()); } public void testPostingsHighlighterBoostingQuery() throws IOException { @@ -2456,15 +2298,13 @@ public void testPostingsHighlighterBoostingQuery() throws IOException { .setSource("field1", "this is a test", "field2", 
"The quick brown fox jumps over the lazy dog! Second sentence.").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource() - .query(boostingQuery(termQuery("field2", "brown"), termQuery("field2", "foobar")).negativeBoost(0.5f)) - .highlighter(highlight().field("field2").preTags("").postTags("").highlighterType(type)); - SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource() + .query(boostingQuery(termQuery("field2", "brown"), termQuery("field2", "foobar")).negativeBoost(0.5f)) + .highlighter(highlight().field("field2").preTags("").postTags("")); + SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); - } + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); } public void testPostingsHighlighterCommonTermsQuery() throws IOException { @@ -2475,15 +2315,13 @@ public void testPostingsHighlighterCommonTermsQuery() throws IOException { .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! 
Second sentence.").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource().query(commonTermsQuery("field2", "quick brown").cutoffFrequency(100)) - .highlighter(highlight().field("field2").preTags("").postTags("").highlighterType(type)); - SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHitCount(searchResponse, 1L); + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource().query(commonTermsQuery("field2", "quick brown").cutoffFrequency(100)) + .highlighter(highlight().field("field2").preTags("").postTags("")); + SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + assertHitCount(searchResponse, 1L); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); - } + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); } private static XContentBuilder type1PostingsffsetsMapping() throws IOException { @@ -2504,12 +2342,10 @@ public void testPostingsHighlighterPrefixQuery() throws Exception { refresh(); logger.info("--> highlighting and searching on field2"); - for (String type : UNIFIED_AND_NULL) { - SearchSourceBuilder source = searchSource().query(prefixQuery("field2", "qui")) - .highlighter(highlight().field("field2").highlighterType(type)); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); - } + SearchSourceBuilder source = searchSource().query(prefixQuery("field2", "qui")) + .highlighter(highlight().field("field2")); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + assertHighlight(searchResponse, 0, "field2", 0, 
1, equalTo("The quick brown fox jumps over the lazy dog!")); } public void testPostingsHighlighterFuzzyQuery() throws Exception { @@ -2520,14 +2356,12 @@ public void testPostingsHighlighterFuzzyQuery() throws Exception { .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - logger.info("--> highlighting and searching on field2"); - SearchSourceBuilder source = searchSource().query(fuzzyQuery("field2", "quck")) - .highlighter(highlight().field("field2").highlighterType(type)); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + logger.info("--> highlighting and searching on field2"); + SearchSourceBuilder source = searchSource().query(fuzzyQuery("field2", "quck")) + .highlighter(highlight().field("field2")); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); - } + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); } public void testPostingsHighlighterRegexpQuery() throws Exception { @@ -2538,14 +2372,12 @@ public void testPostingsHighlighterRegexpQuery() throws Exception { .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! 
Second sentence.").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - logger.info("--> highlighting and searching on field2"); - SearchSourceBuilder source = searchSource().query(regexpQuery("field2", "qu[a-l]+k")) - .highlighter(highlight().field("field2").highlighterType(type)); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + logger.info("--> highlighting and searching on field2"); + SearchSourceBuilder source = searchSource().query(regexpQuery("field2", "qu[a-l]+k")) + .highlighter(highlight().field("field2")); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); - } + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); } public void testPostingsHighlighterWildcardQuery() throws Exception { @@ -2556,21 +2388,19 @@ public void testPostingsHighlighterWildcardQuery() throws Exception { .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! 
Second sentence.").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - logger.info("--> highlighting and searching on field2"); - SearchSourceBuilder source = searchSource().query(wildcardQuery("field2", "qui*")) - .highlighter(highlight().field("field2").highlighterType(type)); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + logger.info("--> highlighting and searching on field2"); + SearchSourceBuilder source = searchSource().query(wildcardQuery("field2", "qui*")) + .highlighter(highlight().field("field2")); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); - source = searchSource().query(wildcardQuery("field2", "qu*k")) - .highlighter(highlight().field("field2").highlighterType(type)); - searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHitCount(searchResponse, 1L); + source = searchSource().query(wildcardQuery("field2", "qu*k")) + .highlighter(highlight().field("field2")); + searchResponse = client().prepareSearch("test").setSource(source).get(); + assertHitCount(searchResponse, 1L); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); - } + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); } public void testPostingsHighlighterTermRangeQuery() throws Exception { @@ -2580,14 +2410,12 @@ public void testPostingsHighlighterTermRangeQuery() throws Exception { client().prepareIndex("test", "type1").setSource("field1", "this is a test", "field2", "aaab").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - logger.info("--> highlighting and searching on field2"); - SearchSourceBuilder source = 
searchSource().query(rangeQuery("field2").gte("aaaa").lt("zzzz")) - .highlighter(highlight().field("field2").highlighterType(type)); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + logger.info("--> highlighting and searching on field2"); + SearchSourceBuilder source = searchSource().query(rangeQuery("field2").gte("aaaa").lt("zzzz")) + .highlighter(highlight().field("field2")); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("aaab")); - } + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("aaab")); } public void testPostingsHighlighterQueryString() throws Exception { @@ -2598,13 +2426,11 @@ public void testPostingsHighlighterQueryString() throws Exception { .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - logger.info("--> highlighting and searching on field2"); - SearchSourceBuilder source = searchSource().query(queryStringQuery("qui*").defaultField("field2")) - .highlighter(highlight().field("field2").highlighterType(type)); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); - } + logger.info("--> highlighting and searching on field2"); + SearchSourceBuilder source = searchSource().query(queryStringQuery("qui*").defaultField("field2")) + .highlighter(highlight().field("field2")); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); } public void testPostingsHighlighterRegexpQueryWithinConstantScoreQuery() throws Exception { @@ -2614,13 +2440,11 @@ public void 
testPostingsHighlighterRegexpQueryWithinConstantScoreQuery() throws client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource().query(constantScoreQuery(regexpQuery("field1", "pho[a-z]+"))) - .highlighter(highlight().field("field1").highlighterType(type)); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); - } + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource().query(constantScoreQuery(regexpQuery("field1", "pho[a-z]+"))) + .highlighter(highlight().field("field1")); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); } public void testPostingsHighlighterMultiTermQueryMultipleLevels() throws Exception { @@ -2630,16 +2454,14 @@ public void testPostingsHighlighterMultiTermQueryMultipleLevels() throws Excepti client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource().query(boolQuery() - .should(boolQuery().mustNot(QueryBuilders.existsQuery("field1"))) - .should(matchQuery("field1", "test")) - .should(constantScoreQuery(queryStringQuery("field1:photo*")))) - .highlighter(highlight().field("field1").highlighterType(type)); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); - } + 
logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource().query(boolQuery() + .should(boolQuery().mustNot(QueryBuilders.existsQuery("field1"))) + .should(matchQuery("field1", "test")) + .should(constantScoreQuery(queryStringQuery("field1:photo*")))) + .highlighter(highlight().field("field1")); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); } public void testPostingsHighlighterPrefixQueryWithinBooleanQuery() throws Exception { @@ -2649,14 +2471,12 @@ public void testPostingsHighlighterPrefixQueryWithinBooleanQuery() throws Except client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource() - .query(boolQuery().must(prefixQuery("field1", "photo")).should(matchQuery("field1", "test").minimumShouldMatch("0"))) - .highlighter(highlight().field("field1").highlighterType(type)); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); - } + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource() + .query(boolQuery().must(prefixQuery("field1", "photo")).should(matchQuery("field1", "test").minimumShouldMatch("0"))) + .highlighter(highlight().field("field1")); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); } public void testPostingsHighlighterQueryStringWithinFilteredQuery() throws Exception { @@ -2666,15 +2486,13 @@ public void 
testPostingsHighlighterQueryStringWithinFilteredQuery() throws Excep client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get(); refresh(); - for (String type : UNIFIED_AND_NULL) { - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource().query(boolQuery() - .must(queryStringQuery("field1:photo*")) - .mustNot(existsQuery("field_null"))) - .highlighter(highlight().field("field1").highlighterType(type)); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); - } + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource().query(boolQuery() + .must(queryStringQuery("field1:photo*")) + .mustNot(existsQuery("field_null"))) + .highlighter(highlight().field("field1")); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); } public void testPostingsHighlighterManyDocs() throws Exception { @@ -2691,25 +2509,23 @@ public void testPostingsHighlighterManyDocs() throws Exception { String prefix = randomAlphaOfLengthBetween(5, 30); prefixes.put(String.valueOf(i), prefix); indexRequestBuilders[i] = client().prepareIndex("test", "type1", Integer.toString(i)).setSource("field1", "Sentence " + prefix - + " test. Sentence two."); + + " test. 
Sentence two."); } logger.info("--> indexing docs"); indexRandom(true, indexRequestBuilders); - for (String type : UNIFIED_AND_NULL) { - logger.info("--> searching explicitly on field1 and highlighting on it"); - SearchRequestBuilder searchRequestBuilder = client().prepareSearch() - .setSize(COUNT) - .setQuery(termQuery("field1", "test")) - .highlighter(new HighlightBuilder().field("field1").highlighterType(type)); - SearchResponse searchResponse = - searchRequestBuilder.get(); - assertHitCount(searchResponse, COUNT); - assertThat(searchResponse.getHits().getHits().length, equalTo(COUNT)); - for (SearchHit hit : searchResponse.getHits()) { - String prefix = prefixes.get(hit.getId()); - assertHighlight(hit, "field1", 0, 1, equalTo("Sentence " + prefix + " test.")); - } + logger.info("--> searching explicitly on field1 and highlighting on it"); + SearchRequestBuilder searchRequestBuilder = client().prepareSearch() + .setSize(COUNT) + .setQuery(termQuery("field1", "test")) + .highlighter(new HighlightBuilder().field("field1")); + SearchResponse searchResponse = + searchRequestBuilder.get(); + assertHitCount(searchResponse, COUNT); + assertThat(searchResponse.getHits().getHits().length, equalTo(COUNT)); + for (SearchHit hit : searchResponse.getHits()) { + String prefix = prefixes.get(hit.getId()); + assertHighlight(hit, "field1", 0, 1, equalTo("Sentence " + prefix + " test.")); } } @@ -2755,11 +2571,6 @@ public void testFastVectorHighlighterPhraseBoost() throws Exception { phraseBoostTestCase("fvh"); } - public void testPostingsHighlighterPhraseBoost() throws Exception { - assertAcked(prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping())); - phraseBoostTestCase("postings"); - } - /** * Test phrase boosting over normal term matches. Note that this will never pass with the plain highlighter * because it doesn't support the concept of terms having a different weight based on position. 
@@ -2851,7 +2662,7 @@ public void testGeoFieldHighlightingWithDifferentHighlighters() throws IOExcepti .setSource(jsonBuilder().startObject().field("text", "Arbitrary text field which will should not cause a failure").endObject()) .get(); refresh(); - String highlighterType = randomFrom("plain", "fvh", "postings", "unified"); + String highlighterType = randomFrom(ALL_TYPES); QueryBuilder query = QueryBuilders.boolQuery().should(QueryBuilders.geoBoundingBoxQuery("geo_point") .setCorners(61.10078883158897, -170.15625, -64.92354174306496, 118.47656249999999)) .should(QueryBuilders.termQuery("text", "failure")); @@ -2972,17 +2783,15 @@ public void testFunctionScoreQueryHighlight() throws Exception { .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) .get(); - for (String type : UNIFIED_AND_NULL) { - SearchResponse searchResponse = client().prepareSearch() - .setQuery(new FunctionScoreQueryBuilder(QueryBuilders.prefixQuery("text", "bro"))) - .highlighter(new HighlightBuilder() - .field(new Field("text")).highlighterType(type)) - .get(); - assertHitCount(searchResponse, 1); - HighlightField field = searchResponse.getHits().getAt(0).getHighlightFields().get("text"); - assertThat(field.getFragments().length, equalTo(1)); - assertThat(field.getFragments()[0].string(), equalTo("brown")); - } + SearchResponse searchResponse = client().prepareSearch() + .setQuery(new FunctionScoreQueryBuilder(QueryBuilders.prefixQuery("text", "bro"))) + .highlighter(new HighlightBuilder() + .field(new Field("text"))) + .get(); + assertHitCount(searchResponse, 1); + HighlightField field = searchResponse.getHits().getAt(0).getHighlightFields().get("text"); + assertThat(field.getFragments().length, equalTo(1)); + assertThat(field.getFragments()[0].string(), equalTo("brown")); } public void testFiltersFunctionScoreQueryHighlight() throws Exception { @@ -2994,18 +2803,16 @@ public void testFiltersFunctionScoreQueryHighlight() throws Exception { new 
FunctionScoreQueryBuilder.FilterFunctionBuilder(QueryBuilders.termQuery("enable", "yes"), new RandomScoreFunctionBuilder()); - for (String type : UNIFIED_AND_NULL) { - SearchResponse searchResponse = client().prepareSearch() - .setQuery(new FunctionScoreQueryBuilder(QueryBuilders.prefixQuery("text", "bro"), - new FunctionScoreQueryBuilder.FilterFunctionBuilder[]{filterBuilder})) - .highlighter(new HighlightBuilder() - .field(new Field("text")).highlighterType(type)) - .get(); - assertHitCount(searchResponse, 1); - HighlightField field = searchResponse.getHits().getAt(0).getHighlightFields().get("text"); - assertThat(field.getFragments().length, equalTo(1)); - assertThat(field.getFragments()[0].string(), equalTo("brown")); - } + SearchResponse searchResponse = client().prepareSearch() + .setQuery(new FunctionScoreQueryBuilder(QueryBuilders.prefixQuery("text", "bro"), + new FunctionScoreQueryBuilder.FilterFunctionBuilder[]{filterBuilder})) + .highlighter(new HighlightBuilder() + .field(new Field("text"))) + .get(); + assertHitCount(searchResponse, 1); + HighlightField field = searchResponse.getHits().getAt(0).getHighlightFields().get("text"); + assertThat(field.getFragments().length, equalTo(1)); + assertThat(field.getFragments()[0].string(), equalTo("brown")); } public void testSynonyms() throws IOException { diff --git a/docs/reference/mapping/fields/all-field.asciidoc b/docs/reference/mapping/fields/all-field.asciidoc index f6cf8237e1c70..2ed2f9e432fdb 100644 --- a/docs/reference/mapping/fields/all-field.asciidoc +++ b/docs/reference/mapping/fields/all-field.asciidoc @@ -312,8 +312,7 @@ disk space and, because it is a combination of other fields, it may result in odd highlighting results. The `_all` field also accepts the `term_vector` and `index_options` -parameters, allowing the use of the fast vector highlighter and the postings -highlighter. +parameters, allowing highlighting to use it. 
[[all-highlight-fields]] ===== Highlight original fields diff --git a/docs/reference/mapping/params/index-options.asciidoc b/docs/reference/mapping/params/index-options.asciidoc index bda174b523409..8d180e4f98852 100644 --- a/docs/reference/mapping/params/index-options.asciidoc +++ b/docs/reference/mapping/params/index-options.asciidoc @@ -26,7 +26,7 @@ following settings: Doc number, term frequencies, positions, and start and end character offsets (which map the term back to the original string) are indexed. - Offsets are used by the <>. + Offsets are used by the <> to speed up highlighting. <> string fields use `positions` as the default, and all other fields use `docs` as the default. @@ -67,4 +67,4 @@ GET my_index/_search } -------------------------------------------------- // CONSOLE -<1> The `text` field will use the postings highlighter by default because `offsets` are indexed. +<1> The `text` field will use the postings for the highlighting by default because `offsets` are indexed. diff --git a/docs/reference/mapping/types/nested.asciidoc b/docs/reference/mapping/types/nested.asciidoc index 8047193f934f4..b5b0d3394eafe 100644 --- a/docs/reference/mapping/types/nested.asciidoc +++ b/docs/reference/mapping/types/nested.asciidoc @@ -188,7 +188,7 @@ accessed within the scope of the `nested` query, the For instance, if a string field within a nested document has <> set to `offsets` to allow use of the postings -highlighter, these offsets will not be available during the main highlighting +during the highlighting, these offsets will not be available during the main highlighting phase. Instead, highlighting needs to be performed via <>. 
diff --git a/docs/reference/migration/migrate_6_0/search.asciidoc b/docs/reference/migration/migrate_6_0/search.asciidoc index 82c2ba8f71793..339af404871e3 100644 --- a/docs/reference/migration/migrate_6_0/search.asciidoc +++ b/docs/reference/migration/migrate_6_0/search.asciidoc @@ -98,3 +98,14 @@ but the only reason why it has not been deprecated too is because it is used for the `random_score` function. If you really need access to the id of documents for sorting, aggregations or search scripts, the recommandation is to duplicate the id as a field in the document. + +==== Highlighters + +The `unified` highlighter is the new default choice of highlighter. +The offset strategy for each field is picked internally by this highlighter depending on the +type of the field (`index_options`). +It is still possible to force the highlighter to `fvh` or `plain` types. + +The `postings` highlighter has been removed from Lucene and Elasticsearch. +The `unified` highlighter outputs the same highlighting when `index_options` is set + to `offsets`. \ No newline at end of file diff --git a/docs/reference/search/request/highlighting.asciidoc b/docs/reference/search/request/highlighting.asciidoc index 5901fe0c9b041..a06295aa8cee5 100644 --- a/docs/reference/search/request/highlighting.asciidoc +++ b/docs/reference/search/request/highlighting.asciidoc @@ -1,9 +1,8 @@ [[search-request-highlighting]] === Highlighting -Allows to highlight search results on one or more fields. The -implementation uses either the lucene `plain` highlighter, the -fast vector highlighter (`fvh`) or `postings` highlighter. +Highlighters allow you to produce highlighted snippets from one or more fields +in your search results. The following is an example of the search request body: [source,js] @@ -45,35 +44,48 @@ from versions before 5.0) that match the expression to be highlighted. Note that all other fields will not be highlighted.
If you use a custom mapper and want to highlight on a field anyway, you have to provide the field name explicitly. -[[plain-highlighter]] -==== Plain highlighter +[[unified-highlighter]] +==== Unified Highlighter -The default choice of highlighter is of type `plain` and uses the Lucene highlighter. -It tries hard to reflect the query matching logic in terms of understanding word importance and any word positioning criteria in phrase queries. +The unified highlighter (which is used by default if no highlighter type is specified) +uses the Lucene Unified Highlighter. +This highlighter breaks the text into sentences and scores individual sentences as +if they were documents in this corpus, using the BM25 algorithm. +It also supports accurate phrase and multi-term (fuzzy, prefix, regex) highlighting. -[WARNING] -If you want to highlight a lot of fields in a lot of documents with complex queries this highlighter will not be fast. -In its efforts to accurately reflect query logic it creates a tiny in-memory index and re-runs the original query criteria through -Lucene's query execution planner to get access to low-level match information on the current document. -This is repeated for every field and every document that needs highlighting. If this presents a performance issue in your system consider using an alternative highlighter. +[float] +===== Offsets Strategy + +In order to create meaningful search snippets from the terms being queried, +a highlighter needs to know the start and end character offsets of each word +in the original text. +These offsets can be obtained from: -[[postings-highlighter]] -==== Postings highlighter +* The postings list (fields mapped as "index_options": "offsets"). +* Term vectors (fields mapped as "term_vector": "with_positions_offsets"). +* The original field, by reanalysing the text on-the-fly. + +[float] +====== Plain highlighting + +This mode is picked when there is no other alternative.
+It creates a tiny in-memory index and re-runs the original query criteria through +Lucene's query execution planner to get access to low-level match information on the current document. +This is repeated for every field and every document that needs highlighting. -If `index_options` is set to `offsets` in the mapping the postings highlighter -will be used instead of the plain highlighter. The postings highlighter: +[float] +====== Postings -* Is faster since it doesn't require to reanalyze the text to be highlighted: -the larger the documents the better the performance gain should be -* Requires less disk space than term_vectors, needed for the fast vector -highlighter -* Breaks the text into sentences and highlights them. Plays really well with -natural languages, not as well with fields containing for instance html markup -* Treats the document as the whole corpus, and scores individual sentences as -if they were documents in this corpus, using the BM25 algorithm +If `index_options` is set to `offsets` in the mapping the `unified` highlighter +will use this information to highlight documents without re-analyzing the text. +It re-runs the original query directly on the postings and extracts the matching offsets +directly from the index, limiting the collection to the highlighted documents. +This mode is faster on large fields since it doesn't need to reanalyze the text to be highlighted +and requires less disk space than term_vectors, needed for the fast vector +highlighting. Here is an example of setting the `comment` field in the index mapping to allow for -highlighting using the postings highlighter on it: +highlighting using the postings: [source,js] -------------------------------------------------- @@ -93,24 +105,56 @@ PUT /example -------------------------------------------------- // CONSOLE -[NOTE] -Note that the postings highlighter is meant to perform simple query terms -highlighting, regardless of their positions.
That means that when used for -instance in combination with a phrase query, it will highlight all the terms -that the query is composed of, regardless of whether they are actually part of -a query match, effectively ignoring their positions. +[float] +====== Term Vectors + +If `term_vector` information is provided by setting `term_vector` to +`with_positions_offsets` in the mapping then the `unified` highlighter +will automatically use the `term_vector` to highlight the field. +The `term_vector` highlighting is faster to highlight multi-term queries like +`prefix` or `wildcard` because it can access the dictionary of terms for each document +but it is also usually more costly than using the `postings` directly. + +Here is an example of setting the `comment` field to allow for +highlighting using the `term_vectors` (this will cause the index to be bigger): + +[source,js] +-------------------------------------------------- +PUT /example +{ + "mappings": { + "doc" : { + "properties": { + "comment" : { + "type": "text", + "term_vector" : "with_positions_offsets" + } + } + } + } +} +-------------------------------------------------- +// CONSOLE + +[[plain-highlighter]] +==== Plain highlighter + +This highlighter of type `plain` uses the standard Lucene highlighter. +It tries hard to reflect the query matching logic in terms of understanding word importance and any word positioning criteria in phrase queries. [WARNING] -The postings highlighter doesn't support highlighting some complex queries, -like a `match` query with `type` set to `match_phrase_prefix`. No highlighted -snippets will be returned in that case. +If you want to highlight a lot of fields in a lot of documents with complex queries this highlighter will not be fast. +In its efforts to accurately reflect query logic it creates a tiny in-memory index and re-runs the original query criteria through +Lucene's query execution planner to get access to low-level match information on the current document.
+This is repeated for every field and every document that needs highlighting. If this presents a performance issue in your system consider using an alternative highlighter. [[fast-vector-highlighter]] ==== Fast vector highlighter -If `term_vector` information is provided by setting `term_vector` to -`with_positions_offsets` in the mapping then the fast vector highlighter -will be used instead of the plain highlighter. The fast vector highlighter: +This highlighter of type `fvh` uses the Lucene Fast Vector highlighter. +This highlighter can be used on fields with `term_vector` set to +`with_positions_offsets` in the mapping. +The fast vector highlighter: * Is faster especially for large fields (> `1MB`) * Can be customized with `boundary_scanner` (see <>) @@ -144,30 +188,10 @@ PUT /example -------------------------------------------------- // CONSOLE -==== Unified Highlighter - -experimental[] - -The `unified` highlighter can extract offsets from either postings, term vectors, or via re-analyzing text. -Under the hood it uses Lucene UnifiedHighlighter which picks its strategy depending on the field and the query to highlight. -Independently of the strategy this highlighter breaks the text into sentences and scores individual sentences as -if they were documents in this corpus, using the BM25 algorithm. -It supports accurate phrase and multi-term (fuzzy, prefix, regex) highlighting and can be used with the following options: - -* `force_source` -* `encoder` -* `highlight_query` -* `pre_tags and `post_tags` -* `require_field_match` -* `boundary_scanner` (`sentence` (**default**) or `word`) -* `max_fragment_length` (only for `sentence` scanner) -* `no_match_size` - ==== Force highlighter type -The `type` field allows to force a specific highlighter type. This is useful -for instance when needing to use the plain highlighter on a field that has -`term_vectors` enabled. The allowed values are: `plain`, `postings` and `fvh`. 
+The `type` field allows to force a specific highlighter type. +The allowed values are: `unified`, `plain` and `fvh`. The following is an example that forces the use of the plain highlighter: [source,js] @@ -320,9 +344,6 @@ GET /_search // CONSOLE // TEST[setup:twitter] -The `fragment_size` is ignored when using the postings highlighter, as it -outputs sentences regardless of their length. - On top of this it is possible to specify that highlighted fragments need to be sorted by score: @@ -375,10 +396,7 @@ In the case where there is no matching fragment to highlight, the default is to not return anything. Instead, we can return a snippet of text from the beginning of the field by setting `no_match_size` (default `0`) to the length of the text that you want returned. The actual length may be shorter or longer than -specified as it tries to break on a word boundary. When using the postings -highlighter it is not possible to control the actual size of the snippet, -therefore the first sentence gets returned whenever `no_match_size` is -greater than `0`. +specified as it tries to break on a word boundary. [source,js] -------------------------------------------------- @@ -403,6 +421,8 @@ GET /_search ==== Fragmenter +WARNING: This option is not supported by the `unified` highlighter + Fragmenter can control how text should be broken up in highlight snippets. However, this option is applicable only for the Plain Highlighter. There are two options: @@ -421,6 +441,7 @@ GET twitter/tweet/_search "highlight" : { "fields" : { "message" : { + "type": "plain", "fragment_size" : 15, "number_of_fragments" : 3, "fragmenter": "simple" @@ -476,6 +497,7 @@ GET twitter/tweet/_search "highlight" : { "fields" : { "message" : { + "type": "plain", "fragment_size" : 15, "number_of_fragments" : 3, "fragmenter": "span" @@ -596,12 +618,6 @@ GET /_search // CONSOLE // TEST[setup:twitter] -Note that the score of text fragment in this case is calculated by the Lucene -highlighting framework. 
For implementation details you can check the -`ScoreOrderFragmentsBuilder.java` class. On the other hand when using the -postings highlighter the fragments are scored using, as mentioned above, -the BM25 algorithm. - [[highlighting-settings]] ==== Global Settings @@ -681,6 +697,9 @@ You can set `fragment_size` to 0 to never split any sentence. [[matched-fields]] ==== Matched Fields + +WARNING: This is only supported by the `fvh` highlighter + The Fast Vector Highlighter can combine matches on multiple fields to highlight a single field using `matched_fields`. This is most intuitive for multifields that analyze the same string in different @@ -814,6 +833,9 @@ to [[phrase-limit]] ==== Phrase Limit + +WARNING: this is only supported by the `fvh` highlighter + The fast vector highlighter has a `phrase_limit` parameter that prevents it from analyzing too many phrases and eating tons of memory. It defaults to 256 so only the first 256 matching phrases in the document scored diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java index c022d5c85acbb..bb1f2a55f7cb4 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java @@ -136,7 +136,7 @@ public void testMultiPhraseCutoff() throws IOException { refresh(); SearchResponse search = client().prepareSearch() .setQuery(matchPhraseQuery("body", "Test: http://www.facebook.com ")) - .highlighter(new HighlightBuilder().field("body")).get(); + .highlighter(new HighlightBuilder().field("body").highlighterType("fvh")).get(); assertHighlight(search, 0, "body", 0, startsWith("Test: http://www.facebook.com")); search = client() .prepareSearch() @@ -146,7 +146,7 @@ public void 
testMultiPhraseCutoff() throws IOException { + "feature Test: http://www.facebook.com http://elasticsearch.org " + "http://xing.com http://cnn.com http://quora.com http://twitter.com this " + "is a test for highlighting feature")) - .highlighter(new HighlightBuilder().field("body")).execute().actionGet(); + .highlighter(new HighlightBuilder().field("body").highlighterType("fvh")).execute().actionGet(); assertHighlight(search, 0, "body", 0, equalTo("Test: " + "http://www.facebook.com http://elasticsearch.org " + "http://xing.com http://cnn.com http://quora.com"));