diff --git a/server/src/main/java/org/apache/lucene/search/uhighlight/BoundedBreakIteratorScanner.java b/server/src/main/java/org/apache/lucene/search/uhighlight/BoundedBreakIteratorScanner.java index 42c7d7cd67ce2..2413ab225c879 100644 --- a/server/src/main/java/org/apache/lucene/search/uhighlight/BoundedBreakIteratorScanner.java +++ b/server/src/main/java/org/apache/lucene/search/uhighlight/BoundedBreakIteratorScanner.java @@ -89,16 +89,29 @@ public int preceding(int offset) { innerStart = innerEnd; innerEnd = windowEnd; } else { - windowStart = innerStart = mainBreak.preceding(offset); - windowEnd = innerEnd = mainBreak.following(offset - 1); - // expand to next break until we reach maxLen - while (innerEnd - innerStart < maxLen) { - int newEnd = mainBreak.following(innerEnd); - if (newEnd == DONE || (newEnd - innerStart) > maxLen) { - break; - } - windowEnd = innerEnd = newEnd; + innerStart = Math.max(mainBreak.preceding(offset), 0); + + final int targetEndOffset = offset + Math.max(0, maxLen - (offset - innerStart)); + final int textEndIndex = getText().getEndIndex(); + + if (targetEndOffset + 1 > textEndIndex) { + innerEnd = textEndIndex; + } else { + innerEnd = mainBreak.preceding(targetEndOffset + 1); + } + + assert innerEnd != DONE && innerEnd >= innerStart; + + // in case no break was found up to maxLen, find one afterwards. 
+ if (innerStart == innerEnd) { + innerEnd = mainBreak.following(targetEndOffset); + assert innerEnd - innerStart > maxLen; + } else { + assert innerEnd - innerStart <= maxLen; } + + windowStart = innerStart; + windowEnd = innerEnd; } if (innerEnd - innerStart > maxLen) { diff --git a/server/src/test/java/org/apache/lucene/search/uhighlight/BoundedBreakIteratorScannerTests.java b/server/src/test/java/org/apache/lucene/search/uhighlight/BoundedBreakIteratorScannerTests.java index 26cc2c6a5a01f..9b2bcd95da6ae 100644 --- a/server/src/test/java/org/apache/lucene/search/uhighlight/BoundedBreakIteratorScannerTests.java +++ b/server/src/test/java/org/apache/lucene/search/uhighlight/BoundedBreakIteratorScannerTests.java @@ -124,4 +124,20 @@ public void testBoundedSentence() { ); } } + + /** Exercises the scanner on a two-sentence text whose length (56 characters) is well below the value 1000 passed to getSentence (presumably the maxLen bound; confirm against getSentence). For an offset inside each sentence it asserts that {@code preceding(offset)} returns 0 and 33 respectively, and that the {@code following(offset - 1)} call made right afterwards returns {@code text.length()}. */ public void testTextThatEndsBeforeMaxLen() { + /* scanner under test, obtained from getSentence for Locale.ROOT with the value 1000 */ BreakIterator bi = BoundedBreakIteratorScanner.getSentence(Locale.ROOT, 1000); + + final String text = "This is the first test sentence. Here is the second one."; + + /* case 1: offset of a word in the first sentence; expected start is 0 and expected end is the end of the text */ int offset = text.indexOf("first"); + bi.setText(text); + assertEquals(0, bi.preceding(offset)); + assertEquals(text.length(), bi.following(offset - 1)); + + /* case 2: offset of a word in the second sentence; the text is set again before querying, expected start is index 33 (the H of Here) and expected end is again the end of the text */ offset = text.indexOf("second"); + bi.setText(text); + assertEquals(33, bi.preceding(offset)); + assertEquals(text.length(), bi.following(offset - 1)); + } }