From 8d4f7a6e99d2da802b7019247b0f8f305d71c024 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 25 Jul 2024 15:38:21 +0200 Subject: [PATCH] Bump the window size of disjunction from 2,048 to 4,096. (#13605) It's been pointed out multiple times that a difference between Tantivy and Lucene is the fact that Tantivy uses windows of 4,096 docs when Lucene has a 2x smaller window size of 2,048 docs and that this might explain part of the performance difference. luceneutil suggests that bumping the window size to 4,096 does indeed improve performance for counting queries, but not for top-k queries. I still suggest bumping the window size across the board to keep our disjunction scorer consistent. --- .../apache/lucene/search/BooleanScorer.java | 2 +- .../lucene/search/MaxScoreBulkScorer.java | 2 +- .../lucene/search/TestMaxScoreBulkScorer.java | 26 +++++++++---------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java index 919cc1680cf0..e6e4f456bf3b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java @@ -30,7 +30,7 @@ */ final class BooleanScorer extends BulkScorer { - static final int SHIFT = 11; + static final int SHIFT = 12; static final int SIZE = 1 << SHIFT; static final int MASK = SIZE - 1; static final int SET_SIZE = 1 << (SHIFT - 6); diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java index bce02cbdae61..8786343cceca 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java @@ -25,7 +25,7 @@ final class MaxScoreBulkScorer extends BulkScorer { - static final int INNER_WINDOW_SIZE = 1 << 11; + static final int
INNER_WINDOW_SIZE = 1 << 12; private final int maxDoc; // All scorers, sorted by increasing max score. diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java index 4c731201dc65..d7ccea692759 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java @@ -125,19 +125,19 @@ public void collect(int doc) throws IOException { assertEquals(2 + 1, scorer.score(), 0); break; case 1: - assertEquals(2048, doc); + assertEquals(4096, doc); assertEquals(2, scorer.score(), 0); break; case 2: - assertEquals(6144, doc); + assertEquals(12288, doc); assertEquals(2 + 1, scorer.score(), 0); break; case 3: - assertEquals(8192, doc); + assertEquals(16384, doc); assertEquals(1, scorer.score(), 0); break; case 4: - assertEquals(10240, doc); + assertEquals(20480, doc); assertEquals(1, scorer.score(), 0); break; default: @@ -195,13 +195,13 @@ public void collect(int doc) throws IOException { assertEquals(2 + 1, scorer.score(), 0); break; case 1: - assertEquals(2048, doc); + assertEquals(4096, doc); assertEquals(2, scorer.score(), 0); // simulate top-2 retrieval scorer.setMinCompetitiveScore(Math.nextUp(2)); break; case 2: - assertEquals(6144, doc); + assertEquals(12288, doc); assertEquals(2 + 1, scorer.score(), 0); scorer.setMinCompetitiveScore(Math.nextUp(2 + 1)); break; @@ -268,19 +268,19 @@ public void collect(int doc) throws IOException { assertEquals(2 + 1, scorer.score(), 0); break; case 1: - assertEquals(2048, doc); + assertEquals(4096, doc); assertEquals(2, scorer.score(), 0); break; case 2: - assertEquals(6144, doc); + assertEquals(12288, doc); assertEquals(2 + 1 + 3, scorer.score(), 0); break; case 3: - assertEquals(8192, doc); + assertEquals(16384, doc); assertEquals(1, scorer.score(), 0); break; case 4: - assertEquals(10240, doc); + assertEquals(20480, doc); 
assertEquals(1 + 3, scorer.score(), 0); break; default: @@ -346,18 +346,18 @@ public void collect(int doc) throws IOException { assertEquals(2 + 1, scorer.score(), 0); break; case 1: - assertEquals(2048, doc); + assertEquals(4096, doc); assertEquals(2, scorer.score(), 0); // simulate top-2 retrieval scorer.setMinCompetitiveScore(Math.nextUp(2)); break; case 2: - assertEquals(6144, doc); + assertEquals(12288, doc); assertEquals(2 + 1 + 3, scorer.score(), 0); scorer.setMinCompetitiveScore(Math.nextUp(2 + 1)); break; case 3: - assertEquals(10240, doc); + assertEquals(20480, doc); assertEquals(1 + 3, scorer.score(), 0); scorer.setMinCompetitiveScore(Math.nextUp(1 + 3)); break;