Skip to content

Commit

Permalink
Replace static FacetsCollector#search methods (#13733)
Browse files Browse the repository at this point in the history
We have a few public static utility search methods in FacetsCollector that accept
a Collector as last argument. In practice, these are expected to be called
providing a `FacetsCollector` as last argument. Also, we'd like to remove all
the search methods that take a `Collector` in favour of those that take a
`CollectorManager` (see #12892).

This commit adds the corresponding functionality to `FacetsCollectorManager`.
The new methods take a `FacetsCollectorManager` as last argument. The return type
has to be adapted to also include the facet results that were previously made
available through the collector argument.

In order for tests to all work I had to add support for `keepScores` to
`FacetsCollectorManager` which was missing.

Closes #13725
  • Loading branch information
javanna authored Sep 6, 2024
1 parent 0ec453d commit 47c0a6e
Show file tree
Hide file tree
Showing 19 changed files with 266 additions and 184 deletions.
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ API Changes

* GITHUB#13708: Move Operations.sameLanguage/subsetOf to test-framework. (Robert Muir)

* GITHUB#13733: Move FacetsCollector#search utility methods to `FacetsCollectorManager`, replace the `Collector`
argument with a `FacetsCollectorManager`, and update the return type to include both the `TopDocs` results and the
facet results. (Luca Cavanna)

New Features
---------------------
Expand Down
7 changes: 7 additions & 0 deletions lucene/MIGRATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -793,6 +793,13 @@ Specifically, the method `FunctionValues#getScorer(Weight weight, LeafReaderCont
Callers must now keep track of the Weight instance that created the Scorer if they need it, instead of relying on
Scorer.

### `FacetsCollector#search` utility methods moved and updated

The static `search` methods exposed by `FacetsCollector` have been moved to `FacetsCollectorManager`.
Furthermore, they take a `FacetsCollectorManager` as their last argument in place of a `Collector`, so that they
support intra-query concurrency. The return type has also been updated to `FacetsCollectorManager.FacetsResult`,
which includes both the `TopDocs` results and the facet results, the latter made available through a reduced
`FacetsCollector` instance.

### `SearchWithCollectorTask` no longer supports the `collector.class` config parameter

`collector.class` used to allow users to load a custom collector implementation. `collector.manager.class`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.facet.taxonomy.AssociationAggregationFunction;
Expand Down Expand Up @@ -97,12 +98,13 @@ private List<FacetResult> sumAssociations() throws IOException {
IndexSearcher searcher = new IndexSearcher(indexReader);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

FacetsCollector fc = new FacetsCollector();

// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query:
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
FacetsCollectorManager.FacetsResult facetsResult =
FacetsCollectorManager.search(
searcher, new MatchAllDocsQuery(), 10, new FacetsCollectorManager());
FacetsCollector fc = facetsResult.facetsCollector();

Facets tags =
new TaxonomyFacetIntAssociations(
Expand Down Expand Up @@ -133,8 +135,8 @@ private FacetResult drillDown() throws IOException {

// Now user drills down on Publish Date/2010:
q.add("tags", "solr");
FacetsCollector fc = new FacetsCollector();
FacetsCollector.search(searcher, q, 10, fc);
FacetsCollectorManager fcm = new FacetsCollectorManager();
FacetsCollector fc = FacetsCollectorManager.search(searcher, q, 10, fcm).facetsCollector();

// Retrieve results
Facets facets =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.AssociationAggregationFunction;
import org.apache.lucene.facet.taxonomy.TaxonomyFacetFloatAssociations;
Expand Down Expand Up @@ -97,12 +98,13 @@ private FacetResult search() throws IOException, ParseException {
DoubleValuesSource.fromLongField("popularity")); // the value of the 'popularity' field

// Aggregates the facet values
FacetsCollector fc = new FacetsCollector(true);
FacetsCollectorManager fcm = new FacetsCollectorManager(true);

// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query:
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
FacetsCollector fc =
FacetsCollectorManager.search(searcher, new MatchAllDocsQuery(), 10, fcm).facetsCollector();

// Retrieve results
Facets facets =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
Expand Down Expand Up @@ -97,12 +98,13 @@ private List<FacetResult> search() throws IOException {
IndexSearcher searcher = new IndexSearcher(indexReader);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

FacetsCollector fc = new FacetsCollector();
FacetsCollectorManager fcm = new FacetsCollectorManager();

// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query:
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
FacetsCollector fc =
FacetsCollectorManager.search(searcher, new MatchAllDocsQuery(), 10, fcm).facetsCollector();

// Retrieve results
List<FacetResult> results = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.range.LongRange;
import org.apache.lucene.facet.range.LongRangeFacetCounts;
Expand Down Expand Up @@ -115,13 +116,13 @@ private FacetsConfig getConfig() {
/** User runs a query and counts facets. */
public FacetResult search() throws IOException {

// Aggregates the facet counts
FacetsCollector fc = new FacetsCollector();

// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query:
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
FacetsCollector fc =
FacetsCollectorManager.search(
searcher, new MatchAllDocsQuery(), 10, new FacetsCollectorManager())
.facetsCollector();

Facets facets = new LongRangeFacetCounts("timestamp", fc, PAST_HOUR, PAST_SIX_HOURS, PAST_DAY);
return facets.getAllChildren("timestamp");
Expand All @@ -131,12 +132,13 @@ public FacetResult search() throws IOException {
public FacetResult searchTopChildren() throws IOException {

// Aggregates the facet counts
FacetsCollector fc = new FacetsCollector();
FacetsCollectorManager fcm = new FacetsCollectorManager();

// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query:
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
FacetsCollector fc =
FacetsCollectorManager.search(searcher, new MatchAllDocsQuery(), 10, fcm).facetsCollector();

Facets facets = new LongRangeFacetCounts("error timestamp", fc, logTimestampRanges);
return facets.getTopChildren(10, "error timestamp");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,13 @@ List<FacetResult> facetsWithSearch() throws IOException {
IndexSearcher searcher = new IndexSearcher(indexReader);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

FacetsCollector fc = new FacetsCollector();
FacetsCollectorManager fcm = new FacetsCollectorManager();

// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query:
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
FacetsCollector fc =
FacetsCollectorManager.search(searcher, new MatchAllDocsQuery(), 10, fcm).facetsCollector();

// Retrieve results
List<FacetResult> results = new ArrayList<>();
Expand Down Expand Up @@ -156,8 +157,8 @@ FacetResult drillDown() throws IOException {

// Now user drills down on Publish Date/2010:
q.add("Publish Date", "2010");
FacetsCollector fc = new FacetsCollector();
FacetsCollector.search(searcher, q, 10, fc);
FacetsCollectorManager fcm = new FacetsCollectorManager();
FacetsCollector fc = FacetsCollectorManager.search(searcher, q, 10, fcm).facetsCollector();

// Retrieve results
Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts;
Expand Down Expand Up @@ -92,12 +93,13 @@ private List<FacetResult> search() throws IOException {
new DefaultSortedSetDocValuesReaderState(indexReader, config);

// Aggregates the facet counts
FacetsCollector fc = new FacetsCollector();
FacetsCollectorManager fcm = new FacetsCollectorManager();

// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query:
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
FacetsCollector fc =
FacetsCollectorManager.search(searcher, new MatchAllDocsQuery(), 10, fcm).facetsCollector();

// Retrieve results
Facets facets = new SortedSetDocValuesFacetCounts(state, fc);
Expand All @@ -120,8 +122,8 @@ private FacetResult drillDown() throws IOException {
// Now user drills down on Publish Year/2010:
DrillDownQuery q = new DrillDownQuery(config);
q.add("Publish Year", "2010");
FacetsCollector fc = new FacetsCollector();
FacetsCollector.search(searcher, q, 10, fc);
FacetsCollectorManager fcm = new FacetsCollectorManager();
FacetsCollector fc = FacetsCollectorManager.search(searcher, q, 10, fcm).facetsCollector();

// Retrieve results
Facets facets = new SortedSetDocValuesFacetCounts(state, fc);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.StringDocValuesReaderState;
import org.apache.lucene.facet.StringValueFacetCounts;
Expand Down Expand Up @@ -96,12 +97,13 @@ private List<FacetResult> search() throws IOException {
new StringDocValuesReaderState(indexReader, "Publish Year");

// Aggregates the facet counts
FacetsCollector fc = new FacetsCollector();
FacetsCollectorManager fcm = new FacetsCollectorManager();

// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query:
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
FacetsCollector fc =
FacetsCollectorManager.search(searcher, new MatchAllDocsQuery(), 10, fcm).facetsCollector();

// Retrieve results
Facets authorFacets = new StringValueFacetCounts(authorState, fc);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,9 @@
* org.apache.lucene.search.Collector}, and as such can be passed to the search() method of Lucene's
* {@link org.apache.lucene.search.IndexSearcher}. In case the application also needs to collect
* documents (in addition to accumulating/collecting facets), you can use one of {@link
* org.apache.lucene.facet.FacetsCollector#search(org.apache.lucene.search.IndexSearcher,
* org.apache.lucene.search.Query, int, org.apache.lucene.search.Collector)
* FacetsCollector.search(...)} utility methods.
* org.apache.lucene.facet.FacetsCollectorManager#search(org.apache.lucene.search.IndexSearcher,
* org.apache.lucene.search.Query, int, org.apache.lucene.facet.FacetsCollectorManager)
* FacetsCollectorManager.search(...)} utility methods.
*
* <p>There is a facets collecting code example in {@link
* org.apache.lucene.demo.facet.SimpleFacetsExample#facetsWithSearch()}, see <a
Expand Down
128 changes: 0 additions & 128 deletions lucene/facet/src/java/org/apache/lucene/facet/FacetsCollector.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,9 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldCollectorManager;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TopScoreDocCollectorManager;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.DocIdSetBuilder;

Expand Down Expand Up @@ -155,118 +141,4 @@ public void finish() throws IOException {
scores = null;
context = null;
}

/**
 * Utility method that executes {@code q} and collects its top {@code n} hits by relevance score,
 * while also feeding every matching document to the provided {@link Collector} (in practice a
 * {@code FacetsCollector}, so facet counts are accumulated alongside the top hits).
 *
 * @param searcher the searcher to run the query against
 * @param q the query to execute
 * @param n the maximum number of top hits to return
 * @param fc the additional collector that receives every matching document
 * @return the top {@code n} hits for {@code q}
 * @throws IOException if there is a low-level I/O error
 */
public static TopDocs search(IndexSearcher searcher, Query q, int n, Collector fc)
throws IOException {
return doSearch(searcher, null, q, n, null, false, fc);
}

/**
 * Utility method that executes {@code q} and collects its top {@code n} hits ordered by {@code
 * sort}, while also feeding every matching document to the provided {@link Collector} (in
 * practice a {@code FacetsCollector}).
 *
 * @param searcher the searcher to run the query against
 * @param q the query to execute
 * @param n the maximum number of top hits to return
 * @param sort the sort order for the returned hits; must not be {@code null}
 * @param fc the additional collector that receives every matching document
 * @return the top {@code n} hits for {@code q}, ordered by {@code sort}
 * @throws IllegalArgumentException if {@code sort} is {@code null}
 * @throws IOException if there is a low-level I/O error
 */
public static TopFieldDocs search(IndexSearcher searcher, Query q, int n, Sort sort, Collector fc)
throws IOException {
if (sort == null) {
throw new IllegalArgumentException("sort must not be null");
}
// Safe cast: doSearch returns a TopFieldDocs whenever a non-null sort is passed.
return (TopFieldDocs) doSearch(searcher, null, q, n, sort, false, fc);
}

/**
 * Utility method that executes {@code q} and collects its top {@code n} hits ordered by {@code
 * sort}, optionally computing the relevance score of each returned hit, while also feeding every
 * matching document to the provided {@link Collector} (in practice a {@code FacetsCollector}).
 *
 * @param searcher the searcher to run the query against
 * @param q the query to execute
 * @param n the maximum number of top hits to return
 * @param sort the sort order for the returned hits; must not be {@code null}
 * @param doDocScores whether to compute (populate) scores for the returned hits
 * @param fc the additional collector that receives every matching document
 * @return the top {@code n} hits for {@code q}, ordered by {@code sort}
 * @throws IllegalArgumentException if {@code sort} is {@code null}
 * @throws IOException if there is a low-level I/O error
 */
public static TopFieldDocs search(
IndexSearcher searcher, Query q, int n, Sort sort, boolean doDocScores, Collector fc)
throws IOException {
if (sort == null) {
throw new IllegalArgumentException("sort must not be null");
}
// Safe cast: doSearch returns a TopFieldDocs whenever a non-null sort is passed.
return (TopFieldDocs) doSearch(searcher, null, q, n, sort, doDocScores, fc);
}

/**
 * Utility method that executes {@code q} and collects the top {@code n} hits by relevance score
 * that rank after {@code after} (for pagination), while also feeding every matching document to
 * the provided {@link Collector} (in practice a {@code FacetsCollector}).
 *
 * @param searcher the searcher to run the query against
 * @param after the bottom hit of the previous page; hits returned rank strictly after it
 * @param q the query to execute
 * @param n the maximum number of top hits to return
 * @param fc the additional collector that receives every matching document
 * @return the top {@code n} hits ranking after {@code after}
 * @throws IOException if there is a low-level I/O error
 */
public static TopDocs searchAfter(
IndexSearcher searcher, ScoreDoc after, Query q, int n, Collector fc) throws IOException {
return doSearch(searcher, after, q, n, null, false, fc);
}

/**
 * Utility method that executes {@code q} and collects the top {@code n} hits ordered by {@code
 * sort} that rank after {@code after} (for pagination), while also feeding every matching
 * document to the provided {@link Collector} (in practice a {@code FacetsCollector}).
 *
 * @param searcher the searcher to run the query against
 * @param after the bottom hit of the previous page; must be a {@code FieldDoc}
 * @param q the query to execute
 * @param n the maximum number of top hits to return
 * @param sort the sort order for the returned hits; must not be {@code null}
 * @param fc the additional collector that receives every matching document
 * @return the top {@code n} hits ranking after {@code after}, ordered by {@code sort}
 * @throws IllegalArgumentException if {@code sort} is {@code null}
 * @throws IOException if there is a low-level I/O error
 */
public static TopDocs searchAfter(
IndexSearcher searcher, ScoreDoc after, Query q, int n, Sort sort, Collector fc)
throws IOException {
if (sort == null) {
throw new IllegalArgumentException("sort must not be null");
}
return doSearch(searcher, after, q, n, sort, false, fc);
}

/**
 * Utility method that executes {@code q} and collects the top {@code n} hits ordered by {@code
 * sort} that rank after {@code after} (for pagination), optionally computing the relevance score
 * of each returned hit, while also feeding every matching document to the provided {@link
 * Collector} (in practice a {@code FacetsCollector}).
 *
 * @param searcher the searcher to run the query against
 * @param after the bottom hit of the previous page; must be a {@code FieldDoc}
 * @param q the query to execute
 * @param n the maximum number of top hits to return
 * @param sort the sort order for the returned hits; must not be {@code null}
 * @param doDocScores whether to compute (populate) scores for the returned hits
 * @param fc the additional collector that receives every matching document
 * @return the top {@code n} hits ranking after {@code after}, ordered by {@code sort}
 * @throws IllegalArgumentException if {@code sort} is {@code null}
 * @throws IOException if there is a low-level I/O error
 */
public static TopDocs searchAfter(
IndexSearcher searcher,
ScoreDoc after,
Query q,
int n,
Sort sort,
boolean doDocScores,
Collector fc)
throws IOException {
if (sort == null) {
throw new IllegalArgumentException("sort must not be null");
}
return doSearch(searcher, after, q, n, sort, doDocScores, fc);
}

/**
 * Shared implementation behind the public {@code search}/{@code searchAfter} utilities: runs
 * {@code q} gathering top hits (by score, or by {@code sort} when non-null) while also feeding
 * every match to {@code fc} via a {@link MultiCollector}.
 *
 * @param searcher the searcher to run the query against
 * @param after if non-null, only hits ranking after it are returned (pagination)
 * @param q the query to execute
 * @param n the maximum number of top hits to return; clamped to the index size
 * @param sort if non-null, hits are ordered by it (returned value is then a TopFieldDocs)
 * @param doDocScores whether to populate scores on the returned hits (sorted case)
 * @param fc the additional collector that receives every matching document
 * @return the collected top hits
 * @throws IllegalArgumentException if {@code after} is out of range, or is not a {@code
 *     FieldDoc} when {@code sort} is non-null
 * @throws IOException if there is a low-level I/O error
 */
private static TopDocs doSearch(
IndexSearcher searcher,
ScoreDoc after,
Query q,
int n,
Sort sort,
boolean doDocScores,
Collector fc)
throws IOException {

// Clamp the requested count to the index size; use at least 1 so the
// top-docs collector constructors accept the value on an empty reader.
int limit = Math.max(1, searcher.getIndexReader().maxDoc());
n = Math.min(n, limit);

if (after != null && after.doc >= limit) {
throw new IllegalArgumentException(
"after.doc exceeds the number of documents in the reader: after.doc="
+ after.doc
+ " limit="
+ limit);
}

if (n == 0) {
// No hits requested: just count matches while still driving fc.
TotalHitCountCollector countingCollector = new TotalHitCountCollector();
searcher.search(q, MultiCollector.wrap(countingCollector, fc));
return new TopDocs(
new TotalHits(countingCollector.getTotalHits(), TotalHits.Relation.EQUAL_TO),
new ScoreDoc[0]);
}

TopDocsCollector<?> topDocsCollector;
if (sort == null) {
topDocsCollector = new TopScoreDocCollectorManager(n, after, Integer.MAX_VALUE, false).newCollector();
} else {
if (after != null && !(after instanceof FieldDoc)) {
// TODO: if we fix type safety of TopFieldDocs we can
// remove this
throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
}
topDocsCollector =
new TopFieldCollectorManager(sort, n, (FieldDoc) after, Integer.MAX_VALUE, false)
.newCollector(); // TODO: can we disable exact hit counts
}

searcher.search(q, MultiCollector.wrap(topDocsCollector, fc));
TopDocs topDocs = topDocsCollector.topDocs();
if (doDocScores) {
TopFieldCollector.populateScores(topDocs.scoreDocs, searcher, q);
}
return topDocs;
}
}
Loading

0 comments on commit 47c0a6e

Please sign in to comment.