Skip to content

Commit

Permalink
Replace static FacetsCollector#search methods (#13733)
Browse files Browse the repository at this point in the history
We have a few public static utility search methods in FacetsCollector that accept
a Collector as last argument. In practice, these are expected to be called
providing a `FacetsCollector` as last argument. Also, we'd like to remove all
the search methods that take a `Collector` in favour of those that take a
`CollectorManager` (see #12892).

This commit adds the corresponding functionality to `FacetsCollectorManager`.
The new methods take a `FacetsCollectorManager` as last argument. The return type
has to be adapted to also include the facet results that were previously made
available through the collector argument.

In order for tests to all work I had to add support for `keepScores` to
`FacetsCollectorManager` which was missing.

Closes #13725
  • Loading branch information
javanna authored Sep 6, 2024
1 parent 0ec453d commit 47c0a6e
Show file tree
Hide file tree
Showing 19 changed files with 266 additions and 184 deletions.
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ API Changes

* GITHUB#13708: Move Operations.sameLanguage/subsetOf to test-framework. (Robert Muir)

* GITHUB#13733: Move FacetsCollector#search utility methods to `FacetsCollectorManager`, replace the `Collector`
argument with a `FacetsCollectorManager`, and update the return type to include both the `TopDocs` results and the
facet results. (Luca Cavanna)

New Features
---------------------
Expand Down
7 changes: 7 additions & 0 deletions lucene/MIGRATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -793,6 +793,13 @@ Specifically, the method `FunctionValues#getScorer(Weight weight, LeafReaderCont
Callers must now keep track of the Weight instance that created the Scorer if they need it, instead of relying on
Scorer.

### `FacetsCollector#search` utility methods moved and updated

The static `search` methods exposed by `FacetsCollector` have been moved to `FacetsCollectorManager`.
Furthermore, they take a `FacetsCollectorManager` as their last argument in place of a `Collector`, so that they
support intra-query concurrency. The return type has also been updated to `FacetsCollectorManager.FacetsResult`,
which includes both the `TopDocs` results and the facet results, the latter made available through a reduced
`FacetsCollector` instance.

### `SearchWithCollectorTask` no longer supports the `collector.class` config parameter

`collector.class` used to allow users to load a custom collector implementation. `collector.manager.class`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.facet.taxonomy.AssociationAggregationFunction;
Expand Down Expand Up @@ -97,12 +98,13 @@ private List<FacetResult> sumAssociations() throws IOException {
IndexSearcher searcher = new IndexSearcher(indexReader);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

FacetsCollector fc = new FacetsCollector();

// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query:
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
FacetsCollectorManager.FacetsResult facetsResult =
FacetsCollectorManager.search(
searcher, new MatchAllDocsQuery(), 10, new FacetsCollectorManager());
FacetsCollector fc = facetsResult.facetsCollector();

Facets tags =
new TaxonomyFacetIntAssociations(
Expand Down Expand Up @@ -133,8 +135,8 @@ private FacetResult drillDown() throws IOException {

// Now user drills down on Publish Date/2010:
q.add("tags", "solr");
FacetsCollector fc = new FacetsCollector();
FacetsCollector.search(searcher, q, 10, fc);
FacetsCollectorManager fcm = new FacetsCollectorManager();
FacetsCollector fc = FacetsCollectorManager.search(searcher, q, 10, fcm).facetsCollector();

// Retrieve results
Facets facets =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.AssociationAggregationFunction;
import org.apache.lucene.facet.taxonomy.TaxonomyFacetFloatAssociations;
Expand Down Expand Up @@ -97,12 +98,13 @@ private FacetResult search() throws IOException, ParseException {
DoubleValuesSource.fromLongField("popularity")); // the value of the 'popularity' field

// Aggregates the facet values
FacetsCollector fc = new FacetsCollector(true);
FacetsCollectorManager fcm = new FacetsCollectorManager(true);

// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query:
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
FacetsCollector fc =
FacetsCollectorManager.search(searcher, new MatchAllDocsQuery(), 10, fcm).facetsCollector();

// Retrieve results
Facets facets =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
Expand Down Expand Up @@ -97,12 +98,13 @@ private List<FacetResult> search() throws IOException {
IndexSearcher searcher = new IndexSearcher(indexReader);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

FacetsCollector fc = new FacetsCollector();
FacetsCollectorManager fcm = new FacetsCollectorManager();

// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query:
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
FacetsCollector fc =
FacetsCollectorManager.search(searcher, new MatchAllDocsQuery(), 10, fcm).facetsCollector();

// Retrieve results
List<FacetResult> results = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.range.LongRange;
import org.apache.lucene.facet.range.LongRangeFacetCounts;
Expand Down Expand Up @@ -115,13 +116,13 @@ private FacetsConfig getConfig() {
/** User runs a query and counts facets. */
public FacetResult search() throws IOException {

// Aggregates the facet counts
FacetsCollector fc = new FacetsCollector();

// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query:
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
FacetsCollector fc =
FacetsCollectorManager.search(
searcher, new MatchAllDocsQuery(), 10, new FacetsCollectorManager())
.facetsCollector();

Facets facets = new LongRangeFacetCounts("timestamp", fc, PAST_HOUR, PAST_SIX_HOURS, PAST_DAY);
return facets.getAllChildren("timestamp");
Expand All @@ -131,12 +132,13 @@ public FacetResult search() throws IOException {
public FacetResult searchTopChildren() throws IOException {

// Aggregates the facet counts
FacetsCollector fc = new FacetsCollector();
FacetsCollectorManager fcm = new FacetsCollectorManager();

// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query:
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
FacetsCollector fc =
FacetsCollectorManager.search(searcher, new MatchAllDocsQuery(), 10, fcm).facetsCollector();

Facets facets = new LongRangeFacetCounts("error timestamp", fc, logTimestampRanges);
return facets.getTopChildren(10, "error timestamp");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,13 @@ List<FacetResult> facetsWithSearch() throws IOException {
IndexSearcher searcher = new IndexSearcher(indexReader);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

FacetsCollector fc = new FacetsCollector();
FacetsCollectorManager fcm = new FacetsCollectorManager();

// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query:
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
FacetsCollector fc =
FacetsCollectorManager.search(searcher, new MatchAllDocsQuery(), 10, fcm).facetsCollector();

// Retrieve results
List<FacetResult> results = new ArrayList<>();
Expand Down Expand Up @@ -156,8 +157,8 @@ FacetResult drillDown() throws IOException {

// Now user drills down on Publish Date/2010:
q.add("Publish Date", "2010");
FacetsCollector fc = new FacetsCollector();
FacetsCollector.search(searcher, q, 10, fc);
FacetsCollectorManager fcm = new FacetsCollectorManager();
FacetsCollector fc = FacetsCollectorManager.search(searcher, q, 10, fcm).facetsCollector();

// Retrieve results
Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts;
Expand Down Expand Up @@ -92,12 +93,13 @@ private List<FacetResult> search() throws IOException {
new DefaultSortedSetDocValuesReaderState(indexReader, config);

// Aggregates the facet counts
FacetsCollector fc = new FacetsCollector();
FacetsCollectorManager fcm = new FacetsCollectorManager();

// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query:
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
FacetsCollector fc =
FacetsCollectorManager.search(searcher, new MatchAllDocsQuery(), 10, fcm).facetsCollector();

// Retrieve results
Facets facets = new SortedSetDocValuesFacetCounts(state, fc);
Expand All @@ -120,8 +122,8 @@ private FacetResult drillDown() throws IOException {
// Now user drills down on Publish Year/2010:
DrillDownQuery q = new DrillDownQuery(config);
q.add("Publish Year", "2010");
FacetsCollector fc = new FacetsCollector();
FacetsCollector.search(searcher, q, 10, fc);
FacetsCollectorManager fcm = new FacetsCollectorManager();
FacetsCollector fc = FacetsCollectorManager.search(searcher, q, 10, fcm).facetsCollector();

// Retrieve results
Facets facets = new SortedSetDocValuesFacetCounts(state, fc);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.StringDocValuesReaderState;
import org.apache.lucene.facet.StringValueFacetCounts;
Expand Down Expand Up @@ -96,12 +97,13 @@ private List<FacetResult> search() throws IOException {
new StringDocValuesReaderState(indexReader, "Publish Year");

// Aggregates the facet counts
FacetsCollector fc = new FacetsCollector();
FacetsCollectorManager fcm = new FacetsCollectorManager();

// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query:
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
FacetsCollector fc =
FacetsCollectorManager.search(searcher, new MatchAllDocsQuery(), 10, fcm).facetsCollector();

// Retrieve results
Facets authorFacets = new StringValueFacetCounts(authorState, fc);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,9 @@
* org.apache.lucene.search.Collector}, and as such can be passed to the search() method of Lucene's
* {@link org.apache.lucene.search.IndexSearcher}. In case the application also needs to collect
* documents (in addition to accumulating/collecting facets), you can use one of {@link
* org.apache.lucene.facet.FacetsCollector#search(org.apache.lucene.search.IndexSearcher,
* org.apache.lucene.search.Query, int, org.apache.lucene.search.Collector)
* FacetsCollector.search(...)} utility methods.
* org.apache.lucene.facet.FacetsCollectorManager#search(org.apache.lucene.search.IndexSearcher,
* org.apache.lucene.search.Query, int, org.apache.lucene.facet.FacetsCollectorManager)
* FacetsCollectorManager.search(...)} utility methods.
*
* <p>There is a facets collecting code example in {@link
* org.apache.lucene.demo.facet.SimpleFacetsExample#facetsWithSearch()}, see <a
Expand Down
128 changes: 0 additions & 128 deletions lucene/facet/src/java/org/apache/lucene/facet/FacetsCollector.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,9 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldCollectorManager;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TopScoreDocCollectorManager;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.DocIdSetBuilder;

Expand Down Expand Up @@ -155,118 +141,4 @@ public void finish() throws IOException {
scores = null;
context = null;
}

/**
 * Utility method that executes {@code q} and collects its top {@code n} hits by relevance score,
 * while also feeding every matching document to the provided {@link Collector} (in practice a
 * {@code FacetsCollector}, so facet counts are accumulated alongside the top hits).
 *
 * @param searcher the searcher to run the query against
 * @param q the query to execute
 * @param n the maximum number of top hits to return
 * @param fc the additional collector that receives every matching document
 * @return the top {@code n} hits for {@code q}
 * @throws IOException if there is a low-level I/O error
 */
public static TopDocs search(IndexSearcher searcher, Query q, int n, Collector fc)
throws IOException {
return doSearch(searcher, null, q, n, null, false, fc);
}

/**
 * Utility method that executes {@code q} and collects its top {@code n} hits ordered by {@code
 * sort}, while also feeding every matching document to the provided {@link Collector} (in
 * practice a {@code FacetsCollector}).
 *
 * @param searcher the searcher to run the query against
 * @param q the query to execute
 * @param n the maximum number of top hits to return
 * @param sort the sort order for the returned hits; must not be {@code null}
 * @param fc the additional collector that receives every matching document
 * @return the top {@code n} hits for {@code q}, ordered by {@code sort}
 * @throws IllegalArgumentException if {@code sort} is {@code null}
 * @throws IOException if there is a low-level I/O error
 */
public static TopFieldDocs search(IndexSearcher searcher, Query q, int n, Sort sort, Collector fc)
throws IOException {
if (sort == null) {
throw new IllegalArgumentException("sort must not be null");
}
// Safe cast: doSearch returns a TopFieldDocs whenever a non-null sort is passed.
return (TopFieldDocs) doSearch(searcher, null, q, n, sort, false, fc);
}

/**
 * Utility method that executes {@code q} and collects its top {@code n} hits ordered by {@code
 * sort}, optionally computing the relevance score of each returned hit, while also feeding every
 * matching document to the provided {@link Collector} (in practice a {@code FacetsCollector}).
 *
 * @param searcher the searcher to run the query against
 * @param q the query to execute
 * @param n the maximum number of top hits to return
 * @param sort the sort order for the returned hits; must not be {@code null}
 * @param doDocScores whether to compute (populate) scores for the returned hits
 * @param fc the additional collector that receives every matching document
 * @return the top {@code n} hits for {@code q}, ordered by {@code sort}
 * @throws IllegalArgumentException if {@code sort} is {@code null}
 * @throws IOException if there is a low-level I/O error
 */
public static TopFieldDocs search(
IndexSearcher searcher, Query q, int n, Sort sort, boolean doDocScores, Collector fc)
throws IOException {
if (sort == null) {
throw new IllegalArgumentException("sort must not be null");
}
// Safe cast: doSearch returns a TopFieldDocs whenever a non-null sort is passed.
return (TopFieldDocs) doSearch(searcher, null, q, n, sort, doDocScores, fc);
}

/**
 * Utility method that executes {@code q} and collects the top {@code n} hits by relevance score
 * that rank after {@code after} (for pagination), while also feeding every matching document to
 * the provided {@link Collector} (in practice a {@code FacetsCollector}).
 *
 * @param searcher the searcher to run the query against
 * @param after the bottom hit of the previous page; hits returned rank strictly after it
 * @param q the query to execute
 * @param n the maximum number of top hits to return
 * @param fc the additional collector that receives every matching document
 * @return the top {@code n} hits ranking after {@code after}
 * @throws IOException if there is a low-level I/O error
 */
public static TopDocs searchAfter(
IndexSearcher searcher, ScoreDoc after, Query q, int n, Collector fc) throws IOException {
return doSearch(searcher, after, q, n, null, false, fc);
}

/**
 * Utility method that executes {@code q} and collects the top {@code n} hits ordered by {@code
 * sort} that rank after {@code after} (for pagination), while also feeding every matching
 * document to the provided {@link Collector} (in practice a {@code FacetsCollector}).
 *
 * @param searcher the searcher to run the query against
 * @param after the bottom hit of the previous page; must be a {@code FieldDoc}
 * @param q the query to execute
 * @param n the maximum number of top hits to return
 * @param sort the sort order for the returned hits; must not be {@code null}
 * @param fc the additional collector that receives every matching document
 * @return the top {@code n} hits ranking after {@code after}, ordered by {@code sort}
 * @throws IllegalArgumentException if {@code sort} is {@code null}
 * @throws IOException if there is a low-level I/O error
 */
public static TopDocs searchAfter(
IndexSearcher searcher, ScoreDoc after, Query q, int n, Sort sort, Collector fc)
throws IOException {
if (sort == null) {
throw new IllegalArgumentException("sort must not be null");
}
return doSearch(searcher, after, q, n, sort, false, fc);
}

/**
 * Utility method that executes {@code q} and collects the top {@code n} hits ordered by {@code
 * sort} that rank after {@code after} (for pagination), optionally computing the relevance score
 * of each returned hit, while also feeding every matching document to the provided {@link
 * Collector} (in practice a {@code FacetsCollector}).
 *
 * @param searcher the searcher to run the query against
 * @param after the bottom hit of the previous page; must be a {@code FieldDoc}
 * @param q the query to execute
 * @param n the maximum number of top hits to return
 * @param sort the sort order for the returned hits; must not be {@code null}
 * @param doDocScores whether to compute (populate) scores for the returned hits
 * @param fc the additional collector that receives every matching document
 * @return the top {@code n} hits ranking after {@code after}, ordered by {@code sort}
 * @throws IllegalArgumentException if {@code sort} is {@code null}
 * @throws IOException if there is a low-level I/O error
 */
public static TopDocs searchAfter(
IndexSearcher searcher,
ScoreDoc after,
Query q,
int n,
Sort sort,
boolean doDocScores,
Collector fc)
throws IOException {
if (sort == null) {
throw new IllegalArgumentException("sort must not be null");
}
return doSearch(searcher, after, q, n, sort, doDocScores, fc);
}

/**
 * Shared implementation behind the public {@code search}/{@code searchAfter} utilities: runs
 * {@code q} gathering top hits (by score, or by {@code sort} when non-null) while also feeding
 * every match to {@code fc} via a {@link MultiCollector}.
 *
 * @param searcher the searcher to run the query against
 * @param after if non-null, only hits ranking after it are returned (pagination)
 * @param q the query to execute
 * @param n the maximum number of top hits to return; clamped to the index size
 * @param sort if non-null, hits are ordered by it (returned value is then a TopFieldDocs)
 * @param doDocScores whether to populate scores on the returned hits (sorted case)
 * @param fc the additional collector that receives every matching document
 * @return the collected top hits
 * @throws IllegalArgumentException if {@code after} is out of range, or is not a {@code
 *     FieldDoc} when {@code sort} is non-null
 * @throws IOException if there is a low-level I/O error
 */
private static TopDocs doSearch(
IndexSearcher searcher,
ScoreDoc after,
Query q,
int n,
Sort sort,
boolean doDocScores,
Collector fc)
throws IOException {

// Clamp the requested count to the index size; use at least 1 so the
// top-docs collector constructors accept the value on an empty reader.
int limit = Math.max(1, searcher.getIndexReader().maxDoc());
n = Math.min(n, limit);

if (after != null && after.doc >= limit) {
throw new IllegalArgumentException(
"after.doc exceeds the number of documents in the reader: after.doc="
+ after.doc
+ " limit="
+ limit);
}

if (n == 0) {
// No hits requested: just count matches while still driving fc.
TotalHitCountCollector countingCollector = new TotalHitCountCollector();
searcher.search(q, MultiCollector.wrap(countingCollector, fc));
return new TopDocs(
new TotalHits(countingCollector.getTotalHits(), TotalHits.Relation.EQUAL_TO),
new ScoreDoc[0]);
}

TopDocsCollector<?> topDocsCollector;
if (sort == null) {
topDocsCollector = new TopScoreDocCollectorManager(n, after, Integer.MAX_VALUE, false).newCollector();
} else {
if (after != null && !(after instanceof FieldDoc)) {
// TODO: if we fix type safety of TopFieldDocs we can
// remove this
throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
}
topDocsCollector =
new TopFieldCollectorManager(sort, n, (FieldDoc) after, Integer.MAX_VALUE, false)
.newCollector(); // TODO: can we disable exact hit counts
}

searcher.search(q, MultiCollector.wrap(topDocsCollector, fc));
TopDocs topDocs = topDocsCollector.topDocs();
if (doDocScores) {
TopFieldCollector.populateScores(topDocs.scoreDocs, searcher, q);
}
return topDocs;
}
}
Loading

0 comments on commit 47c0a6e

Please sign in to comment.