From d668ca2bf0158fec40b35195f2239a7bb0d43926 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 24 Feb 2022 09:56:50 -0500 Subject: [PATCH 1/5] LUCENE-10311: remove complex cost estimation and abstraction leakage around it Cost estimation drives the API complexity out of control, we don't need it. Hopefully i've cleared up all the API damage from this explosive leak. Instead, FixedBitSet.approximateCardinality() is used for cost estimation. TODO: let's optimize that! --- .../document/LatLonPointDistanceQuery.java | 2 +- .../lucene/document/RangeFieldQuery.java | 2 +- .../apache/lucene/document/SpatialQuery.java | 9 +- .../document/XYPointInGeometryQuery.java | 2 +- .../MultiTermQueryConstantScoreWrapper.java | 2 +- .../apache/lucene/search/PointInSetQuery.java | 2 +- .../apache/lucene/search/PointRangeQuery.java | 2 +- .../apache/lucene/search/TermInSetQuery.java | 2 +- .../apache/lucene/util/DocIdSetBuilder.java | 61 +----- .../lucene/util/TestDocIdSetBuilder.java | 207 ------------------ .../sandbox/search/MultiRangeQuery.java | 2 +- .../composite/IntersectsRPTVerifyQuery.java | 4 +- .../prefix/IntersectsPrefixTreeQuery.java | 2 +- .../spatial3d/PointInGeo3DShapeQuery.java | 2 +- 14 files changed, 17 insertions(+), 284 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java index 4adf74529131..a41592b63dde 100644 --- a/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java @@ -148,7 +148,7 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti LatLonPoint.checkCompatible(fieldInfo); // matching docids - DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field); + DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc()); final IntersectVisitor visitor = getIntersectVisitor(result); final Weight weight = this; diff --git a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java index ab83969de6f7..588c434d08e8 100644 --- a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java @@ -538,7 +538,7 @@ public long cost() { } else { return new ScorerSupplier() { - final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field); + final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc()); final IntersectVisitor visitor = getIntersectVisitor(result); long cost = -1; diff --git a/lucene/core/src/java/org/apache/lucene/document/SpatialQuery.java b/lucene/core/src/java/org/apache/lucene/document/SpatialQuery.java index c6170ecad626..6f23560b2064 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SpatialQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SpatialQuery.java @@ -187,7 +187,7 @@ && hasAnyHits(spatialVisitor, queryRelation, values) == false) { return null; } // walk the tree to get matching documents - return new RelationScorerSupplier(values, spatialVisitor, queryRelation, field) { + return new RelationScorerSupplier(values, spatialVisitor, queryRelation) { @Override public Scorer get(long leadCost) throws IOException { return getScorer(reader, weight, score(), scoreMode); @@ -252,18 +252,15 @@ private abstract static class RelationScorerSupplier extends ScorerSupplier { private final PointValues values; private final SpatialVisitor spatialVisitor; private final QueryRelation queryRelation; - private final String field; private long cost = -1; RelationScorerSupplier( final PointValues values, SpatialVisitor spatialVisitor, - final QueryRelation queryRelation, - final String field) { + final QueryRelation queryRelation) { this.values = values; this.spatialVisitor = spatialVisitor; this.queryRelation = queryRelation; - this.field = field; } protected Scorer getScorer( @@ -311,7 +308,7 @@ && cost() > reader.maxDoc() / 2) { cost[0] == 0 ? DocIdSetIterator.empty() : new BitSetIterator(result, cost[0]); return new ConstantScoreScorer(weight, boost, scoreMode, iterator); } else { - final DocIdSetBuilder docIdSetBuilder = new DocIdSetBuilder(reader.maxDoc(), values, field); + final DocIdSetBuilder docIdSetBuilder = new DocIdSetBuilder(reader.maxDoc()); values.intersect(getSparseVisitor(spatialVisitor, queryRelation, docIdSetBuilder)); final DocIdSetIterator iterator = docIdSetBuilder.build().iterator(); return new ConstantScoreScorer(weight, boost, scoreMode, iterator); diff --git a/lucene/core/src/java/org/apache/lucene/document/XYPointInGeometryQuery.java b/lucene/core/src/java/org/apache/lucene/document/XYPointInGeometryQuery.java index 1533463a2734..595c3d163694 100644 --- a/lucene/core/src/java/org/apache/lucene/document/XYPointInGeometryQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/XYPointInGeometryQuery.java @@ -149,7 +149,7 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti return new ScorerSupplier() { long cost = -1; - DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field); + DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc()); final IntersectVisitor visitor = getIntersectVisitor(result, tree); @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java index f729ed6421c6..105fb14d92d1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java @@ -180,7 +180,7 @@ private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException { } // Too many terms: go back to the terms we already collected and start building the bit set - DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc(), terms); + DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc()); if (collectedTerms.isEmpty() == false) { TermsEnum termsEnum2 = terms.iterator(); for (TermAndState t : collectedTerms) { diff --git a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java index a49072794dda..8af279971b4f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java @@ -173,7 +173,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { + bytesPerDim); } - DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field); + DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc()); if (numDims == 1) { diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index aeb474422a9c..0d133ce7d261 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -331,7 +331,7 @@ public long cost() { } else { return new ScorerSupplier() { - final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field); + final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc()); final IntersectVisitor visitor = getIntersectVisitor(result); long cost = -1; diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java index a77b735378e5..a2b4c9a9af0a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java @@ -288,7 +288,7 @@ private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException { matchingTerms.add(new TermAndState(field, termsEnum)); } else { assert matchingTerms.size() == threshold; - builder = new DocIdSetBuilder(reader.maxDoc(), terms); + builder = new DocIdSetBuilder(reader.maxDoc()); docs = termsEnum.postings(docs, PostingsEnum.NONE); builder.add(docs); for (TermAndState t : matchingTerms) { diff --git a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java index 67b3dde9f200..47637130e9f6 100644 --- a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java @@ -19,8 +19,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; -import org.apache.lucene.index.PointValues; -import org.apache.lucene.index.Terms; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.packed.PackedInts; @@ -100,52 +98,17 @@ public void add(int doc) { private final int maxDoc; private final int threshold; - // pkg-private for testing - final boolean multivalued; - final double numValuesPerDoc; private List buffers = new ArrayList<>(); private int totalAllocated; // accumulated size of the allocated buffers private FixedBitSet bitSet; - private long counter = -1; private BulkAdder adder; /** Create a builder that can contain doc IDs between {@code 0} and {@code maxDoc}. */ public DocIdSetBuilder(int maxDoc) { - this(maxDoc, -1, -1); - } - - /** - * Create a {@link DocIdSetBuilder} instance that is optimized for accumulating docs that match - * the given {@link Terms}. - */ - public DocIdSetBuilder(int maxDoc, Terms terms) throws IOException { - this(maxDoc, terms.getDocCount(), terms.getSumDocFreq()); - } - - /** - * Create a {@link DocIdSetBuilder} instance that is optimized for accumulating docs that match - * the given {@link PointValues}. - */ - public DocIdSetBuilder(int maxDoc, PointValues values, String field) throws IOException { - this(maxDoc, values.getDocCount(), values.size()); - } - - DocIdSetBuilder(int maxDoc, int docCount, long valueCount) { this.maxDoc = maxDoc; - this.multivalued = docCount < 0 || docCount != valueCount; - if (docCount <= 0 || valueCount < 0) { - // assume one value per doc, this means the cost will be overestimated - // if the docs are actually multi-valued - this.numValuesPerDoc = 1; - } else { - // otherwise compute from index stats - this.numValuesPerDoc = (double) valueCount / docCount; - } - - assert numValuesPerDoc >= 1 : "valueCount=" + valueCount + " docCount=" + docCount; // For ridiculously small sets, we'll just use a sorted int[] // maxDoc >>> 7 is a good value if you want to save memory, lower values @@ -190,10 +153,8 @@ public BulkAdder grow(int numDocs) { ensureBufferCapacity(numDocs); } else { upgradeToBitSet(); - counter += numDocs; } } else { - counter += numDocs; } return adder; } @@ -247,17 +208,14 @@ private void growBuffer(Buffer buffer, int additionalCapacity) { private void upgradeToBitSet() { assert bitSet == null; FixedBitSet bitSet = new FixedBitSet(maxDoc); - long counter = 0; for (Buffer buffer : buffers) { int[] array = buffer.array; int length = buffer.length; - counter += length; for (int i = 0; i < length; ++i) { bitSet.set(array[i]); } } this.bitSet = bitSet; - this.counter = counter; this.buffers = null; this.adder = new FixedBitSetAdder(bitSet); } @@ -266,20 +224,12 @@ private void upgradeToBitSet() { public DocIdSet build() { try { if (bitSet != null) { - assert counter >= 0; - final long cost = Math.round(counter / numValuesPerDoc); - return new BitDocIdSet(bitSet, cost); + return new BitDocIdSet(bitSet); } else { Buffer concatenated = concat(buffers); LSBRadixSorter sorter = new LSBRadixSorter(); sorter.sort(PackedInts.bitsRequired(maxDoc - 1), concatenated.array, concatenated.length); - final int l; - if (multivalued) { - l = dedup(concatenated.array, concatenated.length); - } else { - assert noDups(concatenated.array, concatenated.length); - l = concatenated.length; - } + final int l = dedup(concatenated.array, concatenated.length); assert l <= concatenated.length; concatenated.array[l] = DocIdSetIterator.NO_MORE_DOCS; return new IntArrayDocIdSet(concatenated.array, l); @@ -336,11 +286,4 @@ private static int dedup(int[] arr, int length) { } return l; } - - private static boolean noDups(int[] a, int len) { - for (int i = 1; i < len; ++i) { - assert a[i - 1] < a[i]; - } - return true; - } } diff --git a/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java index 2fa146581c68..86bd2b53a5fa 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java @@ -17,9 +17,6 @@ package org.apache.lucene.util; import java.io.IOException; -import org.apache.lucene.index.PointValues; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.tests.util.LuceneTestCase; @@ -145,208 +142,4 @@ public void testRandom() throws IOException { assertEquals(expected, actual); } } - - public void testMisleadingDISICost() throws IOException { - final int maxDoc = TestUtil.nextInt(random(), 1000, 10000); - DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc); - FixedBitSet expected = new FixedBitSet(maxDoc); - - for (int i = 0; i < 10; ++i) { - final FixedBitSet docs = new FixedBitSet(maxDoc); - final int numDocs = random().nextInt(maxDoc / 1000); - for (int j = 0; j < numDocs; ++j) { - docs.set(random().nextInt(maxDoc)); - } - expected.or(docs); - // We provide a cost of 0 here to make sure the builder can deal with wrong costs - builder.add(new BitSetIterator(docs, 0L)); - } - - assertEquals(new BitDocIdSet(expected), builder.build()); - } - - public void testEmptyPoints() throws IOException { - PointValues values = new DummyPointValues(0, 0); - DocIdSetBuilder builder = new DocIdSetBuilder(1, values, "foo"); - assertEquals(1d, builder.numValuesPerDoc, 0d); - } - - public void testLeverageStats() throws IOException { - // single-valued points - PointValues values = new DummyPointValues(42, 42); - DocIdSetBuilder builder = new DocIdSetBuilder(100, values, "foo"); - assertEquals(1d, builder.numValuesPerDoc, 0d); - assertFalse(builder.multivalued); - DocIdSetBuilder.BulkAdder adder = builder.grow(2); - adder.add(5); - adder.add(7); - DocIdSet set = builder.build(); - assertTrue(set instanceof BitDocIdSet); - assertEquals(2, set.iterator().cost()); - - // multi-valued points - values = new DummyPointValues(42, 63); - builder = new DocIdSetBuilder(100, values, "foo"); - assertEquals(1.5, builder.numValuesPerDoc, 0d); - assertTrue(builder.multivalued); - adder = builder.grow(2); - adder.add(5); - adder.add(7); - set = builder.build(); - assertTrue(set instanceof BitDocIdSet); - assertEquals(1, set.iterator().cost()); // it thinks the same doc was added twice - - // incomplete stats - values = new DummyPointValues(42, -1); - builder = new DocIdSetBuilder(100, values, "foo"); - assertEquals(1d, builder.numValuesPerDoc, 0d); - assertTrue(builder.multivalued); - - values = new DummyPointValues(-1, 84); - builder = new DocIdSetBuilder(100, values, "foo"); - assertEquals(1d, builder.numValuesPerDoc, 0d); - assertTrue(builder.multivalued); - - // single-valued terms - Terms terms = new DummyTerms(42, 42); - builder = new DocIdSetBuilder(100, terms); - assertEquals(1d, builder.numValuesPerDoc, 0d); - assertFalse(builder.multivalued); - adder = builder.grow(2); - adder.add(5); - adder.add(7); - set = builder.build(); - assertTrue(set instanceof BitDocIdSet); - assertEquals(2, set.iterator().cost()); - - // multi-valued terms - terms = new DummyTerms(42, 63); - builder = new DocIdSetBuilder(100, terms); - assertEquals(1.5, builder.numValuesPerDoc, 0d); - assertTrue(builder.multivalued); - adder = builder.grow(2); - adder.add(5); - adder.add(7); - set = builder.build(); - assertTrue(set instanceof BitDocIdSet); - assertEquals(1, set.iterator().cost()); // it thinks the same doc was added twice - - // incomplete stats - terms = new DummyTerms(42, -1); - builder = new DocIdSetBuilder(100, terms); - assertEquals(1d, builder.numValuesPerDoc, 0d); - assertTrue(builder.multivalued); - - terms = new DummyTerms(-1, 84); - builder = new DocIdSetBuilder(100, terms); - assertEquals(1d, builder.numValuesPerDoc, 0d); - assertTrue(builder.multivalued); - } - - private static class DummyTerms extends Terms { - - private final int docCount; - private final long numValues; - - DummyTerms(int docCount, long numValues) { - this.docCount = docCount; - this.numValues = numValues; - } - - @Override - public TermsEnum iterator() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public long size() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public long getSumTotalTermFreq() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public long getSumDocFreq() throws IOException { - return numValues; - } - - @Override - public int getDocCount() throws IOException { - return docCount; - } - - @Override - public boolean hasFreqs() { - throw new UnsupportedOperationException(); - } - - @Override - public boolean hasOffsets() { - throw new UnsupportedOperationException(); - } - - @Override - public boolean hasPositions() { - throw new UnsupportedOperationException(); - } - - @Override - public boolean hasPayloads() { - throw new UnsupportedOperationException(); - } - } - - private static class DummyPointValues extends PointValues { - - private final int docCount; - private final long numPoints; - - DummyPointValues(int docCount, long numPoints) { - this.docCount = docCount; - this.numPoints = numPoints; - } - - @Override - public PointTree getPointTree() { - throw new UnsupportedOperationException(); - } - - @Override - public byte[] getMinPackedValue() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public byte[] getMaxPackedValue() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public int getNumDimensions() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public int getNumIndexDimensions() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public int getBytesPerDimension() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public long size() { - return numPoints; - } - - @Override - public int getDocCount() { - return docCount; - } - } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiRangeQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiRangeQuery.java index ae07d34d706d..cd2a8d5e73ef 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiRangeQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiRangeQuery.java @@ -277,7 +277,7 @@ public long cost() { } else { return new ScorerSupplier() { - final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field); + final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc()); final PointValues.IntersectVisitor visitor = getIntersectVisitor(result, range); long cost = -1; diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java index 8fa4a36bff65..904b3cfd9aa7 100644 --- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java +++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java @@ -197,8 +197,8 @@ public IntersectsDifferentiatingVisitor(LeafReaderContext context) throws IOExce @Override protected void start() throws IOException { - approxBuilder = new DocIdSetBuilder(maxDoc, terms); - exactBuilder = new DocIdSetBuilder(maxDoc, terms); + approxBuilder = new DocIdSetBuilder(maxDoc); + exactBuilder = new DocIdSetBuilder(maxDoc); } @Override diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeQuery.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeQuery.java index 966080037a9d..57f59196f7c4 100644 --- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeQuery.java +++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeQuery.java @@ -58,7 +58,7 @@ protected DocIdSet getDocIdSet(LeafReaderContext context) throws IOException { @Override protected void start() throws IOException { - results = new DocIdSetBuilder(maxDoc, terms); + results = new DocIdSetBuilder(maxDoc); } @Override diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/PointInGeo3DShapeQuery.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/PointInGeo3DShapeQuery.java index 283cb5144105..69717edbaf8b 100644 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/PointInGeo3DShapeQuery.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/PointInGeo3DShapeQuery.java @@ -103,7 +103,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { assert xyzSolid.getRelationship(shape) == GeoArea.WITHIN || xyzSolid.getRelationship(shape) == GeoArea.OVERLAPS: "expected WITHIN (1) or OVERLAPS (2) but got " + xyzSolid.getRelationship(shape) + "; shape="+shape+"; XYZSolid="+xyzSolid; */ - DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field); + DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc()); values.intersect(new PointInShapeIntersectVisitor(result, shape, shapeBounds)); From 4c6b436c7059742c80a7975a39b72494082c543c Mon Sep 17 00:00:00 2001 From: iverase Date: Fri, 25 Feb 2022 09:35:01 +0100 Subject: [PATCH 2/5] Simplify call to IntersectVisitor#grow when bulk visiting points --- .../codecs/simpletext/SimpleTextBKDReader.java | 16 +++++----------- .../org/apache/lucene/util/bkd/BKDReader.java | 16 +++++----------- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java index 6b989497cb39..ef8bb928155f 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java @@ -333,17 +333,11 @@ private int getNumLeavesSlow(int node) { @Override public void visitDocIDs(PointValues.IntersectVisitor visitor) throws IOException { - addAll(visitor, false); + visitor.grow((int) Math.min(Integer.MAX_VALUE, size())); + addAll(visitor); } - public void addAll(PointValues.IntersectVisitor visitor, boolean grown) throws IOException { - if (grown == false) { - final long size = size(); - if (size <= Integer.MAX_VALUE) { - visitor.grow((int) size); - grown = true; - } - } + public void addAll(PointValues.IntersectVisitor visitor) throws IOException { if (isLeafNode()) { // Leaf node BytesRefBuilder scratch = new BytesRefBuilder(); @@ -356,10 +350,10 @@ public void addAll(PointValues.IntersectVisitor visitor, boolean grown) throws I } } else { pushLeft(); - addAll(visitor, grown); + addAll(visitor); pop(true); pushRight(); - addAll(visitor, grown); + addAll(visitor); pop(false); } } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java index ac55f33fb1e6..343f3668e5be 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java @@ -553,17 +553,11 @@ private int balanceTreeNodePosition( @Override public void visitDocIDs(PointValues.IntersectVisitor visitor) throws IOException { resetNodeDataPosition(); - addAll(visitor, false); + visitor.grow((int) Math.min(Integer.MAX_VALUE, size())); + addAll(visitor); } - public void addAll(PointValues.IntersectVisitor visitor, boolean grown) throws IOException { - if (grown == false) { - final long size = size(); - if (size <= Integer.MAX_VALUE) { - visitor.grow((int) size); - grown = true; - } - } + public void addAll(PointValues.IntersectVisitor visitor) throws IOException { if (isLeafNode()) { // Leaf node leafNodes.seek(getLeafBlockFP()); @@ -573,10 +567,10 @@ public void addAll(PointValues.IntersectVisitor visitor, boolean grown) throws I DocIdsWriter.readInts(leafNodes, count, visitor); } else { pushLeft(); - addAll(visitor, grown); + addAll(visitor); pop(); pushRight(); - addAll(visitor, grown); + addAll(visitor); pop(); } } From c64ee66479768ee6bc66edfe2c08251b4a4e3ab2 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 3 Mar 2022 17:17:11 -0500 Subject: [PATCH 3/5] LUCENE-10311: add long method, that forwards to int method and truncates Confusion happens because grow(numAdds) reserves space for you to call add() up to numAdds times. When numAdds exceeds a "threshold" (maxdoc >> 8), we really don't care about big numbers at all: we'll switch to a FixedBitSet(maxDoc) with fixed sized storage, bounded only by maxDoc. But we can just add a one-liner grow(long) that doesn't require the caller to understand any of this, and hide it via implementation detail. --- .../apache/lucene/util/DocIdSetBuilder.java | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java index 47637130e9f6..8b2d03b822e1 100644 --- a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java @@ -144,13 +144,23 @@ public void add(DocIdSetIterator iter) throws IOException { } /** - * Reserve space and return a {@link BulkAdder} object that can be used to add up to {@code - * numDocs} documents. + * Reserve space and return a {@link BulkAdder} object that supports up to {@code numAdds} + * invocations of {@link BulkAdder#add(int)}. */ - public BulkAdder grow(int numDocs) { + public BulkAdder grow(long numAdds) { + // as an impl detail, we don't need to care about numbers bigger than int32, + // we switch over to FixedBitSet storage well before that threshold. + return grow((int) Math.min(Integer.MAX_VALUE, numAdds)); + } + + /** + * Reserve space and return a {@link BulkAdder} object that supports up to {@code numAdds} + * invocations of {@link BulkAdder#add(int)}. + */ + public BulkAdder grow(int numAdds) { if (bitSet == null) { - if ((long) totalAllocated + numDocs <= threshold) { - ensureBufferCapacity(numDocs); + if ((long) totalAllocated + numAdds <= threshold) { + ensureBufferCapacity(numAdds); } else { upgradeToBitSet(); } From ceb5e30b91fa7e106ee57216ede30bef32230728 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 4 Mar 2022 06:43:58 -0500 Subject: [PATCH 4/5] Revert "LUCENE-10311: add long method, that forwards to int method and truncates" This reverts commit c64ee66479768ee6bc66edfe2c08251b4a4e3ab2. --- .../apache/lucene/util/DocIdSetBuilder.java | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java index 8b2d03b822e1..47637130e9f6 100644 --- a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java @@ -144,23 +144,13 @@ public void add(DocIdSetIterator iter) throws IOException { } /** - * Reserve space and return a {@link BulkAdder} object that supports up to {@code numAdds} - * invocations of {@link BulkAdder#add(int)}. + * Reserve space and return a {@link BulkAdder} object that can be used to add up to {@code + * numDocs} documents. */ - public BulkAdder grow(long numAdds) { - // as an impl detail, we don't need to care about numbers bigger than int32, - // we switch over to FixedBitSet storage well before that threshold. - return grow((int) Math.min(Integer.MAX_VALUE, numAdds)); - } - - /** - * Reserve space and return a {@link BulkAdder} object that supports up to {@code numAdds} - * invocations of {@link BulkAdder#add(int)}. - */ - public BulkAdder grow(int numAdds) { + public BulkAdder grow(int numDocs) { if (bitSet == null) { - if ((long) totalAllocated + numAdds <= threshold) { - ensureBufferCapacity(numAdds); + if ((long) totalAllocated + numDocs <= threshold) { + ensureBufferCapacity(numDocs); } else { upgradeToBitSet(); } From 5d1fbf413539e5ec4885537165f7475e30aa9887 Mon Sep 17 00:00:00 2001 From: iverase Date: Sat, 5 Mar 2022 10:00:31 +0100 Subject: [PATCH 5/5] revert "Simplify call to IntersectVisitor#grow when bulk visiting points" --- .../codecs/simpletext/SimpleTextBKDReader.java | 16 +++++++++++----- .../org/apache/lucene/util/bkd/BKDReader.java | 16 +++++++++++----- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java index ef8bb928155f..6b989497cb39 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java @@ -333,11 +333,17 @@ private int getNumLeavesSlow(int node) { @Override public void visitDocIDs(PointValues.IntersectVisitor visitor) throws IOException { - visitor.grow((int) Math.min(Integer.MAX_VALUE, size())); - addAll(visitor); + addAll(visitor, false); } - public void addAll(PointValues.IntersectVisitor visitor) throws IOException { + public void addAll(PointValues.IntersectVisitor visitor, boolean grown) throws IOException { + if (grown == false) { + final long size = size(); + if (size <= Integer.MAX_VALUE) { + visitor.grow((int) size); + grown = true; + } + } if (isLeafNode()) { // Leaf node BytesRefBuilder scratch = new BytesRefBuilder(); @@ -350,10 +356,10 @@ public void addAll(PointValues.IntersectVisitor visitor) throws IOException { } } else { pushLeft(); - addAll(visitor); + addAll(visitor, grown); pop(true); pushRight(); - addAll(visitor); + addAll(visitor, grown); pop(false); } } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java index 343f3668e5be..ac55f33fb1e6 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java @@ -553,11 +553,17 @@ private int balanceTreeNodePosition( @Override public void visitDocIDs(PointValues.IntersectVisitor visitor) throws IOException { resetNodeDataPosition(); - visitor.grow((int) Math.min(Integer.MAX_VALUE, size())); - addAll(visitor); + addAll(visitor, false); } - public void addAll(PointValues.IntersectVisitor visitor) throws IOException { + public void addAll(PointValues.IntersectVisitor visitor, boolean grown) throws IOException { + if (grown == false) { + final long size = size(); + if (size <= Integer.MAX_VALUE) { + visitor.grow((int) size); + grown = true; + } + } if (isLeafNode()) { // Leaf node leafNodes.seek(getLeafBlockFP()); @@ -567,10 +573,10 @@ public void addAll(PointValues.IntersectVisitor visitor) throws IOException { DocIdsWriter.readInts(leafNodes, count, visitor); } else { pushLeft(); - addAll(visitor); + addAll(visitor, grown); pop(); pushRight(); - addAll(visitor); + addAll(visitor, grown); pop(); } }