Skip to content

Commit

Permalink
Dry up TestScorerPerf (#13712)
Browse files Browse the repository at this point in the history
Some of the test methods were commented out when this test class was added. They were later removed,
but the removal left unused methods behind. I also adjusted the visibility of all the internal methods
that were public and should have been private, which led to further cleanup: `MatchingHitCollector`
was not needed and can be removed.
  • Loading branch information
javanna committed Sep 5, 2024
1 parent 7d89ea7 commit 40c4e58
Showing 1 changed file with 24 additions and 181 deletions.
205 changes: 24 additions & 181 deletions lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,11 @@
package org.apache.lucene.search;

import java.io.IOException;
import java.util.BitSet;
import java.util.Collection;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.util.LuceneTestCase;
Expand All @@ -35,42 +31,15 @@
public class TestScorerPerf extends LuceneTestCase {
private final boolean validate = true; // set to false when doing performance testing

/**
 * Creates a fresh index in {@code dir} containing {@code nDocs} documents over the single field
 * {@code "f"}, then force-merges it down to one segment.
 *
 * <p>Term {@code i} is the one-character string {@code (char) ('A' + i)}. For every document and
 * every term, the term is added when {@code random().nextInt(freq[i])} returns 0, where {@code
 * freq[i] = ceil((nTerms + 1 - i) ^ power)}; larger {@code freq} values therefore make a term
 * rarer.
 *
 * @param nDocs number of documents to add
 * @param nTerms number of distinct terms to generate
 * @param power exponent applied to {@code (nTerms + 1 - i)} when computing each term's frequency
 *     divisor
 * @param dir directory the index is written to; opened with {@code OpenMode.CREATE}
 * @throws Exception if creating, writing or merging the index fails
 */
public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir)
    throws Exception {
  int[] freq = new int[nTerms];
  Term[] terms = new Term[nTerms];
  for (int i = 0; i < nTerms; i++) {
    int f = (nTerms + 1) - i; // make first terms less frequent
    freq[i] = (int) Math.ceil(Math.pow(f, power));
    terms[i] = new Term("f", Character.toString((char) ('A' + i)));
  }

  // try-with-resources: the writer is closed even when indexing or merging throws.
  try (IndexWriter iw =
      new IndexWriter(
          dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE))) {
    for (int i = 0; i < nDocs; i++) {
      Document d = new Document();
      for (int j = 0; j < nTerms; j++) {
        if (random().nextInt(freq[j]) == 0) {
          d.add(newStringField("f", terms[j].text(), Field.Store.NO));
        }
      }
      iw.addDocument(d);
    }
    iw.forceMerge(1);
  }
}

public FixedBitSet randBitSet(int sz, int numBitsToSet) {
private static FixedBitSet randBitSet(int sz, int numBitsToSet) {
FixedBitSet set = new FixedBitSet(sz);
for (int i = 0; i < numBitsToSet; i++) {
set.set(random().nextInt(sz));
}
return set;
}

public FixedBitSet[] randBitSets(int numSets, int setSize) {
private static FixedBitSet[] randBitSets(int numSets, int setSize) {
FixedBitSet[] sets = new FixedBitSet[numSets];
for (int i = 0; i < sets.length; i++) {
sets[i] = randBitSet(setSize, random().nextInt(setSize));
Expand All @@ -81,22 +50,13 @@ public FixedBitSet[] randBitSets(int numSets, int setSize) {
private static final class CountingHitCollectorManager
implements CollectorManager<CountingHitCollector, CountingHitCollector> {

private final boolean validate;
private final FixedBitSet result;

CountingHitCollectorManager(boolean validate, FixedBitSet result) {
this.validate = validate;
this.result = result;
}

@Override
public CountingHitCollector newCollector() {
return validate ? new MatchingHitCollector(result) : new CountingHitCollector();
return new CountingHitCollector();
}

@Override
public CountingHitCollector reduce(Collection<CountingHitCollector> collectors)
throws IOException {
public CountingHitCollector reduce(Collection<CountingHitCollector> collectors) {
CountingHitCollector result = new CountingHitCollector();
for (CountingHitCollector collector : collectors) {
result.count += collector.count;
Expand All @@ -106,7 +66,7 @@ public CountingHitCollector reduce(Collection<CountingHitCollector> collectors)
}
}

public static class CountingHitCollector extends SimpleCollector {
private static class CountingHitCollector extends SimpleCollector {
int count = 0;
int sum = 0;
protected int docBase = 0;
Expand All @@ -121,12 +81,8 @@ public int getCount() {
return count;
}

public int getSum() {
return sum;
}

@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
protected void doSetNextReader(LeafReaderContext context) {
docBase = context.docBase;
}

Expand All @@ -136,24 +92,6 @@ public ScoreMode scoreMode() {
}
}

/**
 * A {@link CountingHitCollector} that additionally checks collected documents against an expected
 * bit set: each call to {@link #collect(int, float)} advances to the next set bit of {@code
 * answer} and fails if that bit is not the collected document.
 *
 * <p>NOTE(review): {@code collect(int, float)} carries no {@code @Override}; confirm that
 * something actually invokes this two-argument overload.
 */
public static class MatchingHitCollector extends CountingHitCollector {
  /** Expected matches, indexed by {@code doc + docBase}. */
  FixedBitSet answer;

  /** Index of the most recently consumed set bit of {@code answer}; -1 before the first hit. */
  int pos = -1;

  public MatchingHitCollector(FixedBitSet answer) {
    this.answer = answer;
  }

  /**
   * Verifies that {@code doc} (offset by {@code docBase}) is the next expected match, then
   * delegates to {@code collect(doc)} of the parent class.
   *
   * @param doc the collected document id, before {@code docBase} is added
   * @param score unused
   * @throws RuntimeException if the next set bit of {@code answer} differs from {@code doc +
   *     docBase}
   */
  public void collect(int doc, float score) {
    final int globalDoc = doc + docBase;
    pos = answer.nextSetBit(pos + 1);
    if (pos == globalDoc) {
      super.collect(doc);
      return;
    }
    throw new RuntimeException("Expected doc " + pos + " but got " + globalDoc);
  }
}

private static class BitSetQuery extends Query {

private final FixedBitSet docs;
Expand All @@ -163,8 +101,7 @@ private static class BitSetQuery extends Query {
}

@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) {
return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
Expand Down Expand Up @@ -198,20 +135,22 @@ public int hashCode() {
}
}

FixedBitSet addClause(FixedBitSet[] sets, BooleanQuery.Builder bq, FixedBitSet result) {
private FixedBitSet addClause(FixedBitSet[] sets, BooleanQuery.Builder bq, FixedBitSet result) {
final FixedBitSet rnd = sets[random().nextInt(sets.length)];
Query q = new BitSetQuery(rnd);
bq.add(q, BooleanClause.Occur.MUST);
if (validate) {
if (result == null) result = rnd.clone();
else result.and(rnd);
if (result == null) {
result = rnd.clone();
} else {
result.and(rnd);
}
}
return result;
}

public int doConjunctions(IndexSearcher s, FixedBitSet[] sets, int iter, int maxClauses)
private void doConjunctions(IndexSearcher s, FixedBitSet[] sets, int iter, int maxClauses)
throws IOException {
int ret = 0;

for (int i = 0; i < iter; i++) {
int nClauses = random().nextInt(maxClauses - 1) + 2; // min 2 clauses
Expand All @@ -220,21 +159,17 @@ public int doConjunctions(IndexSearcher s, FixedBitSet[] sets, int iter, int max
for (int j = 0; j < nClauses; j++) {
result = addClause(sets, bq, result);
}
CountingHitCollector hc =
s.search(bq.build(), new CountingHitCollectorManager(validate, result));
ret += hc.getSum();
CountingHitCollector hc = s.search(bq.build(), new CountingHitCollectorManager());

if (validate) assertEquals(result.cardinality(), hc.getCount());
// System.out.println(hc.getCount());
if (validate) {
assertEquals(result.cardinality(), hc.getCount());
}
}

return ret;
}

public int doNestedConjunctions(
private void doNestedConjunctions(
IndexSearcher s, FixedBitSet[] sets, int iter, int maxOuterClauses, int maxClauses)
throws IOException {
int ret = 0;
long nMatches = 0;

for (int i = 0; i < iter; i++) {
Expand All @@ -253,107 +188,15 @@ public int doNestedConjunctions(
oq.add(bq.build(), BooleanClause.Occur.MUST);
} // outer

CountingHitCollector hc =
s.search(oq.build(), new CountingHitCollectorManager(validate, result));
CountingHitCollector hc = s.search(oq.build(), new CountingHitCollectorManager());
nMatches += hc.getCount();
ret += hc.getSum();
if (validate) assertEquals(result.cardinality(), hc.getCount());
// System.out.println(hc.getCount());
}
if (VERBOSE) System.out.println("Average number of matches=" + (nMatches / iter));
return ret;
}

public int doTermConjunctions(
Term[] terms, IndexSearcher s, int termsInIndex, int maxClauses, int iter)
throws IOException {
int ret = 0;

long nMatches = 0;
for (int i = 0; i < iter; i++) {
int nClauses = random().nextInt(maxClauses - 1) + 2; // min 2 clauses
BooleanQuery.Builder bq = new BooleanQuery.Builder();
BitSet termflag = new BitSet(termsInIndex);
for (int j = 0; j < nClauses; j++) {
int tnum;
// don't pick same clause twice
tnum = random().nextInt(termsInIndex);
if (termflag.get(tnum)) tnum = termflag.nextClearBit(tnum);
if (tnum < 0 || tnum >= termsInIndex) tnum = termflag.nextClearBit(0);
termflag.set(tnum);
Query tq = new TermQuery(terms[tnum]);
bq.add(tq, BooleanClause.Occur.MUST);
if (validate) {
assertEquals(result.cardinality(), hc.getCount());
}

CountingHitCollector hc = s.search(bq.build(), new CountingHitCollectorManager(false, null));
nMatches += hc.getCount();
ret += hc.getSum();
}
if (VERBOSE) System.out.println("Average number of matches=" + (nMatches / iter));

return ret;
}

/**
 * Runs {@code iter} random nested conjunctions of term queries against {@code s}.
 *
 * <p>Each iteration builds an outer {@link BooleanQuery} with between 2 and {@code
 * maxOuterClauses} MUST clauses; every outer clause is itself a {@link BooleanQuery} with between
 * 2 and {@code maxClauses} MUST {@link TermQuery} clauses drawn from {@code terms}. Hits are
 * gathered with a {@code CountingHitCollectorManager(false, null)}.
 *
 * @param s searcher the queries are executed on
 * @param terms candidate terms; indexed with values in {@code [0, termsInIndex)}
 * @param termsInIndex number of terms to choose from
 * @param maxOuterClauses upper bound on the number of outer clauses (at least 2 are used)
 * @param maxClauses upper bound on the number of inner clauses (at least 2 are used)
 * @param iter number of queries to run
 * @return the accumulated {@code getSum()} of all collectors
 * @throws IOException if a search fails
 */
public int doNestedTermConjunctions(
    IndexSearcher s,
    Term[] terms,
    int termsInIndex,
    int maxOuterClauses,
    int maxClauses,
    int iter)
    throws IOException {
  int ret = 0;
  long nMatches = 0;
  for (int i = 0; i < iter; i++) {
    int oClauses = random().nextInt(maxOuterClauses - 1) + 2;
    BooleanQuery.Builder oq = new BooleanQuery.Builder();
    for (int o = 0; o < oClauses; o++) {

      int nClauses = random().nextInt(maxClauses - 1) + 2; // min 2 clauses
      BooleanQuery.Builder bq = new BooleanQuery.Builder();
      BitSet termflag = new BitSet(termsInIndex);
      for (int j = 0; j < nClauses; j++) {
        // don't pick same clause twice
        int tnum = random().nextInt(termsInIndex);
        if (termflag.get(tnum)) {
          tnum = termflag.nextClearBit(tnum);
        }
        // Wrap around using the real term count (this bound used to be a hard-coded 25, which
        // disagreed with termsInIndex and could index past the end of a shorter terms array).
        if (tnum < 0 || tnum >= termsInIndex) {
          tnum = termflag.nextClearBit(0);
        }
        termflag.set(tnum);
        Query tq = new TermQuery(terms[tnum]);
        bq.add(tq, BooleanClause.Occur.MUST);
      } // inner

      oq.add(bq.build(), BooleanClause.Occur.MUST);
    } // outer

    CountingHitCollector hc = s.search(oq.build(), new CountingHitCollectorManager(false, null));
    nMatches += hc.getCount();
    ret += hc.getSum();
  }
  if (VERBOSE) {
    System.out.println("Average number of matches=" + (nMatches / iter));
  }
  return ret;
}

public int doSloppyPhrase(IndexSearcher s, int termsInIndex, int maxClauses, int iter)
throws IOException {
int ret = 0;

for (int i = 0; i < iter; i++) {
int nClauses = random().nextInt(maxClauses - 1) + 2; // min 2 clauses
PhraseQuery.Builder builder = new PhraseQuery.Builder();
for (int j = 0; j < nClauses; j++) {
int tnum = random().nextInt(termsInIndex);
builder.add(new Term("f", Character.toString((char) (tnum + 'A'))));
}
// slop could be random too
builder.setSlop(termsInIndex);
PhraseQuery q = builder.build();

CountingHitCollector hc = s.search(q, new CountingHitCollectorManager(false, null));
ret += hc.getSum();
if (VERBOSE) {
System.out.println("Average number of matches=" + (nMatches / iter));
}

return ret;
}

public void testConjunctions() throws Exception {
Expand Down

0 comments on commit 40c4e58

Please sign in to comment.