Skip to content

Commit

Permalink
SimpleText[Float|Byte]VectorValues::scorer should return null when th…
Browse files Browse the repository at this point in the history
…e vector values is empty (#13444)

This commit ensures that SimpleText[Float|Byte]VectorValues::scorer returns null when the vector values are empty, as per the scorer javadoc. Other KnnVectorsReader implementations have specialised empty implementations that do something similar, e.g. OffHeapFloatVectorValues.EmptyOffHeapVectorValues. The VectorScorer interface is new in Lucene 9.11, see #13181

An existing test randomly hits this, but a new test has been added that exercises this code path consistently. It's also useful to verify other KnnVectorsReader implementations.
  • Loading branch information
ChrisHegarty committed May 31, 2024
1 parent c5ea94f commit 1e660ee
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,9 @@ public float[] vectorValue(int targetOrd) throws IOException {

@Override
public VectorScorer scorer(float[] target) {
if (size() == 0) {
return null;
}
OffHeapFloatVectorValues values = this.copy();
return new VectorScorer() {
@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,9 @@ public float[] vectorValue(int targetOrd) throws IOException {

@Override
public VectorScorer scorer(float[] target) {
if (size == 0) {
return null;
}
OffHeapFloatVectorValues values = this.copy();
return new VectorScorer() {
@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,9 @@ public void testSortedIndexBytes() throws Exception {
public void testByteVectorScorerIteration() {
// unimplemented
}

@Override
public void testEmptyByteVectorData() {
// Intentionally empty: this override replaces the inherited test with a no-op,
// so the empty-byte-vector check is not exercised by this test class.
// NOTE(review): presumably the format under test does not support byte vectors — confirm.
// unimplemented
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,9 @@ public void testSortedIndexBytes() throws Exception {
public void testByteVectorScorerIteration() {
// unimplemented
}

@Override
public void testEmptyByteVectorData() {
// Intentionally empty: this override replaces the inherited test with a no-op,
// so the empty-byte-vector check is not exercised by this test class.
// NOTE(review): presumably the format under test does not support byte vectors — confirm.
// unimplemented
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,9 @@ public void testSortedIndexBytes() throws Exception {
public void testByteVectorScorerIteration() {
// unimplemented
}

@Override
public void testEmptyByteVectorData() {
// Intentionally empty: this override replaces the inherited test with a no-op,
// so the empty-byte-vector check is not exercised by this test class.
// NOTE(review): presumably the format under test does not support byte vectors — confirm.
// unimplemented
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,9 @@ public int advance(int target) throws IOException {

@Override
public VectorScorer scorer(float[] target) {
if (size() == 0) {
return null;
}
SimpleTextFloatVectorValues simpleTextFloatVectorValues =
new SimpleTextFloatVectorValues(this);
return new VectorScorer() {
Expand Down Expand Up @@ -504,6 +507,9 @@ public int advance(int target) throws IOException {

@Override
public VectorScorer scorer(byte[] target) {
if (size() == 0) {
return null;
}
SimpleTextByteVectorValues simpleTextByteVectorValues = new SimpleTextByteVectorValues(this);
return new VectorScorer() {
@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.lucene.tests.index;

import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

import java.io.ByteArrayOutputStream;
Expand Down Expand Up @@ -847,6 +848,58 @@ public void testByteVectorScorerIteration() throws Exception {
}
}

/**
 * Indexes one document with a float vector in field "v" and one without, deletes the document
 * that carried the vector, commits and force-merges to a single segment, then checks that the
 * field's vector values are non-null, report a size of zero, and return a null scorer.
 */
public void testEmptyFloatVectorData() throws Exception {
  try (Directory directory = newDirectory();
      IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig())) {
    // The only document that carries a vector for field "v".
    Document withVector = new Document();
    withVector.add(new StringField("id", "0", Field.Store.NO));
    withVector.add(new KnnFloatVectorField("v", new float[] {2, 3, 5, 6}, DOT_PRODUCT));
    writer.addDocument(withVector);

    // A second document that has an id but no vector.
    Document withoutVector = new Document();
    withoutVector.add(new StringField("id", "1", Field.Store.NO));
    writer.addDocument(withoutVector);

    // Remove the vector-bearing document, then merge down to one segment.
    writer.deleteDocuments(new Term("id", "0"));
    writer.commit();
    writer.forceMerge(1);

    try (DirectoryReader directoryReader = DirectoryReader.open(writer)) {
      LeafReader leaf = getOnlyLeafReader(directoryReader);
      FloatVectorValues vectorValues = leaf.getFloatVectorValues("v");
      assertNotNull(vectorValues);
      assertEquals(0, vectorValues.size());

      float[] query = new float[] {2, 3, 5, 6};
      assertNull(vectorValues.scorer(query));
    }
  }
}

/**
 * Indexes one document with a byte vector in field "v" and one without, deletes the document
 * that carried the vector, commits and force-merges to a single segment, then checks that the
 * field's vector values are non-null, report a size of zero, and return a null scorer.
 */
public void testEmptyByteVectorData() throws Exception {
  try (Directory directory = newDirectory();
      IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig())) {
    // The only document that carries a vector for field "v".
    Document withVector = new Document();
    withVector.add(new StringField("id", "0", Field.Store.NO));
    withVector.add(new KnnByteVectorField("v", new byte[] {2, 3, 5, 6}, DOT_PRODUCT));
    writer.addDocument(withVector);

    // A second document that has an id but no vector.
    Document withoutVector = new Document();
    withoutVector.add(new StringField("id", "1", Field.Store.NO));
    writer.addDocument(withoutVector);

    // Remove the vector-bearing document, then merge down to one segment.
    writer.deleteDocuments(new Term("id", "0"));
    writer.commit();
    writer.forceMerge(1);

    try (DirectoryReader directoryReader = DirectoryReader.open(writer)) {
      LeafReader leaf = getOnlyLeafReader(directoryReader);
      ByteVectorValues vectorValues = leaf.getByteVectorValues("v");
      assertNotNull(vectorValues);
      assertEquals(0, vectorValues.size());

      byte[] query = new byte[] {2, 3, 5, 6};
      assertNull(vectorValues.scorer(query));
    }
  }
}

protected VectorSimilarityFunction randomSimilarity() {
return VectorSimilarityFunction.values()[
random().nextInt(VectorSimilarityFunction.values().length)];
Expand Down

0 comments on commit 1e660ee

Please sign in to comment.