From 1e660eee72df27ede09ba38fbaa47f8cdf8a26bc Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Fri, 31 May 2024 14:27:25 +0100 Subject: [PATCH] SimpleText[Float|Byte]VectorValues::scorer should return null when the vector values is empty (#13444) This commit ensures that SimpleText[Float|Byte]VectorValues::scorer returns null when the vector values is empty, as per the scorer javadoc. Other KnnVectorsReader implementations have specialised empty implementations that do similar, e.g. OffHeapFloatVectorValues.EmptyOffHeapVectorValues. The VectorScorer interface is new in Lucene 9.11; see #13181. An existing test randomly hits this, but a new test has been added that exercises this code path consistently. It's also useful to verify other KnnVectorsReader implementations. --- .../lucene90/Lucene90HnswVectorsReader.java | 3 ++ .../lucene91/Lucene91HnswVectorsReader.java | 3 ++ .../TestLucene90HnswVectorsFormat.java | 5 ++ .../TestLucene91HnswVectorsFormat.java | 5 ++ .../TestLucene92HnswVectorsFormat.java | 5 ++ .../SimpleTextKnnVectorsReader.java | 6 +++ .../index/BaseKnnVectorsFormatTestCase.java | 53 +++++++++++++++++++ 7 files changed, 80 insertions(+) diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java index b277275fe053..5376de7b9711 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java @@ -451,6 +451,9 @@ public float[] vectorValue(int targetOrd) throws IOException { @Override public VectorScorer scorer(float[] target) { + if (size() == 0) { + return null; + } OffHeapFloatVectorValues values = this.copy(); return new VectorScorer() { @Override diff --git 
a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java index 2ebb6382da53..2415cc53f58c 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java @@ -497,6 +497,9 @@ public float[] vectorValue(int targetOrd) throws IOException { @Override public VectorScorer scorer(float[] target) { + if (size == 0) { + return null; + } OffHeapFloatVectorValues values = this.copy(); return new VectorScorer() { @Override diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90HnswVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90HnswVectorsFormat.java index b914acf3fbb4..720e1f564685 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90HnswVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90HnswVectorsFormat.java @@ -73,4 +73,9 @@ public void testSortedIndexBytes() throws Exception { public void testByteVectorScorerIteration() { // unimplemented } + + @Override + public void testEmptyByteVectorData() { + // unimplemented + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/TestLucene91HnswVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/TestLucene91HnswVectorsFormat.java index b27a42700cb2..09b5a50b4bcc 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/TestLucene91HnswVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/TestLucene91HnswVectorsFormat.java @@ -72,4 +72,9 @@ public 
void testSortedIndexBytes() throws Exception { public void testByteVectorScorerIteration() { // unimplemented } + + @Override + public void testEmptyByteVectorData() { + // unimplemented + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/TestLucene92HnswVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/TestLucene92HnswVectorsFormat.java index aaee5abe4ad1..5189791ef17c 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/TestLucene92HnswVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/TestLucene92HnswVectorsFormat.java @@ -62,4 +62,9 @@ public void testSortedIndexBytes() throws Exception { public void testByteVectorScorerIteration() { // unimplemented } + + @Override + public void testEmptyByteVectorData() { + // unimplemented + } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java index bcd52a39c040..66fc9af07ff3 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java @@ -395,6 +395,9 @@ public int advance(int target) throws IOException { @Override public VectorScorer scorer(float[] target) { + if (size() == 0) { + return null; + } SimpleTextFloatVectorValues simpleTextFloatVectorValues = new SimpleTextFloatVectorValues(this); return new VectorScorer() { @@ -504,6 +507,9 @@ public int advance(int target) throws IOException { @Override public VectorScorer scorer(byte[] target) { + if (size() == 0) { + return null; + } SimpleTextByteVectorValues simpleTextByteVectorValues = new SimpleTextByteVectorValues(this); return new VectorScorer() { @Override diff --git 
a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java index f9daae8ebbb7..ccceba42c989 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.tests.index; +import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; import java.io.ByteArrayOutputStream; @@ -847,6 +848,58 @@ public void testByteVectorScorerIteration() throws Exception { } } + public void testEmptyFloatVectorData() throws Exception { + try (Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { + var doc1 = new Document(); + doc1.add(new StringField("id", "0", Field.Store.NO)); + doc1.add(new KnnFloatVectorField("v", new float[] {2, 3, 5, 6}, DOT_PRODUCT)); + w.addDocument(doc1); + + var doc2 = new Document(); + doc2.add(new StringField("id", "1", Field.Store.NO)); + w.addDocument(doc2); + + w.deleteDocuments(new Term("id", Integer.toString(0))); + w.commit(); + w.forceMerge(1); + + try (DirectoryReader reader = DirectoryReader.open(w)) { + LeafReader r = getOnlyLeafReader(reader); + FloatVectorValues values = r.getFloatVectorValues("v"); + assertNotNull(values); + assertEquals(0, values.size()); + assertNull(values.scorer(new float[] {2, 3, 5, 6})); + } + } + } + + public void testEmptyByteVectorData() throws Exception { + try (Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { + var doc1 = new Document(); + doc1.add(new StringField("id", "0", Field.Store.NO)); + doc1.add(new KnnByteVectorField("v", new byte[] {2, 3, 5, 6}, DOT_PRODUCT)); + w.addDocument(doc1); + + var doc2 = new Document(); 
+ doc2.add(new StringField("id", "1", Field.Store.NO)); + w.addDocument(doc2); + + w.deleteDocuments(new Term("id", Integer.toString(0))); + w.commit(); + w.forceMerge(1); + + try (DirectoryReader reader = DirectoryReader.open(w)) { + LeafReader r = getOnlyLeafReader(reader); + ByteVectorValues values = r.getByteVectorValues("v"); + assertNotNull(values); + assertEquals(0, values.size()); + assertNull(values.scorer(new byte[] {2, 3, 5, 6})); + } + } + } + protected VectorSimilarityFunction randomSimilarity() { return VectorSimilarityFunction.values()[ random().nextInt(VectorSimilarityFunction.values().length)];