Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions python/python/tests/test_scalar_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2039,6 +2039,44 @@ def test_label_list_index_array_contains(tmp_path: Path):
assert "ScalarIndexQuery" not in explain


def test_label_list_index_null_element_match(tmp_path: Path):
    """Check that adding a LABEL_LIST index leaves filter results unchanged.

    The ``labels`` column mixes a list holding a NULL element, a plain list,
    and a NULL row. Every filter is evaluated once before the index is created
    and once afterwards; both passes must return the same rows.
    """
    data = pa.table({"labels": [["foo", None], ["foo"], None]})
    ds = lance.write_dataset(data, tmp_path / "dataset")

    predicates = (
        "array_has_any(labels, ['foo'])",
        "array_has_all(labels, ['foo'])",
        "array_contains(labels, 'foo')",
    )

    def collect_matches():
        # Map each predicate to the list of ``labels`` values it selects.
        results = {}
        for predicate in predicates:
            matched = ds.to_table(filter=predicate)
            results[predicate] = matched.column("labels").to_pylist()
        return results

    # Baseline is taken before the index is created.
    baseline = collect_matches()

    ds.create_scalar_index("labels", index_type="LABEL_LIST")

    # Same predicates again, now with the LABEL_LIST index in place.
    indexed = collect_matches()
    assert indexed == baseline


def test_label_list_index_explain_null_literals(tmp_path: Path):
    """Check that explain_plan returns a string for list literals with NULLs.

    A LABEL_LIST index is built on ``labels`` and a plan is requested for
    ``array_has_any`` / ``array_has_all`` filters whose literal list is either
    only NULL or mixes a value with NULL.
    """
    data = pa.table({"labels": [["foo", None], ["foo"]]})
    ds = lance.write_dataset(data, tmp_path / "dataset")
    ds.create_scalar_index("labels", index_type="LABEL_LIST")

    null_literal_filters = (
        "array_has_any(labels, [NULL])",
        "array_has_all(labels, [NULL])",
        "array_has_any(labels, ['foo', NULL])",
        "array_has_all(labels, ['foo', NULL])",
    )

    # explain_plan should not panic when list literals include NULLs.
    for null_filter in null_literal_filters:
        scanner = ds.scanner(filter=null_filter)
        plan = scanner.explain_plan()
        assert isinstance(plan, str)


def test_create_index_empty_dataset(tmp_path: Path):
# Creating an index on an empty dataset is (currently) not terribly useful but
# we shouldn't return strange errors.
Expand Down
4 changes: 2 additions & 2 deletions rust/lance-index/src/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,7 @@ impl AnyQuery for LabelListQuery {
let offsets_buffer =
OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, labels_arr.len() as i32]));
let labels_list = ListArray::try_new(
Arc::new(Field::new("item", labels_arr.data_type().clone(), false)),
Arc::new(Field::new("item", labels_arr.data_type().clone(), true)),
offsets_buffer,
labels_arr,
None,
Expand All @@ -569,7 +569,7 @@ impl AnyQuery for LabelListQuery {
let offsets_buffer =
OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, labels_arr.len() as i32]));
let labels_list = ListArray::try_new(
Arc::new(Field::new("item", labels_arr.data_type().clone(), false)),
Arc::new(Field::new("item", labels_arr.data_type().clone(), true)),
offsets_buffer,
labels_arr,
None,
Expand Down
7 changes: 6 additions & 1 deletion rust/lance-index/src/scalar/bitmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,12 @@ impl ScalarIndex for BitmapIndex {
}
};

let selection = NullableRowAddrSet::new(row_ids, null_row_ids.unwrap_or_default());
let mut null_rows = null_row_ids.unwrap_or_default();
if !null_rows.is_empty() {
// A row can be both TRUE and NULL after list flattening; treat it as TRUE.
null_rows -= &row_ids;
}
let selection = NullableRowAddrSet::new(row_ids, null_rows);
Ok(SearchResult::Exact(selection))
}

Expand Down