Skip to content

Commit

Permalink
add fragment bitmap
Browse files: browse the repository at this point in the history
  • Loading branch information
jiachengdb committed Sep 14, 2024
1 parent 8d48a6d commit 18fb870
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 12 deletions.
7 changes: 6 additions & 1 deletion python/python/lance/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2237,10 +2237,15 @@ class CreateIndex(BaseOperation):
  name: str
  fields: List[int]
  dataset_version: int
+ fragment_ids: List[int]

  def _to_inner(self):
  return _Operation.create_index(
- self.uuid, self.name, self.fields, self.dataset_version
+ self.uuid,
+ self.name,
+ self.fields,
+ self.dataset_version,
+ self.fragment_ids,
  )


Expand Down
26 changes: 16 additions & 10 deletions python/python/tests/test_commit_index.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The Lance Authors

import os
import random
import shutil
import string
from datetime import date, datetime, timedelta
from pathlib import Path

import lance
Expand All @@ -14,7 +12,9 @@
import pytest


- def create_table(num_rows=1000):
+ @pytest.fixture()
+ def test_table():
+ num_rows = 1000
price = np.random.rand(num_rows) * 100

def gen_str(n, split="", char_set=string.ascii_letters + string.digits):
Expand All @@ -35,20 +35,18 @@ def gen_str(n, split="", char_set=string.ascii_letters + string.digits):


  @pytest.fixture()
- def dataset_with_index(tmp_path):
- table = create_table()
- dataset = lance.write_dataset(table, tmp_path)
+ def dataset_with_index(test_table, tmp_path):
+ dataset = lance.write_dataset(test_table, tmp_path)
  dataset.create_scalar_index("meta", index_type="BTREE")
  return dataset


- def test_commit_index(tmp_path, dataset_with_index):
+ def test_commit_index(dataset_with_index, test_table, tmp_path):
  index_id = dataset_with_index.list_indices()[0]["uuid"]

  # Create a new dataset without index
- table = create_table()
  dataset_without_index = lance.write_dataset(
- table, tmp_path / "dataset_without_index"
+ test_table, tmp_path / "dataset_without_index"
  )

# Copy the index from dataset_with_index to dataset_without_index
Expand All @@ -59,7 +57,11 @@ def test_commit_index(tmp_path, dataset_with_index):
# Commit the index to dataset_without_index
field_idx = dataset_without_index.schema.get_field_index("meta")
  create_index_op = lance.LanceOperation.CreateIndex(
- index_id, "meta_idx", [field_idx], dataset_without_index.version
+ index_id,
+ "meta_idx",
+ [field_idx],
+ dataset_without_index.version,
+ [f.fragment_id for f in dataset_without_index.get_fragments()],
  )
dataset_without_index = lance.LanceDataset.commit(
dataset_without_index.uri,
Expand All @@ -71,6 +73,10 @@ def test_commit_index(tmp_path, dataset_with_index):
assert len(dataset_with_index.list_indices()) == 1
assert len(dataset_without_index.list_indices()) == 1

+ assert (
+ dataset_without_index.list_indices()[0] == dataset_with_index.list_indices()[0]
+ )

# Check if the index is used in scans
for dataset in [dataset_with_index, dataset_without_index]:
scanner = dataset.scanner(
Expand Down
3 changes: 2 additions & 1 deletion python/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,13 +328,14 @@ impl Operation {
  name: String,
  fields: Vec<i32>,
  dataset_version: u64,
+ fragment_ids: Vec<u32>,
  ) -> PyResult<Self> {
  let new_indices = vec![Index {
  uuid: Uuid::parse_str(&uuid).map_err(|e| PyValueError::new_err(e.to_string()))?,
  name,
  fields,
  dataset_version,
- fragment_bitmap: None,
+ fragment_bitmap: Some(fragment_ids.into_iter().collect()),
  }];
let op = LanceOperation::CreateIndex {
new_indices,
Expand Down

0 comments on commit 18fb870

Please sign in to comment.