Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions python/python/lance/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2338,6 +2338,7 @@ def create_scalar_index(
Literal["BITMAP"],
Literal["LABEL_LIST"],
Literal["INVERTED"],
Literal["FTS"],
Literal["NGRAM"],
Literal["ZONEMAP"],
Literal["BLOOMFILTER"],
Expand Down Expand Up @@ -2407,8 +2408,9 @@ def create_scalar_index(
called zones and stores summary statistics for each zone (min, max,
null_count, nan_count, fragment_id, local_row_offset). It's very small but
only effective if the column is at least approximately in sorted order.
* ``INVERTED``. It is used to index document columns. This index
can conduct full-text searches. For example, a column that contains any word
* ``INVERTED`` (alias: ``FTS``). It is used to index document columns. This
index can conduct full-text searches. For example, a column that contains any
word
of query string "hello world". The results will be ranked by BM25.
* ``BLOOMFILTER``. This inexact index uses a bloom filter. It is small
but can only handle filters with equals and not equals and may require
Expand All @@ -2428,7 +2430,7 @@ def create_scalar_index(
index_type : str
The type of the index. One of ``"BTREE"``, ``"BITMAP"``,
``"LABEL_LIST"``, ``"NGRAM"``, ``"ZONEMAP"``, ``"INVERTED"``,
``"BLOOMFILTER"``, ``"RTREE"``.
``"FTS"``, ``"BLOOMFILTER"``, ``"RTREE"``.
name : str, optional
The index name. If not provided, it will be generated from the
column name.
Expand Down Expand Up @@ -2548,6 +2550,7 @@ def create_scalar_index(
"ZONEMAP",
"LABEL_LIST",
"INVERTED",
"FTS",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it would be easier to just do something like...

if index_type.upper() == "FTS":
  index_type = "INVERTED"

...at the top of this method. That way, if we add other checks for `INVERTED` in the future, we won't have to worry about maintaining the `FTS` alias.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried this, but it may be confusing if people switch back to the Rust library and it doesn't work there. I intend to make sure both `FTS` and `INVERTED` work in all places.

"BLOOMFILTER",
"RTREE",
]:
Expand Down Expand Up @@ -2587,7 +2590,7 @@ def create_scalar_index(
field_type
):
raise TypeError(f"NGRAM index column {column} must be a string")
elif index_type in ["INVERTED"]:
elif index_type in ["INVERTED", "FTS"]:
value_type = field_type
if pa.types.is_list(field_type) or pa.types.is_large_list(field_type):
value_type = field_type.value_type
Expand Down
5 changes: 5 additions & 0 deletions python/python/tests/test_scalar_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,11 @@ def test_filter_with_fts_index(dataset):
assert query == row.as_py()


def test_create_scalar_index_fts_alias(dataset):
    """Check that ``index_type="FTS"`` yields an index reported as ``"Inverted"``."""
    # Build the index on the "doc" column through the alias spelling.
    dataset.create_scalar_index(
        "doc",
        index_type="FTS",
        with_position=False,
    )
    # At least one listed index must report the "Inverted" type.
    reported_types = (entry["type"] for entry in dataset.list_indices())
    assert "Inverted" in reported_types


def test_multi_index_create(tmp_path):
dataset = lance.write_dataset(
pa.table({"ints": range(1024)}), tmp_path, max_rows_per_file=100
Expand Down
6 changes: 3 additions & 3 deletions python/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1822,7 +1822,7 @@ impl Dataset {
"BLOOMFILTER" => IndexType::BloomFilter,
"LABEL_LIST" => IndexType::LabelList,
"RTREE" => IndexType::RTree,
"INVERTED" => IndexType::Inverted,
"INVERTED" | "FTS" => IndexType::Inverted,
"IVF_FLAT" | "IVF_PQ" | "IVF_SQ" | "IVF_RQ" | "IVF_HNSW_FLAT" | "IVF_HNSW_PQ"
| "IVF_HNSW_SQ" => IndexType::Vector,
_ => {
Expand Down Expand Up @@ -1879,7 +1879,7 @@ impl Dataset {
params: Some(config.config.clone()),
})
}
"INVERTED" => {
"INVERTED" | "FTS" => {
let mut params = InvertedIndexParams::default();
if let Some(kwargs) = kwargs {
if let Some(with_position) = kwargs.get_item("with_position")? {
Expand Down Expand Up @@ -2031,7 +2031,7 @@ impl Dataset {
index_type_up
);
match index_type_up.as_str() {
"INVERTED" => {
"INVERTED" | "FTS" => {
// Call merge_index_files function for inverted index
lance_index::scalar::inverted::builder::merge_index_files(
self.ds.object_store(),
Expand Down