Skip to content

Commit

Permalink
chore: Renaming fts rebuild for consistency (#60)
Browse files Browse the repository at this point in the history
Closes #59
  • Loading branch information
tazarov authored Jan 8, 2025
1 parent 1d296b2 commit cf2ae41
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 44 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -206,13 +206,13 @@ This command rebuilds the full-text search index.
#### Python

```bash
chops rebuild-fts /path/to/persist_dir
chops fts rebuild /path/to/persist_dir
```

Change the tokenizer to `unicode61` by passing the `--tokenizer unicode61` (or `-t unicode61`) option.

```bash
chops rebuild-fts --tokenizer unicode61 /path/to/persist_dir
chops fts rebuild --tokenizer unicode61 /path/to/persist_dir
```

> See [SQLite FTS5 Tokenizers](https://www.sqlite.org/fts5.html#tokenizers) for more information about the available tokenizers and their options.
Expand Down
19 changes: 6 additions & 13 deletions chroma_ops/rebuild_fts.py → chroma_ops/fts.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import argparse
import os
import sqlite3
import sys
Expand All @@ -9,6 +8,8 @@
from chroma_ops.utils import validate_chroma_persist_dir, read_script


fts_commands = typer.Typer()

def validate_tokenizer(tokenizer: str) -> None:
valid_tokenizers = ["trigram", "unicode61", "ascii", "porter"]
if (
Expand Down Expand Up @@ -51,7 +52,7 @@ def rebuild_fts(persist_dir: str, tokenizer: str = "trigram") -> None:
)


def command(
def rebuild_command(
persist_dir: str = typer.Argument(..., help="The persist directory"),
tokenizer: str = typer.Option(
"trigram",
Expand All @@ -63,14 +64,6 @@ def command(
rebuild_fts(persist_dir, tokenizer)


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("persist_dir", type=str)
parser.add_argument(
"--tokenizer",
type=str,
default="trigram",
help="The tokenizer to use for the FTS index. Supported values: 'trigram', 'unicode61', 'ascii', 'porter'. See https://www.sqlite.org/fts5.html#tokenizers",
)
arg = parser.parse_args()
rebuild_fts(arg.persist_dir, arg.tokenizer)
fts_commands.command(
name="rebuild", help="Rebuilds Full Text Search index.", no_args_is_help=True
)(rebuild_command)
24 changes: 0 additions & 24 deletions chroma_ops/hnsw.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
import argparse
import datetime
import json
import os
import pickle
import shutil
import sqlite3
import tempfile
import traceback
from typing import Any, Dict, Optional, TypedDict
import numpy as np
import hnswlib
import typer
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
from rich.rule import Rule
from rich.table import Table
from chroma_ops.utils import (
validate_chroma_persist_dir,
Expand Down Expand Up @@ -312,23 +308,3 @@ def info_hnsw_command(
no_args_is_help=True,
)(info_hnsw_command)

if __name__ == "__main__":
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest='hnsw', help='Available commands')

# Setup command groups
rebuild = subparsers.add_parser('rebuild', help='Rebuild the HNSW index')
rebuild.add_argument("persist_dir", type=str)
rebuild.add_argument("collection_name", type=str)
rebuild.add_argument("-d", "--database", type=str, default="default_database")
rebuild.set_defaults(func=rebuild_hnsw_command)
info = subparsers.add_parser('info', help='Info about the HNSW index')
info.add_argument("persist_dir", type=str)
info.add_argument("collection_name", type=str)
info.add_argument("-d", "--database", type=str, default="default_database")
info.add_argument("-v", "--verbose", type=bool, default=False)
info.set_defaults(func=info_hnsw_command)
args = parser.parse_args()
if hasattr(args, 'func'):
args.func(args)

6 changes: 2 additions & 4 deletions chroma_ops/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import typer

from chroma_ops.rebuild_fts import rebuild_fts
from chroma_ops.fts import fts_commands
from chroma_ops.wal_commit import command as commit_wal_command
from chroma_ops.wal_clean import command as clean_wal_command
from chroma_ops.wal_export import command as export_wal_command
Expand All @@ -22,9 +22,6 @@
app.command(
name="export-wal", help="Exports the WAL to a jsonl file.", no_args_is_help=True
)(export_wal_command)
app.command(
name="rebuild-fts", help="Rebuilds Full Text Search index.", no_args_is_help=True
)(rebuild_fts)

app.command(
name="info",
Expand All @@ -40,6 +37,7 @@
)(clean_command)

app.add_typer(hnsw_commands, name="hnsw", help="HNSW index maintenance commands")
app.add_typer(fts_commands, name="fts", help="Full Text Search index maintenance commands")

if __name__ == "__main__":
app()
2 changes: 1 addition & 1 deletion tests/test_rebuild_fts.py → tests/test_fts.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import chromadb
import pytest

from chroma_ops.rebuild_fts import rebuild_fts
from chroma_ops.fts import rebuild_fts


def test_rebuild_fts() -> None:
Expand Down

0 comments on commit cf2ae41

Please sign in to comment.