From cf2ae4177a599df9b701499db0f71cbb0b002169 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Wed, 8 Jan 2025 17:27:46 +0200 Subject: [PATCH] chore: Renaming fts rebuild for consistency (#60) Closes #59 --- README.md | 4 ++-- chroma_ops/{rebuild_fts.py => fts.py} | 19 ++++++----------- chroma_ops/hnsw.py | 24 ---------------------- chroma_ops/main.py | 6 ++---- tests/{test_rebuild_fts.py => test_fts.py} | 2 +- 5 files changed, 11 insertions(+), 44 deletions(-) rename chroma_ops/{rebuild_fts.py => fts.py} (81%) rename tests/{test_rebuild_fts.py => test_fts.py} (98%) diff --git a/README.md b/README.md index b585539..6357255 100644 --- a/README.md +++ b/README.md @@ -206,13 +206,13 @@ This command rebuilds the full-text search index. #### Python ```bash -chops rebuild-fts /path/to/persist_dir +chops fts rebuild /path/to/persist_dir ``` Change the tokenizer to `unicode61` by passing `--tokenizer unicode61` (or `-t unicode61`) option. ```bash -chops rebuild-fts --tokenizer unicode61 /path/to/persist_dir +chops fts rebuild --tokenizer unicode61 /path/to/persist_dir ``` > See [SQLite FTS5 Tokenizers](https://www.sqlite.org/fts5.html#tokenizers) for more information and available tokenizers and their options. diff --git a/chroma_ops/rebuild_fts.py b/chroma_ops/fts.py similarity index 81% rename from chroma_ops/rebuild_fts.py rename to chroma_ops/fts.py index 21d18e4..cdbaec5 100644 --- a/chroma_ops/rebuild_fts.py +++ b/chroma_ops/fts.py @@ -1,4 +1,3 @@ -import argparse import os import sqlite3 import sys @@ -9,6 +8,8 @@ from chroma_ops.utils import validate_chroma_persist_dir, read_script +fts_commands = typer.Typer() + def validate_tokenizer(tokenizer: str) -> None: valid_tokenizers = ["trigram", "unicode61", "ascii", "porter"] if ( @@ -51,7 +52,7 @@ def rebuild_fts(persist_dir: str, tokenizer: str = "trigram") -> None: ) -def command( +def rebuild_command( persist_dir: str = typer.Argument(..., help="The persist directory"), tokenizer: str = typer.Option( "trigram", @@ -63,14 +64,6 @@ def command( rebuild_fts(persist_dir, tokenizer) -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("persist_dir", type=str) - parser.add_argument( - "--tokenizer", - type=str, - default="trigram", - help="The tokenizer to use for the FTS index. Supported values: 'trigram', 'unicode61', 'ascii', 'porter'. See https://www.sqlite.org/fts5.html#tokenizers", - ) - arg = parser.parse_args() - rebuild_fts(arg.persist_dir, arg.tokenizer) +fts_commands.command( + name="rebuild", help="Rebuilds Full Text Search index.", no_args_is_help=True +)(rebuild_command) diff --git a/chroma_ops/hnsw.py b/chroma_ops/hnsw.py index a4c67f5..d06db92 100644 --- a/chroma_ops/hnsw.py +++ b/chroma_ops/hnsw.py @@ -1,19 +1,15 @@ -import argparse import datetime import json import os -import pickle import shutil import sqlite3 import tempfile import traceback from typing import Any, Dict, Optional, TypedDict -import numpy as np import hnswlib import typer from rich.console import Console from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn -from rich.rule import Rule from rich.table import Table from chroma_ops.utils import ( validate_chroma_persist_dir, @@ -312,23 +308,3 @@ def info_hnsw_command( no_args_is_help=True, )(info_hnsw_command) -if __name__ == "__main__": - parser = argparse.ArgumentParser() - subparsers = parser.add_subparsers(dest='hnsw', help='Available commands') - - # Setup command groups - rebuild = subparsers.add_parser('rebuild', help='Rebuild the HNSW index') - rebuild.add_argument("persist_dir", type=str) - rebuild.add_argument("collection_name", type=str) - rebuild.add_argument("-d", "--database", type=str, default="default_database") - rebuild.set_defaults(func=rebuild_hnsw_command) - info = subparsers.add_parser('info', help='Info about the HNSW index') - info.add_argument("persist_dir", type=str) - info.add_argument("collection_name", type=str) - info.add_argument("-d", "--database", type=str, default="default_database") - info.add_argument("-v", "--verbose", type=bool, default=False) - info.set_defaults(func=info_hnsw_command) - args = parser.parse_args() - if hasattr(args, 'func'): - args.func(args) - diff --git a/chroma_ops/main.py b/chroma_ops/main.py index df7a4e9..4f93da9 100644 --- a/chroma_ops/main.py +++ b/chroma_ops/main.py @@ -1,6 +1,6 @@ import typer -from chroma_ops.rebuild_fts import rebuild_fts +from chroma_ops.fts import fts_commands from chroma_ops.wal_commit import command as commit_wal_command from chroma_ops.wal_clean import command as clean_wal_command from chroma_ops.wal_export import command as export_wal_command @@ -22,9 +22,6 @@ app.command( name="export-wal", help="Exports the WAL to a jsonl file.", no_args_is_help=True )(export_wal_command) -app.command( - name="rebuild-fts", help="Rebuilds Full Text Search index.", no_args_is_help=True -)(rebuild_fts) app.command( name="info", @@ -40,6 +37,7 @@ )(clean_command) app.add_typer(hnsw_commands, name="hnsw", help="HNSW index maintenance commands") +app.add_typer(fts_commands, name="fts", help="Full Text Search index maintenance commands") if __name__ == "__main__": app() diff --git a/tests/test_rebuild_fts.py b/tests/test_fts.py similarity index 98% rename from tests/test_rebuild_fts.py rename to tests/test_fts.py index dd023df..784323f 100644 --- a/tests/test_rebuild_fts.py +++ b/tests/test_fts.py @@ -7,7 +7,7 @@ import chromadb import pytest -from chroma_ops.rebuild_fts import rebuild_fts +from chroma_ops.fts import rebuild_fts def test_rebuild_fts() -> None: