From 3d9b136a6724bc2eee486eeec40d9f96a7ecf675 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sun, 22 Oct 2023 18:16:02 -0700 Subject: [PATCH] Upgrade to latest GPT4All. Use Mistral as default offline chat model GPT4All now supports gguf llama.cpp chat models. Latest GPT4All (+mistral) performs at least 3x faster. On a MacBook Pro, response start time is ~10s vs 30s-120s earlier. Mistral is also a better chat model, although it hallucinates more than llama-2 --- pyproject.toml | 4 +- .../migrate_offline_chat_default_model.py | 69 +++++++++++++++++++ .../conversation/gpt4all/chat_model.py | 4 +- .../processor/conversation/gpt4all/utils.py | 4 +- src/khoj/processor/conversation/utils.py | 2 + src/khoj/utils/cli.py | 2 + src/khoj/utils/rawconfig.py | 2 +- tests/conftest.py | 2 +- tests/test_gpt4all_chat_actors.py | 2 +- 9 files changed, 82 insertions(+), 9 deletions(-) create mode 100644 src/khoj/migrations/migrate_offline_chat_default_model.py diff --git a/pyproject.toml b/pyproject.toml index f0cc22344..bac662a4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,8 +60,8 @@ dependencies = [ "bs4 >= 0.0.1", "anyio == 3.7.1", "pymupdf >= 1.23.3", - "gpt4all == 1.0.12; platform_system == 'Linux' and platform_machine == 'x86_64'", - "gpt4all == 1.0.12; platform_system == 'Windows' or platform_system == 'Darwin'", + "gpt4all >= 2.0.0; platform_system == 'Linux' and platform_machine == 'x86_64'", + "gpt4all >= 2.0.0; platform_system == 'Windows' or platform_system == 'Darwin'", ] dynamic = ["version"] diff --git a/src/khoj/migrations/migrate_offline_chat_default_model.py b/src/khoj/migrations/migrate_offline_chat_default_model.py new file mode 100644 index 000000000..b7f4cf94c --- /dev/null +++ b/src/khoj/migrations/migrate_offline_chat_default_model.py @@ -0,0 +1,69 @@ +""" +Current format of khoj.yml +--- +app: + ... +content-type: + ... 
+processor: + conversation: + offline-chat: + enable-offline-chat: false + chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin + ... +search-type: + ... + +New format of khoj.yml +--- +app: + ... +content-type: + ... +processor: + conversation: + offline-chat: + enable-offline-chat: false + chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf + ... +search-type: + ... +""" +import logging +from packaging import version + +from khoj.utils.yaml import load_config_from_file, save_config_to_file + + +logger = logging.getLogger(__name__) + + +def migrate_offline_chat_default_model(args): + schema_version = "0.12.4" + raw_config = load_config_from_file(args.config_file) + previous_version = raw_config.get("version") + + if "processor" not in raw_config: + return args + if raw_config["processor"] is None: + return args + if "conversation" not in raw_config["processor"]: + return args + if "offline-chat" not in raw_config["processor"]["conversation"]: + return args + if "chat-model" not in raw_config["processor"]["conversation"]["offline-chat"]: + return args + + if previous_version is None or version.parse(previous_version) < version.parse("0.12.4"): + logger.info( + f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to mistral GGUF" + ) + raw_config["version"] = schema_version + + # Update offline chat model to mistral in GGUF format to use latest GPT4All + offline_chat_model = raw_config["processor"]["conversation"]["offline-chat"]["chat-model"] + if offline_chat_model.endswith(".bin"): + raw_config["processor"]["conversation"]["offline-chat"]["chat-model"] = "mistral-7b-instruct-v0.1.Q4_0.gguf" + + save_config_to_file(raw_config, args.config_file) + return args diff --git a/src/khoj/processor/conversation/gpt4all/chat_model.py b/src/khoj/processor/conversation/gpt4all/chat_model.py index 7e92d002f..04a004f05 100644 --- a/src/khoj/processor/conversation/gpt4all/chat_model.py +++ 
b/src/khoj/processor/conversation/gpt4all/chat_model.py @@ -16,7 +16,7 @@ def extract_questions_offline( text: str, - model: str = "llama-2-7b-chat.ggmlv3.q4_0.bin", + model: str = "mistral-7b-instruct-v0.1.Q4_0.gguf", loaded_model: Union[Any, None] = None, conversation_log={}, use_history: bool = True, @@ -123,7 +123,7 @@ def converse_offline( references, user_query, conversation_log={}, - model: str = "llama-2-7b-chat.ggmlv3.q4_0.bin", + model: str = "mistral-7b-instruct-v0.1.Q4_0.gguf", loaded_model: Union[Any, None] = None, completion_func=None, conversation_command=ConversationCommand.Default, diff --git a/src/khoj/processor/conversation/gpt4all/utils.py b/src/khoj/processor/conversation/gpt4all/utils.py index d52017805..2bb1fbbc1 100644 --- a/src/khoj/processor/conversation/gpt4all/utils.py +++ b/src/khoj/processor/conversation/gpt4all/utils.py @@ -14,9 +14,9 @@ def download_model(model_name: str): # Use GPU for Chat Model, if available try: model = GPT4All(model_name=model_name, device="gpu") - logger.debug("Loaded chat model to GPU.") + logger.debug(f"Loaded {model_name} chat model to GPU.") except ValueError: model = GPT4All(model_name=model_name) - logger.debug("Loaded chat model to CPU.") + logger.debug(f"Loaded {model_name} chat model to CPU.") return model diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 83d51f2d4..b0d401fa8 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -20,9 +20,11 @@ "gpt-4": 8192, "llama-2-7b-chat.ggmlv3.q4_0.bin": 1548, "gpt-3.5-turbo-16k": 15000, + "mistral-7b-instruct-v0.1.Q4_0.gguf": 1548, } model_to_tokenizer = { "llama-2-7b-chat.ggmlv3.q4_0.bin": "hf-internal-testing/llama-tokenizer", + "mistral-7b-instruct-v0.1.Q4_0.gguf": "mistralai/Mistral-7B-Instruct-v0.1", } diff --git a/src/khoj/utils/cli.py b/src/khoj/utils/cli.py index 1d6106cb0..7c72b101e 100644 --- a/src/khoj/utils/cli.py +++ b/src/khoj/utils/cli.py @@ 
-10,6 +10,7 @@ from khoj.migrations.migrate_processor_config_openai import migrate_processor_conversation_schema from khoj.migrations.migrate_offline_model import migrate_offline_model from khoj.migrations.migrate_offline_chat_schema import migrate_offline_chat_schema +from khoj.migrations.migrate_offline_chat_default_model import migrate_offline_chat_default_model def cli(args=None): @@ -61,6 +62,7 @@ def run_migrations(args): migrate_processor_conversation_schema, migrate_offline_model, migrate_offline_chat_schema, + migrate_offline_chat_default_model, ] for migration in migrations: args = migration(args) diff --git a/src/khoj/utils/rawconfig.py b/src/khoj/utils/rawconfig.py index f7c42266d..cc4fe2088 100644 --- a/src/khoj/utils/rawconfig.py +++ b/src/khoj/utils/rawconfig.py @@ -93,7 +93,7 @@ class OpenAIProcessorConfig(ConfigBase): class OfflineChatProcessorConfig(ConfigBase): enable_offline_chat: Optional[bool] = False - chat_model: Optional[str] = "llama-2-7b-chat.ggmlv3.q4_0.bin" + chat_model: Optional[str] = "mistral-7b-instruct-v0.1.Q4_0.gguf" class ConversationProcessorConfig(ConfigBase): diff --git a/tests/conftest.py b/tests/conftest.py index f75dfcebb..8b661f504 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -206,7 +206,7 @@ def processor_config_offline_chat(tmp_path_factory): # Setup conversation processor processor_config = ProcessorConfig() - offline_chat = OfflineChatProcessorConfig(enable_offline_chat=True) + offline_chat = OfflineChatProcessorConfig(enable_offline_chat=True, chat_model="mistral-7b-instruct-v0.1.Q4_0.gguf") processor_config.conversation = ConversationProcessorConfig( offline_chat=offline_chat, conversation_logfile=processor_dir.joinpath("conversation_logs.json"), diff --git a/tests/test_gpt4all_chat_actors.py b/tests/test_gpt4all_chat_actors.py index 76ed26e7a..782b54f20 100644 --- a/tests/test_gpt4all_chat_actors.py +++ b/tests/test_gpt4all_chat_actors.py @@ -24,7 +24,7 @@ from khoj.processor.conversation.utils import 
message_to_log -MODEL_NAME = "llama-2-7b-chat.ggmlv3.q4_0.bin" +MODEL_NAME = "mistral-7b-instruct-v0.1.Q4_0.gguf" @pytest.fixture(scope="session")