From fa22c5be638f54491ce834d8d1a984de4fa5c54c Mon Sep 17 00:00:00 2001
From: Wesley Stewart
Date: Thu, 4 Apr 2024 16:59:42 +0000
Subject: [PATCH 1/3] Moved prompt_style to the main LLM settings, since all
 LLMs from llama_index can utilize it. Also passed temperature, context
 window size, max_tokens and max_new_tokens into the openailike
 implementation to keep its settings consistent with the other
 implementations.

---
 private_gpt/components/llm/llm_component.py | 11 ++++++++---
 private_gpt/settings/settings.py            | 11 +++++++++++
 settings.yaml                               |  1 +
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
index dae997cc2..a2bf8169f 100644
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@@ -40,7 +40,7 @@ def __init__(self, settings: Settings) -> None:
                         "Local dependencies not found, install with `poetry install --extras llms-llama-cpp`"
                     ) from e

-                prompt_style = get_prompt_style(settings.llamacpp.prompt_style)
+                prompt_style = get_prompt_style(settings.llm.prompt_style)
                 settings_kwargs = {
                     "tfs_z": settings.llamacpp.tfs_z,  # ollama and llama-cpp
                     "top_k": settings.llamacpp.top_k,  # ollama and llama-cpp
@@ -98,15 +98,20 @@ def __init__(self, settings: Settings) -> None:
                     raise ImportError(
                         "OpenAILike dependencies not found, install with `poetry install --extras llms-openai-like`"
                     ) from e
-
+                prompt_style = get_prompt_style(settings.llm.prompt_style)
                 openai_settings = settings.openai
                 self.llm = OpenAILike(
                     api_base=openai_settings.api_base,
                     api_key=openai_settings.api_key,
                     model=openai_settings.model,
                     is_chat_model=True,
-                    max_tokens=None,
+                    max_tokens=settings.llm.max_new_tokens,
                     api_version="",
+                    temperature=settings.llm.temperature,
+                    context_window=settings.llm.context_window,
+                    max_new_tokens=settings.llm.max_new_tokens,
+                    messages_to_prompt=prompt_style.messages_to_prompt,
+                    completion_to_prompt=prompt_style.completion_to_prompt,
                 )
             case "ollama":
                 try:
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 5df681141..cecef1b12 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -104,6 +104,17 @@ class LLMSettings(BaseModel):
         0.1,
         description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
     )
+    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
+        "llama2",
+        description=(
+            "The prompt style to use for the chat engine. "
+            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
+            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
+            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
+            "If `mistral` - use the `mistral` prompt style. It should look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]\n"
+            "`llama2` is the historic behaviour. `default` might work better with your custom models."
+        ),
+    )


 class VectorstoreSettings(BaseModel):
diff --git a/settings.yaml b/settings.yaml
index dfd719bc4..807d79df0 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -36,6 +36,7 @@ ui:

 llm:
   mode: llamacpp
+  prompt_style: "mistral" # Should be matching the selected model
   max_new_tokens: 512
   context_window: 3900

From d4386c3f79c4c905ebc334cdf748343f821b32ea Mon Sep 17 00:00:00 2001
From: Wesley Stewart
Date: Sun, 28 Apr 2024 16:53:23 +0000
Subject: [PATCH 2/3] Removed prompt_style from llamacpp entirely

---
 private_gpt/settings/settings.py | 12 ------------
 settings.yaml                    |  1 -
 2 files changed, 13 deletions(-)

diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index b47c5344e..c4c5e20da 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -128,18 +128,6 @@ class NodeStoreSettings(BaseModel):
 class LlamaCPPSettings(BaseModel):
     llm_hf_repo_id: str
     llm_hf_model_file: str
-    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
-        "llama2",
-        description=(
-            "The prompt style to use for the chat engine. "
-            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
-            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
-            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
-            "If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
-            "`llama2` is the historic behaviour. `default` might work better with your custom models."
-        ),
-    )
-
     tfs_z: float = Field(
         1.0,
         description="Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.",
diff --git a/settings.yaml b/settings.yaml
index 7b4532f76..d8d2500cd 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -54,7 +54,6 @@ rag:
   top_n: 1

 llamacpp:
-  prompt_style: "mistral"
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
   llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
   tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting

From e06c39e4aae382c1df0bb3fb908e41c624628b92 Mon Sep 17 00:00:00 2001
From: Wesley Stewart
Date: Sun, 28 Apr 2024 17:02:50 +0000
Subject: [PATCH 3/3] Fixed settings-local.yaml to include prompt_style in the
 LLM settings instead of llamacpp.

---
 settings-local.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/settings-local.yaml b/settings-local.yaml
index c9d027420..48eeb0ea0 100644
--- a/settings-local.yaml
+++ b/settings-local.yaml
@@ -8,9 +8,9 @@ llm:
   max_new_tokens: 512
   context_window: 3900
   tokenizer: mistralai/Mistral-7B-Instruct-v0.2
+  prompt_style: "mistral"

 llamacpp:
-  prompt_style: "mistral"
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
   llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf

@@ -24,4 +24,4 @@ vectorstore:
   database: qdrant

 qdrant:
-  path: local_data/private_gpt/qdrant
\ No newline at end of file
+  path: local_data/private_gpt/qdrant
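
Note on the resulting configuration: once this series is applied, prompt_style (together with temperature, context_window and max_new_tokens) is read from the top-level llm settings and forwarded to the OpenAILike constructor, so an OpenAI-compatible backend gets the same prompt formatting as the local llama-cpp path. The sketch below shows what a matching settings override could look like; the api_base URL, api_key and model name are placeholders, and only the key names (the llm.* fields and the settings.openai fields referenced in llm_component.py) come from the diffs above.

llm:
  mode: openailike
  prompt_style: "mistral"   # moved here from the llamacpp section by this series
  max_new_tokens: 512       # forwarded to OpenAILike as max_tokens / max_new_tokens
  context_window: 3900      # forwarded to OpenAILike as context_window
  temperature: 0.1          # forwarded to OpenAILike as temperature

openai:
  api_base: http://localhost:8000/v1        # placeholder: any OpenAI-compatible server
  api_key: EMPTY                            # placeholder
  model: mistralai/Mistral-7B-Instruct-v0.2 # placeholder model name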