diff --git a/README.md b/README.md
index 47dbd5ac3f..24764f2685 100644
--- a/README.md
+++ b/README.md
@@ -223,6 +223,7 @@ $ xinference registrations
 | LLM | vicuna-v1.5-16k | ['en'] | ['embed', 'chat'] |
 | LLM | wizardlm-v1.0 | ['en'] | ['embed', 'chat'] |
 | LLM | wizardmath-v1.0 | ['en'] | ['embed', 'chat'] |
+| LLM | OpenBuddy-v11.1 | ['en', 'zh'] | ['embed', 'chat'] |

 For in-depth details on the built-in models, please refer to [built-in models](https://inference.readthedocs.io/en/latest/models/builtin/index.html).

diff --git a/README_ja_JP.md b/README_ja_JP.md
index f8a8ae46c8..ae9b81c36b 100644
--- a/README_ja_JP.md
+++ b/README_ja_JP.md
@@ -202,6 +202,7 @@ $ xinference registrations
 | LLM | vicuna-v1.5-16k | ['en'] | ['embed', 'chat'] |
 | LLM | wizardlm-v1.0 | ['en'] | ['embed', 'chat'] |
 | LLM | wizardmath-v1.0 | ['en'] | ['embed', 'chat'] |
+| LLM | OpenBuddy-v11.1 | ['en', 'zh'] | ['embed', 'chat'] |

 **注**:
 - Xinference は自動的にモデルをダウンロードし、デフォルトでは `${USER}/.xinference/cache` の下に保存されます。

diff --git a/README_zh_CN.md b/README_zh_CN.md
index 20f09c8007..698cf56c69 100644
--- a/README_zh_CN.md
+++ b/README_zh_CN.md
@@ -205,6 +205,7 @@ $ xinference registrations
 | LLM | vicuna-v1.5-16k | ['en'] | ['embed', 'chat'] |
 | LLM | wizardlm-v1.0 | ['en'] | ['embed', 'chat'] |
 | LLM | wizardmath-v1.0 | ['en'] | ['embed', 'chat'] |
+| LLM | OpenBuddy-v11.1 | ['en', 'zh'] | ['embed', 'chat'] |

 更多信息请参考 [内置模型](https://inference.readthedocs.io/en/latest/models/builtin/index.html)。

diff --git a/doc/source/models/builtin/index.rst b/doc/source/models/builtin/index.rst
index b60ab6d411..f84e112e1a 100644
--- a/doc/source/models/builtin/index.rst
+++ b/doc/source/models/builtin/index.rst
@@ -38,6 +38,7 @@ Chat & Instruction-following Models
 - :ref:`Vicuna v1.5 16k <models_builtin_vicuna_v1.5_16k>`
 - :ref:`WizardLM v1.0 <models_builtin_wizardlm_v1.0>`
 - :ref:`WizardMath v1.0 <models_builtin_wizardmath_v1.0>`
+- :ref:`OpenBuddy v11.1 <models_builtin_openbuddy_v11.1>`

 Code Generation Models

diff --git a/doc/source/models/builtin/openbuddy.rst b/doc/source/models/builtin/openbuddy.rst
new file mode 100644
index 0000000000..7e9d2c3c8d
--- /dev/null
+++ b/doc/source/models/builtin/openbuddy.rst
@@ -0,0 +1,29 @@
+.. _models_builtin_openbuddy_v11.1:
+
+=========
+OpenBuddy
+=========
+
+- **Model Name:** OpenBuddy
+- **Languages:** en, zh
+- **Abilities:** embed, chat
+
+Specifications
+^^^^^^^^^^^^^^
+
+Model Spec 1 (ggmlv3, 13 Billion)
++++++++++++++++++++++++++++++++++
+
+- **Model Format:** ggmlv3
+- **Model Size (in billions):** 13
+- **Quantizations:** Q2_K, Q3_K_S, Q3_K_M, Q3_K_L, Q4_0, Q4_1, Q4_K_S, Q4_K_M, Q5_0, Q5_1, Q5_K_S, Q5_K_M, Q6_K, Q8_0
+- **Model ID:** TheBloke/OpenBuddy-Llama2-13B-v11.1-GGML
+
+Execute the following command to launch the model, remembering to replace ``${quantization}``
+with your chosen quantization method from the options listed above::
+
+   xinference launch --model-name OpenBuddy --size-in-billions 13 --model-format ggmlv3 --quantization ${quantization}
+
+.. note::
+
+   Multi-round chat is disabled for this model to improve translation quality.

diff --git a/examples/AI_translate.py b/examples/AI_translate.py
new file mode 100644
index 0000000000..546325dd0c
--- /dev/null
+++ b/examples/AI_translate.py
@@ -0,0 +1,75 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import logging
+import os.path
+
+from xinference.client import Client
+
+logger = logging.getLogger(__name__)
+
+
+def _prompt(text):
+    return f"Translate the English text to Chinese: {text}"
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-e",
+        "--endpoint",
+        type=str,
+        help="Xinference endpoint, required",
+        required=True,
+    )
+    parser.add_argument("-i", "--input", type=str, help="Input text", required=True)
+
+    args = parser.parse_args()
+    endpoint = args.endpoint
+    logger.info("Connect to xinference server: %s", endpoint)
+    client = Client(endpoint)
+
+    logger.info("Launch model.")
+    model_uid = client.launch_model(
+        model_name="OpenBuddy",
+        model_format="ggmlv3",
+        model_size_in_billions=13,
+        quantization="Q4_1",
+        n_ctx=2048,
+    )
+    translator_model = client.get_model(model_uid)
+
+    logger.info("Read %s", args.input)
+    with open(args.input, "r") as f:
+        eng = f.read()
+
+    paragraphs = eng.split("\n\n")
+    logger.info("%s contains %s paragraphs.", args.input, len(paragraphs))
+    base, ext = os.path.splitext(args.input)
+    output = f"{base}_translated{ext}"
+    logger.info("Translated output: %s", output)
+    with open(output, "w") as f:
+        for idx, text_string in enumerate(paragraphs, 1):
+            logger.info(
+                "[%s/%s] Translate: %.10s...", idx, len(paragraphs), text_string
+            )
+            completion = translator_model.chat(
+                _prompt(text_string), generate_config={"temperature": 0.23}
+            )
+            content = completion["choices"][0]["message"]["content"]
+            stripped_content = content.split("\n")[0]
+            logger.info("%s", stripped_content)
+            f.write(stripped_content + "\n")

diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json
index f387762056..9f560a08ec 100644
--- a/xinference/model/llm/llm_family.json
+++ b/xinference/model/llm/llm_family.json
@@ -1728,5 +1728,50 @@
         "model_file_name_template": "ggml-model-{quantization}.gguf"
       }
     ]
-  }
+  },
+  {
+    "version": 1,
+    "context_length": 2048,
+    "model_name": "OpenBuddy",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "embed",
+      "chat"
+    ],
+    "model_description": "OpenBuddy is a powerful open multilingual chatbot model aimed at global users, emphasizing conversational AI and seamless multilingual support for English, Chinese, and other languages. Chat history is disabled for this model to improve translation quality.",
+    "model_specs": [
+      {
+        "model_format": "ggmlv3",
+        "model_size_in_billions": 13,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_S",
+          "Q3_K_M",
+          "Q3_K_L",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_S",
+          "Q4_K_M",
+          "Q5_0",
+          "Q5_1",
+          "Q5_K_S",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "TheBloke/OpenBuddy-Llama2-13B-v11.1-GGML",
+        "model_file_name_template": "openbuddy-llama2-13b-v11.1.ggmlv3.{quantization}.bin"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "INSTRUCTION",
+      "system_prompt": "You are a professional translator. Be faithful and accurate in translation. Make the translation readable and intelligible. Be elegant and natural in translation. Do not translate people's names. Do not add any additional text to the translation. Do not give me any comments or suggestions.\nUser:\n\n{0}\nAssistant:",
+      "roles": ["User", "Assistant"],
+      "intra_message_sep": "",
+      "inter_message_sep": ""
+    }
+  }
 ]

diff --git a/xinference/model/llm/utils.py b/xinference/model/llm/utils.py
index a8a31490ae..e3c467aaac 100644
--- a/xinference/model/llm/utils.py
+++ b/xinference/model/llm/utils.py
@@ -174,6 +174,9 @@ def get_prompt(
             else:
                 ret += role + ": Let's think step by step."
             return ret
+        elif prompt_style.style_name == "INSTRUCTION":
+            message = chat_history[-2]
+            return prompt_style.system_prompt.format(message["content"])
         else:
             raise ValueError(f"Invalid prompt style: {prompt_style.style_name}")
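
Reviewer note: the new INSTRUCTION branch formats only the latest user message into the
system prompt, which is why multi-round chat is effectively disabled for this model.
Below is a minimal sketch (not part of the patch, with a shortened illustrative system
prompt) of how it renders, assuming that chat_history ends with an empty assistant
placeholder so that chat_history[-2] is the latest user message::

    # Sketch of the INSTRUCTION branch added to get_prompt.
    # Assumption: chat_history ends with an empty assistant placeholder,
    # so chat_history[-2] is the latest user message.
    system_prompt = (
        "You are a professional translator. "
        "Do not add any additional text to the translation.\n"
        "User:\n\n{0}\nAssistant:"
    )
    chat_history = [
        {"role": "User", "content": "Hello, world!"},
        {"role": "Assistant", "content": ""},  # placeholder the model completes
    ]
    message = chat_history[-2]  # latest user message, not the placeholder
    print(system_prompt.format(message["content"]))
    # Output:
    # You are a professional translator. Do not add any additional text to the translation.
    # User:
    #
    # Hello, world!
    # Assistant:

Given the argparse flags defined in the new example, it can be run as, e.g.,
``python examples/AI_translate.py -e http://127.0.0.1:9997 -i input.txt`` (the endpoint
shown is illustrative).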