DOC: auto gen vllm doc & add chatglm3-{32k, 128k} support for vllm #1234

Merged
merged 5 commits into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions doc/source/gen_docs.py
@@ -14,12 +14,23 @@

import json
import os
from collections import defaultdict

from jinja2 import Environment, FileSystemLoader
from xinference.model.llm.vllm.core import VLLM_INSTALLED, VLLM_SUPPORTED_MODELS, VLLM_SUPPORTED_CHAT_MODELS

MODEL_HUB_HUGGING_FACE = "Hugging Face"
MODEL_HUB_MODELSCOPE = "ModelScope"


def gen_vllm_models():
prefix_to_models = defaultdict(list)
for model in VLLM_SUPPORTED_MODELS + VLLM_SUPPORTED_CHAT_MODELS:
prefix = model.split('-', 1)[0]
prefix_to_models[prefix].append(model)
return [list(v) for _, v in prefix_to_models.items()]


def get_metrics_from_url(metrics_url):
from prometheus_client.parser import text_string_to_metric_families
import requests
@@ -193,6 +204,21 @@ def get_unique_id(spec):
rendered_index = env.get_template('audio_index.rst.jinja').render(models=sorted_models)
file.write(rendered_index)

if VLLM_INSTALLED:
vllm_models = gen_vllm_models()
groups = [', '.join("``%s``" % m for m in group) for group in vllm_models]
vllm_model_str = '\n'.join('- %s' % group for group in groups)
for fn in ['getting_started/installation.rst', 'user_guide/backends.rst']:
with open(fn) as f:
content = f.read()
start_label = '.. vllm_start'
end_label = '.. vllm_end'
start = content.find(start_label) + len(start_label)
end = content.find(end_label)
new_content = content[:start] + '\n\n' + vllm_model_str + '\n' + content[end:]
with open(fn, 'w') as f:
f.write(new_content)

try:
output_dir = './user_guide'
os.makedirs(output_dir, exist_ok=True)
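The hunk above adds two pieces: `gen_vllm_models()`, which groups the vLLM-supported model names by the text before their first `-`, and a splice step that rewrites whatever sits between the `.. vllm_start` and `.. vllm_end` markers in the RST files. The following is a minimal, self-contained sketch of that flow; the model names and the sample document are illustrative, not taken from `xinference.model.llm.vllm.core`, and the helper names are mine:

```python
from collections import defaultdict

# Illustrative subset; the real lists are VLLM_SUPPORTED_MODELS and
# VLLM_SUPPORTED_CHAT_MODELS from xinference.model.llm.vllm.core.
SAMPLE_MODELS = [
    "llama-2", "llama-2-chat",
    "chatglm3", "chatglm3-32k", "chatglm3-128k",
    "qwen-chat", "qwen1.5-chat",
]


def group_by_prefix(models):
    # Group names by the text before the first '-', mirroring gen_vllm_models().
    prefix_to_models = defaultdict(list)
    for model in models:
        prefix_to_models[model.split('-', 1)[0]].append(model)
    return list(prefix_to_models.values())


def splice_between_markers(content, bullet_list,
                           start_label='.. vllm_start', end_label='.. vllm_end'):
    # Replace everything between the two RST comment markers with the fresh list,
    # the same way the new gen_docs.py code rewrites installation.rst and backends.rst.
    start = content.find(start_label) + len(start_label)
    end = content.find(end_label)
    return content[:start] + '\n\n' + bullet_list + '\n' + content[end:]


groups = group_by_prefix(SAMPLE_MODELS)
bullets = '\n'.join('- ' + ', '.join('``%s``' % m for m in g) for g in groups)
doc = ".. vllm_start\n\n- stale list\n.. vllm_end\n"
print(splice_between_markers(doc, bullets))
# Between the markers, the spliced document now reads:
# - ``llama-2``, ``llama-2-chat``
# - ``chatglm3``, ``chatglm3-32k``, ``chatglm3-128k``
# - ``qwen-chat``
# - ``qwen1.5-chat``
```

Because the grouping keys on the first `-`, newly added variants such as `chatglm3-32k` and `chatglm3-128k` land on the same bullet as `chatglm3` without any manual edit to the RST files.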
27 changes: 18 additions & 9 deletions doc/source/getting_started/installation.rst
@@ -26,22 +26,31 @@ vLLM Backend
~~~~~~~~~~~~
vLLM is a fast and easy-to-use library for LLM inference and serving. Xinference will choose vLLM as the backend to achieve better throughput when the following conditions are met:

- The model format is PyTorch or GPTQ
- The quantization method is GPTQ 4 bit or none
- The model format is ``pytorch``, ``gptq`` or ``awq``.
- When the model format is ``pytorch``, the quantization is ``none``.
- When the model format is ``gptq`` or ``awq``, the quantization is ``Int4``.
- The system is Linux and has at least one CUDA device
- The model is within the list of models supported by vLLM.
- The model family (for custom models) / model name (for builtin models) is within the list of models supported by vLLM

Currently, supported models include:

.. vllm_start

- ``llama-2``, ``llama-2-chat``
- ``baichuan``, ``baichuan-chat``
- ``internlm``, ``internlm-20b``, ``internlm-chat``, ``internlm-chat-20b``
- ``vicuna-v1.3``, ``vicuna-v1.5``
- ``baichuan``, ``baichuan-chat``, ``baichuan-2-chat``
- ``internlm-16k``, ``internlm-chat-7b``, ``internlm-chat-8k``, ``internlm-chat-20b``
- ``mistral-v0.1``, ``mistral-instruct-v0.1``, ``mistral-instruct-v0.2``
- ``Yi``, ``Yi-chat``
- ``qwen-chat``
- ``code-llama``, ``code-llama-python``, ``code-llama-instruct``
- ``mistral-instruct-v0.1``
- ``chatglm3``
- ``vicuna-v1.3``, ``vicuna-v1.5``
- ``qwen-chat``
- ``mixtral-instruct-v0.1``
- ``chatglm3``, ``chatglm3-32k``, ``chatglm3-128k``
- ``deepseek-chat``, ``deepseek-coder-instruct``
- ``qwen1.5-chat``
- ``gemma-it``
- ``orion-chat``, ``orion-chat-rag``
.. vllm_end

To install Xinference and vLLM::

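The selection rules documented in the hunk above reduce to a simple predicate. Here is a minimal sketch of that decision, assuming hypothetical parameter names (`model_name`, `model_format`, `quantization`, `has_cuda_device`) and an illustrative supported-model set; Xinference's real selection logic lives in its own codebase and may differ in detail:

```python
import platform

# Illustrative subset of the supported-model list shown above.
VLLM_SUPPORTED = {"chatglm3", "chatglm3-32k", "chatglm3-128k", "qwen-chat"}


def would_use_vllm(model_name, model_format, quantization, has_cuda_device):
    # System must be Linux with at least one CUDA device.
    if platform.system() != "Linux" or not has_cuda_device:
        return False
    # The model family / name must be in the vLLM-supported list.
    if model_name not in VLLM_SUPPORTED:
        return False
    # pytorch format requires no quantization; gptq/awq require Int4.
    if model_format == "pytorch":
        return quantization == "none"
    if model_format in ("gptq", "awq"):
        return quantization == "Int4"
    return False


# e.g. would_use_vllm("chatglm3-32k", "pytorch", "none", True) -> True
# e.g. would_use_vllm("chatglm3-32k", "gptq", "Int8", True)    -> False
```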
118 changes: 78 additions & 40 deletions doc/source/locale/zh_CN/LC_MESSAGES/getting_started/installation.po
@@ -7,7 +7,7 @@ msgid ""
msgstr ""
"Project-Id-Version: Xinference \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-12-29 11:05+0800\n"
"POT-Creation-Date: 2024-04-02 15:27+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@@ -16,7 +16,7 @@ msgstr ""
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.13.0\n"
"Generated-By: Babel 2.14.0\n"

#: ../../source/getting_started/installation.rst:5
msgid "Installation"
@@ -80,94 +80,132 @@ msgstr ""
" 会自动选择 vllm 作为引擎来达到更高的吞吐量:"

#: ../../source/getting_started/installation.rst:29
msgid "The model format is PyTorch or GPTQ"
msgstr "模型的格式必须是 PyTorch 或者 GPTQ"
msgid "The model format is ``pytorch``, ``gptq`` or ``awq``."
msgstr "模型格式为 ``pytorch`` , ``gptq`` 或者 ``awq`` 。"

#: ../../source/getting_started/installation.rst:30
msgid "The quantization method is GPTQ 4 bit or none"
msgstr "量化方式必须是 GPTQ 4 bit 或者 none"
msgid "When the model format is ``pytorch``, the quantization is ``none``."
msgstr "当模型格式为 ``pytorch`` 时,量化选项需为 ``none`` 。"

#: ../../source/getting_started/installation.rst:31
msgid "The system is Linux and has at least one CUDA device"
msgstr "运行的操作系统必须是 Linux 且至少有一张支持 CUDA 的显卡"
msgid ""
"When the model format is ``gptq`` or ``awq``, the quantization is "
"``Int4``."
msgstr "当模型格式为 ``gptq`` 或 ``awq`` 时,量化选项需为 ``Int4`` 。"

#: ../../source/getting_started/installation.rst:32
msgid "The model is within the list of models supported by vLLM."
msgstr "运行的模型必须在 vLLM 引擎的支持列表里"
msgid "The system is Linux and has at least one CUDA device"
msgstr "操作系统为 Linux 并且至少有一个支持 CUDA 的设备"

#: ../../source/getting_started/installation.rst:34
#: ../../source/getting_started/installation.rst:33
msgid ""
"The model family (for custom models) / model name (for builtin models) is"
" within the list of models supported by vLLM"
msgstr "自定义模型的 ``model_family`` 字段和内置模型的 ``model_name`` 字段在 vLLM"
" 的支持列表中。"

#: ../../source/getting_started/installation.rst:35
msgid "Currently, supported models include:"
msgstr "目前,支持的模型包括:"

#: ../../source/getting_started/installation.rst:36
#: ../../source/getting_started/installation.rst:39
msgid "``llama-2``, ``llama-2-chat``"
msgstr "``llama-2``, ``llama-2-chat``"

#: ../../source/getting_started/installation.rst:37
msgid "``baichuan``, ``baichuan-chat``"
msgstr "``baichuan``, ``baichuan-chat``"
#: ../../source/getting_started/installation.rst:40
msgid "``baichuan``, ``baichuan-chat``, ``baichuan-2-chat``"
msgstr "``baichuan``, ``baichuan-chat``, ``baichuan-2-chat``"

#: ../../source/getting_started/installation.rst:38
msgid "``internlm``, ``internlm-20b``, ``internlm-chat``, ``internlm-chat-20b``"
msgstr "``internlm``, ``internlm-20b``, ``internlm-chat``, ``internlm-chat-20b``"
#: ../../source/getting_started/installation.rst:41
msgid ""
"``internlm-16k``, ``internlm-chat-7b``, ``internlm-chat-8k``, ``internlm-"
"chat-20b``"
msgstr "``internlm-16k``, ``internlm-chat-7b``, ``internlm-chat-8k``, ``internlm-"
"chat-20b``"

#: ../../source/getting_started/installation.rst:39
msgid "``vicuna-v1.3``, ``vicuna-v1.5``"
msgstr "``vicuna-v1.3``, ``vicuna-v1.5``"
#: ../../source/getting_started/installation.rst:42
msgid "``mistral-v0.1``, ``mistral-instruct-v0.1``, ``mistral-instruct-v0.2``"
msgstr ""

#: ../../source/getting_started/installation.rst:40
#: ../../source/getting_started/installation.rst:43
msgid "``Yi``, ``Yi-chat``"
msgstr "``Yi``, ``Yi-chat``"

#: ../../source/getting_started/installation.rst:41
msgid "``qwen-chat``"
msgstr "``qwen-chat``"

#: ../../source/getting_started/installation.rst:42
#: ../../source/getting_started/installation.rst:44
msgid "``code-llama``, ``code-llama-python``, ``code-llama-instruct``"
msgstr "``code-llama``, ``code-llama-python``, ``code-llama-instruct``"

#: ../../source/getting_started/installation.rst:43
msgid "``mistral-instruct-v0.1``"
#: ../../source/getting_started/installation.rst:45
msgid "``vicuna-v1.3``, ``vicuna-v1.5``"
msgstr "``vicuna-v1.3``, ``vicuna-v1.5``"

#: ../../source/getting_started/installation.rst:46
msgid "``qwen-chat``"
msgstr "``qwen-chat``"

#: ../../source/getting_started/installation.rst:47
msgid "``mixtral-instruct-v0.1``"
msgstr "``mixtral-instruct-v0.1``"

#: ../../source/getting_started/installation.rst:44
msgid "``chatglm3``"
msgstr "``chatglm3``"
#: ../../source/getting_started/installation.rst:48
msgid "``chatglm3``, ``chatglm3-32k``, ``chatglm3-128k``"
msgstr ""

#: ../../source/getting_started/installation.rst:46
#: ../../source/getting_started/installation.rst:49
msgid "``deepseek-chat``, ``deepseek-coder-instruct``"
msgstr ""

#: ../../source/getting_started/installation.rst:50
msgid "``qwen1.5-chat``"
msgstr "``qwen1.5-chat``"

#: ../../source/getting_started/installation.rst:51
msgid "``gemma-it``"
msgstr ""

#: ../../source/getting_started/installation.rst:52
msgid "``orion-chat``, ``orion-chat-rag``"
msgstr ""

#: ../../source/getting_started/installation.rst:55
msgid "To install Xinference and vLLM::"
msgstr "安装 xinference 和 vLLM:"

#: ../../source/getting_started/installation.rst:53
#: ../../source/getting_started/installation.rst:62
msgid "GGML Backend"
msgstr "GGML 引擎"

#: ../../source/getting_started/installation.rst:54
#: ../../source/getting_started/installation.rst:63
msgid ""
"It's advised to install the GGML dependencies manually based on your "
"hardware specifications to enable acceleration."
msgstr ""
"当使用 GGML 引擎时,建议根据当前使用的硬件手动安装依赖,从而获得最佳的"
"加速效果。"

#: ../../source/getting_started/installation.rst:56
#: ../../source/getting_started/installation.rst:65
msgid "Initial setup::"
msgstr "初始步骤:"

#: ../../source/getting_started/installation.rst:61
#: ../../source/getting_started/installation.rst:70
msgid "Hardware-Specific installations:"
msgstr "不同硬件的安装方式:"

#: ../../source/getting_started/installation.rst:63
#: ../../source/getting_started/installation.rst:72
msgid "Apple Silicon::"
msgstr "Apple M系列"

#: ../../source/getting_started/installation.rst:67
#: ../../source/getting_started/installation.rst:76
msgid "Nvidia cards::"
msgstr "英伟达显卡:"

#: ../../source/getting_started/installation.rst:71
#: ../../source/getting_started/installation.rst:80
msgid "AMD cards::"
msgstr "AMD 显卡:"

#~ msgid "The quantization method is GPTQ 4 bit or none"
#~ msgstr "量化方式必须是 GPTQ 4 bit 或者 none"

#~ msgid "``chatglm3``"
#~ msgstr "``chatglm3``"
