
Commit: readme
aresnow1 committed Sep 22, 2023
1 parent c1b46d7 commit e7e14a4
Showing 4 changed files with 64 additions and 14 deletions.
README.md — 11 additions, 4 deletions

```diff
@@ -209,11 +209,11 @@ $ xinference registrations
 | LLM | falcon            | ['en']       | ['embed', 'generate'] |
 | LLM | falcon-instruct   | ['en']       | ['embed', 'chat']     |
 | LLM | gpt-2             | ['en']       | ['generate']          |
-| LLM | internlm          | ['en', 'zh'] | ['embed', 'generate'] |
+| LLM | internlm-7b       | ['en', 'zh'] | ['embed', 'generate'] |
 | LLM | internlm-16k      | ['en', 'zh'] | ['embed', 'generate'] |
-| LLM | internlm-chat     | ['en', 'zh'] | ['embed', 'chat']     |
+| LLM | internlm-chat-7b  | ['en', 'zh'] | ['embed', 'chat']     |
-| LLM | internlm-chat-8k  | ['en', 'zh'] | ['embed', 'chat']     |
+| LLM | internlm-chat-16k | ['en', 'zh'] | ['embed', 'chat']     |
 | LLM | internlm-chat-20b | ['en', 'zh'] | ['embed', 'chat']     |
 | LLM | llama-2           | ['en']       | ['embed', 'generate'] |
 | LLM | llama-2-chat      | ['en']       | ['embed', 'chat']     |
 | LLM | opt               | ['en']       | ['embed', 'generate'] |
@@ -233,7 +233,14 @@ For in-depth details on the built-in models, please refer to [built-in models](h

 **NOTE**:
 - Xinference will download models automatically for you, and by default the models will be saved under `${USER}/.xinference/cache`.
-- If you have trouble downloading models from Hugging Face, run `export XINFERENCE_MODEL_SRC=xorbits` to download models from our mirror site.
+- If you have trouble downloading models from Hugging Face, run `export XINFERENCE_MODEL_SRC=modelscope` to download models from [modelscope](https://modelscope.cn/). Models supported by modelscope:
+  - llama-2
+  - llama-2-chat
+  - baichuan-2
+  - baichuan-2-chat
+  - chatglm2
+  - chatglm2-32k
+  - internlm-chat-20b

 ## Custom models
 Please refer to [custom models](https://inference.readthedocs.io/en/latest/models/custom.html).
```
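The `XINFERENCE_MODEL_SRC` switch described in the note above can be sketched as a small source-selection function. This is a hypothetical illustration of the behavior, not Xinference's actual internals: the model names in `MODELSCOPE_MODELS` come from the list in the README, and the `"huggingface"` default is an assumption.

```python
import os

# Hypothetical sketch of how a downloader might choose a model hub based on
# the XINFERENCE_MODEL_SRC environment variable; Xinference's real
# implementation may differ. Only the models listed in the README have
# modelscope specs, so everything else falls back to Hugging Face.
MODELSCOPE_MODELS = {
    "llama-2", "llama-2-chat",
    "baichuan-2", "baichuan-2-chat",
    "chatglm2", "chatglm2-32k",
    "internlm-chat-20b",
}

def pick_hub(model_name: str) -> str:
    """Return the hub a download for `model_name` would come from."""
    src = os.environ.get("XINFERENCE_MODEL_SRC", "huggingface")
    if src == "modelscope" and model_name in MODELSCOPE_MODELS:
        return "modelscope"
    return "huggingface"
```

Under this sketch, exporting `XINFERENCE_MODEL_SRC=modelscope` only reroutes the models that actually have modelscope specs; unsupported models would still resolve to Hugging Face.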
README_zh_CN.md — 12 additions, 5 deletions

```diff
@@ -191,11 +191,11 @@ $ xinference registrations
 | LLM | falcon            | ['en']       | ['embed', 'generate'] |
 | LLM | falcon-instruct   | ['en']       | ['embed', 'chat']     |
 | LLM | gpt-2             | ['en']       | ['generate']          |
-| LLM | internlm          | ['en', 'zh'] | ['embed', 'generate'] |
+| LLM | internlm-7b       | ['en', 'zh'] | ['embed', 'generate'] |
 | LLM | internlm-16k      | ['en', 'zh'] | ['embed', 'generate'] |
-| LLM | internlm-chat     | ['en', 'zh'] | ['embed', 'chat']     |
+| LLM | internlm-chat-7b  | ['en', 'zh'] | ['embed', 'chat']     |
-| LLM | internlm-chat-8k  | ['en', 'zh'] | ['embed', 'chat']     |
+| LLM | internlm-chat-16k | ['en', 'zh'] | ['embed', 'chat']     |
 | LLM | internlm-chat-20b | ['en', 'zh'] | ['embed', 'chat']     |
 | LLM | llama-2           | ['en']       | ['embed', 'generate'] |
 | LLM | llama-2-chat      | ['en']       | ['embed', 'chat']     |
 | LLM | opt               | ['en']       | ['embed', 'generate'] |
@@ -215,7 +215,14 @@ $ xinference registrations

 **NOTE**:
 - Xinference will automatically download models for you; the default model storage path is `${USER}/.xinference/cache`
-- If you run into problems downloading models from Hugging Face, run `export XINFERENCE_MODEL_SRC=xorbits` to download models from our mirror site.
+- If you run into problems downloading models from Hugging Face, run `export XINFERENCE_MODEL_SRC=modelscope`; downloads will then prefer modelscope by default. Models currently supported by modelscope:
+  - llama-2
+  - llama-2-chat
+  - baichuan-2
+  - baichuan-2-chat
+  - chatglm2
+  - chatglm2-32k
+  - internlm-chat-20b

 ## Custom models
 Please refer to [custom models](https://inference.readthedocs.io/en/latest/models/custom.html)
```
xinference/model/llm/llm_family.json — 1 addition, 1 deletion

```diff
@@ -1042,7 +1042,7 @@
     {
       "version": 1,
       "context_length": 4096,
-      "model_name": "internlm-7b-chat",
+      "model_name": "internlm-chat-7b",
       "model_lang": [
         "en",
         "zh"
```
xinference/model/llm/llm_family_modelscope.json — 40 additions, 4 deletions

```diff
@@ -33,6 +33,42 @@
       "model_file_name_template": "llama-2-7b-chat.{quantization}.gguf",
       "model_hub": "modelscope",
       "model_revision": "v0.0.1"
+    },
+    {
+      "model_format": "pytorch",
+      "model_size_in_billions": 7,
+      "quantizations": [
+        "4-bit",
+        "8-bit",
+        "none"
+      ],
+      "model_id": "modelscope/Llama-2-7b-chat-ms",
+      "model_hub": "modelscope",
+      "model_revision": "v1.0.5"
+    },
+    {
+      "model_format": "pytorch",
+      "model_size_in_billions": 13,
+      "quantizations": [
+        "4-bit",
+        "8-bit",
+        "none"
+      ],
+      "model_id": "modelscope/Llama-2-13b-chat-ms",
+      "model_hub": "modelscope",
+      "model_revision": "v1.0.2"
+    },
+    {
+      "model_format": "pytorch",
+      "model_size_in_billions": 70,
+      "quantizations": [
+        "4-bit",
+        "8-bit",
+        "none"
+      ],
+      "model_id": "modelscope/Llama-2-70b-chat-ms",
+      "model_hub": "modelscope",
+      "model_revision": "v1.0.1"
     }
   ],
   "prompt_style": {
@@ -244,7 +280,7 @@
     {
       "version": 1,
       "context_length": 8192,
-      "model_name": "internlm",
+      "model_name": "internlm-7b",
       "model_lang": [
         "en",
         "zh"
@@ -272,7 +308,7 @@
     {
       "version": 1,
       "context_length": 4096,
-      "model_name": "internlm-chat",
+      "model_name": "internlm-chat-7b",
       "model_lang": [
         "en",
         "zh"
@@ -362,7 +398,7 @@
     {
       "version": 1,
       "context_length": 16384,
-      "model_name": "internlm-20B",
+      "model_name": "internlm-20b",
       "model_lang": [
         "en",
         "zh"
@@ -390,7 +426,7 @@
     {
       "version": 1,
      "context_length": 16384,
-      "model_name": "internlm-chat-20B",
+      "model_name": "internlm-chat-20b",
       "model_lang": [
         "en",
         "zh"
```
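The pytorch specs added to llm_family_modelscope.json pair a model size with a set of quantizations and a modelscope `model_id`. As an illustrative sketch (not Xinference's actual resolver), selecting the matching spec for a requested size and quantization can look like this; `match_spec` is a hypothetical helper name, and the embedded JSON is a trimmed copy of two entries from the commit:

```python
import json

# Two spec entries shaped like those added in this commit (trimmed copy).
SPECS = json.loads("""
[
  {"model_format": "pytorch", "model_size_in_billions": 7,
   "quantizations": ["4-bit", "8-bit", "none"],
   "model_id": "modelscope/Llama-2-7b-chat-ms",
   "model_hub": "modelscope", "model_revision": "v1.0.5"},
  {"model_format": "pytorch", "model_size_in_billions": 13,
   "quantizations": ["4-bit", "8-bit", "none"],
   "model_id": "modelscope/Llama-2-13b-chat-ms",
   "model_hub": "modelscope", "model_revision": "v1.0.2"}
]
""")

def match_spec(specs, size_in_billions, quantization):
    """Return the model_id of the first spec matching the requested
    size and quantization, or None if no spec matches."""
    for spec in specs:
        if (spec["model_size_in_billions"] == size_in_billions
                and quantization in spec["quantizations"]):
            return spec["model_id"]
    return None
```

For example, asking for a 7B model at `"4-bit"` would resolve to `modelscope/Llama-2-7b-chat-ms`, while a size with no spec (such as 30B here) would yield no match.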
