xorbitsai · aresnow1 · Jan 11, 2024 · Jan 8, 2024 · Jan 8, 2024 · Jan 9, 2024
diff --git a/doc/source/models/builtin/llm/yi-chat.rst b/doc/source/models/builtin/llm/yi-chat.rst
@@ -14,7 +14,22 @@ Specifications
 ^^^^^^^^^^^^^^
 
 
-Model Spec 1 (pytorch, 34 Billion)
+Model Spec 1 (gptq, 34 Billion)
+++++++++++++++++++++++++++++++++++++++++
+
+- **Model Format:** gptq
+- **Model Size (in billions):** 34
+- **Quantizations:** 8bits
+- **Model ID:** 01-ai/Yi-34B-Chat-{quantization}
+- **Model Hubs:** `Hugging Face <https://huggingface.co/01-ai/Yi-34B-Chat-{quantization}>`_, `ModelScope <https://modelscope.cn/models/01ai/Yi-34B-Chat-{quantization}>`_
+
+Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
+chosen quantization method from the options listed above::
+
+   xinference launch --model-name Yi-chat --size-in-billions 34 --model-format gptq --quantization ${quantization}
+
+
+Model Spec 2 (pytorch, 34 Billion)
 ++++++++++++++++++++++++++++++++++++++++
 
 - **Model Format:** pytorch
@@ -29,7 +44,7 @@ chosen quantization method from the options listed above::
    xinference launch --model-name Yi-chat --size-in-billions 34 --model-format pytorch --quantization ${quantization}
 
 
-Model Spec 2 (ggufv2, 34 Billion)
+Model Spec 3 (ggufv2, 34 Billion)
 ++++++++++++++++++++++++++++++++++++++++
 
 - **Model Format:** ggufv2

diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json
@@ -2477,6 +2477,14 @@
     ],
     "model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI.",
     "model_specs": [
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "8bits"
+        ],
+        "model_id": "01-ai/Yi-34B-Chat-{quantization}"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 34,

diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json
@@ -1108,6 +1108,15 @@
     ],
     "model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI.",
     "model_specs": [
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "8bits"
+        ],
+        "model_id": "01ai/Yi-34B-Chat-{quantization}",
+        "model_revision": "master"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 34,