FEAT: Support Vicuna-v1.5 and Vicuna-v1.5-16k #343

Merged 4 commits on Aug 17, 2023
4 changes: 4 additions & 0 deletions doc/source/models/builtin/index.rst
@@ -31,6 +31,8 @@ Chat & Instruction-following Models
- :ref:`Orca Mini <models_builtin_orca_mini>`
- :ref:`Qwen Chat <models_builtin_qwen_chat>`
- :ref:`Vicuna v1.3 <models_builtin_vicuna_v1_3>`
- :ref:`Vicuna v1.5 <models_builtin_vicuna_v1_5>`
- :ref:`Vicuna v1.5 16k <models_builtin_vicuna_v1_5_16k>`
- :ref:`WizardLM v1.0 <models_builtin_wizardlm_v1_0>`


@@ -68,4 +70,6 @@ Code Assistant Models
starcoderplus
qwen-chat
vicuna-v1.3
vicuna-v1.5
vicuna-v1.5-16k
wizardlm-v1.0
46 changes: 46 additions & 0 deletions doc/source/models/builtin/vicuna-v1.5-16k.rst
@@ -0,0 +1,46 @@
.. _models_builtin_vicuna_v1_5_16k:

===============
Vicuna v1.5-16k
===============

- **Model Name:** vicuna-v1.5-16k
- **Languages:** en
- **Abilities:** embed, chat

Specifications
^^^^^^^^^^^^^^

Model Spec 1 (pytorch, 7 Billion)
+++++++++++++++++++++++++++++++++

- **Model Format:** pytorch
- **Model Size (in billions):** 7
- **Quantizations:** 4-bit, 8-bit, none
- **Model ID:** lmsys/vicuna-7b-v1.5-16k

Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
chosen quantization method from the options listed above::

xinference launch --model-name vicuna-v1.5-16k --size-in-billions 7 --model-format pytorch --quantization ${quantization}

.. note::

4-bit quantization is not supported on macOS.
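
If you prefer to launch the model from Python rather than the CLI, the RESTful client offers an
equivalent call. This is a minimal sketch, assuming a local xinference server at the default
endpoint ``http://127.0.0.1:9997``; argument names follow the client at the time of writing and
may differ across versions:

.. code-block:: python

   from xinference.client import RESTfulClient

   client = RESTfulClient("http://127.0.0.1:9997")

   # Mirrors the CLI command above; "none" loads unquantized weights.
   model_uid = client.launch_model(
       model_name="vicuna-v1.5-16k",
       model_size_in_billions=7,
       model_format="pytorch",
       quantization="none",
   )
   print(f"Model uid: {model_uid}")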

Model Spec 2 (pytorch, 13 Billion)
++++++++++++++++++++++++++++++++++

- **Model Format:** pytorch
- **Model Size (in billions):** 13
- **Quantizations:** 4-bit, 8-bit, none
- **Model ID:** lmsys/vicuna-13b-v1.5-16k

Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
chosen quantization method from the options listed above::

xinference launch --model-name vicuna-v1.5-16k --size-in-billions 13 --model-format pytorch --quantization ${quantization}

.. note::

4-bit quantization is not supported on macOS.
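
Once launched, either spec can be queried through the client handle. The sketch below is
illustrative (the prompt and ``max_tokens`` value are arbitrary examples); the 16k context
window is what distinguishes this variant from the base 4k-context Vicuna v1.5:

.. code-block:: python

   from xinference.client import RESTfulClient

   client = RESTfulClient("http://127.0.0.1:9997")
   model_uid = client.launch_model(
       model_name="vicuna-v1.5-16k",
       model_size_in_billions=13,
       model_format="pytorch",
       quantization="8-bit",
   )
   model = client.get_model(model_uid)

   # Responses follow the OpenAI-style chat completion layout.
   response = model.chat(
       prompt="Summarize the plot of Moby-Dick in three sentences.",
       chat_history=[],
       generate_config={"max_tokens": 512},
   )
   print(response["choices"][0]["message"]["content"])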
46 changes: 46 additions & 0 deletions doc/source/models/builtin/vicuna-v1.5.rst
@@ -0,0 +1,46 @@
.. _models_builtin_vicuna_v1_5:

===========
Vicuna v1.5
===========

- **Model Name:** vicuna-v1.5
- **Languages:** en
- **Abilities:** embed, chat

Specifications
^^^^^^^^^^^^^^

Model Spec 1 (pytorch, 7 Billion)
+++++++++++++++++++++++++++++++++

- **Model Format:** pytorch
- **Model Size (in billions):** 7
- **Quantizations:** 4-bit, 8-bit, none
- **Model ID:** lmsys/vicuna-7b-v1.5

Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
chosen quantization method from the options listed above::

xinference launch --model-name vicuna-v1.5 --size-in-billions 7 --model-format pytorch --quantization ${quantization}

.. note::

4-bit quantization is not supported on macOS.

Model Spec 2 (pytorch, 13 Billion)
++++++++++++++++++++++++++++++++++

- **Model Format:** pytorch
- **Model Size (in billions):** 13
- **Quantizations:** 4-bit, 8-bit, none
- **Model ID:** lmsys/vicuna-13b-v1.5

Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
chosen quantization method from the options listed above::

xinference launch --model-name vicuna-v1.5 --size-in-billions 13 --model-format pytorch --quantization ${quantization}

.. note::

4-bit quantization is not supported on macOS.
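
Because ``embed`` is listed among the model's abilities, the same handle can also produce
embeddings. A hedged sketch, assuming the client API shown for the 16k variant above
(``"my-vicuna-uid"`` is a hypothetical uid returned by a prior ``launch_model`` call):

.. code-block:: python

   from xinference.client import RESTfulClient

   client = RESTfulClient("http://127.0.0.1:9997")
   model = client.get_model("my-vicuna-uid")  # hypothetical uid from launch_model

   # Models with the "embed" ability return embeddings in the
   # OpenAI-compatible response layout.
   result = model.create_embedding("The quick brown fox")
   vector = result["data"][0]["embedding"]
   print(len(vector))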
90 changes: 90 additions & 0 deletions xinference/model/llm/llm_family.json
@@ -979,5 +979,95 @@
],
"stop": ["<eoa>"]
}
},
{
"version": 1,
"model_name": "vicuna-v1.5",
"model_lang": [
"en"
],
"model_ability": [
"embed",
"chat"
],
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": 7,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "lmsys/vicuna-7b-v1.5",
"model_revision": "de56c35b1763eaae20f4d60efd64af0a9091ebe5"
},
{
"model_format": "pytorch",
"model_size_in_billions": 13,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "lmsys/vicuna-13b-v1.5",
"model_revision": "3deb0106f72a3a433f0c6ea0cb978bdf14bcd3a6"
}
],
"prompt_style": {
"style_name": "ADD_COLON_TWO",
"system_prompt": "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.",
"roles": [
"USER",
"ASSISTANT"
],
"intra_message_sep": " ",
"inter_message_sep": "</s>"
}
},
{
"version": 1,
"model_name": "vicuna-v1.5-16k",
"model_lang": [
"en"
],
"model_ability": [
"embed",
"chat"
],
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": 7,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "lmsys/vicuna-7b-v1.5-16k",
"model_revision": "9a93d7d11fac7f3f9074510b80092b53bc1a5bec"
},
{
"model_format": "pytorch",
"model_size_in_billions": 13,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "lmsys/vicuna-13b-v1.5-16k",
"model_revision": "277697af19d4b267626ebc9f4e078d19a9a0fddf"
}
],
"prompt_style": {
"style_name": "ADD_COLON_TWO",
"system_prompt": "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.",
"roles": [
"USER",
"ASSISTANT"
],
"intra_message_sep": " ",
"inter_message_sep": "</s>"
}
}
]
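
For reference, the ADD_COLON_TWO style used by both entries above follows FastChat's
two-separator convention: every turn is rendered as "ROLE: message", the two separators
alternate, and "</s>" (the model's EOS token) closes each assistant turn. A minimal,
self-contained sketch of the assembly (the helper name is illustrative, not xinference's
internal API):

    def render_add_colon_two(system_prompt, messages, seps=(" ", "</s>")):
        """Flatten (role, content) turns into a single Vicuna-style prompt."""
        ret = system_prompt + seps[0]
        for i, (role, content) in enumerate(messages):
            if content:
                ret += f"{role}: {content}{seps[i % 2]}"
            else:
                # An empty final turn leaves "ASSISTANT:" open for the
                # model to complete.
                ret += f"{role}:"
        return ret

    system = (
        "A chat between a curious human and an artificial intelligence "
        "assistant. The assistant gives helpful, detailed, and polite "
        "answers to the human's questions."
    )
    prompt = render_add_colon_two(system, [("USER", "Hello!"), ("ASSISTANT", "")])
    # -> "... questions. USER: Hello! ASSISTANT:"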