xorbitsai · pangyoki · Aug 17, 2023 · Aug 14, 2023 · Aug 16, 2023 · Aug 17, 2023
diff --git a/doc/source/models/builtin/index.rst b/doc/source/models/builtin/index.rst
@@ -34,6 +34,7 @@ Chat & Instruction-following Models
 - :ref:`Vicuna v1.5 <models_builtin_vicuna_v1_5>`
 - :ref:`Vicuna v1.5 16k <models_builtin_vicuna_v1_5_16k>`
 - :ref:`WizardLM v1.0 <models_builtin_wizardlm_v1_0>`
+- :ref:`WizardMath v1.0 <models_builtin_wizardmath_v1_0>`
 
 
 Code Generation Models
@@ -73,3 +74,4 @@ Code Assistant Models
    vicuna-v1.5
    vicuna-v1.5-16k
    wizardlm-v1.0
+   wizardmath-v1.0
diff --git a/doc/source/models/builtin/wizardmath-v1.0.rst b/doc/source/models/builtin/wizardmath-v1.0.rst
@@ -0,0 +1,63 @@
+.. _models_builtin_wizardmath_v1_0:
+
+===============
+WizardMath v1.0
+===============
+
+- **Model Name:** wizardmath-v1.0
+- **Languages:** en
+- **Abilities:** embed, chat
+
+Specifications
+^^^^^^^^^^^^^^
+
+Model Spec 1 (pytorch, 7 Billion)
++++++++++++++++++++++++++++++++
+
+- **Model Format:** pytorch
+- **Model Size (in billions):** 7
+- **Quantizations:** 4-bit, 8-bit, none
+- **Model ID:** WizardLM/WizardMath-7B-V1.0
+
+Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
+chosen quantization method from the options listed above::
+
+   xinference launch --model-name wizardmath-v1.0 --size-in-billions 7 --model-format pytorch --quantization ${quantization}
+
+.. note::
+
+   4-bit quantization is not supported on macOS.
+
+Model Spec 2 (pytorch, 13 Billion)
++++++++++++++++++++++++++++++++++
+
+- **Model Format:** pytorch
+- **Model Size (in billions):** 13
+- **Quantizations:** 4-bit, 8-bit, none
+- **Model ID:** WizardLM/WizardMath-13B-V1.0
+
+Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
+chosen quantization method from the options listed above::
+
+   xinference launch --model-name wizardmath-v1.0 --size-in-billions 13 --model-format pytorch --quantization ${quantization}
+
+.. note::
+
+   4-bit quantization is not supported on macOS.
+
+Model Spec 3 (pytorch, 70 Billion)
++++++++++++++++++++++++++++++++++
+
+- **Model Format:** pytorch
+- **Model Size (in billions):** 70
+- **Quantizations:** 4-bit, 8-bit, none
+- **Model ID:** WizardLM/WizardMath-70B-V1.0
+
+Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
+chosen quantization method from the options listed above::
+
+   xinference launch --model-name wizardmath-v1.0 --size-in-billions 70 --model-format pytorch --quantization ${quantization}
+
+.. note::
+
+   4-bit quantization is not supported on macOS.
diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json
@@ -937,7 +937,9 @@
         1,
         103028
       ],
-      "stop": ["<eoa>"]
+      "stop": [
+        "<eoa>"
+      ]
     }
   },
   {
@@ -977,7 +979,9 @@
         1,
         103028
       ],
-      "stop": ["<eoa>"]
+      "stop": [
+        "<eoa>"
+      ]
     }
   },
   {
@@ -1069,5 +1073,60 @@
       "intra_message_sep": " ",
       "inter_message_sep": "</s>"
     }
+  },
+  {
+    "version": 1,
+    "model_name": "wizardmath-v1.0",
+    "model_lang": [
+      "en"
+    ],
+    "model_ability": [
+      "embed",
+      "chat"
+    ],
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "WizardLM/WizardMath-7B-V1.0",
+        "model_revision": "3c3a3b33334f4b35344b22c5c7465957ee7b2c75"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 13,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "WizardLM/WizardMath-13B-V1.0",
+        "model_revision": "ef95532e96e634c634992dab891a17032dc71c8d"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "WizardLM/WizardMath-70B-V1.0",
+        "model_revision": "8823afe1d77b1ebdd6ac0c14e6e8977037d1830e"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "ADD_COLON_SINGLE_COT",
+      "system_prompt": "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
+      "roles": [
+        "Instruction",
+        "Response"
+      ],
+      "intra_message_sep": "\n\n### "
+    }
   }
 ]
diff --git a/xinference/model/llm/tests/test_utils.py b/xinference/model/llm/tests/test_utils.py
@@ -298,6 +298,36 @@ def test_prompt_style_internlm():
     assert expected == actual
 
 
+def test_prompt_style_add_colon_single_cot():
+    prompt_style = PromptStyleV1(
+        style_name="ADD_COLON_SINGLE_COT",
+        system_prompt=(
+            "Below is an instruction that describes a task. Write a response that appropriately "
+            "completes the request."
+        ),
+        roles=["Instruction", "Response"],
+        intra_message_sep="\n\n### ",
+    )
+
+    chat_history = [
+        ChatCompletionMessage(role=prompt_style.roles[0], content="Hi there."),
+        ChatCompletionMessage(
+            role=prompt_style.roles[1], content="Hello, how may I help you?"
+        ),
+    ]
+    expected = (
+        "Below is an instruction that describes a task. Write a response that appropriately "
+        "completes the request."
+        "\n\n### Instruction: Hi there."
+        "\n\n### Response: Hello, how may I help you?"
+        "\n\n### Instruction: Write a poem."
+        "\n\n### Response: Let's think step by step."
+    )
+    assert expected == ChatModelMixin.get_prompt(
+        "Write a poem.", chat_history, prompt_style
+    )
+
+
 def test_is_valid_model_name():
     from ..utils import is_valid_model_name
 

diff --git a/xinference/model/llm/utils.py b/xinference/model/llm/utils.py
@@ -164,6 +164,16 @@ def get_prompt(
             )
             ret += chat_history[-1]["role"] + ":"
             return ret
+        elif prompt_style.style_name == "ADD_COLON_SINGLE_COT":
+            ret = prompt_style.system_prompt + prompt_style.intra_message_sep
+            for message in chat_history:
+                role = message["role"]
+                content = message["content"]
+                if content:
+                    ret += role + ": " + content + prompt_style.intra_message_sep
+                else:
+                    ret += role + ": Let's think step by step."
+            return ret
         else:
             raise ValueError(f"Invalid prompt style: {prompt_style.style_name}")