xorbitsai · qinxuye · May 31, 2024 · May 29, 2024 · May 29, 2024 · May 30, 2024
diff --git a/doc/source/models/builtin/llm/cogvlm2.rst b/doc/source/models/builtin/llm/cogvlm2.rst
@@ -0,0 +1,47 @@
+.. _models_llm_cogvlm2:
+
+========================================
+cogvlm2
+========================================
+
+- **Context Length:** 8192
+- **Model Name:** cogvlm2
+- **Languages:** en, zh
+- **Abilities:** chat, vision
+- **Description:** CogVLM2 has achieved good results on many leaderboards compared to the previous generation of CogVLM open-source models. Its excellent performance can compete with some closed-source models.
+
+Specifications
+^^^^^^^^^^^^^^
+
+
+Model Spec 1 (pytorch, 20 Billion)
+++++++++++++++++++++++++++++++++++++++++
+
+- **Model Format:** pytorch
+- **Model Size (in billions):** 20
+- **Quantizations:** none
+- **Engines**: Transformers
+- **Model ID:** THUDM/cogvlm2-llama3-chinese-chat-19B
+- **Model Hubs**:  `Hugging Face <https://huggingface.co/THUDM/cogvlm2-llama3-chinese-chat-19B>`__, `ModelScope <https://modelscope.cn/models/ZhipuAI/cogvlm2-llama3-chinese-chat-19B>`__
+
+Execute the following command to launch the model, remember to replace ``${engine}`` and ``${quantization}`` with your
+chosen engine and quantization method from the options listed above::
+
+   xinference launch --model-engine ${engine} --model-name cogvlm2 --size-in-billions 20 --model-format pytorch --quantization ${quantization}
+
+
+Model Spec 2 (pytorch, 20 Billion)
+++++++++++++++++++++++++++++++++++++++++
+
+- **Model Format:** pytorch
+- **Model Size (in billions):** 20
+- **Quantizations:** int4
+- **Engines**: Transformers
+- **Model ID:** THUDM/cogvlm2-llama3-chinese-chat-19B-{quantization}
+- **Model Hubs**:  `Hugging Face <https://huggingface.co/THUDM/cogvlm2-llama3-chinese-chat-19B-{quantization}>`__, `ModelScope <https://modelscope.cn/models/ZhipuAI/cogvlm2-llama3-chinese-chat-19B-{quantization}>`__
+
+Execute the following command to launch the model, remember to replace ``${engine}`` and ``${quantization}`` with your
+chosen engine and quantization method from the options listed above::
+
+   xinference launch --model-engine ${engine} --model-name cogvlm2 --size-in-billions 20 --model-format pytorch --quantization ${quantization}
+
diff --git a/doc/source/models/builtin/llm/index.rst b/doc/source/models/builtin/llm/index.rst
@@ -126,6 +126,11 @@ The following is a list of built-in LLM in Xinference:
      - 8194
      - CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University.
 
+   * - :ref:`cogvlm2 <models_llm_cogvlm2>`
+     - chat, vision
+     - 8192
+     - CogVLM2 has achieved good results on many leaderboards compared to the previous generation of CogVLM open-source models. Its excellent performance can compete with some closed-source models.
+
    * - :ref:`deepseek <models_llm_deepseek>`
      - generate
      - 4096
@@ -236,11 +241,6 @@ The following is a list of built-in LLM in Xinference:
      - 8192
      - The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks..
 
-   * - :ref:`mini-internvl-chat <models_llm_mini-internvl-chat>`
-     - chat, vision
-     - 32768
-     - InternVL 1.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. 
-
    * - :ref:`minicpm-2b-dpo-bf16 <models_llm_minicpm-2b-dpo-bf16>`
      - chat
      - 4096
@@ -550,6 +550,8 @@ The following is a list of built-in LLM in Xinference:
 
    codeshell-chat
 
+   cogvlm2
+
    deepseek
 
    deepseek-chat
@@ -594,8 +596,6 @@ The following is a list of built-in LLM in Xinference:
 
    llama-3-instruct
 
-   mini-internvl-chat
-
    minicpm-2b-dpo-bf16
 
    minicpm-2b-dpo-fp16

diff --git a/xinference/model/llm/__init__.py b/xinference/model/llm/__init__.py
@@ -113,6 +113,7 @@ def _install():
     from .ggml.llamacpp import LlamaCppChatModel, LlamaCppModel
     from .pytorch.baichuan import BaichuanPytorchChatModel
     from .pytorch.chatglm import ChatglmPytorchChatModel
+    from .pytorch.cogvlm2 import CogVLM2Model
     from .pytorch.core import PytorchChatModel, PytorchModel
     from .pytorch.deepseek_vl import DeepSeekVLChatModel
     from .pytorch.falcon import FalconPytorchChatModel, FalconPytorchModel
@@ -159,6 +160,7 @@ def _install():
             DeepSeekVLChatModel,
             InternVLChatModel,
             PytorchModel,
+            CogVLM2Model,
         ]
     )
     if OmniLMMModel:  # type: ignore

diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json
@@ -6247,5 +6247,57 @@
             "<|im_end|>"
         ]
     }
+},
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "cogvlm2",
+    "model_lang": [
+        "en",
+        "zh"
+    ],
+    "model_ability": [
+        "chat",
+        "vision"
+    ],
+    "model_description": "CogVLM2 has achieved good results on many leaderboards compared to the previous generation of CogVLM open-source models. Its excellent performance can compete with some closed-source models.",
+    "model_specs": [
+        {
+            "model_format": "pytorch",
+            "model_size_in_billions": 20,
+            "quantizations": [
+                "none"
+            ],
+            "model_id": "THUDM/cogvlm2-llama3-chinese-chat-19B",
+            "model_revision": "d88b352bce5ee58a289b1ac8328553eb31efa2ef"
+        },
+      {
+            "model_format": "pytorch",
+            "model_size_in_billions": 20,
+            "quantizations": [
+                "int4"
+            ],
+            "model_id": "THUDM/cogvlm2-llama3-chinese-chat-19B-{quantization}",
+            "model_revision": "7863e362174f4718c2fe9cba4befd0b580a3194f"
+        }
+    ],
+    "prompt_style": {
+      "style_name": "LLAMA3",
+      "system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "intra_message_sep": "\n\n",
+      "inter_message_sep": "<|eot_id|>",
+      "stop_token_ids": [
+        128001,
+        128009
+      ],
+      "stop": [
+        "<|end_of_text|>",
+        "<|eot_id|>"
+      ]
+    }
 }
 ]
diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json
@@ -3860,5 +3860,60 @@
             "<|im_end|>"
         ]
     }
+},
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "cogvlm2",
+    "model_lang": [
+        "en",
+        "zh"
+    ],
+    "model_ability": [
+        "chat",
+        "vision"
+    ],
+    "model_description": "CogVLM2 has achieved good results on many leaderboards compared to the previous generation of CogVLM open-source models. Its excellent performance can compete with some closed-source models.",
+    "model_specs": [
+        {
+            "model_format": "pytorch",
+            "model_size_in_billions": 20,
+            "quantizations": [
+                "none"
+            ],
+            "model_hub": "modelscope",
+            "model_id": "ZhipuAI/cogvlm2-llama3-chinese-chat-19B",
+            "model_revision": "master"
+        },
+      {
+            "model_format": "pytorch",
+            "model_size_in_billions": 20,
+            "quantizations": [
+                "int4"
+            ],
+            "model_hub": "modelscope",
+            "model_id": "ZhipuAI/cogvlm2-llama3-chinese-chat-19B-{quantization}",
+            "model_revision": "master"
+        }
+    ],
+    "prompt_style": {
+      "style_name": "LLAMA3",
+      "system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "intra_message_sep": "\n\n",
+      "inter_message_sep": "<|eot_id|>",
+      "stop_token_ids": [
+        128001,
+        128009
+      ],
+      "stop": [
+        "<|end_of_text|>",
+        "<|eot_id|>"
+      ]
+    }
 }
 ]