From 8ea3f6578e62345d2f638f869388f53bcaad12f5 Mon Sep 17 00:00:00 2001
From: qinxuye <qinxuye@gmail.com>
Date: Wed, 3 Jul 2024 17:35:56 +0800
Subject: [PATCH 1/2] FEAT: add gemma-2-it

---
 xinference/model/llm/llm_family.json          | 137 ++++++++++++++++++
 .../model/llm/llm_family_modelscope.json      |  70 +++++++++
 2 files changed, 207 insertions(+)

diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json
index b34436d974..ef4c768689 100644
--- a/xinference/model/llm/llm_family.json
+++ b/xinference/model/llm/llm_family.json
@@ -6143,6 +6143,143 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "gemma-2-it",
+    "model_lang": [
+      "en"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "none",
+          "4-bit",
+          "8-bit"
+        ],
+        "model_id": "google/gemma-2-9b-it"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "none",
+          "4-bit",
+          "8-bit"
+        ],
+        "model_id": "google/gemma-2-27b-it"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/gemma-2-9b-it-4bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/gemma-2-9b-it-8bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "None"
+        ],
+        "model_id": "mlx-community/gemma-2-9b-it-fp16"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/gemma-2-27b-it-4bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/gemma-2-27b-it-8bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "None"
+        ],
+        "model_id": "mlx-community/gemma-2-27b-it-fp16"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_K_L",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_K_L",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q6_K_L",
+          "Q8_0",
+          "Q8_0_L"
+        ],
+        "model_id": "bartowski/gemma-2-9b-it-GGUF",
+        "model_file_name_template": "gemma-2-9b-it-{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_K_L",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_K_L",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q6_K_L",
+          "Q8_0",
+          "Q8_0_L"
+        ],
+        "model_id": "bartowski/gemma-2-27b-it-GGUF",
+        "model_file_name_template": "gemma-2-27b-it-{quantization}.gguf"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "gemma",
+      "roles": [
+        "user",
+        "model"
+      ],
+      "stop": [
+        "<end_of_turn>",
+        "<start_of_turn>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 4096,
diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json
index c9f7051d35..2c1325e44a 100644
--- a/xinference/model/llm/llm_family_modelscope.json
+++ b/xinference/model/llm/llm_family_modelscope.json
@@ -3738,6 +3738,76 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "gemma-2-it",
+    "model_lang": [
+      "en"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "none",
+          "4-bit",
+          "8-bit"
+        ],
+        "model_id": "AI-ModelScope/gemma-2-9b-it",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "none",
+          "4-bit",
+          "8-bit"
+        ],
+        "model_id": "AI-ModelScope/gemma-2-27b-it",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_K_L",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_K_L",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q6_K_L",
+          "Q8_0",
+          "Q8_0_L"
+        ],
+        "model_id": "LLM-Research/gemma-2-9b-it-GGUF",
+        "model_file_name_template": "gemma-2-9b-it-{quantization}.gguf",
+        "model_hub": "modelscope"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "gemma",
+      "roles": [
+        "user",
+        "model"
+      ],
+      "stop": [
+        "<end_of_turn>",
+        "<start_of_turn>"
+      ]
+    }
+  },
   {
     "version":1,
     "context_length":2048,

From e7fa30d6f4b31139f66cb7d24a19aff7fc90f1d6 Mon Sep 17 00:00:00 2001
From: qinxuye <qinxuye@gmail.com>
Date: Wed, 3 Jul 2024 19:02:35 +0800
Subject: [PATCH 2/2] fix: remove gemma-2-it ggufv2 model specs

---
 xinference/model/llm/llm_family.json          | 44 -------------------
 .../model/llm/llm_family_modelscope.json      | 23 ----------
 2 files changed, 67 deletions(-)

diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json
index ef4c768689..cb735ab0a7 100644
--- a/xinference/model/llm/llm_family.json
+++ b/xinference/model/llm/llm_family.json
@@ -6222,50 +6222,6 @@
           "None"
         ],
         "model_id": "mlx-community/gemma-2-27b-it-fp16"
-      },
-      {
-        "model_format": "ggufv2",
-        "model_size_in_billions": 9,
-        "quantizations": [
-          "Q2_K",
-          "Q3_K_L",
-          "Q3_K_M",
-          "Q3_K_S",
-          "Q4_K_L",
-          "Q4_K_M",
-          "Q4_K_S",
-          "Q5_K_L",
-          "Q5_K_M",
-          "Q5_K_S",
-          "Q6_K",
-          "Q6_K_L",
-          "Q8_0",
-          "Q8_0_L"
-        ],
-        "model_id": "bartowski/gemma-2-9b-it-GGUF",
-        "model_file_name_template": "gemma-2-9b-it-{quantization}.gguf"
-      },
-      {
-        "model_format": "ggufv2",
-        "model_size_in_billions": 27,
-        "quantizations": [
-          "Q2_K",
-          "Q3_K_L",
-          "Q3_K_M",
-          "Q3_K_S",
-          "Q4_K_L",
-          "Q4_K_M",
-          "Q4_K_S",
-          "Q5_K_L",
-          "Q5_K_M",
-          "Q5_K_S",
-          "Q6_K",
-          "Q6_K_L",
-          "Q8_0",
-          "Q8_0_L"
-        ],
-        "model_id": "bartowski/gemma-2-27b-it-GGUF",
-        "model_file_name_template": "gemma-2-27b-it-{quantization}.gguf"
       }
     ],
     "prompt_style": {
diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json
index 2c1325e44a..e7e8fb3394 100644
--- a/xinference/model/llm/llm_family_modelscope.json
+++ b/xinference/model/llm/llm_family_modelscope.json
@@ -3771,29 +3771,6 @@
         ],
         "model_id": "AI-ModelScope/gemma-2-27b-it",
         "model_hub": "modelscope"
-      },
-      {
-        "model_format": "ggufv2",
-        "model_size_in_billions": 9,
-        "quantizations": [
-          "Q2_K",
-          "Q3_K_L",
-          "Q3_K_M",
-          "Q3_K_S",
-          "Q4_K_L",
-          "Q4_K_M",
-          "Q4_K_S",
-          "Q5_K_L",
-          "Q5_K_M",
-          "Q5_K_S",
-          "Q6_K",
-          "Q6_K_L",
-          "Q8_0",
-          "Q8_0_L"
-        ],
-        "model_id": "LLM-Research/gemma-2-9b-it-GGUF",
-        "model_file_name_template": "gemma-2-9b-it-{quantization}.gguf",
-        "model_hub": "modelscope"
       }
     ],
     "prompt_style": {