From 8ea3f6578e62345d2f638f869388f53bcaad12f5 Mon Sep 17 00:00:00 2001 From: qinxuye Date: Wed, 3 Jul 2024 17:35:56 +0800 Subject: [PATCH 1/2] FEAT: add gemma-2-it --- xinference/model/llm/llm_family.json | 137 ++++++++++++++++++ .../model/llm/llm_family_modelscope.json | 70 +++++++++ 2 files changed, 207 insertions(+) diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json index b34436d974..ef4c768689 100644 --- a/xinference/model/llm/llm_family.json +++ b/xinference/model/llm/llm_family.json @@ -6143,6 +6143,143 @@ ] } }, + { + "version": 1, + "context_length": 8192, + "model_name": "gemma-2-it", + "model_lang": [ + "en" + ], + "model_ability": [ + "chat" + ], + "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": 9, + "quantizations": [ + "none", + "4-bit", + "8-bit" + ], + "model_id": "google/gemma-2-9b-it" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 27, + "quantizations": [ + "none", + "4-bit", + "8-bit" + ], + "model_id": "google/gemma-2-27b-it" + }, + { + "model_format": "mlx", + "model_size_in_billions": 9, + "quantizations": [ + "4-bit" + ], + "model_id": "mlx-community/gemma-2-9b-it-4bit" + }, + { + "model_format": "mlx", + "model_size_in_billions": 9, + "quantizations": [ + "8-bit" + ], + "model_id": "mlx-community/gemma-2-9b-it-8bit" + }, + { + "model_format": "mlx", + "model_size_in_billions": 9, + "quantizations": [ + "None" + ], + "model_id": "mlx-community/gemma-2-9b-it-fp16" + }, + { + "model_format": "mlx", + "model_size_in_billions": 27, + "quantizations": [ + "4-bit" + ], + "model_id": "mlx-community/gemma-2-27b-it-4bit" + }, + { + "model_format": "mlx", + "model_size_in_billions": 27, + "quantizations": [ + "8-bit" + ], + "model_id": "mlx-community/gemma-2-27b-it-8bit" + }, + { + "model_format": "mlx", + "model_size_in_billions": 27, + "quantizations": [ + "None" + ], + "model_id": "mlx-community/gemma-2-27b-it-fp16" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 9, + "quantizations": [ + "Q2_K", + "Q3_K_L", + "Q3_K_M", + "Q3_K_S", + "Q4_K_L", + "Q4_K_M", + "Q4_K_S", + "Q5_K_L", + "Q5_K_M", + "Q5_K_S", + "Q6_K", + "Q6_K_L", + "Q8_0", + "Q8_0_L" + ], + "model_id": "bartowski/gemma-2-9b-it-GGUF", + "model_file_name_template": "gemma-2-9b-it-{quantization}.gguf" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 27, + "quantizations": [ + "Q2_K", + "Q3_K_L", + "Q3_K_M", + "Q3_K_S", + "Q4_K_L", + "Q4_K_M", + "Q4_K_S", + "Q5_K_L", + "Q5_K_M", + "Q5_K_S", + "Q6_K", + "Q6_K_L", + "Q8_0", + "Q8_0_L" + ], + "model_id": "bartowski/gemma-2-27b-it-GGUF", + "model_file_name_template": "gemma-2-27b-it-{quantization}.gguf" + } + ], + "prompt_style": { + "style_name": "gemma", + "roles": [ + "user", + "model" + ], + "stop": [ + "", + "" + ] + } + }, { "version": 1, "context_length": 4096, diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json index c9f7051d35..2c1325e44a 100644 --- a/xinference/model/llm/llm_family_modelscope.json +++ b/xinference/model/llm/llm_family_modelscope.json @@ -3738,6 +3738,76 @@ ] } }, + { + "version": 1, + "context_length": 8192, + "model_name": "gemma-2-it", + "model_lang": [ + "en" + ], + "model_ability": [ + "chat" + ], + "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": 9, + "quantizations": [ + "none", + "4-bit", + "8-bit" + ], + "model_id": "AI-ModelScope/gemma-2-9b-it", + "model_hub": "modelscope" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 27, + "quantizations": [ + "none", + "4-bit", + "8-bit" + ], + "model_id": "AI-ModelScope/gemma-2-27b-it", + "model_hub": "modelscope" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 9, + "quantizations": [ + "Q2_K", + "Q3_K_L", + "Q3_K_M", + "Q3_K_S", + "Q4_K_L", + "Q4_K_M", + "Q4_K_S", + "Q5_K_L", + "Q5_K_M", + "Q5_K_S", + "Q6_K", + "Q6_K_L", + "Q8_0", + "Q8_0_L" + ], + "model_id": "LLM-Research/gemma-2-9b-it-GGUF", + "model_file_name_template": "gemma-2-9b-it-{quantization}.gguf", + "model_hub": "modelscope" + } + ], + "prompt_style": { + "style_name": "gemma", + "roles": [ + "user", + "model" + ], + "stop": [ + "", + "" + ] + } + }, { "version":1, "context_length":2048, From e7fa30d6f4b31139f66cb7d24a19aff7fc90f1d6 Mon Sep 17 00:00:00 2001 From: qinxuye Date: Wed, 3 Jul 2024 19:02:35 +0800 Subject: [PATCH 2/2] fix --- xinference/model/llm/llm_family.json | 44 ------------------- .../model/llm/llm_family_modelscope.json | 23 ---------- 2 files changed, 67 deletions(-) diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json index ef4c768689..cb735ab0a7 100644 --- a/xinference/model/llm/llm_family.json +++ b/xinference/model/llm/llm_family.json @@ -6222,50 +6222,6 @@ "None" ], "model_id": "mlx-community/gemma-2-27b-it-fp16" - }, - { - "model_format": "ggufv2", - "model_size_in_billions": 9, - "quantizations": [ - "Q2_K", - "Q3_K_L", - "Q3_K_M", - "Q3_K_S", - "Q4_K_L", - "Q4_K_M", - "Q4_K_S", - "Q5_K_L", - "Q5_K_M", - "Q5_K_S", - "Q6_K", - "Q6_K_L", - "Q8_0", - "Q8_0_L" - ], - "model_id": "bartowski/gemma-2-9b-it-GGUF", - "model_file_name_template": "gemma-2-9b-it-{quantization}.gguf" - }, - { - "model_format": "ggufv2", - "model_size_in_billions": 27, - "quantizations": [ - "Q2_K", - "Q3_K_L", - "Q3_K_M", - "Q3_K_S", - "Q4_K_L", - "Q4_K_M", - "Q4_K_S", - "Q5_K_L", - "Q5_K_M", - "Q5_K_S", - "Q6_K", - "Q6_K_L", - "Q8_0", - "Q8_0_L" - ], - "model_id": "bartowski/gemma-2-27b-it-GGUF", - "model_file_name_template": "gemma-2-27b-it-{quantization}.gguf" } ], "prompt_style": { diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json index 2c1325e44a..e7e8fb3394 100644 --- a/xinference/model/llm/llm_family_modelscope.json +++ b/xinference/model/llm/llm_family_modelscope.json @@ -3771,29 +3771,6 @@ ], "model_id": "AI-ModelScope/gemma-2-27b-it", "model_hub": "modelscope" - }, - { - "model_format": "ggufv2", - "model_size_in_billions": 9, - "quantizations": [ - "Q2_K", - "Q3_K_L", - "Q3_K_M", - "Q3_K_S", - "Q4_K_L", - "Q4_K_M", - "Q4_K_S", - "Q5_K_L", - "Q5_K_M", - "Q5_K_S", - "Q6_K", - "Q6_K_L", - "Q8_0", - "Q8_0_L" - ], - "model_id": "LLM-Research/gemma-2-9b-it-GGUF", - "model_file_name_template": "gemma-2-9b-it-{quantization}.gguf", - "model_hub": "modelscope" } ], "prompt_style": {