Skip to content

Commit 4e68cc9

Browse files
authored
[Model] Introduce Kimi Linear to vLLM (#27809)
Signed-off-by: lizhiyuan <lizhiyuan@moonshot.cn> Signed-off-by: Zhiyuan Li <uniartisan2017@gmail.com>
1 parent 1994de9 commit 4e68cc9

File tree

15 files changed

+1326
-49
lines changed

15 files changed

+1326
-49
lines changed

docs/models/supported_models.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,7 @@ th {
382382
| `InternLM3ForCausalLM` | InternLM3 | `internlm/internlm3-8b-instruct`, etc. | ✅︎ | ✅︎ |
383383
| `JAISLMHeadModel` | Jais | `inceptionai/jais-13b`, `inceptionai/jais-13b-chat`, `inceptionai/jais-30b-v3`, `inceptionai/jais-30b-chat-v3`, etc. | | ✅︎ |
384384
| `JambaForCausalLM` | Jamba | `ai21labs/AI21-Jamba-1.5-Large`, `ai21labs/AI21-Jamba-1.5-Mini`, `ai21labs/Jamba-v0.1`, etc. | ✅︎ | ✅︎ |
385+
| `KimiLinearForCausalLM` | Kimi-Linear-48B-A3B-Base, Kimi-Linear-48B-A3B-Instruct | `moonshotai/Kimi-Linear-48B-A3B-Base`, `moonshotai/Kimi-Linear-48B-A3B-Instruct` | | ✅︎ |
385386
| `Lfm2ForCausalLM` | LFM2 | `LiquidAI/LFM2-1.2B`, `LiquidAI/LFM2-700M`, `LiquidAI/LFM2-350M`, etc. | ✅︎ | ✅︎ |
386387
| `Lfm2MoeForCausalLM` | LFM2MoE | `LiquidAI/LFM2-8B-A1B-preview`, etc. | ✅︎ | ✅︎ |
387388
| `LlamaForCausalLM` | Llama 3.1, Llama 3, Llama 2, LLaMA, Yi | `meta-llama/Meta-Llama-3.1-405B-Instruct`, `meta-llama/Meta-Llama-3.1-70B`, `meta-llama/Meta-Llama-3-70B-Instruct`, `meta-llama/Llama-2-70b-hf`, `01-ai/Yi-34B`, etc. | ✅︎ | ✅︎ |

tests/models/registry.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,9 @@ def check_available_online(
296296
"random": "ai21labs/Jamba-tiny-random",
297297
},
298298
),
299+
"KimiLinearForCausalLM": _HfExamplesInfo(
300+
"moonshotai/Kimi-Linear-48B-A3B-Instruct", trust_remote_code=True
301+
),
299302
"Lfm2ForCausalLM": _HfExamplesInfo("LiquidAI/LFM2-1.2B"),
300303
"Lfm2MoeForCausalLM": _HfExamplesInfo(
301304
"LiquidAI/LFM2-8B-A1B", min_transformers_version="4.58"

vllm/config/compilation.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,7 @@ class CompilationConfig:
453453
"vllm::linear_attention",
454454
"vllm::plamo2_mamba_mixer",
455455
"vllm::gdn_attention",
456+
"vllm::kda_attention",
456457
"vllm::sparse_attn_indexer",
457458
]
458459

vllm/config/model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1236,6 +1236,7 @@ def is_deepseek_mla(self) -> bool:
12361236
"deepseek_v32",
12371237
"deepseek_mtp",
12381238
"kimi_k2",
1239+
"kimi_linear",
12391240
"longcat_flash",
12401241
):
12411242
return self.hf_text_config.kv_lora_rank is not None

vllm/model_executor/layers/fla/ops/kda.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1304,7 +1304,7 @@ def kda_gate_fwd_kernel(
13041304
tl.store(y_ptr, b_y.to(y.dtype.element_ty), boundary_check=(0, 1))
13051305

13061306

1307-
def kda_gate_fwd(
1307+
def fused_kda_gate(
13081308
g: torch.Tensor,
13091309
A: torch.Tensor,
13101310
head_k_dim: int,

0 commit comments

Comments (0)