Skip to content

Commit 4e68cc9

Browse files
authored
[Model] Introduce Kimi Linear to vLLM (#27809)
Signed-off-by: lizhiyuan <lizhiyuan@moonshot.cn> Signed-off-by: Zhiyuan Li <uniartisan2017@gmail.com>
1 parent 1994de9 commit 4e68cc9

File tree

15 files changed

+1326
-49
lines changed

15 files changed

+1326
-49
lines changed

docs/models/supported_models.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,7 @@ th {
382382
| `InternLM3ForCausalLM` | InternLM3 | `internlm/internlm3-8b-instruct`, etc. | ✅︎ | ✅︎ |
383383
| `JAISLMHeadModel` | Jais | `inceptionai/jais-13b`, `inceptionai/jais-13b-chat`, `inceptionai/jais-30b-v3`, `inceptionai/jais-30b-chat-v3`, etc. | | ✅︎ |
384384
| `JambaForCausalLM` | Jamba | `ai21labs/AI21-Jamba-1.5-Large`, `ai21labs/AI21-Jamba-1.5-Mini`, `ai21labs/Jamba-v0.1`, etc. | ✅︎ | ✅︎ |
385+
| `KimiLinearForCausalLM` | Kimi-Linear-48B-A3B-Base, Kimi-Linear-48B-A3B-Instruct | `moonshotai/Kimi-Linear-48B-A3B-Base`, `moonshotai/Kimi-Linear-48B-A3B-Instruct` | | ✅︎ |
385386
| `Lfm2ForCausalLM` | LFM2 | `LiquidAI/LFM2-1.2B`, `LiquidAI/LFM2-700M`, `LiquidAI/LFM2-350M`, etc. | ✅︎ | ✅︎ |
386387
| `Lfm2MoeForCausalLM` | LFM2MoE | `LiquidAI/LFM2-8B-A1B-preview`, etc. | ✅︎ | ✅︎ |
387388
| `LlamaForCausalLM` | Llama 3.1, Llama 3, Llama 2, LLaMA, Yi | `meta-llama/Meta-Llama-3.1-405B-Instruct`, `meta-llama/Meta-Llama-3.1-70B`, `meta-llama/Meta-Llama-3-70B-Instruct`, `meta-llama/Llama-2-70b-hf`, `01-ai/Yi-34B`, etc. | ✅︎ | ✅︎ |

tests/models/registry.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,9 @@ def check_available_online(
296296
"random": "ai21labs/Jamba-tiny-random",
297297
},
298298
),
299+
"KimiLinearForCausalLM": _HfExamplesInfo(
300+
"moonshotai/Kimi-Linear-48B-A3B-Instruct", trust_remote_code=True
301+
),
299302
"Lfm2ForCausalLM": _HfExamplesInfo("LiquidAI/LFM2-1.2B"),
300303
"Lfm2MoeForCausalLM": _HfExamplesInfo(
301304
"LiquidAI/LFM2-8B-A1B", min_transformers_version="4.58"

vllm/config/compilation.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,7 @@ class CompilationConfig:
453453
"vllm::linear_attention",
454454
"vllm::plamo2_mamba_mixer",
455455
"vllm::gdn_attention",
456+
"vllm::kda_attention",
456457
"vllm::sparse_attn_indexer",
457458
]
458459

vllm/config/model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1236,6 +1236,7 @@ def is_deepseek_mla(self) -> bool:
12361236
"deepseek_v32",
12371237
"deepseek_mtp",
12381238
"kimi_k2",
1239+
"kimi_linear",
12391240
"longcat_flash",
12401241
):
12411242
return self.hf_text_config.kv_lora_rank is not None

vllm/model_executor/layers/fla/ops/kda.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1304,7 +1304,7 @@ def kda_gate_fwd_kernel(
13041304
tl.store(y_ptr, b_y.to(y.dtype.element_ty), boundary_check=(0, 1))
13051305

13061306

1307-
def kda_gate_fwd(
1307+
def fused_kda_gate(
13081308
g: torch.Tensor,
13091309
A: torch.Tensor,
13101310
head_k_dim: int,

0 commit comments

Comments (0)