Little fix: pass the CPU device to apply_grammar_bitmask, since logits are moved to CPU before the call

shen-shanshan · shen-shanshan · commit 356dc3b5aba2 · 2025-09-26T02:11:47.000Z
Signed-off-by: shen-shanshan <467638484@qq.com>
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
@@ -1965,7 +1965,7 @@ def execute_model(
                 logits_dtype = logits.dtype
                 logits = logits.to("cpu").float()
                 apply_grammar_bitmask(scheduler_output, self.input_batch,
-                                      logits, self.device)
+                                      logits, torch.device("cpu"))
                 logits = logits.to(self.device).to(logits_dtype)
 
             # Sample the next token and get logprobs if needed.