We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a26c097 commit 356dc3b Copy full SHA for 356dc3b
vllm_ascend/worker/model_runner_v1.py
@@ -1965,7 +1965,7 @@ def execute_model(
1965
logits_dtype = logits.dtype
1966
logits = logits.to("cpu").float()
1967
apply_grammar_bitmask(scheduler_output, self.input_batch,
1968
- logits, self.device)
+ logits, torch.device("cpu"))
1969
logits = logits.to(self.device).to(logits_dtype)
1970
1971
# Sample the next token and get logprobs if needed.
0 commit comments