We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 1f731ae · commit d3ea7fe
vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py
@@ -116,7 +116,7 @@ def apply_weights(self,
116
x: torch.Tensor,
117
bias: Optional[torch.Tensor] = None) -> torch.Tensor:
118
# marlin requires contiguous memory layout
119
- # kv/prefill caching may cause x to be non-contiguous
+ # prefix caching may cause x to be non-contiguous
120
x = x.contiguous() # no-op if already contiguous
121
122
c = self.config
0 commit comments