2 files changed: +8 −6

docs/source/getting_started/installation/gpu

@@ -23,6 +23,8 @@ Currently, there are no pre-built XPU wheels.
 Second, install Python packages for vLLM XPU backend building:

 ```console
+git clone https://github.com/vllm-project/vllm.git
+cd vllm
 pip install --upgrade pip
 pip install -v -r requirements/xpu.txt
 ```
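The added lines clone the repository and enter it before installing, so that `requirements/xpu.txt` exists in the working directory. Assembled from the diff, the documented steps after this change read in full:

```console
git clone https://github.com/vllm-project/vllm.git
cd vllm
pip install --upgrade pip
pip install -v -r requirements/xpu.txt
```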
@@ -220,8 +220,8 @@ def forward(
                 value_cache,
                 attn_metadata.slot_mapping.flatten(),
                 self.kv_cache_dtype,
-                layer._k_scale,
-                layer._v_scale,
+                layer._k_scale_float,
+                layer._v_scale_float,
             )

         if attn_metadata.is_prompt:
@@ -306,8 +306,8 @@ def forward(
                     max_seq_len,
                     self.alibi_slopes,
                     self.kv_cache_dtype,
-                    layer._k_scale,
-                    layer._v_scale,
+                    layer._k_scale_float,
+                    layer._v_scale_float,
                 )
             else:
                 # Run PagedAttention V2.
@@ -339,8 +339,8 @@ def forward(
                     max_seq_len,
                     self.alibi_slopes,
                     self.kv_cache_dtype,
-                    layer._k_scale,
-                    layer._v_scale,
+                    layer._k_scale_float,
+                    layer._v_scale_float,
                 )

         # Reshape the output tensor.
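The three hunks above make the same substitution at each kernel call site: the tensor-valued `layer._k_scale` / `layer._v_scale` arguments are replaced with `layer._k_scale_float` / `layer._v_scale_float`, presumably because the underlying ops expect plain Python floats rather than 0-dim tensors. A minimal sketch of that distinction, assuming a layer that stores the scale in both forms; only the attribute names come from the diff, the class and kernel stub below are illustrative, not vLLM's actual API:

```python
import torch


class AttentionLayerSketch:
    """Illustrative stand-in: stores the KV-cache scales both as tensors and as
    plain Python floats (mirroring the `_k_scale` / `_k_scale_float` naming
    from the diff)."""

    def __init__(self, k_scale: float = 1.0, v_scale: float = 1.0) -> None:
        # Tensor form, convenient for on-device arithmetic.
        self._k_scale = torch.tensor(k_scale, dtype=torch.float32)
        self._v_scale = torch.tensor(v_scale, dtype=torch.float32)
        # Plain-float form, which a binding declared to take a float expects.
        self._k_scale_float = float(k_scale)
        self._v_scale_float = float(v_scale)


def kernel_stub(kv_cache_dtype: str, k_scale: float, v_scale: float) -> None:
    # Hypothetical kernel binding that, like many C++/SYCL bindings taking a
    # scalar, accepts only plain floats for the scale arguments.
    assert isinstance(k_scale, float) and isinstance(v_scale, float)


layer = AttentionLayerSketch()
kernel_stub("auto", layer._k_scale_float, layer._v_scale_float)  # accepted
# kernel_stub("auto", layer._k_scale, layer._v_scale)  # would fail: tensors, not floats
```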