22
33# ruff: noqa: SIM117
44from pathlib import Path
5- from typing import List , Optional , Tuple
5+ from typing import Optional
66
77import openvino as ov
88import torch
1212from torch import nn
1313
1414import vllm .envs as envs
15- from vllm .attention .backends .openvino import OpenVINOAttentionMetadata
1615from vllm .config import ModelConfig , VllmConfig , set_current_vllm_config
16+ from vllm .forward_context import get_forward_context
1717from vllm .logger import init_logger
1818from vllm .model_executor .layers .logits_processor import (LogitsProcessor ,
1919 _prune_hidden_states )
2424logger = init_logger (__name__ )
2525
2626
27- def _flattenize_inputs (inputs ):
27+ def _flatten_inputs (inputs ):
2828 """
2929 Helper function that flattens nested inputs
3030 """
@@ -33,10 +33,9 @@ def _flattenize_inputs(inputs):
3333 if input_data is None :
3434 continue
3535 if isinstance (input_data , (list , tuple )):
36- flatten_inputs .extend (_flattenize_inputs (input_data ))
36+ flatten_inputs .extend (_flatten_inputs (input_data ))
3737 elif isinstance (input_data , dict ):
38- flatten_inputs .extend (_flattenize_inputs (list (
39- input_data .values ())))
38+ flatten_inputs .extend (_flatten_inputs (list (input_data .values ())))
4039 else :
4140 flatten_inputs .append (input_data )
4241 return flatten_inputs
@@ -147,15 +146,15 @@ def forward(
147146 self ,
148147 input_ids : torch .Tensor ,
149148 positions : torch .Tensor ,
150- kv_caches : List [Tuple [ov .Tensor , ov .Tensor ]],
151- attn_metadata : OpenVINOAttentionMetadata ,
149+ kv_caches : list [tuple [ov .Tensor , ov .Tensor ]],
152150 ) -> torch .Tensor :
153- flatten_kv_cache = _flattenize_inputs (kv_caches )
151+ flat_kv_caches = _flatten_inputs (kv_caches )
152+ attn_metadata = get_forward_context ().attn_metadata
154153
155154 inputs = [
156155 input_ids ,
157156 positions ,
158- * flatten_kv_cache ,
157+ * flat_kv_caches ,
159158 attn_metadata .past_lens ,
160159 attn_metadata .subsequence_begins ,
161160 attn_metadata .block_indices ,
0 commit comments