File tree Expand file tree Collapse file tree 3 files changed +22
-0
lines changed
distributed/kv_transfer/kv_connector/v1 Expand file tree Collapse file tree 3 files changed +22
-0
lines changed Original file line number Diff line number Diff line change 1212 times for a given request and should be side-effect free.
1313 update_state_after_alloc() - update KVConnector state after
1414 temporary buffer alloc by the CacheManager.
15+ update_connector_output() - update KVConnector state after
16+ output is received from worker-side connectors.
1517 request_finished() - called when a request is finished, with
1618 the computed kv cache blocks for the request.
1719 Returns whether KV cache should be freed now or will be
3840
3941from vllm .logger import init_logger
4042from vllm .v1 .core .sched .output import SchedulerOutput
43+ from vllm .v1 .outputs import KVConnectorOutput
4144
4245if TYPE_CHECKING :
4346 from vllm .attention .backends .abstract import AttentionMetadata
@@ -283,6 +286,16 @@ def build_connector_meta(
283286 """
284287 pass
285288
289+ def update_connector_output (self , connector_output : KVConnectorOutput ):
290+ """
291+ Update KVConnector state from worker-side connectors output.
292+
293+ Args:
294+ connector_output (KVConnectorOutput): the worker-side
295+ connectors output.
296+ """
297+ return
298+
286299 def request_finished (
287300 self ,
288301 request : "Request" ,
Original file line number Diff line number Diff line change 1414from vllm .logger import init_logger
1515from vllm .v1 .core .kv_cache_manager import KVCacheBlocks
1616from vllm .v1 .core .sched .output import SchedulerOutput
17+ from vllm .v1 .outputs import KVConnectorOutput
1718
1819if TYPE_CHECKING :
1920 from vllm .attention .backends .abstract import AttentionMetadata
@@ -177,6 +178,10 @@ def build_connector_meta(
177178 self ._extra_async_saves = {}
178179 return metadata
179180
181+ def update_connector_output (self , connector_output : KVConnectorOutput ):
182+ for c in self ._connectors :
183+ c .update_connector_output (connector_output )
184+
180185 def request_finished (
181186 self ,
182187 request : "Request" ,
Original file line number Diff line number Diff line change @@ -1150,6 +1150,10 @@ def _update_from_kv_xfer_finished(self,
11501150 # if finished_recving: add to state so we can
11511151 scheduler the request during the next step.
11521152 """
1153+
1154+ assert self .connector is not None
1155+ self .connector .update_connector_output (kv_connector_output )
1156+
11531157 # KV Connector:: update recv and send status from last step.
11541158 for req_id in (kv_connector_output .finished_recving or ()):
11551159 logger .debug ("Finished recving KV transfer for request %s" , req_id )
You can’t perform that action at this time.
0 commit comments