@@ -355,6 +355,7 @@ def _transfer_kv_cache(self, req_meta: dict[str, Any]):
355355 num_blocks = len (local_block_ids )
356356
357357 session_id = f"{ remote_host } :{ remote_transfer_port } "
358+ src_list , dst_list , length_list = [], [], []
358359 for k , (src_layer_base_addr , dst_layer_base_addr ) in enumerate (
359360 zip (local_kv_caches_base_addrs , remote_kv_caches_base_addrs )):
360361 block_len = (self .block_len [k % 2 ]
@@ -364,14 +365,15 @@ def _transfer_kv_cache(self, req_meta: dict[str, Any]):
364365 src = src_layer_base_addr + local_block_ids [0 ] * block_len
365366 dst = dst_layer_base_addr + remote_block_id [0 ] * block_len
366367 length = len (local_block_ids ) * block_len
367- ret = self .engine .transfer_sync_read (session_id , src , dst ,
368- length )
369- if ret < 0 :
370- logger .error (
371- "Mooncake transfer failed for request %s: "
372- "src=%x, dst=%x, length=%s" , req_meta ["request_id" ],
373- src , dst , length )
374- raise RuntimeError (f"Mooncake transfer failed, ret: { ret } " )
368+ src_list .append (src )
369+ dst_list .append (dst )
370+ length_list .append (length )
371+ ret = self .engine .batch_transfer_sync_read (session_id , src_list , dst_list ,
372+ length_list )
373+ if ret < 0 :
374+ logger .error (
375+ "Mooncake transfer failed for request %s" , req_meta ["request_id" ])
376+ raise RuntimeError (f"Mooncake transfer failed, ret: { ret } " )
375377
376378 req_end_time = time .perf_counter ()
377379 req_transfer_elapsed = (req_end_time - req_start_time ) * 1000
0 commit comments