@@ -376,19 +376,45 @@ impl KvRouter {
376376 }
377377}
378378
379- // NOTE: this would not be usable for now, should deprecate
379+ // NOTE: KVRouter works like a PushRouter,
380+ // but without the reverse proxy functionality, but based on contract of 3 request types
380381#[ async_trait]
381382impl AsyncEngine < SingleIn < RouterRequest > , ManyOut < Annotated < RouterResponse > > , Error > for KvRouter {
382383 async fn generate (
383384 & self ,
384385 request : SingleIn < RouterRequest > ,
385386 ) -> Result < ManyOut < Annotated < RouterResponse > > > {
386387 let ( request, ctx) = request. into_parts ( ) ;
387- let ( worker_id, _) = self
388- . find_best_match ( ctx. id ( ) , & request. tokens , None , true )
389- . await ?;
388+ let context_id = ctx. context ( ) . id ( ) . to_string ( ) ;
389+ // Handle different request types
390+ let response = match request {
391+ RouterRequest :: New { tokens } => {
392+ let ( worker_id, overlap_blocks) = self
393+ . find_best_match ( & context_id, & tokens, None , true )
394+ . await ?;
395+
396+ RouterResponse {
397+ worker_id,
398+ overlap_blocks,
399+ }
400+ }
401+ RouterRequest :: MarkPrefill { } => {
402+ self . mark_prefill_completed ( & context_id) . await ;
403+
404+ RouterResponse {
405+ worker_id : -1 ,
406+ overlap_blocks : 0 ,
407+ }
408+ }
409+ RouterRequest :: MarkFree { } => {
410+ self . free ( & context_id) . await ;
411+ RouterResponse {
412+ worker_id : -1 ,
413+ overlap_blocks : 0 ,
414+ }
415+ }
416+ } ;
390417
391- let response = RouterResponse { worker_id } ;
392418 let response = Annotated :: from_data ( response) ;
393419 let stream = stream:: iter ( vec ! [ response] ) ;
394420 Ok ( ResponseStream :: new ( Box :: pin ( stream) , ctx. context ( ) ) )
0 commit comments