Skip to content

Commit c7dbde3

Browse files
committed
initial commit - signoff
Signed-off-by: michaelfeil <me@michaelfeil.eu>
1 parent 09c7b73 commit c7dbde3

File tree

2 files changed

+48
-8
lines changed

2 files changed

+48
-8
lines changed

lib/llm/src/kv_router.rs

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -376,19 +376,45 @@ impl KvRouter {
376376
}
377377
}
378378

379-
// NOTE: this would not be usable for now, should deprecate
379+
// NOTE: KVRouter works like a PushRouter,
380+
// but without the reverse proxy functionality, but based on contract of 3 request types
380381
#[async_trait]
381382
impl AsyncEngine<SingleIn<RouterRequest>, ManyOut<Annotated<RouterResponse>>, Error> for KvRouter {
382383
async fn generate(
383384
&self,
384385
request: SingleIn<RouterRequest>,
385386
) -> Result<ManyOut<Annotated<RouterResponse>>> {
386387
let (request, ctx) = request.into_parts();
387-
let (worker_id, _) = self
388-
.find_best_match(ctx.id(), &request.tokens, None, true)
389-
.await?;
388+
let context_id = ctx.context().id().to_string();
389+
// Handle different request types
390+
let response = match request {
391+
RouterRequest::New { tokens } => {
392+
let (worker_id, overlap_blocks) = self
393+
.find_best_match(&context_id, &tokens, None, true)
394+
.await?;
395+
396+
RouterResponse {
397+
worker_id,
398+
overlap_blocks,
399+
}
400+
}
401+
RouterRequest::MarkPrefill {} => {
402+
self.mark_prefill_completed(&context_id).await;
403+
404+
RouterResponse {
405+
worker_id: -1,
406+
overlap_blocks: 0,
407+
}
408+
}
409+
RouterRequest::MarkFree {} => {
410+
self.free(&context_id).await;
411+
RouterResponse {
412+
worker_id: -1,
413+
overlap_blocks: 0,
414+
}
415+
}
416+
};
390417

391-
let response = RouterResponse { worker_id };
392418
let response = Annotated::from_data(response);
393419
let stream = stream::iter(vec![response]);
394420
Ok(ResponseStream::new(Box::pin(stream), ctx.context()))

lib/llm/src/kv_router/protocols.rs

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,28 @@ use crate::tokens::{SequenceHash, Token};
55
use serde::{Deserialize, Serialize};
66
use uuid::Uuid;
77

8-
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
9-
pub struct RouterRequest {
10-
pub tokens: Vec<Token>,
8+
#[derive(Debug, Clone, Serialize, Deserialize)]
9+
#[serde(tag = "method", rename_all = "snake_case")]
10+
pub enum RouterRequest {
11+
// ini
12+
#[serde(rename = "new")]
13+
New {
14+
tokens: Vec<Token>,
15+
},
16+
MarkPrefill {},
17+
MarkFree {},
18+
}
19+
20+
impl Default for RouterRequest {
21+
fn default() -> Self {
22+
RouterRequest::New { tokens: vec![] }
23+
}
1124
}
1225

1326
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
1427
pub struct RouterResponse {
1528
pub worker_id: i64,
29+
pub overlap_blocks: u32,
1630
}
1731

1832
#[derive(Debug)]

0 commit comments

Comments
 (0)