Skip to content

Commit 3c1360d

Browse files
committed
clean up
1 parent 714bf21 commit 3c1360d

File tree

4 files changed

+157
-90
lines changed

4 files changed

+157
-90
lines changed

lib/bindings/python/rust/llm/block_manager/vllm/connector/leader.rs

Lines changed: 45 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
// SPDX-License-Identifier: Apache-2.0
33

4-
pub mod slot;
54
pub mod recorder;
5+
pub mod slot;
66

77
use super::*;
88
use dynamo_runtime::DistributedRuntime;
@@ -28,8 +28,8 @@ use std::{
2828
collections::HashSet,
2929
sync::{Arc, Mutex},
3030
};
31-
use tokio::sync::mpsc;
3231
use tokio;
32+
use tokio::sync::mpsc;
3333

3434
type VllmLocality = Logical<DistributedLeaderWorkerResources>;
3535

@@ -38,10 +38,9 @@ impl From<SlotError> for PyErr {
3838
to_pyerr(err)
3939
}
4040
}
41+
use anyhow;
4142
use dynamo_llm::recorder::Recorder;
4243
use tokio_util::sync::CancellationToken;
43-
use anyhow;
44-
4544

4645
pub trait Leader: Send + Sync + std::fmt::Debug {
4746
fn get_num_new_matched_tokens(
@@ -83,7 +82,6 @@ pub struct KvConnectorLeader {
8382
iteration_counter: u64,
8483
}
8584

86-
8785
impl KvConnectorLeader {
8886
fn new(
8987
worker_id: String,
@@ -137,7 +135,9 @@ impl Leader for KvConnectorLeader {
137135
debug_assert!(num_computed_tokens % self.block_size == 0);
138136

139137
let shared_slot = self.slot_manager.get_slot(&request_id)?;
140-
let mut slot = shared_slot.lock().map_err(|e| anyhow::anyhow!("Failed to lock slot: {}", e))?;
138+
let mut slot = shared_slot
139+
.lock()
140+
.map_err(|e| anyhow::anyhow!("Failed to lock slot: {}", e))?;
141141

142142
// early exit if we cannot match full block
143143
if (slot.sequence().total_tokens() - num_computed_tokens) < self.block_size {
@@ -180,7 +180,9 @@ impl Leader for KvConnectorLeader {
180180
);
181181

182182
let shared_slot = self.slot_manager.get_slot(&request_id)?;
183-
let mut slot = shared_slot.lock().map_err(|e| anyhow::anyhow!("Failed to lock slot: {}", e))?;
183+
let mut slot = shared_slot
184+
.lock()
185+
.map_err(|e| anyhow::anyhow!("Failed to lock slot: {}", e))?;
184186

185187
// we have not yet advanced the computed position, but now we can, since we have an indication that we have
186188
// the necessary GPU blocks into which we will load the external tokens.
@@ -207,7 +209,7 @@ impl Leader for KvConnectorLeader {
207209
}
208210

209211
#[tracing::instrument(level = "debug", skip_all, fields(iteration = self.iteration_counter + 1))]
210-
pub fn build_connector_metadata(
212+
fn build_connector_metadata(
211213
&mut self,
212214
scheduler_output: SchedulerOutput,
213215
) -> anyhow::Result<Vec<u8>> {
@@ -234,7 +236,9 @@ impl Leader for KvConnectorLeader {
234236
// once for onboarding (this loop), then again for prefill/decode (new_requests loop).
235237
for request_id in onboarding_slots.iter() {
236238
let shared_slot = self.slot_manager.get_slot(request_id)?;
237-
let mut slot = shared_slot.lock().map_err(|e| anyhow::anyhow!("Failed to lock slot: {}", e))?;
239+
let mut slot = shared_slot
240+
.lock()
241+
.map_err(|e| anyhow::anyhow!("Failed to lock slot: {}", e))?;
238242

239243
md.create_slot(request_id.clone());
240244

@@ -256,7 +260,9 @@ impl Leader for KvConnectorLeader {
256260
);
257261

258262
let shared_slot = self.slot_manager.get_slot(request_id)?;
259-
let mut slot = shared_slot.lock().map_err(|e| anyhow::anyhow!("Failed to lock slot: {}", e))?;
263+
let mut slot = shared_slot
264+
.lock()
265+
.map_err(|e| anyhow::anyhow!("Failed to lock slot: {}", e))?;
260266

261267
// inform the worker that a new request-slot should be created
262268
md.create_slot(new_req.request_id.clone());
@@ -297,7 +303,9 @@ impl Leader for KvConnectorLeader {
297303
);
298304

299305
let shared_slot = self.slot_manager.get_slot(request_id)?;
300-
let mut slot = shared_slot.lock().map_err(|e| anyhow::anyhow!("Failed to lock slot: {}", e))?;
306+
let mut slot = shared_slot
307+
.lock()
308+
.map_err(|e| anyhow::anyhow!("Failed to lock slot: {}", e))?;
301309

302310
let scheduled_tokens = *scheduler_output
303311
.num_scheduled_tokens
@@ -322,16 +330,23 @@ impl Leader for KvConnectorLeader {
322330
}
323331

324332
tracing::debug!("metadata: {md:#?}");
325-
serde_json::to_vec(&md).map_err(|e| anyhow::anyhow!("Failed to serialize connector metadata: {}", e))
333+
serde_json::to_vec(&md)
334+
.map_err(|e| anyhow::anyhow!("Failed to serialize connector metadata: {}", e))
326335
}
327336

328-
fn request_finished(&mut self, request_id: String, block_ids: Vec<BlockId>) -> anyhow::Result<bool> {
337+
fn request_finished(
338+
&mut self,
339+
request_id: String,
340+
block_ids: Vec<BlockId>,
341+
) -> anyhow::Result<bool> {
329342
tracing::debug!("Request finished: {request_id}; block_ids: {block_ids:?}");
330343
// grab the slot
331344
let shared_slot = self.slot_manager.get_slot(&request_id)?;
332345

333346
// mark the slot as finished
334-
let mut slot = shared_slot.lock().map_err(|e| anyhow::anyhow!("Failed to lock slot: {}", e))?;
347+
let mut slot = shared_slot
348+
.lock()
349+
.map_err(|e| anyhow::anyhow!("Failed to lock slot: {}", e))?;
335350
slot.mark_as_finished(self.iteration_counter)?;
336351

337352
// todo: allow the request to resolve when it should exit
@@ -413,7 +428,9 @@ impl PyKvConnectorLeader {
413428
request_num_tokens: usize,
414429
num_computed_tokens: usize,
415430
) -> PyResult<(usize, bool)> {
416-
self.connector_leader.get_num_new_matched_tokens(request_id, request_num_tokens, num_computed_tokens).map_err(to_pyerr)
431+
self.connector_leader
432+
.get_num_new_matched_tokens(request_id, request_num_tokens, num_computed_tokens)
433+
.map_err(to_pyerr)
417434
}
418435

419436
fn update_state_after_alloc(
@@ -422,25 +439,30 @@ impl PyKvConnectorLeader {
422439
block_ids: Vec<BlockId>,
423440
num_external_tokens: usize,
424441
) -> PyResult<()> {
425-
self.connector_leader.update_state_after_alloc(request_id, block_ids, num_external_tokens).map_err(to_pyerr)
442+
self.connector_leader
443+
.update_state_after_alloc(request_id, block_ids, num_external_tokens)
444+
.map_err(to_pyerr)
426445
}
427446

428-
fn build_connector_metadata(
429-
&mut self,
430-
scheduler_output: SchedulerOutput,
431-
) -> PyResult<Vec<u8>> {
432-
self.connector_leader.build_connector_metadata(scheduler_output).map_err(to_pyerr)
447+
fn build_connector_metadata(&mut self, scheduler_output: SchedulerOutput) -> PyResult<Vec<u8>> {
448+
self.connector_leader
449+
.build_connector_metadata(scheduler_output)
450+
.map_err(to_pyerr)
433451
}
434452

435453
fn request_finished(&mut self, request_id: &str, block_ids: Vec<BlockId>) -> PyResult<bool> {
436-
self.connector_leader.request_finished(request_id.to_string(), block_ids).map_err(to_pyerr)
454+
self.connector_leader
455+
.request_finished(request_id.to_string(), block_ids)
456+
.map_err(to_pyerr)
437457
}
438458

439459
fn has_slot(&self, request_id: &str) -> bool {
440460
self.connector_leader.has_slot(request_id.to_string())
441461
}
442462

443463
fn create_slot(&mut self, request: KvbmRequest, tokens: Vec<u32>) -> PyResult<()> {
444-
self.connector_leader.create_slot(request, tokens).map_err(to_pyerr)
464+
self.connector_leader
465+
.create_slot(request, tokens)
466+
.map_err(to_pyerr)
445467
}
446468
}

lib/bindings/python/rust/llm/block_manager/vllm/connector/leader/recorder.rs

Lines changed: 70 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
use super::*;
22
use anyhow;
33

4-
54
#[derive(Debug, Clone, Serialize, Deserialize)]
65
pub enum Action {
76
GetNumNewMatchedTokens(GetNumNewMatchedTokensInput, GetNumNewMatchedTokensOutput),
@@ -33,8 +32,7 @@ pub struct UpdateStateAfterAllocInput {
3332
}
3433

3534
#[derive(Debug, Clone, Serialize, Deserialize)]
36-
pub struct UpdateStateAfterAllocOutput {
37-
}
35+
pub struct UpdateStateAfterAllocOutput {}
3836

3937
#[derive(Debug, Clone, Serialize, Deserialize)]
4038
pub struct BuildConnectorMetaInput {
@@ -43,7 +41,7 @@ pub struct BuildConnectorMetaInput {
4341

4442
#[derive(Debug, Clone, Serialize, Deserialize)]
4543
pub struct BuildConnectorMetaOutput {
46-
metadata: Vec<u8>,
44+
metadata: ConnectorMetadata,
4745
}
4846

4947
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -74,8 +72,7 @@ pub struct CreateSlotInput {
7472
}
7573

7674
#[derive(Debug, Clone, Serialize, Deserialize)]
77-
pub struct CreateSlotOutput {
78-
}
75+
pub struct CreateSlotOutput {}
7976

8077
#[derive(Debug)]
8178
pub struct KvConnectorLeaderRecorder {
@@ -109,9 +106,11 @@ impl KvConnectorLeaderRecorder {
109106
let output_path = "/tmp/records.jsonl";
110107
tracing::info!("recording events to {}", output_path);
111108

112-
let recorder = drt.runtime().primary().block_on(async {
113-
Recorder::new(token, &output_path, None, None, None).await
114-
}).unwrap();
109+
let recorder = drt
110+
.runtime()
111+
.primary()
112+
.block_on(async { Recorder::new(token, &output_path, None, None, None).await })
113+
.unwrap();
115114

116115
let connector_leader = KvConnectorLeader {
117116
slot_manager: ConnectorSlotManager::new(block_manager.clone(), leader, drt.clone()),
@@ -124,7 +123,10 @@ impl KvConnectorLeaderRecorder {
124123
let (unbounded_tx, unbounded_rx) = mpsc::unbounded_channel();
125124
let recorder_tx = recorder.event_sender();
126125

127-
let _ = drt.runtime().primary().spawn(Self::forward_unbounded_to_sender(unbounded_rx, recorder_tx));
126+
// todo(kvbm): make this a critical task
127+
drt.runtime()
128+
.primary()
129+
.spawn(Self::forward_unbounded_to_sender(unbounded_rx, recorder_tx));
128130

129131
Self {
130132
_recorder: recorder,
@@ -160,16 +162,22 @@ impl Leader for KvConnectorLeaderRecorder {
160162
) -> anyhow::Result<(usize, bool)> {
161163
let input_copy = GetNumNewMatchedTokensInput {
162164
request_id: request_id.clone(),
163-
request_num_tokens: request_num_tokens.clone(),
164-
num_computed_tokens: num_computed_tokens.clone(),
165+
request_num_tokens,
166+
num_computed_tokens,
165167
};
166-
let output = self.connector_leader.get_num_new_matched_tokens(request_id, request_num_tokens, num_computed_tokens);
167-
let output_copy = output.as_ref().unwrap().clone();
168-
let _ = self.unbounded_tx.send(Action::GetNumNewMatchedTokens(input_copy, GetNumNewMatchedTokensOutput {
169-
num_new_matched_tokens: output_copy.0,
170-
has_matched: output_copy.1,
171-
}));
172-
output
168+
let output = self.connector_leader.get_num_new_matched_tokens(
169+
request_id,
170+
request_num_tokens,
171+
num_computed_tokens,
172+
)?;
173+
let _ = self.unbounded_tx.send(Action::GetNumNewMatchedTokens(
174+
input_copy,
175+
GetNumNewMatchedTokensOutput {
176+
num_new_matched_tokens: output.0,
177+
has_matched: output.1,
178+
},
179+
));
180+
Ok(output)
173181
}
174182

175183
/// We drop the need to pass in the KvCacheBlocks and the num_external_tokens as they are captured
@@ -186,10 +194,17 @@ impl Leader for KvConnectorLeaderRecorder {
186194
let input_copy = UpdateStateAfterAllocInput {
187195
request_id: request_id.clone(),
188196
block_ids: block_ids.clone(),
189-
num_external_tokens: num_external_tokens.clone(),
197+
num_external_tokens,
190198
};
191-
let _ = self.connector_leader.update_state_after_alloc(request_id, block_ids, num_external_tokens).unwrap();
192-
let _ = self.unbounded_tx.send(Action::UpdateStateAfterAlloc(input_copy, UpdateStateAfterAllocOutput {}));
199+
self.connector_leader.update_state_after_alloc(
200+
request_id,
201+
block_ids,
202+
num_external_tokens,
203+
)?;
204+
let _ = self.unbounded_tx.send(Action::UpdateStateAfterAlloc(
205+
input_copy,
206+
UpdateStateAfterAllocOutput {},
207+
));
193208
Ok(())
194209
}
195210

@@ -200,39 +215,48 @@ impl Leader for KvConnectorLeaderRecorder {
200215
let input_copy = BuildConnectorMetaInput {
201216
scheduler_output: scheduler_output.clone(),
202217
};
203-
let output = self.connector_leader.build_connector_metadata(scheduler_output);
204-
let output_copy = output.as_ref().unwrap().clone();
205-
let _ = self.unbounded_tx
206-
.send(Action::BuildConnectorMeta(input_copy, BuildConnectorMetaOutput {
207-
metadata: output_copy,
208-
}));
209-
output
218+
let output = self
219+
.connector_leader
220+
.build_connector_metadata(scheduler_output)?;
221+
let _ = self.unbounded_tx.send(Action::BuildConnectorMeta(
222+
input_copy,
223+
BuildConnectorMetaOutput {
224+
metadata: serde_json::from_slice(&output)?,
225+
},
226+
));
227+
Ok(output)
210228
}
211229

212-
fn request_finished(&mut self, request_id: String, block_ids: Vec<BlockId>) -> anyhow::Result<bool> {
230+
fn request_finished(
231+
&mut self,
232+
request_id: String,
233+
block_ids: Vec<BlockId>,
234+
) -> anyhow::Result<bool> {
213235
let input_copy = RequestFinishedInput {
214236
request_id: request_id.clone(),
215237
block_ids: block_ids.clone(),
216238
};
217-
let output = self.connector_leader.request_finished(request_id, block_ids);
218-
let output_copy = output.as_ref().unwrap().clone();
219-
let _ = self.unbounded_tx
220-
.send(Action::RequestFinished(input_copy, RequestFinishedOutput {
221-
is_finished: output_copy,
222-
}));
223-
output
239+
let output = self
240+
.connector_leader
241+
.request_finished(request_id, block_ids)?;
242+
let _ = self.unbounded_tx.send(Action::RequestFinished(
243+
input_copy,
244+
RequestFinishedOutput {
245+
is_finished: output,
246+
},
247+
));
248+
Ok(output)
224249
}
225250

226251
fn has_slot(&self, request_id: String) -> bool {
227252
let input_copy = HasSlotInput {
228253
request_id: request_id.clone(),
229254
};
230255
let output = self.connector_leader.has_slot(request_id);
231-
let output_copy = output.clone();
232-
let _ = self.unbounded_tx
233-
.send(Action::HasSlot(input_copy, HasSlotOutput {
234-
result: output_copy,
235-
}));
256+
let _ = self.unbounded_tx.send(Action::HasSlot(
257+
input_copy,
258+
HasSlotOutput { result: output },
259+
));
236260
output
237261
}
238262

@@ -244,7 +268,9 @@ impl Leader for KvConnectorLeaderRecorder {
244268
tokens: tokens.clone(),
245269
};
246270
let _ = self.connector_leader.create_slot(request, tokens);
247-
let _ = self.unbounded_tx.send(Action::CreateSlot(input_copy, CreateSlotOutput {}));
271+
let _ = self
272+
.unbounded_tx
273+
.send(Action::CreateSlot(input_copy, CreateSlotOutput {}));
248274
Ok(())
249275
}
250-
}
276+
}

0 commit comments

Comments
 (0)