Skip to content

Commit e249851

Browse files
committed
Integrate priorities into offloading
1 parent be88e7b commit e249851

File tree

3 files changed

+15
-2
lines changed

3 files changed

+15
-2
lines changed

lib/llm/src/block_manager/block.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ pub trait BlockMetadata: Default + std::fmt::Debug + Clone + Ord + Send + Sync +
8282
/// Resets the metadata to the default value
8383
/// After this is called, [BlockMetadata::is_reset()] should return true.
8484
fn reset_metadata(&mut self);
85+
86+
/// The offload priority of the block. Higher priority blocks are offloaded first.
87+
/// Returns `None` if the block should not be offloaded.
88+
fn offload_priority(&self) -> Option<u64>;
8589
}
8690

8791
/// Marker trait for types that are mutable blocks
@@ -536,6 +540,10 @@ impl BlockMetadata for BasicMetadata {
536540
fn reset_metadata(&mut self) {
537541
self.priority = 0;
538542
}
543+
544+
fn offload_priority(&self) -> Option<u64> {
545+
Some(self.priority as u64)
546+
}
539547
}
540548
/// Collection that holds shared storage and layout
541549
#[derive(Debug)]

lib/llm/src/block_manager/pool/inactive.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,10 @@ pub(crate) mod tests {
518518
fn reset_metadata(&mut self) {
519519
self.priority = 0;
520520
}
521+
522+
fn offload_priority(&self) -> Option<u64> {
523+
Some(self.priority as u64)
524+
}
521525
}
522526

523527
type TestPriorityKey = PriorityKey<TestMetadata>;

lib/llm/src/block_manager/pool/state.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,9 +179,10 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
179179

180180
let immutable = self.active.register(mutable)?;
181181

182-
// TODO: Make a way to set meaningful priority values, and maybe don't enqueue offloads for every registered block.
183182
if offload {
184-
immutable.enqueue_offload(0).await.unwrap();
183+
if let Some(priority) = immutable.metadata().offload_priority() {
184+
immutable.enqueue_offload(priority).await.unwrap();
185+
}
185186
}
186187

187188
immutable_blocks.push(immutable);

0 commit comments

Comments
 (0)