@@ -44,24 +44,6 @@ pub fn add_to_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
4444 Ok ( ( ) )
4545}
4646
47- pub fn map_dtype ( dtype : & str ) -> anyhow:: Result < dynamo_llm:: common:: dtype:: DType > {
48- Ok ( match dtype {
49- "fp8" | "FP8" => dynamo_llm:: common:: dtype:: DType :: FP8 ,
50- "fp16" | "FP16" => dynamo_llm:: common:: dtype:: DType :: FP16 ,
51- "bf16" | "BF16" => dynamo_llm:: common:: dtype:: DType :: BF16 ,
52- "fp32" | "FP32" => dynamo_llm:: common:: dtype:: DType :: FP32 ,
53- "u8" | "U8" => dynamo_llm:: common:: dtype:: DType :: U8 ,
54- "u16" | "U16" => dynamo_llm:: common:: dtype:: DType :: U16 ,
55- "u32" | "U32" => dynamo_llm:: common:: dtype:: DType :: U32 ,
56- "u64" | "U64" => dynamo_llm:: common:: dtype:: DType :: U64 ,
57- "i8" | "I8" => dynamo_llm:: common:: dtype:: DType :: I8 ,
58- "i16" | "I16" => dynamo_llm:: common:: dtype:: DType :: I16 ,
59- "i32" | "I32" => dynamo_llm:: common:: dtype:: DType :: I32 ,
60- "i64" | "I64" => dynamo_llm:: common:: dtype:: DType :: I64 ,
61- _ => return Err ( anyhow:: anyhow!( "Unsupported dtype: {}" , dtype) ) ,
62- } )
63- }
64-
6547type VllmBlockManager = dynamo_llm:: block_manager:: KvBlockManager <
6648 Logical < DistributedLeaderWorkerResources > ,
6749 BasicMetadata ,
@@ -71,6 +53,7 @@ type VllmBlockManager = dynamo_llm::block_manager::KvBlockManager<
7153#[ derive( Clone ) ]
7254pub struct BlockManager {
7355 inner : Arc < VllmBlockManager > ,
56+ _rt : Arc < tokio:: runtime:: Runtime > ,
7457}
7558
7659#[ pymethods]
@@ -94,7 +77,7 @@ impl BlockManager {
9477
9578 tracing:: info!( "Using {} device blocks" , device_num_blocks) ;
9679
97- let mut model_config = dynamo_llm:: block_manager:: KvManagerModelConfig :: builder ( )
80+ let model_config = dynamo_llm:: block_manager:: KvManagerModelConfig :: builder ( )
9881 . num_layers ( 1 )
9982 . outer_dim ( 1 )
10083 . page_size ( page_size)
@@ -110,47 +93,46 @@ impl BlockManager {
11093 . map_err ( to_pyerr) ?,
11194 ) ;
11295
113- if leader. inner ( ) . num_host_blocks ( ) > 0 {
114- tracing:: info!( "Using {} host blocks" , leader. inner( ) . num_host_blocks( ) ) ;
96+ let ( leader, rt) = leader. dissolve ( ) ;
97+
98+ if leader. num_host_blocks ( ) > 0 {
99+ tracing:: info!( "Using {} host blocks" , leader. num_host_blocks( ) ) ;
115100 config = config. host_layout (
116101 dynamo_llm:: block_manager:: KvManagerLayoutConfig :: builder ( )
117- . num_blocks ( leader. inner ( ) . num_host_blocks ( ) )
102+ . num_blocks ( leader. num_host_blocks ( ) )
118103 . logical ( Some ( BlockParallelismStrategy :: LeaderWorkerSharded ) )
119104 . build ( )
120105 . map_err ( to_pyerr) ?,
121106 ) ;
122107 }
123108
124- if leader. inner ( ) . num_disk_blocks ( ) > 0 {
125- tracing:: info!( "Using {} disk blocks" , leader. inner ( ) . num_disk_blocks( ) ) ;
109+ if leader. num_disk_blocks ( ) > 0 {
110+ tracing:: info!( "Using {} disk blocks" , leader. num_disk_blocks( ) ) ;
126111 config = config. disk_layout (
127112 dynamo_llm:: block_manager:: KvManagerLayoutConfig :: builder ( )
128- . num_blocks ( leader. inner ( ) . num_disk_blocks ( ) )
113+ . num_blocks ( leader. num_disk_blocks ( ) )
129114 . logical ( Some ( BlockParallelismStrategy :: LeaderWorkerSharded ) )
130115 . build ( )
131116 . map_err ( to_pyerr) ?,
132117 ) ;
133118 }
134119
135120 let config = config. build ( ) . map_err ( to_pyerr) ?;
136- let tokio_runtime = pyo3_async_runtimes:: tokio:: get_runtime ( ) ;
137121 Ok ( BlockManager {
138122 inner : Arc :: from (
139- tokio_runtime
140- . block_on ( async {
141- let resources = DistributedLeaderWorkerResources :: new (
142- leader. inner ( ) ,
143- cancel_token. child_token ( ) ,
144- ) ?;
145-
146- dynamo_llm:: block_manager:: KvBlockManager :: <
147- Logical < DistributedLeaderWorkerResources > ,
148- BasicMetadata ,
149- > :: new ( config, resources)
150- . await
151- } )
152- . map_err ( to_pyerr) ?,
123+ rt. block_on ( async {
124+ let resources =
125+ DistributedLeaderWorkerResources :: new ( leader, cancel_token. child_token ( ) ) ?;
126+
127+ dynamo_llm:: block_manager:: KvBlockManager :: <
128+ Logical < DistributedLeaderWorkerResources > ,
129+ BasicMetadata ,
130+ > :: new ( config, resources)
131+ . await
132+ } )
133+ . map_err ( to_pyerr) ?,
153134 ) ,
135+ _rt : rt,
154136 } )
155137 }
156138
0 commit comments