Fix types in Python bindings for kv_cache_block_size and context_length

grahamking · grahamking · commit 03739801a665 · 2025-06-30T16:29:07.000-04:00
diff --git a/lib/bindings/python/rust/lib.rs b/lib/bindings/python/rust/lib.rs
@@ -105,8 +105,8 @@ fn register_llm<'p>(
     endpoint: Endpoint,
     model_path: &str,
     model_name: Option<&str>,
-    context_length: Option<usize>,
-    kv_cache_block_size: Option<usize>,
+    context_length: Option<u32>,
+    kv_cache_block_size: Option<u32>,
 ) -> PyResult<Bound<'p, PyAny>> {
     let model_type_obj = match model_type {
         ModelType::Chat => llm_rs::model_type::ModelType::Chat,
diff --git a/lib/bindings/python/rust/llm/kv.rs b/lib/bindings/python/rust/llm/kv.rs
@@ -40,10 +40,13 @@ impl KvRouter {
 
         let runtime = pyo3_async_runtimes::tokio::get_runtime();
         runtime.block_on(async {
-            let inner =
-                llm_rs::kv_router::KvRouter::new(component.inner.clone(), kv_block_size, None)
-                    .await
-                    .map_err(to_pyerr)?;
+            let inner = llm_rs::kv_router::KvRouter::new(
+                component.inner.clone(),
+                kv_block_size as u32,
+                None,
+            )
+            .await
+            .map_err(to_pyerr)?;
             Ok(Self {
                 inner: Arc::new(inner),
             })
@@ -73,7 +76,7 @@ pub fn compute_block_hash_for_seq_py(tokens: Vec<u32>, kv_block_size: usize) ->
         return Err(to_pyerr(anyhow::anyhow!("kv_block_size cannot be 0")));
     }
 
-    let hashes = compute_block_hash_for_seq(&tokens, kv_block_size);
+    let hashes = compute_block_hash_for_seq(&tokens, kv_block_size as u32);
     Ok(hashes.into_iter().map(|h| h.0).collect())
 }
 
@@ -191,7 +194,7 @@ impl ZmqKvEventPublisher {
         let inner = llm_rs::kv_router::publisher::KvEventPublisher::new(
             component.inner,
             config.worker_id,
-            config.kv_block_size,
+            config.kv_block_size as u32,
             Some(KvEventSourceConfig::Zmq {
                 endpoint: config.zmq_endpoint,
                 topic: config.zmq_topic,
@@ -232,7 +235,7 @@ impl ZmqKvEventListener {
                 zmq_topic,
                 tx,
                 shutdown_token.clone(),
-                kv_block_size,
+                kv_block_size as u32,
             ));
 
             Ok(Self {
@@ -293,7 +296,7 @@ impl KvEventPublisher {
         let inner = llm_rs::kv_router::publisher::KvEventPublisher::new(
             component.inner,
             worker_id,
-            kv_block_size,
+            kv_block_size as u32,
             None,
         )
         .map_err(to_pyerr)?;
@@ -322,7 +325,7 @@ impl KvEventPublisher {
             data: KvCacheEventData::Stored(KvCacheStoreData {
                 parent_hash: parent_hash.map(ExternalSequenceBlockHash::from),
                 blocks: create_stored_blocks(
-                    self.kv_block_size,
+                    self.kv_block_size as u32,
                     &token_ids,
                     &num_block_tokens,
                     &block_hashes,
@@ -446,7 +449,7 @@ impl KvIndexer {
             let inner: Arc<llm_rs::kv_router::indexer::KvIndexer> =
                 llm_rs::kv_router::indexer::KvIndexer::new(
                     component.inner.drt().runtime().child_token(),
-                    kv_block_size,
+                    kv_block_size as u32,
                 )
                 .into();
             // [gluo TODO] try subscribe_with_type::<RouterEvent>,
@@ -478,7 +481,7 @@ impl KvIndexer {
     }
 
     fn block_size(&self) -> usize {
-        self.inner.block_size()
+        self.inner.block_size() as usize
     }
 
     fn find_matches<'p>(&self, py: Python<'p>, sequence: Vec<u64>) -> PyResult<Bound<'p, PyAny>> {
diff --git a/lib/engines/llamacpp/src/lib.rs b/lib/engines/llamacpp/src/lib.rs
@@ -78,7 +78,7 @@ impl LlamacppEngine {
 
         let (ctx_set, ctx_get) = tokio::sync::mpsc::channel(NUM_CONTEXTS);
         let llama_ctx_params = if model_config.card().context_length > 0 {
-            let n_ctx = NonZeroU32::new(model_config.card().context_length as u32);
+            let n_ctx = NonZeroU32::new(model_config.card().context_length);
             LlamaContextParams::default().with_n_ctx(n_ctx)
         } else {
             // Context length defaults to 512 currently
diff --git a/lib/llm/src/block_manager/block.rs b/lib/llm/src/block_manager/block.rs
@@ -1605,7 +1605,7 @@ mod tests {
     use dynamo_runtime::logging::init as init_logging;
     use nixl_sys::Agent as NixlAgent;
 
-    const BLOCK_SIZE: usize = 4;
+    const BLOCK_SIZE: u32 = 4;
     const SALT_HASH: SaltHash = 12345;
 
     // Helper to create a default reset block
@@ -1666,7 +1666,7 @@ mod tests {
 
         // Extend to fill capacity
         assert!(block.add_tokens(Tokens::from(vec![4])).is_ok()); // 1, 2, 3, 4
-        assert_eq!(block.len(), BLOCK_SIZE);
+        assert_eq!(block.len(), BLOCK_SIZE as usize);
 
         // Append when full (should fail)
         assert!(block.add_token(5).is_err(), "Append on full Partial block");
@@ -1690,7 +1690,7 @@ mod tests {
 
         // Fill block again for commit
         assert!(block.add_tokens(Tokens::from(vec![1, 2, 3, 4])).is_ok());
-        assert_eq!(block.len(), BLOCK_SIZE);
+        assert_eq!(block.len(), BLOCK_SIZE as usize);
 
         // --- Partial -> Complete (via commit) --- //
         assert!(block.commit().is_ok());
diff --git a/lib/llm/src/block_manager/block/state.rs b/lib/llm/src/block_manager/block/state.rs
@@ -43,7 +43,7 @@ impl BlockState {
             return Err(BlockStateInvalid("Block is not reset".to_string()));
         }
 
-        let block = PartialTokenBlock::create_sequence_root(page_size, salt_hash);
+        let block = PartialTokenBlock::create_sequence_root(page_size as u32, salt_hash);
         *self = BlockState::Partial(PartialState::new(block));
         Ok(())
     }
diff --git a/lib/llm/src/block_manager/pool/inactive.rs b/lib/llm/src/block_manager/pool/inactive.rs
@@ -648,7 +648,7 @@ pub(crate) mod tests {
     /// Each block is initialized to the Complete state and then Registered.
     pub fn create_blocks(
         tokens: Tokens,
-        block_size: usize,
+        block_size: u32,
         async_runtime: Handle,
     ) -> Vec<Block<NullDeviceStorage, TestMetadata>> {
         let (token_blocks, _partial_token_block) =
@@ -691,7 +691,7 @@ pub(crate) mod tests {
 
     pub fn acquire_blocks(
         tokens: Tokens,
-        block_size: usize,
+        block_size: u32,
         pool: &mut InactiveBlockPool<NullDeviceStorage, TestMetadata>,
         async_runtime: Handle,
     ) -> (Vec<Block<NullDeviceStorage, TestMetadata>>, usize) {
@@ -749,7 +749,7 @@ pub(crate) mod tests {
 
         let async_runtime = tokio::runtime::Runtime::new().unwrap();
 
-        const PAGE_SIZE: usize = 2;
+        const PAGE_SIZE: u32 = 2;
 
         let mut pool = create_block_pool(10);
         assert_eq!(pool.total_blocks(), 10);
diff --git a/lib/llm/src/kv_router/indexer.rs b/lib/llm/src/kv_router/indexer.rs
@@ -1315,21 +1315,17 @@ mod tests {
     fn test_compute_block_hash_for_seq(#[case] kv_block_size: u32) {
         setup();
         // create a sequence of 64 elements
-        let sequence = (0..kv_block_size).map(|i| i as u32).collect::<Vec<u32>>();
+        let sequence = (0..kv_block_size).collect::<Vec<u32>>();
         let hashes = compute_block_hash_for_seq(&sequence, kv_block_size);
         assert_eq!(hashes.len(), 1);
 
         // create a sequence of 65 elements
-        let sequence = (0..(kv_block_size + 1))
-            .map(|i| i as u32)
-            .collect::<Vec<u32>>();
+        let sequence = (0..(kv_block_size + 1)).collect::<Vec<u32>>();
         let hashes = compute_block_hash_for_seq(&sequence, kv_block_size);
         assert_eq!(hashes.len(), 1);
 
         // create a sequence of 129 elements
-        let sequence = (0..(2 * kv_block_size + 1))
-            .map(|i| i as u32)
-            .collect::<Vec<u32>>();
+        let sequence = (0..(2 * kv_block_size + 1)).collect::<Vec<u32>>();
         let hashes = compute_block_hash_for_seq(&sequence, kv_block_size);
         assert_eq!(hashes.len(), 2);
     }
diff --git a/lib/llm/src/tokenizers/sp.rs b/lib/llm/src/tokenizers/sp.rs
@@ -81,7 +81,7 @@ impl Decoder for SentencePieceTokenizer {
     /// # Arguments
     /// * `token_ids` - The sequence of token IDs to decode
     /// * `skip_special_tokens` - Currently unsupported in SentencePieceTokenizer and
-    /// it will return an error if true
+    ///   it will return an error if true
     ///
     /// # Returns
     /// * `Result<String>` - The decoded text

Original file line number	Diff line number	Diff line change
`@@ -43,7 +43,7 @@ impl BlockState {`
`43`	`43`	`return Err(BlockStateInvalid("Block is not reset".to_string()));`
`44`	`44`	`}`
`45`	`45`
`46`		`- let block = PartialTokenBlock::create_sequence_root(page_size, salt_hash);`
	`46`	`+ let block = PartialTokenBlock::create_sequence_root(page_size as u32, salt_hash);`
`47`	`47`	`*self = BlockState::Partial(PartialState::new(block));`
`48`	`48`	`Ok(())`
`49`	`49`	`}`