From a156cf35019b71acc8ef894791e783c83d69d6e9 Mon Sep 17 00:00:00 2001 From: Neal Vaidya Date: Tue, 2 Dec 2025 01:21:09 +0000 Subject: [PATCH 1/7] feat: add register_model function for non-llms This commit introduces the `register_model` function, allowing users to register non-llm model endpoints without requiring local files or downloads from HuggingFace. The function is designed specifically for TensorBased models, where the frontend doesn't do any pre-processing. Signed-off-by: Neal Vaidya --- lib/bindings/python/rust/lib.rs | 71 +++++++++++++++++++ lib/bindings/python/src/dynamo/_core.pyi | 27 +++++++ .../python/src/dynamo/llm/__init__.py | 1 + lib/llm/src/model_card.rs | 9 +++ 4 files changed, 108 insertions(+) diff --git a/lib/bindings/python/rust/lib.rs b/lib/bindings/python/rust/lib.rs index d8f2e785c3..744541bbba 100644 --- a/lib/bindings/python/rust/lib.rs +++ b/lib/bindings/python/rust/lib.rs @@ -141,6 +141,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(lora_name_to_id, m)?)?; m.add_function(wrap_pyfunction!(log_message, m)?)?; m.add_function(wrap_pyfunction!(register_llm, m)?)?; + m.add_function(wrap_pyfunction!(register_model, m)?)?; m.add_function(wrap_pyfunction!(unregister_llm, m)?)?; m.add_function(wrap_pyfunction!(fetch_llm, m)?)?; m.add_function(wrap_pyfunction!(llm::entrypoint::make_engine, m)?)?; @@ -409,6 +410,76 @@ fn fetch_llm<'p>(py: Python<'p>, remote_name: &str) -> PyResult }) } +/// Register a model endpoint without requiring local files or HuggingFace downloads. +/// This is designed for TensorBased models where the backend handles all preprocessing. +/// +/// Unlike `register_llm`, this function does not download any files from HuggingFace. +/// It creates a minimal ModelDeploymentCard with just the model name and registers it +/// with the discovery system. +/// +/// Example: +/// ```python +/// await register_model(endpoint, "my-custom-model") +/// ``` +#[pyfunction] +#[pyo3(signature = (endpoint, model_name, model_type=None, model_input=None, user_data=None, runtime_config=None))] +#[allow(clippy::too_many_arguments)] +fn register_model<'p>( + py: Python<'p>, + endpoint: Endpoint, + model_name: &str, + model_type: Option, + model_input: Option, + user_data: Option<&Bound<'p, PyDict>>, + runtime_config: Option, +) -> PyResult> { + let model_type_inner = model_type + .map(|m| m.inner) + .unwrap_or(llm_rs::model_type::ModelType::TensorBased); + + let model_input_inner = match model_input.unwrap_or(ModelInput::Tensor) { + ModelInput::Text => llm_rs::model_type::ModelInput::Text, + ModelInput::Tokens => llm_rs::model_type::ModelInput::Tokens, + ModelInput::Tensor => llm_rs::model_type::ModelInput::Tensor, + }; + + let model_name_owned = model_name.to_string(); + + let user_data_json = user_data + .map(|dict| pythonize::depythonize(dict)) + .transpose() + .map_err(|err| { + PyErr::new::(format!("Failed to convert user_data: {}", err)) + })?; + + let runtime_config_inner = runtime_config.map(|c| c.inner); + + pyo3_async_runtimes::tokio::future_into_py(py, async move { + // Create a minimal ModelDeploymentCard - no file downloads needed + let mut card = llm_rs::model_card::ModelDeploymentCard::with_name_only(&model_name_owned); + card.model_type = model_type_inner; + card.model_input = model_input_inner; + card.user_data = user_data_json; + + if let Some(cfg) = runtime_config_inner { + card.runtime_config = cfg; + } + + // Register the Model Deployment Card via discovery interface + let discovery = endpoint.inner.drt().discovery(); + let spec = rs::discovery::DiscoverySpec::from_model( + endpoint.inner.component().namespace().name().to_string(), + endpoint.inner.component().name().to_string(), + endpoint.inner.name().to_string(), + &card, + ) + .map_err(to_pyerr)?; + discovery.register(spec).await.map_err(to_pyerr)?; + + Ok(()) + }) +} + #[pyclass] #[derive(Clone)] pub struct DistributedRuntime { diff --git a/lib/bindings/python/src/dynamo/_core.pyi b/lib/bindings/python/src/dynamo/_core.pyi index 22841ef6e4..6eb24af7c9 100644 --- a/lib/bindings/python/src/dynamo/_core.pyi +++ b/lib/bindings/python/src/dynamo/_core.pyi @@ -1095,6 +1095,33 @@ def lora_name_to_id(lora_name: str) -> int: """Generate a deterministic integer ID from a LoRA name using blake3 hash.""" ... +async def register_model( + endpoint: Endpoint, + model_name: str, + model_type: Optional[ModelType] = None, + model_input: Optional[ModelInput] = None, + user_data: Optional[Dict[str, Any]] = None, + runtime_config: Optional[ModelRuntimeConfig] = None, +) -> None: + """ + Register a model endpoint without requiring local files or HuggingFace downloads. + + This is designed for TensorBased models where the backend handles all preprocessing. + Unlike `register_llm`, this function does not download any files from HuggingFace. + + Args: + endpoint: The endpoint to register the model on + model_name: The display name for the model + model_type: The model type (defaults to ModelType.TensorBased) + model_input: The input type (defaults to ModelInput.Tensor) + user_data: Optional user data to attach to the model card + runtime_config: Optional runtime configuration + + Example: + await register_model(endpoint, "my-custom-model") + """ + ... + async def fetch_llm(remote_name: str) -> str: """ Download a model from Hugging Face, returning it's local path. diff --git a/lib/bindings/python/src/dynamo/llm/__init__.py b/lib/bindings/python/src/dynamo/llm/__init__.py index 8a75217829..c1fa25f026 100644 --- a/lib/bindings/python/src/dynamo/llm/__init__.py +++ b/lib/bindings/python/src/dynamo/llm/__init__.py @@ -40,6 +40,7 @@ from dynamo._core import lora_name_to_id as lora_name_to_id from dynamo._core import make_engine from dynamo._core import register_llm as register_llm +from dynamo._core import register_model as register_model from dynamo._core import run_input from dynamo._core import unregister_llm as unregister_llm diff --git a/lib/llm/src/model_card.rs b/lib/llm/src/model_card.rs index 756b6c1d5e..55e5bbd821 100644 --- a/lib/llm/src/model_card.rs +++ b/lib/llm/src/model_card.rs @@ -385,6 +385,15 @@ impl ModelDeploymentCard { return Ok(()); } + // For TensorBased models, config files are not used - they handle everything in the backend + if self.model_type.supports_tensor() { + tracing::debug!( + display_name = %self.display_name, + "Skipping config download for TensorBased model" + ); + return Ok(()); + } + let ignore_weights = true; let local_path = crate::hub::from_hf(&self.display_name, ignore_weights).await?; From d08440bfde74e6d88a23cc155f0ec29b90b8d256 Mon Sep 17 00:00:00 2001 From: Neal Vaidya Date: Tue, 2 Dec 2025 01:56:35 +0000 Subject: [PATCH 2/7] validate model and input types Signed-off-by: Neal Vaidya --- lib/bindings/python/rust/lib.rs | 41 +++++++++++++++--------- lib/bindings/python/src/dynamo/_core.pyi | 17 ++++++---- 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/lib/bindings/python/rust/lib.rs b/lib/bindings/python/rust/lib.rs index 744541bbba..290a0f9604 100644 --- a/lib/bindings/python/rust/lib.rs +++ b/lib/bindings/python/rust/lib.rs @@ -257,6 +257,13 @@ fn register_llm<'p>( lora_name: Option<&str>, base_model_path: Option<&str>, ) -> PyResult> { + // Tensor input is not supported by register_llm - use register_model instead + if matches!(model_input, ModelInput::Tensor) { + return Err(PyErr::new::( + "ModelInput::Tensor is not supported by register_llm. Use register_model() for tensor-based custom backends.", + )); + } + // Validate Prefill model type requirements if model_type.inner == llm_rs::model_type::ModelType::Prefill { if !matches!(model_input, ModelInput::Tokens) { @@ -274,7 +281,7 @@ fn register_llm<'p>( let model_input = match model_input { ModelInput::Text => llm_rs::model_type::ModelInput::Text, ModelInput::Tokens => llm_rs::model_type::ModelInput::Tokens, - ModelInput::Tensor => llm_rs::model_type::ModelInput::Tensor, + ModelInput::Tensor => unreachable!(), // Validated above }; let model_type_obj = model_type.inner; @@ -410,38 +417,40 @@ fn fetch_llm<'p>(py: Python<'p>, remote_name: &str) -> PyResult }) } -/// Register a model endpoint without requiring local files or HuggingFace downloads. -/// This is designed for TensorBased models where the backend handles all preprocessing. +/// Register a tensor-based model endpoint without requiring local files or HuggingFace downloads. +/// This is designed for custom backends that handle all preprocessing themselves. /// /// Unlike `register_llm`, this function does not download any files from HuggingFace. /// It creates a minimal ModelDeploymentCard with just the model name and registers it /// with the discovery system. /// +/// This function only supports Tensor input (not Text or Tokens). For LLM models +/// that require tokenizers and config files, use `register_llm` instead. +/// /// Example: /// ```python -/// await register_model(endpoint, "my-custom-model") +/// await register_model(ModelInput.Tensor, ModelType.TensorBased, endpoint, "my-custom-model") /// ``` #[pyfunction] -#[pyo3(signature = (endpoint, model_name, model_type=None, model_input=None, user_data=None, runtime_config=None))] -#[allow(clippy::too_many_arguments)] +#[pyo3(signature = (model_input, model_type, endpoint, model_name, user_data=None, runtime_config=None))] fn register_model<'p>( py: Python<'p>, + model_input: ModelInput, + model_type: ModelType, endpoint: Endpoint, model_name: &str, - model_type: Option, - model_input: Option, user_data: Option<&Bound<'p, PyDict>>, runtime_config: Option, ) -> PyResult> { - let model_type_inner = model_type - .map(|m| m.inner) - .unwrap_or(llm_rs::model_type::ModelType::TensorBased); + // Only Tensor input is supported - Text and Tokens require register_llm + if !matches!(model_input, ModelInput::Tensor) { + return Err(PyErr::new::( + "register_model only supports ModelInput::Tensor. Use register_llm() for Text or Tokens input.", + )); + } - let model_input_inner = match model_input.unwrap_or(ModelInput::Tensor) { - ModelInput::Text => llm_rs::model_type::ModelInput::Text, - ModelInput::Tokens => llm_rs::model_type::ModelInput::Tokens, - ModelInput::Tensor => llm_rs::model_type::ModelInput::Tensor, - }; + let model_input_inner = llm_rs::model_type::ModelInput::Tensor; + let model_type_inner = model_type.inner; let model_name_owned = model_name.to_string(); diff --git a/lib/bindings/python/src/dynamo/_core.pyi b/lib/bindings/python/src/dynamo/_core.pyi index 6eb24af7c9..756bf9b7cf 100644 --- a/lib/bindings/python/src/dynamo/_core.pyi +++ b/lib/bindings/python/src/dynamo/_core.pyi @@ -1096,29 +1096,32 @@ def lora_name_to_id(lora_name: str) -> int: ... async def register_model( + model_input: ModelInput, + model_type: ModelType, endpoint: Endpoint, model_name: str, - model_type: Optional[ModelType] = None, - model_input: Optional[ModelInput] = None, user_data: Optional[Dict[str, Any]] = None, runtime_config: Optional[ModelRuntimeConfig] = None, ) -> None: """ - Register a model endpoint without requiring local files or HuggingFace downloads. + Register a tensor-based model endpoint without requiring local files or HuggingFace downloads. - This is designed for TensorBased models where the backend handles all preprocessing. + This is designed for custom backends that handle all preprocessing themselves. Unlike `register_llm`, this function does not download any files from HuggingFace. + This function only supports Tensor input (not Text or Tokens). For LLM models + that require tokenizers and config files, use `register_llm` instead. + Args: + model_input: The input type (must be ModelInput.Tensor) + model_type: The model type (e.g., ModelType.TensorBased) endpoint: The endpoint to register the model on model_name: The display name for the model - model_type: The model type (defaults to ModelType.TensorBased) - model_input: The input type (defaults to ModelInput.Tensor) user_data: Optional user data to attach to the model card runtime_config: Optional runtime configuration Example: - await register_model(endpoint, "my-custom-model") + await register_model(ModelInput.Tensor, ModelType.TensorBased, endpoint, "my-custom-model") """ ... From bce1f1ab7018b4b03ca98bdf423b86bf636a61da Mon Sep 17 00:00:00 2001 From: Neal Vaidya Date: Tue, 2 Dec 2025 01:57:51 +0000 Subject: [PATCH 3/7] use register_model in tensor tests Signed-off-by: Neal Vaidya --- lib/bindings/python/tests/test_tensor.py | 9 +++------ tests/frontend/grpc/echo_tensor_worker.py | 9 +++------ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/lib/bindings/python/tests/test_tensor.py b/lib/bindings/python/tests/test_tensor.py index 30b1fde01c..2952be31fe 100644 --- a/lib/bindings/python/tests/test_tensor.py +++ b/lib/bindings/python/tests/test_tensor.py @@ -8,7 +8,7 @@ import pytest import uvloop -from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_llm +from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_model from dynamo.runtime import DistributedRuntime TEST_END_TO_END = os.environ.get("TEST_END_TO_END", 0) @@ -34,14 +34,11 @@ async def test_register(runtime: DistributedRuntime): assert model_config == runtime_config.get_tensor_model_config() - # [gluo FIXME] register_llm will attempt to load a LLM model, - # which is not well-defined for Tensor yet. Currently provide - # a valid model name to pass the registration. - await register_llm( + # Use register_model for tensor-based backends + await register_model( ModelInput.Tensor, ModelType.TensorBased, endpoint, - "Qwen/Qwen3-0.6B", "tensor", runtime_config=runtime_config, ) diff --git a/tests/frontend/grpc/echo_tensor_worker.py b/tests/frontend/grpc/echo_tensor_worker.py index db306c3d05..c23a6b36d9 100644 --- a/tests/frontend/grpc/echo_tensor_worker.py +++ b/tests/frontend/grpc/echo_tensor_worker.py @@ -9,7 +9,7 @@ import tritonclient.grpc.model_config_pb2 as mc import uvloop -from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_llm +from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_model from dynamo.runtime import DistributedRuntime, dynamo_worker @@ -53,14 +53,11 @@ async def echo_tensor_worker(runtime: DistributedRuntime): ) assert model_config == retrieved_model_config - # [gluo FIXME] register_llm will attempt to load a LLM model, - # which is not well-defined for Tensor yet. Currently provide - # a valid model name to pass the registration. - await register_llm( + # Use register_model for tensor-based backends + await register_model( ModelInput.Tensor, ModelType.TensorBased, endpoint, - "Qwen/Qwen3-0.6B", "echo", runtime_config=runtime_config, ) From d3e4eeb22deb29cb66e674000cffe2c8514f6791 Mon Sep 17 00:00:00 2001 From: Neal Vaidya Date: Tue, 2 Dec 2025 21:35:35 +0000 Subject: [PATCH 4/7] fix: pull register_model logic back into register_llm Signed-off-by: Neal Vaidya --- lib/bindings/python/rust/lib.rs | 113 +++++------------- lib/bindings/python/src/dynamo/_core.pyi | 21 +--- .../python/src/dynamo/llm/__init__.py | 1 - lib/bindings/python/tests/test_tensor.py | 8 +- tests/frontend/grpc/echo_tensor_worker.py | 8 +- 5 files changed, 43 insertions(+), 108 deletions(-) diff --git a/lib/bindings/python/rust/lib.rs b/lib/bindings/python/rust/lib.rs index 290a0f9604..4c41b27686 100644 --- a/lib/bindings/python/rust/lib.rs +++ b/lib/bindings/python/rust/lib.rs @@ -141,7 +141,6 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(lora_name_to_id, m)?)?; m.add_function(wrap_pyfunction!(log_message, m)?)?; m.add_function(wrap_pyfunction!(register_llm, m)?)?; - m.add_function(wrap_pyfunction!(register_model, m)?)?; m.add_function(wrap_pyfunction!(unregister_llm, m)?)?; m.add_function(wrap_pyfunction!(fetch_llm, m)?)?; m.add_function(wrap_pyfunction!(llm::entrypoint::make_engine, m)?)?; @@ -257,13 +256,6 @@ fn register_llm<'p>( lora_name: Option<&str>, base_model_path: Option<&str>, ) -> PyResult> { - // Tensor input is not supported by register_llm - use register_model instead - if matches!(model_input, ModelInput::Tensor) { - return Err(PyErr::new::( - "ModelInput::Tensor is not supported by register_llm. Use register_model() for tensor-based custom backends.", - )); - } - // Validate Prefill model type requirements if model_type.inner == llm_rs::model_type::ModelType::Prefill { if !matches!(model_input, ModelInput::Tokens) { @@ -281,9 +273,11 @@ fn register_llm<'p>( let model_input = match model_input { ModelInput::Text => llm_rs::model_type::ModelInput::Text, ModelInput::Tokens => llm_rs::model_type::ModelInput::Tokens, - ModelInput::Tensor => unreachable!(), // Validated above + ModelInput::Tensor => llm_rs::model_type::ModelInput::Tensor, }; + let is_tensor_based = model_type.inner.supports_tensor(); + let model_type_obj = model_type.inner; let inner_path = model_path.to_string(); @@ -331,7 +325,34 @@ fn register_llm<'p>( .or_else(|| Some(source_path.clone())); pyo3_async_runtimes::tokio::future_into_py(py, async move { - // Resolve the model path (local or fetch from HuggingFace) + // For TensorBased models, skip HuggingFace downloads and register directly + if is_tensor_based { + let model_name = model_name.unwrap_or_else(|| inner_path.clone()); + let mut card = + llm_rs::model_card::ModelDeploymentCard::with_name_only(&model_name); + card.model_type = model_type_obj; + card.model_input = model_input; + card.user_data = user_data_json; + + if let Some(cfg) = runtime_config { + card.runtime_config = cfg.inner; + } + + // Register the Model Deployment Card via discovery interface + let discovery = endpoint.inner.drt().discovery(); + let spec = rs::discovery::DiscoverySpec::from_model( + endpoint.inner.component().namespace().name().to_string(), + endpoint.inner.component().name().to_string(), + endpoint.inner.name().to_string(), + &card, + ) + .map_err(to_pyerr)?; + discovery.register(spec).await.map_err(to_pyerr)?; + + return Ok(()); + } + + // For non-TensorBased models, resolve the model path (local or fetch from HuggingFace) let model_path = if fs::exists(&source_path)? { PathBuf::from(&source_path) } else { @@ -417,78 +438,6 @@ fn fetch_llm<'p>(py: Python<'p>, remote_name: &str) -> PyResult }) } -/// Register a tensor-based model endpoint without requiring local files or HuggingFace downloads. -/// This is designed for custom backends that handle all preprocessing themselves. -/// -/// Unlike `register_llm`, this function does not download any files from HuggingFace. -/// It creates a minimal ModelDeploymentCard with just the model name and registers it -/// with the discovery system. -/// -/// This function only supports Tensor input (not Text or Tokens). For LLM models -/// that require tokenizers and config files, use `register_llm` instead. -/// -/// Example: -/// ```python -/// await register_model(ModelInput.Tensor, ModelType.TensorBased, endpoint, "my-custom-model") -/// ``` -#[pyfunction] -#[pyo3(signature = (model_input, model_type, endpoint, model_name, user_data=None, runtime_config=None))] -fn register_model<'p>( - py: Python<'p>, - model_input: ModelInput, - model_type: ModelType, - endpoint: Endpoint, - model_name: &str, - user_data: Option<&Bound<'p, PyDict>>, - runtime_config: Option, -) -> PyResult> { - // Only Tensor input is supported - Text and Tokens require register_llm - if !matches!(model_input, ModelInput::Tensor) { - return Err(PyErr::new::( - "register_model only supports ModelInput::Tensor. Use register_llm() for Text or Tokens input.", - )); - } - - let model_input_inner = llm_rs::model_type::ModelInput::Tensor; - let model_type_inner = model_type.inner; - - let model_name_owned = model_name.to_string(); - - let user_data_json = user_data - .map(|dict| pythonize::depythonize(dict)) - .transpose() - .map_err(|err| { - PyErr::new::(format!("Failed to convert user_data: {}", err)) - })?; - - let runtime_config_inner = runtime_config.map(|c| c.inner); - - pyo3_async_runtimes::tokio::future_into_py(py, async move { - // Create a minimal ModelDeploymentCard - no file downloads needed - let mut card = llm_rs::model_card::ModelDeploymentCard::with_name_only(&model_name_owned); - card.model_type = model_type_inner; - card.model_input = model_input_inner; - card.user_data = user_data_json; - - if let Some(cfg) = runtime_config_inner { - card.runtime_config = cfg; - } - - // Register the Model Deployment Card via discovery interface - let discovery = endpoint.inner.drt().discovery(); - let spec = rs::discovery::DiscoverySpec::from_model( - endpoint.inner.component().namespace().name().to_string(), - endpoint.inner.component().name().to_string(), - endpoint.inner.name().to_string(), - &card, - ) - .map_err(to_pyerr)?; - discovery.register(spec).await.map_err(to_pyerr)?; - - Ok(()) - }) -} - #[pyclass] #[derive(Clone)] pub struct DistributedRuntime { diff --git a/lib/bindings/python/src/dynamo/_core.pyi b/lib/bindings/python/src/dynamo/_core.pyi index 756bf9b7cf..f5fadad233 100644 --- a/lib/bindings/python/src/dynamo/_core.pyi +++ b/lib/bindings/python/src/dynamo/_core.pyi @@ -1104,24 +1104,11 @@ async def register_model( runtime_config: Optional[ModelRuntimeConfig] = None, ) -> None: """ - Register a tensor-based model endpoint without requiring local files or HuggingFace downloads. - - This is designed for custom backends that handle all preprocessing themselves. - Unlike `register_llm`, this function does not download any files from HuggingFace. - - This function only supports Tensor input (not Text or Tokens). For LLM models - that require tokenizers and config files, use `register_llm` instead. + Attach the model at path to the given endpoint, and advertise it as model_type. - Args: - model_input: The input type (must be ModelInput.Tensor) - model_type: The model type (e.g., ModelType.TensorBased) - endpoint: The endpoint to register the model on - model_name: The display name for the model - user_data: Optional user data to attach to the model card - runtime_config: Optional runtime configuration - - Example: - await register_model(ModelInput.Tensor, ModelType.TensorBased, endpoint, "my-custom-model") + For TensorBased models (using ModelInput.Tensor), HuggingFace downloads are skipped + and a minimal model card is registered directly. Use model_path as the display name + for these models. """ ... diff --git a/lib/bindings/python/src/dynamo/llm/__init__.py b/lib/bindings/python/src/dynamo/llm/__init__.py index c1fa25f026..8a75217829 100644 --- a/lib/bindings/python/src/dynamo/llm/__init__.py +++ b/lib/bindings/python/src/dynamo/llm/__init__.py @@ -40,7 +40,6 @@ from dynamo._core import lora_name_to_id as lora_name_to_id from dynamo._core import make_engine from dynamo._core import register_llm as register_llm -from dynamo._core import register_model as register_model from dynamo._core import run_input from dynamo._core import unregister_llm as unregister_llm diff --git a/lib/bindings/python/tests/test_tensor.py b/lib/bindings/python/tests/test_tensor.py index 2952be31fe..e48de90f31 100644 --- a/lib/bindings/python/tests/test_tensor.py +++ b/lib/bindings/python/tests/test_tensor.py @@ -8,7 +8,7 @@ import pytest import uvloop -from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_model +from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_llm from dynamo.runtime import DistributedRuntime TEST_END_TO_END = os.environ.get("TEST_END_TO_END", 0) @@ -34,12 +34,12 @@ async def test_register(runtime: DistributedRuntime): assert model_config == runtime_config.get_tensor_model_config() - # Use register_model for tensor-based backends - await register_model( + # Use register_llm for tensor-based backends (skips HuggingFace downloads) + await register_llm( ModelInput.Tensor, ModelType.TensorBased, endpoint, - "tensor", + "tensor", # model_path (used as display name for tensor-based models) runtime_config=runtime_config, ) diff --git a/tests/frontend/grpc/echo_tensor_worker.py b/tests/frontend/grpc/echo_tensor_worker.py index c23a6b36d9..c498b23024 100644 --- a/tests/frontend/grpc/echo_tensor_worker.py +++ b/tests/frontend/grpc/echo_tensor_worker.py @@ -9,7 +9,7 @@ import tritonclient.grpc.model_config_pb2 as mc import uvloop -from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_model +from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_llm from dynamo.runtime import DistributedRuntime, dynamo_worker @@ -53,12 +53,12 @@ async def echo_tensor_worker(runtime: DistributedRuntime): ) assert model_config == retrieved_model_config - # Use register_model for tensor-based backends - await register_model( + # Use register_llm for tensor-based backends (skips HuggingFace downloads) + await register_llm( ModelInput.Tensor, ModelType.TensorBased, endpoint, - "echo", + "echo", # model_path (used as display name for tensor-based models) runtime_config=runtime_config, ) From 1700a36e6c651ebf4d9d3cd09f7091d8607159d8 Mon Sep 17 00:00:00 2001 From: Neal Vaidya Date: Tue, 2 Dec 2025 21:46:23 +0000 Subject: [PATCH 5/7] fix rebase conflicts Signed-off-by: Neal Vaidya --- lib/bindings/python/src/dynamo/_core.pyi | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/lib/bindings/python/src/dynamo/_core.pyi b/lib/bindings/python/src/dynamo/_core.pyi index f5fadad233..3562880541 100644 --- a/lib/bindings/python/src/dynamo/_core.pyi +++ b/lib/bindings/python/src/dynamo/_core.pyi @@ -1077,6 +1077,10 @@ async def register_llm( Providing only one of these parameters will raise a ValueError. - `lora_name`: The served model name for the LoRA model - `base_model_path`: Path to the base model that the LoRA extends + + For TensorBased models (using ModelInput.Tensor), HuggingFace downloads are skipped + and a minimal model card is registered directly. Use model_path as the display name + for these models. """ ... @@ -1095,23 +1099,6 @@ def lora_name_to_id(lora_name: str) -> int: """Generate a deterministic integer ID from a LoRA name using blake3 hash.""" ... -async def register_model( - model_input: ModelInput, - model_type: ModelType, - endpoint: Endpoint, - model_name: str, - user_data: Optional[Dict[str, Any]] = None, - runtime_config: Optional[ModelRuntimeConfig] = None, -) -> None: - """ - Attach the model at path to the given endpoint, and advertise it as model_type. - - For TensorBased models (using ModelInput.Tensor), HuggingFace downloads are skipped - and a minimal model card is registered directly. Use model_path as the display name - for these models. - """ - ... - async def fetch_llm(remote_name: str) -> str: """ Download a model from Hugging Face, returning it's local path. From e4f17c27eef2a01fbc884851faae5e4745e030e7 Mon Sep 17 00:00:00 2001 From: Neal Vaidya Date: Tue, 2 Dec 2025 22:01:05 +0000 Subject: [PATCH 6/7] fix rebase issue Signed-off-by: Neal Vaidya --- lib/bindings/python/rust/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bindings/python/rust/lib.rs b/lib/bindings/python/rust/lib.rs index 4c41b27686..c21856b8de 100644 --- a/lib/bindings/python/rust/lib.rs +++ b/lib/bindings/python/rust/lib.rs @@ -327,7 +327,7 @@ fn register_llm<'p>( pyo3_async_runtimes::tokio::future_into_py(py, async move { // For TensorBased models, skip HuggingFace downloads and register directly if is_tensor_based { - let model_name = model_name.unwrap_or_else(|| inner_path.clone()); + let model_name = model_name.unwrap_or_else(|| source_path.clone()); let mut card = llm_rs::model_card::ModelDeploymentCard::with_name_only(&model_name); card.model_type = model_type_obj; From 988644b30d7a25662d2659731f458b218997a989 Mon Sep 17 00:00:00 2001 From: Neal Vaidya Date: Tue, 2 Dec 2025 22:08:08 +0000 Subject: [PATCH 7/7] fix formatting issue Signed-off-by: Neal Vaidya --- lib/bindings/python/rust/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/bindings/python/rust/lib.rs b/lib/bindings/python/rust/lib.rs index c21856b8de..0c3aba6cc5 100644 --- a/lib/bindings/python/rust/lib.rs +++ b/lib/bindings/python/rust/lib.rs @@ -328,8 +328,7 @@ fn register_llm<'p>( // For TensorBased models, skip HuggingFace downloads and register directly if is_tensor_based { let model_name = model_name.unwrap_or_else(|| source_path.clone()); - let mut card = - llm_rs::model_card::ModelDeploymentCard::with_name_only(&model_name); + let mut card = llm_rs::model_card::ModelDeploymentCard::with_name_only(&model_name); card.model_type = model_type_obj; card.model_input = model_input; card.user_data = user_data_json;