From a156cf35019b71acc8ef894791e783c83d69d6e9 Mon Sep 17 00:00:00 2001
From: Neal Vaidya <nealv@nvidia.com>
Date: Tue, 2 Dec 2025 01:21:09 +0000
Subject: [PATCH 1/7] feat: add register_model function for non-LLMs

This commit introduces the `register_model` function, which lets users register non-LLM model endpoints without requiring local files or downloads from HuggingFace. The function is designed specifically for TensorBased models, where the frontend does not do any pre-processing.

Signed-off-by: Neal Vaidya <nealv@nvidia.com>
---
 lib/bindings/python/rust/lib.rs               | 71 +++++++++++++++++++
 lib/bindings/python/src/dynamo/_core.pyi      | 27 +++++++
 .../python/src/dynamo/llm/__init__.py         |  1 +
 lib/llm/src/model_card.rs                     |  9 +++
 4 files changed, 108 insertions(+)
diff --git a/lib/bindings/python/rust/lib.rs b/lib/bindings/python/rust/lib.rs
index d8f2e785c3..744541bbba 100644
--- a/lib/bindings/python/rust/lib.rs
+++ b/lib/bindings/python/rust/lib.rs
@@ -141,6 +141,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(lora_name_to_id, m)?)?;
     m.add_function(wrap_pyfunction!(log_message, m)?)?;
     m.add_function(wrap_pyfunction!(register_llm, m)?)?;
+    m.add_function(wrap_pyfunction!(register_model, m)?)?;
     m.add_function(wrap_pyfunction!(unregister_llm, m)?)?;
     m.add_function(wrap_pyfunction!(fetch_llm, m)?)?;
     m.add_function(wrap_pyfunction!(llm::entrypoint::make_engine, m)?)?;
@@ -409,6 +410,76 @@ fn fetch_llm<'p>(py: Python<'p>, remote_name: &str) -> PyResult<Bound<'p, PyAny>
     })
 }
 
+/// Register a model endpoint without requiring local files or HuggingFace downloads.
+/// This is designed for TensorBased models where the backend handles all preprocessing.
+///
+/// Unlike `register_llm`, this function does not download any files from HuggingFace.
+/// It creates a minimal ModelDeploymentCard with just the model name and registers it
+/// with the discovery system.
+///
+/// Example:
+/// ```python
+/// await register_model(endpoint, "my-custom-model")
+/// ```
+#[pyfunction]
+#[pyo3(signature = (endpoint, model_name, model_type=None, model_input=None, user_data=None, runtime_config=None))]
+#[allow(clippy::too_many_arguments)]
+fn register_model<'p>(
+    py: Python<'p>,
+    endpoint: Endpoint,
+    model_name: &str,
+    model_type: Option<ModelType>,
+    model_input: Option<ModelInput>,
+    user_data: Option<&Bound<'p, PyDict>>,
+    runtime_config: Option<ModelRuntimeConfig>,
+) -> PyResult<Bound<'p, PyAny>> {
+    let model_type_inner = model_type
+        .map(|m| m.inner)
+        .unwrap_or(llm_rs::model_type::ModelType::TensorBased);
+
+    let model_input_inner = match model_input.unwrap_or(ModelInput::Tensor) {
+        ModelInput::Text => llm_rs::model_type::ModelInput::Text,
+        ModelInput::Tokens => llm_rs::model_type::ModelInput::Tokens,
+        ModelInput::Tensor => llm_rs::model_type::ModelInput::Tensor,
+    };
+
+    let model_name_owned = model_name.to_string();
+
+    let user_data_json = user_data
+        .map(|dict| pythonize::depythonize(dict))
+        .transpose()
+        .map_err(|err| {
+            PyErr::new::<PyException, _>(format!("Failed to convert user_data: {}", err))
+        })?;
+
+    let runtime_config_inner = runtime_config.map(|c| c.inner);
+
+    pyo3_async_runtimes::tokio::future_into_py(py, async move {
+        // Create a minimal ModelDeploymentCard - no file downloads needed
+        let mut card = llm_rs::model_card::ModelDeploymentCard::with_name_only(&model_name_owned);
+        card.model_type = model_type_inner;
+        card.model_input = model_input_inner;
+        card.user_data = user_data_json;
+
+        if let Some(cfg) = runtime_config_inner {
+            card.runtime_config = cfg;
+        }
+
+        // Register the Model Deployment Card via discovery interface
+        let discovery = endpoint.inner.drt().discovery();
+        let spec = rs::discovery::DiscoverySpec::from_model(
+            endpoint.inner.component().namespace().name().to_string(),
+            endpoint.inner.component().name().to_string(),
+            endpoint.inner.name().to_string(),
+            &card,
+        )
+        .map_err(to_pyerr)?;
+        discovery.register(spec).await.map_err(to_pyerr)?;
+
+        Ok(())
+    })
+}
+
 #[pyclass]
 #[derive(Clone)]
 pub struct DistributedRuntime {
diff --git a/lib/bindings/python/src/dynamo/_core.pyi b/lib/bindings/python/src/dynamo/_core.pyi
index 22841ef6e4..6eb24af7c9 100644
--- a/lib/bindings/python/src/dynamo/_core.pyi
+++ b/lib/bindings/python/src/dynamo/_core.pyi
@@ -1095,6 +1095,33 @@ def lora_name_to_id(lora_name: str) -> int:
     """Generate a deterministic integer ID from a LoRA name using blake3 hash."""
     ...
 
+async def register_model(
+    endpoint: Endpoint,
+    model_name: str,
+    model_type: Optional[ModelType] = None,
+    model_input: Optional[ModelInput] = None,
+    user_data: Optional[Dict[str, Any]] = None,
+    runtime_config: Optional[ModelRuntimeConfig] = None,
+) -> None:
+    """
+    Register a model endpoint without requiring local files or HuggingFace downloads.
+
+    This is designed for TensorBased models where the backend handles all preprocessing.
+    Unlike `register_llm`, this function does not download any files from HuggingFace.
+
+    Args:
+        endpoint: The endpoint to register the model on
+        model_name: The display name for the model
+        model_type: The model type (defaults to ModelType.TensorBased)
+        model_input: The input type (defaults to ModelInput.Tensor)
+        user_data: Optional user data to attach to the model card
+        runtime_config: Optional runtime configuration
+
+    Example:
+        await register_model(endpoint, "my-custom-model")
+    """
+    ...
+
 async def fetch_llm(remote_name: str) -> str:
     """
     Download a model from Hugging Face, returning it's local path.
diff --git a/lib/bindings/python/src/dynamo/llm/__init__.py b/lib/bindings/python/src/dynamo/llm/__init__.py
index 8a75217829..c1fa25f026 100644
--- a/lib/bindings/python/src/dynamo/llm/__init__.py
+++ b/lib/bindings/python/src/dynamo/llm/__init__.py
@@ -40,6 +40,7 @@
 from dynamo._core import lora_name_to_id as lora_name_to_id
 from dynamo._core import make_engine
 from dynamo._core import register_llm as register_llm
+from dynamo._core import register_model as register_model
 from dynamo._core import run_input
 from dynamo._core import unregister_llm as unregister_llm
 
diff --git a/lib/llm/src/model_card.rs b/lib/llm/src/model_card.rs
index 756b6c1d5e..55e5bbd821 100644
--- a/lib/llm/src/model_card.rs
+++ b/lib/llm/src/model_card.rs
@@ -385,6 +385,15 @@ impl ModelDeploymentCard {
             return Ok(());
         }
 
+        // For TensorBased models, config files are not used - they handle everything in the backend
+        if self.model_type.supports_tensor() {
+            tracing::debug!(
+                display_name = %self.display_name,
+                "Skipping config download for TensorBased model"
+            );
+            return Ok(());
+        }
+
         let ignore_weights = true;
         let local_path = crate::hub::from_hf(&self.display_name, ignore_weights).await?;
 

From d08440bfde74e6d88a23cc155f0ec29b90b8d256 Mon Sep 17 00:00:00 2001
From: Neal Vaidya <nealv@nvidia.com>
Date: Tue, 2 Dec 2025 01:56:35 +0000
Subject: [PATCH 2/7] validate model and input types

Signed-off-by: Neal Vaidya <nealv@nvidia.com>
---
 lib/bindings/python/rust/lib.rs          | 41 +++++++++++++++---------
 lib/bindings/python/src/dynamo/_core.pyi | 17 ++++++----
 2 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/lib/bindings/python/rust/lib.rs b/lib/bindings/python/rust/lib.rs
index 744541bbba..290a0f9604 100644
--- a/lib/bindings/python/rust/lib.rs
+++ b/lib/bindings/python/rust/lib.rs
@@ -257,6 +257,13 @@ fn register_llm<'p>(
     lora_name: Option<&str>,
     base_model_path: Option<&str>,
 ) -> PyResult<Bound<'p, PyAny>> {
+    // Tensor input is not supported by register_llm - use register_model instead
+    if matches!(model_input, ModelInput::Tensor) {
+        return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
+            "ModelInput::Tensor is not supported by register_llm. Use register_model() for tensor-based custom backends.",
+        ));
+    }
+
     // Validate Prefill model type requirements
     if model_type.inner == llm_rs::model_type::ModelType::Prefill {
         if !matches!(model_input, ModelInput::Tokens) {
@@ -274,7 +281,7 @@ fn register_llm<'p>(
     let model_input = match model_input {
         ModelInput::Text => llm_rs::model_type::ModelInput::Text,
         ModelInput::Tokens => llm_rs::model_type::ModelInput::Tokens,
-        ModelInput::Tensor => llm_rs::model_type::ModelInput::Tensor,
+        ModelInput::Tensor => unreachable!(), // Validated above
     };
 
     let model_type_obj = model_type.inner;
@@ -410,38 +417,40 @@ fn fetch_llm<'p>(py: Python<'p>, remote_name: &str) -> PyResult<Bound<'p, PyAny>
     })
 }
 
-/// Register a model endpoint without requiring local files or HuggingFace downloads.
-/// This is designed for TensorBased models where the backend handles all preprocessing.
+/// Register a tensor-based model endpoint without requiring local files or HuggingFace downloads.
+/// This is designed for custom backends that handle all preprocessing themselves.
 ///
 /// Unlike `register_llm`, this function does not download any files from HuggingFace.
 /// It creates a minimal ModelDeploymentCard with just the model name and registers it
 /// with the discovery system.
 ///
+/// This function only supports Tensor input (not Text or Tokens). For LLM models
+/// that require tokenizers and config files, use `register_llm` instead.
+///
 /// Example:
 /// ```python
-/// await register_model(endpoint, "my-custom-model")
+/// await register_model(ModelInput.Tensor, ModelType.TensorBased, endpoint, "my-custom-model")
 /// ```
 #[pyfunction]
-#[pyo3(signature = (endpoint, model_name, model_type=None, model_input=None, user_data=None, runtime_config=None))]
-#[allow(clippy::too_many_arguments)]
+#[pyo3(signature = (model_input, model_type, endpoint, model_name, user_data=None, runtime_config=None))]
 fn register_model<'p>(
     py: Python<'p>,
+    model_input: ModelInput,
+    model_type: ModelType,
     endpoint: Endpoint,
     model_name: &str,
-    model_type: Option<ModelType>,
-    model_input: Option<ModelInput>,
     user_data: Option<&Bound<'p, PyDict>>,
     runtime_config: Option<ModelRuntimeConfig>,
 ) -> PyResult<Bound<'p, PyAny>> {
-    let model_type_inner = model_type
-        .map(|m| m.inner)
-        .unwrap_or(llm_rs::model_type::ModelType::TensorBased);
+    // Only Tensor input is supported - Text and Tokens require register_llm
+    if !matches!(model_input, ModelInput::Tensor) {
+        return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
+            "register_model only supports ModelInput::Tensor. Use register_llm() for Text or Tokens input.",
+        ));
+    }
 
-    let model_input_inner = match model_input.unwrap_or(ModelInput::Tensor) {
-        ModelInput::Text => llm_rs::model_type::ModelInput::Text,
-        ModelInput::Tokens => llm_rs::model_type::ModelInput::Tokens,
-        ModelInput::Tensor => llm_rs::model_type::ModelInput::Tensor,
-    };
+    let model_input_inner = llm_rs::model_type::ModelInput::Tensor;
+    let model_type_inner = model_type.inner;
 
     let model_name_owned = model_name.to_string();
 
diff --git a/lib/bindings/python/src/dynamo/_core.pyi b/lib/bindings/python/src/dynamo/_core.pyi
index 6eb24af7c9..756bf9b7cf 100644
--- a/lib/bindings/python/src/dynamo/_core.pyi
+++ b/lib/bindings/python/src/dynamo/_core.pyi
@@ -1096,29 +1096,32 @@ def lora_name_to_id(lora_name: str) -> int:
     ...
 
 async def register_model(
+    model_input: ModelInput,
+    model_type: ModelType,
     endpoint: Endpoint,
     model_name: str,
-    model_type: Optional[ModelType] = None,
-    model_input: Optional[ModelInput] = None,
     user_data: Optional[Dict[str, Any]] = None,
     runtime_config: Optional[ModelRuntimeConfig] = None,
 ) -> None:
     """
-    Register a model endpoint without requiring local files or HuggingFace downloads.
+    Register a tensor-based model endpoint without requiring local files or HuggingFace downloads.
 
-    This is designed for TensorBased models where the backend handles all preprocessing.
+    This is designed for custom backends that handle all preprocessing themselves.
     Unlike `register_llm`, this function does not download any files from HuggingFace.
 
+    This function only supports Tensor input (not Text or Tokens). For LLM models
+    that require tokenizers and config files, use `register_llm` instead.
+
     Args:
+        model_input: The input type (must be ModelInput.Tensor)
+        model_type: The model type (e.g., ModelType.TensorBased)
         endpoint: The endpoint to register the model on
         model_name: The display name for the model
-        model_type: The model type (defaults to ModelType.TensorBased)
-        model_input: The input type (defaults to ModelInput.Tensor)
         user_data: Optional user data to attach to the model card
         runtime_config: Optional runtime configuration
 
     Example:
-        await register_model(endpoint, "my-custom-model")
+        await register_model(ModelInput.Tensor, ModelType.TensorBased, endpoint, "my-custom-model")
     """
     ...
 

From bce1f1ab7018b4b03ca98bdf423b86bf636a61da Mon Sep 17 00:00:00 2001
From: Neal Vaidya <nealv@nvidia.com>
Date: Tue, 2 Dec 2025 01:57:51 +0000
Subject: [PATCH 3/7] use register_model in tensor tests

Signed-off-by: Neal Vaidya <nealv@nvidia.com>
---
 lib/bindings/python/tests/test_tensor.py  | 9 +++------
 tests/frontend/grpc/echo_tensor_worker.py | 9 +++------
 2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/lib/bindings/python/tests/test_tensor.py b/lib/bindings/python/tests/test_tensor.py
index 30b1fde01c..2952be31fe 100644
--- a/lib/bindings/python/tests/test_tensor.py
+++ b/lib/bindings/python/tests/test_tensor.py
@@ -8,7 +8,7 @@
 import pytest
 import uvloop
 
-from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_llm
+from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_model
 from dynamo.runtime import DistributedRuntime
 
 TEST_END_TO_END = os.environ.get("TEST_END_TO_END", 0)
@@ -34,14 +34,11 @@ async def test_register(runtime: DistributedRuntime):
 
     assert model_config == runtime_config.get_tensor_model_config()
 
-    # [gluo FIXME] register_llm will attempt to load a LLM model,
-    # which is not well-defined for Tensor yet. Currently provide
-    # a valid model name to pass the registration.
-    await register_llm(
+    # Use register_model for tensor-based backends
+    await register_model(
         ModelInput.Tensor,
         ModelType.TensorBased,
         endpoint,
-        "Qwen/Qwen3-0.6B",
         "tensor",
         runtime_config=runtime_config,
     )
diff --git a/tests/frontend/grpc/echo_tensor_worker.py b/tests/frontend/grpc/echo_tensor_worker.py
index db306c3d05..c23a6b36d9 100644
--- a/tests/frontend/grpc/echo_tensor_worker.py
+++ b/tests/frontend/grpc/echo_tensor_worker.py
@@ -9,7 +9,7 @@
 import tritonclient.grpc.model_config_pb2 as mc
 import uvloop
 
-from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_llm
+from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_model
 from dynamo.runtime import DistributedRuntime, dynamo_worker
 
 
@@ -53,14 +53,11 @@ async def echo_tensor_worker(runtime: DistributedRuntime):
     )
     assert model_config == retrieved_model_config
 
-    # [gluo FIXME] register_llm will attempt to load a LLM model,
-    # which is not well-defined for Tensor yet. Currently provide
-    # a valid model name to pass the registration.
-    await register_llm(
+    # Use register_model for tensor-based backends
+    await register_model(
         ModelInput.Tensor,
         ModelType.TensorBased,
         endpoint,
-        "Qwen/Qwen3-0.6B",
         "echo",
         runtime_config=runtime_config,
     )

From d3e4eeb22deb29cb66e674000cffe2c8514f6791 Mon Sep 17 00:00:00 2001
From: Neal Vaidya <nealv@nvidia.com>
Date: Tue, 2 Dec 2025 21:35:35 +0000
Subject: [PATCH 4/7] fix: pull register_model logic back into register_llm

Signed-off-by: Neal Vaidya <nealv@nvidia.com>
---
 lib/bindings/python/rust/lib.rs               | 113 +++++-------------
 lib/bindings/python/src/dynamo/_core.pyi      |  21 +---
 .../python/src/dynamo/llm/__init__.py         |   1 -
 lib/bindings/python/tests/test_tensor.py      |   8 +-
 tests/frontend/grpc/echo_tensor_worker.py     |   8 +-
 5 files changed, 43 insertions(+), 108 deletions(-)

diff --git a/lib/bindings/python/rust/lib.rs b/lib/bindings/python/rust/lib.rs
index 290a0f9604..4c41b27686 100644
--- a/lib/bindings/python/rust/lib.rs
+++ b/lib/bindings/python/rust/lib.rs
@@ -141,7 +141,6 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(lora_name_to_id, m)?)?;
     m.add_function(wrap_pyfunction!(log_message, m)?)?;
     m.add_function(wrap_pyfunction!(register_llm, m)?)?;
-    m.add_function(wrap_pyfunction!(register_model, m)?)?;
     m.add_function(wrap_pyfunction!(unregister_llm, m)?)?;
     m.add_function(wrap_pyfunction!(fetch_llm, m)?)?;
     m.add_function(wrap_pyfunction!(llm::entrypoint::make_engine, m)?)?;
@@ -257,13 +256,6 @@ fn register_llm<'p>(
     lora_name: Option<&str>,
     base_model_path: Option<&str>,
 ) -> PyResult<Bound<'p, PyAny>> {
-    // Tensor input is not supported by register_llm - use register_model instead
-    if matches!(model_input, ModelInput::Tensor) {
-        return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
-            "ModelInput::Tensor is not supported by register_llm. Use register_model() for tensor-based custom backends.",
-        ));
-    }
-
     // Validate Prefill model type requirements
     if model_type.inner == llm_rs::model_type::ModelType::Prefill {
         if !matches!(model_input, ModelInput::Tokens) {
@@ -281,9 +273,11 @@ fn register_llm<'p>(
     let model_input = match model_input {
         ModelInput::Text => llm_rs::model_type::ModelInput::Text,
         ModelInput::Tokens => llm_rs::model_type::ModelInput::Tokens,
-        ModelInput::Tensor => unreachable!(), // Validated above
+        ModelInput::Tensor => llm_rs::model_type::ModelInput::Tensor,
     };
 
+    let is_tensor_based = model_type.inner.supports_tensor();
+
     let model_type_obj = model_type.inner;
 
     let inner_path = model_path.to_string();
@@ -331,7 +325,34 @@ fn register_llm<'p>(
         .or_else(|| Some(source_path.clone()));
 
     pyo3_async_runtimes::tokio::future_into_py(py, async move {
-        // Resolve the model path (local or fetch from HuggingFace)
+        // For TensorBased models, skip HuggingFace downloads and register directly
+        if is_tensor_based {
+            let model_name = model_name.unwrap_or_else(|| inner_path.clone());
+            let mut card =
+                llm_rs::model_card::ModelDeploymentCard::with_name_only(&model_name);
+            card.model_type = model_type_obj;
+            card.model_input = model_input;
+            card.user_data = user_data_json;
+
+            if let Some(cfg) = runtime_config {
+                card.runtime_config = cfg.inner;
+            }
+
+            // Register the Model Deployment Card via discovery interface
+            let discovery = endpoint.inner.drt().discovery();
+            let spec = rs::discovery::DiscoverySpec::from_model(
+                endpoint.inner.component().namespace().name().to_string(),
+                endpoint.inner.component().name().to_string(),
+                endpoint.inner.name().to_string(),
+                &card,
+            )
+            .map_err(to_pyerr)?;
+            discovery.register(spec).await.map_err(to_pyerr)?;
+
+            return Ok(());
+        }
+
+        // For non-TensorBased models, resolve the model path (local or fetch from HuggingFace)
         let model_path = if fs::exists(&source_path)? {
             PathBuf::from(&source_path)
         } else {
@@ -417,78 +438,6 @@ fn fetch_llm<'p>(py: Python<'p>, remote_name: &str) -> PyResult<Bound<'p, PyAny>
     })
 }
 
-/// Register a tensor-based model endpoint without requiring local files or HuggingFace downloads.
-/// This is designed for custom backends that handle all preprocessing themselves.
-///
-/// Unlike `register_llm`, this function does not download any files from HuggingFace.
-/// It creates a minimal ModelDeploymentCard with just the model name and registers it
-/// with the discovery system.
-///
-/// This function only supports Tensor input (not Text or Tokens). For LLM models
-/// that require tokenizers and config files, use `register_llm` instead.
-///
-/// Example:
-/// ```python
-/// await register_model(ModelInput.Tensor, ModelType.TensorBased, endpoint, "my-custom-model")
-/// ```
-#[pyfunction]
-#[pyo3(signature = (model_input, model_type, endpoint, model_name, user_data=None, runtime_config=None))]
-fn register_model<'p>(
-    py: Python<'p>,
-    model_input: ModelInput,
-    model_type: ModelType,
-    endpoint: Endpoint,
-    model_name: &str,
-    user_data: Option<&Bound<'p, PyDict>>,
-    runtime_config: Option<ModelRuntimeConfig>,
-) -> PyResult<Bound<'p, PyAny>> {
-    // Only Tensor input is supported - Text and Tokens require register_llm
-    if !matches!(model_input, ModelInput::Tensor) {
-        return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
-            "register_model only supports ModelInput::Tensor. Use register_llm() for Text or Tokens input.",
-        ));
-    }
-
-    let model_input_inner = llm_rs::model_type::ModelInput::Tensor;
-    let model_type_inner = model_type.inner;
-
-    let model_name_owned = model_name.to_string();
-
-    let user_data_json = user_data
-        .map(|dict| pythonize::depythonize(dict))
-        .transpose()
-        .map_err(|err| {
-            PyErr::new::<PyException, _>(format!("Failed to convert user_data: {}", err))
-        })?;
-
-    let runtime_config_inner = runtime_config.map(|c| c.inner);
-
-    pyo3_async_runtimes::tokio::future_into_py(py, async move {
-        // Create a minimal ModelDeploymentCard - no file downloads needed
-        let mut card = llm_rs::model_card::ModelDeploymentCard::with_name_only(&model_name_owned);
-        card.model_type = model_type_inner;
-        card.model_input = model_input_inner;
-        card.user_data = user_data_json;
-
-        if let Some(cfg) = runtime_config_inner {
-            card.runtime_config = cfg;
-        }
-
-        // Register the Model Deployment Card via discovery interface
-        let discovery = endpoint.inner.drt().discovery();
-        let spec = rs::discovery::DiscoverySpec::from_model(
-            endpoint.inner.component().namespace().name().to_string(),
-            endpoint.inner.component().name().to_string(),
-            endpoint.inner.name().to_string(),
-            &card,
-        )
-        .map_err(to_pyerr)?;
-        discovery.register(spec).await.map_err(to_pyerr)?;
-
-        Ok(())
-    })
-}
-
 #[pyclass]
 #[derive(Clone)]
 pub struct DistributedRuntime {
diff --git a/lib/bindings/python/src/dynamo/_core.pyi b/lib/bindings/python/src/dynamo/_core.pyi
index 756bf9b7cf..f5fadad233 100644
--- a/lib/bindings/python/src/dynamo/_core.pyi
+++ b/lib/bindings/python/src/dynamo/_core.pyi
@@ -1104,24 +1104,11 @@ async def register_model(
     runtime_config: Optional[ModelRuntimeConfig] = None,
 ) -> None:
     """
-    Register a tensor-based model endpoint without requiring local files or HuggingFace downloads.
-
-    This is designed for custom backends that handle all preprocessing themselves.
-    Unlike `register_llm`, this function does not download any files from HuggingFace.
-
-    This function only supports Tensor input (not Text or Tokens). For LLM models
-    that require tokenizers and config files, use `register_llm` instead.
+    Attach the model at path to the given endpoint, and advertise it as model_type.
 
-    Args:
-        model_input: The input type (must be ModelInput.Tensor)
-        model_type: The model type (e.g., ModelType.TensorBased)
-        endpoint: The endpoint to register the model on
-        model_name: The display name for the model
-        user_data: Optional user data to attach to the model card
-        runtime_config: Optional runtime configuration
-
-    Example:
-        await register_model(ModelInput.Tensor, ModelType.TensorBased, endpoint, "my-custom-model")
+    For TensorBased models (using ModelInput.Tensor), HuggingFace downloads are skipped
+    and a minimal model card is registered directly. Use model_path as the display name
+    for these models.
     """
     ...
 
diff --git a/lib/bindings/python/src/dynamo/llm/__init__.py b/lib/bindings/python/src/dynamo/llm/__init__.py
index c1fa25f026..8a75217829 100644
--- a/lib/bindings/python/src/dynamo/llm/__init__.py
+++ b/lib/bindings/python/src/dynamo/llm/__init__.py
@@ -40,7 +40,6 @@
 from dynamo._core import lora_name_to_id as lora_name_to_id
 from dynamo._core import make_engine
 from dynamo._core import register_llm as register_llm
-from dynamo._core import register_model as register_model
 from dynamo._core import run_input
 from dynamo._core import unregister_llm as unregister_llm
 
diff --git a/lib/bindings/python/tests/test_tensor.py b/lib/bindings/python/tests/test_tensor.py
index 2952be31fe..e48de90f31 100644
--- a/lib/bindings/python/tests/test_tensor.py
+++ b/lib/bindings/python/tests/test_tensor.py
@@ -8,7 +8,7 @@
 import pytest
 import uvloop
 
-from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_model
+from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_llm
 from dynamo.runtime import DistributedRuntime
 
 TEST_END_TO_END = os.environ.get("TEST_END_TO_END", 0)
@@ -34,12 +34,12 @@ async def test_register(runtime: DistributedRuntime):
 
     assert model_config == runtime_config.get_tensor_model_config()
 
-    # Use register_model for tensor-based backends
-    await register_model(
+    # Use register_llm for tensor-based backends (skips HuggingFace downloads)
+    await register_llm(
         ModelInput.Tensor,
         ModelType.TensorBased,
         endpoint,
-        "tensor",
+        "tensor",  # model_path (used as display name for tensor-based models)
         runtime_config=runtime_config,
     )
 
diff --git a/tests/frontend/grpc/echo_tensor_worker.py b/tests/frontend/grpc/echo_tensor_worker.py
index c23a6b36d9..c498b23024 100644
--- a/tests/frontend/grpc/echo_tensor_worker.py
+++ b/tests/frontend/grpc/echo_tensor_worker.py
@@ -9,7 +9,7 @@
 import tritonclient.grpc.model_config_pb2 as mc
 import uvloop
 
-from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_model
+from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_llm
 from dynamo.runtime import DistributedRuntime, dynamo_worker
 
 
@@ -53,12 +53,12 @@ async def echo_tensor_worker(runtime: DistributedRuntime):
     )
     assert model_config == retrieved_model_config
 
-    # Use register_model for tensor-based backends
-    await register_model(
+    # Use register_llm for tensor-based backends (skips HuggingFace downloads)
+    await register_llm(
         ModelInput.Tensor,
         ModelType.TensorBased,
         endpoint,
-        "echo",
+        "echo",  # model_path (used as display name for tensor-based models)
         runtime_config=runtime_config,
     )
 

From 1700a36e6c651ebf4d9d3cd09f7091d8607159d8 Mon Sep 17 00:00:00 2001
From: Neal Vaidya <nealv@nvidia.com>
Date: Tue, 2 Dec 2025 21:46:23 +0000
Subject: [PATCH 5/7] fix rebase conflicts

Signed-off-by: Neal Vaidya <nealv@nvidia.com>
---
 lib/bindings/python/src/dynamo/_core.pyi | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/lib/bindings/python/src/dynamo/_core.pyi b/lib/bindings/python/src/dynamo/_core.pyi
index f5fadad233..3562880541 100644
--- a/lib/bindings/python/src/dynamo/_core.pyi
+++ b/lib/bindings/python/src/dynamo/_core.pyi
@@ -1077,6 +1077,10 @@ async def register_llm(
         Providing only one of these parameters will raise a ValueError.
         - `lora_name`: The served model name for the LoRA model
         - `base_model_path`: Path to the base model that the LoRA extends
+
+    For TensorBased models (using ModelInput.Tensor), HuggingFace downloads are skipped
+    and a minimal model card is registered directly. Use model_path as the display name
+    for these models.
     """
     ...
 
@@ -1095,23 +1099,6 @@ def lora_name_to_id(lora_name: str) -> int:
     """Generate a deterministic integer ID from a LoRA name using blake3 hash."""
     ...
 
-async def register_model(
-    model_input: ModelInput,
-    model_type: ModelType,
-    endpoint: Endpoint,
-    model_name: str,
-    user_data: Optional[Dict[str, Any]] = None,
-    runtime_config: Optional[ModelRuntimeConfig] = None,
-) -> None:
-    """
-    Attach the model at path to the given endpoint, and advertise it as model_type.
-
-    For TensorBased models (using ModelInput.Tensor), HuggingFace downloads are skipped
-    and a minimal model card is registered directly. Use model_path as the display name
-    for these models.
-    """
-    ...
-
 async def fetch_llm(remote_name: str) -> str:
     """
     Download a model from Hugging Face, returning it's local path.

From e4f17c27eef2a01fbc884851faae5e4745e030e7 Mon Sep 17 00:00:00 2001
From: Neal Vaidya <nealv@nvidia.com>
Date: Tue, 2 Dec 2025 22:01:05 +0000
Subject: [PATCH 6/7] fix rebase issue

Signed-off-by: Neal Vaidya <nealv@nvidia.com>
---
 lib/bindings/python/rust/lib.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/bindings/python/rust/lib.rs b/lib/bindings/python/rust/lib.rs
index 4c41b27686..c21856b8de 100644
--- a/lib/bindings/python/rust/lib.rs
+++ b/lib/bindings/python/rust/lib.rs
@@ -327,7 +327,7 @@ fn register_llm<'p>(
     pyo3_async_runtimes::tokio::future_into_py(py, async move {
         // For TensorBased models, skip HuggingFace downloads and register directly
         if is_tensor_based {
-            let model_name = model_name.unwrap_or_else(|| inner_path.clone());
+            let model_name = model_name.unwrap_or_else(|| source_path.clone());
             let mut card =
                 llm_rs::model_card::ModelDeploymentCard::with_name_only(&model_name);
             card.model_type = model_type_obj;

From 988644b30d7a25662d2659731f458b218997a989 Mon Sep 17 00:00:00 2001
From: Neal Vaidya <nealv@nvidia.com>
Date: Tue, 2 Dec 2025 22:08:08 +0000
Subject: [PATCH 7/7] fix formatting issue

Signed-off-by: Neal Vaidya <nealv@nvidia.com>
---
 lib/bindings/python/rust/lib.rs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lib/bindings/python/rust/lib.rs b/lib/bindings/python/rust/lib.rs
index c21856b8de..0c3aba6cc5 100644
--- a/lib/bindings/python/rust/lib.rs
+++ b/lib/bindings/python/rust/lib.rs
@@ -328,8 +328,7 @@ fn register_llm<'p>(
         // For TensorBased models, skip HuggingFace downloads and register directly
         if is_tensor_based {
             let model_name = model_name.unwrap_or_else(|| source_path.clone());
-            let mut card =
-                llm_rs::model_card::ModelDeploymentCard::with_name_only(&model_name);
+            let mut card = llm_rs::model_card::ModelDeploymentCard::with_name_only(&model_name);
             card.model_type = model_type_obj;
             card.model_input = model_input;
             card.user_data = user_data_json;