Commit 9302009

alec-flowers authored and nnshah1 committed

fix: pytest robustness and parsing error (#2676)

Signed-off-by: nnshah1 <neelays@nvidia.com>

1 parent 6a39968 commit 9302009
File tree

9 files changed: +449 -322 lines

examples/multimodal/components/encode_worker.py

Lines changed: 22 additions & 14 deletions
@@ -28,7 +28,7 @@
 from vllm.utils import FlexibleArgumentParser
 
 import dynamo.nixl_connect as connect
-from dynamo.runtime import DistributedRuntime, dynamo_worker
+from dynamo.runtime import Client, DistributedRuntime, dynamo_worker
 from dynamo.runtime.logging import configure_dynamo_logging
 
 sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
@@ -56,8 +56,13 @@
 
 
 class VllmEncodeWorker:
-    def __init__(self, args: argparse.Namespace, engine_args: AsyncEngineArgs) -> None:
-        self.downstream_endpoint = args.downstream_endpoint
+    def __init__(
+        self,
+        args: argparse.Namespace,
+        engine_args: AsyncEngineArgs,
+        pd_worker_client: Client,
+    ) -> None:
+        self.pd_worker_client = pd_worker_client
         self.engine_args = engine_args
         self.model = self.engine_args.model
 
@@ -178,16 +183,6 @@ async def generate(
 
     async def async_init(self, runtime: DistributedRuntime):
         logger.info("Startup started.")
-        parsed_namespace, parsed_component_name, parsed_endpoint_name = parse_endpoint(
-            self.downstream_endpoint
-        )
-        self.pd_worker_client = (
-            await runtime.namespace(parsed_namespace)
-            .component(parsed_component_name)
-            .endpoint(parsed_endpoint_name)
-            .client()
-        )
-
         # Create and initialize a dynamo connector for this worker.
         # We'll need this to move data between this worker and remote workers efficiently.
         self._connector = connect.Connector()
@@ -262,9 +257,22 @@ async def init(runtime: DistributedRuntime, args: argparse.Namespace, config: Co
 
     generate_endpoint = component.endpoint(config.endpoint)
 
-    handler = VllmEncodeWorker(args, config.engine_args)
+    parsed_namespace, parsed_component_name, parsed_endpoint_name = parse_endpoint(
+        args.downstream_endpoint
+    )
+    pd_worker_client = (
+        await runtime.namespace(parsed_namespace)
+        .component(parsed_component_name)
+        .endpoint(parsed_endpoint_name)
+        .client()
+    )
+
+    handler = VllmEncodeWorker(args, config.engine_args, pd_worker_client)
     await handler.async_init(runtime)
 
+    logger.info("Waiting for PD Worker Instances ...")
+    await pd_worker_client.wait_for_instances()
+
     logger.info(f"Starting to serve the {args.endpoint} endpoint...")
 
     try:
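One practical upside of injecting the client through the constructor (instead of resolving it inside async_init) is testability: a test can now hand VllmEncodeWorker a stub client without standing up a full runtime. A minimal sketch, assuming only the constructor signature shown above; FakeClient is hypothetical and fakes just the methods a test would touch:

# Hypothetical stub for unit tests; not part of this commit.
class FakeClient:
    def instance_ids(self):
        return [0]

    async def wait_for_instances(self):
        return [0]

worker = VllmEncodeWorker(args, engine_args, FakeClient())  # no runtime needed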

examples/multimodal/components/processor.py

Lines changed: 22 additions & 16 deletions
@@ -33,7 +33,7 @@
 from vllm.utils import FlexibleArgumentParser
 
 from dynamo.llm import ModelType, register_llm
-from dynamo.runtime import DistributedRuntime, dynamo_worker
+from dynamo.runtime import Client, DistributedRuntime, dynamo_worker
 from dynamo.runtime.logging import configure_dynamo_logging
 
 # To import example local module
@@ -96,9 +96,14 @@ def parse_args(cls) -> Tuple[argparse.Namespace, Config]:
 
         return args, config
 
-    def __init__(self, args: argparse.Namespace, engine_args: AsyncEngineArgs):
+    def __init__(
+        self,
+        args: argparse.Namespace,
+        engine_args: AsyncEngineArgs,
+        encode_worker_client: Client,
+    ):
+        self.encode_worker_client = encode_worker_client
         self.prompt_template = args.prompt_template
-        self.downstream_endpoint = args.downstream_endpoint
         self.engine_args = engine_args
         self.model_config = self.engine_args.create_model_config()
         self.default_sampling_params = self.model_config.get_diff_sampling_param()
@@ -125,17 +130,6 @@ def _create_tokenizer(self, engine_args: AsyncEngineArgs) -> AnyTokenizer:
         )
         return base_tokenizer
 
-    async def async_init(self, runtime: DistributedRuntime):
-        parsed_namespace, parsed_component_name, parsed_endpoint_name = parse_endpoint(
-            self.downstream_endpoint
-        )
-        self.encode_worker_client = (
-            await runtime.namespace(parsed_namespace)
-            .component(parsed_component_name)
-            .endpoint(parsed_endpoint_name)
-            .client()
-        )
-
     # Main method to parse the request and send the request to the vllm worker.
     async def _generate(
         self,
@@ -300,8 +294,20 @@ async def init(runtime: DistributedRuntime, args: argparse.Namespace, config: Co
 
     generate_endpoint = component.endpoint(config.endpoint)
 
-    handler = Processor(args, config.engine_args)
-    await handler.async_init(runtime)
+    parsed_namespace, parsed_component_name, parsed_endpoint_name = parse_endpoint(
+        args.downstream_endpoint
+    )
+    encode_worker_client = (
+        await runtime.namespace(parsed_namespace)
+        .component(parsed_component_name)
+        .endpoint(parsed_endpoint_name)
+        .client()
+    )
+
+    handler = Processor(args, config.engine_args, encode_worker_client)
+
+    logger.info("Waiting for Encoder Worker Instances ...")
+    await encode_worker_client.wait_for_instances()
 
     # Register the endpoint as entrypoint to a model
     await register_llm(
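As in the encode worker, the client is resolved in init and injected, but here the readiness wait also runs before register_llm, so the model is not advertised to the frontend until its encode dependency is reachable. The gate reduced to a sketch (ns, comp, and ep are stand-ins for the parsed endpoint parts):

# Resolve -> construct -> wait -> advertise, in that order.
encode_worker_client = (
    await runtime.namespace(ns).component(comp).endpoint(ep).client()
)
handler = Processor(args, config.engine_args, encode_worker_client)
await encode_worker_client.wait_for_instances()  # at least one encoder is live
# ... only now is register_llm(...) called, making the model discoverable.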

lib/bindings/python/src/dynamo/_core.pyi

Lines changed: 18 additions & 0 deletions
@@ -246,6 +246,24 @@ class Client:
 
         ...
 
+    def instance_ids(self) -> List[int]:
+        """
+        Get list of current instance IDs.
+
+        Returns:
+            A list of currently available instance IDs
+        """
+        ...
+
+    async def wait_for_instances(self) -> List[int]:
+        """
+        Wait for instances to be available for work and return their IDs.
+
+        Returns:
+            A list of instance IDs that are available for work
+        """
+        ...
+
     async def random(self, request: JsonLike) -> AsyncIterator[JsonLike]:
         """
         Pick a random instance of the endpoint and issue the request
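A minimal sketch of how a caller might combine the two new methods; the Client here is assumed to come from the usual runtime.namespace(...).component(...).endpoint(...).client() chain:

async def wait_then_report(client: Client) -> None:
    # Blocks until the endpoint has at least one instance available for work.
    ids = await client.wait_for_instances()
    print(f"instances available for work: {ids}")
    # Non-blocking snapshot of the currently known instance IDs.
    print(f"current snapshot: {client.instance_ids()}")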

tests/serve/common.py

Lines changed: 58 additions & 0 deletions
@@ -0,0 +1,58 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Common base classes and utilities for engine tests (vLLM, TRT-LLM, etc.)"""
+
+from dataclasses import dataclass
+from typing import Any, Callable, List
+
+from tests.utils.deployment_graph import Payload
+
+# Common text prompt used across tests
+TEXT_PROMPT = "Tell me a short joke about AI."
+
+
+@dataclass
+class EngineConfig:
+    """Base configuration for engine test scenarios"""
+
+    name: str
+    directory: str
+    script_name: str
+    marks: List[Any]
+    endpoints: List[str]
+    response_handlers: List[Callable[[Any], str]]
+    model: str
+    timeout: int = 120
+    delayed_start: int = 0
+
+
+def create_payload_for_config(config: EngineConfig) -> Payload:
+    """Create a standard payload using the model from the engine config.
+
+    This provides the default implementation for text-only models.
+    """
+    return Payload(
+        payload_chat={
+            "model": config.model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": TEXT_PROMPT,
+                }
+            ],
+            "max_tokens": 150,
+            "temperature": 0.1,
+            "stream": False,
+        },
+        payload_completions={
+            "model": config.model,
+            "prompt": TEXT_PROMPT,
+            "max_tokens": 150,
+            "temperature": 0.1,
+            "stream": False,
+        },
+        repeat_count=3,
+        expected_log=[],
+        expected_response=["AI"],
+    )
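For context, a sketch of how a test scenario might consume this helper; the config values (model name, script, endpoint) are illustrative placeholders, not part of this commit:

from tests.serve.common import EngineConfig, create_payload_for_config

config = EngineConfig(
    name="vllm-agg",                      # placeholder scenario name
    directory="examples/llm",             # placeholder launch directory
    script_name="agg.sh",                 # placeholder launch script
    marks=[],
    endpoints=["v1/chat/completions"],
    response_handlers=[lambda r: r["choices"][0]["message"]["content"]],
    model="Qwen/Qwen2.5-0.5B-Instruct",   # any served model name works here
)

payload = create_payload_for_config(config)
assert payload.repeat_count == 3          # defaults baked in by the helper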

0 commit comments