@@ -26,6 +26,9 @@
     model_source="unsloth/Llama-3.1-8B-Instruct",
     concurrency=1, # 1 vLLM engine replica
     batch_size=32, # 32 samples per batch
+    engine_kwargs={
+        "max_model_len": 4096, # Fit into test GPU memory
+    }
 )

 # Build processor
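
This hunk appears to come from a Ray Data LLM batch-inference example. A minimal sketch of the amended config in context, assuming the ray.data.llm vLLMEngineProcessorConfig / build_llm_processor API; the preprocess/postprocess lambdas and the "prompt" column name are illustrative, not part of the diff:

from ray.data.llm import build_llm_processor, vLLMEngineProcessorConfig

config = vLLMEngineProcessorConfig(
    model_source="unsloth/Llama-3.1-8B-Instruct",
    concurrency=1,  # 1 vLLM engine replica
    batch_size=32,  # 32 samples per batch
    engine_kwargs={
        "max_model_len": 4096,  # Fit into test GPU memory
    },
)

# Build processor
processor = build_llm_processor(
    config,
    preprocess=lambda row: dict(
        messages=[{"role": "user", "content": row["prompt"]}],
        sampling_params={"temperature": 0.0, "max_tokens": 64},
    ),
    postprocess=lambda row: dict(answer=row["generated_text"]),
)

Lowering max_model_len shrinks the maximum sequence length vLLM must reserve KV-cache space for, which is why the diff's comment says it lets the engine fit into a smaller test GPU.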

@@ -54,14 +54,11 @@ def __init__(self, dp_size: int, dp_size_per_node: Optional[int] = None):
                 f"with dp_size_per_node {self.dp_size_per_node}"
             )

-    async def register(
-        self, replica_ctx: "serve.context.ReplicaContext", node_id: Optional[str] = None
-    ):
+    async def register(self, node_id: Optional[str] = None):
         """
         Register a replica and assign a rank to it.

         Args:
-            replica_ctx: The replica context.
             node_id: The node id of the replica.

         Returns:
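
Only the signature and docstring side of this change is visible here. A hypothetical sketch of a rank assigner honoring the new node-id-based signature (the counter and lock are illustrative; the real assignment logic, including any dp_size_per_node grouping, is not shown in this hunk):

import asyncio
from typing import Optional

class DPRankAssigner:
    def __init__(self, dp_size: int, dp_size_per_node: Optional[int] = None):
        self.dp_size = dp_size
        self.dp_size_per_node = dp_size_per_node
        self._lock = asyncio.Lock()  # register() may be called concurrently
        self._next_rank = 0

    async def register(self, node_id: Optional[str] = None) -> int:
        """Register a replica and assign a rank to it."""
        async with self._lock:
            if self._next_rank >= self.dp_size:
                raise RuntimeError("All DP ranks are already assigned.")
            rank = self._next_rank  # node_id could drive per-node grouping here
            self._next_rank += 1
        return rank

The point of the API change is visible in the signature alone: callers now pass only a node id, so the assigner no longer depends on Ray Serve's replica context type.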

@@ -1,7 +1,6 @@
 import logging
 import time

-from ray import serve
 from ray.experimental.collective.util import get_address_and_port
 from ray.llm._internal.serve.core.configs.llm_config import LLMConfig
 from ray.llm._internal.serve.core.server.llm_server import LLMServer
@@ -24,9 +23,8 @@ class DPServer(LLMServer):
     async def __init__(self, llm_config: LLMConfig, dp_rank_assigner: DeploymentHandle):
         self.dp_rank_assigner = dp_rank_assigner

-        replica_ctx = serve.get_replica_context()
         node_id = get_runtime_context().get_node_id()
-        self.dp_rank = await self.dp_rank_assigner.register.remote(replica_ctx, node_id)
+        self.dp_rank = await self.dp_rank_assigner.register.remote(node_id)

         logger.info(f"DP rank {self.dp_rank} registered with rank assigner")
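
With this change the replica's node is identified through the generic Ray runtime context rather than Ray Serve's replica context, which is why the module drops its ray.serve import entirely. A minimal standalone sketch, assuming the get_runtime_context in the diff is ray.get_runtime_context (its import sits outside the visible hunk):

import ray

ray.init()

# Works in any Ray worker or driver, not only inside a Serve replica.
node_id = ray.get_runtime_context().get_node_id()
print(node_id)  # hex string identifying the node this process runs on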