[serve.llm] Fixed DP DSV3 issues #55802
Changes to placement-group bundle construction and KV-connector setup:
@@ -424,6 +424,38 @@ def update_engine_kwargs(self, **kwargs: Any) -> None:
         if self._engine_config:
             self._engine_config.engine_kwargs.update(kwargs)

+    def _merge_replica_actor_and_child_actor_bundles(
+        self,
+        child_actor_bundles: List[Dict[str, float]],
+        replica_actor_bundle: Dict[str, float],
+    ) -> List[Dict[str, float]]:
+        """Sum the replica actor bundle with the first bundle of the child actor bundles.
Contributor:
Not fully getting the intention here: the placement strategy is STRICT_PACK (at least for TP only), why do we need this?

Contributor (Author):
It was hanging when deployment was … Also, in the case of PACK, because replicas are not limited to being scheduled on the same node as their child RayWorkers, I was always confounded. Modifying to this form of placement ensures the replica actor is scheduled on the same node as its own RayWorker.
+
+        This is because the replica actor will use the first bundle in the list, and we want to collocate the replica actor with the child actor.
+        So we need to group them together.
+
+        So for example:
+        child_actor_bundles = [{"GPU": 1, "CPU": 1}, {"GPU": 1, "CPU": 1}]
+        replica_actor_bundle = {"GPU": 0, "CPU": 1, "memory": 100}
+        return [{"GPU": 1, "CPU": 2, "memory": 100}, {"GPU": 1, "CPU": 1}]
+        """
+
+        if not child_actor_bundles:
+            return [replica_actor_bundle]
+
+        if not replica_actor_bundle:
+            return child_actor_bundles
+
+        first_bundle = child_actor_bundles[0]
+        bundle_key_set = set(first_bundle.keys()) | set(replica_actor_bundle.keys())
+
+        for key in bundle_key_set:
+            first_bundle[key] = replica_actor_bundle.get(key, 0) + first_bundle.get(
+                key, 0
+            )
+
+        return [first_bundle] + child_actor_bundles[1:]
+
     def _set_deployment_placement_options(self) -> Dict[str, Any]:
         deployment_config = self.deployment_config
         engine_config = self.get_engine_config()
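To make the merge concrete, here is a standalone sketch of the same bundle-summing logic; the free-function form and names are illustrative, not the PR's exact code:

```python
from typing import Dict, List


def merge_bundles(
    child_actor_bundles: List[Dict[str, float]],
    replica_actor_bundle: Dict[str, float],
) -> List[Dict[str, float]]:
    # Fold the replica actor's resources into the first child bundle so that
    # the replica actor is colocated with its first RayWorker.
    if not child_actor_bundles:
        return [replica_actor_bundle]
    if not replica_actor_bundle:
        return child_actor_bundles
    first = dict(child_actor_bundles[0])
    for key in set(first) | set(replica_actor_bundle):
        first[key] = first.get(key, 0) + replica_actor_bundle.get(key, 0)
    return [first] + child_actor_bundles[1:]


print(merge_bundles(
    [{"GPU": 1, "CPU": 1}, {"GPU": 1, "CPU": 1}],
    {"GPU": 0, "CPU": 1, "memory": 100},
))
# [{'GPU': 1, 'CPU': 2, 'memory': 100}, {'GPU': 1, 'CPU': 1}]
```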
@@ -449,15 +481,17 @@ def _set_deployment_placement_options(self) -> Dict[str, Any]:
         )

         try:
-            bundles = engine_config.placement_bundles
+            child_actor_bundles = engine_config.placement_bundles
         except ValueError:
             # May happen if all bundles are empty.
-            bundles = []
+            child_actor_bundles = []

-        bundles = [replica_actor_resources] + bundles
+        pg_bundles = self._merge_replica_actor_and_child_actor_bundles(
+            child_actor_bundles, replica_actor_resources
+        )
         deployment_config.update(
             {
-                "placement_group_bundles": bundles,
+                "placement_group_bundles": pg_bundles,
                 "placement_group_strategy": engine_config.placement_strategy,
             }
         )

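The effect on the placement group itself can be sketched with Ray's placement-group API; the resource numbers below are illustrative (a 1-CPU replica actor and two 1-GPU workers), not taken from the PR:

```python
import ray
from ray.util.placement_group import placement_group

ray.init()

# Before this PR: the replica actor's resources formed their own bundle at
# index 0, so with PACK the replica could be scheduled on a node holding
# none of its GPU workers.
pg_before = placement_group([{"CPU": 1}, {"GPU": 1}, {"GPU": 1}], strategy="PACK")

# After this PR: those resources are summed into the first worker bundle, so
# whichever actor uses bundle 0 (the replica actor) shares a node with the
# first RayWorker.
pg_after = placement_group([{"CPU": 1, "GPU": 1}, {"GPU": 1}], strategy="PACK")
```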
@@ -569,7 +603,7 @@ def _setup_kv_connector_backend(self):
             raise ValueError(f"Unsupported connector type: {kv_connector}")

         # 2. Setup the backend
-        kv_connector_backend = kv_connector_backend_class(kv_transfer_config)
+        kv_connector_backend = kv_connector_backend_class(self)
         kv_connector_backend.setup()

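Since connector backends now receive the whole LLMConfig (see the BaseConnectorBackend diff below), the setup flow can be sketched roughly as follows. The registry name, the DummyBackend class, and the config shape are illustrative placeholders, not Ray's actual internals:

```python
from typing import Any, Dict


class DummyBackend:
    """Stand-in for a real connector backend class (illustrative only)."""

    def __init__(self, llm_config: Any):
        # Backends now keep the full LLMConfig and derive kv_transfer_config
        # from it themselves.
        self.llm_config = llm_config

    def setup(self) -> None:
        print("kv_transfer_config:", self.llm_config.engine_kwargs["kv_transfer_config"])


# Hypothetical registry mapping connector names to backend classes.
CONNECTOR_BACKENDS: Dict[str, type] = {"NixlConnector": DummyBackend}


def setup_kv_connector_backend(llm_config: Any) -> None:
    kv_connector = llm_config.engine_kwargs["kv_transfer_config"]["kv_connector"]

    # 1. Resolve the backend class for the configured connector.
    backend_class = CONNECTOR_BACKENDS.get(kv_connector)
    if backend_class is None:
        raise ValueError(f"Unsupported connector type: {kv_connector}")

    # 2. Set up the backend, passing the LLMConfig itself rather than the raw
    #    kv_transfer_config dict (the change in the hunk above).
    backend_class(llm_config).setup()
```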
Changes to the data parallel deployment builder:
@@ -76,6 +76,7 @@ def build_dp_deployment(
     llm_config: LLMConfig,
     *,
     name_prefix: Optional[str] = None,
+    options_override: Optional[dict] = None,
Contributor:
QQ: what do you have in mind to use this for?

Contributor (Author):
Placement groups / deployment name (full name), etc.
 ) -> Application:
     """Build a data parallel LLM deployment."""
     dp_size = llm_config.engine_kwargs.get("data_parallel_size", 1)

@@ -89,10 +90,12 @@ def build_dp_deployment(
     # the number of ranks per node because that has special semantics in vLLM.
     dp_size_per_node = llm_config.experimental_configs.get("dp_size_per_node", None)

-    deployment_options = llm_config.get_serve_options(name_prefix=name_prefix)
     dp_rank_assigner = DPRankAssigner.bind(
         dp_size=dp_size, dp_size_per_node=dp_size_per_node
     )
+    deployment_options = llm_config.get_serve_options(name_prefix=name_prefix)
+    if options_override:
+        deployment_options.update(options_override)

     return DPServer.as_deployment(deployment_options).bind(
         llm_config=llm_config, dp_rank_assigner=dp_rank_assigner

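As the author notes, options_override is meant for things like placement groups or the full deployment name. A hedged usage sketch, assuming build_dp_deployment is imported from the module changed in this PR; the model and option values are only illustrative:

```python
from ray import serve
from ray.serve.llm import LLMConfig

llm_config = LLMConfig(
    model_loading_config={"model_id": "deepseek-ai/DeepSeek-V3"},
    engine_kwargs={"data_parallel_size": 2, "tensor_parallel_size": 8},
)

app = build_dp_deployment(
    llm_config,
    name_prefix="DSV3:",
    # Override anything produced by llm_config.get_serve_options(), e.g. the
    # full deployment name or placement-group options.
    options_override={"name": "DSV3DPServer"},
)
serve.run(app)
```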
Changes to the connector backend base class:
@@ -1,17 +1,29 @@
 import abc
 import random
 import string
-from typing import Any, Dict
+from typing import TYPE_CHECKING, Any, Dict
+
+if TYPE_CHECKING:
+    from ray.llm._internal.serve.configs.server_models import LLMConfig


 class BaseConnectorBackend(abc.ABC):
-    def __init__(self, kv_transfer_config: Dict[str, Any]):
+    def __init__(self, llm_config: "LLMConfig"):
         """Base class for connector backends.

         Args:
-            kv_transfer_config: Configuration for the KV transfer.
+            llm_config: The LLM configuration for this engine.
         """
-        self.kv_transfer_config = kv_transfer_config
+        self.llm_config = llm_config
+
+    @property
+    def kv_transfer_config(self) -> Dict[str, Any]:
+        engine_kwargs = self.llm_config.engine_kwargs
+        kv_transfer_config = engine_kwargs.get("kv_transfer_config")
+        assert (
+            kv_transfer_config is not None
+        ), "In connector backend, kv_transfer_config is not set"
Contributor (on lines +23 to +25):
Better to validate it early in the constructor, and validate only once?
+        return kv_transfer_config

     def _get_unique_suffix(self, len: int = 6) -> str:
         """Generates unique alphanumeric suffix.

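For illustration, a minimal sketch of how a concrete backend would consume the new contract, plus the reviewer's suggested alternative of validating once in the constructor. The subclass names are hypothetical, and the import of BaseConnectorBackend is left as a placeholder since the file path is not shown here:

```python
from typing import Any, Dict

# from <the connector backend module changed in this PR> import BaseConnectorBackend


class HypotheticalNixlBackend(BaseConnectorBackend):
    """Illustrative subclass, not part of the PR."""

    def setup(self) -> None:
        # The property re-derives the dict from llm_config.engine_kwargs on
        # every access and asserts that it is present.
        config: Dict[str, Any] = self.kv_transfer_config
        print("connector:", config.get("kv_connector"))


class EagerlyValidatedBackend(BaseConnectorBackend):
    """Variant following the review suggestion: validate once, up front."""

    def __init__(self, llm_config: "LLMConfig"):
        super().__init__(llm_config)
        if llm_config.engine_kwargs.get("kv_transfer_config") is None:
            raise ValueError("kv_transfer_config is not set")

    def setup(self) -> None:
        pass
```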
nit: switch the order

it's fine.