diff --git a/components/planner/README.md b/components/planner/README.md index 1f4c2619ee..0c28aebddb 100644 --- a/components/planner/README.md +++ b/components/planner/README.md @@ -124,6 +124,4 @@ For manual testing, you can use the controller_test.py file to add/remove compon The Kubernetes backend works by updating the replicas count of the DynamoGraphDeployment custom resource. When the planner determines that workers need to be scaled up or down based on workload metrics, it uses the Kubernetes API to patch the DynamoGraphDeployment resource specification, changing the replicas count for the appropriate worker component. The Kubernetes operator then reconciles this change by creating or terminating the necessary pods. This provides a seamless autoscaling experience in Kubernetes environments without requiring manual intervention. -The Kubernetes backend will automatically be used by Planner when your pipeline is deployed with `dynamo deployment create`. By default, the planner will run in no-op mode, which means it will monitor metrics but not take scaling actions. To enable actual scaling, you should also specify `--Planner.no-operation=false`. - - +The Kubernetes backend will automatically be used by Planner when your pipeline is deployed using a DynamoGraphDeployment CR. By default, the planner will run in no-op mode, which means it will monitor metrics but not take scaling actions. To enable actual scaling, you should also specify `--Planner.no-operation=false`. diff --git a/deploy/inference-gateway/example/README.md b/deploy/inference-gateway/example/README.md index 3056cf2afb..8f400c11e2 100644 --- a/deploy/inference-gateway/example/README.md +++ b/deploy/inference-gateway/example/README.md @@ -32,11 +32,11 @@ export DYNAMO_TAG=$(dynamo build graphs.agg:Frontend | grep "Successfully built" ```bash # Deploy first graph export DEPLOYMENT_NAME=llm-agg1 -dynamo deployment create $DYNAMO_TAG -n $DEPLOYMENT_NAME -f ./configs/agg.yaml +# TODO: Deploy your service using a DynamoGraphDeployment CR. # Deploy second graph export DEPLOYMENT_NAME=llm-agg2 -dynamo deployment create $DYNAMO_TAG -n $DEPLOYMENT_NAME -f ./configs/agg.yaml +# TODO: Deploy your service using a DynamoGraphDeployment CR. ``` 3. **Deploy Inference Gateway** diff --git a/deploy/sdk/src/dynamo/sdk/cli/cli.py b/deploy/sdk/src/dynamo/sdk/cli/cli.py index 12e27d2172..7fb85140f1 100644 --- a/deploy/sdk/src/dynamo/sdk/cli/cli.py +++ b/deploy/sdk/src/dynamo/sdk/cli/cli.py @@ -23,8 +23,6 @@ from rich.console import Console from dynamo.sdk.cli.build import build -from dynamo.sdk.cli.deployment import app as deployment_app -from dynamo.sdk.cli.deployment import deploy from dynamo.sdk.cli.env import env from dynamo.sdk.cli.run import run from dynamo.sdk.cli.serve import serve @@ -76,8 +74,6 @@ def main( context_settings={"allow_extra_args": True, "ignore_unknown_options": True}, add_help_option=False, )(run) -cli.add_typer(deployment_app, name="deployment") -cli.command()(deploy) cli.command()(build) if __name__ == "__main__": diff --git a/deploy/sdk/src/dynamo/sdk/cli/deployment.py b/deploy/sdk/src/dynamo/sdk/cli/deployment.py deleted file mode 100644 index ae3f93759d..0000000000 --- a/deploy/sdk/src/dynamo/sdk/cli/deployment.py +++ /dev/null @@ -1,561 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2020 Atalaya Tech. Inc -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# # -# http://www.apache.org/licenses/LICENSE-2.0 -# # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES - -from __future__ import annotations - -import json -import typing as t - -import typer -from rich.console import Console -from rich.panel import Panel - -from dynamo.sdk.cli.utils import resolve_service_config -from dynamo.sdk.core.deploy.consts import DeploymentTargetType -from dynamo.sdk.core.deploy.kubernetes import KubernetesDeploymentManager -from dynamo.sdk.core.protocol.deployment import ( - Deployment, - DeploymentConfig, - DeploymentManager, - DeploymentResponse, -) -from dynamo.sdk.core.runner import TargetEnum - -app = typer.Typer( - help="Deploy Dynamo applications to Dynamo Cloud Platform", - add_completion=True, - no_args_is_help=True, -) - -console = Console(highlight=False) - - -def get_deployment_manager(target: str, endpoint: str) -> DeploymentManager: - """Return the appropriate DeploymentManager for the given target and endpoint.""" - try: - target_enum = DeploymentTargetType(target) - except ValueError: - valid_targets = ", ".join([e.value for e in DeploymentTargetType]) - console.print( - Panel( - f"Invalid deployment target: {target}\nSupported targets: {valid_targets}", - title="Error", - style="red", - ) - ) - raise typer.Exit(1) - if target_enum == DeploymentTargetType.KUBERNETES: - return KubernetesDeploymentManager(endpoint) - else: - raise ValueError(f"Unknown deployment target: {target_enum}") - - -def display_deployment_info( - deployment_manager: DeploymentManager, deployment: DeploymentResponse -) -> None: - """Display deployment summary, status, and endpoint URLs using rich panels.""" - name = deployment.get("name") or deployment.get("uid") or deployment.get("id") - status = deployment_manager.get_status(name) - urls = deployment_manager.get_endpoint_urls(name) - created_at = deployment.get("created_at", "") - summary = ( - f"[white]Name:[/] [cyan]{name}[/]\n" - f"[white]Status:[/] [{status.color}]{status.value}[/]" - ) - if created_at: - summary += f"\n[white]Created:[/] [magenta]{created_at}[/]" - if urls: - summary += f"\n[white]URLs:[/] [blue]{' | '.join(urls)}[/]" - else: - summary += "\n[white]URLs:[/] [blue]None[/]" - console.print(Panel(summary, title="Deployment", style="cyan")) - - -def _build_env_dicts( - config_file: t.Optional[t.TextIO] = None, - args: t.Optional[t.List[str]] = None, - envs: t.Optional[t.List[str]] = None, - envs_from_secret: t.Optional[t.List[str]] = None, - env_secrets_name: t.Optional[str] = "dynamo-env-secrets", -) -> t.List[t.Dict[str, t.Any]]: - """ - Build a list of environment variable dicts. - """ - env_dicts: t.List[t.Dict[str, t.Any]] = [] - if config_file or args: - service_configs = resolve_service_config(config_file=config_file, args=args) - config_json = json.dumps(service_configs) - env_dicts.append({"name": "DYN_DEPLOYMENT_CONFIG", "value": config_json}) - if envs: - for env in envs: - if "=" in env: - key, value = env.split("=", 1) - env_dicts.append({"name": key, "value": value}) - else: - raise RuntimeError(f"Invalid env format: {env}. Use KEY=VALUE.") - if envs_from_secret: - for env in envs_from_secret: - if "=" in env: - key, secret_key = env.split("=", 1) - env_dicts.append( - { - "name": key, - "valueFrom": { - "secretKeyRef": { - "name": env_secrets_name, - "key": secret_key, - } - }, - } - ) - else: - raise RuntimeError( - f"Invalid env-from-secret format: {env}. Use KEY=SECRET_KEY." - ) - return env_dicts - - -def _handle_deploy_create( - ctx: typer.Context, - config: DeploymentConfig, -) -> DeploymentResponse: - """Handle deployment creation. This is a helper function for the create and deploy commands. - - Args: - ctx: typer context - config: DeploymentConfig object - """ - - from dynamo.sdk.cli.utils import configure_target_environment - from dynamo.sdk.lib.loader import load_entry_service - - # TODO: hardcoding this is a hack to get the services for the deployment - # we should find a better way to do this once build is finished/generic - configure_target_environment(TargetEnum.DYNAMO) - entry_service = load_entry_service(config.graph) - - deployment_manager = get_deployment_manager(config.target, config.endpoint) - env_dicts = _build_env_dicts( - config_file=config.config_file, - args=ctx.args, - envs=config.envs, - envs_from_secret=config.envs_from_secret, - env_secrets_name=config.env_secrets_name, - ) - deployment = Deployment( - name=config.name or (config.graph if config.graph else "unnamed-deployment"), - namespace="default", - graph=config.graph, - entry_service=entry_service, - envs=env_dicts, - ) - try: - console.print("[bold green]Creating deployment...") - deployment = deployment_manager.create_deployment( - deployment, - dev=config.dev, - ) - console.print(f"[bold green]Deployment '{config.name}' created.") - if config.wait: - deployment, ready = deployment_manager.wait_until_ready( - config.name, timeout=config.timeout - ) - if ready: - console.print( - Panel( - f"Deployment [bold]{config.name}[/] is [green]ready[/]", - title="Status", - ) - ) - else: - console.print( - Panel( - f"Deployment [bold]{config.name}[/] did not become ready in time.", - title="Status", - style="red", - ) - ) - display_deployment_info(deployment_manager, deployment) - return deployment - except Exception as e: - if isinstance(e, RuntimeError) and isinstance(e.args[0], tuple): - status, msg, url = e.args[0] - if status == 409: - console.print( - Panel( - f"Cannot create deployment because deployment with name '{config.name}' already exists.", - title="Error", - style="red", - ) - ) - elif status in (400, 422): - console.print( - Panel(f"Validation error:\n{msg}", title="Error", style="red") - ) - elif status == 404: - console.print( - Panel(f"Not found: {url} \n{msg}", title="Error", style="red") - ) - elif status == 500: - console.print( - Panel(f"Internal server error:\n{msg}", title="Error", style="red") - ) - else: - console.print( - Panel( - f"Failed to create deployment:\n{msg}", - title="Error", - style="red", - ) - ) - else: - console.print(Panel(str(e), title="Error", style="red")) - raise typer.Exit(1) - - -@app.command() -def create( - ctx: typer.Context, - graph: str = typer.Argument(..., help="Dynamo graph to deploy"), - name: t.Optional[str] = typer.Option(None, "--name", "-n", help="Deployment name"), - config_file: t.Optional[typer.FileText] = typer.Option( - None, "--config-file", "-f", help="Configuration file path" - ), - wait: bool = typer.Option( - True, "--wait/--no-wait", help="Do not wait for deployment to be ready" - ), - timeout: int = typer.Option( - 3600, "--timeout", help="Timeout for deployment to be ready in seconds" - ), - endpoint: str = typer.Option( - ..., "--endpoint", "-e", help="Dynamo Cloud endpoint", envvar="DYNAMO_CLOUD" - ), - envs: t.Optional[t.List[str]] = typer.Option( - None, - "--env", - help="Environment variable(s) to set (format: KEY=VALUE). Note: These environment variables will be set on ALL services in your Dynamo graph.", - ), - envs_from_secret: t.Optional[t.List[str]] = typer.Option( - None, - "--env-from-secret", - help="Environment variable(s) from secret (format: KEY=SECRET_KEY). These will be set from your Dynamo secrets.", - ), - target: str = typer.Option( - DeploymentTargetType.KUBERNETES.value, - "--target", - "-t", - help="Deployment target", - ), - dev: bool = typer.Option(False, "--dev", help="Development mode for deployment"), - env_secrets_name: t.Optional[str] = typer.Option( - "dynamo-env-secrets", - "--env-secrets-name", - help="Environment secrets name", - envvar="DYNAMO_ENV_SECRETS", - ), -) -> DeploymentResponse: - """Create a deployment on Dynamo Cloud.""" - config = DeploymentConfig( - graph=graph, - endpoint=endpoint, - name=name, - config_file=config_file, - wait=wait, - timeout=timeout, - envs=envs, - envs_from_secret=envs_from_secret, - target=target, - dev=dev, - env_secrets_name=env_secrets_name, - ) - return _handle_deploy_create(ctx, config) - - -@app.command() -def get( - name: str = typer.Argument(..., help="Deployment name"), - target: str = typer.Option( - DeploymentTargetType.KUBERNETES.value, - "--target", - "-t", - help="Deployment target", - ), - endpoint: str = typer.Option( - ..., "--endpoint", "-e", help="Dynamo Cloud endpoint", envvar="DYNAMO_CLOUD" - ), -) -> DeploymentResponse: - """Get details for a specific deployment by name.""" - deployment_manager = get_deployment_manager(target, endpoint) - try: - with console.status(f"[bold green]Getting deployment '{name}'..."): - deployment = deployment_manager.get_deployment(name) - display_deployment_info(deployment_manager, deployment) - return deployment - except Exception as e: - if isinstance(e, RuntimeError) and isinstance(e.args[0], tuple): - status, msg, _ = e.args[0] - if status == 404: - console.print( - Panel( - f"Deployment '{name}' not found.\n{msg}", - title="Error", - style="red", - ) - ) - else: - console.print( - Panel( - f"Failed to get deployment:\n{msg}", title="Error", style="red" - ) - ) - else: - console.print(Panel(str(e), title="Error", style="red")) - raise typer.Exit(1) - - -@app.command("list") -def list_deployments( - target: str = typer.Option( - DeploymentTargetType.KUBERNETES.value, - "--target", - "-t", - help="Deployment target", - ), - endpoint: str = typer.Option( - ..., "--endpoint", "-e", help="Dynamo Cloud endpoint", envvar="DYNAMO_CLOUD" - ), -) -> None: - """List all deployments.""" - deployment_manager = get_deployment_manager(target, endpoint) - try: - with console.status("[bold green]Listing deployments..."): - deployments = deployment_manager.list_deployments() - if not deployments: - console.print( - Panel("No deployments found.", title="Deployments", style="yellow") - ) - else: - console.print(Panel("[bold]Deployments List[/]", style="blue")) - for dep in deployments: - display_deployment_info(deployment_manager, dep) - except Exception as e: - if isinstance(e, RuntimeError) and isinstance(e.args[0], tuple): - status, msg, url = e.args[0] - if status == 404: - console.print( - Panel( - f"Endpoint not found: {url}\n{msg}", title="Error", style="red" - ) - ) - else: - console.print( - Panel( - f"Failed to list deployments:\n{msg}", - title="Error", - style="red", - ) - ) - else: - console.print(Panel(str(e), title="Error", style="red")) - raise typer.Exit(1) - - -@app.command() -def update( - ctx: typer.Context, - name: str = typer.Argument(..., help="Deployment name to update"), - target: str = typer.Option( - DeploymentTargetType.KUBERNETES.value, - "--target", - "-t", - help="Deployment target", - ), - config_file: t.Optional[typer.FileText] = typer.Option( - None, "--config-file", "-f", help="Configuration file path" - ), - envs: t.Optional[t.List[str]] = typer.Option( - None, - "--env", - help="Environment variable(s) to set (format: KEY=VALUE). Note: These environment variables will be set on ALL services in your Dynamo graph.", - ), - envs_from_secret: t.Optional[t.List[str]] = typer.Option( - None, - "--env-from-secret", - help="Environment variable(s) from secret (format: KEY=SECRET_KEY). These will be set from your Dynamo secrets.", - ), - endpoint: str = typer.Option( - ..., "--endpoint", "-e", help="Dynamo Cloud endpoint", envvar="DYNAMO_CLOUD" - ), - env_secrets_name: t.Optional[str] = typer.Option( - "dynamo-env-secrets", - "--env-secrets-name", - help="Environment secrets name", - envvar="DYNAMO_ENV_SECRETS", - ), -) -> None: - """Update an existing deployment on Dynamo Cloud. - - Update a deployment using parameters or a config yaml file. - """ - deployment_manager = get_deployment_manager(target, endpoint) - try: - with console.status(f"[bold green]Updating deployment '{name}'..."): - env_dicts = _build_env_dicts( - config_file=config_file, - args=ctx.args, - envs=envs, - envs_from_secret=envs_from_secret, - env_secrets_name=env_secrets_name, - ) - deployment = Deployment( - name=name, - namespace="default", - envs=env_dicts, - ) - deployment_manager.update_deployment( - deployment_id=name, deployment=deployment - ) - console.print( - Panel( - "[yellow]Update submitted. It may take a short time for the new pods to become active. Please wait a bit before accessing the deployment to ensure your changes are live.[/yellow]", - title="Status", - ) - ) - except Exception as e: - if isinstance(e, RuntimeError) and isinstance(e.args[0], tuple): - status, msg, url = e.args[0] - if status == 404: - console.print( - Panel( - f"Deployment '{name}' not found.\n{msg}", - title="Error", - style="red", - ) - ) - else: - console.print( - Panel( - f"Failed to update deployment:\n{msg}", - title="Error", - style="red", - ) - ) - else: - console.print(Panel(str(e), title="Error", style="red")) - raise typer.Exit(1) - - -@app.command() -def delete( - name: str = typer.Argument(..., help="Deployment name"), - target: str = typer.Option( - DeploymentTargetType.KUBERNETES.value, - "--target", - "-t", - help="Deployment target", - ), - endpoint: str = typer.Option( - ..., "--endpoint", "-e", help="Dynamo Cloud endpoint", envvar="DYNAMO_CLOUD" - ), -) -> None: - """Delete a deployment by name.""" - deployment_manager = get_deployment_manager(target, endpoint) - try: - with console.status(f"[bold green]Deleting deployment '{name}'..."): - deployment_manager.delete_deployment(name) - console.print( - Panel(f"Deleted deployment {name}", title="Success", style="green") - ) - except Exception as e: - if isinstance(e, RuntimeError) and isinstance(e.args[0], tuple): - status, msg, _ = e.args[0] - if status == 404: - console.print( - Panel( - f"Deployment '{name}' not found.", - title="Error", - style="red", - ) - ) - else: - console.print( - Panel( - f"Failed to delete deployment:\n{msg}", - title="Error", - style="red", - ) - ) - else: - console.print(Panel(str(e), title="Error", style="red")) - raise typer.Exit(1) - - -def deploy( - ctx: typer.Context, - graph: str = typer.Argument(..., help="Dynamo graph to deploy"), - name: t.Optional[str] = typer.Option(None, "--name", "-n", help="Deployment name"), - config_file: t.Optional[typer.FileText] = typer.Option( - None, "--config-file", "-f", help="Configuration file path" - ), - wait: bool = typer.Option( - True, "--wait/--no-wait", help="Do not wait for deployment to be ready" - ), - timeout: int = typer.Option( - 3600, "--timeout", help="Timeout for deployment to be ready in seconds" - ), - endpoint: str = typer.Option( - ..., "--endpoint", "-e", help="Dynamo Cloud endpoint", envvar="DYNAMO_CLOUD" - ), - envs: t.Optional[t.List[str]] = typer.Option( - None, - "--env", - help="Environment variable(s) to set (format: KEY=VALUE). Note: These environment variables will be set on ALL services in your Dynamo graph.", - ), - envs_from_secret: t.Optional[t.List[str]] = typer.Option( - None, - "--env-from-secret", - help="Environment variable(s) from secret (format: KEY=SECRET_KEY). These will be set from your Dynamo secrets.", - ), - target: str = typer.Option( - DeploymentTargetType.KUBERNETES.value, - "--target", - "-t", - help="Deployment target", - ), - dev: bool = typer.Option(False, "--dev", help="Development mode for deployment"), - env_secrets_name: t.Optional[str] = typer.Option( - "dynamo-env-secrets", - "--env-secrets-name", - help="Environment secrets name", - envvar="DYNAMO_ENV_SECRETS", - ), -) -> DeploymentResponse: - """Deploy a Dynamo graph (same as deployment create).""" - config = DeploymentConfig( - graph=graph, - endpoint=endpoint, - name=name, - config_file=config_file, - wait=wait, - timeout=timeout, - envs=envs, - envs_from_secret=envs_from_secret, - target=target, - dev=dev, - env_secrets_name=env_secrets_name, - ) - return _handle_deploy_create(ctx, config) diff --git a/deploy/sdk/src/dynamo/sdk/core/deploy/__init__.py b/deploy/sdk/src/dynamo/sdk/core/deploy/__init__.py deleted file mode 100644 index b47194af60..0000000000 --- a/deploy/sdk/src/dynamo/sdk/core/deploy/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# # -# http://www.apache.org/licenses/LICENSE-2.0 -# # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES diff --git a/deploy/sdk/src/dynamo/sdk/core/deploy/consts.py b/deploy/sdk/src/dynamo/sdk/core/deploy/consts.py deleted file mode 100644 index a6bf47923c..0000000000 --- a/deploy/sdk/src/dynamo/sdk/core/deploy/consts.py +++ /dev/null @@ -1,22 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from enum import Enum - - -class DeploymentTargetType(Enum): - """Enum for deployment target types.""" - - KUBERNETES = "kubernetes" diff --git a/deploy/sdk/src/dynamo/sdk/core/deploy/kubernetes.py b/deploy/sdk/src/dynamo/sdk/core/deploy/kubernetes.py deleted file mode 100644 index 08ab96e351..0000000000 --- a/deploy/sdk/src/dynamo/sdk/core/deploy/kubernetes.py +++ /dev/null @@ -1,172 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import time -import typing as t - -import requests - -from dynamo.sdk.core.protocol.deployment import ( - Deployment, - DeploymentManager, - DeploymentResponse, - DeploymentStatus, -) -from dynamo.sdk.lib.utils import upload_graph - - -class KubernetesDeploymentManager(DeploymentManager): - """ - Implementation of DeploymentManager that talks to the dynamo_store deployment API. - Accepts **kwargs for backend-specific options. - Handles error reporting and payload construction according to the API schema. - Raises exceptions for errors; CLI handles user interaction. - """ - - def __init__(self, endpoint: str): - self.endpoint = endpoint.rstrip("/") - self.session = requests.Session() - - def create_deployment(self, deployment: Deployment, **kwargs) -> DeploymentResponse: - """Create a new deployment. Ensures all components and versions are registered/uploaded before creating the deployment.""" - # For each service/component in the deployment, upload it to the API store - if not deployment.graph: - raise ValueError( - "Deployment graph must be provided in the format :" - ) - upload_graph( - endpoint=self.endpoint, - graph=deployment.graph, - entry_service=deployment.entry_service, - session=self.session, - **kwargs, - ) - - # Now create the deployment - dev = kwargs.get("dev", False) - payload = { - "name": deployment.name, - "component": deployment.graph or deployment.namespace, - "dev": dev, - "envs": deployment.envs, - } - payload = {k: v for k, v in payload.items() if v is not None} - url = f"{self.endpoint}/api/v2/deployments" - try: - resp = self.session.post(url, json=payload) - resp.raise_for_status() - return resp.json() - except requests.HTTPError as e: - status = e.response.status_code if e.response is not None else None - msg = e.response.text if e.response is not None else str(e) - if "already exists" in msg: - raise RuntimeError((409, msg, None)) from e - raise RuntimeError((status, msg, url)) from e - - def update_deployment( - self, deployment_id: str, deployment: Deployment, **kwargs - ) -> None: - """Update an existing deployment.""" - access_authorization = kwargs.get("access_authorization", False) - payload = { - "name": deployment.name, - "envs": deployment.envs, - "services": deployment.services, - "access_authorization": access_authorization, - } - payload = {k: v for k, v in payload.items() if v is not None} - url = f"{self.endpoint}/api/v2/deployments/{deployment_id}" - try: - resp = self.session.put(url, json=payload) - resp.raise_for_status() - except requests.HTTPError as e: - status = e.response.status_code if e.response is not None else None - msg = e.response.text if e.response is not None else str(e) - raise RuntimeError((status, msg, url)) - - def get_deployment(self, deployment_id: str) -> DeploymentResponse: - """Get deployment details.""" - url = f"{self.endpoint}/api/v2/deployments/{deployment_id}" - try: - resp = self.session.get(url) - resp.raise_for_status() - return resp.json() - except requests.HTTPError as e: - status = e.response.status_code if e.response is not None else None - msg = e.response.text if e.response is not None else str(e) - raise RuntimeError((status, msg, url)) from e - - def list_deployments(self) -> t.List[DeploymentResponse]: - """List all deployments.""" - url = f"{self.endpoint}/api/v2/deployments" - try: - resp = self.session.get(url) - resp.raise_for_status() - data = resp.json() - return data.get("items", []) - except requests.HTTPError as e: - msg = e.response.text if e.response is not None else str(e) - raise RuntimeError( - (e.response.status_code if e.response else None, msg, url) - ) - - def delete_deployment(self, deployment_id: str) -> None: - """Delete a deployment.""" - url = f"{self.endpoint}/api/v2/deployments/{deployment_id}" - try: - resp = self.session.delete(url) - resp.raise_for_status() - except requests.HTTPError as e: - status = e.response.status_code if e.response is not None else None - msg = e.response.text if e.response is not None else str(e) - raise RuntimeError((status, msg, url)) from e - - def get_status( - self, - deployment_id: str, - ) -> DeploymentStatus: - dep = self.get_deployment(deployment_id) - status = dep.get("status", "unknown") - if status == "running": - return DeploymentStatus.RUNNING - elif status == "failed": - return DeploymentStatus.FAILED - elif status == "deploying": - return DeploymentStatus.IN_PROGRESS - elif status == "terminated": - return DeploymentStatus.TERMINATED - else: - return DeploymentStatus.PENDING - - def wait_until_ready( - self, deployment_id: str, timeout: int = 3600 - ) -> t.Tuple[DeploymentResponse, bool]: - start = time.time() - while time.time() - start < timeout: - dep = self.get_deployment(deployment_id) - status = self.get_status(deployment_id) - if status == DeploymentStatus.RUNNING: - return dep, True - elif status == DeploymentStatus.FAILED: - return dep, False - time.sleep(5) - return dep, False - - def get_endpoint_urls( - self, - deployment_id: str, - ) -> t.List[str]: - dep = self.get_deployment(deployment_id) - return dep.get("urls", []) diff --git a/deploy/sdk/src/dynamo/sdk/core/protocol/deployment.py b/deploy/sdk/src/dynamo/sdk/core/protocol/deployment.py index 0e4f9af311..7ba5214682 100644 --- a/deploy/sdk/src/dynamo/sdk/core/protocol/deployment.py +++ b/deploy/sdk/src/dynamo/sdk/core/protocol/deployment.py @@ -14,11 +14,7 @@ # limitations under the License. import typing as t -from abc import ABC, abstractmethod from dataclasses import dataclass, field -from enum import Enum - -import typer @dataclass @@ -75,36 +71,6 @@ def __post_init__(self): ) -class DeploymentStatus(str, Enum): - """Status of a dynamo deployment.""" - - PENDING = "pending" - IN_PROGRESS = "in progress" - RUNNING = "running" - FAILED = "failed" - TERMINATED = "terminate" - SCALED_TO_ZERO = "scaled to zero" - - @property - def color(self) -> str: - return { - DeploymentStatus.RUNNING: "green", - DeploymentStatus.IN_PROGRESS: "yellow", - DeploymentStatus.PENDING: "yellow", - DeploymentStatus.FAILED: "red", - DeploymentStatus.TERMINATED: "red", - DeploymentStatus.SCALED_TO_ZERO: "yellow", - }.get(self, "white") - - -@dataclass -class ScalingPolicy: - """Scaling policy.""" - - policy: str - parameters: t.Dict[str, t.Union[int, float, str]] = field(default_factory=dict) - - @dataclass class Env: """Environment variable.""" @@ -126,149 +92,5 @@ class Service: resources: Resources | None = None envs: t.List[Env] = field(default_factory=list) secrets: t.List[str] = field(default_factory=list) - scaling: ScalingPolicy = field(default_factory=lambda: ScalingPolicy(policy="none")) apis: dict = field(default_factory=dict) size_bytes: int = 0 - - -@dataclass -class Deployment: - """Graph deployment.""" - - name: str - namespace: str - graph: t.Optional[str] = None - entry_service: t.Optional[Service] = None - envs: t.Optional[t.List[t.Dict[str, t.Any]]] = None - - -# Type alias for deployment responses (e.g., from backend APIs) -DeploymentResponse = t.Dict[str, t.Any] - - -@dataclass -class DeploymentConfig: - """Configuration object for deployment operations. - - Consolidates all deployment parameters including graph configuration, - environment variables, and deployment settings. - """ - - # Core deployment settings - graph: str - endpoint: str - name: t.Optional[str] = None - target: str = "kubernetes" - dev: bool = False - - # Configuration and timing - config_file: t.Optional[typer.FileText] = None - wait: bool = True - timeout: int = 3600 - - # Environment variables - envs: t.Optional[t.List[str]] = None - envs_from_secret: t.Optional[t.List[str]] = None - env_secrets_name: t.Optional[str] = "dynamo-env-secrets" - - -class DeploymentManager(ABC): - """Interface for managing dynamo graph deployments.""" - - @abstractmethod - def create_deployment(self, deployment: Deployment, **kwargs) -> DeploymentResponse: - """Create new deployment. - - Args: - deployment: Deployment configuration - **kwargs: Additional backend-specific arguments - - Returns: - The created deployment - """ - pass - - @abstractmethod - def update_deployment(self, deployment_id: str, deployment: Deployment) -> None: - """Update an existing deployment. - - Args: - deployment_id: The ID of the deployment to update - deployment: New deployment configuration - """ - pass - - @abstractmethod - def get_deployment(self, deployment_id: str) -> DeploymentResponse: - """Get deployment details. - - Args: - deployment_id: The ID of the deployment - - Returns: - Dictionary containing deployment details - """ - pass - - @abstractmethod - def list_deployments(self) -> t.List[DeploymentResponse]: - """List all deployments. - - Returns: - List of dictionaries containing deployment id and details - """ - pass - - @abstractmethod - def delete_deployment(self, deployment_id: str) -> None: - """Delete a deployment. - - Args: - deployment_id: The ID of the deployment to delete - """ - pass - - @abstractmethod - def get_status( - self, - deployment_id: str, - ) -> DeploymentStatus: - """Get the current status of a deployment. - - Args (one of): - deployment_id: The ID of the deployment - - Returns: - The current status of the deployment - """ - pass - - @abstractmethod - def wait_until_ready( - self, deployment_id: str, timeout: int = 3600 - ) -> t.Tuple[DeploymentResponse, bool]: - """Wait until a deployment is ready. - - Args: - deployment_id: The ID of the deployment - timeout: Maximum time to wait in seconds - - Returns: - Tuple of deployment response and a boolean indicating if the deployment became ready - """ - pass - - @abstractmethod - def get_endpoint_urls( - self, - deployment_id: str, - ) -> t.List[str]: - """Get the list of endpoint urls attached to a deployment. - - Args (one of): - deployment_id: The ID of the deployment - - Returns: - List of deployment's endpoint urls - """ - pass diff --git a/deploy/sdk/tests/test_deployment.sh b/deploy/sdk/tests/test_deployment.sh index 0c667ead33..d0feeaf68b 100755 --- a/deploy/sdk/tests/test_deployment.sh +++ b/deploy/sdk/tests/test_deployment.sh @@ -32,4 +32,4 @@ DYNAMO_TAG=$(dynamo build hello_world:Frontend | grep "Successfully built" | awk # Step.3: Deploy! echo $DYNAMO_TAG -dynamo deployment create $DYNAMO_TAG --no-wait -n $DEPLOYMENT_NAME +# TODO: Deploy your service using a DynamoGraphDeployment CR. diff --git a/docs/examples/hello_world.md b/docs/examples/hello_world.md index 774efbd57d..2082035f9e 100644 --- a/docs/examples/hello_world.md +++ b/docs/examples/hello_world.md @@ -124,8 +124,7 @@ cd $PROJECT_ROOT/examples/hello_world DYNAMO_TAG=$(dynamo build hello_world:Frontend | grep "Successfully built" | awk '{ print $3 }' | sed 's/\.$//') # Deploy to Kubernetes -export DEPLOYMENT_NAME=ci-hw -dynamo deployment create $DYNAMO_TAG -n $DEPLOYMENT_NAME +# TODO: Deploy your service using a DynamoGraphDeployment CR. ``` ### Testing the Deployment diff --git a/docs/examples/llm_deployment.md b/docs/examples/llm_deployment.md index dab6d14e66..1240590974 100644 --- a/docs/examples/llm_deployment.md +++ b/docs/examples/llm_deployment.md @@ -243,7 +243,7 @@ DYNAMO_TAG=$(dynamo build graphs.agg:Frontend | grep "Successfully built" | awk # Deploy to Kubernetes export DEPLOYMENT_NAME=llm-agg -dynamo deployment create $DYNAMO_TAG -n $DEPLOYMENT_NAME -f ./configs/agg.yaml +# TODO: Deploy your service using a DynamoGraphDeployment CR. ``` **Note**: Optionally add `--Planner.no-operation=false` at the end of the deployment command to enable the planner component to take scaling actions on your deployment. diff --git a/docs/guides/cli_overview.md b/docs/guides/cli_overview.md index d976e20089..024b1babd8 100644 --- a/docs/guides/cli_overview.md +++ b/docs/guides/cli_overview.md @@ -91,24 +91,4 @@ cd examples/hello_world dynamo build hello_world:Frontend ``` -### `deploy` - -Use `deploy` to create a pipeline on Dynamo Cloud using either interactive prompts or a YAML configuration file. For more details, see [Deploying Inference Graphs to Kubernetes](dynamo_deploy/README.md). - -#### Usage -```bash -dynamo deploy [PIPELINE] -``` - -#### Arguments -* `PIPELINE`: The pipeline to deploy; defaults to *None*; required - -#### Flags -* `--name`/`-n`: Set the deployment name. Defaults to *None*; required -* `--config-file`/`-f`: Specify the configuration file path. Defaults to *None*; required -* `--wait`/`--no-wait`: Choose whether to wait for deployment readiness. Defaults to wait -* `--timeout`: Set maximum deployment time in seconds. Defaults to 3600 -* `--endpoint`/`-e`: Specify the Dynamo Cloud deployment endpoint. Defaults to *None*; required -* `--help`/`-h`: Display command help - For a detailed deployment example, see [Operator Deployment](dynamo_deploy/operator_deployment.md). diff --git a/docs/guides/dynamo_deploy/operator_deployment.md b/docs/guides/dynamo_deploy/operator_deployment.md index e96431f3a4..0db7821fea 100644 --- a/docs/guides/dynamo_deploy/operator_deployment.md +++ b/docs/guides/dynamo_deploy/operator_deployment.md @@ -114,15 +114,7 @@ DYNAMO_TAG=$(dynamo build hello_world:Frontend | grep "Successfully built" | awk ### 3. Deploy to Kubernetes -Deploy your service using the Dynamo deployment command: - -```bash -# Set your Helm release name -export DEPLOYMENT_NAME=hello-world - -# Create the deployment -dynamo deployment create $DYNAMO_TAG -n $DEPLOYMENT_NAME -``` +TODO: Deploy your service using a DynamoGraphDeployment CR. #### Managing Deployments diff --git a/examples/llm/README.md b/examples/llm/README.md index c5fe4cb06d..1e27e1dad2 100644 --- a/examples/llm/README.md +++ b/examples/llm/README.md @@ -228,7 +228,7 @@ DYNAMO_TAG=$(dynamo build graphs.agg:Frontend | grep "Successfully built" | awk # Deploy to Kubernetes export DEPLOYMENT_NAME=llm-agg -dynamo deployment create $DYNAMO_TAG -n $DEPLOYMENT_NAME -f ./configs/agg.yaml +# TODO: Deploy your service using a DynamoGraphDeployment CR. ``` **Note**: To avoid rate limiting from unauthenticated requests to HuggingFace (HF), you can provide your `HF_TOKEN` as a secret in your deployment. See the [operator deployment guide](../../docs/guides/dynamo_deploy/operator_deployment.md#referencing-secrets-in-your-deployment) for instructions on referencing secrets like `HF_TOKEN` in your deployment configuration. diff --git a/examples/multimodal/README.md b/examples/multimodal/README.md index df48b3972e..76a734f246 100644 --- a/examples/multimodal/README.md +++ b/examples/multimodal/README.md @@ -207,23 +207,7 @@ export DYNAMO_CLOUD=http://localhost:8080 # If using port-forward # Build the Dynamo base image (see operator_deployment.md for details) export DYNAMO_IMAGE=/: -# Build the service -cd $PROJECT_ROOT/examples/multimodal -DYNAMO_TAG=$(dynamo build graphs.agg:Frontend | grep "Successfully built" | awk '{ print $NF }' | sed 's/\.$//') -# For disaggregated serving: -# DYNAMO_TAG=$(dynamo build graphs.disagg:Frontend | grep "Successfully built" | awk '{ print $NF }' | sed 's/\.$//') - -# Deploy to Kubernetes -export DEPLOYMENT_NAME=multimodal-agg -# For aggregated serving with LLaVA: -dynamo deploy $DYNAMO_TAG -n $DEPLOYMENT_NAME -f ./configs/agg-llava.yaml -# For aggregated serving with Qwen2.5-VL: -# dynamo deploy $DYNAMO_TAG -n $DEPLOYMENT_NAME -f ./configs/agg-qwen.yaml -# For aggregated serving with Phi3V: -# dynamo deploy $DYNAMO_TAG -n $DEPLOYMENT_NAME -f ./configs/agg-phi3v.yaml -# For disaggregated serving: -# export DEPLOYMENT_NAME=multimodal-disagg -# dynamo deploy $DYNAMO_TAG -n $DEPLOYMENT_NAME -f ./configs/disagg.yaml +# TODO: Apply Dynamo graph deployment for the example ``` **Note**: To avoid rate limiting from unauthenticated requests to HuggingFace (HF), you can provide your `HF_TOKEN` as a secret in your deployment. See the [operator deployment guide](../../docs/guides/dynamo_deploy/operator_deployment.md#referencing-secrets-in-your-deployment) for instructions on referencing secrets like `HF_TOKEN` in your deployment configuration.