Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19,719 changes: 19,719 additions & 0 deletions ATTRIBUTIONS-Go.md

Large diffs are not rendered by default.

42,735 changes: 42,735 additions & 0 deletions ATTRIBUTIONS-Python.md

Large diffs are not rendered by default.

119,647 changes: 118,716 additions & 931 deletions ATTRIBUTIONS-Rust.md

Large diffs are not rendered by default.

3,435 changes: 0 additions & 3,435 deletions ATTRIBUTIONS.md

This file was deleted.

64 changes: 38 additions & 26 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ members = [
resolver = "3"

[workspace.package]
version = "0.2.1"
version = "0.3.0"
edition = "2021"
description = "Dynamo Inference Framework"
authors = ["NVIDIA Inc. <sw-dl-dynamo@nvidia.com>"]
Expand All @@ -39,9 +39,9 @@ keywords = ["llm", "genai", "inference", "nvidia", "distributed", "dynamo"]

[workspace.dependencies]
# Local crates
dynamo-runtime = { path = "lib/runtime", version = "0.2.1" }
dynamo-llm = { path = "lib/llm", version = "0.2.1" }
dynamo-tokens = { path = "lib/tokens", version = "0.2.1" }
dynamo-runtime = { path = "lib/runtime", version = "0.3.0" }
dynamo-llm = { path = "lib/llm", version = "0.3.0" }
dynamo-tokens = { path = "lib/tokens", version = "0.3.0" }

# External dependencies
anyhow = { version = "1" }
Expand Down
5 changes: 3 additions & 2 deletions container/Dockerfile.sglang
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,9 @@ COPY --from=wheel_builder /opt/nvidia/nvda_nixl /opt/nvidia/nvda_nixl
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME

# Copy rest of the code
COPY . /workspace

# Build C bindings, creates lib/bindings/c/include
RUN cd /workspace/lib/bindings/c && cargo build --release --locked

Expand Down Expand Up @@ -365,8 +368,6 @@ ARG GENAI_PERF_VERSION
# Install genai-perf for benchmarking
RUN uv pip install genai-perf==$GENAI_PERF_VERSION

COPY . /workspace

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

CMD []
Expand Down
3 changes: 3 additions & 0 deletions container/Dockerfile.tensorrt_llm
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,9 @@ COPY --from=wheel_builder /workspace /workspace
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME

# Copy rest of the code
COPY . /workspace

# Build C bindings, creates lib/bindings/c/include
RUN cd /workspace/lib/bindings/c && cargo build --release --locked

Expand Down
7 changes: 4 additions & 3 deletions container/Dockerfile.vllm
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ RUN uv pip install /workspace/wheels/nixl/*.whl
ARG VLLM_REF="0.8.4"
ARG VLLM_PATCH="vllm_v${VLLM_REF}-dynamo-kv-disagg-patch.patch"
ARG VLLM_PATCHED_PACKAGE_NAME="ai_dynamo_vllm"
ARG VLLM_PATCHED_PACKAGE_VERSION="0.8.4.post1"
ARG VLLM_PATCHED_PACKAGE_VERSION="0.8.4.post2"
ARG VLLM_MAX_JOBS=4
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
--mount=type=cache,target=/root/.cache/uv \
Expand Down Expand Up @@ -403,6 +403,9 @@ COPY --from=wheel_builder /opt/nvidia/nvda_nixl /opt/nvidia/nvda_nixl
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME

# Copy rest of the code
COPY . /workspace

# Build C bindings, creates lib/bindings/c/include
#
# TODO: In theory the 'cargo build' in earlier stage covers this, we "just" need to copy the
Expand Down Expand Up @@ -447,8 +450,6 @@ ARG GENAI_PERF_VERSION
# Install genai-perf for benchmarking
RUN uv pip install genai-perf==$GENAI_PERF_VERSION

COPY . /workspace

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

CMD []
Expand Down
2 changes: 1 addition & 1 deletion container/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ NONE_BASE_IMAGE_TAG="24.04"
SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"

NIXL_COMMIT=78695c2900cd7fff506764377386592dfc98e87e
NIXL_COMMIT=f531404be4866d85ed618b3baf4008c636798d63
NIXL_REPO=ai-dynamo/nixl.git

NO_CACHE=""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ const (
KubeAnnotationLWSSize = "nvidia.com/lws-size"
DeploymentTypeStandard = "standard"
DeploymentTypeLeaderWorker = "leader-worker"
ComponentTypePlanner = "Planner"
)

// DynamoComponentDeploymentReconciler reconciles a DynamoComponentDeployment object
Expand Down Expand Up @@ -1454,7 +1455,9 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
if opt.dynamoComponentDeployment.Spec.DynamoNamespace != nil && *opt.dynamoComponentDeployment.Spec.DynamoNamespace != "" {
args = append(args, fmt.Sprintf("--%s.ServiceArgs.dynamo.namespace=%s", opt.dynamoComponentDeployment.Spec.ServiceName, *opt.dynamoComponentDeployment.Spec.DynamoNamespace))
}
args = append(args, fmt.Sprintf("--%s.environment=%s", opt.dynamoComponentDeployment.Spec.ServiceName, KubernetesDeploymentStrategy))
if componentType, exists := opt.dynamoComponentDeployment.Labels[commonconsts.KubeLabelDynamoComponent]; exists && componentType == ComponentTypePlanner {
args = append(args, fmt.Sprintf("--%s.environment=%s", opt.dynamoComponentDeployment.Spec.ServiceName, KubernetesDeploymentStrategy))
}
}

if len(opt.dynamoComponentDeployment.Spec.Envs) > 0 {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -934,7 +934,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Name: "main",
Image: "test-image:latest",
Command: []string{"sh", "-c"},
Args: []string{"ray start --head --port=6379 && cd src && uv run dynamo serve --system-app-port 5000 --enable-system-app --use-default-health-checks --service-name test-lws-deploy-service test-tag --test-lws-deploy-service.ServiceArgs.dynamo.namespace=default --test-lws-deploy-service.environment=kubernetes"},
Args: []string{"ray start --head --port=6379 && cd src && uv run dynamo serve --system-app-port 5000 --enable-system-app --use-default-health-checks --service-name test-lws-deploy-service test-tag --test-lws-deploy-service.ServiceArgs.dynamo.namespace=default"},
Env: []corev1.EnvVar{{Name: "DYNAMO_PORT", Value: "3000"}},
VolumeMounts: []corev1.VolumeMount{
{
Expand Down
10 changes: 7 additions & 3 deletions deploy/sdk/src/dynamo/sdk/cli/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,18 @@ def from_service(cls, service: ServiceInterface[T]) -> ServiceInfo:
if DynamoTransport.HTTP in endpoint.transports:
api_endpoints.append(f"/{ep_name}")

image = service.config.image or DYNAMO_IMAGE
assert (
image is not None
), "Please set DYNAMO_IMAGE environment variable or image field in service config"

# Create config
config = ServiceConfig(
name=name,
service="",
resource=service.config.resource.model_dump(),
resource=service.config.resources.model_dump(),
workers=service.config.workers,
image=service.config.image,
image=image,
dynamo=service.config.dynamo.model_dump(),
http_exposed=len(api_endpoints) > 0,
api_endpoints=api_endpoints,
Expand Down Expand Up @@ -423,7 +428,6 @@ def to_package_name(name: str) -> str:
s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
s2 = re.sub("([a-z0-9])([A-Z])", r"\1_\2", s1)
ret = s2.replace(":", "_")
print(f"Converting {name} to snake_case: {ret}")
return ret

@staticmethod
Expand Down
1 change: 0 additions & 1 deletion deploy/sdk/src/dynamo/sdk/core/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ def service(
) -> Any:
"""Service decorator that's adapter-agnostic"""
config = ServiceConfig(**kwargs)
logger.info(f"inner: {inner} config: {config}")

def decorator(inner: Type[G]) -> ServiceInterface[G]:
provider = get_target()
Expand Down
18 changes: 12 additions & 6 deletions deploy/sdk/src/dynamo/sdk/core/protocol/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@
from abc import ABC, abstractmethod
from collections import defaultdict
from enum import Enum, auto
from typing import Any, Dict, Generic, List, Optional, Set, Tuple, Type, TypeVar
from typing import Any, Dict, Generic, List, Optional, Set, Tuple, Type, TypeVar, Union

from fastapi import FastAPI
from pydantic import BaseModel
from pydantic import BaseModel, Field, field_validator

from dynamo.sdk.core.protocol.deployment import Env

Expand Down Expand Up @@ -59,16 +59,22 @@ class DynamoTransport(Enum):
class ResourceConfig(BaseModel):
"""Configuration for Dynamo resources"""

cpu: int = 1
memory: str = "100Mi"
gpu: str = "0"
cpu: str = Field(default="1")
memory: str = Field(default="500Mi")
gpu: str = Field(default="0")

@field_validator("gpu", mode="before")
@classmethod
def convert_gpu_to_string(cls, v: Union[str, int]) -> str:
"""Convert gpu value to string if it's an integer"""
return str(v)


class ServiceConfig(BaseModel):
"""Base service configuration that can be extended by adapters"""

dynamo: DynamoConfig
resource: ResourceConfig = ResourceConfig()
resources: ResourceConfig = ResourceConfig()
workers: int = 1
image: str | None = None
envs: List[Env] | None = None
Expand Down
7 changes: 6 additions & 1 deletion deploy/sdk/src/dynamo/sdk/tests/test_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import pytest

from dynamo.sdk.cli.utils import configure_target_environment
from dynamo.sdk.core.protocol.interface import ServiceInterface
from dynamo.sdk.core.runner import TargetEnum

pytestmark = pytest.mark.pre_merge
Expand All @@ -40,4 +41,8 @@ class MyService:
def __init__(self) -> None:
pass

assert MyService.config is not None # type: ignore
dyn_svc: ServiceInterface = MyService
assert dyn_svc.config is not None # type: ignore
assert dyn_svc.config.resources.cpu == "2"
assert dyn_svc.config.resources.gpu == "1"
assert dyn_svc.config.resources.memory == "4Gi"
Loading
Loading