From 55b70f9c3d5bdf63459e14f0a27aae74e0460869 Mon Sep 17 00:00:00 2001 From: Anant Sharma Date: Thu, 10 Jul 2025 17:08:34 -0400 Subject: [PATCH 1/3] chore: update nixl to 0.4.0 release (#1860) --- Cargo.lock | 4 ++-- container/build.sh | 2 +- docs/support_matrix.md | 6 +++--- lib/bindings/python/Cargo.lock | 4 ++-- lib/llm/Cargo.toml | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0782b14612..a1227132b3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4388,9 +4388,9 @@ dependencies = [ [[package]] name = "nixl-sys" -version = "0.3.1" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55f74dbb6d0e18023aa6ce29563114b848f23c3c2f9d663383eb3cd590a1eacc" +checksum = "97f621270fd1ed8af5a8028a1945e6f7e612a38836ce82b720fe54222739df3c" dependencies = [ "bindgen 0.71.1", "cc", diff --git a/container/build.sh b/container/build.sh index 7a1e843ea3..2133c96096 100755 --- a/container/build.sh +++ b/container/build.sh @@ -114,7 +114,7 @@ SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" VLLM_V1_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" VLLM_V1_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" -NIXL_COMMIT=3503658e71143b56f9d5b1b440d84a94b9c41af8 +NIXL_COMMIT=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4 NIXL_REPO=ai-dynamo/nixl.git NIXL_UCX_EFA_REF=7ec95b95e524a87e81cac92f5ca8523e3966b16b diff --git a/docs/support_matrix.md b/docs/support_matrix.md index fadcad1343..41738cf331 100644 --- a/docs/support_matrix.md +++ b/docs/support_matrix.md @@ -73,7 +73,7 @@ If you are using a **GPU**, the following GPU models and architectures are suppo | ai-dynamo | 0.3.2 | >=2.28 | | | ai-dynamo-runtime | 0.3.2 | >=2.28 (Python 3.12 has known issues)| | | ai-dynamo-vllm | 0.8.4.post4¹ | >=2.28 (recommended) | | -| NIXL | 0.3.1 | >=2.27 | >=11.8 | +| NIXL | 0.4.0 | >=2.27 | >=11.8 | ### Build Dependency @@ -81,8 +81,8 @@ If you are using a **GPU**, the following GPU models and architectures are suppo | :------------------- | :------------------------------------------------------------------------------- | | **Base Container** | [25.03](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda-dl-base/tags) | | **ai-dynamo-vllm** | 0.8.4.post4¹ | -| **TensorRT-LLM** | 1.0.0rc² | -| **NIXL** | 0.3.1 | +| **TensorRT-LLM** | 1.0.0rc² | +| **NIXL** | 0.4.0 | > [!Important] > ¹ ai-dynamo-vllm `v0.8.4.post4` is a customized patch of `v0.8.4` from vLLM. diff --git a/lib/bindings/python/Cargo.lock b/lib/bindings/python/Cargo.lock index 6bd829a42a..428fc32ca0 100644 --- a/lib/bindings/python/Cargo.lock +++ b/lib/bindings/python/Cargo.lock @@ -3934,9 +3934,9 @@ dependencies = [ [[package]] name = "nixl-sys" -version = "0.3.1" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55f74dbb6d0e18023aa6ce29563114b848f23c3c2f9d663383eb3cd590a1eacc" +checksum = "97f621270fd1ed8af5a8028a1945e6f7e612a38836ce82b720fe54222739df3c" dependencies = [ "bindgen 0.71.1", "cc", diff --git a/lib/llm/Cargo.toml b/lib/llm/Cargo.toml index 62403dd029..c6b7ba68dc 100644 --- a/lib/llm/Cargo.toml +++ b/lib/llm/Cargo.toml @@ -89,7 +89,7 @@ rayon = "1" dialoguer = { version = "0.11", default-features = false, features = ["editor", "history"] } # block_manager -nixl-sys = {version = "0.3.1", optional = true } +nixl-sys = {version = "0.4.0", optional = true } cudarc = { version = "0.16.2", features = ["cuda-12020"], optional = true } ndarray = { version = "0.16", optional = true } nix = { version = "0.26", optional = true } From e9288efc2759c7bf1663df85e74d020aadb976bc Mon Sep 17 00:00:00 2001 From: Anant Sharma Date: Fri, 11 Jul 2025 08:02:54 -0700 Subject: [PATCH 2/3] temp: test failure --- Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4ee2525d4d..01c071d0ed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -89,7 +89,7 @@ insta.opt-level = 3 # release level optimizations otherwise everything feels slow opt-level = 3 -[profile.release] +#[profile.release] # These make the build much slower but shrink the binary, and could help performance -codegen-units = 1 -lto = true +#codegen-units = 1 +#lto = true From 63dc64751aceb20a805bbb07b0782135d12e5a51 Mon Sep 17 00:00:00 2001 From: Anant Sharma Date: Fri, 11 Jul 2025 08:36:37 -0700 Subject: [PATCH 3/3] Revert "temp: test failure" This reverts commit e9288efc2759c7bf1663df85e74d020aadb976bc. --- Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 01c071d0ed..4ee2525d4d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -89,7 +89,7 @@ insta.opt-level = 3 # release level optimizations otherwise everything feels slow opt-level = 3 -#[profile.release] +[profile.release] # These make the build much slower but shrink the binary, and could help performance -#codegen-units = 1 -#lto = true +codegen-units = 1 +lto = true