From 85c4514e0d6aa6be4c7161313500825f80058fa8 Mon Sep 17 00:00:00 2001 From: Shiyan Deng Date: Mon, 10 Feb 2025 09:52:44 -0800 Subject: [PATCH] Changes done internally at Facebook 61d4518a05584a98a79c78cd1ec97e1e2e42df74 Lucia (Lu) Fang [vllm][integration] multi-host TP + PP support in fb path 9ede353980e9181ffe8081cc0f88a93db3824e0e Lu Fang Move the scripts to vllm scripts folder 590a9e49b79dcef10b23eba0b0fecef447913969 Lu Fang Clean up github_sync in vllm 935d02cae0dd8b3477a8783dec7902e74ec59972 Lu Fang Update the OSS script to install pip setuptools_scm package 9d5af8791b3a270e0cc893b1c7661f8376b318e6 Lu Fang Add AMD build for vllm 7a4b78620798e57ea847c8a57b679327d80789fa dsy842974287 formatting acd39af710e710e2c1ad2203fa2b216159641006 dsy842974287 clean up for PR 33aac23dee6af99fafbecdb63d0c5118295eab80 dsy842974287 aaa Signed-off-by: Shiyan Deng --- csrc/moe/moe_align_sum_kernels.cu | 2 +- csrc/rocm/attention.cu | 2 +- vllm/model_executor/models/registry.py | 15 +++++++++++---- vllm/transformers_utils/configs/__init__.py | 2 +- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/csrc/moe/moe_align_sum_kernels.cu b/csrc/moe/moe_align_sum_kernels.cu index 01dac4044650..c072744f0668 100644 --- a/csrc/moe/moe_align_sum_kernels.cu +++ b/csrc/moe/moe_align_sum_kernels.cu @@ -3,7 +3,7 @@ #include #include -#include +#include #include "../cuda_compat.h" #include "../dispatch_utils.h" diff --git a/csrc/rocm/attention.cu b/csrc/rocm/attention.cu index ffa9d44610a7..366b3cdc23aa 100644 --- a/csrc/rocm/attention.cu +++ b/csrc/rocm/attention.cu @@ -1122,4 +1122,4 @@ void paged_attention( #undef WARP_SIZE #undef MAX #undef MIN -#undef DIVIDE_ROUND_UP \ No newline at end of file +#undef DIVIDE_ROUND_UP diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index c2d0fae7056c..c61291860554 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -201,6 +201,14 @@ **_FALLBACK_MODEL, } +# This variable is used as the args for subprocess.run(). We +# can modify this variable to alter the args if needed. e.g. +# when we use par format to pack things together, sys.executable +# might not be the target we want to run. +_SUBPROCESS_COMMAND = [ + sys.executable, "-m", "vllm.model_executor.models.registry" +] + @dataclass(frozen=True) class _ModelInfo: @@ -498,10 +506,9 @@ def _run_in_subprocess(fn: Callable[[], _T]) -> _T: # cannot use `sys.executable __file__` here because the script # contains relative imports - returned = subprocess.run( - [sys.executable, "-m", "vllm.model_executor.models.registry"], - input=input_bytes, - capture_output=True) + returned = subprocess.run(_SUBPROCESS_COMMAND, + input=input_bytes, + capture_output=True) # check if the subprocess is successful try: diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py index c484a755ab4e..9060565596b2 100644 --- a/vllm/transformers_utils/configs/__init__.py +++ b/vllm/transformers_utils/configs/__init__.py @@ -45,4 +45,4 @@ "SolarConfig", "Telechat2Config", "UltravoxConfig", -] \ No newline at end of file +]