From 846ad030f60cd5fb3a354274fc8306f08b3aafc8 Mon Sep 17 00:00:00 2001
From: rzou
Date: Fri, 18 Apr 2025 07:20:15 -0700
Subject: [PATCH] [easy] Pass compile_fx only the config patches

Previously we were passing compile_fx the entire default inductor config
with the patches applied to it. compile_fx only needs to be passed the
patches (reference: https://github.com/pytorch/pytorch/blob/29317f8585ecb232412df3f39734490f0f6d8230/torch/_inductor/compile_fx.py#L1873-L1880)

This PR changes vLLM to only pass the patches. This makes debugging
things easier (I can stare at just the delta and see what vLLM changed).

Test Plan:

I ran the following command and verified that performance didn't change.
```
VLLM_USE_V1=1 python benchmark_latency.py --model meta-llama/Meta-Llama-3-8B --batch-size 1 -O '{"level": 3, "compile_sizes": {1, 2}}'
```

Signed-off-by: rzou
---
 vllm/compilation/compiler_interface.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm/compilation/compiler_interface.py b/vllm/compilation/compiler_interface.py
index f6c752073c7d..833be289265f 100644
--- a/vllm/compilation/compiler_interface.py
+++ b/vllm/compilation/compiler_interface.py
@@ -167,8 +167,7 @@ def compile(
         compiler_config: Dict[str, Any],
         runtime_shape: Optional[int] = None
     ) -> Tuple[Optional[Callable], Optional[Any]]:
-        from torch._inductor import config
-        current_config = config.get_config_copy()
+        current_config = {}
         from torch._inductor.compile_fx import compile_fx
 
         # disable remote cache