77import logging
88import os
99import re
10+ import shutil
1011import subprocess
1112import sys
1213from pathlib import Path
@@ -281,10 +282,81 @@ def run(self):
281282 self .copy_file (file , dst_file )
282283
283284
284- class repackage_wheel (build_ext ):
class precompiled_build_ext(build_ext):
    """Disables extension building when using precompiled binaries."""

    def run(self) -> None:
        """Check that this is a CUDA build; compile nothing.

        The base-class ``run`` is intentionally not invoked, so no
        extension build is started from here.

        Raises:
            AssertionError: If ``_is_cuda()`` returns a falsy value.
        """
        # Raised explicitly instead of using ``assert`` so the check is not
        # stripped when the interpreter runs with ``-O``.
        if not _is_cuda():
            raise AssertionError(
                "VLLM_USE_PRECOMPILED is only supported for CUDA builds")

    def build_extensions(self) -> None:
        """Print a notice instead of building any extension."""
        print("Skipping build_ext: using precompiled extensions.")
295+
296+
297+ class precompiled_wheel_utils :
285298 """Extracts libraries and other files from an existing wheel."""
286299
287- def get_base_commit_in_main_branch (self ) -> str :
@staticmethod
def extract_precompiled_and_patch_package(
        wheel_url_or_path: str) -> dict[str, list[str]]:
    """Copy precompiled artifacts out of a wheel into the working tree.

    Args:
        wheel_url_or_path: Path of an existing wheel file. Anything that is
            not an existing file is treated as a URL and downloaded into a
            temporary directory first.

    Returns:
        A mapping from dotted package name (the member's directory with
        ``/`` replaced by ``.``) to the base names of the files extracted
        for that package.

    Raises:
        Whatever ``urlretrieve``, ``zipfile.ZipFile`` or the file writes
        raise; nothing is caught here. The temporary download directory,
        if one was created, is removed in every case.
    """
    import tempfile
    import zipfile

    # Shared objects copied verbatim when the wheel contains them.
    files_to_copy = frozenset({
        "vllm/_C.abi3.so",
        "vllm/_moe_C.abi3.so",
        "vllm/_flashmla_C.abi3.so",
        "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
        "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
        "vllm/cumem_allocator.abi3.so",
    })
    # Non-hidden ``*.py`` files anywhere below vllm/vllm_flash_attn/.
    compiled_regex = re.compile(
        r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")

    temp_dir = None
    try:
        if os.path.isfile(wheel_url_or_path):
            wheel_path = wheel_url_or_path
            print(f"Using existing wheel at {wheel_path}")
        else:
            # Assume the file name is the last component of the URL.
            wheel_filename = wheel_url_or_path.split("/")[-1]
            temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
            wheel_path = os.path.join(temp_dir, wheel_filename)
            print(f"Downloading wheel from {wheel_url_or_path} "
                  f"to {wheel_path}")
            from urllib.request import urlretrieve
            urlretrieve(wheel_url_or_path, filename=wheel_path)

        package_data_patch: dict[str, list[str]] = {}

        with zipfile.ZipFile(wheel_path) as wheel:
            members = wheel.filelist
            file_members = [
                member for member in members
                if member.filename in files_to_copy
            ]
            # ``fullmatch`` rather than ``match``: the pattern has no end
            # anchor, so ``match`` would also accept names that merely
            # start with a ``.py`` path (``x.pyc``, ``x.pyi``, ``x.py.bak``).
            file_members += [
                member for member in members
                if compiled_regex.fullmatch(member.filename)
            ]

            for file in file_members:
                print(f"[extract] {file.filename}")
                # Written relative to the current working directory.
                target_path = os.path.join(".", file.filename)
                os.makedirs(os.path.dirname(target_path), exist_ok=True)
                with wheel.open(file.filename) as src, open(
                        target_path, "wb") as dst:
                    shutil.copyfileobj(src, dst)

                pkg = os.path.dirname(file.filename).replace("/", ".")
                package_data_patch.setdefault(pkg, []).append(
                    os.path.basename(file.filename))

        return package_data_patch
    finally:
        if temp_dir is not None:
            print(f"Removing temporary directory {temp_dir}")
            shutil.rmtree(temp_dir)
357+
358+ @staticmethod
359+ def get_base_commit_in_main_branch () -> str :
288360 # Force to use the nightly wheel. This is mainly used for CI testing.
289361 if envs .VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL :
290362 return "nightly"
@@ -297,6 +369,10 @@ def get_base_commit_in_main_branch(self) -> str:
297369 ]).decode ("utf-8" )
298370 upstream_main_commit = json .loads (resp_json )["sha" ]
299371
372+ # In Docker build context, .git may be immutable or missing.
373+ if envs .VLLM_DOCKER_BUILD_CONTEXT :
374+ return upstream_main_commit
375+
300376 # Check if the upstream_main_commit exists in the local repo
301377 try :
302378 subprocess .check_output (
@@ -329,86 +405,6 @@ def get_base_commit_in_main_branch(self) -> str:
329405 "wheel may not be compatible with your dev branch: %s" , err )
330406 return "nightly"
331407
332- def run (self ) -> None :
333- assert _is_cuda (
334- ), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
335-
336- wheel_location = os .getenv ("VLLM_PRECOMPILED_WHEEL_LOCATION" , None )
337- if wheel_location is None :
338- base_commit = self .get_base_commit_in_main_branch ()
339- wheel_location = f"https://wheels.vllm.ai/{ base_commit } /vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
340- # Fallback to nightly wheel if latest commit wheel is unavailable,
341- # in this rare case, the nightly release CI hasn't finished on main.
342- if not is_url_available (wheel_location ):
343- wheel_location = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
344-
345- import zipfile
346-
347- if os .path .isfile (wheel_location ):
348- wheel_path = wheel_location
349- print (f"Using existing wheel={ wheel_path } " )
350- else :
351- # Download the wheel from a given URL, assume
352- # the filename is the last part of the URL
353- wheel_filename = wheel_location .split ("/" )[- 1 ]
354-
355- import tempfile
356-
357- # create a temporary directory to store the wheel
358- temp_dir = tempfile .mkdtemp (prefix = "vllm-wheels" )
359- wheel_path = os .path .join (temp_dir , wheel_filename )
360-
361- print (f"Downloading wheel from { wheel_location } to { wheel_path } " )
362-
363- from urllib .request import urlretrieve
364-
365- try :
366- urlretrieve (wheel_location , filename = wheel_path )
367- except Exception as e :
368- from setuptools .errors import SetupError
369-
370- raise SetupError (
371- f"Failed to get vLLM wheel from { wheel_location } " ) from e
372-
373- with zipfile .ZipFile (wheel_path ) as wheel :
374- files_to_copy = [
375- "vllm/_C.abi3.so" ,
376- "vllm/_moe_C.abi3.so" ,
377- "vllm/_flashmla_C.abi3.so" ,
378- "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so" ,
379- "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so" ,
380- "vllm/cumem_allocator.abi3.so" ,
381- # "vllm/_version.py", # not available in nightly wheels yet
382- ]
383-
384- file_members = list (
385- filter (lambda x : x .filename in files_to_copy , wheel .filelist ))
386-
387- # vllm_flash_attn python code:
388- # Regex from
389- # `glob.translate('vllm/vllm_flash_attn/**/*.py', recursive=True)`
390- compiled_regex = re .compile (
391- r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py" )
392- file_members += list (
393- filter (lambda x : compiled_regex .match (x .filename ),
394- wheel .filelist ))
395-
396- for file in file_members :
397- print (f"Extracting and including { file .filename } "
398- "from existing wheel" )
399- package_name = os .path .dirname (file .filename ).replace ("/" , "." )
400- file_name = os .path .basename (file .filename )
401-
402- if package_name not in package_data :
403- package_data [package_name ] = []
404-
405- wheel .extract (file )
406- if file_name .endswith (".py" ):
407- # python files shouldn't be added to package_data
408- continue
409-
410- package_data [package_name ].append (file_name )
411-
412408
def _no_device() -> bool:
    """Return whether ``VLLM_TARGET_DEVICE`` is the string ``"empty"``."""
    targets_empty = VLLM_TARGET_DEVICE == "empty"
    return targets_empty
@@ -639,6 +635,29 @@ def _read_requirements(filename: str) -> list[str]:
639635 ]
640636}
641637
# If using precompiled, extract and patch package_data (in advance of setup)
if envs.VLLM_USE_PRECOMPILED:
    # Raised explicitly instead of using ``assert`` so the check is not
    # stripped when the interpreter runs with ``-O``.
    if not _is_cuda():
        raise AssertionError(
            "VLLM_USE_PRECOMPILED is only supported for CUDA builds")

    # An explicit location (local path or URL) takes precedence over the
    # commit-derived wheel URL.
    wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
    if wheel_location is not None:
        wheel_url = wheel_location
    else:
        nightly_wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
        base_commit = precompiled_wheel_utils.get_base_commit_in_main_branch()
        wheel_url = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
        from urllib.request import urlopen
        try:
            # Bounded probe: without a timeout an unresponsive server would
            # block the build indefinitely.
            with urlopen(wheel_url, timeout=30) as resp:
                if resp.status != 200:
                    wheel_url = nightly_wheel_url
        except Exception as e:
            # Deliberately broad: any failure to reach the per-commit wheel
            # falls back to the nightly wheel rather than aborting here.
            print(f"[warn] Falling back to nightly wheel: {e}")
            wheel_url = nightly_wheel_url

    # Copy the precompiled files into the tree and register them so they
    # are included through package_data.
    patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
        wheel_url)
    for pkg, files in patch.items():
        package_data.setdefault(pkg, []).extend(files)
660+
642661if _no_device ():
643662 ext_modules = []
644663
@@ -647,7 +666,7 @@ def _read_requirements(filename: str) -> list[str]:
647666else :
648667 cmdclass = {
649668 "build_ext" :
650- repackage_wheel if envs .VLLM_USE_PRECOMPILED else cmake_build_ext
669+ precompiled_build_ext if envs .VLLM_USE_PRECOMPILED else cmake_build_ext
651670 }
652671
653672setup (
0 commit comments