From d13e504a6e263c00742a18234837b33b67af8527 Mon Sep 17 00:00:00 2001 From: simon-mo Date: Tue, 23 Apr 2024 21:27:14 -0700 Subject: [PATCH 01/10] [CI] check size of the wheels --- Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Dockerfile b/Dockerfile index d1d29177b0f44..3e1e809c21f18 100644 --- a/Dockerfile +++ b/Dockerfile @@ -71,6 +71,10 @@ RUN --mount=type=cache,target=/root/.cache/ccache \ --mount=type=cache,target=/root/.cache/pip \ python3 setup.py bdist_wheel --dist-dir=dist +# one liner to check that the size of the dist is <100MB, this is used as a test +# case in CI system because PyPI limits the size of the wheel <100MB. +RUN du -sm dist | awk '{if ($1 > 100) exit 1}' || exit 1 + # the `vllm_nccl` package must be installed from source distribution # pip is too smart to store a wheel in the cache, and other CI jobs # will directly use the wheel from the cache, which is not what we want. From 62fa627e5a585fc2929763a4127e167191177f73 Mon Sep 17 00:00:00 2001 From: simon-mo Date: Thu, 25 Apr 2024 01:42:33 +0000 Subject: [PATCH 02/10] try using bash --- Dockerfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3e1e809c21f18..0174b36aa48bc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ #################### BASE BUILD IMAGE #################### # prepare basic build environment -FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS dev +FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS dev RUN apt-get update -y \ && apt-get install -y python3-pip git @@ -73,7 +73,8 @@ RUN --mount=type=cache,target=/root/.cache/ccache \ # one liner to check that the size of the dist is <100MB, this is used as a test # case in CI system because PyPI limits the size of the wheel <100MB. -RUN du -sm dist | awk '{if ($1 > 100) exit 1}' || exit 1 +RUN cd dist && fallocate -l 120M test_wheel +RUN bash -c 'du -sm dist | awk \'{if ($1 > 100) exit 1}\'' # the `vllm_nccl` package must be installed from source distribution # pip is too smart to store a wheel in the cache, and other CI jobs @@ -102,7 +103,7 @@ RUN pip --verbose wheel flash-attn==${FLASH_ATTN_VERSION} \ #################### vLLM installation IMAGE #################### # image with vLLM installed -FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS vllm-base +FROM nvidia/cuda:12.4.1-base-ubuntu22.04 AS vllm-base WORKDIR /vllm-workspace RUN apt-get update -y \ From 75bb9dd5b2a23857706c19afd5b75c9c17b5da7d Mon Sep 17 00:00:00 2001 From: simon-mo Date: Fri, 26 Apr 2024 20:42:48 +0000 Subject: [PATCH 03/10] wip --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 0174b36aa48bc..16d263b36f1d7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -64,7 +64,7 @@ ENV MAX_JOBS=${max_jobs} ARG nvcc_threads=8 ENV NVCC_THREADS=$nvcc_threads # make sure punica kernels are built (for LoRA) -ENV VLLM_INSTALL_PUNICA_KERNELS=1 +# ENV VLLM_INSTALL_PUNICA_KERNELS=1 ENV CCACHE_DIR=/root/.cache/ccache RUN --mount=type=cache,target=/root/.cache/ccache \ From 5bcf86cc537375675ac7f227f40c0709247a288b Mon Sep 17 00:00:00 2001 From: simon-mo Date: Fri, 3 May 2024 05:13:05 +0000 Subject: [PATCH 04/10] replace one liner with python script --- .buildkite/check-wheel-size.py | 22 ++++++++++++++++++++++ Dockerfile | 7 +++---- 2 files changed, 25 insertions(+), 4 deletions(-) create mode 100644 .buildkite/check-wheel-size.py diff --git a/.buildkite/check-wheel-size.py b/.buildkite/check-wheel-size.py new file mode 100644 index 0000000000000..ca1045f553ecb --- /dev/null +++ b/.buildkite/check-wheel-size.py @@ -0,0 +1,22 @@ +import os + +MAX_SIZE = 100 * 1024 * 1024 # 100 MB + + +def check_wheel_size(directory): + for root, _, files in os.walk(directory): + for f in files: + if f.endswith(".whl"): + wheel_path = os.path.join(root, f) + wheel_size = os.path.getsize(wheel_path) + if wheel_size > MAX_SIZE: + print( + f"Wheel {wheel_path} is too large ({wheel_size} bytes) " + f"compare to the allowed size ({MAX_SIZE} bytes).") + return 1 + return 0 + + +if __name__ == "__main__": + import sys + sys.exit(check_wheel_size(sys.argv[1])) diff --git a/Dockerfile b/Dockerfile index 16d263b36f1d7..ed0aa9f076fdf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -71,10 +71,9 @@ RUN --mount=type=cache,target=/root/.cache/ccache \ --mount=type=cache,target=/root/.cache/pip \ python3 setup.py bdist_wheel --dist-dir=dist -# one liner to check that the size of the dist is <100MB, this is used as a test -# case in CI system because PyPI limits the size of the wheel <100MB. -RUN cd dist && fallocate -l 120M test_wheel -RUN bash -c 'du -sm dist | awk \'{if ($1 > 100) exit 1}\'' +# check the size of the wheel, we cannot upload wheels larger than 100MB +COPY .buildkite/check-wheel-size.py check-wheel-size.py +RUN python3 check-wheel-size.py dist # the `vllm_nccl` package must be installed from source distribution # pip is too smart to store a wheel in the cache, and other CI jobs From 0ac9953b0bec228afb644e425f8aa8d9c42ae7c7 Mon Sep 17 00:00:00 2001 From: simon-mo Date: Fri, 3 May 2024 05:13:26 +0000 Subject: [PATCH 05/10] revert env --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index ed0aa9f076fdf..eda3d6fbbf207 100644 --- a/Dockerfile +++ b/Dockerfile @@ -64,7 +64,7 @@ ENV MAX_JOBS=${max_jobs} ARG nvcc_threads=8 ENV NVCC_THREADS=$nvcc_threads # make sure punica kernels are built (for LoRA) -# ENV VLLM_INSTALL_PUNICA_KERNELS=1 +ENV VLLM_INSTALL_PUNICA_KERNELS=1 ENV CCACHE_DIR=/root/.cache/ccache RUN --mount=type=cache,target=/root/.cache/ccache \ From 9210e417449e527b9ae3a861180634230deaa031 Mon Sep 17 00:00:00 2001 From: simon-mo Date: Fri, 3 May 2024 10:24:51 -0700 Subject: [PATCH 06/10] address comments --- .buildkite/check-wheel-size.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.buildkite/check-wheel-size.py b/.buildkite/check-wheel-size.py index ca1045f553ecb..0f120423df298 100644 --- a/.buildkite/check-wheel-size.py +++ b/.buildkite/check-wheel-size.py @@ -1,6 +1,6 @@ import os -MAX_SIZE = 100 * 1024 * 1024 # 100 MB +MAX_SIZE_MB = 100 def check_wheel_size(directory): @@ -9,10 +9,11 @@ def check_wheel_size(directory): if f.endswith(".whl"): wheel_path = os.path.join(root, f) wheel_size = os.path.getsize(wheel_path) - if wheel_size > MAX_SIZE: + wheel_size_mb = wheel_size / 1024 * 1024 + if wheel_size > MAX_SIZE_MB: print( - f"Wheel {wheel_path} is too large ({wheel_size} bytes) " - f"compare to the allowed size ({MAX_SIZE} bytes).") + f"Wheel {wheel_path} is too large ({wheel_size_mb} MB) " + f"compare to the allowed size ({MAX_SIZE_MB} MB).") return 1 return 0 From 77738ecd6147f54ba624dbd596c047cfe68284a8 Mon Sep 17 00:00:00 2001 From: simon-mo Date: Fri, 3 May 2024 10:25:56 -0700 Subject: [PATCH 07/10] print file sizes --- .buildkite/check-wheel-size.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.buildkite/check-wheel-size.py b/.buildkite/check-wheel-size.py index 0f120423df298..89672a2ff3ad6 100644 --- a/.buildkite/check-wheel-size.py +++ b/.buildkite/check-wheel-size.py @@ -1,8 +1,17 @@ import os +import zipfile MAX_SIZE_MB = 100 +def print_top_10_largest_files(zip_file): + with zipfile.ZipFile(zip_file, 'r') as z: + file_sizes = [(f, z.getinfo(f).file_size) for f in z.namelist()] + file_sizes.sort(key=lambda x: x[1], reverse=True) + for f, size in file_sizes[:10]: + print(f"{f}: {size/1024*1024} MBs") + + def check_wheel_size(directory): for root, _, files in os.walk(directory): for f in files: @@ -14,6 +23,7 @@ def check_wheel_size(directory): print( f"Wheel {wheel_path} is too large ({wheel_size_mb} MB) " f"compare to the allowed size ({MAX_SIZE_MB} MB).") + print_top_10_largest_files(wheel_path) return 1 return 0 From af8241dc1404d69e9d5b5c69135119252d58c2d5 Mon Sep 17 00:00:00 2001 From: simon-mo Date: Fri, 3 May 2024 10:41:43 -0700 Subject: [PATCH 08/10] print file sizes --- .buildkite/check-wheel-size.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.buildkite/check-wheel-size.py b/.buildkite/check-wheel-size.py index 89672a2ff3ad6..f569ac640fdc2 100644 --- a/.buildkite/check-wheel-size.py +++ b/.buildkite/check-wheel-size.py @@ -9,7 +9,7 @@ def print_top_10_largest_files(zip_file): file_sizes = [(f, z.getinfo(f).file_size) for f in z.namelist()] file_sizes.sort(key=lambda x: x[1], reverse=True) for f, size in file_sizes[:10]: - print(f"{f}: {size/1024*1024} MBs") + print(f"{f}: {size/(1024*1024)} MBs uncompressed.") def check_wheel_size(directory): @@ -18,7 +18,7 @@ def check_wheel_size(directory): if f.endswith(".whl"): wheel_path = os.path.join(root, f) wheel_size = os.path.getsize(wheel_path) - wheel_size_mb = wheel_size / 1024 * 1024 + wheel_size_mb = wheel_size / (1024 * 1024) if wheel_size > MAX_SIZE_MB: print( f"Wheel {wheel_path} is too large ({wheel_size_mb} MB) " From 03e408b3cb62421611d9bf588b8459e2dd268572 Mon Sep 17 00:00:00 2001 From: simon-mo Date: Sat, 4 May 2024 11:48:52 -0700 Subject: [PATCH 09/10] fix comparison --- .buildkite/check-wheel-size.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/check-wheel-size.py b/.buildkite/check-wheel-size.py index f569ac640fdc2..8178fba552c46 100644 --- a/.buildkite/check-wheel-size.py +++ b/.buildkite/check-wheel-size.py @@ -19,7 +19,7 @@ def check_wheel_size(directory): wheel_path = os.path.join(root, f) wheel_size = os.path.getsize(wheel_path) wheel_size_mb = wheel_size / (1024 * 1024) - if wheel_size > MAX_SIZE_MB: + if wheel_size_mb > MAX_SIZE_MB: print( f"Wheel {wheel_path} is too large ({wheel_size_mb} MB) " f"compare to the allowed size ({MAX_SIZE_MB} MB).") From f33b32a95944c236462a5b667c98fe46def0b291 Mon Sep 17 00:00:00 2001 From: simon-mo Date: Sat, 4 May 2024 12:04:58 -0700 Subject: [PATCH 10/10] ldconfig --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8272d29b987f1..90be3a30f89b1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,7 +16,7 @@ RUN apt-get update -y \ # https://github.com/pytorch/pytorch/issues/107960 -- hopefully # this won't be needed for future versions of this docker image # or future versions of triton. -RUN ldconfig /usr/local/cuda-12.1/compat/ +RUN ldconfig /usr/local/cuda-12.4/compat/ WORKDIR /workspace @@ -116,7 +116,7 @@ RUN apt-get update -y \ # https://github.com/pytorch/pytorch/issues/107960 -- hopefully # this won't be needed for future versions of this docker image # or future versions of triton. -RUN ldconfig /usr/local/cuda-12.1/compat/ +RUN ldconfig /usr/local/cuda-12.4/compat/ # install vllm wheel first, so that torch etc will be installed RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \