diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml
new file mode 100644
index 0000000000000..1959f9752069f
--- /dev/null
+++ b/.buildkite/release-pipeline.yaml
@@ -0,0 +1,29 @@
+# Release pipeline: builds a vLLM wheel for each Python/CUDA combination in the
+# matrix below and uploads it to S3 under the commit being built.
+steps:
+  # Manual gate: the steps below only run once this block is unblocked.
+  - block: "Build wheels"
+
+  - label: "Build wheel - Python {{matrix.python_version}}, CUDA {{matrix.cuda_version}}"
+    agents:
+      queue: cpu_queue
+    commands:
+      # Build up to the `build` stage only; that stage leaves the wheel in dist/.
+      # NOTE(review): every matrix job uses the same image tag -- confirm that jobs
+      # never share a Docker daemon, otherwise they can overwrite each other's image.
+      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg CUDA_VERSION={{matrix.cuda_version}} --build-arg PYTHON_VERSION={{matrix.python_version}} --tag vllm-ci:build-image --target build --progress plain ."
+      # -p keeps the step from failing when the directory already exists.
+      - "mkdir -p artifacts"
+      # Copy dist/ out of the image into the bind-mounted host directory.
+      - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image cp -r dist /artifacts_host"
+      - "aws s3 cp --recursive artifacts/dist s3://vllm-wheels/$BUILDKITE_COMMIT/"
+    matrix:
+      setup:
+        cuda_version:
+          - "11.8.0"
+          - "12.1.0"
+        python_version:
+          - "3.8"
+          - "3.9"
+          - "3.10"
+          - "3.11"
diff --git a/Dockerfile b/Dockerfile
index 72894e7cab9ba..5b3e682a80169 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,9 +5,32 @@
 # docs/source/dev/dockerfile/dockerfile.rst and
 # docs/source/assets/dev/dockerfile-stages-dependency.png
 
+# Global build arg: selects the CUDA version of the nvidia/cuda base images.
+ARG CUDA_VERSION=12.4.1
 #################### BASE BUILD IMAGE ####################
 # prepare basic build environment
-FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS dev
+FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS base
+
+# An ARG declared before FROM is only visible to FROM lines, so it has to be
+# re-declared here before it can be used inside this stage.
+ARG CUDA_VERSION=12.4.1
+# "3" installs the default python3 packages; any other value (e.g. 3.8) is
+# installed as python<version> and registered as the python3 alternative.
+ARG PYTHON_VERSION=3
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Preseed the tzdata answers so that apt does not stop to ask for a timezone.
+RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
+    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
+    && apt-get update -y \
+    && apt-get install -y ccache software-properties-common \
+    && add-apt-repository -y ppa:deadsnakes/ppa \
+    && apt-get update -y \
+    && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv python3-pip \
+    && if [ "${PYTHON_VERSION}" != "3" ]; then update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1; fi \
+    && python3 --version \
+    && python3 -m pip --version
 
 RUN apt-get update -y \
     && apt-get install -y python3-pip git curl sudo
@@ -16,7 +39,7 @@ RUN apt-get update -y \
 # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
 # this won't be needed for future versions of this docker image
 # or future versions of triton.
-RUN ldconfig /usr/local/cuda-12.4/compat/
+RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
 
 WORKDIR /workspace
 
@@ -24,14 +47,7 @@ WORKDIR /workspace
 COPY requirements-common.txt requirements-common.txt
 COPY requirements-cuda.txt requirements-cuda.txt
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install -r requirements-cuda.txt
-
-# install development dependencies
-COPY requirements-lint.txt requirements-lint.txt
-COPY requirements-test.txt requirements-test.txt
-COPY requirements-dev.txt requirements-dev.txt
-RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install -r requirements-dev.txt
+    python3 -m pip install -r requirements-cuda.txt
 
 # cuda arch list used by torch
 # can be useful for both `dev` and `test`
@@ -41,14 +57,16 @@ ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
 ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
 #################### BASE BUILD IMAGE ####################
 
-
 #################### WHEEL BUILD IMAGE ####################
-FROM dev AS build
+FROM base AS build
+
+ARG PYTHON_VERSION=3
 
 # install build dependencies
 COPY requirements-build.txt requirements-build.txt
+
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install -r requirements-build.txt
+    python3 -m pip install -r requirements-build.txt
 
 # install compiler cache to speed up compilation leveraging local or remote caching
 RUN apt-get update -y && apt-get install -y ccache
@@ -101,9 +119,23 @@ RUN python3 check-wheel-size.py dist
 
 #################### EXTENSION Build IMAGE ####################
 
+#################### DEV IMAGE ####################
+# base image plus the development requirements (requirements-dev.txt)
+FROM base AS dev
+
+COPY requirements-lint.txt requirements-lint.txt
+COPY requirements-test.txt requirements-test.txt
+COPY requirements-dev.txt requirements-dev.txt
+RUN --mount=type=cache,target=/root/.cache/pip \
+    python3 -m pip install -r requirements-dev.txt
+
+#################### DEV IMAGE ####################
+
 #################### vLLM installation IMAGE ####################
 # image with vLLM installed
-FROM nvidia/cuda:12.4.1-base-ubuntu22.04 AS vllm-base
+FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04 AS vllm-base
+# re-declare the global ARG so that it is visible inside this stage
+ARG CUDA_VERSION=12.4.1
 WORKDIR /vllm-workspace
 
 RUN apt-get update -y \
@@ -113,12 +145,12 @@ RUN apt-get update -y \
 # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
 # this won't be needed for future versions of this docker image
 # or future versions of triton.
-RUN ldconfig /usr/local/cuda-12.4/compat/
+RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
 
 # install vllm wheel first, so that torch etc will be installed
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
     --mount=type=cache,target=/root/.cache/pip \
-    pip install dist/*.whl --verbose
+    python3 -m pip install dist/*.whl --verbose
 #################### vLLM installation IMAGE ####################
 
 
@@ -131,7 +163,7 @@ ADD . /vllm-workspace/
 
 # install development dependencies (for testing)
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install -r requirements-dev.txt
+    python3 -m pip install -r requirements-dev.txt
 
 # doc requires source code
 # we hide them inside `test_docs/` , so that this source code