Skip to content

Commit 280d074

Browse files
authored
[CPU][CI] Improve CPU Dockerfile (vllm-project#15690)
Signed-off-by: jiang1.li <jiang1.li@intel.com>
1 parent 32b14ba commit 280d074

File tree

5 files changed

+146
-54
lines changed

5 files changed

+146
-54
lines changed

.buildkite/release-pipeline.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ steps:
8282
queue: cpu_queue_postmerge
8383
commands:
8484
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
85-
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain -f Dockerfile.cpu ."
85+
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f Dockerfile.cpu ."
8686
- "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)"
8787
env:
8888
DOCKER_BUILDKIT: "1"

.buildkite/run-cpu-test.sh

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,19 @@ set -ex
88
CORE_RANGE=${CORE_RANGE:-48-95}
99
NUMA_NODE=${NUMA_NODE:-1}
1010

11-
# Try building the docker image
12-
numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build -t cpu-test-"$BUILDKITE_BUILD_NUMBER" -f Dockerfile.cpu .
13-
numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" -t cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 -f Dockerfile.cpu .
14-
1511
# Setup cleanup
16-
remove_docker_container() { set -e; docker rm -f cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" || true; }
12+
remove_docker_container() {
13+
set -e;
14+
docker rm -f cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" || true;
15+
docker image rm cpu-test-"$BUILDKITE_BUILD_NUMBER" cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 || true;
16+
}
1717
trap remove_docker_container EXIT
1818
remove_docker_container
1919

20+
# Try building the docker image
21+
numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --tag cpu-test-"$BUILDKITE_BUILD_NUMBER" --target vllm-test -f Dockerfile.cpu .
22+
numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" --tag cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 --target vllm-test -f Dockerfile.cpu .
23+
2024
# Run the image, setting --shm-size=4g for tensor parallel.
2125
docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus="$CORE_RANGE" \
2226
--cpuset-mems="$NUMA_NODE" --privileged=true -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"
@@ -36,8 +40,6 @@ function cpu_tests() {
3640
# Run basic model test
3741
docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c "
3842
set -e
39-
pip install -r vllm/requirements/test.txt
40-
pip install -r vllm/requirements/cpu.txt
4143
pytest -v -s tests/kernels/test_cache.py -m cpu_model
4244
pytest -v -s tests/kernels/test_mla_decode_cpu.py -m cpu_model
4345
pytest -v -s tests/models/decoder_only/language -m cpu_model

Dockerfile.cpu

Lines changed: 107 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,69 +1,138 @@
11
# This vLLM Dockerfile is used to construct an image that can build and run vLLM on the x86 CPU platform.
2+
#
3+
# Build targets:
4+
# vllm-openai (default): used for serving deployment
5+
# vllm-test: used for CI tests
6+
# vllm-dev: used for development
7+
#
8+
# Build arguments:
9+
# PYTHON_VERSION=3.12 (default)|3.11|3.10|3.9
10+
# VLLM_CPU_DISABLE_AVX512=false (default)|true
11+
#
12+
13+
######################### BASE IMAGE #########################
14+
FROM ubuntu:22.04 AS base
215

3-
FROM ubuntu:22.04 AS cpu-test-1
16+
WORKDIR /workspace/
417

5-
ENV CCACHE_DIR=/root/.cache/ccache
18+
ARG PYTHON_VERSION=3.12
19+
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
620

21+
# Install minimal dependencies and uv
22+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
23+
--mount=type=cache,target=/var/lib/apt,sharing=locked \
24+
apt-get update -y \
25+
&& apt-get install -y --no-install-recommends ccache git curl wget ca-certificates \
26+
gcc-12 g++-12 libtcmalloc-minimal4 libnuma-dev ffmpeg libsm6 libxext6 libgl1 \
27+
&& update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 \
28+
&& curl -LsSf https://astral.sh/uv/install.sh | sh
29+
30+
ENV CCACHE_DIR=/root/.cache/ccache
731
ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
832

9-
RUN --mount=type=cache,target=/var/cache/apt \
10-
apt-get update -y \
11-
&& apt-get install -y curl ccache git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 libnuma-dev \
12-
&& apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
13-
&& update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
33+
ENV PATH="/root/.local/bin:$PATH"
34+
ENV VIRTUAL_ENV="/opt/venv"
35+
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
36+
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
1437

15-
# https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/tuning_guide.html
16-
# intel-openmp provides additional performance improvement vs. openmp
17-
# tcmalloc provides better memory allocation efficiency, e.g, holding memory in caches to speed up access of commonly-used objects.
18-
RUN --mount=type=cache,target=/root/.cache/pip \
19-
pip install intel-openmp==2025.0.1
38+
ENV UV_HTTP_TIMEOUT=500
2039

21-
ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so"
40+
# Install Python dependencies
41+
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
42+
ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
43+
ENV UV_INDEX_STRATEGY="unsafe-best-match"
44+
ENV UV_LINK_MODE="copy"
45+
RUN --mount=type=cache,target=/root/.cache/uv \
46+
--mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \
47+
--mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \
48+
uv pip install --upgrade pip && \
49+
uv pip install -r requirements/cpu.txt
2250

23-
RUN echo 'ulimit -c 0' >> ~/.bashrc
51+
RUN --mount=type=cache,target=/root/.cache/uv \
52+
uv pip install intel-openmp==2024.2.1 intel_extension_for_pytorch==2.6.0
2453

25-
RUN pip install intel_extension_for_pytorch==2.6.0
54+
ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so:$LD_PRELOAD"
2655

27-
WORKDIR /workspace
56+
RUN echo 'ulimit -c 0' >> ~/.bashrc
2857

29-
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
30-
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
31-
RUN --mount=type=cache,target=/root/.cache/pip \
32-
--mount=type=bind,src=requirements/build.txt,target=requirements/build.txt \
33-
pip install --upgrade pip && \
34-
pip install -r requirements/build.txt
58+
######################### BUILD IMAGE #########################
59+
FROM base AS vllm-build
3560

36-
FROM cpu-test-1 AS build
61+
ARG GIT_REPO_CHECK=0
62+
# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
63+
ARG VLLM_CPU_DISABLE_AVX512
64+
ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
3765

3866
WORKDIR /workspace/vllm
3967

40-
RUN --mount=type=cache,target=/root/.cache/pip \
41-
--mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \
42-
--mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \
43-
pip install -v -r requirements/cpu.txt
68+
RUN --mount=type=cache,target=/root/.cache/uv \
69+
--mount=type=bind,src=requirements/build.txt,target=requirements/build.txt \
70+
uv pip install -r requirements/build.txt
4471

4572
COPY . .
46-
ARG GIT_REPO_CHECK=0
4773
RUN --mount=type=bind,source=.git,target=.git \
4874
if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi
4975

50-
# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
51-
ARG VLLM_CPU_DISABLE_AVX512
52-
ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
76+
RUN --mount=type=cache,target=/root/.cache/uv \
77+
--mount=type=cache,target=/root/.cache/ccache \
78+
--mount=type=bind,source=.git,target=.git \
79+
VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel
80+
81+
######################### DEV IMAGE #########################
82+
FROM vllm-build AS vllm-dev
83+
84+
WORKDIR /workspace/vllm
85+
86+
# Install dev-only tools. Refresh the package index in the same step so the install does not depend on lists left in the apt cache mount by an earlier stage.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
87+
--mount=type=cache,target=/var/lib/apt,sharing=locked \
88+
apt-get update -y && apt-get install -y --no-install-recommends vim numactl
89+
90+
# install development dependencies (for testing)
91+
RUN --mount=type=cache,target=/root/.cache/uv \
92+
uv pip install -e tests/vllm_test_utils
5393

54-
RUN --mount=type=cache,target=/root/.cache/pip \
94+
RUN --mount=type=cache,target=/root/.cache/uv \
5595
--mount=type=cache,target=/root/.cache/ccache \
5696
--mount=type=bind,source=.git,target=.git \
57-
VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
58-
pip install dist/*.whl && \
59-
rm -rf dist
97+
VLLM_TARGET_DEVICE=cpu python3 setup.py develop
98+
99+
RUN --mount=type=cache,target=/root/.cache/uv \
100+
uv pip install -r requirements/dev.txt && \
101+
pre-commit install --hook-type pre-commit --hook-type commit-msg
102+
103+
ENTRYPOINT ["bash"]
104+
105+
######################### TEST IMAGE #########################
106+
FROM base AS vllm-test
60107

61108
WORKDIR /workspace/
62109

63-
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
110+
RUN --mount=type=cache,target=/root/.cache/uv \
111+
--mount=type=bind,src=requirements/test.txt,target=requirements/test.txt \
112+
uv pip install -r requirements/test.txt
113+
114+
RUN --mount=type=cache,target=/root/.cache/uv \
115+
--mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \
116+
uv pip install dist/*.whl
117+
118+
# Copy test assets into the image; COPY is used because these are plain local directories (no archive extraction or remote fetch needed).
COPY ./tests/ ./tests/
119+
COPY ./examples/ ./examples/
120+
COPY ./benchmarks/ ./benchmarks/
64121

65122
# install development dependencies (for testing)
66-
RUN --mount=type=cache,target=/root/.cache/pip \
67-
pip install -e tests/vllm_test_utils
123+
RUN --mount=type=cache,target=/root/.cache/uv \
124+
uv pip install -e tests/vllm_test_utils
125+
126+
ENTRYPOINT ["bash"]
127+
128+
######################### RELEASE IMAGE #########################
129+
FROM base AS vllm-openai
130+
131+
WORKDIR /workspace/
132+
133+
RUN --mount=type=cache,target=/root/.cache/uv \
134+
--mount=type=cache,target=/root/.cache/ccache \
135+
--mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \
136+
uv pip install dist/*.whl
68137

69138
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]

docs/source/getting_started/installation/cpu.md

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -159,18 +159,37 @@ Currently, there are no pre-built CPU wheels.
159159

160160
### Pre-built images
161161

162-
Currently, there are no pre-build CPU images.
162+
:::::{tab-set}
163+
:sync-group: device
164+
165+
::::{tab-item} Intel/AMD x86
166+
:sync: x86
167+
168+
:::{include} cpu/x86.inc.md
169+
:start-after: "### Pre-built images"
170+
:end-before: "### Build image from source"
171+
:::
172+
173+
::::
174+
175+
:::::
163176

164177
### Build image from source
165178

166179
```console
167-
$ docker build -f Dockerfile.cpu -t vllm-cpu-env --shm-size=4g .
168-
$ docker run -it \
169-
--rm \
170-
--network=host \
171-
--cpuset-cpus=<cpu-id-list, optional> \
172-
--cpuset-mems=<memory-node, optional> \
173-
vllm-cpu-env
180+
$ docker build -f Dockerfile.cpu --tag vllm-cpu-env --target vllm-openai .
181+
182+
# Launching OpenAI server
183+
$ docker run --rm \
184+
--privileged=true \
185+
--shm-size=4g \
186+
-p 8000:8000 \
187+
-e VLLM_CPU_KVCACHE_SPACE=<KV cache space> \
188+
-e VLLM_CPU_OMP_THREADS_BIND=<CPU cores for inference> \
189+
vllm-cpu-env \
190+
--model=meta-llama/Llama-3.2-1B-Instruct \
191+
--dtype=bfloat16 \
192+
<other vLLM OpenAI server arguments>
174193
```
175194

176195
::::{tip}

docs/source/getting_started/installation/cpu/x86.inc.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ There are no pre-built wheels or images for this device, so you must build vLLM
3434

3535
### Pre-built images
3636

37+
See [https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo](https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo)
38+
3739
### Build image from source
3840

3941
## Extra information

0 commit comments

Comments
 (0)