55# docs/source/contributing/dockerfile/dockerfile.md and
66# docs/source/assets/contributing/dockerfile-stages-dependency.png
77
8- ARG CUDA_VERSION=12.4.1
8+ ARG CUDA_VERSION=12.8.1
99# ################### BASE BUILD IMAGE ####################
1010# prepare basic build environment
1111FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 AS base
12- ARG CUDA_VERSION=12.4.1
12+ ARG CUDA_VERSION=12.8.1
1313ARG PYTHON_VERSION=3.12
1414ARG TARGETPLATFORM
1515ENV DEBIAN_FRONTEND=noninteractive
@@ -37,6 +37,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
3737# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
3838# Reference: https://github.com/astral-sh/uv/pull/1694
3939ENV UV_HTTP_TIMEOUT=500
40+ ENV UV_INDEX_STRATEGY="unsafe-best-match"
4041
4142# Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519
4243# as it was causing spam when compiling the CUTLASS kernels
@@ -69,7 +70,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \
6970COPY requirements/common.txt requirements/common.txt
7071COPY requirements/cuda.txt requirements/cuda.txt
7172RUN --mount=type=cache,target=/root/.cache/uv \
72- uv pip install --system -r requirements/cuda.txt
73+ uv pip install --system -r requirements/cuda.txt \
74+ --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
7375
7476# cuda arch list used by torch
7577# can be useful for both `dev` and `test`
@@ -92,9 +94,11 @@ COPY requirements/build.txt requirements/build.txt
9294# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
9395# Reference: https://github.com/astral-sh/uv/pull/1694
9496ENV UV_HTTP_TIMEOUT=500
97+ ENV UV_INDEX_STRATEGY="unsafe-best-match"
9598
9699RUN --mount=type=cache,target=/root/.cache/uv \
97- uv pip install --system -r requirements/build.txt
100+ uv pip install --system -r requirements/build.txt \
101+ --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
98102
99103COPY . .
100104ARG GIT_REPO_CHECK=0
@@ -161,22 +165,25 @@ FROM base as dev
161165# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
162166# Reference: https://github.com/astral-sh/uv/pull/1694
163167ENV UV_HTTP_TIMEOUT=500
168+ ENV UV_INDEX_STRATEGY="unsafe-best-match"
169+
170+ # Workaround for #17068
171+ RUN --mount=type=cache,target=/root/.cache/uv \
172+ uv pip install --system --no-build-isolation "git+https://github.com/state-spaces/mamba@v2.2.4"
164173
165174COPY requirements/lint.txt requirements/lint.txt
166175COPY requirements/test.txt requirements/test.txt
167176COPY requirements/dev.txt requirements/dev.txt
168- # Workaround for #17068
169- RUN --mount=type=cache,target=/root/.cache/uv \
170- uv pip install --system mamba-ssm==2.2.4 --no-build-isolation
171177RUN --mount=type=cache,target=/root/.cache/uv \
172- uv pip install --system -r requirements/dev.txt
178+ uv pip install --system -r requirements/dev.txt \
179+ --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
173180# ################### DEV IMAGE ####################
174181
175182# ################### vLLM installation IMAGE ####################
176183# image with vLLM installed
177184# TODO: Restore to base image after FlashInfer AOT wheel fixed
178185FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS vllm-base
179- ARG CUDA_VERSION=12.4.1
186+ ARG CUDA_VERSION=12.8.1
180187ARG PYTHON_VERSION=3.12
181188WORKDIR /vllm-workspace
182189ENV DEBIAN_FRONTEND=noninteractive
@@ -209,6 +216,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
209216# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
210217# Reference: https://github.com/astral-sh/uv/pull/1694
211218ENV UV_HTTP_TIMEOUT=500
219+ ENV UV_INDEX_STRATEGY="unsafe-best-match"
212220
213221# Workaround for https://github.com/openai/triton/issues/2507 and
214222# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
@@ -229,7 +237,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \
229237# Install vllm wheel first, so that torch etc will be installed.
230238RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
231239 --mount=type=cache,target=/root/.cache/uv \
232- uv pip install --system dist/*.whl --verbose
240+ uv pip install --system dist/*.whl --verbose \
241+ --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
233242
234243# If we need to build FlashInfer wheel before its release:
235244# $ export FLASHINFER_ENABLE_AOT=1
@@ -246,19 +255,26 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
246255RUN --mount=type=cache,target=/root/.cache/uv \
247256. /etc/environment && \
248257if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
249- uv pip install --system https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post2/flashinfer_python-0.2.1.post2+cu124torch2.6-cp38-abi3-linux_x86_64.whl ; \
258+ # TESTING: install FlashInfer from source to test 2.7.0 final RC
259+ FLASHINFER_ENABLE_AOT=1 TORCH_CUDA_ARCH_LIST='7.5 8.0 8.6 8.9 9.0+PTX' \
260+ uv pip install --system --no-build-isolation "git+https://github.com/flashinfer-ai/flashinfer@v0.2.2.post1" ; \
250261fi
251262COPY examples examples
252263COPY benchmarks benchmarks
253264COPY ./vllm/collect_env.py .
254265
266+ RUN --mount=type=cache,target=/root/.cache/uv \
267+ . /etc/environment && \
268+ uv pip list
269+
255270# Although we build Flashinfer with AOT mode, there's still
256271# some issues w.r.t. JIT compilation. Therefore we need to
257272# install build dependencies for JIT compilation.
258273# TODO: Remove this once FlashInfer AOT wheel is fixed
259274COPY requirements/build.txt requirements/build.txt
260275RUN --mount=type=cache,target=/root/.cache/uv \
261- uv pip install --system -r requirements/build.txt
276+ uv pip install --system -r requirements/build.txt \
277+ --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
262278
263279# ################### vLLM installation IMAGE ####################
264280
@@ -272,11 +288,13 @@ ADD . /vllm-workspace/
272288# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
273289# Reference: https://github.com/astral-sh/uv/pull/1694
274290ENV UV_HTTP_TIMEOUT=500
291+ ENV UV_INDEX_STRATEGY="unsafe-best-match"
275292
276- # install development dependencies (for testing)
277293# Workaround for #17068
278294RUN --mount=type=cache,target=/root/.cache/uv \
279- uv pip install --system mamba-ssm==2.2.4 --no-build-isolation
295+ uv pip install --system --no-build-isolation "git+https://github.com/state-spaces/mamba@v2.2.4"
296+
297+ # install development dependencies (for testing)
280298RUN --mount=type=cache,target=/root/.cache/uv \
281299 uv pip install --system -r requirements/dev.txt
282300
0 commit comments