diff --git a/docker/Dockerfile b/docker/Dockerfile
index cf9c245a9517..8d4375470adf 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -77,7 +77,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # can be useful for both `dev` and `test`
 # explicitly set the list to avoid issues with torch 2.2
 # see https://github.com/pytorch/pytorch/pull/123243
-ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0+PTX'
+ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
 ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
 # Override the arch list for flash-attn to reduce the binary size
 ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
@@ -244,7 +244,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
 # If we need to build FlashInfer wheel before its release:
 # $ # Note we remove 7.0 from the arch list compared to the list below, since FlashInfer only supports sm75+
-# $ export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0a 10.0a'
+# $ export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0a 10.0a 12.0'
 # $ git clone https://github.com/flashinfer-ai/flashinfer.git --recursive
 # $ cd flashinfer
 # $ git checkout v0.2.6.post1
@@ -261,7 +261,7 @@ if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
     if [[ "$CUDA_VERSION" == 12.8* ]]; then \
         uv pip install --system https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.6.post1%2Bcu128torch2.7-cp39-abi3-linux_x86_64.whl; \
     else \
-        export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0a 10.0a' && \
+        export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0a 10.0a 12.0' && \
         git clone https://github.com/flashinfer-ai/flashinfer.git --single-branch --branch v0.2.6.post1 --recursive && \
         # Needed to build AOT kernels
         (cd flashinfer && \
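
Note: since the arch list is exposed as the `torch_cuda_arch_list` build arg shown in the first hunk, the new default can still be overridden at build time. Below is a minimal sketch of such an invocation; the narrowed arch list, the image tag `vllm-custom-arch`, and the build context `.` are placeholder assumptions, not part of this change (BuildKit is needed because the Dockerfile uses `RUN --mount=type=cache`):

    $ # hypothetical override: build only for Hopper + Blackwell
    $ DOCKER_BUILDKIT=1 docker build \
          --build-arg torch_cuda_arch_list='9.0 10.0 12.0' \
          --file docker/Dockerfile \
          --tag vllm-custom-arch \
          .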