From 4ea258ad0bbe624954d06a7315849d877b2fdbd4 Mon Sep 17 00:00:00 2001
From: mgoin
Date: Tue, 13 May 2025 19:12:17 +0000
Subject: [PATCH 1/6] Update FlashInfer

Signed-off-by: mgoin
---
 docker/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 17adb7a92dc1..671eda8515f8 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -257,7 +257,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
     # TESTING: install FlashInfer from source to test 2.7.0 final RC
     FLASHINFER_ENABLE_AOT=1 TORCH_CUDA_ARCH_LIST='7.5 8.0 8.6 8.9 9.0+PTX' \
-    uv pip install --system --no-build-isolation "git+https://github.com/flashinfer-ai/flashinfer@v0.2.2.post1" ; \
+    uv pip install --system --no-build-isolation "git+https://github.com/flashinfer-ai/flashinfer@948a14622bd624773918d738b0f66137a9ac4784" ; \
 fi
 COPY examples examples
 COPY benchmarks benchmarks

From 114a0f311c6b3ade2e0a0e3e20d451f6ed6ef5c2 Mon Sep 17 00:00:00 2001
From: mgoin
Date: Tue, 13 May 2025 19:59:46 +0000
Subject: [PATCH 2/6] Add blackwell cuda arch

Signed-off-by: mgoin
---
 docker/Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 671eda8515f8..75ff5b11dbec 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -77,7 +77,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # can be useful for both `dev` and `test`
 # explicitly set the list to avoid issues with torch 2.2
 # see https://github.com/pytorch/pytorch/pull/123243
-ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
+ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0 10.0 12.0+PTX'
 ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
 # Override the arch list for flash-attn to reduce the binary size
 ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
@@ -256,7 +256,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 . /etc/environment && \
 if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
     # TESTING: install FlashInfer from source to test 2.7.0 final RC
-    FLASHINFER_ENABLE_AOT=1 TORCH_CUDA_ARCH_LIST='7.5 8.0 8.6 8.9 9.0+PTX' \
+    FLASHINFER_ENABLE_AOT=1 TORCH_CUDA_ARCH_LIST='7.5 8.0 8.6 8.9 9.0 10.0 12.0+PTX' \
     uv pip install --system --no-build-isolation "git+https://github.com/flashinfer-ai/flashinfer@948a14622bd624773918d738b0f66137a9ac4784" ; \
 fi
 COPY examples examples

From c044a06372e277649c4dc8ae70941874009c66f8 Mon Sep 17 00:00:00 2001
From: mgoin
Date: Wed, 14 May 2025 01:40:27 +0000
Subject: [PATCH 3/6] Try dropping 8.6 and 12.0

Signed-off-by: mgoin
---
 docker/Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 75ff5b11dbec..666d9d7d1903 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -77,7 +77,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # can be useful for both `dev` and `test`
 # explicitly set the list to avoid issues with torch 2.2
 # see https://github.com/pytorch/pytorch/pull/123243
-ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0 10.0 12.0+PTX'
+ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0+PTX'
 ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
 # Override the arch list for flash-attn to reduce the binary size
 ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
@@ -256,7 +256,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 . /etc/environment && \
 if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
     # TESTING: install FlashInfer from source to test 2.7.0 final RC
-    FLASHINFER_ENABLE_AOT=1 TORCH_CUDA_ARCH_LIST='7.5 8.0 8.6 8.9 9.0 10.0 12.0+PTX' \
+    FLASHINFER_ENABLE_AOT=1 TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0 10.0+PTX' \
     uv pip install --system --no-build-isolation "git+https://github.com/flashinfer-ai/flashinfer@948a14622bd624773918d738b0f66137a9ac4784" ; \
 fi
 COPY examples examples

From eabdfbb25fa17b9f044400f1922adc417ba5025c Mon Sep 17 00:00:00 2001
From: mgoin
Date: Thu, 15 May 2025 23:48:40 +0000
Subject: [PATCH 4/6] Update flashinfer to latest

Signed-off-by: mgoin
---
 docker/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 666d9d7d1903..619eb613e5d1 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -257,7 +257,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
     # TESTING: install FlashInfer from source to test 2.7.0 final RC
     FLASHINFER_ENABLE_AOT=1 TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0 10.0+PTX' \
-    uv pip install --system --no-build-isolation "git+https://github.com/flashinfer-ai/flashinfer@948a14622bd624773918d738b0f66137a9ac4784" ; \
+    uv pip install --system --no-build-isolation "git+https://github.com/flashinfer-ai/flashinfer@e00e8cedbfcb220f328fd36aa8f529f869b01e6b" ; \
 fi
 COPY examples examples
 COPY benchmarks benchmarks

From 6e7eca2c66b10c233c4e78b36cb317f5928ebe87 Mon Sep 17 00:00:00 2001
From: mgoin
Date: Fri, 16 May 2025 01:44:28 +0000
Subject: [PATCH 5/6] Add SM 12.0

Signed-off-by: mgoin
---
 docker/Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 619eb613e5d1..b9ca57e05553 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -77,7 +77,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # can be useful for both `dev` and `test`
 # explicitly set the list to avoid issues with torch 2.2
 # see https://github.com/pytorch/pytorch/pull/123243
-ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0+PTX'
+ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0+PTX'
 ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
 # Override the arch list for flash-attn to reduce the binary size
 ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
@@ -256,7 +256,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 . /etc/environment && \
 if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
     # TESTING: install FlashInfer from source to test 2.7.0 final RC
-    FLASHINFER_ENABLE_AOT=1 TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0 10.0+PTX' \
+    FLASHINFER_ENABLE_AOT=1 TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0 10.0 12.0+PTX' \
     uv pip install --system --no-build-isolation "git+https://github.com/flashinfer-ai/flashinfer@e00e8cedbfcb220f328fd36aa8f529f869b01e6b" ; \
 fi
 COPY examples examples

From 23d6c0276951672b23bc5d46b71936cc367651cc Mon Sep 17 00:00:00 2001
From: mgoin
Date: Fri, 16 May 2025 11:02:43 +0000
Subject: [PATCH 6/6] Revert SM 12.0

Signed-off-by: mgoin
---
 docker/Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index b9ca57e05553..619eb613e5d1 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -77,7 +77,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # can be useful for both `dev` and `test`
 # explicitly set the list to avoid issues with torch 2.2
 # see https://github.com/pytorch/pytorch/pull/123243
-ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0+PTX'
+ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0+PTX'
 ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
 # Override the arch list for flash-attn to reduce the binary size
 ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
@@ -256,7 +256,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 . /etc/environment && \
 if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
     # TESTING: install FlashInfer from source to test 2.7.0 final RC
-    FLASHINFER_ENABLE_AOT=1 TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0 10.0 12.0+PTX' \
+    FLASHINFER_ENABLE_AOT=1 TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0 10.0+PTX' \
     uv pip install --system --no-build-isolation "git+https://github.com/flashinfer-ai/flashinfer@e00e8cedbfcb220f328fd36aa8f529f869b01e6b" ; \
 fi
 COPY examples examples
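
A minimal sanity check of the series' end state, as a sketch: build the image with the final arch list and confirm FlashInfer imports from inside it. The `vllm-test` tag is a hypothetical placeholder, and `flashinfer.__version__` is assumed to be the module's version attribute; the build-arg name matches the ARG declared in docker/Dockerfile above.

    # Build with the arch list from the final patch (tag name is hypothetical).
    docker build -f docker/Dockerfile \
        --build-arg torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0+PTX' \
        -t vllm-test .

    # Import FlashInfer inside the built image and print its version; the
    # entrypoint override avoids whatever default command the image defines.
    docker run --rm --gpus all --entrypoint python3 vllm-test \
        -c "import flashinfer; print(flashinfer.__version__)"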