Upgrade FlashInfer to v0.3.0

nvpohanh · nvpohanh · commit e7e16d80f0d5 · 2025-09-03T18:44:59.000-07:00
Mainly to pick up the GPT-OSS MXFP4 trtllm-gen MoE autotuning and the bug fix in flashinfer-ai/flashinfer#1573.

Signed-off-by: Po-Han Huang <pohanh@nvidia.com>
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -375,7 +375,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
 # Install FlashInfer from source
 ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
 # Keep this in sync with "flashinfer" extra in setup.py
-ARG FLASHINFER_GIT_REF="v0.2.14.post1"
+ARG FLASHINFER_GIT_REF="v0.3.0"
 # Flag to control whether to compile FlashInfer AOT kernels
 # Set to "true" to enable AOT compilation:
 # docker build --build-arg FLASHINFER_AOT_COMPILE=true ...
diff --git a/setup.py b/setup.py
@@ -694,7 +694,7 @@ def _read_requirements(filename: str) -> list[str]:
                   "mistral_common[audio]"],  # Required for audio processing
         "video": [],  # Kept for backwards compatibility
         # FlashInfer should be updated together with the Dockerfile
-        "flashinfer": ["flashinfer-python==0.2.14.post1"],
+        "flashinfer": ["flashinfer-python==0.3.0"],
         # Optional deps for AMD FP4 quantization support
         "petit-kernel": ["petit-kernel"],
     },