Revert the commit for DeepGEMM to fix vLLM WideEP

krishung5 · krishung5 · commit eddee3a35eb6 · 2025-08-05T10:47:51.000-07:00
diff --git a/container/Dockerfile.vllm b/container/Dockerfile.vllm
@@ -17,7 +17,9 @@ ARG TORCH_BACKEND="cu128"
 
 # Match 0.10.0 vLLM release
 # https://github.com/vllm-project/vllm/releases/tag/v0.10.0
-ARG DEEPGEMM_REF="1876566"
+# DeepGEMM is pinned to the commit before https://github.com/deepseek-ai/DeepGEMM/pull/112, since that PR seems to break DeepGEMM on H100:
+# "RuntimeError: Failed: CUDA runtime error csrc/jit/kernel_runtime.hpp:108 '98'"
+ARG DEEPGEMM_REF="03d0be3"
 ARG FLASHINF_REF="v0.2.8rc1"
 
 # Define general architecture ARGs for supporting both x86 and aarch64 builds.