Commit
Merge pull request vllm-project#14 from wenxcs/wenxh/fp8-on-a100-v5-pr
0612 kernel of FP8 on A100
xiaoxiawu-microsoft authored Jun 15, 2024
2 parents 9f42e46 + d0b7fad commit b28848e
Showing 9 changed files with 780 additions and 446 deletions.
9 changes: 8 additions & 1 deletion requirements-cuda.txt
@@ -8,4 +8,11 @@ vllm-nccl-cu12>=2.18,<2.19 # for downloading nccl library
 torch == 2.2.1
 xformers == 0.0.25 # Requires PyTorch 2.2.1
 
-cupy-cuda12x
+# Dependencies for pycublas-moe-group-gemm
+gitpython
+pytest
+loguru
+# In case of an invalid url, please install from this file:
+# pip install gitpython pytest loguru && pip install vllm/model_executor/layers/fused_moe/pycublas.zip
+# or
+# pip install gitpython pytest loguru && pip install git+https://github.com/wenxcs/pycublas.git@moe-group-gemm
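The comments in the diff above give fallback install commands for the new helper packages. As a quick sanity check before falling back, a small hypothetical helper (not part of this PR) can report which of them are missing; note the import names differ from the PyPI names in one case (gitpython is imported as `git`):

```python
import importlib.util

def missing_deps(names):
    """Return the subset of module names that cannot be imported."""
    return [n for n in names if importlib.util.find_spec(n) is None]

# Import names for the packages this commit adds (gitpython imports as "git").
print(missing_deps(["git", "pytest", "loguru"]))
```

If the printed list is non-empty, run one of the `pip install` fallbacks from the comments above.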
