Skip to content

Commit d11132b

Browse files
authored
updating vllm modules to have upstream triton attention (#17)
Updating to be able to use vllm-project/vllm#14071. --------- Signed-off-by: Burkhard Ringlein <ngl@zurich.ibm.com>
1 parent 8ac1aca commit d11132b

File tree

5 files changed

+35
-23
lines changed

5 files changed

+35
-23
lines changed

Dockerfile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
ARG BASE_UBI_IMAGE_TAG=9.4
33
ARG PYTHON_VERSION=3.12
44
ARG MAX_JOBS=64
5-
ARG PIP_VLLM_VERSION=0.7.3
5+
ARG PIP_VLLM_VERSION=0.8.1
66

77
ARG VLLM_SOURCE=pip
88
# or VLLM_SOURCE=custom
@@ -55,7 +55,7 @@ ENV CUDA_HOME="/usr/local/cuda" \
5555
# install build dependencies
5656
RUN --mount=type=cache,target=/root/.cache/pip \
5757
--mount=type=cache,target=/root/.cache/uv \
58-
--mount=type=bind,source=vllm/requirements-build.txt,target=requirements-build.txt \
58+
--mount=type=bind,source=vllm/requirements/build.txt,target=requirements-build.txt \
5959
uv pip install -r requirements-build.txt
6060

6161
# set env variables for build
@@ -154,6 +154,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
154154
uv pip install vllm-*.whl
155155

156156
# copy python stuff of vllm
157+
ARG VLLM_SOURCE
157158
RUN mkdir -p /workspace/vllm
158159
COPY vllm/vllm /workspace/vllm
159160
RUN if [ "$VLLM_SOURCE" = "custom" ] ; then cp -r /workspace/vllm/* ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/vllm/ \

Dockerfile.rocm

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@ ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git"
1414
ARG FA_BRANCH="1a7f4dfa"
1515
ARG FA_REPO="https://github.com/Dao-AILab/flash-attention.git"
1616

17+
# if not using the submodule
1718
ARG VLLM_BRANCH="v0.7.3+rocm"
18-
ARG VLLM_VERSION=0.7.3
19+
20+
ARG VLLM_VERSION=0.8.1
1921
ARG PYTORCH_ROCM_ARCH=gfx90a;gfx942
2022
ARG VLLM_SOURCE=submodule
2123
# or ARG VLLM_SOURCE=upstream
@@ -158,12 +160,17 @@ RUN cd pytorch && git checkout ${PYTORCH_BRANCH} && \
158160
&& python3 tools/amd_build/build_amd.py \
159161
&& CMAKE_PREFIX_PATH=$(python3 -c 'import sys; print(sys.prefix)') python3 setup.py bdist_wheel --dist-dir=dist \
160162
&& pip install dist/*.whl
161-
RUN git clone ${PYTORCH_VISION_REPO} vision
162-
RUN cd vision && git checkout ${PYTORCH_VISION_BRANCH} \
163-
&& python3 setup.py bdist_wheel --dist-dir=dist \
164-
&& pip install dist/*.whl
165-
RUN mkdir -p /workspace/install && cp /workspace/pytorch/dist/*.whl /workspace/install \
166-
&& cp /workspace/vision/dist/*.whl /workspace/install
163+
# RUN git clone ${PYTORCH_VISION_REPO} vision
164+
# RUN cd vision && git checkout ${PYTORCH_VISION_BRANCH} \
165+
# && python3 setup.py bdist_wheel --dist-dir=dist \
166+
# && pip install dist/*.whl
167+
# WORKDIR /workspace/vision/dist
168+
# RUN --mount=type=cache,target=/root/.cache/pip \
169+
# pip download torchvision==${PYTORCH_VISION_BRANCH} --no-deps
170+
171+
RUN mkdir -p /workspace/install && cp /workspace/pytorch/dist/*.whl /workspace/install
172+
# \
173+
# && cp /workspace/vision/dist/*.whl /workspace/install
167174

168175

169176
## vLLM Builder #################################################################
@@ -242,7 +249,7 @@ ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}
242249

243250
# Build vLLM
244251
RUN cd vllm \
245-
&& python3 -m pip install -r requirements-rocm.txt
252+
&& python3 -m pip install -r requirements/rocm.txt
246253

247254
ENV CMAKE_PREFIX_PATH="/opt/rocm/;/opt/rocm/hip;$(python3 -c 'import sys; print(sys.prefix)')"
248255
RUN cd vllm \
@@ -332,9 +339,13 @@ WORKDIR /workspace
332339
#ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
333340
ARG PYTORCH_ROCM_ARCH
334341
ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}
335-
COPY rocm_vllm/requirements-*.txt /workspace/
336-
RUN python3 -m pip install -r requirements-rocm.txt \
337-
&& pip uninstall -y vllm
342+
COPY rocm_vllm/requirements/*.txt /workspace/
343+
RUN --mount=type=cache,target=/root/.cache/pip \
344+
--mount=type=cache,target=/root/.cache/uv \
345+
uv pip install -r rocm.txt \
346+
&& pip uninstall -y vllm \
347+
# FIXME: remove once rocm requirements are updated again
348+
&& uv pip install cachetools
338349
# export PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} \
339350

340351
# Install vllm
@@ -361,12 +372,14 @@ RUN --mount=type=cache,target=/root/.cache/pip \
361372
uv pip install /workspace/fa-install/*.whl
362373

363374
# Install pytorch
375+
# ARG ROCM_VERSION
364376
RUN mkdir -p /workspace/torch-install
365377
COPY --from=pytorch-builder /workspace/install/*.whl /workspace/torch-install
366378
RUN ls -al /workspace/torch-install/
367379
RUN --mount=type=cache,target=/root/.cache/pip \
368380
--mount=type=cache,target=/root/.cache/uv \
369-
uv pip install /workspace/torch-install/*.whl
381+
uv pip install /workspace/torch-install/*.whl && \
382+
uv pip install torchvision --no-deps --index-url https://download.pytorch.org/whl/rocm6.2.4
370383
# install rocm pytorch
371384
# RUN --mount=type=cache,target=/root/.cache/pip \
372385
# --mount=type=cache,target=/root/.cache/uv \
@@ -387,7 +400,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
387400
# ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nvidia/cuda_cupti/lib:${LD_LIBRARY_PATH}"
388401

389402

390-
# copy requirements before to avoid reinstall
403+
# copy requirements explicitly before to avoid reinstall
391404
COPY triton-dejavu/requirements-opt.txt dejavu-requirements-opt.txt
392405
RUN --mount=type=cache,target=/root/.cache/pip \
393406
--mount=type=cache,target=/root/.cache/uv \
@@ -418,16 +431,14 @@ RUN --mount=type=cache,target=/root/.cache/pip \
418431
--mount=type=cache,target=/root/.cache/uv \
419432
uv pip install pytest llnl-hatchet debugpy
420433

421-
422-
423434
# install lm_eval
424435
RUN --mount=type=cache,target=/root/.cache/pip \
425436
--mount=type=cache,target=/root/.cache/uv \
426437
git clone --depth 1 https://github.com/EleutherAI/lm-evaluation-harness && cd lm-evaluation-harness && uv pip install .
427438

428439
# copy vllm benchmarks and download share GPT
429440
COPY vllm/benchmarks benchmarks
430-
RUN wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
441+
COPY ShareGPT_V3_unfiltered_cleaned_split.json ShareGPT_V3_unfiltered_cleaned_split.json
431442

432443
ENV STORE_TEST_RESULT_PATH=/results
433444

Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,14 @@ build: Dockerfile ShareGPT_V3_unfiltered_cleaned_split.json
2323
docker build --progress=plain --build-arg MAX_JOBS=$(MAX_JOBS) . -t ${TAG}
2424
@echo "Built docker image with tag: ${TAG}"
2525

26-
rocm-vllm-all.tar: .git/modules/rocm_vllm/index
26+
rocm-vllm-all.tar: .git/modules/rocm_vllm/index
2727
cd rocm_vllm; ls -A | xargs tar --mtime='1970-01-01' -cf ../rocm-vllm-all.tar
2828

29-
rocm: Dockerfile.rocm rocm-vllm-all.tar all-git.tar
29+
rocm: Dockerfile.rocm rocm-vllm-all.tar all-git.tar ShareGPT_V3_unfiltered_cleaned_split.json
3030
docker build --progress=plain --build-arg MAX_JOBS=$(MAX_JOBS) --build-arg VLLM_SOURCE=submodule . -t ${TAG} -f Dockerfile.rocm
3131
@echo "Built docker image with tag: ${TAG}"
3232

33-
rocm-upstream: Dockerfile.rocm
33+
rocm-upstream: Dockerfile.rocm ShareGPT_V3_unfiltered_cleaned_split.json
3434
@echo "using https://github.com/ROCm/vllm repository; vllm submodule CURRENTLY IGNORED"
3535
docker build --progress=plain --build-arg MAX_JOBS=$(MAX_JOBS) --build-arg VLLM_SOURCE=upstream . -t ${TAG} -f Dockerfile.rocm
3636
@echo "Built docker image with tag: ${TAG}"

rocm_vllm

Submodule rocm_vllm updated 471 files

vllm

Submodule vllm updated 829 files

0 commit comments

Comments
 (0)