Skip to content

Commit 78dba40

Browse files
authored
[Hardware][IBM Z] Enable v1 for s390x and s390x Dockerfile fixes (#22725)
Signed-off-by: Nikhil Suryawanshi <suryawanshin74@gmail.com>
1 parent e9d6a3d commit 78dba40

File tree

7 files changed

+96
-18
lines changed

7 files changed

+96
-18
lines changed

docker/Dockerfile.s390x

Lines changed: 79 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ ENV LANG=C.UTF-8 \
1616
RUN microdnf install -y \
1717
which procps findutils tar vim git gcc gcc-gfortran g++ make patch zlib-devel \
1818
libjpeg-turbo-devel libtiff-devel libpng-devel libwebp-devel freetype-devel harfbuzz-devel \
19-
openssl-devel openblas openblas-devel autoconf automake libtool cmake numpy && \
19+
openssl-devel openblas openblas-devel autoconf automake libtool cmake numpy libsndfile && \
2020
microdnf clean all
2121

2222
# Python Installation
@@ -136,6 +136,71 @@ RUN --mount=type=cache,target=/root/.cache/uv \
136136
mkdir -p /tmp/hf-xet/dist && \
137137
cp dist/*.whl /tmp/hf-xet/dist/
138138

139+
# Build numba
140+
FROM python-install AS numba-builder
141+
142+
ARG MAX_JOBS
143+
ARG NUMBA_VERSION=0.61.2
144+
145+
WORKDIR /tmp
146+
147+
# Install build tools, clone LLVM, llvmlite and numba, build and install LLVM, then build the llvmlite and numba wheels
148+
RUN --mount=type=cache,target=/root/.cache/uv \
149+
microdnf install ninja-build gcc gcc-c++ -y && \
150+
git clone --recursive https://github.com/llvm/llvm-project.git -b llvmorg-15.0.7 && \
151+
git clone --recursive https://github.com/numba/llvmlite.git -b v0.44.0 && \
152+
git clone --recursive https://github.com/numba/numba.git -b ${NUMBA_VERSION} && \
153+
cd llvm-project && mkdir build && cd build && \
154+
uv pip install 'cmake<4' setuptools numpy && \
155+
export PREFIX=/usr/local && CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_ENABLE_PROJECTS=lld;libunwind;compiler-rt" \
156+
CFLAGS="$(echo $CFLAGS | sed 's/-fno-plt //g')" \
157+
CXXFLAGS="$(echo $CXXFLAGS | sed 's/-fno-plt //g')" \
158+
CMAKE_ARGS="${CMAKE_ARGS} -DFFI_INCLUDE_DIR=$PREFIX/include" \
159+
CMAKE_ARGS="${CMAKE_ARGS} -DFFI_LIBRARY_DIR=$PREFIX/lib" \
160+
cmake -DCMAKE_INSTALL_PREFIX="${PREFIX}" \
161+
-DCMAKE_BUILD_TYPE=Release \
162+
-DCMAKE_LIBRARY_PATH="${PREFIX}" \
163+
-DLLVM_ENABLE_LIBEDIT=OFF \
164+
-DLLVM_ENABLE_LIBXML2=OFF \
165+
-DLLVM_ENABLE_RTTI=ON \
166+
-DLLVM_ENABLE_TERMINFO=OFF \
167+
-DLLVM_INCLUDE_BENCHMARKS=OFF \
168+
-DLLVM_INCLUDE_DOCS=OFF \
169+
-DLLVM_INCLUDE_EXAMPLES=OFF \
170+
-DLLVM_INCLUDE_GO_TESTS=OFF \
171+
-DLLVM_INCLUDE_TESTS=OFF \
172+
-DLLVM_INCLUDE_UTILS=ON \
173+
-DLLVM_INSTALL_UTILS=ON \
174+
-DLLVM_UTILS_INSTALL_DIR=libexec/llvm \
175+
-DLLVM_BUILD_LLVM_DYLIB=OFF \
176+
-DLLVM_LINK_LLVM_DYLIB=OFF \
177+
-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly \
178+
-DLLVM_ENABLE_FFI=ON \
179+
-DLLVM_ENABLE_Z3_SOLVER=OFF \
180+
-DLLVM_OPTIMIZED_TABLEGEN=ON \
181+
-DCMAKE_POLICY_DEFAULT_CMP0111=NEW \
182+
-DCOMPILER_RT_BUILD_BUILTINS=ON \
183+
-DCOMPILER_RT_BUILTINS_HIDE_SYMBOLS=OFF \
184+
-DCOMPILER_RT_BUILD_LIBFUZZER=OFF \
185+
-DCOMPILER_RT_BUILD_CRT=OFF \
186+
-DCOMPILER_RT_BUILD_MEMPROF=OFF \
187+
-DCOMPILER_RT_BUILD_PROFILE=OFF \
188+
-DCOMPILER_RT_BUILD_SANITIZERS=OFF \
189+
-DCOMPILER_RT_BUILD_XRAY=OFF \
190+
-DCOMPILER_RT_BUILD_GWP_ASAN=OFF \
191+
-DCOMPILER_RT_BUILD_ORC=OFF \
192+
-DCOMPILER_RT_INCLUDE_TESTS=OFF \
193+
${CMAKE_ARGS} -GNinja ../llvm \
194+
195+
&& ninja install . && \
196+
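# Build the llvmlite wheel, then add the missing dynamic_annotations.h include to numba's _dispatcher.cpp and build the numba wheel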
197+
cd ../../llvmlite && python setup.py bdist_wheel && \
198+
cd ../numba && \
199+
if ! grep '#include "dynamic_annotations.h"' numba/_dispatcher.cpp; then \
200+
sed -i '/#include "internal\/pycore_atomic.h"/i\#include "dynamic_annotations.h"' numba/_dispatcher.cpp; \
201+
fi && python setup.py bdist_wheel
202+
203+
139204
# Final build stage
140205
FROM python-install AS vllm-cpu
141206
ARG PYTHON_VERSION
@@ -163,23 +228,30 @@ RUN --mount=type=cache,target=/root/.cache/uv \
163228
--mount=type=bind,from=torch-vision,source=/tmp/vision/dist,target=/tmp/vision-wheels/ \
164229
--mount=type=bind,from=hf-xet-builder,source=/tmp/hf-xet/dist,target=/tmp/hf-xet-wheels/ \
165230
--mount=type=bind,from=torch,source=/tmp/pytorch/dist,target=/tmp/torch-wheels/ \
231+
--mount=type=bind,from=numba-builder,source=/tmp/llvmlite/dist,target=/tmp/llvmlite-wheels/ \
232+
--mount=type=bind,from=numba-builder,source=/tmp/numba/dist,target=/tmp/numba-wheels/ \
166233
sed -i '/^torch/d' requirements/build.txt && \
167-
ARROW_WHL_FILE=$(ls /tmp/arrow-wheels/pyarrow-*.whl | head -n 1) && \
168-
VISION_WHL_FILE=$(ls /tmp/vision-wheels/*.whl | head -n 1) && \
169-
HF_XET_WHL_FILE=$(ls /tmp/hf-xet-wheels/*.whl | head -n 1) && \
170-
TORCH_WHL_FILE=$(ls /tmp/torch-wheels/*.whl | head -n 1) && \
234+
ARROW_WHL_FILE=$(ls /tmp/arrow-wheels/pyarrow-*.whl) && \
235+
VISION_WHL_FILE=$(ls /tmp/vision-wheels/*.whl) && \
236+
HF_XET_WHL_FILE=$(ls /tmp/hf-xet-wheels/*.whl) && \
237+
TORCH_WHL_FILE=$(ls /tmp/torch-wheels/*.whl) && \
238+
LLVM_WHL_FILE=$(ls /tmp/llvmlite-wheels/*.whl) && \
239+
NUMBA_WHL_FILE=$(ls /tmp/numba-wheels/*.whl) && \
171240
uv pip install -v \
172241
$ARROW_WHL_FILE \
173242
$VISION_WHL_FILE \
174243
$HF_XET_WHL_FILE \
175244
$TORCH_WHL_FILE \
245+
$LLVM_WHL_FILE \
246+
$NUMBA_WHL_FILE \
176247
--index-strategy unsafe-best-match \
177248
-r requirements/build.txt \
178-
-r requirements/cpu.txt
249+
-r requirements/cpu.txt
250+
179251

180252
# Build and install vllm
181253
RUN --mount=type=cache,target=/root/.cache/uv \
182-
VLLM_TARGET_DEVICE=cpu python setup.py bdist_wheel && \
254+
VLLM_TARGET_DEVICE=cpu VLLM_CPU_MOE_PREPACK=0 python setup.py bdist_wheel && \
183255
uv pip install "$(echo dist/*.whl)[tensorizer]"
184256

185257
# setup non-root user for vllm
@@ -196,4 +268,3 @@ WORKDIR /home/vllm
196268

197269
# Set the default entrypoint
198270
ENTRYPOINT ["python", "-m", "vllm.entrypoints.openai.api_server"]
199-

requirements/common.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ prometheus-fastapi-instrumentator >= 7.0.0
2020
tiktoken >= 0.6.0 # Required for DBRX tokenizer
2121
lm-format-enforcer >= 0.10.11, < 0.11
2222
llguidance >= 0.7.11, < 0.8.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64"
23-
outlines_core == 0.2.10
23+
outlines_core == 0.2.10 ; platform_machine != "s390x"
24+
outlines == 0.1.11 ; platform_machine == "s390x"
2425
# required for outlines backend disk cache
2526
diskcache == 5.6.3
2627
lark == 1.2.2

requirements/cpu.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# Common dependencies
22
-r common.txt
33

4-
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
5-
numba == 0.61.2; python_version > '3.9'
4+
numba == 0.60.0; python_version == '3.9' and platform_machine != "s390x" # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
5+
numba == 0.61.2; python_version > '3.9' and platform_machine != "s390x"
66

77
# Dependencies for CPUs
88
packaging>=24.2

vllm/engine/arg_utils.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,12 +1076,13 @@ def create_engine_config(
10761076
# Set default arguments for V0 or V1 Engine.
10771077
if use_v1:
10781078
self._set_default_args_v1(usage_context, model_config)
1079-
# Disable chunked prefill for POWER (ppc64le)/ARM CPUs in V1
1079+
# Disable chunked prefill for POWER (ppc64le)/ARM/s390x CPUs in V1
10801080
if current_platform.is_cpu(
10811081
) and current_platform.get_cpu_architecture() in (
1082-
CpuArchEnum.POWERPC, CpuArchEnum.ARM):
1082+
CpuArchEnum.POWERPC, CpuArchEnum.S390X, CpuArchEnum.ARM):
10831083
logger.info(
1084-
"Chunked prefill is not supported for ARM and POWER CPUs; "
1084+
"Chunked prefill is not supported for ARM and POWER "
1085+
"and S390X CPUs; "
10851086
"disabling it for V1 backend.")
10861087
self.enable_chunked_prefill = False
10871088
else:

vllm/platforms/cpu.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -332,5 +332,6 @@ def default_v1(cls, model_config) -> bool:
332332
supplied model configuration.
333333
"""
334334
arch = cls.get_cpu_architecture()
335-
return (cls.supports_v1(model_config) and arch
336-
in (CpuArchEnum.X86, CpuArchEnum.POWERPC, CpuArchEnum.ARM))
335+
return (cls.supports_v1(model_config)
336+
and arch in (CpuArchEnum.X86, CpuArchEnum.POWERPC,
337+
CpuArchEnum.ARM, CpuArchEnum.S390X))

vllm/platforms/interface.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ class CpuArchEnum(enum.Enum):
8181
X86 = enum.auto()
8282
ARM = enum.auto()
8383
POWERPC = enum.auto()
84+
S390X = enum.auto()
8485
OTHER = enum.auto()
8586
UNKNOWN = enum.auto()
8687

@@ -377,6 +378,8 @@ def get_cpu_architecture(cls) -> CpuArchEnum:
377378
return CpuArchEnum.ARM
378379
elif machine.startswith("ppc"):
379380
return CpuArchEnum.POWERPC
381+
elif machine == "s390x":
382+
return CpuArchEnum.S390X
380383

381384
return CpuArchEnum.OTHER if machine else CpuArchEnum.UNKNOWN
382385

vllm/v1/worker/cpu_worker.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,9 @@ def init_device(self):
4343
# Setup OpenMP threads affinity.
4444
omp_cpuids = envs.VLLM_CPU_OMP_THREADS_BIND
4545
if omp_cpuids == "auto" and platform.system() == "Linux":
46-
if current_platform.get_cpu_architecture() == CpuArchEnum.POWERPC:
47-
# For POWERPC SMT-8/4/2
46+
cpu_arch = current_platform.get_cpu_architecture()
47+
if cpu_arch in (CpuArchEnum.POWERPC, CpuArchEnum.S390X):
48+
# For S390X/POWERPC (SMT-8/4/2): keep only CPUs whose id % 8 < 4 for auto-binding
4849
self.local_omp_cpuid = self._get_autobind_cpu_ids(
4950
lambda cpus: [cpu for cpu in cpus if cpu.id % 8 < 4])
5051
elif current_platform.get_cpu_architecture() == CpuArchEnum.X86:

0 commit comments

Comments
 (0)