Skip to content

Commit 2a88836

Browse files
authored
Build and container fixes for aarch64 (#408)
* Fix gds_path default

  CUDA provides symlinks to the correct GDS lib and include directories, so there is no need to hardcode a full path that is architecture-dependent. Set the default GDS path to point to the CUDA install directory and use the 'include' and 'lib64' symlinks.

  Signed-off-by: Vlad Buslov <vladbu@nvidia.com>

* Fix container build to properly support x86 and Arm

  Currently the build script and Docker build file assume that the container is built on an x86_64 Linux host (by not specifying a platform, which makes docker default to the host architecture, hardcoding 'x86_64' in several places, etc.), which makes it impossible to create either an x86_64 or aarch64 container on an Arm host, or an aarch64 container on an x86 host.

  Configure the target architecture via the 'ARCH' docker variable. Set it to x86 by default in the Dockerfile (for any users that use it directly) and to the host architecture in build-container.sh. Allow the user to specify the ARCH value via the '--arch' CLI parameter, set the docker build platform value accordingly, and pass the value to docker build as a build arg.

  Signed-off-by: Vlad Buslov <vladbu@nvidia.com>

* Fix nixlbench container build to properly support x86 and Arm

  Similar to the nixl library container infrastructure, nixlbench also does not specify a docker build platform, hardcodes 'x86_64' in several places, and assumes an x86 manylinux platform, which makes it impossible to build anything on an aarch64 host or for an aarch64 target.

  Configure the target architecture via the 'ARCH' docker variable. Set it to x86 by default in the nixlbench Dockerfile (for any users that use it directly) and to the host architecture in build-container.sh. Allow the user to specify the ARCH value via the '--arch' CLI parameter, set the docker build platform value accordingly, and pass the value to docker build as a build arg.

  Signed-off-by: Vlad Buslov <vladbu@nvidia.com>

---------

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
1 parent b1c22ed commit 2a88836

File tree

6 files changed

+54
-19
lines changed

6 files changed

+54
-19
lines changed

benchmark/nixlbench/contrib/Dockerfile

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,10 @@ FROM ucx_${UCX}_image AS ucx_image
7373

7474
# --- Stage 4: Final Image Assembly ---
7575
# Re-declare ARGs needed in this final stage
76+
ARG ARCH="x86_64"
7677
ARG DEFAULT_PYTHON_VERSION
7778
ARG WHL_PYTHON_VERSIONS="3.12"
78-
ARG WHL_PLATFORM="manylinux_2_39_x86_64"
79+
ARG WHL_PLATFORM="manylinux_2_39_$ARCH"
7980

8081
WORKDIR /workspace
8182
RUN git clone https://github.com/etcd-cpp-apiv3/etcd-cpp-apiv3.git &&\
@@ -104,9 +105,9 @@ RUN rm -rf build && \
104105
ninja && \
105106
ninja install
106107

107-
ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/x86_64-linux-gnu/plugins
108-
RUN echo "/usr/local/nixl/lib/x86_64-linux-gnu" > /etc/ld.so.conf.d/nixl.conf && \
109-
echo "/usr/local/nixl/lib/x86_64-linux-gnu/plugins" >> /etc/ld.so.conf.d/nixl.conf && \
108+
ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/$ARCH-linux-gnu/plugins
109+
RUN echo "/usr/local/nixl/lib/$ARCH-linux-gnu" > /etc/ld.so.conf.d/nixl.conf && \
110+
echo "/usr/local/nixl/lib/$ARCH-linux-gnu/plugins" >> /etc/ld.so.conf.d/nixl.conf && \
110111
ldconfig
111112

112113
# Create the wheel

benchmark/nixlbench/contrib/build.sh

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,10 @@ fi
3535

3636
BASE_IMAGE=nvcr.io/nvidia/cuda-dl-base
3737
BASE_IMAGE_TAG=25.03-cuda12.8-devel-ubuntu24.04
38-
WHL_PLATFORM=manylinux_2_39_x86_64
38+
ARCH=$(uname -m)
39+
[ "$ARCH" = "arm64" ] && ARCH="aarch64"
40+
WHL_BASE=manylinux_2_39
41+
WHL_PLATFORM=${WHL_BASE}_${ARCH}
3942
WHL_PYTHON_VERSIONS="3.12"
4043
OS="ubuntu24"
4144

@@ -108,6 +111,15 @@ get_options() {
108111
missing_requirement $1
109112
fi
110113
;;
114+
--arch)
115+
if [ "$2" ]; then
116+
ARCH=$2
117+
WHL_PLATFORM=${WHL_BASE}_${ARCH}
118+
shift
119+
else
120+
missing_requirement $1
121+
fi
122+
;;
111123
--)
112124
shift
113125
break
@@ -147,6 +159,7 @@ show_build_options() {
147159
echo "Build Context: ${BUILD_CONTEXT}"
148160
echo "Build Context Args: ${BUILD_CONTEXT_ARGS}"
149161
echo "Base Image: ${BASE_IMAGE}:${BASE_IMAGE_TAG}"
162+
echo "Container arch: ${ARCH}"
150163
echo "Python Versions for wheel build: ${WHL_PYTHON_VERSIONS}"
151164
echo "Wheel Platform: ${WHL_PLATFORM}"
152165
}
@@ -161,6 +174,7 @@ show_help() {
161174
echo " [--os [ubuntu24|ubuntu22] to select Ubuntu version]"
162175
echo " [--python-versions python versions to build for, comma separated]"
163176
echo " [--tag tag for image]"
177+
echo " [--arch [x86_64|aarch64] to select target architecture]"
164178
exit 0
165179
}
166180

@@ -178,7 +192,8 @@ get_options "$@"
178192
BUILD_ARGS+=" --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg BASE_IMAGE_TAG=$BASE_IMAGE_TAG"
179193
BUILD_ARGS+=" --build-arg WHL_PYTHON_VERSIONS=$WHL_PYTHON_VERSIONS"
180194
BUILD_ARGS+=" --build-arg WHL_PLATFORM=$WHL_PLATFORM"
195+
BUILD_ARGS+=" --build-arg ARCH=$ARCH"
181196

182197
show_build_options
183198

184-
docker build -f $DOCKER_FILE $BUILD_ARGS $TAG $NO_CACHE $BUILD_ARGS $BUILD_CONTEXT_ARGS $BUILD_CONTEXT --progress plain
199+
docker build --platform linux/$ARCH -f $DOCKER_FILE $BUILD_ARGS $TAG $NO_CACHE $BUILD_CONTEXT_ARGS $BUILD_CONTEXT --progress plain

contrib/Dockerfile

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ ARG BASE_IMAGE_TAG="25.03-cuda12.8-devel-ubuntu24.04"
1818

1919
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG}
2020

21+
ARG ARCH="x86_64"
2122
ARG DEFAULT_PYTHON_VERSION="3.12"
2223

2324
RUN apt-get update -y && \
@@ -57,13 +58,16 @@ ENV RUSTUP_HOME=/usr/local/rustup \
5758
CARGO_HOME=/usr/local/cargo \
5859
PATH=/usr/local/cargo/bin:$PATH \
5960
RUST_VERSION=1.86.0 \
60-
RUSTARCH=x86_64-unknown-linux-gnu
61+
RUSTARCH=${ARCH}-unknown-linux-gnu
6162

62-
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
63-
echo "a3339fb004c3d0bb9862ba0bce001861fe5cbde9c10d16591eb3f39ee6cd3e7f *rustup-init" | sha256sum -c - && \
63+
# Download rustup-init and its checksum for the target architecture
64+
RUN wget --tries=3 --waitretry=5 \
65+
"https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" \
66+
"https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init.sha256" && \
67+
sha256sum -c rustup-init.sha256 && \
6468
chmod +x rustup-init && \
6569
./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
66-
rm rustup-init && \
70+
rm rustup-init* && \
6771
chmod -R a+w $RUSTUP_HOME $CARGO_HOME
6872

6973
WORKDIR /workspace/nixl
@@ -110,16 +114,16 @@ RUN rm -rf build && \
110114
ninja install
111115

112116
ENV NIXL_PREFIX=/usr/local/nixl
113-
ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/x86_64-linux-gnu/plugins
114-
RUN echo "/usr/local/nixl/lib/x86_64-linux-gnu" > /etc/ld.so.conf.d/nixl.conf && \
115-
echo "/usr/local/nixl/lib/x86_64-linux-gnu/plugins" >> /etc/ld.so.conf.d/nixl.conf && \
117+
ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/$ARCH-linux-gnu/plugins
118+
RUN echo "/usr/local/nixl/lib/$ARCH-linux-gnu" > /etc/ld.so.conf.d/nixl.conf && \
119+
echo "/usr/local/nixl/lib/$ARCH-linux-gnu/plugins" >> /etc/ld.so.conf.d/nixl.conf && \
116120
ldconfig
117121

118122
RUN cd src/bindings/rust && cargo build --release --locked
119123

120124
# Create the wheel
121125
ARG WHL_PYTHON_VERSIONS="3.12"
122-
ARG WHL_PLATFORM="manylinux_2_39_x86_64"
126+
ARG WHL_PLATFORM="manylinux_2_39_$ARCH"
123127
RUN IFS=',' read -ra PYTHON_VERSIONS <<< "$WHL_PYTHON_VERSIONS" && \
124128
for PYTHON_VERSION in "${PYTHON_VERSIONS[@]}"; do \
125129
uv build --wheel --out-dir /tmp/dist --python $PYTHON_VERSION; \

contrib/build-container.sh

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,10 @@ VERSION=v$latest_tag.dev.$commit_id
3030

3131
BASE_IMAGE=nvcr.io/nvidia/cuda-dl-base
3232
BASE_IMAGE_TAG=25.03-cuda12.8-devel-ubuntu24.04
33-
WHL_PLATFORM=manylinux_2_39_x86_64
33+
ARCH=$(uname -m)
34+
[ "$ARCH" = "arm64" ] && ARCH="aarch64"
35+
WHL_BASE=manylinux_2_39
36+
WHL_PLATFORM=${WHL_BASE}_${ARCH}
3437
WHL_PYTHON_VERSIONS="3.12"
3538
OS="ubuntu24"
3639

@@ -84,6 +87,15 @@ get_options() {
8487
missing_requirement $1
8588
fi
8689
;;
90+
--arch)
91+
if [ "$2" ]; then
92+
ARCH=$2
93+
WHL_PLATFORM=${WHL_BASE}_${ARCH}
94+
shift
95+
else
96+
missing_requirement $1
97+
fi
98+
;;
8799
--)
88100
shift
89101
break
@@ -103,7 +115,7 @@ get_options() {
103115

104116
if [[ $OS == "ubuntu22" ]]; then
105117
BASE_IMAGE_TAG=24.10-cuda12.6-devel-ubuntu22.04
106-
WHL_PLATFORM=manylinux_2_34_x86_64
118+
WHL_PLATFORM=manylinux_2_34_${ARCH}
107119
fi
108120

109121
if [ -z "$TAG" ]; then
@@ -117,6 +129,7 @@ show_build_options() {
117129
echo "Image Tag: ${TAG}"
118130
echo "Build Context: ${BUILD_CONTEXT}"
119131
echo "Base Image: ${BASE_IMAGE}:${BASE_IMAGE_TAG}"
132+
echo "Container arch: ${ARCH}"
120133
echo "Python Versions for wheel build: ${WHL_PYTHON_VERSIONS}"
121134
echo "Wheel Platform: ${WHL_PLATFORM}"
122135
}
@@ -129,6 +142,7 @@ show_help() {
129142
echo " [--os [ubuntu24|ubuntu22] to select Ubuntu version]"
130143
echo " [--tag tag for image]"
131144
echo " [--python-versions python versions to build for, comma separated]"
145+
echo " [--arch [x86_64|aarch64] to select target architecture]"
132146
exit 0
133147
}
134148

@@ -151,7 +165,8 @@ fi
151165
BUILD_ARGS+=" --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg BASE_IMAGE_TAG=$BASE_IMAGE_TAG"
152166
BUILD_ARGS+=" --build-arg WHL_PYTHON_VERSIONS=$WHL_PYTHON_VERSIONS"
153167
BUILD_ARGS+=" --build-arg WHL_PLATFORM=$WHL_PLATFORM"
168+
BUILD_ARGS+=" --build-arg ARCH=$ARCH"
154169

155170
show_build_options
156171

157-
docker build -f $DOCKER_FILE $BUILD_ARGS $TAG $NO_CACHE $BUILD_ARGS $BUILD_CONTEXT
172+
docker build --platform linux/$ARCH -f $DOCKER_FILE $BUILD_ARGS $TAG $NO_CACHE $BUILD_CONTEXT

meson_options.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ option('etcd_lib_path', type: 'string', value: '', description: 'Path to ETCD Li
1919
option('disable_gds_backend', type : 'boolean', value : false, description : 'disable gds backend')
2020
option('disable_mooncake_backend', type : 'boolean', value : false, description : 'disable mooncake backend')
2121
option('install_headers', type : 'boolean', value : true, description : 'install headers')
22-
option('gds_path', type: 'string', value: '/usr/local/cuda/targets/x86_64-linux/', description: 'Path to GDS CuFile install')
22+
option('gds_path', type: 'string', value: '/usr/local/cuda/', description: 'Path to GDS CuFile install')
2323
option('cudapath_inc', type: 'string', value: '', description: 'Include path for CUDA')
2424
option('cudapath_lib', type: 'string', value: '', description: 'Library path for CUDA')
2525
option('cudapath_stub', type: 'string', value: '', description: 'Extra Stub path for CUDA')

src/plugins/cuda_gds/meson.build

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
gds_path = get_option('gds_path')
1717
if gds_path != ''
18-
gds_lib_path = gds_path + '/lib'
18+
gds_lib_path = gds_path + '/lib64'
1919
gds_inc_path = gds_path + '/include'
2020
cufile_dep = declare_dependency(
2121
link_args: ['-L' + gds_lib_path, '-lcufile'],

0 commit comments

Comments
 (0)