Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 3 additions & 135 deletions .github/workflows/image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,42 +13,7 @@ concurrency:
cancel-in-progress: true

jobs:
hipblas-jobs:
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
aio: ${{ matrix.aio }}
makeflags: ${{ matrix.makeflags }}
ubuntu-version: ${{ matrix.ubuntu-version }}
ubuntu-codename: ${{ matrix.ubuntu-codename }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
matrix:
include:
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-hipblas'
base-image: "rocm/dev-ubuntu-24.04:6.4.4"
grpc-base-image: "ubuntu:24.04"
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-hipblas"
ubuntu-version: '2404'
ubuntu-codename: 'noble'

# Unified base image build - GPU drivers are now packaged in individual backends
core-image-build:
uses: ./.github/workflows/image_build.yml
with:
Expand All @@ -72,9 +37,10 @@ jobs:
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
#max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
matrix:
include:
# Unified base image for all platforms
# GPU-specific backends will be pulled at runtime and contain their own GPU libraries
- build-type: ''
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
Expand All @@ -86,101 +52,3 @@ jobs:
skip-drivers: 'false'
ubuntu-version: '2404'
ubuntu-codename: 'noble'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "9"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
aio: "-aio-gpu-nvidia-cuda-12"
ubuntu-version: '2404'
ubuntu-codename: 'noble'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-13'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
aio: "-aio-gpu-nvidia-cuda-13"
ubuntu-version: '2404'
ubuntu-codename: 'noble'
- build-type: 'vulkan'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
aio: "-aio-gpu-vulkan"
ubuntu-version: '2404'
ubuntu-codename: 'noble'
- build-type: 'intel'
platforms: 'linux/amd64'
tag-latest: 'auto'
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
grpc-base-image: "ubuntu:24.04"
tag-suffix: '-gpu-intel'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-intel"
ubuntu-version: '2404'
ubuntu-codename: 'noble'

gh-runner:
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
aio: ${{ matrix.aio }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
skip-drivers: ${{ matrix.skip-drivers }}
ubuntu-version: ${{ matrix.ubuntu-version }}
ubuntu-codename: ${{ matrix.ubuntu-codename }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
matrix:
include:
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "9"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64'
base-image: "ubuntu:24.04"
runs-on: 'ubuntu-24.04-arm'
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'true'
ubuntu-version: "2404"
ubuntu-codename: 'noble'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64-cuda-13'
base-image: "ubuntu:24.04"
runs-on: 'ubuntu-24.04-arm'
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
ubuntu-version: '2404'
ubuntu-codename: 'noble'
130 changes: 3 additions & 127 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ RUN apt-get update && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
# GPU drivers are no longer installed in the main image.
# Each backend now packages its own GPU libraries (CUDA, ROCm, SYCL, Vulkan)
# This allows for a unified base image that works with any backend.
FROM requirements AS requirements-drivers

ARG BUILD_TYPE
Expand All @@ -29,132 +31,6 @@ ARG UBUNTU_VERSION=2404
RUN mkdir -p /run/localai
RUN echo "default" > /run/localai/capability

# Vulkan requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils wget gpg-agent && \
apt-get install -y libglm-dev cmake libxcb-dri3-0 libxcb-present0 libpciaccess0 \
libpng-dev libxcb-keysyms1-dev libxcb-dri3-dev libx11-dev g++ gcc \
libwayland-dev libxrandr-dev libxcb-randr0-dev libxcb-ewmh-dev \
git python-is-python3 bison libx11-xcb-dev liblz4-dev libzstd-dev \
ocaml-core ninja-build pkg-config libxml2-dev wayland-protocols python3-jsonschema \
clang-format qtbase5-dev qt6-base-dev libxcb-glx0-dev sudo xz-utils mesa-vulkan-drivers && \
wget "https://sdk.lunarg.com/sdk/download/1.4.328.1/linux/vulkansdk-linux-x86_64-1.4.328.1.tar.xz" && \
tar -xf vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
rm vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
mkdir -p /opt/vulkan-sdk && \
mv 1.4.328.1 /opt/vulkan-sdk/ && \
cd /opt/vulkan-sdk/1.4.328.1 && \
./vulkansdk --no-deps --maxjobs \
vulkan-loader \
vulkan-validationlayers \
vulkan-extensionlayer \
vulkan-tools \
shaderc && \
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/bin/* /usr/bin/ && \
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/lib/* /usr/lib/x86_64-linux-gnu/ && \
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/include/* /usr/include/ && \
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/share/* /usr/share/ && \
rm -rf /opt/vulkan-sdk && \
ldconfig && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
echo "vulkan" > /run/localai/capability
fi
EOT

# CuBLAS requirements
RUN <<EOT bash
if ( [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "l4t" ] ) && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils
if [ "amd64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/x86_64/cuda-keyring_1.1-1_all.deb
fi
if [ "arm64" = "$TARGETARCH" ]; then
if [ "${CUDA_MAJOR_VERSION}" = "13" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/sbsa/cuda-keyring_1.1-1_all.deb
else
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/arm64/cuda-keyring_1.1-1_all.deb
fi
fi
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
if [ "arm64" = "$TARGETARCH" ]; then
apt-get install -y --no-install-recommends \
libcufile-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcudnn9-cuda-${CUDA_MAJOR_VERSION} cuda-cupti-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libnvjitlink-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
fi
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
echo "nvidia-cuda-${CUDA_MAJOR_VERSION}" > /run/localai/capability
fi
EOT

RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "arm64" ]; then
echo "nvidia-l4t-cuda-${CUDA_MAJOR_VERSION}" > /run/localai/capability
fi
EOT

# https://github.com/NVIDIA/Isaac-GR00T/issues/343
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "arm64" ]; then
wget https://developer.download.nvidia.com/compute/cudss/0.6.0/local_installers/cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0_0.6.0-1_arm64.deb && \
dpkg -i cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0_0.6.0-1_arm64.deb && \
cp /var/cudss-local-tegra-repo-ubuntu${UBUNTU_VERSION}-0.6.0/cudss-*-keyring.gpg /usr/share/keyrings/ && \
apt-get update && apt-get -y install cudss cudss-cuda-${CUDA_MAJOR_VERSION} && \
wget https://developer.download.nvidia.com/compute/nvpl/25.5/local_installers/nvpl-local-repo-ubuntu${UBUNTU_VERSION}-25.5_1.0-1_arm64.deb && \
dpkg -i nvpl-local-repo-ubuntu${UBUNTU_VERSION}-25.5_1.0-1_arm64.deb && \
cp /var/nvpl-local-repo-ubuntu${UBUNTU_VERSION}-25.5/nvpl-*-keyring.gpg /usr/share/keyrings/ && \
apt-get update && apt-get install -y nvpl
fi
EOT

# If we are building with clblas support, we need the libraries for the builds
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
libclblast-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi

RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
hipblas-dev \
rocblas-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
echo "amd" > /run/localai/capability && \
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
ldconfig \
; fi

RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
ln -s /opt/rocm-**/lib/llvm/lib/libomp.so /usr/lib/libomp.so \
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot let's add libomp.so to the new package GPU script on hipblas

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added in 676585e. The libomp.so* is now included in the rocm_libs list and the script searches in /opt/rocm/lib/llvm/lib, versioned ROCm LLVM lib paths, and architecture-specific system lib paths.

; fi

RUN expr "${BUILD_TYPE}" = intel && echo "intel" > /run/localai/capability || echo "not intel"

# Cuda
ENV PATH=/usr/local/cuda/bin:${PATH}

# HipBLAS requirements
ENV PATH=/opt/rocm/bin:${PATH}

###################################
###################################

Expand Down
6 changes: 6 additions & 0 deletions backend/Dockerfile.python
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,15 @@ EOT
COPY backend/python/${BACKEND} /${BACKEND}
COPY backend/backend.proto /${BACKEND}/backend.proto
COPY backend/python/common/ /${BACKEND}/common
COPY scripts/build/package-gpu-libs.sh /package-gpu-libs.sh

RUN cd /${BACKEND} && PORTABLE_PYTHON=true make

# Package GPU libraries into the backend's lib directory
RUN mkdir -p /${BACKEND}/lib && \
TARGET_LIB_DIR="/${BACKEND}/lib" BUILD_TYPE="${BUILD_TYPE}" CUDA_MAJOR_VERSION="${CUDA_MAJOR_VERSION}" \
bash /package-gpu-libs.sh "/${BACKEND}/lib"

FROM scratch
ARG BACKEND=rerankers
COPY --from=builder /${BACKEND}/ /
10 changes: 10 additions & 0 deletions backend/cpp/llama-cpp/package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
set -e

CURDIR=$(dirname "$(realpath $0)")
REPO_ROOT="${CURDIR}/../../.."

# Create lib directory
mkdir -p $CURDIR/package/lib
Expand Down Expand Up @@ -37,6 +38,15 @@ else
exit 1
fi

# Package GPU libraries based on BUILD_TYPE
# The GPU library packaging script will detect BUILD_TYPE and copy appropriate GPU libraries
GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
if [ -f "$GPU_LIB_SCRIPT" ]; then
echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
package_gpu_libs
fi

echo "Packaging completed successfully"
ls -liah $CURDIR/package/
ls -liah $CURDIR/package/lib/
10 changes: 10 additions & 0 deletions backend/go/stablediffusion-ggml/package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
set -e

CURDIR=$(dirname "$(realpath $0)")
REPO_ROOT="${CURDIR}/../../.."

# Create lib directory
mkdir -p $CURDIR/package/lib
Expand Down Expand Up @@ -50,6 +51,15 @@ else
exit 1
fi

# Package GPU libraries based on BUILD_TYPE
# The GPU library packaging script will detect BUILD_TYPE and copy appropriate GPU libraries
GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
if [ -f "$GPU_LIB_SCRIPT" ]; then
echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
package_gpu_libs
fi

echo "Packaging completed successfully"
ls -liah $CURDIR/package/
ls -liah $CURDIR/package/lib/
10 changes: 10 additions & 0 deletions backend/go/whisper/package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
set -e

CURDIR=$(dirname "$(realpath $0)")
REPO_ROOT="${CURDIR}/../../.."

# Create lib directory
mkdir -p $CURDIR/package/lib
Expand Down Expand Up @@ -50,6 +51,15 @@ else
exit 1
fi

# Package GPU libraries based on BUILD_TYPE
# The GPU library packaging script will detect BUILD_TYPE and copy appropriate GPU libraries
GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
if [ -f "$GPU_LIB_SCRIPT" ]; then
echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
package_gpu_libs
fi

echo "Packaging completed successfully"
ls -liah $CURDIR/package/
ls -liah $CURDIR/package/lib/
Loading
Loading