diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 3550113f531b..ce9afda154f9 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -13,42 +13,7 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  hipblas-jobs:
-    uses: ./.github/workflows/image_build.yml
-    with:
-      tag-latest: ${{ matrix.tag-latest }}
-      tag-suffix: ${{ matrix.tag-suffix }}
-      build-type: ${{ matrix.build-type }}
-      cuda-major-version: ${{ matrix.cuda-major-version }}
-      cuda-minor-version: ${{ matrix.cuda-minor-version }}
-      platforms: ${{ matrix.platforms }}
-      runs-on: ${{ matrix.runs-on }}
-      base-image: ${{ matrix.base-image }}
-      grpc-base-image: ${{ matrix.grpc-base-image }}
-      aio: ${{ matrix.aio }}
-      makeflags: ${{ matrix.makeflags }}
-      ubuntu-version: ${{ matrix.ubuntu-version }}
-      ubuntu-codename: ${{ matrix.ubuntu-codename }}
-    secrets:
-      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-    strategy:
-      matrix:
-        include:
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-hipblas'
-            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
-            grpc-base-image: "ubuntu:24.04"
-            runs-on: 'ubuntu-latest'
-            makeflags: "--jobs=3 --output-sync=target"
-            aio: "-aio-gpu-hipblas"
-            ubuntu-version: '2404'
-            ubuntu-codename: 'noble'
-
+  # Unified base image build - GPU drivers are now packaged in individual backends
   core-image-build:
     uses: ./.github/workflows/image_build.yml
     with:
@@ -72,9 +37,10 @@ jobs:
       quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
       quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
     strategy:
-      #max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
       matrix:
         include:
+          # Unified base image for all platforms
+          # GPU-specific backends will be pulled at runtime and contain their own GPU libraries
           - build-type: ''
             platforms: 'linux/amd64,linux/arm64'
             tag-latest: 'auto'
@@ -86,101 +52,3 @@ jobs:
             skip-drivers: 'false'
             ubuntu-version: '2404'
             ubuntu-codename: 'noble'
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "9"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-12'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:24.04"
-            skip-drivers: 'false'
-            makeflags: "--jobs=4 --output-sync=target"
-            aio: "-aio-gpu-nvidia-cuda-12"
-            ubuntu-version: '2404'
-            ubuntu-codename: 'noble'
-          - build-type: 'cublas'
-            cuda-major-version: "13"
-            cuda-minor-version: "0"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-13'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            makeflags: "--jobs=4 --output-sync=target"
-            aio: "-aio-gpu-nvidia-cuda-13"
-            ubuntu-version: '2404'
-            ubuntu-codename: 'noble'
-          - build-type: 'vulkan'
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-vulkan'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:24.04"
-            skip-drivers: 'false'
-            makeflags: "--jobs=4 --output-sync=target"
-            aio: "-aio-gpu-vulkan"
-            ubuntu-version: '2404'
-            ubuntu-codename: 'noble'
-          - build-type: 'intel'
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
-            grpc-base-image: "ubuntu:24.04"
-            tag-suffix: '-gpu-intel'
-            runs-on: 'ubuntu-latest'
-            makeflags: "--jobs=3 --output-sync=target"
-            aio: "-aio-gpu-intel"
-            ubuntu-version: '2404'
-            ubuntu-codename: 'noble'
-
-  gh-runner:
-    uses: ./.github/workflows/image_build.yml
-    with:
-      tag-latest: ${{ matrix.tag-latest }}
-      tag-suffix: ${{ matrix.tag-suffix }}
-      build-type: ${{ matrix.build-type }}
-      cuda-major-version: ${{ matrix.cuda-major-version }}
-      cuda-minor-version: ${{ matrix.cuda-minor-version }}
-      platforms: ${{ matrix.platforms }}
-      runs-on: ${{ matrix.runs-on }}
-      aio: ${{ matrix.aio }}
-      base-image: ${{ matrix.base-image }}
-      grpc-base-image: ${{ matrix.grpc-base-image }}
-      makeflags: ${{ matrix.makeflags }}
-      skip-drivers: ${{ matrix.skip-drivers }}
-      ubuntu-version: ${{ matrix.ubuntu-version }}
-      ubuntu-codename: ${{ matrix.ubuntu-codename }}
-    secrets:
-      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-    strategy:
-      matrix:
-        include:
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "9"
-            platforms: 'linux/arm64'
-            tag-latest: 'auto'
-            tag-suffix: '-nvidia-l4t-arm64'
-            base-image: "ubuntu:24.04"
-            runs-on: 'ubuntu-24.04-arm'
-            makeflags: "--jobs=4 --output-sync=target"
-            skip-drivers: 'true'
-            ubuntu-version: "2404"
-            ubuntu-codename: 'noble'
-          - build-type: 'cublas'
-            cuda-major-version: "13"
-            cuda-minor-version: "0"
-            platforms: 'linux/arm64'
-            tag-latest: 'auto'
-            tag-suffix: '-nvidia-l4t-arm64-cuda-13'
-            base-image: "ubuntu:24.04"
-            runs-on: 'ubuntu-24.04-arm'
-            makeflags: "--jobs=4 --output-sync=target"
-            skip-drivers: 'false'
-            ubuntu-version: '2404'
-            ubuntu-codename: 'noble'
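The matrix entries removed above were the source of the per-accelerator image tags. A rough sketch of what the consolidation means at pull time, assuming the Docker Hub repository name and the previously published tag suffixes; whether the old suffixed tags remain published for backwards compatibility is not decided by this diff:

# Before: one image per accelerator, selected by tag suffix
# (suffixes taken from the removed matrix entries)
docker pull localai/localai:latest-gpu-hipblas
docker pull localai/localai:latest-gpu-nvidia-cuda-12

# After: a single unified image for all platforms; the GPU-specific
# backend is pulled at runtime and carries its own GPU libraries
docker pull localai/localai:latest
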
diff --git a/Dockerfile b/Dockerfile
index 4f1c125548f0..73d6a7b1304f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,7 +14,9 @@ RUN apt-get update && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
+# GPU drivers are no longer installed in the main image.
+# Each backend now packages its own GPU libraries (CUDA, ROCm, SYCL, Vulkan).
+# This allows for a unified base image that works with any backend.
 FROM requirements AS requirements-drivers
 
 ARG BUILD_TYPE
@@ -29,132 +31,6 @@ ARG UBUNTU_VERSION=2404
 
 RUN mkdir -p /run/localai
 RUN echo "default" > /run/localai/capability
 
-# Vulkan requirements
-RUN <<EOT
-    if … ; then
-        …
-        … > /run/localai/capability
-    fi
-EOT
-
-# CuBLAS requirements
-RUN <<EOT
-    if … ; then
-        …
-        … > /run/localai/capability
-    fi
-EOT
-
-RUN <<EOT
-    if … ; then
-        …
-        … > /run/localai/capability
-    fi
-EOT
-
-# https://github.com/NVIDIA/Isaac-GR00T/issues/343
-RUN if … ; then \
-    … && \
-    … > /run/localai/capability && \
-    # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
-    # to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
-    ldconfig \
-    ; fi
-
-RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
-    ln -s /opt/rocm-**/lib/llvm/lib/libomp.so /usr/lib/libomp.so \
-    ; fi
-
-RUN expr "${BUILD_TYPE}" = intel && echo "intel" > /run/localai/capability || echo "not intel"
-
-# Cuda
-ENV PATH=/usr/local/cuda/bin:${PATH}
-
-# HipBLAS requirements
-ENV PATH=/opt/rocm/bin:${PATH}
-
 ###################################
 ###################################
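With the driver blocks gone, the base image always reports the "default" capability and should carry no GPU runtime loaders. A quick sanity check against a built image, assuming the unified tag below:

# Assumed image tag; --entrypoint bypasses the image's default entrypoint
IMAGE=localai/localai:latest

docker run --rm --entrypoint cat "$IMAGE" /run/localai/capability   # expected: default

# No CUDA/ROCm/SYCL/Vulkan loaders should be registered in the base image
docker run --rm --entrypoint sh "$IMAGE" -c \
    'ldconfig -p | grep -E "libcudart|libamdhip64|libsycl|libvulkan" || echo "no GPU runtimes bundled"'
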
diff --git a/backend/Dockerfile.python b/backend/Dockerfile.python
index 1a1c43b1822f..a4fa25c4f88a 100644
--- a/backend/Dockerfile.python
+++ b/backend/Dockerfile.python
@@ -174,9 +174,15 @@ EOT
 COPY backend/python/${BACKEND} /${BACKEND}
 COPY backend/backend.proto /${BACKEND}/backend.proto
 COPY backend/python/common/ /${BACKEND}/common
+COPY scripts/build/package-gpu-libs.sh /package-gpu-libs.sh
 
 RUN cd /${BACKEND} && PORTABLE_PYTHON=true make
 
+# Package GPU libraries into the backend's lib directory
+RUN mkdir -p /${BACKEND}/lib && \
+    TARGET_LIB_DIR="/${BACKEND}/lib" BUILD_TYPE="${BUILD_TYPE}" CUDA_MAJOR_VERSION="${CUDA_MAJOR_VERSION}" \
+    bash /package-gpu-libs.sh "/${BACKEND}/lib"
+
 FROM scratch
 ARG BACKEND=rerankers
 COPY --from=builder /${BACKEND}/ /
\ No newline at end of file
diff --git a/backend/cpp/llama-cpp/package.sh b/backend/cpp/llama-cpp/package.sh
index c911c081bb07..b1b7cd9a818a 100755
--- a/backend/cpp/llama-cpp/package.sh
+++ b/backend/cpp/llama-cpp/package.sh
@@ -6,6 +6,7 @@ set -e
 
 CURDIR=$(dirname "$(realpath $0)")
+REPO_ROOT="${CURDIR}/../../.."
 
 # Create lib directory
 mkdir -p $CURDIR/package/lib
@@ -37,6 +38,15 @@ else
     exit 1
 fi
 
+# Package GPU libraries based on BUILD_TYPE
+# The GPU library packaging script will detect BUILD_TYPE and copy appropriate GPU libraries
+GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
+if [ -f "$GPU_LIB_SCRIPT" ]; then
+    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
+    source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
+    package_gpu_libs
+fi
+
 echo "Packaging completed successfully"
 ls -liah $CURDIR/package/
 ls -liah $CURDIR/package/lib/
\ No newline at end of file
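The same source-then-call pattern recurs in the stablediffusion-ggml and whisper package scripts below. A minimal standalone sketch of the two invocation modes the new script supports, per its usage header and the BASH_SOURCE guard at its end; paths are illustrative:

#!/bin/bash
set -e

# 1) Sourced: function definitions are loaded, TARGET_LIB_DIR is taken
#    from $1, and the caller invokes package_gpu_libs itself.
source scripts/build/package-gpu-libs.sh ./package/lib
package_gpu_libs

# 2) Executed directly: the guard at the bottom of the script calls
#    package_gpu_libs automatically.
BUILD_TYPE=cublas CUDA_MAJOR_VERSION=12 \
    bash scripts/build/package-gpu-libs.sh ./package/lib
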
diff --git a/backend/go/stablediffusion-ggml/package.sh b/backend/go/stablediffusion-ggml/package.sh
index f8cda2f41eee..34b158c41faa 100755
--- a/backend/go/stablediffusion-ggml/package.sh
+++ b/backend/go/stablediffusion-ggml/package.sh
@@ -6,6 +6,7 @@ set -e
 
 CURDIR=$(dirname "$(realpath $0)")
+REPO_ROOT="${CURDIR}/../../.."
 
 # Create lib directory
 mkdir -p $CURDIR/package/lib
@@ -50,6 +51,15 @@ else
     exit 1
 fi
 
+# Package GPU libraries based on BUILD_TYPE
+# The GPU library packaging script will detect BUILD_TYPE and copy appropriate GPU libraries
+GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
+if [ -f "$GPU_LIB_SCRIPT" ]; then
+    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
+    source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
+    package_gpu_libs
+fi
+
 echo "Packaging completed successfully"
 ls -liah $CURDIR/package/
 ls -liah $CURDIR/package/lib/
diff --git a/backend/go/whisper/package.sh b/backend/go/whisper/package.sh
index 9173706f2854..dfecdf5c68cb 100755
--- a/backend/go/whisper/package.sh
+++ b/backend/go/whisper/package.sh
@@ -6,6 +6,7 @@ set -e
 
 CURDIR=$(dirname "$(realpath $0)")
+REPO_ROOT="${CURDIR}/../../.."
 
 # Create lib directory
 mkdir -p $CURDIR/package/lib
@@ -50,6 +51,15 @@ else
     exit 1
 fi
 
+# Package GPU libraries based on BUILD_TYPE
+# The GPU library packaging script will detect BUILD_TYPE and copy appropriate GPU libraries
+GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
+if [ -f "$GPU_LIB_SCRIPT" ]; then
+    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
+    source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
+    package_gpu_libs
+fi
+
 echo "Packaging completed successfully"
 ls -liah $CURDIR/package/
 ls -liah $CURDIR/package/lib/
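After any of these package scripts run, the bundled libraries can be checked against the backend binary. A hedged sketch with an illustrative binary name; the actual artifact layout under package/ is defined by each backend's Makefile, not by this diff:

cd backend/go/whisper
BUILD_TYPE=cublas ./package.sh

# With the packaged lib dir on the loader path, CUDA deps should resolve
# from package/lib rather than from the system (binary name illustrative)
LD_LIBRARY_PATH="$PWD/package/lib" ldd package/whisper | grep -E "cudart|cublas"
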
diff --git a/backend/python/common/libbackend.sh b/backend/python/common/libbackend.sh
index eb55f43d9547..7956b3c10a5a 100644
--- a/backend/python/common/libbackend.sh
+++ b/backend/python/common/libbackend.sh
@@ -465,6 +465,14 @@ function startBackend() {
     if [ "x${PORTABLE_PYTHON}" == "xtrue" ] || [ -x "$(_portable_python)" ]; then
         _makeVenvPortable --update-pyvenv-cfg
     fi
+
+    # Set up GPU library paths if a lib directory exists
+    # This allows backends to include their own GPU libraries (CUDA, ROCm, etc.)
+    if [ -d "${EDIR}/lib" ]; then
+        export LD_LIBRARY_PATH="${EDIR}/lib:${LD_LIBRARY_PATH:-}"
+        echo "Added ${EDIR}/lib to LD_LIBRARY_PATH for GPU libraries"
+    fi
+
     if [ ! -z "${BACKEND_FILE:-}" ]; then
         exec "${EDIR}/venv/bin/python" "${BACKEND_FILE}" "$@"
     elif [ -e "${MY_DIR}/server.py" ]; then
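Because startBackend() prepends ${EDIR}/lib, a library bundled with the backend shadows any system copy with the same soname. A small sketch of that precedence, with illustrative paths:

# Illustrative backend install dir; libbackend.sh computes EDIR per backend
EDIR=/backends/cuda12-whisper
export LD_LIBRARY_PATH="${EDIR}/lib:${LD_LIBRARY_PATH:-}"

# The dynamic loader searches LD_LIBRARY_PATH left to right, so the bundled
# copy wins; this can succeed even on a host with no system-wide CUDA install
python3 -c 'import ctypes; ctypes.CDLL("libcudart.so.12"); print("loaded bundled cudart")'
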
diff --git a/scripts/build/package-gpu-libs.sh b/scripts/build/package-gpu-libs.sh
new file mode 100755
index 000000000000..7dc434fd570b
--- /dev/null
+++ b/scripts/build/package-gpu-libs.sh
@@ -0,0 +1,281 @@
+#!/bin/bash
+# Script to package GPU libraries based on BUILD_TYPE
+# This script copies GPU-specific runtime libraries to a target lib directory
+# so backends can run in isolation with their own GPU libraries.
+#
+# Usage: source package-gpu-libs.sh TARGET_LIB_DIR
+#        package_gpu_libs
+#
+# Environment variables:
+#   BUILD_TYPE         - The GPU build type (cublas, l4t, hipblas, sycl_f16, sycl_f32, intel, vulkan)
+#   CUDA_MAJOR_VERSION - CUDA major version (for cublas/l4t builds)
+#
+# This enables backends to be fully self-contained and run on a unified base image
+# without requiring GPU drivers to be pre-installed in the host image.
+
+set -e
+
+TARGET_LIB_DIR="${1:-./lib}"
+
+# Create target directory if it doesn't exist
+mkdir -p "$TARGET_LIB_DIR"
+
+# Helper function to copy a library and follow symlinks
+copy_lib() {
+    local src="$1"
+    if [ -e "$src" ]; then
+        cp -arfLv "$src" "$TARGET_LIB_DIR/" 2>/dev/null || true
+    fi
+}
+
+# Helper function to copy all matching libraries from a glob pattern
+copy_libs_glob() {
+    local pattern="$1"
+    # Use nullglob option to handle non-matching patterns gracefully
+    local old_nullglob=$(shopt -p nullglob)
+    shopt -s nullglob
+    local matched=($pattern)
+    eval "$old_nullglob"
+    for lib in "${matched[@]}"; do
+        if [ -e "$lib" ]; then
+            copy_lib "$lib"
+        fi
+    done
+}
+
+# Package NVIDIA CUDA libraries
+package_cuda_libs() {
+    echo "Packaging CUDA libraries for BUILD_TYPE=${BUILD_TYPE}..."
+
+    local cuda_lib_paths=(
+        "/usr/local/cuda/lib64"
+        "/usr/local/cuda-${CUDA_MAJOR_VERSION:-}/lib64"
+        "/usr/lib/x86_64-linux-gnu"
+        "/usr/lib/aarch64-linux-gnu"
+    )
+
+    # Core CUDA runtime libraries
+    local cuda_libs=(
+        "libcudart.so*"
+        "libcublas.so*"
+        "libcublasLt.so*"
+        "libcufft.so*"
+        "libcurand.so*"
+        "libcusparse.so*"
+        "libcusolver.so*"
+        "libnvrtc.so*"
+        "libnvrtc-builtins.so*"
+        "libcudnn.so*"
+        "libcudnn_ops.so*"
+        "libcudnn_cnn.so*"
+        "libnvJitLink.so*"
+        "libnvinfer.so*"
+        "libnvonnxparser.so*"
+    )
+
+    for lib_path in "${cuda_lib_paths[@]}"; do
+        if [ -d "$lib_path" ]; then
+            for lib_pattern in "${cuda_libs[@]}"; do
+                copy_libs_glob "${lib_path}/${lib_pattern}"
+            done
+        fi
+    done
+
+    # Copy CUDA target directory for runtime compilation support
+    if [ -d "/usr/local/cuda/targets" ]; then
+        mkdir -p "$TARGET_LIB_DIR/../cuda"
+        cp -arfL /usr/local/cuda/targets "$TARGET_LIB_DIR/../cuda/" 2>/dev/null || true
+    fi
+
+    echo "CUDA libraries packaged successfully"
+}
+
+# Package AMD ROCm/HIPBlas libraries
+package_rocm_libs() {
+    echo "Packaging ROCm/HIPBlas libraries for BUILD_TYPE=${BUILD_TYPE}..."
+
+    local rocm_lib_paths=(
+        "/opt/rocm/lib"
+        "/opt/rocm/lib64"
+        "/opt/rocm/hip/lib"
+    )
+
+    # Find the actual ROCm versioned directory
+    for rocm_dir in /opt/rocm-*; do
+        if [ -d "$rocm_dir/lib" ]; then
+            rocm_lib_paths+=("$rocm_dir/lib")
+        fi
+    done
+
+    # Core ROCm/HIP runtime libraries
+    local rocm_libs=(
+        "libamdhip64.so*"
+        "libhipblas.so*"
+        "librocblas.so*"
+        "librocrand.so*"
+        "librocsparse.so*"
+        "librocsolver.so*"
+        "librocfft.so*"
+        "libMIOpen.so*"
+        "libroctx64.so*"
+        "libhsa-runtime64.so*"
+        "libamd_comgr.so*"
+        "libhip_hcc.so*"
+        "libhiprtc.so*"
+    )
+
+    for lib_path in "${rocm_lib_paths[@]}"; do
+        if [ -d "$lib_path" ]; then
+            for lib_pattern in "${rocm_libs[@]}"; do
+                copy_libs_glob "${lib_path}/${lib_pattern}"
+            done
+        fi
+    done
+
+    # Copy rocblas library data (tuning files, etc.)
+    local old_nullglob=$(shopt -p nullglob)
+    shopt -s nullglob
+    local rocm_dirs=(/opt/rocm /opt/rocm-*)
+    eval "$old_nullglob"
+    for rocm_base in "${rocm_dirs[@]}"; do
+        if [ -d "$rocm_base/lib/rocblas" ]; then
+            mkdir -p "$TARGET_LIB_DIR/rocblas"
+            cp -arfL "$rocm_base/lib/rocblas/"* "$TARGET_LIB_DIR/rocblas/" 2>/dev/null || true
+        fi
+    done
+
+    # Copy libomp from LLVM (required for ROCm)
+    shopt -s nullglob
+    local omp_libs=(/opt/rocm*/lib/llvm/lib/libomp.so*)
+    eval "$old_nullglob"
+    for omp_path in "${omp_libs[@]}"; do
+        if [ -e "$omp_path" ]; then
+            copy_lib "$omp_path"
+        fi
+    done
+
+    echo "ROCm libraries packaged successfully"
+}
+
+# Package Intel oneAPI/SYCL libraries
+package_intel_libs() {
+    echo "Packaging Intel oneAPI/SYCL libraries for BUILD_TYPE=${BUILD_TYPE}..."
+
+    local intel_lib_paths=(
+        "/opt/intel/oneapi/compiler/latest/lib"
+        "/opt/intel/oneapi/mkl/latest/lib/intel64"
+        "/opt/intel/oneapi/tbb/latest/lib/intel64/gcc4.8"
+    )
+
+    # Core Intel oneAPI runtime libraries
+    local intel_libs=(
+        "libsycl.so*"
+        "libOpenCL.so*"
+        "libmkl_core.so*"
+        "libmkl_intel_lp64.so*"
+        "libmkl_intel_thread.so*"
+        "libmkl_sequential.so*"
+        "libmkl_sycl.so*"
+        "libiomp5.so*"
+        "libsvml.so*"
+        "libirng.so*"
+        "libimf.so*"
+        "libintlc.so*"
+        "libtbb.so*"
+        "libtbbmalloc.so*"
+        "libpi_level_zero.so*"
+        "libpi_opencl.so*"
+        "libze_loader.so*"
+    )
+
+    for lib_path in "${intel_lib_paths[@]}"; do
+        if [ -d "$lib_path" ]; then
+            for lib_pattern in "${intel_libs[@]}"; do
+                copy_libs_glob "${lib_path}/${lib_pattern}"
+            done
+        fi
+    done
+
+    echo "Intel oneAPI libraries packaged successfully"
+}
+
+# Package Vulkan libraries
+package_vulkan_libs() {
+    echo "Packaging Vulkan libraries for BUILD_TYPE=${BUILD_TYPE}..."
+
+    local vulkan_lib_paths=(
+        "/usr/lib/x86_64-linux-gnu"
+        "/usr/lib/aarch64-linux-gnu"
+        "/usr/local/lib"
+    )
+
+    # Core Vulkan runtime libraries
+    local vulkan_libs=(
+        "libvulkan.so*"
+        "libshaderc_shared.so*"
+        "libSPIRV.so*"
+        "libSPIRV-Tools.so*"
+        "libglslang.so*"
+    )
+
+    for lib_path in "${vulkan_lib_paths[@]}"; do
+        if [ -d "$lib_path" ]; then
+            for lib_pattern in "${vulkan_libs[@]}"; do
+                copy_libs_glob "${lib_path}/${lib_pattern}"
+            done
+        fi
+    done
+
+    # Copy Vulkan ICD files
+    if [ -d "/usr/share/vulkan/icd.d" ]; then
+        mkdir -p "$TARGET_LIB_DIR/../vulkan/icd.d"
+        cp -arfL /usr/share/vulkan/icd.d/* "$TARGET_LIB_DIR/../vulkan/icd.d/" 2>/dev/null || true
+    fi
+
+    echo "Vulkan libraries packaged successfully"
+}
+
+# Main function to package GPU libraries based on BUILD_TYPE
+package_gpu_libs() {
+    local build_type="${BUILD_TYPE:-}"
+
+    echo "Packaging GPU libraries for BUILD_TYPE=${build_type}..."
+
+    case "$build_type" in
+        cublas|l4t)
+            package_cuda_libs
+            ;;
+        hipblas)
+            package_rocm_libs
+            ;;
+        sycl_f16|sycl_f32|intel)
+            package_intel_libs
+            ;;
+        vulkan)
+            package_vulkan_libs
+            ;;
+        ""|cpu)
+            echo "No GPU libraries to package for BUILD_TYPE=${build_type}"
+            ;;
+        *)
+            echo "Unknown BUILD_TYPE: ${build_type}, skipping GPU library packaging"
+            ;;
+    esac
+
+    echo "GPU library packaging complete. Contents of ${TARGET_LIB_DIR}:"
+    ls -la "$TARGET_LIB_DIR/" 2>/dev/null || echo "  (empty or not created)"
+}
+
+# Export the functions so they can be sourced and called
+export -f package_gpu_libs
+export -f copy_lib
+export -f copy_libs_glob
+export -f package_cuda_libs
+export -f package_rocm_libs
+export -f package_intel_libs
+export -f package_vulkan_libs
+
+# If script is run directly (not sourced), execute the packaging
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+    package_gpu_libs
+fi
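An end-to-end sketch of the new script used standalone for a ROCm backend; it assumes a ROCm toolchain under /opt/rocm* as the script expects, and a scratch target directory:

BUILD_TYPE=hipblas bash scripts/build/package-gpu-libs.sh /tmp/mybackend/lib

# Expect the HIP/rocBLAS runtimes, the rocblas/ tuning-data directory, and
# libomp copied out of the ROCm LLVM tree
ls /tmp/mybackend/lib | grep -E "libamdhip64|libhipblas|librocblas|libomp"
ls /tmp/mybackend/lib/rocblas | head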