Skip to content

Commit

Permalink
support the llama.cpp CUDA backend (#2310)
Browse files Browse the repository at this point in the history
* rebase onto llama.cpp commit ggerganov/llama.cpp@d46dbc76f
* support for CUDA backend (enabled by default)
* partial support for Occam's Vulkan backend (disabled by default)
* partial support for HIP/ROCm backend (disabled by default)
* sync llama.cpp.cmake with upstream llama.cpp CMakeLists.txt
* changes to GPT4All backend, bindings, and chat UI to handle choice of llama.cpp backend (Kompute or CUDA)
* ship CUDA runtime with installed version
* make device selection in the UI on macOS actually do something
* model whitelist: remove dbrx, mamba, persimmon, plamo; add internlm and starcoder2

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
  • Loading branch information
cebtenzzre authored May 15, 2024
1 parent a618ca5 commit d2a99d9
Show file tree
Hide file tree
Showing 22 changed files with 1,326 additions and 739 deletions.
45 changes: 41 additions & 4 deletions .circleci/continue_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,9 @@ jobs:
command: |
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list http://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
sudo apt update && sudo apt install -y libfontconfig1 libfreetype6 libx11-6 libx11-xcb1 libxext6 libxfixes3 libxi6 libxrender1 libxcb1 libxcb-cursor0 libxcb-glx0 libxcb-keysyms1 libxcb-image0 libxcb-shm0 libxcb-icccm4 libxcb-sync1 libxcb-xfixes0 libxcb-shape0 libxcb-randr0 libxcb-render-util0 libxcb-util1 libxcb-xinerama0 libxcb-xkb1 libxkbcommon0 libxkbcommon-x11-0 bison build-essential flex gperf python3 gcc g++ libgl1-mesa-dev libwayland-dev vulkan-sdk patchelf
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt update && sudo apt install -y libfontconfig1 libfreetype6 libx11-6 libx11-xcb1 libxext6 libxfixes3 libxi6 libxrender1 libxcb1 libxcb-cursor0 libxcb-glx0 libxcb-keysyms1 libxcb-image0 libxcb-shm0 libxcb-icccm4 libxcb-sync1 libxcb-xfixes0 libxcb-shape0 libxcb-randr0 libxcb-render-util0 libxcb-util1 libxcb-xinerama0 libxcb-xkb1 libxkbcommon0 libxkbcommon-x11-0 bison build-essential flex gperf python3 gcc g++ libgl1-mesa-dev libwayland-dev vulkan-sdk patchelf cuda-compiler-12-4 libcublas-dev-12-4 libnvidia-compute-550-server libmysqlclient21 libodbc2 libpq5
- run:
name: Installing Qt
command: |
Expand All @@ -121,6 +123,7 @@ jobs:
set -eo pipefail
export CMAKE_PREFIX_PATH=~/Qt/6.5.1/gcc_64/lib/cmake
export PATH=$PATH:$HOME/Qt/Tools/QtInstallerFramework/4.7/bin
export PATH=$PATH:/usr/local/cuda/bin
mkdir build
cd build
mkdir upload
Expand Down Expand Up @@ -162,6 +165,11 @@ jobs:
command: |
Invoke-WebRequest -Uri https://sdk.lunarg.com/sdk/download/1.3.261.1/windows/VulkanSDK-1.3.261.1-Installer.exe -OutFile VulkanSDK-1.3.261.1-Installer.exe
.\VulkanSDK-1.3.261.1-Installer.exe --accept-licenses --default-answer --confirm-command install
- run:
name: Install CUDA Toolkit
command: |
Invoke-WebRequest -Uri https://developer.download.nvidia.com/compute/cuda/12.4.1/network_installers/cuda_12.4.1_windows_network.exe -OutFile cuda_12.4.1_windows_network.exe
.\cuda_12.4.1_windows_network.exe -s cudart_12.4 nvcc_12.4 cublas_12.4 cublas_dev_12.4
- run:
name: Build
command: |
Expand Down Expand Up @@ -218,7 +226,9 @@ jobs:
command: |
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list http://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
sudo apt update && sudo apt install -y libfontconfig1 libfreetype6 libx11-6 libx11-xcb1 libxext6 libxfixes3 libxi6 libxrender1 libxcb1 libxcb-cursor0 libxcb-glx0 libxcb-keysyms1 libxcb-image0 libxcb-shm0 libxcb-icccm4 libxcb-sync1 libxcb-xfixes0 libxcb-shape0 libxcb-randr0 libxcb-render-util0 libxcb-util1 libxcb-xinerama0 libxcb-xkb1 libxkbcommon0 libxkbcommon-x11-0 bison build-essential flex gperf python3 gcc g++ libgl1-mesa-dev libwayland-dev vulkan-sdk
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt update && sudo apt install -y libfontconfig1 libfreetype6 libx11-6 libx11-xcb1 libxext6 libxfixes3 libxi6 libxrender1 libxcb1 libxcb-cursor0 libxcb-glx0 libxcb-keysyms1 libxcb-image0 libxcb-shm0 libxcb-icccm4 libxcb-sync1 libxcb-xfixes0 libxcb-shape0 libxcb-randr0 libxcb-render-util0 libxcb-util1 libxcb-xinerama0 libxcb-xkb1 libxkbcommon0 libxkbcommon-x11-0 bison build-essential flex gperf python3 gcc g++ libgl1-mesa-dev libwayland-dev vulkan-sdk cuda-compiler-12-4 libcublas-dev-12-4 libnvidia-compute-550-server libmysqlclient21 libodbc2 libpq5
- run:
name: Installing Qt
command: |
Expand All @@ -235,6 +245,7 @@ jobs:
name: Build
command: |
export CMAKE_PREFIX_PATH=~/Qt/6.5.1/gcc_64/lib/cmake
export PATH=$PATH:/usr/local/cuda/bin
~/Qt/Tools/CMake/bin/cmake -DCMAKE_BUILD_TYPE=Release -S gpt4all-chat -B build
~/Qt/Tools/CMake/bin/cmake --build build --target all
Expand Down Expand Up @@ -269,6 +280,11 @@ jobs:
command: |
Invoke-WebRequest -Uri https://sdk.lunarg.com/sdk/download/1.3.261.1/windows/VulkanSDK-1.3.261.1-Installer.exe -OutFile VulkanSDK-1.3.261.1-Installer.exe
.\VulkanSDK-1.3.261.1-Installer.exe --accept-licenses --default-answer --confirm-command install
- run:
name: Install CUDA Toolkit
command: |
Invoke-WebRequest -Uri https://developer.download.nvidia.com/compute/cuda/12.4.1/network_installers/cuda_12.4.1_windows_network.exe -OutFile cuda_12.4.1_windows_network.exe
.\cuda_12.4.1_windows_network.exe -s cudart_12.4 nvcc_12.4 cublas_12.4 cublas_dev_12.4
- run:
name: Build
command: |
Expand Down Expand Up @@ -394,12 +410,15 @@ jobs:
command: |
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list http://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get install -y cmake build-essential vulkan-sdk
sudo apt-get install -y cmake build-essential vulkan-sdk cuda-compiler-12-4 libcublas-dev-12-4 libnvidia-compute-550-server libmysqlclient21 libodbc2 libpq5
pip install setuptools wheel cmake
- run:
name: Build C library
command: |
export PATH=$PATH:/usr/local/cuda/bin
git submodule update --init --recursive
cd gpt4all-backend
cmake -B build
Expand Down Expand Up @@ -459,6 +478,11 @@ jobs:
command: |
Invoke-WebRequest -Uri https://sdk.lunarg.com/sdk/download/1.3.261.1/windows/VulkanSDK-1.3.261.1-Installer.exe -OutFile VulkanSDK-1.3.261.1-Installer.exe
.\VulkanSDK-1.3.261.1-Installer.exe --accept-licenses --default-answer --confirm-command install
- run:
name: Install CUDA Toolkit
command: |
Invoke-WebRequest -Uri https://developer.download.nvidia.com/compute/cuda/12.4.1/network_installers/cuda_12.4.1_windows_network.exe -OutFile cuda_12.4.1_windows_network.exe
.\cuda_12.4.1_windows_network.exe -s cudart_12.4 nvcc_12.4 cublas_12.4 cublas_dev_12.4
- run:
name: Install dependencies
command:
Expand Down Expand Up @@ -530,11 +554,14 @@ jobs:
command: |
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list http://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get install -y cmake build-essential vulkan-sdk
sudo apt-get install -y cmake build-essential vulkan-sdk cuda-compiler-12-4 libcublas-dev-12-4 libnvidia-compute-550-server libmysqlclient21 libodbc2 libpq5
- run:
name: Build Libraries
command: |
export PATH=$PATH:/usr/local/cuda/bin
cd gpt4all-backend
mkdir -p runtimes/build
cd runtimes/build
Expand Down Expand Up @@ -599,6 +626,11 @@ jobs:
command: |
Invoke-WebRequest -Uri https://sdk.lunarg.com/sdk/download/1.3.261.1/windows/VulkanSDK-1.3.261.1-Installer.exe -OutFile VulkanSDK-1.3.261.1-Installer.exe
.\VulkanSDK-1.3.261.1-Installer.exe --accept-licenses --default-answer --confirm-command install
- run:
name: Install CUDA Toolkit
command: |
Invoke-WebRequest -Uri https://developer.download.nvidia.com/compute/cuda/12.4.1/network_installers/cuda_12.4.1_windows_network.exe -OutFile cuda_12.4.1_windows_network.exe
.\cuda_12.4.1_windows_network.exe -s cudart_12.4 nvcc_12.4 cublas_12.4 cublas_dev_12.4
- run:
name: Install dependencies
command: |
Expand Down Expand Up @@ -642,6 +674,11 @@ jobs:
command: |
Invoke-WebRequest -Uri https://sdk.lunarg.com/sdk/download/1.3.261.1/windows/VulkanSDK-1.3.261.1-Installer.exe -OutFile VulkanSDK-1.3.261.1-Installer.exe
.\VulkanSDK-1.3.261.1-Installer.exe --accept-licenses --default-answer --confirm-command install
- run:
name: Install CUDA Toolkit
command: |
Invoke-WebRequest -Uri https://developer.download.nvidia.com/compute/cuda/12.4.1/network_installers/cuda_12.4.1_windows_network.exe -OutFile cuda_12.4.1_windows_network.exe
.\cuda_12.4.1_windows_network.exe -s cudart_12.4 nvcc_12.4 cublas_12.4 cublas_dev_12.4
- run:
name: Install dependencies
command: |
Expand Down
30 changes: 0 additions & 30 deletions LICENSE_SOM.txt

This file was deleted.

80 changes: 63 additions & 17 deletions gpt4all-backend/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,23 @@ cmake_minimum_required(VERSION 3.16)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

if(APPLE)
option(BUILD_UNIVERSAL "Build a Universal binary on macOS" ON)
if(BUILD_UNIVERSAL)
# Platform-specific build options: every non-Apple platform chooses which GPU
# backends llmodel is built with, while macOS only exposes the Universal-binary
# switch.
if (NOT APPLE)
    option(LLMODEL_KOMPUTE "llmodel: use Kompute" ON)
    option(LLMODEL_VULKAN "llmodel: use Vulkan" OFF)
    option(LLMODEL_CUDA "llmodel: use CUDA" ON)
    option(LLMODEL_ROCM "llmodel: use ROCm" OFF)
else()
    option(BUILD_UNIVERSAL "Build a Universal binary on macOS" ON)
endif()

if (APPLE)
if (BUILD_UNIVERSAL)
# Build a Universal binary on macOS
# This requires that the found Qt library is compiled as Universal binaries.
set(CMAKE_OSX_ARCHITECTURES "arm64;x86_64" CACHE STRING "" FORCE)
else()
# Build for the host architecture on macOS
if(NOT CMAKE_OSX_ARCHITECTURES)
if (NOT CMAKE_OSX_ARCHITECTURES)
set(CMAKE_OSX_ARCHITECTURES "${CMAKE_HOST_SYSTEM_PROCESSOR}" CACHE STRING "" FORCE)
endif()
endif()
Expand Down Expand Up @@ -39,36 +47,70 @@ else()
message(STATUS "Interprocedural optimization support detected")
endif()

set(DIRECTORY llama.cpp-mainline)
include(llama.cpp.cmake)

set(BUILD_VARIANTS default avxonly)
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
set(BUILD_VARIANTS ${BUILD_VARIANTS} metal)
# Assemble the list of backend variants to build. Every backend except Metal
# contributes a regular variant plus an "-avxonly" twin.
set(BUILD_VARIANTS)
# Pattern selecting the variants that also build the GPT-J library (matched
# with MATCHES inside the variant loop, so it covers the "-avxonly" twin too).
set(GPTJ_BUILD_VARIANT cpu)
if (APPLE)
    list(APPEND BUILD_VARIANTS metal)
endif()
if (LLMODEL_KOMPUTE)
    list(APPEND BUILD_VARIANTS kompute kompute-avxonly)
    set(GPTJ_BUILD_VARIANT kompute)
else()
    # Without Kompute, the plain CPU variants are placed at the front of the list.
    list(PREPEND BUILD_VARIANTS cpu cpu-avxonly)
endif()
if (LLMODEL_VULKAN)
    list(APPEND BUILD_VARIANTS vulkan vulkan-avxonly)
endif()
if (LLMODEL_CUDA)
    include(CheckLanguage)
    check_language(CUDA)
    if (NOT CMAKE_CUDA_COMPILER)
        # Only a hint for the user: enable_language() below is still attempted
        # and reports the actual failure if no CUDA compiler is usable.
        message(WARNING "CUDA Toolkit not found. To build without CUDA, use -DLLMODEL_CUDA=OFF.")
    endif()
    enable_language(CUDA)
    list(APPEND BUILD_VARIANTS cuda cuda-avxonly)
endif()
if (LLMODEL_ROCM)
    # HIP became a first-class CMake language in 3.21, which is newer than the
    # minimum version this project declares; fail with a clear message rather
    # than an obscure enable_language() error on older CMake releases.
    if (CMAKE_VERSION VERSION_LESS 3.21)
        message(FATAL_ERROR "LLMODEL_ROCM requires CMake 3.21 or newer (HIP language support).")
    endif()
    enable_language(HIP)
    list(APPEND BUILD_VARIANTS rocm rocm-avxonly)
endif()

set(CMAKE_VERBOSE_MAKEFILE ON)

# Go through each build variant
foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
# Determine flags
if (BUILD_VARIANT STREQUAL avxonly)
set(GPT4ALL_ALLOW_NON_AVX NO)
if (BUILD_VARIANT MATCHES avxonly)
set(GPT4ALL_ALLOW_NON_AVX OFF)
else()
set(GPT4ALL_ALLOW_NON_AVX YES)
set(GPT4ALL_ALLOW_NON_AVX ON)
endif()
set(LLAMA_AVX2 ${GPT4ALL_ALLOW_NON_AVX})
set(LLAMA_F16C ${GPT4ALL_ALLOW_NON_AVX})
set(LLAMA_FMA ${GPT4ALL_ALLOW_NON_AVX})

if (BUILD_VARIANT STREQUAL metal)
set(LLAMA_METAL YES)
else()
set(LLAMA_METAL NO)
# Start every variant with all GPU backends disabled, then enable exactly the
# one named by this variant, so a flag set for an earlier variant can never
# leak into a later iteration of the loop.
set(LLAMA_METAL OFF)
set(LLAMA_KOMPUTE OFF)
set(LLAMA_VULKAN OFF)
set(LLAMA_CUDA OFF)
# NOTE(review): nothing visible here ever turns LLAMA_ROCM on; the reset is
# kept in case llama.cpp.cmake reads it -- confirm and drop if unused.
set(LLAMA_ROCM OFF)
# The rocm branch below enables LLAMA_HIPBLAS, so that is the flag which must
# be cleared between variants.
set(LLAMA_HIPBLAS OFF)
if (BUILD_VARIANT MATCHES metal)
    set(LLAMA_METAL ON)
elseif (BUILD_VARIANT MATCHES kompute)
    set(LLAMA_KOMPUTE ON)
elseif (BUILD_VARIANT MATCHES vulkan)
    set(LLAMA_VULKAN ON)
elseif (BUILD_VARIANT MATCHES cuda)
    set(LLAMA_CUDA ON)
elseif (BUILD_VARIANT MATCHES rocm)
    set(LLAMA_HIPBLAS ON)
endif()

# Include GGML
set(LLAMA_K_QUANTS YES)
include_ggml(llama.cpp-mainline -mainline-${BUILD_VARIANT} ON)
include_ggml(-mainline-${BUILD_VARIANT})

# Function for preparing individual implementations
function(prepare_target TARGET_NAME BASE_LIB)
Expand All @@ -93,11 +135,15 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
LLAMA_VERSIONS=>=3 LLAMA_DATE=999999)
prepare_target(llamamodel-mainline llama-mainline)

if (NOT LLAMA_METAL)
if (BUILD_VARIANT MATCHES ${GPTJ_BUILD_VARIANT})
add_library(gptj-${BUILD_VARIANT} SHARED
gptj.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
prepare_target(gptj llama-mainline)
endif()

if (BUILD_VARIANT STREQUAL "cuda")
    # Hand CUDAToolkit_BIN_DIR up to the project that included this directory.
    # (presumably populated while configuring the CUDA variant -- confirm in
    # llama.cpp.cmake). When this directory is itself the top-level project
    # there is no parent scope and set(... PARENT_SCOPE) would only emit a
    # "current scope has no parent" warning, so skip it in that case.
    get_directory_property(LLMODEL_PARENT_DIRECTORY PARENT_DIRECTORY)
    if (LLMODEL_PARENT_DIRECTORY)
        set(CUDAToolkit_BIN_DIR ${CUDAToolkit_BIN_DIR} PARENT_SCOPE)
    endif()
endif()
endforeach()

add_library(llmodel
Expand Down
2 changes: 1 addition & 1 deletion gpt4all-backend/llama.cpp-mainline
Loading

0 comments on commit d2a99d9

Please sign in to comment.