Commit
Merge branch 'master' of github.com:ggerganov/llama.cpp
vvhg1 committed Mar 12, 2024
2 parents f675f58 + 8030da7 commit 742ad73
Showing 322 changed files with 134,196 additions and 20,821 deletions.
3 changes: 2 additions & 1 deletion .devops/full-cuda.Dockerfile
@@ -14,7 +14,8 @@ ARG CUDA_DOCKER_ARCH=all
 RUN apt-get update && \
     apt-get install -y build-essential python3 python3-pip git

-COPY requirements.txt requirements.txt
+COPY requirements.txt requirements.txt
+COPY requirements requirements

 RUN pip install --upgrade pip setuptools wheel \
     && pip install -r requirements.txt
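The single COPY line becomes two because the Python dependencies were split into a requirements/ directory, with the top-level requirements.txt aggregating the per-file lists. A plausible sketch of the resulting file (the individual file names are assumptions, not taken from this diff):

    # requirements.txt (illustrative)
    -r ./requirements/requirements-convert.txt
    -r ./requirements/requirements-convert-hf-to-gguf.txt

The same two-line change recurs in the ROCm, CPU-only, and main-rocm Dockerfiles below.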
3 changes: 2 additions & 1 deletion .devops/full-rocm.Dockerfile
@@ -23,7 +23,8 @@ ARG ROCM_DOCKER_ARCH=\
     gfx1101 \
     gfx1102

-COPY requirements.txt requirements.txt
+COPY requirements.txt requirements.txt
+COPY requirements requirements

 RUN pip install --upgrade pip setuptools wheel \
     && pip install -r requirements.txt
3 changes: 2 additions & 1 deletion .devops/full.Dockerfile
@@ -5,7 +5,8 @@ FROM ubuntu:$UBUNTU_VERSION as build
 RUN apt-get update && \
     apt-get install -y build-essential python3 python3-pip git

-COPY requirements.txt requirements.txt
+COPY requirements.txt requirements.txt
+COPY requirements requirements

 RUN pip install --upgrade pip setuptools wheel \
     && pip install -r requirements.txt
28 changes: 28 additions & 0 deletions .devops/main-intel.Dockerfile
@@ -0,0 +1,28 @@
ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION as build

ARG LLAMA_SYCL_F16=OFF
RUN apt-get update && \
    apt-get install -y git

WORKDIR /app

COPY . .

RUN mkdir build && \
    cd build && \
    if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
        echo "LLAMA_SYCL_F16 is set" && \
        export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
    fi && \
    cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
    cmake --build . --config Release --target main

FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

COPY --from=build /app/build/bin/main /main

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/main" ]
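A minimal way to exercise this image (the tag, model path, and the --device flag for exposing an Intel GPU via /dev/dri are assumptions):

    docker build -t llama-cpp-sycl --build-arg LLAMA_SYCL_F16=ON -f .devops/main-intel.Dockerfile .
    docker run --rm -it --device /dev/dri -v /path/to/models:/models llama-cpp-sycl -m /models/model.gguf -p "Hello"

Because the ENTRYPOINT is /main, everything after the image name is passed straight to the main binary.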
3 changes: 2 additions & 1 deletion .devops/main-rocm.Dockerfile
@@ -23,7 +23,8 @@ ARG ROCM_DOCKER_ARCH=\
     gfx1101 \
     gfx1102

-COPY requirements.txt requirements.txt
+COPY requirements.txt requirements.txt
+COPY requirements requirements

 RUN pip install --upgrade pip setuptools wheel \
     && pip install -r requirements.txt
29 changes: 29 additions & 0 deletions .devops/main-vulkan.Dockerfile
@@ -0,0 +1,29 @@
ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION as build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget

# Install Vulkan SDK
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk

# Build it
WORKDIR /app
COPY . .
RUN mkdir build && \
    cd build && \
    cmake .. -DLLAMA_VULKAN=1 && \
    cmake --build . --config Release --target main

# Clean up
WORKDIR /
RUN cp /app/build/bin/main /main && \
    rm -rf /app

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/main" ]
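As with the SYCL image, a minimal build-and-run sketch (tag, model path, and the GPU device flag are assumptions; Vulkan device access varies by driver):

    docker build -t llama-cpp-vulkan -f .devops/main-vulkan.Dockerfile .
    docker run --rm -it --device /dev/dri -v /path/to/models:/models llama-cpp-vulkan -m /models/model.gguf -p "Hello"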
22 changes: 22 additions & 0 deletions .devops/nix/apps.nix
@@ -0,0 +1,22 @@
{
  perSystem =
    { config, lib, ... }:
    {
      apps =
        let
          inherit (config.packages) default;
          binaries = [
            "llama"
            "llama-embedding"
            "llama-server"
            "quantize"
            "train-text-from-scratch"
          ];
          mkApp = name: {
            type = "app";
            program = "${default}/bin/${name}";
          };
        in
        lib.genAttrs binaries mkApp;
    };
}
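lib.genAttrs turns the binaries list into an attribute set, so each listed binary becomes a flake app with a matching name. From a checkout of the repository (model path is an assumption):

    nix run .#llama-server
    nix run .#llama -- -m /path/to/model.gguf -p "Hello"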
13 changes: 13 additions & 0 deletions .devops/nix/devshells.nix
@@ -0,0 +1,13 @@
{
  perSystem =
    { config, lib, ... }:
    {
      devShells =
        lib.concatMapAttrs
          (name: package: {
            ${name} = package.passthru.shell;
            ${name + "-extra"} = package.passthru.shell-extra;
          })
          config.packages;
    };
}
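Because the shells are derived from config.packages, a package named default (as referenced in apps.nix above) yields, for example:

    nix develop .#default          # package.passthru.shell
    nix develop .#default-extra    # package.passthru.shell-extra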
37 changes: 37 additions & 0 deletions .devops/nix/docker.nix
@@ -0,0 +1,37 @@
{
  lib,
  dockerTools,
  buildEnv,
  llama-cpp,
  interactive ? true,
  coreutils,
}:

# A tar that can be fed into `docker load`:
#
# $ nix build .#llamaPackages.docker
# $ docker load < result

# For details and variations cf.
# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
# - https://nixery.dev/

# Approximate (compressed) sizes, at the time of writing, are:
#
# .#llamaPackages.docker: 125M;
# .#llamaPackagesCuda.docker: 537M;
# .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.

dockerTools.buildLayeredImage {
  name = llama-cpp.pname;
  tag = "latest";

  contents =
    [ llama-cpp ]
    ++ lib.optionals interactive [
      coreutils
      dockerTools.binSh
      dockerTools.caCertificates
    ];
}
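Continuing the `docker load` example from the comment: the image name comes from llama-cpp.pname (assumed here to be llama-cpp) with the latest tag, and since no entrypoint is configured, an interactive shell relies on the binSh added when interactive = true:

    nix build .#llamaPackages.docker
    docker load < result
    docker run --rm -it llama-cpp:latest /bin/sh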
39 changes: 39 additions & 0 deletions .devops/nix/jetson-support.nix
@@ -0,0 +1,39 @@
{ inputs, ... }:
{
  perSystem =
    {
      config,
      system,
      lib,
      pkgsCuda,
      ...
    }:
    {
      legacyPackages =
        let
          caps.llamaPackagesXavier = "7.2";
          caps.llamaPackagesOrin = "8.7";
          caps.llamaPackagesTX2 = "6.2";
          caps.llamaPackagesNano = "5.3";

          pkgsFor =
            cap:
            import inputs.nixpkgs {
              inherit system;
              config = {
                cudaSupport = true;
                cudaCapabilities = [ cap ];
                cudaEnableForwardCompat = false;
                inherit (pkgsCuda.config) allowUnfreePredicate;
              };
            };
        in
        builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps;

      packages = lib.optionalAttrs (system == "aarch64-linux") {
        jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp;
        jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp;
        jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp;
      };
    };
}
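On an aarch64-linux machine the convenience attributes can be built directly, and the per-device package sets remain reachable through legacyPackages (the Xavier docker path matches the size table in docker.nix above):

    nix build .#jetson-orin
    nix build .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker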
47 changes: 47 additions & 0 deletions .devops/nix/nixpkgs-instances.nix
@@ -0,0 +1,47 @@
{ inputs, ... }:
{
  # The _module.args definitions are passed on to modules as arguments. E.g.
  # the module `{ pkgs ... }: { /* config */ }` implicitly uses
  # `_module.args.pkgs` (defined in this case by flake-parts).
  perSystem =
    { system, ... }:
    {
      _module.args = {
        # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
        # again, the below creates several nixpkgs instances which the
        # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
        #
        # This is currently "slow" and "expensive", on a certain scale.
        # This also isn't "right" in that this hinders dependency injection at
        # the level of flake inputs. This might get removed in the foreseeable
        # future.
        #
        # Note that you can use these expressions without flakes
        # (`pkgs.callPackage ./.devops/nix/scope.nix { }` is the entry point).

        pkgsCuda = import inputs.nixpkgs {
          inherit system;
          # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
          # and ucx are built with CUDA support)
          config.cudaSupport = true;
          config.allowUnfreePredicate =
            p:
            builtins.all
              (
                license:
                license.free
                || builtins.elem license.shortName [
                  "CUDA EULA"
                  "cuDNN EULA"
                ]
              )
              (p.meta.licenses or [ p.meta.license ]);
        };
        # Ensure dependencies use ROCm consistently
        pkgsRocm = import inputs.nixpkgs {
          inherit system;
          config.rocmSupport = true;
        };
      };
    };
}
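A concrete form of the non-flake entry point mentioned in the comment, assuming <nixpkgs> resolves on NIX_PATH and that the scope exposes a llama-cpp attribute (as jetson-support.nix suggests):

    nix-build -E 'with import <nixpkgs> { }; (callPackage ./.devops/nix/scope.nix { }).llama-cpp'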