forked from ggerganov/llama.cpp
Commit: Merge branch 'master' of github.com:ggerganov/llama.cpp

This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing 322 changed files with 134,196 additions and 20,821 deletions.
New file (28 lines): a Dockerfile that builds the `main` binary with Intel oneAPI SYCL support.
```dockerfile
ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION as build

ARG LLAMA_SYCL_F16=OFF
RUN apt-get update && \
    apt-get install -y git

WORKDIR /app

COPY . .

RUN mkdir build && \
    cd build && \
    if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
        echo "LLAMA_SYCL_F16 is set" && \
        export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
    fi && \
    cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
    cmake --build . --config Release --target main

FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

COPY --from=build /app/build/bin/main /main

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/main" ]
```
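A hedged usage sketch for this image: the diff does not show the file's name, so the Dockerfile path and image tag below are illustrative, as are the model path and prompt. A SYCL-capable Intel GPU is assumed to be exposed to the container.

```sh
# Build, optionally turning on FP16 SYCL kernels via the build argument
# declared above (LLAMA_SYCL_F16 defaults to OFF).
docker build -f main-intel.Dockerfile \
  --build-arg LLAMA_SYCL_F16=ON \
  -t llama-cpp-sycl .

# Run: SYCL on Intel GPUs typically needs the host's /dev/dri device nodes.
docker run --rm --device /dev/dri \
  -v "$PWD/models:/models" \
  llama-cpp-sycl -m /models/model.gguf -p "Hello" -n 32
```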
New file (29 lines): a Dockerfile that builds `main` with Vulkan support on Ubuntu.
```dockerfile
ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION as build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget

# Install Vulkan SDK
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk

# Build it
WORKDIR /app
COPY . .
RUN mkdir build && \
    cd build && \
    cmake .. -DLLAMA_VULKAN=1 && \
    cmake --build . --config Release --target main

# Clean up
WORKDIR /
RUN cp /app/build/bin/main /main && \
    rm -rf /app

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/main" ]
```
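As above, a sketch of how this image might be built and run; file name, tag, and model path are assumptions, and the container needs access to a Vulkan-capable GPU.

```sh
docker build -f main-vulkan.Dockerfile -t llama-cpp-vulkan .

# /dev/dri exposes the GPU on typical Linux hosts; -ngl offloads
# layers to the GPU (the layer count here is illustrative).
docker run --rm --device /dev/dri \
  -v "$PWD/models:/models" \
  llama-cpp-vulkan -m /models/model.gguf -p "Hello" -n 32 -ngl 33
```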
New file (22 lines): a flake-parts module that exposes the main binaries as flake apps.
```nix
{
  perSystem =
    { config, lib, ... }:
    {
      apps =
        let
          inherit (config.packages) default;
          binaries = [
            "llama"
            "llama-embedding"
            "llama-server"
            "quantize"
            "train-text-from-scratch"
          ];
          mkApp = name: {
            type = "app";
            program = "${default}/bin/${name}";
          };
        in
        lib.genAttrs binaries mkApp;
    };
}
```
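Each attribute produced by `lib.genAttrs` becomes a flake app, so, assuming this module is wired into the flake in the usual flake-parts way, the listed binaries should be runnable with `nix run` (a sketch; the model path is a placeholder):

```sh
# App names come from the `binaries` list above.
nix run .#llama-server -- --model ./models/model.gguf
nix run .#llama -- -m ./models/model.gguf -p "Hello" -n 32
```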
New file (13 lines): a flake-parts module that derives a dev shell (plus an `-extra` variant) from every package.
```nix
{
  perSystem =
    { config, lib, ... }:
    {
      devShells =
        lib.concatMapAttrs
          (name: package: {
            ${name} = package.passthru.shell;
            ${name + "-extra"} = package.passthru.shell-extra;
          })
          config.packages;
    };
}
```
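Since the attribute set is generated from `config.packages`, every package contributes two shells. A sketch, assuming a `default` package exists (as `apps.nix` above also assumes):

```sh
# Basic development shell for the default package...
nix develop .#default
# ...and the variant with extra tooling from passthru.shell-extra.
nix develop .#default-extra
```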
New file (37 lines): a Nix expression that packs llama-cpp into a layered Docker image.
```nix
{
  lib,
  dockerTools,
  buildEnv,
  llama-cpp,
  interactive ? true,
  coreutils,
}:

# A tar that can be fed into `docker load`:
#
#   $ nix build .#llamaPackages.docker
#   $ docker load < result

# For details and variations cf.
# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
# - https://nixery.dev/

# Approximate (compressed) sizes, at the time of writing, are:
#
#   .#llamaPackages.docker: 125M;
#   .#llamaPackagesCuda.docker: 537M;
#   .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.

dockerTools.buildLayeredImage {
  name = llama-cpp.pname;
  tag = "latest";

  contents =
    [ llama-cpp ]
    ++ lib.optionals interactive [
      coreutils
      dockerTools.binSh
      dockerTools.caCertificates
    ];
}
```
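The header comment shows how to load the tar; a hedged continuation for actually running it, assuming `llama-cpp.pname` evaluates to `llama-cpp`:

```sh
nix build .#llamaPackages.docker
docker load < result

# With interactive = true the image also contains coreutils and binSh,
# so an interactive shell is available for poking around.
docker run --rm -it llama-cpp:latest /bin/sh
```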
New file (39 lines): a flake-parts module that adds per-device CUDA package sets for NVIDIA Jetson boards.
```nix
{ inputs, ... }:
{
  perSystem =
    {
      config,
      system,
      lib,
      pkgsCuda,
      ...
    }:
    {
      legacyPackages =
        let
          caps.llamaPackagesXavier = "7.2";
          caps.llamaPackagesOrin = "8.7";
          caps.llamaPackagesTX2 = "6.2";
          caps.llamaPackagesNano = "5.3";

          pkgsFor =
            cap:
            import inputs.nixpkgs {
              inherit system;
              config = {
                cudaSupport = true;
                cudaCapabilities = [ cap ];
                cudaEnableForwardCompat = false;
                inherit (pkgsCuda.config) allowUnfreePredicate;
              };
            };
        in
        builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps;

      packages = lib.optionalAttrs (system == "aarch64-linux") {
        jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp;
        jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp;
        jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp;
      };
    };
}
```
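Note that `llamaPackagesTX2` is given a CUDA capability but no convenience entry under `packages`. On an aarch64-linux builder the Xavier variant (capability 7.2, per the `caps` above) should then be reachable two ways; a sketch, assuming the module is wired into the flake unchanged:

```sh
# Via the convenience package (aarch64-linux only):
nix build .#jetson-xavier

# Equivalently, through legacyPackages:
nix build .#legacyPackages.aarch64-linux.llamaPackagesXavier.llama-cpp
```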
New file (47 lines): a flake-parts module that defines the `pkgsCuda` and `pkgsRocm` nixpkgs instances used by the other modules.
```nix
{ inputs, ... }:
{
  # The _module.args definitions are passed on to modules as arguments. E.g.
  # the module `{ pkgs, ... }: { /* config */ }` implicitly uses
  # `_module.args.pkgs` (defined in this case by flake-parts).
  perSystem =
    { system, ... }:
    {
      _module.args = {
        # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
        # again, the below creates several nixpkgs instances which the
        # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
        #
        # This is currently "slow" and "expensive", on a certain scale.
        # This also isn't "right" in that this hinders dependency injection at
        # the level of flake inputs. This might get removed in the foreseeable
        # future.
        #
        # Note that you can use these expressions without flakes
        # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point).

        pkgsCuda = import inputs.nixpkgs {
          inherit system;
          # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
          # and ucx are built with CUDA support)
          config.cudaSupport = true;
          config.allowUnfreePredicate =
            p:
            builtins.all
              (
                license:
                license.free
                || builtins.elem license.shortName [
                  "CUDA EULA"
                  "cuDNN EULA"
                ]
              )
              (p.meta.licenses or [ p.meta.license ]);
        };
        # Ensure dependencies use ROCm consistently
        pkgsRocm = import inputs.nixpkgs {
          inherit system;
          config.rocmSupport = true;
        };
      };
    };
}
```
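As the comment above says, the scope is also reachable without the flake CLI via `pkgs.callPackage`. A minimal sketch, run from the repository root: the `scope.nix` path is taken from that comment, the `<nixpkgs>` channel stands in for the flake's pinned input, and `allowUnfree = true` is a blunt simplification of the `allowUnfreePredicate` defined above.

```sh
nix-build -E '
  let pkgs = import <nixpkgs> {
    config.cudaSupport = true;
    config.allowUnfree = true;  # simplification of allowUnfreePredicate above
  };
  in (pkgs.callPackage ./devops/nix/scope.nix { }).llama-cpp
'
```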