Merge remote-tracking branch 'upstream/concedo'

YellowRoseCx · Jul 2, 2024 · 7a5219a · 7a5219a
2 parents b9e1db8 + 7499a6b
commit 7a5219a
Show file tree

Hide file tree

Showing 391 changed files with 44,820 additions and 40,708 deletions.
diff --git a/.devops/llama-cli-intel.Dockerfile b/.devops/llama-cli-intel.Dockerfile
@@ -2,19 +2,19 @@ ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04
 
 FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
 
-ARG LLAMA_SYCL_F16=OFF
+ARG GGML_SYCL_F16=OFF
 RUN apt-get update && \
  apt-get install -y git
 
 WORKDIR /app
 
 COPY . .
 
-RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
- echo "LLAMA_SYCL_F16 is set" && \
- export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
+RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
+ echo "GGML_SYCL_F16 is set" && \
+ export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
  fi && \
- cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+ cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
  cmake --build build --config Release --target llama-cli
 
 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

diff --git a/.devops/llama-server-intel.Dockerfile b/.devops/llama-server-intel.Dockerfile
@@ -2,28 +2,30 @@ ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04
 
 FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
 
-ARG LLAMA_SYCL_F16=OFF
+ARG GGML_SYCL_F16=OFF
 RUN apt-get update && \
  apt-get install -y git libcurl4-openssl-dev
 
 WORKDIR /app
 
 COPY . .
 
-RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
- echo "LLAMA_SYCL_F16 is set" && \
- export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
+RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
+ echo "GGML_SYCL_F16 is set" && \
+ export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
  fi && \
- cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
+ cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
  cmake --build build --config Release --target llama-server
 
 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
 
 RUN apt-get update && \
- apt-get install -y libcurl4-openssl-dev
+ apt-get install -y libcurl4-openssl-dev curl
 
 COPY --from=build /app/build/bin/llama-server /llama-server
 
 ENV LC_ALL=C.utf8
 
+HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
 ENTRYPOINT [ "/llama-server" ]
diff --git a/.editorconfig b/.editorconfig
@@ -28,4 +28,5 @@ indent_size = 2
 indent_style = tab
 
 [examples/cvector-generator/*.txt]
+trim_trailing_whitespace = unset
 insert_final_newline = unset
diff --git a/.github/workflows/kcpp-build-release-win-cuda.yaml b/.github/workflows/kcpp-build-release-win-cuda.yaml
@@ -14,11 +14,11 @@ jobs:
  with:
  ref: ${{ github.head_ref || github.ref_name }}
 
- - uses: Jimver/cuda-toolkit@v0.2.11
+ - uses: Jimver/cuda-toolkit@v0.2.15
  id: cuda-toolkit
  with:
  cuda: '11.4.4'
- 
+
  - name: Build
  id: cmake_build
  run: |

diff --git a/.github/workflows/kcpp-build-release-win-cuda12.yaml b/.github/workflows/kcpp-build-release-win-cuda12.yaml
@@ -14,11 +14,11 @@ jobs:
  with:
  ref: ${{ github.head_ref || github.ref_name }}
 
- - uses: Jimver/cuda-toolkit@v0.2.11
+ - uses: Jimver/cuda-toolkit@v0.2.15
  id: cuda-toolkit
  with:
  cuda: '12.1.0'
- 
+
  - name: Build
  id: cmake_build
  run: |

diff --git a/.github/workflows/kcpp-build-release-win-full-cu12.yaml b/.github/workflows/kcpp-build-release-win-full-cu12.yaml
@@ -38,7 +38,7 @@ jobs:
  run: |
  make -j ${env:NUMBER_OF_PROCESSORS}
 
- - uses: Jimver/cuda-toolkit@v0.2.11
+ - uses: Jimver/cuda-toolkit@v0.2.15
  id: cuda-toolkit
  with:
  cuda: '12.1.0'

diff --git a/.github/workflows/kcpp-build-release-win-full.yaml b/.github/workflows/kcpp-build-release-win-full.yaml
@@ -38,7 +38,7 @@ jobs:
  run: |
  make -j ${env:NUMBER_OF_PROCESSORS}
 
- - uses: Jimver/cuda-toolkit@v0.2.11
+ - uses: Jimver/cuda-toolkit@v0.2.15
  id: cuda-toolkit
  with:
  cuda: '11.4.4'

diff --git a/.github/workflows/kcpp-build-release-win-noavx2-full.yaml b/.github/workflows/kcpp-build-release-win-noavx2-full.yaml
@@ -1,4 +1,4 @@
-name: Koboldcpp Windows Full Binaries
+name: Koboldcpp Windows Full AVX1 Binaries
 
 on: workflow_dispatch
 env:
@@ -38,7 +38,7 @@ jobs:
  run: |
  make -j ${env:NUMBER_OF_PROCESSORS}
 
- - uses: Jimver/cuda-toolkit@v0.2.11
+ - uses: Jimver/cuda-toolkit@v0.2.15
  id: cuda-toolkit
  with:
  cuda: '11.4.4'

diff --git a/.gitignore b/.gitignore
@@ -125,6 +125,8 @@ tests/test-tokenizer-1-bpe
 /koboldcpp_vulkan.dll
 /cublas64_11.dll
 /cublasLt64_11.dll
+/cublas64_12.dll
+/cublasLt64_12.dll
 /rocblas/
 rocblas.dll
 hipblas.dll