Skip to content

feat: pack libraries and create releases #13

feat: pack libraries and create releases

feat: pack libraries and create releases #13

Workflow file for this run

# CI quality gate: triggered by pull-request activity (opened, synchronize,
# reopened) and by manual dispatch.
name: CI Quality Gate
on:
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
  workflow_dispatch:
# Workflow-wide model download URLs; the e2e step forwards both values to
# `make run-e2e-test`.
env:
  LLM_MODEL_URL: "https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
  EMBEDDING_MODEL_URL: "https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf"
jobs:
  # Builds and packages the example server once per matrix entry, runs the
  # e2e tests on the entries that opt in, and uploads ./cortex.llamacpp as an
  # artifact named after the entry's os and name.
  build-and-test:
    runs-on: ${{ matrix.runs-on }}
    timeout-minutes: 40
    strategy:
      # Let the remaining targets finish even when one of them fails.
      fail-fast: false
      matrix:
        # Fields of every entry:
        #   os, name    - form the artifact name; also select the CUDA steps
        #   runs-on     - runner label the job is scheduled on
        #   cmake-flags - passed to the build as CMAKE_EXTRA_FLAGS
        #   run-e2e     - enables the "Run e2e testing" step
        #   vulkan      - enables the "Prepare Vulkan SDK" step
        include:
          # ---- Linux ----
          - os: "linux"
            name: "amd64-avx2"
            runs-on: "ubuntu-18-04"
            cmake-flags: "-DLLAMA_NATIVE=OFF"
            run-e2e: true
            vulkan: false
          - os: "linux"
            name: "amd64-avx"
            runs-on: "ubuntu-18-04"
            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
            run-e2e: false
            vulkan: false
          - os: "linux"
            name: "amd64-avx512"
            runs-on: "ubuntu-18-04"
            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
            run-e2e: false
            vulkan: false
          - os: "linux"
            name: "amd64-vulkan"
            runs-on: "ubuntu-18-04-cuda-11-7"
            cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF"
            run-e2e: false
            vulkan: true
          - os: "linux"
            name: "amd64-cuda-11-7"
            runs-on: "ubuntu-18-04-cuda-11-7"
            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
            run-e2e: false
            vulkan: false
          - os: "linux"
            name: "amd64-cuda-12-0"
            runs-on: "ubuntu-18-04-cuda-12-0"
            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
            run-e2e: false
            vulkan: false

          # ---- macOS ----
          - os: "mac"
            name: "amd64"
            runs-on: "macos-13"
            cmake-flags: "-DLLAMA_METAL=OFF"
            run-e2e: true
            vulkan: false
          - os: "mac"
            name: "arm64"
            runs-on: "mac-silicon"
            cmake-flags: "-DLLAMA_METAL_EMBED_LIBRARY=ON"
            run-e2e: true
            vulkan: false

          # ---- Windows ----
          - os: "windows"
            name: "amd64-avx2"
            runs-on: "windows-latest"
            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: true
            vulkan: false
          - os: "windows"
            name: "amd64-avx"
            runs-on: "windows-latest"
            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: false
            vulkan: false
          - os: "windows"
            name: "amd64-avx512"
            runs-on: "windows-latest"
            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: false
            vulkan: false
          - os: "windows"
            name: "amd64-vulkan"
            runs-on: "windows-latest"
            cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: false
            vulkan: true
          - os: "windows"
            name: "amd64-avx2-cuda-12-0"
            runs-on: "windows-latest"
            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: false
            vulkan: false
          - os: "windows"
            name: "amd64-avx-cuda-12-0"
            runs-on: "windows-latest"
            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: false
            vulkan: false
          - os: "windows"
            name: "amd64-avx512-cuda-12-0"
            runs-on: "windows-latest"
            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: false
            vulkan: false
          - os: "windows"
            name: "amd64-avx2-cuda-11-7"
            runs-on: "windows-latest"
            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: false
            vulkan: false
          - os: "windows"
            name: "amd64-avx-cuda-11-7"
            runs-on: "windows-latest"
            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: false
            vulkan: false
          - os: "windows"
            name: "amd64-avx512-cuda-11-7"
            runs-on: "windows-latest"
            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: false
            vulkan: false

    steps:
      # Check out the repository together with all of its submodules.
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive

      # Only for matrix entries with `vulkan: true`.
      - name: Prepare Vulkan SDK
        if: ${{ matrix.vulkan }}
        uses: humbletim/setup-vulkan-sdk@v1.2.0
        with:
          vulkan-query-version: 1.3.275.0
          vulkan-components: Vulkan-Headers, Vulkan-Loader
          vulkan-use-cache: true

      # The two CUDA conditions below are written as bare expressions on
      # purpose: an `if:` that mixes `${{ }}` with plain text is evaluated as a
      # non-empty string and is therefore always true.

      # CUDA 11.7 toolkit, only for Windows entries whose name ends in
      # "cuda-11-7".
      - name: cuda-toolkit
        if: matrix.os == 'windows' && endsWith(matrix.name, 'cuda-11-7')
        uses: Jimver/cuda-toolkit@v0.2.15
        with:
          cuda: 11.7.0
          method: local
          use-github-cache: true

      # CUDA 12.0 toolkit, only for Windows entries whose name ends in
      # "cuda-12-0".
      - name: cuda-toolkit
        if: matrix.os == 'windows' && endsWith(matrix.name, 'cuda-12-0')
        uses: Jimver/cuda-toolkit@v0.2.15
        with:
          cuda: 12.0.0
          method: network
          use-github-cache: true

      # Despite the step name, this installs `make` through Chocolatey so the
      # make-based steps below can run on Windows runners.
      - name: Install choco on Windows
        if: runner.os == 'Windows'
        run: |
          choco install make -y

      - name: Build
        run: |
          make build-example-server CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}"

      - name: Package
        run: |
          make package

      # Only for matrix entries with `run-e2e: true`; the model URLs come from
      # the workflow-level `env`.
      - name: Run e2e testing
        if: ${{ matrix.run-e2e }}
        run: |
          make run-e2e-test LLM_MODEL_URL=${{ env.LLM_MODEL_URL }} EMBEDDING_MODEL_URL=${{ env.EMBEDDING_MODEL_URL }}

      # NOTE(review): actions/upload-artifact@v2 and actions/checkout@v3 are
      # old major versions - confirm they are still supported, and that the
      # custom runner labels used in the matrix can run newer majors, before
      # bumping them.
      - name: Upload Artifact
        uses: actions/upload-artifact@v2
        with:
          name: cortex.llamacpp-${{ matrix.os }}-${{ matrix.name }}
          path: ./cortex.llamacpp