# build-wheels-cuda.yaml
# Forked from abetlen/llama-cpp-python.
name: Build Wheels (CUDA) for Windows

# Runs only when started manually from the Actions tab or the API.
on:
  workflow_dispatch:

# Read-only by default; the job that publishes the release opts in to
# write access on its own.
permissions:
  contents: read

# NOTE(review): several actions below are referenced by branch (@main /
# @master), which are moving targets. Consider pinning them to a release
# tag or commit SHA once known-good versions have been confirmed.

jobs:
  # Emits the build matrix as a JSON string so that build_wheels can
  # consume it through fromJSON().
  define_matrix:
    name: Define Build Matrix
    runs-on: windows-2022
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    defaults:
      run:
        shell: pwsh
    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          # Each key becomes one dimension of the build_wheels matrix.
          $matrix = @{
              'os'         = @('windows-2022')
              'pyver'      = @("3.9", "3.10", "3.11", "3.12")
              'cuda'       = @("12.6.3")
              'releasetag' = @("AVX2")
              'cudaarch'   = @("all")
          }
          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  # Builds one wheel per matrix combination and attaches it to a GitHub
  # release tagged v<package version>-cu<cuda major+minor>-win.
  build_wheels:
    name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    # Needed by the release step to create the release and upload assets.
    permissions:
      contents: write
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    defaults:
      run:
        shell: pwsh
    env:
      CUDAVER: ${{ matrix.cuda }}
      AVXVER: ${{ matrix.releasetag }}
      # Value passed to -DCMAKE_CUDA_ARCHITECTURES, e.g. "all" or "89".
      # https://cmake.org/cmake/help/latest/prop_tgt/CUDA_ARCHITECTURES.html
      # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/#gpu-feature-list
      CUDAARCHVER: ${{ matrix.cudaarch }}
    steps:
      - name: Add MSBuild to PATH
        if: runner.os == 'Windows'
        uses: microsoft/setup-msbuild@main
        with:
          vs-version: '[17.12,17.14)'
          msbuild-architecture: x64

      - uses: actions/checkout@main
        with:
          submodules: "recursive"

      # from kingbri1/flash-attention build-wheels.yml
      - name: Install CUDA ${{ matrix.cuda }}
        uses: Jimver/cuda-toolkit@master
        id: cuda-toolkit
        with:
          cuda: "${{ matrix.cuda }}"

      - name: Install the latest version of uv and set the python version
        uses: astral-sh/setup-uv@main
        with:
          python-version: ${{ matrix.pyver }}

      - name: Install Dependencies
        run: |
          git config --system core.longpaths true
          uv pip install --upgrade build setuptools wheel packaging

      - name: Build Wheel
        run: |
          # "12.6.3" -> "126": drop the patch component, then the dots.
          $cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','')

          # Mirror CUDA_PATH into the other variable names read below by
          # the build. NOTE(review): CUDA_PATH is presumably exported by
          # the Install CUDA step - confirm.
          $env:CUDA_HOME = $env:CUDA_PATH
          $env:CUDA_TOOLKIT_ROOT_DIR = $env:CUDA_PATH

          $env:VERBOSE = '1'
          $env:CMAKE_ARGS = '-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=' + $env:CUDAARCHVER
          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON $env:CMAKE_ARGS"

          # CPU instruction-set flags selected by the release tag.
          if ($env:AVXVER -eq 'AVX') {
              $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
          }
          if ($env:AVXVER -eq 'AVX2') {
              $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=on -DGGML_FMA=off -DGGML_F16C=off'
          }
          # if ($env:AVXVER -eq 'AVX512') {
          #     $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX512=on'
          # }
          # if ($env:AVXVER -eq 'basic') {
          #     $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX=off -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
          # }

          python -m build --wheel
          # Stop right away if the build failed, rather than failing later
          # on a missing wheel.
          if ($LASTEXITCODE -ne 0) {
              exit $LASTEXITCODE
          }

          # Export the values the release step uses to compose the tag.
          Write-Output "CUDA_VERSION=$cudaVersion" >> $env:GITHUB_ENV
          $wheel = Get-Item '.\dist\*.whl' | Select-Object -First 1
          if (-not $wheel) {
              throw 'No wheel was produced in .\dist'
          }
          # Wheel file names are <name>-<version>-...; field 1 is the version.
          $tagVer = $wheel.Name.Split('-')[1]
          Write-Output "TAG_VERSION=$tagVer" >> $env:GITHUB_ENV

      - uses: softprops/action-gh-release@v2
        with:
          files: dist/*
          # Tag format: v<package version>-cu<cuda version>-win
          tag_name: v${{ env.TAG_VERSION }}-cu${{ env.CUDA_VERSION }}-win
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}