
Commit 26c0846

Initial release
0 parents  commit 26c0846

9 files changed: +13094 -0 lines changed

.gitignore (+21 lines)
@@ -0,0 +1,21 @@
*.o
*.a
.cache/
.vs/
.vscode/
.DS_Store

build/
build-em/
build-debug/
build-release/
build-static/
build-no-accel/
build-sanitize-addr/
build-sanitize-thread/

/main
/quantize

arm_neon.h
compile_commands.json

Makefile (+203 lines)
@@ -0,0 +1,203 @@
ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif

ifndef UNAME_P
UNAME_P := $(shell uname -p)
endif

ifndef UNAME_M
UNAME_M := $(shell uname -m)
endif

CCV := $(shell $(CC) --version | head -n 1)
CXXV := $(shell $(CXX) --version | head -n 1)

# Mac OS + Arm can report x86_64
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
ifeq ($(UNAME_S),Darwin)
    ifneq ($(UNAME_P),arm)
        SYSCTL_M := $(shell sysctl -n hw.optional.arm64)
        ifeq ($(SYSCTL_M),1)
            # UNAME_P := arm
            # UNAME_M := arm64
            warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
        endif
    endif
endif

#
# Compile flags
#

CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC
CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
LDFLAGS =

# OS specific
# TODO: support Windows
ifeq ($(UNAME_S),Linux)
    CFLAGS += -pthread
    CXXFLAGS += -pthread
endif
ifeq ($(UNAME_S),Darwin)
    CFLAGS += -pthread
    CXXFLAGS += -pthread
endif
ifeq ($(UNAME_S),FreeBSD)
    CFLAGS += -pthread
    CXXFLAGS += -pthread
endif
ifeq ($(UNAME_S),Haiku)
    CFLAGS += -pthread
    CXXFLAGS += -pthread
endif

# Architecture specific
# TODO: probably these flags need to be tweaked on some architectures
# feel free to update the Makefile for your architecture and send a pull request or issue
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
    ifeq ($(UNAME_S),Darwin)
        CFLAGS += -mf16c
        AVX1_M := $(shell sysctl machdep.cpu.features)
        ifneq (,$(findstring FMA,$(AVX1_M)))
            CFLAGS += -mfma
        endif
        ifneq (,$(findstring AVX1.0,$(AVX1_M)))
            CFLAGS += -mavx
        endif
        AVX2_M := $(shell sysctl machdep.cpu.leaf7_features)
        ifneq (,$(findstring AVX2,$(AVX2_M)))
            CFLAGS += -mavx2
        endif
    else ifeq ($(UNAME_S),Linux)
        AVX1_M := $(shell grep "avx " /proc/cpuinfo)
        ifneq (,$(findstring avx,$(AVX1_M)))
            CFLAGS += -mavx
        endif
        AVX2_M := $(shell grep "avx2 " /proc/cpuinfo)
        ifneq (,$(findstring avx2,$(AVX2_M)))
            CFLAGS += -mavx2
        endif
        FMA_M := $(shell grep "fma " /proc/cpuinfo)
        ifneq (,$(findstring fma,$(FMA_M)))
            CFLAGS += -mfma
        endif
        F16C_M := $(shell grep "f16c " /proc/cpuinfo)
        ifneq (,$(findstring f16c,$(F16C_M)))
            CFLAGS += -mf16c
        endif
        SSE3_M := $(shell grep "sse3 " /proc/cpuinfo)
        ifneq (,$(findstring sse3,$(SSE3_M)))
            CFLAGS += -msse3
        endif
    else ifeq ($(UNAME_S),Haiku)
        AVX1_M := $(shell sysinfo -cpu | grep "AVX ")
        ifneq (,$(findstring avx,$(AVX1_M)))
            CFLAGS += -mavx
        endif
        AVX2_M := $(shell sysinfo -cpu | grep "AVX2 ")
        ifneq (,$(findstring avx2,$(AVX2_M)))
            CFLAGS += -mavx2
        endif
        FMA_M := $(shell sysinfo -cpu | grep "FMA ")
        ifneq (,$(findstring fma,$(FMA_M)))
            CFLAGS += -mfma
        endif
        F16C_M := $(shell sysinfo -cpu | grep "F16C ")
        ifneq (,$(findstring f16c,$(F16C_M)))
            CFLAGS += -mf16c
        endif
    else
        CFLAGS += -mfma -mf16c -mavx -mavx2
    endif
endif
ifeq ($(UNAME_M),amd64)
    CFLAGS += -mavx -mavx2 -mfma -mf16c
endif
ifneq ($(filter ppc64%,$(UNAME_M)),)
    POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
    ifneq (,$(findstring POWER9,$(POWER9_M)))
        CFLAGS += -mpower9-vector
    endif
    # Require c++23's std::byteswap for big-endian support.
    ifeq ($(UNAME_M),ppc64)
        CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
    endif
endif
ifndef WHISPER_NO_ACCELERATE
    # Mac M1 - include Accelerate framework
    ifeq ($(UNAME_S),Darwin)
        CFLAGS += -DGGML_USE_ACCELERATE
        LDFLAGS += -framework Accelerate
    endif
endif
ifdef WHISPER_OPENBLAS
    CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
    LDFLAGS += -lopenblas
endif
ifdef WHISPER_GPROF
    CFLAGS += -pg
    CXXFLAGS += -pg
endif
ifneq ($(filter aarch64%,$(UNAME_M)),)
    CFLAGS += -mcpu=native
    CXXFLAGS += -mcpu=native
endif
ifneq ($(filter armv6%,$(UNAME_M)),)
    # Raspberry Pi 1, 2, 3
    CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
endif
ifneq ($(filter armv7%,$(UNAME_M)),)
    # Raspberry Pi 4
    CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif
ifneq ($(filter armv8%,$(UNAME_M)),)
    # Raspberry Pi 4
    CFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif

#
# Print build information
#

$(info I llama.cpp build info: )
$(info I UNAME_S: $(UNAME_S))
$(info I UNAME_P: $(UNAME_P))
$(info I UNAME_M: $(UNAME_M))
$(info I CFLAGS: $(CFLAGS))
$(info I CXXFLAGS: $(CXXFLAGS))
$(info I LDFLAGS: $(LDFLAGS))
$(info I CC: $(CCV))
$(info I CXX: $(CXXV))
$(info )

default: main quantize

#
# Build library
#

ggml.o: ggml.c ggml.h
	$(CC) $(CFLAGS) -c ggml.c -o ggml.o

utils.o: utils.cpp utils.h
	$(CXX) $(CXXFLAGS) -c utils.cpp -o utils.o

clean:
	rm -f *.o main quantize

main: main.cpp ggml.o utils.o
	$(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o -o main $(LDFLAGS)
	./main -h

quantize: quantize.cpp ggml.o utils.o
	$(CXX) $(CXXFLAGS) quantize.cpp ggml.o utils.o -o quantize $(LDFLAGS)

#
# Tests
#

.PHONY: tests
tests:
	bash ./tests/run-tests.sh
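
On Linux, the architecture-specific block above chooses the -m flags by grepping /proc/cpuinfo for the avx, avx2, fma, f16c and sse3 feature strings. The following is a minimal standalone sketch of that probe, not part of the commit; it assumes a Linux-style /proc/cpuinfo and simply mirrors the Makefile's flag choices, which can help verify which flags a build would pick up on a given machine:

```python
# Sketch only: replicates the Makefile's Linux feature probe (grep of /proc/cpuinfo).
flag_map = {
    "avx":  "-mavx",
    "avx2": "-mavx2",
    "fma":  "-mfma",
    "f16c": "-mf16c",
    "sse3": "-msse3",
}

features = set()
with open("/proc/cpuinfo") as f:
    for line in f:
        # the "flags" lines list one space-separated token per CPU capability
        if line.startswith("flags"):
            features.update(line.split(":", 1)[1].split())

extra_cflags = [flag for feat, flag in flag_map.items() if feat in features]
print("CFLAGS +=", " ".join(extra_cflags))
```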

convert-pth-to-ggml.py (+136 lines)
@@ -0,0 +1,136 @@
# Convert a LLaMA model checkpoint to a ggml compatible file
#
# Load the model using Torch
# Iterate over all variables and write them to a binary file.
#
# For each variable, write the following:
# - Number of dimensions (int)
# - Name length (int)
# - Dimensions (int[n_dims])
# - Name (char[name_length])
# - Data (float[n_dims])
#
# By default, the bigger matrices are converted to 16-bit floats.
# This can be disabled by adding the "use-f32" CLI argument.
#
# At the start of the ggml file we write the model parameters
# and vocabulary.
#

import sys
import json
import struct
import numpy as np
import torch

from sentencepiece import SentencePieceProcessor

if len(sys.argv) < 3:
    print("Usage: convert-ckpt-to-ggml.py dir-model ftype\n")
    print(" ftype == 0 -> float32")
    print(" ftype == 1 -> float16")
    sys.exit(1)

# output in the same directory as the model
dir_model = sys.argv[1]
fname_out = sys.argv[1] + "/ggml-model.bin"

fname_hparams = sys.argv[1] + "/params.json"
fname_model = sys.argv[1] + "/consolidated.00.pth"
fname_tokenizer = sys.argv[1] + "/../tokenizer.model"

# possible data types
# ftype == 0 -> float32
# ftype == 1 -> float16
#
# map from ftype to string
ftype_str = ["f32", "f16"]

ftype = 1
if len(sys.argv) > 2:
    ftype = int(sys.argv[2])
    if ftype < 0 or ftype > 1:
        print("Invalid ftype: " + str(ftype))
        sys.exit(1)
    fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin"

with open(fname_hparams, "r") as f:
    hparams = json.load(f)

tokenizer = SentencePieceProcessor(fname_tokenizer)

hparams.update({"vocab_size": tokenizer.vocab_size()})

print(hparams)

model = torch.load(fname_model, map_location="cpu")

fout = open(fname_out, "wb")

fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex
fout.write(struct.pack("i", hparams["vocab_size"]))
fout.write(struct.pack("i", hparams["dim"]))
fout.write(struct.pack("i", hparams["multiple_of"]))
fout.write(struct.pack("i", hparams["n_heads"]))
fout.write(struct.pack("i", hparams["n_layers"]))
fout.write(struct.pack("i", 64)) # rot
fout.write(struct.pack("i", ftype))

# Is this correct??
for i in range(32000):
    # TODO: this is probably wrong - not sure how this tokenizer works
    text = tokenizer.decode([29889, i]).encode('utf-8')
    # remove the first byte (it's always '.')
    text = text[1:]
    fout.write(struct.pack("i", len(text)))
    fout.write(text)

for k, v in model.items():
    name = k
    shape = v.shape

    # skip layers.X.attention.inner_attention.rope.freqs
    if name[-5:] == "freqs":
        continue

    print("Processing variable: " + name + " with shape: ", shape, " and type: ", v.dtype)

    #data = tf.train.load_variable(dir_model, name).squeeze()
    data = v.numpy().squeeze()
    n_dims = len(data.shape);

    # for efficiency - transpose some matrices
    # "model/h.*/attn/c_attn/w"
    # "model/h.*/attn/c_proj/w"
    # "model/h.*/mlp/c_fc/w"
    # "model/h.*/mlp/c_proj/w"
    #if name[-14:] == "/attn/c_attn/w" or \
    #   name[-14:] == "/attn/c_proj/w" or \
    #   name[-11:] == "/mlp/c_fc/w" or \
    #   name[-13:] == "/mlp/c_proj/w":
    #    print(" Transposing")
    #    data = data.transpose()

    dshape = data.shape

    # default type is fp16
    ftype_cur = 1
    if ftype == 0 or n_dims == 1:
        print(" Converting to float32")
        data = data.astype(np.float32)
        ftype_cur = 0

    # header
    str = name.encode('utf-8')
    fout.write(struct.pack("iii", n_dims, len(str), ftype_cur))
    for i in range(n_dims):
        fout.write(struct.pack("i", dshape[n_dims - 1 - i]))
    fout.write(str);

    # data
    data.tofile(fout)

fout.close()

print("Done. Output file: " + fname_out)
print("")
