Skip to content

Commit 5c64a09

Browse files
authored Jun 7, 2023
k-quants : allow to optionally disable at compile time (#1734)
* k-quants : put behind optional compile flag LLAMA_K_QUANTS
* build : enable k-quants by default
1 parent 5b57a5b commit 5c64a09

File tree

6 files changed

+251
-229
lines changed

6 files changed

+251
-229
lines changed
 

‎CMakeLists.txt

+6-2
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kern
7272
set(LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels")
7373
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
7474
option(LLAMA_METAL "llama: use Metal" OFF)
75+
option(LLAMA_K_QUANTS "llama: use k-quants" ON)
7576

7677
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
7778
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
@@ -226,6 +227,10 @@ if (LLAMA_METAL)
226227
)
227228
endif()
228229

230+
# k-quants are optional: when enabled, compile their sources into ggml and
# define GGML_USE_K_QUANTS, the same macro the Makefile build passes via CFLAGS.
if (LLAMA_K_QUANTS)
231+
set(GGML_SOURCES_EXTRA ${GGML_SOURCES_EXTRA} k_quants.c k_quants.h)
# Directory scope is used because the ggml target is only declared further down.
add_compile_definitions(GGML_USE_K_QUANTS)
232+
endif()
233+
229234
if (LLAMA_CLBLAST)
230235
find_package(CLBlast)
231236
if (CLBlast_FOUND)
@@ -396,11 +401,10 @@ endif()
396401
add_library(ggml OBJECT
397402
ggml.c
398403
ggml.h
399-
ggml-quants-k.h
400-
ggml-quants-k.c
401404
${GGML_SOURCES_CUDA}
402405
${GGML_SOURCES_OPENCL}
403406
${GGML_SOURCES_METAL}
407+
${GGML_SOURCES_EXTRA}
404408
)
405409

406410
target_include_directories(ggml PUBLIC .)

‎Makefile

+22-15
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,11 @@ ifneq ($(filter ppc64%,$(UNAME_M)),)
121121
endif
122122
endif
123123

124+
# k-quants are on unless LLAMA_NO_K_QUANTS is set: define GGML_USE_K_QUANTS for
# the C sources and add the k-quants object to the list linked into every target.
ifndef LLAMA_NO_K_QUANTS
125+
CFLAGS += -DGGML_USE_K_QUANTS
126+
OBJS += k_quants.o
127+
endif
128+
124129
ifndef LLAMA_NO_ACCELERATE
125130
# Mac M1 - include Accelerate framework.
126131
# `-framework Accelerate` works on Mac Intel as well, with negligible performance boost (as of the predict time).
@@ -140,7 +145,7 @@ ifdef LLAMA_OPENBLAS
140145
endif # LLAMA_OPENBLAS
141146

142147
ifdef LLAMA_BLIS
143-
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
148+
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
144149
LDFLAGS += -lblis -L/usr/local/lib
145150
endif # LLAMA_BLIS
146151

@@ -212,6 +217,11 @@ ifneq ($(filter armv8%,$(UNAME_M)),)
212217
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
213218
endif
214219

220+
# Explicit rule for the k-quants object. It is only needed while k-quants are
# enabled (LLAMA_NO_K_QUANTS unset), which is also when k_quants.o is in OBJS.
# Listing k_quants.h as a prerequisite rebuilds the object when the header changes.
ifndef LLAMA_NO_K_QUANTS
221+
k_quants.o: k_quants.c k_quants.h
222+
$(CC) $(CFLAGS) -c $< -o $@
223+
endif # LLAMA_NO_K_QUANTS
224+
215225
#
216226
# Print build information
217227
#
@@ -231,10 +241,7 @@ $(info )
231241
# Build library
232242
#
233243

234-
ggml.o: ggml.c ggml.h ggml-cuda.h ggml-quants-k.h
235-
$(CC) $(CFLAGS) -c $< -o $@
236-
237-
ggml-quants-k.o: ggml-quants-k.c ggml-quants-k.h ggml.h ggml-cuda.h
244+
ggml.o: ggml.c ggml.h ggml-cuda.h
238245
$(CC) $(CFLAGS) -c $< -o $@
239246

240247
llama.o: llama.cpp ggml.h ggml-cuda.h llama.h llama-util.h
@@ -243,7 +250,7 @@ llama.o: llama.cpp ggml.h ggml-cuda.h llama.h llama-util.h
243250
common.o: examples/common.cpp examples/common.h
244251
$(CXX) $(CXXFLAGS) -c $< -o $@
245252

246-
libllama.so: llama.o ggml.o ggml-quants-k.o $(OBJS)
253+
libllama.so: llama.o ggml.o $(OBJS)
247254
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
248255

249256
clean:
@@ -253,28 +260,28 @@ clean:
253260
# Examples
254261
#
255262

256-
main: examples/main/main.cpp build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS)
263+
main: examples/main/main.cpp build-info.h ggml.o llama.o common.o $(OBJS)
257264
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
258265
@echo
259266
@echo '==== Run ./main -h for help. ===='
260267
@echo
261268

262-
quantize: examples/quantize/quantize.cpp build-info.h ggml.o ggml-quants-k.o llama.o $(OBJS)
269+
quantize: examples/quantize/quantize.cpp build-info.h ggml.o llama.o $(OBJS)
263270
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
264271

265-
quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.h ggml.o ggml-quants-k.o llama.o $(OBJS)
272+
quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.h ggml.o llama.o $(OBJS)
266273
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
267274

268-
perplexity: examples/perplexity/perplexity.cpp build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS)
275+
perplexity: examples/perplexity/perplexity.cpp build-info.h ggml.o llama.o common.o $(OBJS)
269276
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
270277

271-
embedding: examples/embedding/embedding.cpp build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS)
278+
embedding: examples/embedding/embedding.cpp build-info.h ggml.o llama.o common.o $(OBJS)
272279
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
273280

274-
save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS)
281+
save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS)
275282
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
276283

277-
server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS)
284+
server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp build-info.h ggml.o llama.o common.o $(OBJS)
278285
$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS)
279286

280287
build-info.h: $(wildcard .git/index) scripts/build-info.sh
@@ -289,11 +296,11 @@ build-info.h: $(wildcard .git/index) scripts/build-info.sh
289296
# Tests
290297
#
291298

292-
benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o ggml-quants-k.o $(OBJS)
299+
benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o $(OBJS)
293300
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
294301
./$@
295302

296-
vdot: pocs/vdot/vdot.cpp ggml.o ggml-quants-k.o $(OBJS)
303+
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
297304
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
298305

299306
.PHONY: tests clean

0 commit comments

Comments
 (0)
Please sign in to comment.