From c7c4d65d8e4a95ed59e1aef000d897a073ea473b Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Wed, 28 Aug 2024 19:11:06 -0700 Subject: [PATCH] Speed up KV in llamafile-bench --- llama.cpp/llama-bench/llama-bench.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama.cpp/llama-bench/llama-bench.cpp b/llama.cpp/llama-bench/llama-bench.cpp index 278b2131ff..2ae41288d7 100644 --- a/llama.cpp/llama-bench/llama-bench.cpp +++ b/llama.cpp/llama-bench/llama-bench.cpp @@ -265,8 +265,8 @@ static const cmd_params cmd_params_defaults = { /* n_pg */ {}, /* n_batch */ {2048}, /* n_ubatch */ {512}, - /* type_k */ {GGML_TYPE_F16}, - /* type_v */ {GGML_TYPE_F16}, + /* type_k */ {X86_HAVE(AVX512_BF16) ? GGML_TYPE_BF16 : GGML_TYPE_F16}, + /* type_v */ {X86_HAVE(AVX512_BF16) ? GGML_TYPE_BF16 : GGML_TYPE_F16}, /* n_threads */ {cpu_get_num_math()}, /* n_gpu_layers */ {0}, /* split_mode */ {LLAMA_SPLIT_MODE_LAYER},