talk-llama : fix build after ggml sync (ggerganov#1049)
sed -i 's,GGML_BACKEND_CUDA,GGML_BACKEND_GPU,g' examples/talk-llama/llama.cpp
przemoc authored and iThalay committed Sep 23, 2024
1 parent 3b9e496 commit 1dddee9
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions examples/talk-llama/llama.cpp
@@ -1002,7 +1002,7 @@ static void llama_model_load_internal(
 }
 
 #ifdef GGML_USE_CUBLAS
-#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_CUDA
+#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_GPU
 #else
 #define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_CPU
 #endif
@@ -1054,7 +1054,7 @@ static void llama_model_load_internal(
             layer.w2 = ml->get_tensor(layers_i + ".feed_forward.w2.weight", { n_ff, n_embd}, backend);
             layer.w3 = ml->get_tensor(layers_i + ".feed_forward.w3.weight", {n_embd, n_ff}, backend);
 
-            if (backend == GGML_BACKEND_CUDA) {
+            if (backend == GGML_BACKEND_GPU) {
                 vram_total +=
                     ggml_nbytes(layer.attention_norm) + ggml_nbytes(layer.wq) + ggml_nbytes(layer.wk) +
                     ggml_nbytes(layer.wv) + ggml_nbytes(layer.wo) + ggml_nbytes(layer.attention_norm) +
@@ -1115,7 +1115,7 @@ static void llama_model_load_internal(
         }
     }
     for (llama_load_tensor & lt : ml->tensors_map.tensors) {
-        if (lt.ggml_tensor->backend != GGML_BACKEND_CUDA) {
+        if (lt.ggml_tensor->backend != GGML_BACKEND_GPU) {
            continue;
        }
        if (progress_callback) {
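Note: as the commit message shows, the fix is a mechanical identifier rename applied with a single sed invocation. A minimal sketch of reapplying and checking it after a future ggml sync (the sed line is taken verbatim from the commit message; the grep verification step is an added assumption, not part of the commit):

# Reapply the rename described in the commit message:
sed -i 's,GGML_BACKEND_CUDA,GGML_BACKEND_GPU,g' examples/talk-llama/llama.cpp
# Illustrative check (not part of the commit): grep exits nonzero when no match remains
grep -n 'GGML_BACKEND_CUDA' examples/talk-llama/llama.cpp || echo 'no stale GGML_BACKEND_CUDA references'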
