From 0b3e42a5f80b7221c21ac63444141890a8f4a8dc Mon Sep 17 00:00:00 2001
From: reversebias
Date: Sat, 9 Mar 2024 23:00:12 +1100
Subject: [PATCH] Explicitly enable prompt caching on llama.cpp endpoints

---
 src/lib/server/endpoints/llamacpp/endpointLlamacpp.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/lib/server/endpoints/llamacpp/endpointLlamacpp.ts b/src/lib/server/endpoints/llamacpp/endpointLlamacpp.ts
index faa08c8f55b..2ace6f403f2 100644
--- a/src/lib/server/endpoints/llamacpp/endpointLlamacpp.ts
+++ b/src/lib/server/endpoints/llamacpp/endpointLlamacpp.ts
@@ -41,6 +41,7 @@ export function endpointLlamacpp(
 			stop: model.parameters.stop,
 			repeat_penalty: model.parameters.repetition_penalty,
 			n_predict: model.parameters.max_new_tokens,
+			cache_prompt: true,
 		}),
 	});
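
For reference, a minimal TypeScript sketch (not part of the patch) of the kind of request the patched endpoint sends to the llama.cpp server's /completion API once cache_prompt is set. The url, prompt, and parameters values below are illustrative placeholders, not identifiers taken from the patched file:

// Illustrative sketch only; `url`, `prompt`, and `parameters` are placeholders
// standing in for what endpointLlamacpp actually builds from its config.
const url = "http://127.0.0.1:8080";
const prompt = "Hello";
const parameters = { stop: ["</s>"], repetition_penalty: 1.1, max_new_tokens: 256 };

const r = await fetch(`${url}/completion`, {
	method: "POST",
	headers: { "Content-Type": "application/json" },
	body: JSON.stringify({
		prompt,
		stop: parameters.stop,
		repeat_penalty: parameters.repetition_penalty,
		n_predict: parameters.max_new_tokens,
		// The flag added by this patch: ask the llama.cpp server to keep the
		// evaluated prompt in its KV cache so a shared prompt prefix does not
		// have to be re-evaluated on the next request.
		cache_prompt: true,
	}),
});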