diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index a63083cbb09ab..23f54a59bd14c 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -197,7 +197,7 @@ inline std::string format_chat(const struct llama_model * model, const std::stri // run the first time to get the total output length int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size()); - + // if it turns out that our buffer is too small, we resize it if ((size_t) res > buf.size()) { buf.resize(res);