From 5db4c71a167496626240ddec72241d85944441fb Mon Sep 17 00:00:00 2001 From: CJ Pais Date: Tue, 5 Mar 2024 20:37:16 -0800 Subject: [PATCH 1/2] fix num tokens for multimodal + empty prompt in response --- examples/server/server.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 8fe5e0b19668f..c71c9199d3a7a 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1303,6 +1303,7 @@ struct llama_server_context bool ingest_images(server_slot &slot, int n_batch) { int image_idx = 0; + std::string prompt = ""; while (image_idx < (int) slot.images.size()) { @@ -1366,6 +1367,10 @@ struct llama_server_context slot.params.input_suffix : // no more images, then process suffix prompt (json)(slot.images[image_idx].prefix_prompt); + // rebuild the prompt since it was cleared earlier + prompt += img.prefix_prompt; + prompt += json_prompt; + std::vector append_tokens = tokenize(json_prompt, false); // has next image for (int i = 0; i < (int) append_tokens.size(); ++i) { @@ -1374,6 +1379,13 @@ struct llama_server_context } } + // There is no prompt caching in multimodal currently + slot.n_prompt_tokens = slot.n_past; + slot.n_prompt_tokens_processed = slot.n_past; + + // the multimodal prompt was cleared earlier to avoid processing those tokens; restore the rebuilt prompt here + slot.prompt = prompt; + return true; } From a98a166d12517ceb04ee8c16f3d3d5a6e385a424 Mon Sep 17 00:00:00 2001 From: CJ Pais Date: Tue, 5 Mar 2024 20:45:39 -0800 Subject: [PATCH 2/2] add back the [img-id] --- examples/server/server.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index c71c9199d3a7a..682693164fc30 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1369,6 +1369,7 @@ struct llama_server_context // rebuild the prompt since it was cleared earlier prompt += img.prefix_prompt; + prompt += "[img-" + std::to_string(img.id) + "]"; prompt += 
json_prompt; std::vector append_tokens = tokenize(json_prompt, false); // has next image