Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend/cpp/llama-cpp/Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

-LLAMA_VERSION?=4ae88d07d026e66b41e85afece74e88af54f4e66
+LLAMA_VERSION?=835b2b915c52bcabcd688d025eacff9a07b65f52
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp

CMAKE_ARGS?=
Expand Down
12 changes: 3 additions & 9 deletions backend/cpp/llama-cpp/grpc-server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -802,11 +802,6 @@ class BackendServiceImpl final : public backend::Backend::Service {
return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, "\"documents\" must be a non-empty string array");
}

-// Tokenize the query
-auto tokenized_query = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, request->query(), /* add_special */ false, true);
-if (tokenized_query.size() != 1) {
-return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, "\"query\" must contain only a single prompt");
-}
// Create and queue the task
json responses = json::array();
bool error = false;
Expand All @@ -818,10 +813,9 @@ class BackendServiceImpl final : public backend::Backend::Service {
documents.push_back(request->documents(i));
}

-auto tokenized_docs = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, documents, /* add_special */ false, true);
-tasks.reserve(tokenized_docs.size());
-for (size_t i = 0; i < tokenized_docs.size(); i++) {
-auto tmp = format_rerank(ctx_server.vocab, tokenized_query[0], tokenized_docs[i]);
+tasks.reserve(documents.size());
+for (size_t i = 0; i < documents.size(); i++) {
+auto tmp = format_rerank(ctx_server.model, ctx_server.vocab, ctx_server.mctx, request->query(), documents[i]);
server_task task = server_task(SERVER_TASK_TYPE_RERANK);
task.id = ctx_server.queue_tasks.get_new_id();
task.index = i;
Expand Down
Loading