@@ -802,11 +802,6 @@ class BackendServiceImpl final : public backend::Backend::Service {
802
802
return grpc::Status (grpc::StatusCode::INVALID_ARGUMENT, " \" documents\" must be a non-empty string array" );
803
803
}
804
804
805
- // Tokenize the query
806
- auto tokenized_query = tokenize_input_prompts (ctx_server.vocab , ctx_server.mctx , request->query (), /* add_special */ false , true );
807
- if (tokenized_query.size () != 1 ) {
808
- return grpc::Status (grpc::StatusCode::INVALID_ARGUMENT, " \" query\" must contain only a single prompt" );
809
- }
810
805
// Create and queue the task
811
806
json responses = json::array ();
812
807
bool error = false ;
@@ -818,10 +813,9 @@ class BackendServiceImpl final : public backend::Backend::Service {
818
813
documents.push_back (request->documents (i));
819
814
}
820
815
821
- auto tokenized_docs = tokenize_input_prompts (ctx_server.vocab , ctx_server.mctx , documents, /* add_special */ false , true );
822
- tasks.reserve (tokenized_docs.size ());
823
- for (size_t i = 0 ; i < tokenized_docs.size (); i++) {
824
- auto tmp = format_rerank (ctx_server.vocab , tokenized_query[0 ], tokenized_docs[i]);
816
+ tasks.reserve (documents.size ());
817
+ for (size_t i = 0 ; i < documents.size (); i++) {
818
+ auto tmp = format_rerank (ctx_server.model , ctx_server.vocab , ctx_server.mctx , request->query (), documents[i]);
825
819
server_task task = server_task (SERVER_TASK_TYPE_RERANK);
826
820
task.id = ctx_server.queue_tasks .get_new_id ();
827
821
task.index = i;
0 commit comments