From 374b648acd3a7fdee9b0a9d6b20c7090bb782fed Mon Sep 17 00:00:00 2001 From: Yann Follet Date: Fri, 3 Oct 2025 06:28:28 +0000 Subject: [PATCH 1/6] server / ranking : add sorting and management of top_n --- tools/server/server.cpp | 17 +++++----- tools/server/utils.hpp | 75 +++++++++++++++++++++-------------------- 2 files changed, 47 insertions(+), 45 deletions(-) diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 6062904a8c7c0..8b86eaab28335 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -5061,13 +5061,13 @@ int main(int argc, char ** argv) { const json body = json::parse(req.body); // TODO: implement - //int top_n = 1; - //if (body.count("top_n") != 1) { - // top_n = body.at("top_n"); - //} else { - // res_error(res, format_error_response("\"top_n\" must be provided", ERROR_TYPE_INVALID_REQUEST)); - // return; - //} + int top_n = 1; + if (body.count("top_n") == 1) { + top_n = body.at("top_n"); + } else { + res_error(res, format_error_response("\"top_n\" must be provided", ERROR_TYPE_INVALID_REQUEST)); + return; + } // if true, use TEI API format, otherwise use Jina API format // Jina: https://jina.ai/reranker/ @@ -5133,7 +5133,8 @@ int main(int argc, char ** argv) { body, responses, is_tei_format, - documents); + documents, + top_n); res_ok(res, root); }; diff --git a/tools/server/utils.hpp b/tools/server/utils.hpp index 4ca1423aaf2d4..35e9c8bb3f7f3 100644 --- a/tools/server/utils.hpp +++ b/tools/server/utils.hpp @@ -849,46 +849,47 @@ static json format_response_rerank( const json & request, const json & ranks, bool is_tei_format, - std::vector & texts) { - json res; - if (is_tei_format) { - // TEI response format - res = json::array(); - bool return_text = json_value(request, "return_text", false); - for (const auto & rank : ranks) { - int index = json_value(rank, "index", 0); - json elem = json{ - {"index", index}, - {"score", json_value(rank, "score", 0.0)}, - }; - if (return_text) { - elem["text"] = 
std::move(texts[index]); - } - res.push_back(elem); - } - } else { - // Jina response format - json results = json::array(); - int32_t n_tokens = 0; - for (const auto & rank : ranks) { - results.push_back(json{ - {"index", json_value(rank, "index", 0)}, - {"relevance_score", json_value(rank, "score", 0.0)}, - }); - - n_tokens += json_value(rank, "tokens_evaluated", 0); + std::vector & texts, + int top_n) { + json results; + int32_t n_tokens = 0; + bool return_text = is_tei_format && json_value(request, "return_text", false); + std::vector elements; // Temporary vector to hold unsorted elements + std::string score_label = is_tei_format ? "score" : "relevance_score"; + for (const auto & rank : ranks) { + int index = json_value(rank, "index", 0); + json elem = json{ + {"index", index}, + {score_label, json_value(rank, "score", 0.0)}, + }; + n_tokens += json_value(rank, "tokens_evaluated", 0); + if (return_text) { + elem["text"] = std::move(texts[index]); } + elements.push_back(elem); + } + + std::sort(elements.begin(), elements.end(), [score_label](const json& a, const json& b) { + return json_value(a, score_label, 0.0) > json_value(b, score_label, 0.0); + }); - res = json{ - {"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))}, - {"object", "list"}, - {"usage", json{ - {"prompt_tokens", n_tokens}, - {"total_tokens", n_tokens} - }}, - {"results", results} - }; + results = json::array(); + int count = 0; + for (const auto & elem : elements) { + if (++count > top_n) break; + results.push_back(elem); } + if (is_tei_format) return results; + + json res = json{ + {"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))}, + {"object", "list"}, + {"usage", json{ + {"prompt_tokens", n_tokens}, + {"total_tokens", n_tokens} + }}, + {"results", results} + }; return res; } From b86bfdb44bd0da4ccfbfe19c955c9ab8687c3383 Mon Sep 17 00:00:00 2001 From: Yann Follet Date: Sat, 4 Oct 2025 07:29:44 +0000 Subject: [PATCH 2/6] Make the retro 
compatible: when top_n is omitted, all results are returned. Here is a script to run some tests: ```script URL=${1:-http://127.0.0.1:8181} curl "$URL/v1/rerank" -H "Content-Type: application/json" \ -d '{ "model": "M", "query": "What is the recipe to make bread ?", "return_text" : true, "texts" : true, "top_n": 6, "documents": [ "voici la recette pour faire du pain, il faut de la farine de l eau et du levain et du sel", "it is a bear", "bread recipe : floor, water, yest, salt", "The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.", "here is the ingedients to bake bread : 500g floor, 350g water, 120g fresh refresh yest, 15g salt", "recipe to make cookies : floor, eggs, water, chocolat", "here is the recipe to make bread : 500g floor, 350g water, 120g fresh refresh yest, 15g salt", "il fait tres beau aujourd hui", "je n ai pas faim, je ne veux pas manger", "je suis a paris" ] }' | jq ``` --- tools/server/server.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 8b86eaab28335..0d283cd40b6ec 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -5060,15 +5060,6 @@ int main(int argc, char ** argv) { const json body = json::parse(req.body); - // TODO: implement - int top_n = 1; - if (body.count("top_n") == 1) { - top_n = body.at("top_n"); - } else { - res_error(res, format_error_response("\"top_n\" must be provided", ERROR_TYPE_INVALID_REQUEST)); - return; - } - // if true, use TEI API format, otherwise use Jina API format // Jina: https://jina.ai/reranker/ // TEI: https://huggingface.github.io/text-embeddings-inference/#/Text%20Embeddings%20Inference/rerank @@ -5093,6 +5084,11 @@ int main(int argc, char ** argv) { return; } + int top_n = documents.size(); // no top_n will return all the documents + if (body.count("top_n") == 1) { + top_n = body.at("top_n"); + } + // create and queue the task json responses = 
json::array(); bool error = false; From fd143c7a98a72af0789a05d48e659aec8116404f Mon Sep 17 00:00:00 2001 From: Yann Follet Date: Thu, 9 Oct 2025 01:44:15 +0000 Subject: [PATCH 3/6] use resize() instead for(...) --- tools/server/utils.hpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tools/server/utils.hpp b/tools/server/utils.hpp index 35e9c8bb3f7f3..4fda0410e340e 100644 --- a/tools/server/utils.hpp +++ b/tools/server/utils.hpp @@ -851,7 +851,6 @@ static json format_response_rerank( bool is_tei_format, std::vector & texts, int top_n) { - json results; int32_t n_tokens = 0; bool return_text = is_tei_format && json_value(request, "return_text", false); std::vector elements; // Temporary vector to hold unsorted elements @@ -868,17 +867,14 @@ static json format_response_rerank( } elements.push_back(elem); } - + std::sort(elements.begin(), elements.end(), [score_label](const json& a, const json& b) { return json_value(a, score_label, 0.0) > json_value(b, score_label, 0.0); }); - results = json::array(); - int count = 0; - for (const auto & elem : elements) { - if (++count > top_n) break; - results.push_back(elem); - } + elements.resize(std::min(top_n, (int)elements.size())); + json results = elements; + if (is_tei_format) return results; json res = json{ From a20052086d8116e9b088cc67d5ef8e0d1c1442d0 Mon Sep 17 00:00:00 2001 From: Yann Follet Date: Thu, 9 Oct 2025 01:53:23 +0000 Subject: [PATCH 4/6] simplify top_n init since no need to return error result to test : ./tests.sh unit/test_rerank.py -v -x ==================================================== test session starts ===================================================== platform linux -- Python 3.12.3, pytest-8.3.5, pluggy-1.6.0 -- /home/yann/dev/yann/llama.cpp/tools/server/tests/test/bin/python3 cachedir: .pytest_cache rootdir: /home/yann/dev/yann/llama.cpp/tools/server/tests configfile: pytest.ini plugins: anyio-4.11.0 collected 8 items unit/test_rerank.py::test_rerank PASSED [ 
12%] unit/test_rerank.py::test_rerank_tei_format PASSED [ 25%] unit/test_rerank.py::test_invalid_rerank_req[documents0] PASSED [ 37%] unit/test_rerank.py::test_invalid_rerank_req[None] PASSED [ 50%] unit/test_rerank.py::test_invalid_rerank_req[123] PASSED [ 62%] unit/test_rerank.py::test_invalid_rerank_req[documents3] PASSED [ 75%] unit/test_rerank.py::test_rerank_usage[Machine learning is-A machine-Learning is-19] PASSED [ 87%] unit/test_rerank.py::test_rerank_usage[Which city?-Machine learning is -Paris, capitale de la-26] PASSED [100%] ===================================================== 8 passed in 4.31s ====================================================== --- tools/server/server.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 0d283cd40b6ec..d959b5bed15db 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -5084,10 +5084,7 @@ int main(int argc, char ** argv) { return; } - int top_n = documents.size(); // no top_n will return all the documents - if (body.count("top_n") == 1) { - top_n = body.at("top_n"); - } + int top_n = json_value(body, "top_n", (int)documents.size()); // create and queue the task json responses = json::array(); From 7c9d34c3438ef7d9f79354db928ef47b17f98ab8 Mon Sep 17 00:00:00 2001 From: Yann Follet Date: Sat, 11 Oct 2025 10:22:20 +0000 Subject: [PATCH 5/6] add rerank top_n unit test here is the result : ./tests.sh unit/test_rerank.py -v -x =================================================================== test session starts =================================================================== platform linux -- Python 3.12.3, pytest-8.3.5, pluggy-1.6.0 -- /home/yann/dev/yann/llama.cpp/tools/server/tests/test/bin/python3 cachedir: .pytest_cache rootdir: /home/yann/dev/yann/llama.cpp/tools/server/tests configfile: pytest.ini plugins: anyio-4.11.0 collected 16 items unit/test_rerank.py::test_rerank PASSED [ 6%] 
unit/test_rerank.py::test_rerank_tei_format PASSED [ 12%] unit/test_rerank.py::test_invalid_rerank_req[documents0] PASSED [ 18%] unit/test_rerank.py::test_invalid_rerank_req[None] PASSED [ 25%] unit/test_rerank.py::test_invalid_rerank_req[123] PASSED [ 31%] unit/test_rerank.py::test_invalid_rerank_req[documents3] PASSED [ 37%] unit/test_rerank.py::test_rerank_usage[Machine learning is-A machine-Learning is-19] PASSED [ 43%] unit/test_rerank.py::test_rerank_usage[Which city?-Machine learning is -Paris, capitale de la-26] PASSED [ 50%] unit/test_rerank.py::test_rerank_top_n[None-4] PASSED [ 56%] unit/test_rerank.py::test_rerank_top_n[2-2] PASSED [ 62%] unit/test_rerank.py::test_rerank_top_n[4-4] PASSED [ 68%] unit/test_rerank.py::test_rerank_top_n[99-4] PASSED [ 75%] unit/test_rerank.py::test_rerank_tei_top_n[None-4] PASSED [ 81%] unit/test_rerank.py::test_rerank_tei_top_n[2-2] PASSED [ 87%] unit/test_rerank.py::test_rerank_tei_top_n[4-4] PASSED [ 93%] unit/test_rerank.py::test_rerank_tei_top_n[99-4] PASSED [100%] =================================================================== 16 passed in 8.84s =================================================================== --- tools/server/tests/unit/test_rerank.py | 42 ++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tools/server/tests/unit/test_rerank.py b/tools/server/tests/unit/test_rerank.py index 0b63c7821eb98..8fd7060eb2114 100644 --- a/tools/server/tests/unit/test_rerank.py +++ b/tools/server/tests/unit/test_rerank.py @@ -102,3 +102,45 @@ def test_rerank_usage(query, doc1, doc2, n_tokens): assert res.status_code == 200 assert res.body['usage']['prompt_tokens'] == res.body['usage']['total_tokens'] assert res.body['usage']['prompt_tokens'] == n_tokens + + +@pytest.mark.parametrize("top_n,expected_len", [ + (None, len(TEST_DOCUMENTS)), # no top_n parameter + (2, 2), + (4, 4), + (99, len(TEST_DOCUMENTS)), # higher than available docs +]) +def test_rerank_top_n(top_n, expected_len): + global 
server + server.start() + data = { + "query": "Machine learning is", + "documents": TEST_DOCUMENTS, + } + if top_n is not None: + data["top_n"] = top_n + + res = server.make_request("POST", "/rerank", data=data) + assert res.status_code == 200 + assert len(res.body["results"]) == expected_len + + +@pytest.mark.parametrize("top_n,expected_len", [ + (None, len(TEST_DOCUMENTS)), # no top_n parameter + (2, 2), + (4, 4), + (99, len(TEST_DOCUMENTS)), # higher than available docs +]) +def test_rerank_tei_top_n(top_n, expected_len): + global server + server.start() + data = { + "query": "Machine learning is", + "texts": TEST_DOCUMENTS, + } + if top_n is not None: + data["top_n"] = top_n + + res = server.make_request("POST", "/rerank", data=data) + assert res.status_code == 200 + assert len(res.body) == expected_len \ No newline at end of file From 7beb4cbb16cea0affc03f7c5194be5603cc633b6 Mon Sep 17 00:00:00 2001 From: Yann Follet Date: Sat, 11 Oct 2025 11:38:39 +0000 Subject: [PATCH 6/6] editor config check fix --- tools/server/tests/unit/test_rerank.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/server/tests/unit/test_rerank.py b/tools/server/tests/unit/test_rerank.py index 8fd7060eb2114..ded8267109682 100644 --- a/tools/server/tests/unit/test_rerank.py +++ b/tools/server/tests/unit/test_rerank.py @@ -143,4 +143,4 @@ def test_rerank_tei_top_n(top_n, expected_len): res = server.make_request("POST", "/rerank", data=data) assert res.status_code == 200 - assert len(res.body) == expected_len \ No newline at end of file + assert len(res.body) == expected_len