Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

server : clarify /slots endpoint, add is_processing #10162

Merged
merged 2 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions examples/server/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,10 @@ Given a ChatML-formatted json description in `messages`, it returns the predicte

### GET `/slots`: Returns the current slots processing state

This endpoint can be disabled with `--no-slots`
> [!WARNING]
> This endpoint is intended for debugging and may be modified in future versions. For security reasons, we strongly advise against enabling it in production environments.

This endpoint is disabled by default and can be enabled with `--slots`

If query param `?fail_on_no_slot=1` is set, this endpoint will respond with status code 503 if there is no available slots.

Expand All @@ -709,6 +712,7 @@ Example:
"grammar": "",
"id": 0,
"ignore_eos": false,
"is_processing": false,
"logit_bias": [],
"min_p": 0.05000000074505806,
"mirostat": 0,
Expand Down Expand Up @@ -741,7 +745,6 @@ Example:
"temperature"
],
"seed": 42,
"state": 1,
"stop": [
"\n"
],
Expand All @@ -755,10 +758,6 @@ Example:
]
```

Possible values for `slot[i].state` are:
- `0`: SLOT_STATE_IDLE
- `1`: SLOT_STATE_PROCESSING

### GET `/metrics`: Prometheus compatible metrics exporter

This endpoint is only accessible if `--metrics` is set.
Expand Down
16 changes: 8 additions & 8 deletions examples/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1566,11 +1566,11 @@ struct server_context {

for (server_slot & slot : slots) {
json slot_data = get_formated_generation(slot);
slot_data["id"] = slot.id;
slot_data["id_task"] = slot.id_task;
slot_data["state"] = slot.state;
slot_data["prompt"] = common_detokenize(ctx, slot.prompt_tokens);
slot_data["next_token"] = {
slot_data["id"] = slot.id;
slot_data["id_task"] = slot.id_task;
slot_data["is_processing"] = slot.is_processing();
slot_data["prompt"] = common_detokenize(ctx, slot.prompt_tokens);
slot_data["next_token"] = {
{"has_next_token", slot.has_next_token},
{"has_new_line", slot.has_new_line},
{"n_remain", slot.n_remaining},
Expand All @@ -1581,10 +1581,10 @@ struct server_context {
{"stopping_word", slot.stopping_word},
};

if (slot_data["state"] == SLOT_STATE_IDLE) {
n_idle_slots++;
} else {
if (slot.is_processing()) {
n_processing_slots++;
} else {
n_idle_slots++;
}

slots_data.push_back(slot_data);
Expand Down
10 changes: 5 additions & 5 deletions examples/server/tests/features/steps/steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,13 +260,13 @@ async def step_wait_for_server_status(context, expecting_status: Literal['health
async def step_all_slots_status(context, expected_slot_status_string: Literal['idle', 'busy'] | str):
match expected_slot_status_string:
case 'idle':
expected_slot_status = 0
expected_slot_status = False
case 'busy':
expected_slot_status = 1
expected_slot_status = True
case _:
assert False, "unknown status"

expected_slots = [{'id': slot_id, 'state': expected_slot_status}
expected_slots = [{'id': slot_id, 'is_processing': expected_slot_status}
for slot_id in range(context.n_slots)]
await request_slots_status(context, expected_slots)

Expand Down Expand Up @@ -1354,8 +1354,8 @@ async def wait_for_slots_status(context,
if status_code == 503 and status_code == expected_http_status_code:
return
if status_code == 200 and status_code == expected_http_status_code:
n_slots_idle = sum(1 if slot["state"] == 0 else 0 for slot in slots)
n_slots_processing = sum(1 if slot["state"] != 0 else 0 for slot in slots)
n_slots_idle = sum(1 if not slot["is_processing"] else 0 for slot in slots)
n_slots_processing = sum(1 if slot["is_processing"] else 0 for slot in slots)
if ((slots_idle is None or slots_idle == n_slots_idle)
and (slots_processing is None or slots_processing == n_slots_processing)):
return
Expand Down
Loading