Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions lib/llm/src/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,9 @@ impl

let result = state.decoder.process_token_ids(&data.token_ids).unwrap();

// todo - propagate finish reason details - possibly an annotation
// NOTE: `finish_reason` is computed from the generated `token_ids` alone.
// However, `data` can arrive with a `finish_reason` already set by the underlying
// LLM inference `Engine` and with empty `token_ids`. See the comment below for more details.
let finish_reason = match &result.stop_trigger {
Some(StopTrigger::MaxTokensLimit) => Some(FinishReason::Length),
Some(StopTrigger::HiddenStopTokenDetected(_)) => Some(FinishReason::Stop),
Expand Down Expand Up @@ -203,7 +205,15 @@ impl
let mut output = output;
let mut data = output.data.take().unwrap();

data.finish_reason = finish_reason;
// NOTE: If `finish_reason.is_some()`, then one of the stop conditions was triggered
// by the token generation, and we should update `data.finish_reason` accordingly.
// However, if `finish_reason.is_none()`, we may be in the case where
// `data.token_ids` is empty and `data.finish_reason` is already correctly set.
// In that case, the `finish_reason` derived above from `process_token_ids` is `None`,
// and we don't want to propagate that `None` to `data.finish_reason`.
if finish_reason.is_some() {
data.finish_reason = finish_reason;
}
data.text = text;
data.tokens = Some(tokens);

Expand Down
Loading