ai-dynamo · ishandhanani · Jul 14, 2025 · Jul 10, 2025
@@ -164,7 +164,9 @@ impl
 
                     let result = state.decoder.process_token_ids(&data.token_ids).unwrap();
 
-                    // todo - propagate finish reason details - possibly an annotation
+                    // NOTE: `finish_reason` here is computed from the generated `token_ids` alone.
+                    // The incoming `data` may already carry a `finish_reason` set by the underlying
+                    // LLM inference `Engine` while its `token_ids` are empty. See the comment below
+                    // for more details.
                     let finish_reason = match &result.stop_trigger {
                         Some(StopTrigger::MaxTokensLimit) => Some(FinishReason::Length),
                         Some(StopTrigger::HiddenStopTokenDetected(_)) => Some(FinishReason::Stop),
@@ -203,7 +205,15 @@ impl
                     let mut output = output;
                     let mut data = output.data.take().unwrap();
 
-                    data.finish_reason = finish_reason;
+                    // NOTE: If `finish_reason.is_some()`, one of the stop conditions was triggered
+                    // by the token generation, so `data.finish_reason` should be updated.
+                    // However, if `finish_reason.is_none()`, we may be in the case where
+                    // `data.token_ids` is empty and `data.finish_reason` is already correctly set.
+                    // There, the `finish_reason` derived from `process_token_ids` above is `None`,
+                    // and we don't want it to overwrite `data.finish_reason`.
+                    if finish_reason.is_some() {
+                        data.finish_reason = finish_reason;
+                    }
                     data.text = text;
                     data.tokens = Some(tokens);