Commit 6e47f99

nachiketb-nvidia authored and nv-anants committed
chore: remove flatten for chat response types, add reasoning_content (#2543)
Change the chat completion response objects from wrapper structs to type aliases of the dynamo_async_openai types, implement the aggregator traits for the chat completion structs, and add reasoning_content under the message and delta message types in lib/async-openai.
1 parent 786d6a3 commit 6e47f99
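
The practical effect at call sites is one less level of indirection. A minimal sketch, assuming only the fields visible in the diffs below (the helper name `first_token` is hypothetical):

use dynamo_async_openai::types::CreateChatCompletionStreamResponse;

// After this commit, NvCreateChatCompletionStreamResponse is an alias
// for the type below, so the two names are interchangeable here.
fn first_token(data: &CreateChatCompletionStreamResponse) -> Option<&str> {
    // Before the change this read: data.inner.choices.first()
    data.choices.first()?.delta.content.as_deref()
}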

15 files changed: +127 −140 lines

.github/workflows/docs-link-check.yml

Lines changed: 1 addition & 0 deletions
@@ -50,6 +50,7 @@ jobs:
           # Set GITHUB_TOKEN to avoid github rate limits on URL checks
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
+          cd docs
           set -euo pipefail
           # Run lychee against all files in repo
           lychee \

lib/async-openai/src/types/chat.rs

Lines changed: 6 additions & 0 deletions
@@ -449,6 +449,9 @@ pub struct ChatCompletionResponseMessage {
     /// If the audio output modality is requested, this object contains data about the audio response from the model. [Learn more](https://platform.openai.com/docs/guides/audio).
     #[serde(skip_serializing_if = "Option::is_none")]
     pub audio: Option<ChatCompletionResponseMessageAudio>,
+
+    /// NVIDIA-specific extensions for the chat completion response.
+    pub reasoning_content: Option<String>,
 }

 #[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
@@ -1021,6 +1024,9 @@ pub struct ChatCompletionStreamResponseDelta {
     pub role: Option<Role>,
     /// The refusal message generated by the model.
     pub refusal: Option<String>,
+
+    /// NVIDIA-specific extensions for the chat completion response.
+    pub reasoning_content: Option<String>,
 }

 #[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
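
A minimal sketch (not part of this diff) of how downstream code might read the new field; the helper `visible_text` is hypothetical, and only the `content` and `reasoning_content` fields, both `Option<String>`, are assumed:

use dynamo_async_openai::types::ChatCompletionResponseMessage;

// Hypothetical helper: prefer the final answer, fall back to the
// reasoning trace when no regular content was produced.
fn visible_text(message: &ChatCompletionResponseMessage) -> Option<&str> {
    message
        .content
        .as_deref()
        .or(message.reasoning_content.as_deref())
}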

lib/engines/mistralrs/src/lib.rs

Lines changed: 2 additions & 2 deletions
@@ -396,7 +396,7 @@ impl
         //tracing::trace!("from_assistant: {from_assistant}");

         #[allow(deprecated)]
-        let inner = dynamo_async_openai::types::CreateChatCompletionStreamResponse{
+        let delta = NvCreateChatCompletionStreamResponse {
             id: c.id,
             choices: vec![dynamo_async_openai::types::ChatChoiceStream{
                 index: 0,
@@ -407,6 +407,7 @@ impl
                 tool_calls: None,
                 refusal: None,
                 function_call: None,
+                reasoning_content: None,
             },
             logprobs: None,
             finish_reason,
@@ -418,7 +419,6 @@ impl
             system_fingerprint: Some(c.system_fingerprint),
             service_tier: None,
         };
-        let delta = NvCreateChatCompletionStreamResponse{inner};
         let ann = Annotated{
             id: None,
             data: Some(delta),

lib/llm/src/engines.rs

Lines changed: 2 additions & 8 deletions
@@ -204,18 +204,12 @@ impl
             for c in prompt.chars() {
                 // we are returning characters not tokens, so there will be some postprocessing overhead
                 tokio::time::sleep(*TOKEN_ECHO_DELAY).await;
-                let inner = deltas.create_choice(0, Some(c.to_string()), None, None);
-                let response = NvCreateChatCompletionStreamResponse {
-                    inner,
-                };
+                let response = deltas.create_choice(0, Some(c.to_string()), None, None);
                 yield Annotated{ id: Some(id.to_string()), data: Some(response), event: None, comment: None };
                 id += 1;
             }

-            let inner = deltas.create_choice(0, None, Some(dynamo_async_openai::types::FinishReason::Stop), None);
-            let response = NvCreateChatCompletionStreamResponse {
-                inner,
-            };
+            let response = deltas.create_choice(0, None, Some(dynamo_async_openai::types::FinishReason::Stop), None);
             yield Annotated { id: Some(id.to_string()), data: Some(response), event: None, comment: None };
         };
lib/llm/src/entrypoint/input/batch.rs

Lines changed: 1 addition & 1 deletion
@@ -233,7 +233,7 @@ async fn evaluate(
         match (item.data.as_ref(), item.event.as_deref()) {
             (Some(data), _) => {
                 // Normal case
-                let choice = data.inner.choices.first();
+                let choice = data.choices.first();
                 let chat_comp = choice.as_ref().unwrap();
                 if let Some(c) = &chat_comp.delta.content {
                     output += c;

lib/llm/src/entrypoint/input/text.rs

Lines changed: 1 addition & 1 deletion
@@ -143,7 +143,7 @@ async fn main_loop(
         match (item.data.as_ref(), item.event.as_deref()) {
             (Some(data), _) => {
                 // Normal case
-                let entry = data.inner.choices.first();
+                let entry = data.choices.first();
                 let chat_comp = entry.as_ref().unwrap();
                 if let Some(c) = &chat_comp.delta.content {
                     let _ = stdout.write(c.as_bytes());

lib/llm/src/http/service/openai.rs

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@ use super::{
     service_v2, RouteDoc,
 };
 use crate::preprocessor::LLMMetricAnnotation;
+use crate::protocols::openai::chat_completions::aggregator::ChatCompletionAggregator;
 use crate::protocols::openai::{
     chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionResponse},
     completions::{NvCreateCompletionRequest, NvCreateCompletionResponse},

lib/llm/src/perf/logprobs.rs

Lines changed: 12 additions & 18 deletions
@@ -128,7 +128,7 @@ impl LogprobExtractor for NvCreateChatCompletionStreamResponse {
     fn extract_logprobs_by_choice(&self) -> HashMap<u32, Vec<TokenLogProbs>> {
         let mut result = HashMap::new();

-        for choice in &self.inner.choices {
+        for choice in &self.choices {
             let choice_index = choice.index;

             let choice_logprobs = choice
@@ -574,8 +574,7 @@ mod tests {
     use approx::assert_abs_diff_eq;
     use dynamo_async_openai::types::{
         ChatChoiceLogprobs, ChatChoiceStream, ChatCompletionStreamResponseDelta,
-        ChatCompletionTokenLogprob, CreateChatCompletionStreamResponse, FinishReason, Role,
-        TopLogprobs,
+        ChatCompletionTokenLogprob, FinishReason, Role, TopLogprobs,
     };
     use futures::StreamExt;
     use std::sync::Arc;
@@ -949,7 +948,7 @@ mod tests {
         token_logprobs: Vec<ChatCompletionTokenLogprob>,
     ) -> NvCreateChatCompletionStreamResponse {
         #[expect(deprecated)]
-        let inner = CreateChatCompletionStreamResponse {
+        NvCreateChatCompletionStreamResponse {
             id: "test_id".to_string(),
             choices: vec![ChatChoiceStream {
                 index: 0,
@@ -959,6 +958,7 @@ mod tests {
                     tool_calls: None,
                     role: Some(Role::Assistant),
                     refusal: None,
+                    reasoning_content: None,
                 },
                 finish_reason: Some(FinishReason::Stop),
                 logprobs: Some(ChatChoiceLogprobs {
@@ -972,9 +972,7 @@ mod tests {
             system_fingerprint: None,
             object: "chat.completion.chunk".to_string(),
             usage: None,
-        };
-
-        NvCreateChatCompletionStreamResponse { inner }
+        }
     }

     fn create_mock_response_with_multiple_choices(
@@ -992,6 +990,7 @@ mod tests {
                     tool_calls: None,
                     role: Some(Role::Assistant),
                     refusal: None,
+                    reasoning_content: None,
                 },
                 finish_reason: Some(FinishReason::Stop),
                 logprobs: Some(ChatChoiceLogprobs {
@@ -1001,7 +1000,7 @@ mod tests {
             })
             .collect();

-        let inner = CreateChatCompletionStreamResponse {
+        NvCreateChatCompletionStreamResponse {
             id: "test_id".to_string(),
             choices,
             created: 1234567890,
@@ -1010,9 +1009,7 @@ mod tests {
             system_fingerprint: None,
             object: "chat.completion.chunk".to_string(),
             usage: None,
-        };
-
-        NvCreateChatCompletionStreamResponse { inner }
+        }
     }

     #[test]
@@ -1331,7 +1328,7 @@ mod tests {
     fn test_logprob_extractor_with_missing_data() {
         // Test with choice that has no logprobs
         #[expect(deprecated)]
-        let inner = CreateChatCompletionStreamResponse {
+        let response = NvCreateChatCompletionStreamResponse {
             id: "test_id".to_string(),
             choices: vec![ChatChoiceStream {
                 index: 0,
@@ -1341,6 +1338,7 @@ mod tests {
                     tool_calls: None,
                     role: Some(Role::Assistant),
                     refusal: None,
+                    reasoning_content: None,
                 },
                 finish_reason: Some(FinishReason::Stop),
                 logprobs: None, // No logprobs
@@ -1353,7 +1351,6 @@ mod tests {
             usage: None,
         };

-        let response = NvCreateChatCompletionStreamResponse { inner };
         let logprobs = response.extract_logprobs_by_choice();
         assert_eq!(logprobs.len(), 1);
         assert!(logprobs.values().any(|v| v.is_empty()));
@@ -1556,9 +1553,8 @@ mod tests {
     fn create_mock_response() -> NvCreateChatCompletionStreamResponse {
         // Create a mock response for testing
        // In practice, this would have real logprobs data
-        use dynamo_async_openai::types::CreateChatCompletionStreamResponse;

-        let inner = CreateChatCompletionStreamResponse {
+        NvCreateChatCompletionStreamResponse {
             id: "test_id".to_string(),
             choices: vec![],
             created: 1234567890,
@@ -1567,9 +1563,7 @@ mod tests {
             system_fingerprint: None,
             object: "chat.completion.chunk".to_string(),
             usage: None,
-        };
-
-        NvCreateChatCompletionStreamResponse { inner }
+        }
     }

     // Mock context for testing

lib/llm/src/protocols/openai/chat_completions.rs

Lines changed: 4 additions & 11 deletions
@@ -27,7 +27,7 @@ use super::{
     OpenAIStopConditionsProvider,
 };

-mod aggregator;
+pub mod aggregator;
 mod delta;

 pub use aggregator::DeltaAggregator;
@@ -59,23 +59,16 @@ pub struct NvCreateChatCompletionRequest {
 /// # Fields
 /// - `inner`: The base OpenAI unary chat completion response, embedded
 /// using `serde(flatten)`.
-#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
-pub struct NvCreateChatCompletionResponse {
-    #[serde(flatten)]
-    pub inner: dynamo_async_openai::types::CreateChatCompletionResponse,
-}
+pub type NvCreateChatCompletionResponse = dynamo_async_openai::types::CreateChatCompletionResponse;

 /// A response structure for streamed chat completions, embedding OpenAI's
 /// `CreateChatCompletionStreamResponse`.
 ///
 /// # Fields
 /// - `inner`: The base OpenAI streaming chat completion response, embedded
 /// using `serde(flatten)`.
-#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
-pub struct NvCreateChatCompletionStreamResponse {
-    #[serde(flatten)]
-    pub inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse,
-}
+pub type NvCreateChatCompletionStreamResponse =
+    dynamo_async_openai::types::CreateChatCompletionStreamResponse;

 /// Implements `NvExtProvider` for `NvCreateChatCompletionRequest`,
 /// providing access to NVIDIA-specific extensions.
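
For readers unfamiliar with the pattern being removed: because the old wrappers embedded the OpenAI type via serde(flatten), the wrapper and the bare type already produced identical JSON, so replacing the structs with type aliases is wire-compatible. A minimal generic sketch of that equivalence, with illustrative names rather than the real types:

use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
struct Base {
    id: String,
}

// The removed pattern: a wrapper whose only field is flattened into
// the surrounding JSON object.
#[derive(Serialize, Deserialize)]
struct Wrapper {
    #[serde(flatten)]
    inner: Base,
}

fn main() {
    let wrapped = Wrapper { inner: Base { id: "x".into() } };
    let bare = Base { id: "x".into() };
    // Both serialize to {"id":"x"}, so dropping the wrapper does not
    // change the serialized format.
    assert_eq!(
        serde_json::to_string(&wrapped).unwrap(),
        serde_json::to_string(&bare).unwrap()
    );
}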
