Commit 35ead1d

fix: align mistralrs response ids with distributed trace ids
1 parent: 88eecce

3 files changed: +24 −20 lines

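At a glance: the mistralrs engine previously minted its own numeric request ids, so the ids in streamed responses and logs could not be correlated with the distributed runtime's trace ids. This commit derives both the engine-side id and the client-facing response id from the context id. A minimal standalone sketch of the pattern, assuming the context id is a string and mistralrs request ids are `usize` (`ctx_id` below stands in for `ctx.id()`):

```rust
// Minimal sketch of the id-alignment pattern this commit introduces.
// `ctx_id` stands in for the distributed context's `ctx.id()`.
fn align_ids(ctx_id: &str) -> (usize, String) {
    // Engine-side id: mistralrs wants an integer, so parse the trace id,
    // falling back to 0 when it is not numeric (e.g. a UUID).
    let mistralrs_request_id: usize = ctx_id.parse().unwrap_or(0);
    // Client-facing id: the OpenAI-style response id is now built from the
    // trace id instead of the engine's internal chunk id.
    let response_id = format!("chatcmpl-{ctx_id}");
    (mistralrs_request_id, response_id)
}

fn main() {
    assert_eq!(align_ids("42"), (42, "chatcmpl-42".to_string()));
    // Non-numeric trace ids collapse to engine id 0.
    assert_eq!(align_ids("b1946ac9").0, 0);
    println!("ids aligned");
}
```

Note the `unwrap_or(0)` fallback: any request whose trace id is not an integer shares engine id 0. That is harmless for log correlation by field name, but worth keeping in mind if the engine ever keys state on that id.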

lib/engines/mistralrs/src/lib.rs

Lines changed: 19 additions & 18 deletions
@@ -212,9 +212,9 @@ impl MistralRsEngine

         // Perform warmup request
         let (tx, mut rx) = channel(1);
-        let request_id = engine.mistralrs.next_request_id();
+        let mistralrs_request_id = engine.mistralrs.next_request_id();
         let warmup_request = Request::Normal(Box::new(NormalRequest {
-            id: request_id,
+            id: mistralrs_request_id,
             model_id: Some(display_name.to_string()),
             messages: RequestMessage::Chat {
                 messages: vec![IndexMap::from([
@@ -245,10 +245,10 @@ impl MistralRsEngine
         if let Some(response) = rx.recv().await {
             match response.as_result() {
                 Ok(r) => {
-                    tracing::debug!(request_id, "Warmup response: {r:?}");
+                    tracing::debug!(mistralrs_request_id, "Warmup response: {r:?}");
                 }
                 Err(err) => {
-                    tracing::error!(request_id, %err, "Failed converting response to result.");
+                    tracing::error!(mistralrs_request_id, %err, "Failed converting response to result.");
                 }
             }
         }
@@ -273,6 +273,7 @@ impl
     ) -> Result<ManyOut<Annotated<NvCreateChatCompletionStreamResponse>>, Error> {
         let (request, context) = request.transfer(());
         let ctx = context.context();
+        let request_id = ctx.id().to_string();
         let (tx, mut rx) = channel(10_000);

         let mut messages = vec![];
@@ -339,9 +340,9 @@ impl
             n_choices: 1,
             dry_params: det.dry_params,
         };
-        let request_id = ctx.id().to_string();
+        let mistralrs_request_id = ctx.id().parse().unwrap_or(0);
         let mistralrs_request = Request::Normal(Box::new(NormalRequest {
-            id: request_id,
+            id: mistralrs_request_id,
             model_id: Some(self.display_name.clone()),
             messages: RequestMessage::Chat {
                 messages,
@@ -370,14 +371,14 @@ impl
                 let response = match response.as_result() {
                     Ok(r) => r,
                     Err(err) => {
-                        tracing::error!(request_id, %err, "Failed converting mistralrs channel response to result.");
+                        tracing::error!(mistralrs_request_id, %err, "Failed converting mistralrs channel response to result.");
                         break;
                     }
                 };
                 match response {
                     ResponseOk::Chunk(c) => {
                         let Some(from_assistant) = c.choices[0].delta.content.clone() else {
-                            tracing::warn!(request_id, "No content from mistralrs. Abandoning request.");
+                            tracing::warn!(mistralrs_request_id, "No content from mistralrs. Abandoning request.");
                             break;
                         };
                         let finish_reason = match &c.choices[0].finish_reason.as_deref() {
@@ -388,7 +389,7 @@ impl
                                 Some(FinishReason::Length)
                             }
                             Some(s) => {
-                                tracing::warn!(request_id, stop_reason = s, "Unknow stop reason");
+                                tracing::warn!(mistralrs_request_id, stop_reason = s, "Unknow stop reason");
                                 Some(FinishReason::Stop)
                             }
                             None => None,
@@ -397,7 +398,7 @@ impl

                         #[allow(deprecated)]
                         let inner = async_openai::types::CreateChatCompletionStreamResponse{
-                            id: c.id,
+                            id: format!("chatcmpl-{}", request_id),
                             choices: vec![async_openai::types::ChatChoiceStream{
                                 index: 0,
                                 delta: async_openai::types::ChatCompletionStreamResponseDelta{
@@ -428,11 +429,11 @@ impl
                         yield ann;

                         if finish_reason.is_some() {
-                            //tracing::trace!(request_id, "Finish reason: {finish_reason:?}");
+                            //tracing::trace!(mistralrs_request_id, "Finish reason: {finish_reason:?}");
                             break;
                         }
                     },
-                    x => tracing::error!(request_id, "Unhandled. {x:?}"),
+                    x => tracing::error!(mistralrs_request_id, "Unhandled. {x:?}"),
                 }
             }
         };
@@ -486,7 +487,7 @@ impl
         let (request, context) = request.transfer(());
         let ctx = context.context();
         let (tx, mut rx) = channel(10_000);
-        let response_generator = request.response_generator(Some(ctx.id().to_string()));
+        let response_generator = request.response_generator(ctx.id().to_string());

         let messages = RequestMessage::Completion {
             text: prompt_to_string(&request.inner.prompt),
@@ -540,9 +541,9 @@ impl
             dry_params: det.dry_params,
         };

-        let request_id = ctx.id().to_string();
+        let mistralrs_request_id = ctx.id().parse().unwrap_or(0);
         let mistralrs_request = Request::Normal(Box::new(NormalRequest {
-            id: request_id,
+            id: mistralrs_request_id,
             model_id: Some(self.display_name.clone()),
             messages,
             sampling_params,
@@ -568,7 +569,7 @@ impl
                 let response = match response.as_result() {
                     Ok(r) => r,
                     Err(err) => {
-                        tracing::error!(request_id, %err, "Failed converting mistralrs channel response to result.");
+                        tracing::error!(mistralrs_request_id, %err, "Failed converting mistralrs channel response to result.");
                         break;
                     }
                 };
@@ -584,7 +585,7 @@ impl
                             Some(FinishReason::Length)
                         }
                         Some(s) => {
-                            tracing::warn!(request_id, stop_reason = s, "Unknow stop reason");
+                            tracing::warn!(mistralrs_request_id, stop_reason = s, "Unknow stop reason");
                             Some(FinishReason::Stop)
                         }
                         None => None,
@@ -603,7 +604,7 @@ impl
                         break;
                     }
                 },
-                x => tracing::error!(request_id, "Unhandled. {x:?}"),
+                x => tracing::error!(mistralrs_request_id, "Unhandled. {x:?}"),
             }
         }
     };
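The renames above are not cosmetic: with `tracing`, a bare identifier in a macro call becomes a structured log field of that name, so switching from `request_id` to `mistralrs_request_id` changes the emitted log schema and keeps `request_id` free for the distributed trace id. A small self-contained illustration (assuming the `tracing` and `tracing-subscriber` crates):

```rust
use tracing::{debug, error, Level};

fn main() {
    // Print DEBUG and above so both lines below are visible.
    tracing_subscriber::fmt().with_max_level(Level::DEBUG).init();

    let mistralrs_request_id: usize = 7;
    let request_id = "trace-7";

    // Bare identifiers record fields named after the variable; `%err`
    // records a value via its Display impl, as in the engine code above.
    debug!(mistralrs_request_id, request_id, "Warmup response received");
    let err = "channel closed";
    error!(mistralrs_request_id, %err, "Failed converting response to result.");
}
```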

lib/llm/src/engines.rs

Lines changed: 2 additions & 2 deletions
@@ -184,7 +184,7 @@ impl
     ) -> Result<ManyOut<Annotated<NvCreateChatCompletionStreamResponse>>, Error> {
         let (request, context) = incoming_request.transfer(());
         let ctx = context.context();
-        let deltas = request.response_generator(Some(ctx.id().to_string()));
+        let deltas = request.response_generator(ctx.id().to_string());
         let req = request.inner.messages.into_iter().next_back().unwrap();

         let prompt = match req {
@@ -237,7 +237,7 @@ impl
     ) -> Result<ManyOut<Annotated<NvCreateCompletionResponse>>, Error> {
         let (request, context) = incoming_request.transfer(());
         let ctx = context.context();
-        let deltas = request.response_generator(Some(ctx.id().to_string()));
+        let deltas = request.response_generator(ctx.id().to_string());
         let chars_string = prompt_to_string(&request.inner.prompt);
         let output = stream! {
             let mut id = 1;
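Both call sites now pass the context id unconditionally, which indicates `response_generator` changed from accepting `Option<String>` to `String`. A hypothetical before/after sketch of that simplification (`DeltaGenerator` and the fallback value here are illustrative; the real generator type lives elsewhere in the repo):

```rust
// Illustrative only: shows the Option-to-String migration pattern, not the
// repo's actual generator definition.
struct DeltaGenerator {
    response_id: String,
}

impl DeltaGenerator {
    // Before: callers might not have an id, so the generator defaulted one.
    fn response_generator_old(request_id: Option<String>) -> Self {
        Self {
            response_id: request_id.unwrap_or_else(|| "chatcmpl-unknown".into()),
        }
    }

    // After: every caller has a distributed context, so the id is required
    // and the fallback path disappears.
    fn response_generator(request_id: String) -> Self {
        Self { response_id: request_id }
    }
}

fn main() {
    assert_eq!(
        DeltaGenerator::response_generator("trace-123".into()).response_id,
        "trace-123"
    );
    assert_eq!(
        DeltaGenerator::response_generator_old(None).response_id,
        "chatcmpl-unknown"
    );
}
```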

lib/llm/src/http/service/openai.rs

Lines changed: 3 additions & 0 deletions
@@ -313,11 +313,14 @@ async fn completions(
 #[tracing::instrument(skip_all)]
 async fn embeddings(
     State(state): State<Arc<service_v2::State>>,
+    headers: HeaderMap,
     Json(request): Json<NvCreateEmbeddingRequest>,
 ) -> Result<Response, ErrorResponse> {
     // return a 503 if the service is not ready
     check_ready(&state)?;

+    let request_id = get_or_create_request_id(request.inner.user.as_deref(), &headers);
+    let request = Context::with_id(request, request_id);
     let request_id = request.id().to_string();

     // Embeddings are typically not streamed, so we default to non-streaming
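This brings the embeddings route in line with the chat and completions handlers: resolve a request id from the caller, wrap the request in a `Context` carrying it, and only then read the id back (the second `let request_id = request.id().to_string();` rebinds from the wrapped `Context`, keeping a single source of truth). The helper's real behavior lives elsewhere in this file; a hedged sketch of the usual pattern — an `x-request-id` header, then the OpenAI `user` field, then a fresh UUID, where the header name and precedence are assumptions — might look like:

```rust
use axum::http::HeaderMap; // axum re-exports http's HeaderMap
use uuid::Uuid; // `uuid` crate with the `v4` feature

// Hedged sketch of a get_or_create_request_id-style helper; the real
// function's header name and precedence may differ.
fn get_or_create_request_id(user: Option<&str>, headers: &HeaderMap) -> String {
    headers
        .get("x-request-id")
        .and_then(|v| v.to_str().ok())
        .map(str::to_owned)
        .or_else(|| user.map(str::to_owned))
        .unwrap_or_else(|| Uuid::new_v4().to_string())
}

fn main() {
    let mut headers = HeaderMap::new();
    headers.insert("x-request-id", "req-abc".parse().unwrap());
    assert_eq!(get_or_create_request_id(None, &headers), "req-abc");
    assert_eq!(get_or_create_request_id(Some("u1"), &HeaderMap::new()), "u1");
}
```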
