@@ -378,7 +378,7 @@ static json oaicompat_completion_params_parse(
378
378
return llama_params;
379
379
}
380
380
381
- static json format_final_response_oaicompat (const json & request, json result, bool streaming = false ) {
381
+ static json format_final_response_oaicompat (const json & request, json result, const std::string & completion_id, bool streaming = false ) {
382
382
bool stopped_word = result.count (" stopped_word" ) != 0 ;
383
383
bool stopped_eos = json_value (result, " stopped_eos" , false );
384
384
int num_tokens_predicted = json_value (result, " tokens_predicted" , 0 );
@@ -412,7 +412,7 @@ static json format_final_response_oaicompat(const json & request, json result, b
412
412
{" prompt_tokens" , num_prompt_tokens},
413
413
{" total_tokens" , num_tokens_predicted + num_prompt_tokens}
414
414
}},
415
- {" id" , gen_chatcmplid () }
415
+ {" id" , completion_id }
416
416
};
417
417
418
418
if (server_verbose) {
@@ -427,7 +427,7 @@ static json format_final_response_oaicompat(const json & request, json result, b
427
427
}
428
428
429
429
// the return value is a vector because there is one case where we might need to generate two responses
430
- static std::vector<json> format_partial_response_oaicompat (json result) {
430
+ static std::vector<json> format_partial_response_oaicompat (json result, const std::string & completion_id ) {
431
431
if (!result.contains (" model" ) || !result.contains (" oaicompat_token_ctr" )) {
432
432
return std::vector<json>({result});
433
433
}
@@ -471,7 +471,7 @@ static std::vector<json> format_partial_response_oaicompat(json result) {
471
471
{" role" , " assistant" }
472
472
}}}})},
473
473
{" created" , t},
474
- {" id" , gen_chatcmplid () },
474
+ {" id" , completion_id },
475
475
{" model" , modelname},
476
476
{" object" , " chat.completion.chunk" }};
477
477
@@ -482,7 +482,7 @@ static std::vector<json> format_partial_response_oaicompat(json result) {
482
482
{" content" , content}}}
483
483
}})},
484
484
{" created" , t},
485
- {" id" , gen_chatcmplid () },
485
+ {" id" , completion_id },
486
486
{" model" , modelname},
487
487
{" object" , " chat.completion.chunk" }};
488
488
@@ -509,7 +509,7 @@ static std::vector<json> format_partial_response_oaicompat(json result) {
509
509
json ret = json {
510
510
{" choices" , choices},
511
511
{" created" , t},
512
- {" id" , gen_chatcmplid () },
512
+ {" id" , completion_id },
513
513
{" model" , modelname},
514
514
{" object" , " chat.completion.chunk" }
515
515
};
0 commit comments