@@ -407,6 +407,11 @@ LlamaEngine::~LlamaEngine() {
   server_map_.clear();
   async_file_logger_.reset();
 
+  for (auto const& [_, si] : llama_server_map_) {
+    kill(si.pid, SIGTERM);
+  }
+  llama_server_map_.clear();
+
   LOG_INFO << "LlamaEngine destructed successfully";
 }
 
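For context, a minimal POSIX sketch of what this destructor-side cleanup amounts to. The hunk above only sends SIGTERM; the sketch also reaps each child with `waitpid` so no zombies outlive the engine. `ServerInfo` and `StopAllServers` are illustrative names, not from the patch:

```cpp
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <string>
#include <unordered_map>

struct ServerInfo {  // hypothetical stand-in for llama_server_map_'s value type
  pid_t pid;
};

void StopAllServers(std::unordered_map<std::string, ServerInfo>& servers) {
  for (auto const& [_, si] : servers) {
    kill(si.pid, SIGTERM);  // ask each child llama server to exit cleanly
  }
  for (auto const& [_, si] : servers) {
    int status = 0;
    waitpid(si.pid, &status, 0);  // reap the child so it is not left as a zombie
  }
  servers.clear();
}
```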
@@ -503,7 +508,7 @@ void LlamaEngine::UnloadModel(std::shared_ptr<Json::Value> json_body,
     sent = GenerateConsoleCtrlEvent(CTRL_C_EVENT,
                                     llama_server_map_[model_id].pi.dwProcessId);
 #else
-    sent = (kill(llama_server_map_[model_id].pid, SIGINT) != -1);
+    sent = (kill(llama_server_map_[model_id].pid, SIGTERM) != -1);
 #endif
     if (sent) {
-      LOG_INFO << "SIGINT signal sent to child process";
+      LOG_INFO << "SIGTERM signal sent to child process";
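A hedged sketch of the per-platform stop request this hunk settles on: `GenerateConsoleCtrlEvent` with `CTRL_C_EVENT` on Windows, `kill` with SIGTERM elsewhere. SIGTERM is the conventional "please shut down" signal for a daemonized child, while SIGINT emulates Ctrl-C and may never arrive if the child does not share a controlling terminal. The helper name is hypothetical, not from the patch:

```cpp
#if defined(_WIN32)
#include <windows.h>
#else
#include <signal.h>
#include <sys/types.h>
#endif

// Hypothetical helper: request a graceful stop of one child server using the
// same per-platform mechanism as the hunk above.
#if defined(_WIN32)
bool RequestStop(DWORD process_group_id) {
  // Delivers the equivalent of Ctrl-C to the child's console process group.
  return GenerateConsoleCtrlEvent(CTRL_C_EVENT, process_group_id) != 0;
}
#else
bool RequestStop(pid_t pid) {
  // SIGTERM does not depend on the child sharing a controlling terminal.
  return kill(pid, SIGTERM) != -1;
}
#endif
```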
@@ -1748,7 +1753,7 @@ void LlamaEngine::HandleOpenAiChatCompletion(
     auto res = cli.Post("/v1/chat/completions", httplib::Headers(),
                         data_str.data(), data_str.size(), "application/json");
     if (res) {
-      LOG_INFO << res->body;
+      LOG_DEBUG << res->body;
       auto r = ParseJsonString(res->body);
       if (i == 0) {
         result = r;
@@ -1788,7 +1793,8 @@ void LlamaEngine::HandleOpenAiChatCompletion(
 void LlamaEngine::HandleNonOpenAiChatCompletion(
     std::shared_ptr<Json::Value> json_body, http_callback&& cb,
     const std::string& model) {
-  LOG_INFO << "Handle non OpenAI";
+  LOG_DEBUG << "Handle non OpenAI";
+  LOG_DEBUG << json_body->toStyledString();
   auto is_stream = (*json_body).get("stream", false).asBool();
   auto include_usage = [&json_body, is_stream]() -> bool {
     if (is_stream) {
@@ -1863,7 +1869,7 @@ void LlamaEngine::HandleNonOpenAiChatCompletion(
   // llama.cpp server only supports n = 1
   data["n"] = 1;
   auto data_str = data.dump();
-  LOG_INFO << "data_str: " << data_str;
+  LOG_DEBUG << "data_str: " << data_str;
   cli.set_read_timeout(std::chrono::seconds(60));
   int n_probs = json_body->get("n_probs", 0).asInt();
   if (is_stream) {
@@ -1878,7 +1884,7 @@ void LlamaEngine::HandleNonOpenAiChatCompletion(
         const char* data, size_t data_length,
         uint64_t offset, uint64_t total_length) {
       std::string s(data, data_length);
-      LOG_INFO << s;
+      LOG_DEBUG << s;
       if (s.size() > 6) {
         s = s.substr(6);
       }
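The six bytes being stripped here are Server-Sent Events framing: llama.cpp streams each chunk as a line of the form `data: <json>`. A slightly stricter standalone sketch (the hunk strips whenever the chunk is longer than six bytes; this version also verifies the prefix, and the names are illustrative):

```cpp
#include <optional>
#include <string>
#include <string_view>

std::optional<std::string> ExtractSsePayload(const std::string& chunk) {
  constexpr std::string_view kPrefix = "data: ";  // 6 bytes, hence substr(6) above
  if (chunk.size() <= kPrefix.size() ||
      chunk.compare(0, kPrefix.size(), kPrefix) != 0) {
    return std::nullopt;  // not a data frame (e.g. an SSE comment or keep-alive)
  }
  return chunk.substr(kPrefix.size());
}
```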
@@ -1909,7 +1915,11 @@ void LlamaEngine::HandleNonOpenAiChatCompletion(
         logprobs =
             ConvertJsonCppToNlohmann(json_data["completion_probabilities"]);
       }
-      std::string to_send = json_data.get("content", "").asString();
+      std::string to_send;
+      if (json_data.isMember("choices") && json_data["choices"].isArray() &&
+          json_data["choices"].size() > 0) {
+        to_send = json_data["choices"][0].get("text", "").asString();
+      }
       const std::string str =
           "data: " +
           CreateReturnJson(llama_utils::generate_random_string(20), model,
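The parsing change above tracks a change in chunk shape: an OpenAI-style `/v1/completions` stream carries the generated text in `choices[0].text`, whereas the old code read a top-level `content` field. A sketch (illustrative helper, not from the patch) that accepts both shapes, falling back to the legacy field:

```cpp
#include <json/json.h>

#include <string>

std::string ExtractStreamedText(const Json::Value& chunk) {
  // OpenAI-style chunk: {"choices":[{"text":"...", ...}], ...}
  if (chunk.isMember("choices") && chunk["choices"].isArray() &&
      chunk["choices"].size() > 0) {
    return chunk["choices"][0].get("text", "").asString();
  }
  // Legacy llama.cpp shape: {"content":"...", ...}
  return chunk.get("content", "").asString();
}
```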
@@ -1933,7 +1943,7 @@ void LlamaEngine::HandleNonOpenAiChatCompletion(
     auto res = cli.Post("/v1/completions", httplib::Headers(),
                         data_str.data(), data_str.size(), "application/json");
     if (res) {
-      LOG_INFO << res->body;
+      LOG_DEBUG << res->body;
       auto r = ParseJsonString(res->body);
       json logprobs;
       prompt_tokens += r["tokens_evaluated"].asInt();
@@ -2008,7 +2018,7 @@ bool LlamaEngine::HandleLlamaCppEmbedding(
                    Json::Value());
       }
     });
-    LOG_INFO << "Done HandleEmbedding";
+    LOG_DEBUG << "Done HandleEmbedding";
     return true;
   }
   return false;