96 changes: 76 additions & 20 deletions controllers/llamaCPP.cc
@@ -8,6 +8,7 @@
 #include <regex>
 #include <string>
 #include <thread>
+#include <trantor/utils/Logger.h>
 
 using namespace inferences;
 using json = nlohmann::json;
@@ -174,6 +175,7 @@ void llamaCPP::chatCompletion(
 
   json data;
   json stopWords;
+  int no_images = 0;
   // To set default value
 
   if (jsonBody) {
@@ -200,29 +202,79 @@
         (*jsonBody).get("frequency_penalty", 0).asFloat();
     data["presence_penalty"] = (*jsonBody).get("presence_penalty", 0).asFloat();
     const Json::Value &messages = (*jsonBody)["messages"];
-    for (const auto &message : messages) {
-      std::string input_role = message["role"].asString();
-      std::string role;
-      if (input_role == "user") {
-        role = user_prompt;
-        std::string content = message["content"].asString();
-        formatted_output += role + content;
-      } else if (input_role == "assistant") {
-        role = ai_prompt;
-        std::string content = message["content"].asString();
-        formatted_output += role + content;
-      } else if (input_role == "system") {
-        role = system_prompt;
-        std::string content = message["content"].asString();
-        formatted_output = role + content + formatted_output;
-      } else {
-        role = input_role;
-        std::string content = message["content"].asString();
-        formatted_output += role + content;
-      }
-    }
+    if (!llama.multimodal) {
+      // Text-only path: flatten the chat history into one prompt string.
+      for (const auto &message : messages) {
+        std::string input_role = message["role"].asString();
+        std::string role;
+        if (input_role == "user") {
+          role = user_prompt;
+          std::string content = message["content"].asString();
+          formatted_output += role + content;
+        } else if (input_role == "assistant") {
+          role = ai_prompt;
+          std::string content = message["content"].asString();
+          formatted_output += role + content;
+        } else if (input_role == "system") {
+          role = system_prompt;
+          std::string content = message["content"].asString();
+          formatted_output = role + content + formatted_output;
+        } else {
+          role = input_role;
+          std::string content = message["content"].asString();
+          formatted_output += role + content;
+        }
+      }
+      formatted_output += ai_prompt;
+    } else {
+      // Multimodal path: user content is an array of text / image_url pieces.
+      data["image_data"] = json::array();
+      for (const auto &message : messages) {
+        std::string input_role = message["role"].asString();
+        std::string role;
+        if (input_role == "user") {
+          role = user_prompt;
+          formatted_output += role;
+          for (auto content_piece : message["content"]) {
+            auto content_piece_type = content_piece["type"].asString();
+            if (content_piece_type == "text") {
+              auto text = content_piece["text"].asString();
+              formatted_output += text;
+            } else if (content_piece_type == "image_url") {
+              auto image_url = content_piece["image_url"]["url"].asString();
+              auto base64_image_data = nitro_utils::extractBase64(image_url);
+              LOG_INFO << base64_image_data;
+              // Reference the image in the prompt by id, then ship its bytes
+              // through the image_data array.
+              formatted_output += "[img-" + std::to_string(no_images) + "]";
+
+              json content_piece_image_data;
+              content_piece_image_data["data"] = base64_image_data;
+              content_piece_image_data["id"] = no_images;
+              data["image_data"].push_back(content_piece_image_data);
+              no_images++;
+            }
+          }
+        } else if (input_role == "assistant") {
+          role = ai_prompt;
+          std::string content = message["content"].asString();
+          formatted_output += role + content;
+        } else if (input_role == "system") {
+          role = system_prompt;
+          std::string content = message["content"].asString();
+          formatted_output = role + content + formatted_output;
+        } else {
+          role = input_role;
+          std::string content = message["content"].asString();
+          formatted_output += role + content;
+        }
+      }
+      formatted_output += ai_prompt;
+      LOG_INFO << formatted_output;
+    }
-    formatted_output += ai_prompt;
 
     data["prompt"] = formatted_output;
     for (const auto &stop_word : (*jsonBody)["stop"]) {
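Note: the multimodal branch above consumes OpenAI-vision-style messages, where user content is an array of typed pieces rather than a plain string. A hypothetical request body (base64 payload elided) looks like this:

{
  "messages": [
    {
      "role": "user",
      "content": [
        { "type": "text", "text": "What is in this image?" },
        {
          "type": "image_url",
          "image_url": { "url": "data:image/jpeg;base64,/9j/4AAQ..." }
        }
      ]
    }
  ]
}

The loop flattens this into role-prefixed prompt text in which each image becomes an "[img-0]"-style placeholder, and pushes a matching {"data": "<base64>", "id": 0} object into data["image_data"]; that placeholder-plus-image_data pairing follows the convention used by llama.cpp's LLaVA server example.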
@@ -386,6 +438,10 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
   int drogon_thread = drogon::app().getThreadNum() - 1;
   LOG_INFO << "Drogon thread is:" << drogon_thread;
   if (jsonBody) {
+    if (!jsonBody["mmproj"].isNull()) {
+      LOG_INFO << "MMPROJ file detected, multimodal enabled!";
+      params.mmproj = jsonBody["mmproj"].asString();
+    }
     params.model = jsonBody["llama_model_path"].asString();
     params.n_gpu_layers = jsonBody.get("ngl", 100).asInt();
     params.n_ctx = jsonBody.get("ctx_len", 2048).asInt();
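Loading the projector is then a single extra field on the load-model request. A hypothetical body (paths are illustrative):

{
  "llama_model_path": "/models/llava-v1.5-7b.Q4_K_M.gguf",
  "mmproj": "/models/mmproj-model-f16.gguf",
  "ctx_len": 2048,
  "ngl": 100
}

When "mmproj" is present, params.mmproj is forwarded to llama.cpp, which is presumably what enables llama.multimodal and routes chatCompletion into the image-aware branch above.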
7 changes: 4 additions & 3 deletions controllers/llamaCPP.h
@@ -1834,7 +1834,7 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
 public:
   llamaCPP() {
     // Some default values for now below
-    log_disable(); // Disable the log to file feature, reduce bloat for
+    // log_disable(); // Disable the log to file feature, reduce bloat for
                    // target
                    // system ()
     std::vector<std::string> llama_models =
@@ -1877,8 +1877,9 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   METHOD_LIST_END
   void chatCompletion(const HttpRequestPtr &req,
                       std::function<void(const HttpResponsePtr &)> &&callback);
-  void chatCompletionPrelight(const HttpRequestPtr &req,
-                              std::function<void(const HttpResponsePtr &)> &&callback);
+  void chatCompletionPrelight(
+      const HttpRequestPtr &req,
+      std::function<void(const HttpResponsePtr &)> &&callback);
   void embedding(const HttpRequestPtr &req,
                  std::function<void(const HttpResponsePtr &)> &&callback);
   void loadModel(const HttpRequestPtr &req,
14 changes: 14 additions & 0 deletions utils/nitro_utils.h
@@ -6,6 +6,7 @@
 #include <drogon/HttpResponse.h>
 #include <iostream>
 #include <ostream>
+#include <regex>
 // Include platform-specific headers
 #ifdef _WIN32
 #include <winsock2.h>
@@ -18,6 +19,19 @@ namespace nitro_utils {
 
 inline std::string models_folder = "./models";
 
+// Extract the base64 payload from a data URL of the form
+// "data:<mime>;base64,<payload>".
+inline std::string extractBase64(const std::string &input) {
+  std::regex pattern("base64,(.*)");
+  std::smatch match;
+
+  if (std::regex_search(input, match, pattern)) {
+    std::string base64_data = match[1];
+    // Note: this unconditionally drops the final character of the match,
+    // e.g. a trailing delimiter left over from the raw request.
+    base64_data = base64_data.substr(0, base64_data.length() - 1);
+    return base64_data;
+  }
+
+  return "";
+}
+
 inline std::vector<std::string> listFilesInDir(const std::string &path) {
   std::vector<std::string> files;
 
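A quick usage sketch for extractBase64 (the input string is hypothetical):

std::string url = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==\"";
std::string payload = nitro_utils::extractBase64(url);
// payload is now "iVBORw0KGgoAAAANSUhEUg==": the regex captured everything
// after "base64," and the substr removed the trailing '"'.

Because the helper always trims one trailing character, callers must ensure the URL really carries an extra delimiter after the payload; on a clean data URL the trim would eat the last base64 character (often a padding '=').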