This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 0cd4abe

Merge pull request #271 from janhq/103-feat-enable-llava-feature-in-nitro-1

feat: nitro multi modal

2 parents 4a3f958 + 58fa46c

File tree: 3 files changed (+94, -23 lines)

controllers/llamaCPP.cc

Lines changed: 76 additions & 20 deletions
@@ -8,6 +8,7 @@
 #include <regex>
 #include <string>
 #include <thread>
+#include <trantor/utils/Logger.h>
 
 using namespace inferences;
 using json = nlohmann::json;
@@ -174,6 +175,7 @@ void llamaCPP::chatCompletion(
 
   json data;
   json stopWords;
+  int no_images = 0;
   // To set default value
 
   if (jsonBody) {
@@ -200,29 +202,79 @@ void llamaCPP::chatCompletion(
         (*jsonBody).get("frequency_penalty", 0).asFloat();
     data["presence_penalty"] = (*jsonBody).get("presence_penalty", 0).asFloat();
     const Json::Value &messages = (*jsonBody)["messages"];
-    for (const auto &message : messages) {
-      std::string input_role = message["role"].asString();
-      std::string role;
-      if (input_role == "user") {
-        role = user_prompt;
-        std::string content = message["content"].asString();
-        formatted_output += role + content;
-      } else if (input_role == "assistant") {
-        role = ai_prompt;
-        std::string content = message["content"].asString();
-        formatted_output += role + content;
-      } else if (input_role == "system") {
-        role = system_prompt;
-        std::string content = message["content"].asString();
-        formatted_output = role + content + formatted_output;
 
-      } else {
-        role = input_role;
-        std::string content = message["content"].asString();
-        formatted_output += role + content;
+    if (!llama.multimodal) {
+
+      for (const auto &message : messages) {
+        std::string input_role = message["role"].asString();
+        std::string role;
+        if (input_role == "user") {
+          role = user_prompt;
+          std::string content = message["content"].asString();
+          formatted_output += role + content;
+        } else if (input_role == "assistant") {
+          role = ai_prompt;
+          std::string content = message["content"].asString();
+          formatted_output += role + content;
+        } else if (input_role == "system") {
+          role = system_prompt;
+          std::string content = message["content"].asString();
+          formatted_output = role + content + formatted_output;
+
+        } else {
+          role = input_role;
+          std::string content = message["content"].asString();
+          formatted_output += role + content;
+        }
       }
+      formatted_output += ai_prompt;
+    } else {
+
+      data["image_data"] = json::array();
+      for (const auto &message : messages) {
+        std::string input_role = message["role"].asString();
+        std::string role;
+        if (input_role == "user") {
+          formatted_output += role;
+          for (auto content_piece : message["content"]) {
+            role = user_prompt;
+
+            auto content_piece_type = content_piece["type"].asString();
+            if (content_piece_type == "text") {
+              auto text = content_piece["text"].asString();
+              formatted_output += text;
+            } else if (content_piece_type == "image_url") {
+              auto image_url = content_piece["image_url"]["url"].asString();
+              auto base64_image_data = nitro_utils::extractBase64(image_url);
+              LOG_INFO << base64_image_data;
+              formatted_output += "[img-" + std::to_string(no_images) + "]";
+
+              json content_piece_image_data;
+              content_piece_image_data["data"] = base64_image_data;
+              content_piece_image_data["id"] = no_images;
+              data["image_data"].push_back(content_piece_image_data);
+              no_images++;
+            }
+          }
+
+        } else if (input_role == "assistant") {
+          role = ai_prompt;
+          std::string content = message["content"].asString();
+          formatted_output += role + content;
+        } else if (input_role == "system") {
+          role = system_prompt;
+          std::string content = message["content"].asString();
+          formatted_output = role + content + formatted_output;
+
+        } else {
+          role = input_role;
+          std::string content = message["content"].asString();
+          formatted_output += role + content;
+        }
+      }
+      formatted_output += ai_prompt;
+      LOG_INFO << formatted_output;
     }
-    formatted_output += ai_prompt;
 
     data["prompt"] = formatted_output;
     for (const auto &stop_word : (*jsonBody)["stop"]) {
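
For context, a sketch of the request body shape the new multimodal branch parses. The keys ("messages", "content", "type", "text", "image_url") are the ones read by the code above; the prompt text and base64 data are made-up placeholders:

{
  "messages": [
    {
      "role": "user",
      "content": [
        { "type": "text", "text": "What is in this image?" },
        {
          "type": "image_url",
          "image_url": { "url": "data:image/jpeg;base64,/9j/4AAQ..." }
        }
      ]
    }
  ]
}

For an input like this, the loop builds a prompt of roughly user_prompt + "What is in this image?" + "[img-0]" + ai_prompt and pushes { "data": "<base64 payload>", "id": 0 } into data["image_data"], which appears to follow the [img-N] placeholder convention of the upstream llama.cpp LLaVA server.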
@@ -386,6 +438,10 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
   int drogon_thread = drogon::app().getThreadNum() - 1;
   LOG_INFO << "Drogon thread is:" << drogon_thread;
   if (jsonBody) {
+    if (!jsonBody["mmproj"].isNull()) {
+      LOG_INFO << "MMPROJ FILE detected, multi-model enabled!";
+      params.mmproj = jsonBody["mmproj"].asString();
+    }
     params.model = jsonBody["llama_model_path"].asString();
     params.n_gpu_layers = jsonBody.get("ngl", 100).asInt();
     params.n_ctx = jsonBody.get("ctx_len", 2048).asInt();
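
Likewise, a sketch of a load-model request that would take the new mmproj path; the keys are the ones read in loadModelImpl above, while the file paths are hypothetical:

{
  "llama_model_path": "/models/llava-v1.5-7b.Q4_K_M.gguf",
  "mmproj": "/models/mmproj-llava-v1.5-7b-f16.gguf",
  "ngl": 100,
  "ctx_len": 2048
}

When "mmproj" is present and non-null, params.mmproj is set before the model loads, which presumably is what flips llama.multimodal and routes chat completions through the multimodal branch above.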

controllers/llamaCPP.h

Lines changed: 4 additions & 3 deletions
@@ -1834,7 +1834,7 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
 public:
   llamaCPP() {
     // Some default values for now below
-    log_disable(); // Disable the log to file feature, reduce bloat for
+    // log_disable(); // Disable the log to file feature, reduce bloat for
     // target
     // system ()
     std::vector<std::string> llama_models =
@@ -1877,8 +1877,9 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   METHOD_LIST_END
   void chatCompletion(const HttpRequestPtr &req,
                       std::function<void(const HttpResponsePtr &)> &&callback);
-  void chatCompletionPrelight(const HttpRequestPtr &req,
-                              std::function<void(const HttpResponsePtr &)> &&callback);
+  void chatCompletionPrelight(
+      const HttpRequestPtr &req,
+      std::function<void(const HttpResponsePtr &)> &&callback);
   void embedding(const HttpRequestPtr &req,
                  std::function<void(const HttpResponsePtr &)> &&callback);
   void loadModel(const HttpRequestPtr &req,

utils/nitro_utils.h

Lines changed: 14 additions & 0 deletions
@@ -6,6 +6,7 @@
 #include <drogon/HttpResponse.h>
 #include <iostream>
 #include <ostream>
+#include <regex>
 // Include platform-specific headers
 #ifdef _WIN32
 #include <winsock2.h>
@@ -18,6 +19,19 @@ namespace nitro_utils {
 
 inline std::string models_folder = "./models";
 
+inline std::string extractBase64(const std::string &input) {
+  std::regex pattern("base64,(.*)");
+  std::smatch match;
+
+  if (std::regex_search(input, match, pattern)) {
+    std::string base64_data = match[1];
+    base64_data = base64_data.substr(0, base64_data.length() - 1);
+    return base64_data;
+  }
+
+  return "";
+}
+
 inline std::vector<std::string> listFilesInDir(const std::string &path) {
   std::vector<std::string> files;
 
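A minimal usage sketch for the new helper (the data URL is made up). Note that, as written, the substr call trims the final character of the captured group, so callers get everything after "base64," minus its last character:

#include <iostream>
#include "utils/nitro_utils.h" // for nitro_utils::extractBase64

int main() {
  // A hypothetical image_url value in data-URL form.
  std::string url = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==";
  // The regex captures everything after "base64," and the substr then
  // drops the last character, so this prints: iVBORw0KGgoAAAANSUhEUg=
  std::cout << nitro_utils::extractBase64(url) << std::endl;
  return 0;
}
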
0 commit comments