engine/config/gguf_parser.cc (12 changes: 6 additions & 6 deletions)
@@ -2,12 +2,12 @@
#include <cstdint>
#include <cstring>
#include <ctime>
+#include <filesystem>
#include <iostream>
#include <regex>
#include <stdexcept>
#include <string>
#include <vector>
-#include <filesystem>

#ifdef _WIN32
#include <io.h>
@@ -70,7 +70,7 @@ void GGUFHandler::OpenFile(const std::string& file_path) {

#else
file_size_ = std::filesystem::file_size(file_path);

int file_descriptor = open(file_path.c_str(), O_RDONLY);
// Memory-map the file
data_ = static_cast<uint8_t*>(
@@ -105,7 +105,8 @@ std::pair<std::size_t, std::string> GGUFHandler::ReadString(
std::memcpy(&length, data_ + offset, sizeof(uint64_t));

if (offset + 8 + length > file_size_) {
throw std::runtime_error("GGUF metadata string length exceeds file size.\n");
throw std::runtime_error(
"GGUF metadata string length exceeds file size.\n");
}

std::string value(reinterpret_cast<const char*>(data_ + offset + 8), length);
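
For context, ReadString decodes GGUF's length-prefixed string encoding: a uint64 byte length followed by the raw bytes, with the bounds check above rejecting lengths that would run past the mapped file. A minimal standalone sketch of the same decode follows; the function name and parameters are hypothetical, not the parser's real state.

// Sketch of decoding one GGUF length-prefixed string from a mapped buffer;
// GGUF stores strings as a uint64 length followed by that many raw bytes.
#include <cstdint>
#include <cstring>
#include <stdexcept>
#include <string>
#include <utility>

std::pair<std::size_t, std::string> ReadLengthPrefixedString(
    const uint8_t* data, std::size_t offset, std::size_t file_size) {
  uint64_t length = 0;
  std::memcpy(&length, data + offset, sizeof(uint64_t));
  // Reject lengths that would read past the end of the mapped file.
  if (offset + 8 + length > file_size) {
    throw std::runtime_error("GGUF metadata string length exceeds file size.\n");
  }
  std::string value(reinterpret_cast<const char*>(data + offset + 8), length);
  return {offset + 8 + length, value};  // next offset, decoded string
}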
@@ -578,9 +579,8 @@ void GGUFHandler::ModelConfigFromMetadata() {
model_config_.model = name;
model_config_.id = name;
model_config_.version = std::to_string(version);
-model_config_.max_tokens =
-    std::min<int>(kDefaultMaxContextLength, max_tokens);
-model_config_.ctx_len = std::min<int>(kDefaultMaxContextLength, max_tokens);
+model_config_.max_tokens = max_tokens;
+model_config_.ctx_len = max_tokens;
model_config_.ngl = ngl;
}

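The substantive change in this file is the last hunk: ModelConfigFromMetadata previously clamped both max_tokens and ctx_len to kDefaultMaxContextLength while parsing the GGUF metadata, which discarded the model's native context length. It now records the metadata value verbatim and defers clamping to model start (see model_service.cc below). A minimal sketch of the difference; the constant's value and the metadata figure are illustrative assumptions, not taken from the repository.

// Illustrative sketch of parse-time clamping vs. storing the value verbatim.
#include <algorithm>
#include <iostream>

int main() {
  constexpr int kDefaultMaxContextLength = 8192;  // assumed default cap
  const int metadata_max_tokens = 131072;  // e.g. read from GGUF metadata

  // Before: clamped at parse time, so the true context length was lost.
  int before = std::min<int>(kDefaultMaxContextLength, metadata_max_tokens);

  // After: stored verbatim; any clamping happens later, at model start.
  int after = metadata_max_tokens;

  std::cout << "before: " << before << "  after: " << after << '\n';
  // Prints "before: 8192  after: 131072".
}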
engine/services/model_service.cc (3 changes: 3 additions & 0 deletions)
@@ -954,6 +954,7 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
json_data["user_prompt"] = mc.user_template;
json_data["ai_prompt"] = mc.ai_template;
json_data["ctx_len"] = std::min(kDefautlContextLength, mc.ctx_len);
json_data["max_tokens"] = std::min(kDefautlContextLength, mc.ctx_len);
max_model_context_length = mc.ctx_len;
} else {
bypass_stop_check_set_.insert(model_handle);
@@ -978,6 +979,8 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
if (ctx_len) {
json_data["ctx_len"] =
std::min(ctx_len.value(), max_model_context_length);
json_data["max_tokens"] =
std::min(ctx_len.value(), max_model_context_length);
}
CTL_INF(json_data.toStyledString());
auto may_fallback_res = MayFallbackToCpu(json_data["model_path"].asString(),
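
These two hunks make StartModel populate max_tokens in lockstep with ctx_len: both default to the model's context length capped at kDefautlContextLength (the constant's spelling is the repository's), and a caller-supplied ctx_len overrides the pair but is still clamped to the model's own maximum. A minimal sketch of the resulting clamping logic, with illustrative values that are assumptions, not the project's defaults.

// Sketch of the start-time clamping; constants and values are illustrative.
#include <algorithm>
#include <iostream>
#include <optional>

int main() {
  const int kDefautlContextLength = 4096;         // assumed default cap
  const int max_model_context_length = 32768;     // model's maximum, from GGUF
  std::optional<int> requested_ctx_len = 131072;  // hypothetical user override

  // Default path: cap the model's context length at the default.
  int ctx_len = std::min(kDefautlContextLength, max_model_context_length);
  int max_tokens = ctx_len;  // kept in lockstep with ctx_len

  if (requested_ctx_len) {
    // An explicit request may raise the value, but never past the model's max.
    ctx_len = std::min(requested_ctx_len.value(), max_model_context_length);
    max_tokens = ctx_len;
  }

  std::cout << "ctx_len: " << ctx_len << "  max_tokens: " << max_tokens << '\n';
  // Prints "ctx_len: 32768  max_tokens: 32768".
}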