diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml index 460d85007..e0b659f93 100644 --- a/.github/workflows/nightly-build.yml +++ b/.github/workflows/nightly-build.yml @@ -104,11 +104,11 @@ jobs: id: update-latest-version run: | echo "{\"tag_name\": \"v${{ needs.get-update-version.outputs.new_version }}\"}" > version.json - aws s3 sync version.json s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/version.json - aws s3 sync s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/mac-amd64-cortex-nightly.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/mac-amd64/cortex-nightly.tar.gz - aws s3 sync s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/mac-arm64-cortex-nightly.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/mac-arm64/cortex-nightly.tar.gz - aws s3 sync s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/linux-amd64-cortex-nightly.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/linux-amd64/cortex-nightly.tar.gz - aws s3 sync s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/windows-amd64-cortex-nightly.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/windows-amd64/cortex-nightly.tar.gz + aws s3 cp version.json s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/version.json + aws s3 cp s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/mac-amd64-cortex-nightly.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/mac-amd64/cortex-nightly.tar.gz + aws s3 cp s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/mac-arm64-cortex-nightly.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/mac-arm64/cortex-nightly.tar.gz + aws s3 cp s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/linux-amd64-cortex-nightly.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/linux-amd64/cortex-nightly.tar.gz + aws s3 cp s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/windows-amd64-cortex-nightly.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/windows-amd64/cortex-nightly.tar.gz env: AWS_ACCESS_KEY_ID: ${{ secrets.DELTA_AWS_ACCESS_KEY_ID }} diff --git a/.github/workflows/template-build-linux-x64.yml b/.github/workflows/template-build-linux-x64.yml index 2adbe06dc..1c82591ca 100644 --- a/.github/workflows/template-build-linux-x64.yml +++ b/.github/workflows/template-build-linux-x64.yml @@ -154,10 +154,10 @@ jobs: - name: upload to aws s3 if public provider is aws if: inputs.public_provider == 'aws-s3' run: | - aws s3 sync ./engine/cortex.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/linux-amd64-cortex-nightly.tar.gz + aws s3 cp ./engine/cortex.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/linux-amd64-cortex-nightly.tar.gz - aws s3 sync ./engine/cortex.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/v${{ inputs.new_version }}/linux-amd64/cortex-nightly.tar.gz - aws s3 sync ./engine/${{ steps.set-output-params.outputs.package_name }}.deb s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/v${{ inputs.new_version }}/linux-amd64/cortex-${{ inputs.new_version }}-linux-amd64-installer.deb + aws s3 cp ./engine/cortex.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/v${{ inputs.new_version }}/linux-amd64/cortex-nightly.tar.gz + aws s3 cp ./engine/${{ steps.set-output-params.outputs.package_name }}.deb s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/v${{ 
inputs.new_version }}/linux-amd64/cortex-${{ inputs.new_version }}-linux-amd64-installer.deb env: AWS_ACCESS_KEY_ID: ${{ secrets.DELTA_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DELTA_AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/template-build-macos.yml b/.github/workflows/template-build-macos.yml index 337f3eceb..e754bdf85 100644 --- a/.github/workflows/template-build-macos.yml +++ b/.github/workflows/template-build-macos.yml @@ -208,10 +208,10 @@ jobs: - name: upload to aws s3 if public provider is aws if: inputs.public_provider == 'aws-s3' run: | - aws s3 sync ./engine/cortex.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/mac-${{ inputs.arch}}-cortex-nightly.tar.gz + aws s3 cp ./engine/cortex.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/mac-${{ inputs.arch}}-cortex-nightly.tar.gz - aws s3 sync ./engine/cortex.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/v${{ inputs.new_version }}/mac-${{ inputs.arch}}/cortex-nightly.tar.gz - aws s3 sync ./engine/${{ steps.set-output-params.outputs.package_name }}.pkg s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/v${{ inputs.new_version }}/mac-${{ inputs.arch}}/cortex-${{ inputs.new_version }}-mac-${{ inputs.arch}}-installer.pkg + aws s3 cp ./engine/cortex.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/v${{ inputs.new_version }}/mac-${{ inputs.arch}}/cortex-nightly.tar.gz + aws s3 cp ./engine/${{ steps.set-output-params.outputs.package_name }}.pkg s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/v${{ inputs.new_version }}/mac-${{ inputs.arch}}/cortex-${{ inputs.new_version }}-mac-${{ inputs.arch}}-installer.pkg env: AWS_ACCESS_KEY_ID: ${{ secrets.DELTA_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DELTA_AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/template-build-windows-x64.yml b/.github/workflows/template-build-windows-x64.yml index 0edaa8043..872c46d48 100644 --- a/.github/workflows/template-build-windows-x64.yml +++ b/.github/workflows/template-build-windows-x64.yml @@ -218,10 +218,10 @@ jobs: run: | Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" refreshenv - aws s3 sync ./engine/cortex.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/windows-amd64-cortex-nightly.tar.gz + aws s3 cp ./engine/cortex.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/windows-amd64-cortex-nightly.tar.gz - aws s3 sync ./engine/cortex.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/v${{ inputs.new_version }}/windows-amd64/cortex-nightly.tar.gz - aws s3 sync ./setup.exe s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/v${{ inputs.new_version }}/windows-amd64/cortex-${{ inputs.new_version }}-windows-amd64-installer.exe + aws s3 cp ./engine/cortex.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/v${{ inputs.new_version }}/windows-amd64/cortex-nightly.tar.gz + aws s3 cp ./setup.exe s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/v${{ inputs.new_version }}/windows-amd64/cortex-${{ inputs.new_version }}-windows-amd64-installer.exe env: AWS_ACCESS_KEY_ID: ${{ secrets.DELTA_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DELTA_AWS_SECRET_ACCESS_KEY }} diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index 2fbf6ba70..76cdcf303 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -70,7 +70,6 @@ endif() find_package(jsoncpp CONFIG REQUIRED) find_package(Drogon CONFIG REQUIRED) find_package(yaml-cpp CONFIG REQUIRED) 
-find_package(jinja2cpp CONFIG REQUIRED) find_package(httplib CONFIG REQUIRED) find_package(nlohmann_json CONFIG REQUIRED) find_package(CLI11 CONFIG REQUIRED) @@ -87,7 +86,6 @@ add_executable(${TARGET_NAME} main.cc target_link_libraries(${TARGET_NAME} PRIVATE httplib::httplib) target_link_libraries(${TARGET_NAME} PRIVATE nlohmann_json::nlohmann_json) -target_link_libraries(${TARGET_NAME} PRIVATE jinja2cpp) target_link_libraries(${TARGET_NAME} PRIVATE CLI11::CLI11) target_link_libraries(${TARGET_NAME} PRIVATE unofficial::minizip::minizip) target_link_libraries(${TARGET_NAME} PRIVATE LibArchive::LibArchive) diff --git a/engine/commands/cortex_upd_cmd.cc b/engine/commands/cortex_upd_cmd.cc index 3c892f6fc..b8edede53 100644 --- a/engine/commands/cortex_upd_cmd.cc +++ b/engine/commands/cortex_upd_cmd.cc @@ -6,6 +6,7 @@ #include "utils/archive_utils.h" #include "utils/file_manager_utils.h" #include "utils/logging_utils.h" +#include "utils/scope_exit.h" #include "utils/system_info_utils.h" #include "utils/url_parser.h" @@ -23,6 +24,16 @@ void CortexUpdCmd::Exec(std::string v) { ssc.Exec(); } } + + // Try to remove cortex temp folder if it exists first + try { + auto n = std::filesystem::remove_all( + std::filesystem::temp_directory_path() / "cortex"); + CTL_INF("Deleted " << n << " files or directories"); + } catch (const std::exception& e) { + CTL_WRN(e.what()); + } + if (CORTEX_VARIANT == file_manager_utils::kProdVariant) { if (!GetStable(v)) return; @@ -38,7 +49,7 @@ void CortexUpdCmd::Exec(std::string v) { bool CortexUpdCmd::GetStable(const std::string& v) { auto system_info = system_info_utils::GetSystemInfo(); - CTL_INF("OS: " << system_info.os << ", Arch: " << system_info.arch); + CTL_INF("OS: " << system_info->os << ", Arch: " << system_info->arch); // Download file auto github_host = GetHostName(); @@ -56,7 +67,7 @@ bool CortexUpdCmd::GetStable(const std::string& v) { } if (!HandleGithubRelease(json_data["assets"], - {system_info.os + "-" + system_info.arch})) { + {system_info->os + "-" + system_info->arch})) { return false; } } catch (const nlohmann::json::parse_error& e) { @@ -75,15 +86,24 @@ bool CortexUpdCmd::GetStable(const std::string& v) { // Replace binary file auto executable_path = file_manager_utils::GetExecutableFolderContainerPath(); - auto src = std::filesystem::temp_directory_path() / "cortex" / kCortexBinary / - GetCortexBinary(); + auto src = + std::filesystem::temp_directory_path() / "cortex" / GetCortexBinary(); auto dst = executable_path / GetCortexBinary(); + utils::ScopeExit se([]() { + auto cortex_tmp = std::filesystem::temp_directory_path() / "cortex"; + try { + auto n = std::filesystem::remove_all(cortex_tmp); + CTL_INF("Deleted " << n << " files or directories"); + } catch (const std::exception& e) { + CTL_WRN(e.what()); + } + }); return ReplaceBinaryInflight(src, dst); } bool CortexUpdCmd::GetBeta(const std::string& v) { auto system_info = system_info_utils::GetSystemInfo(); - CTL_INF("OS: " << system_info.os << ", Arch: " << system_info.arch); + CTL_INF("OS: " << system_info->os << ", Arch: " << system_info->arch); // Download file auto github_host = GetHostName(); @@ -113,7 +133,7 @@ bool CortexUpdCmd::GetBeta(const std::string& v) { } if (!HandleGithubRelease(json_data["assets"], - {system_info.os + "-" + system_info.arch})) { + {system_info->os + "-" + system_info->arch})) { return false; } } catch (const nlohmann::json::parse_error& e) { @@ -135,6 +155,15 @@ bool CortexUpdCmd::GetBeta(const std::string& v) { auto src = 
std::filesystem::temp_directory_path() / "cortex" / GetCortexBinary(); auto dst = executable_path / GetCortexBinary(); + utils::ScopeExit se([]() { + auto cortex_tmp = std::filesystem::temp_directory_path() / "cortex"; + try { + auto n = std::filesystem::remove_all(cortex_tmp); + CTL_INF("Deleted " << n << " files or directories"); + } catch (const std::exception& e) { + CTL_WRN(e.what()); + } + }); return ReplaceBinaryInflight(src, dst); } @@ -205,11 +234,11 @@ bool CortexUpdCmd::HandleGithubRelease(const nlohmann::json& assets, bool CortexUpdCmd::GetNightly(const std::string& v) { auto system_info = system_info_utils::GetSystemInfo(); - CTL_INF("OS: " << system_info.os << ", Arch: " << system_info.arch); + CTL_INF("OS: " << system_info->os << ", Arch: " << system_info->arch); // Download file std::string version = v.empty() ? "latest" : std::move(v); - std::string os_arch{system_info.os + "-" + system_info.arch}; + std::string os_arch{system_info->os + "-" + system_info->arch}; const char* paths[] = { "cortex", version.c_str(), @@ -264,6 +293,15 @@ bool CortexUpdCmd::GetNightly(const std::string& v) { auto src = std::filesystem::temp_directory_path() / "cortex" / GetCortexBinary(); auto dst = executable_path / GetCortexBinary(); + utils::ScopeExit se([]() { + auto cortex_tmp = std::filesystem::temp_directory_path() / "cortex"; + try { + auto n = std::filesystem::remove_all(cortex_tmp); + CTL_INF("Deleted " << n << " files or directories"); + } catch (const std::exception& e) { + CTL_WRN(e.what()); + } + }); return ReplaceBinaryInflight(src, dst); } } // namespace commands diff --git a/engine/commands/engine_install_cmd.cc b/engine/commands/engine_install_cmd.cc index 36f7a040b..59d51bfd1 100644 --- a/engine/commands/engine_install_cmd.cc +++ b/engine/commands/engine_install_cmd.cc @@ -10,8 +10,9 @@ namespace commands { void EngineInstallCmd::Exec(const std::string& engine, - const std::string& version) { - engine_service_.InstallEngine(engine, version); + const std::string& version, + const std::string& src) { + engine_service_.InstallEngine(engine, version, src); CLI_LOG("Engine " << engine << " installed successfully!"); } }; // namespace commands diff --git a/engine/commands/engine_install_cmd.h b/engine/commands/engine_install_cmd.h index c6ba6f135..32b7079cc 100644 --- a/engine/commands/engine_install_cmd.h +++ b/engine/commands/engine_install_cmd.h @@ -9,7 +9,8 @@ class EngineInstallCmd { public: explicit EngineInstallCmd() : engine_service_{EngineService()} {}; - void Exec(const std::string& engine, const std::string& version = "latest"); + void Exec(const std::string& engine, const std::string& version = "latest", + const std::string& src = ""); private: EngineService engine_service_; diff --git a/engine/commands/model_import_cmd.cc b/engine/commands/model_import_cmd.cc index 193b2488b..3fb047a9d 100644 --- a/engine/commands/model_import_cmd.cc +++ b/engine/commands/model_import_cmd.cc @@ -1,10 +1,8 @@ #include "model_import_cmd.h" #include -#include #include #include "config/gguf_parser.h" #include "config/yaml_config.h" -#include "trantor/utils/Logger.h" #include "utils/file_manager_utils.h" #include "utils/logging_utils.h" #include "utils/modellist_utils.h" @@ -45,7 +43,7 @@ void ModelImportCmd::Exec() { } } catch (const std::exception& e) { - // don't need to remove yml file here, because it's written only if model entry is successfully added, + // don't need to remove yml file here, because it's written only if model entry is successfully added, // remove file here can make 
it fail with edge case when user try to import new model with existed model_id CLI_LOG("Error importing model path '" + model_path_ + "' with model_id '" + model_handle_ + "': " + e.what()); diff --git a/engine/config/chat_template_renderer.h b/engine/config/chat_template_renderer.h new file mode 100644 index 000000000..f40894f7b --- /dev/null +++ b/engine/config/chat_template_renderer.h @@ -0,0 +1,431 @@ +/* + * This file contains code derived from the llama.cpp project. + * Original project: https://github.com/ggerganov/llama.cpp + * + * Original work Copyright (c) 2023 Georgi Gerganov + * Modified work Copyright (c) 2024 [Homebrew.ltd] + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * SPDX-License-Identifier: MIT + * + * This file incorporates work covered by the above copyright and permission notice. + * Any modifications made to this file are covered under the copyright of the modifying party. + * + * Modifications: + * [Brief description of modifications made to the original code, if any] + * + * For more information about the llama.cpp project and its license, please visit: + * https://github.com/ggerganov/llama.cpp/blob/master/LICENSE + */ + +// +// chat templates +// + +#include +#include +#include +#include +#include +#include +#include +namespace config { + +#if (defined(_MSC_VER) && _MSC_VER >= 1900 && defined(__cpp_char8_t)) || __cplusplus >= 202002L + #define LU8(x) reinterpret_cast(u8##x) +#else + #define LU8(x) u8##x +#endif + +typedef struct llama_chat_message { + const char* role; + const char* content; +} llama_chat_message; + +struct llama_chat_msg { + std::string role; + std::string content; +}; + +static std::string trim(const std::string& str) { + size_t start = 0; + size_t end = str.size(); + while (start < end && isspace(str[start])) { + start += 1; + } + while (end > start && isspace(str[end - 1])) { + end -= 1; + } + return str.substr(start, end - start); +} +// Simple version of "llama_apply_chat_template" that only works with strings +// This function uses heuristic checks to determine commonly used template. It is not a jinja parser. 
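// Illustrative usage, not part of this patch: the string-based overload of
// llama_chat_apply_template() declared near the end of this header is what
// gguf_parser.cc calls. With the built-in "chatml" template it renders, e.g.:
//
//   std::vector<llama_chat_msg> messages{
//       {"system", "{system_message}"}, {"user", "{prompt}"}};
//   std::string prompt = llama_chat_apply_template("chatml", messages, true);
//   // prompt == "<|im_start|>system\n{system_message}<|im_end|>\n"
//   //           "<|im_start|>user\n{prompt}<|im_end|>\n"
//   //           "<|im_start|>assistant\n"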
+static int32_t llama_chat_apply_template_internal( + const std::string& tmpl, const std::vector& chat, + std::string& dest, bool add_ass) { + // Taken from the research: https://github.com/ggerganov/llama.cpp/issues/5527 + std::stringstream ss; + auto tmpl_contains = [&tmpl](std::string haystack) -> bool { + return tmpl.find(haystack) != std::string::npos; + }; + if (tmpl == "chatml" || tmpl_contains("<|im_start|>")) { + // chatml template + for (auto message : chat) { + ss << "<|im_start|>" << message->role << "\n" + << message->content << "<|im_end|>\n"; + } + if (add_ass) { + ss << "<|im_start|>assistant\n"; + } + } else if (tmpl == "llama2" || tmpl == "mistral" || tmpl_contains("[INST]")) { + // llama2 template and its variants + // [variant] support system message + bool support_system_message = tmpl_contains("<>") || tmpl == "mistral"; + // [variant] space before + after response + bool space_around_response = tmpl_contains("' ' + eos_token"); + // [variant] add BOS inside history + bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]"); + // [variant] trim spaces from the input message + bool strip_message = tmpl_contains("content.strip()"); + // construct the prompt + bool is_inside_turn = true; // skip BOS at the beginning + ss << "[INST] "; + for (auto message : chat) { + std::string content = + strip_message ? trim(message->content) : message->content; + std::string role(message->role); + if (!is_inside_turn) { + is_inside_turn = true; + ss << (add_bos_inside_history ? "[INST] " : "[INST] "); + } + if (role == "system") { + if (support_system_message) { + ss << "<>\n" << content << "\n<>\n\n"; + } else { + // if the model does not support system message, we still include it in the first message, but without <> + ss << content << "\n"; + } + } else if (role == "user") { + ss << content << " [/INST]"; + } else { + ss << (space_around_response ? " " : "") << content + << (space_around_response ? " " : "") << ""; + is_inside_turn = false; + } + } + // llama2 templates seem to not care about "add_generation_prompt" + } else if (tmpl == "phi3" || + (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>"))) { + // Phi 3 + for (auto message : chat) { + std::string role(message->role); + ss << "<|" << role << "|>\n" << message->content << "<|end|>\n"; + } + if (add_ass) { + ss << "<|assistant|>\n"; + } + } else if (tmpl == "zephyr" || tmpl_contains("<|user|>")) { + // zephyr template + for (auto message : chat) { + ss << "<|" << message->role << "|>" << "\n" + << message->content << "<|endoftext|>\n"; + } + if (add_ass) { + ss << "<|assistant|>\n"; + } + } else if (tmpl == "monarch" || + tmpl_contains("bos_token + message['role']")) { + // mlabonne/AlphaMonarch-7B template (the is included inside history) + for (auto message : chat) { + std::string bos = + (message == chat.front()) ? "" : ""; // skip BOS for first message + ss << bos << message->role << "\n" << message->content << "\n"; + } + if (add_ass) { + ss << "assistant\n"; + } + } else if (tmpl == "gemma" || tmpl == "gemma2" || + tmpl_contains("")) { + // google/gemma-7b-it + std::string system_prompt = ""; + for (auto message : chat) { + std::string role(message->role); + if (role == "system") { + // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken + system_prompt = trim(message->content); + continue; + } + // in gemma, "assistant" is "model" + role = role == "assistant" ? 
"model" : message->role; + ss << "" << role << "\n"; + if (!system_prompt.empty() && role != "model") { + ss << system_prompt << "\n\n"; + system_prompt = ""; + } + ss << trim(message->content) << "\n"; + } + if (add_ass) { + ss << "model\n"; + } + } else if (tmpl == "orion" || + tmpl_contains("'\\n\\nAssistant: ' + eos_token")) { + // OrionStarAI/Orion-14B-Chat + std::string system_prompt = ""; + for (auto message : chat) { + std::string role(message->role); + if (role == "system") { + // there is no system message support, we will merge it with user prompt + system_prompt = message->content; + continue; + } else if (role == "user") { + ss << "Human: "; + if (!system_prompt.empty()) { + ss << system_prompt << "\n\n"; + system_prompt = ""; + } + ss << message->content << "\n\nAssistant: "; + } else { + ss << message->content << ""; + } + } + } else if (tmpl == "openchat" || tmpl_contains("GPT4 Correct ")) { + // openchat/openchat-3.5-0106, + for (auto message : chat) { + std::string role(message->role); + if (role == "system") { + ss << message->content << "<|end_of_turn|>"; + } else { + role[0] = toupper(role[0]); + ss << "GPT4 Correct " << role << ": " << message->content + << "<|end_of_turn|>"; + } + } + if (add_ass) { + ss << "GPT4 Correct Assistant:"; + } + } else if (tmpl == "vicuna" || tmpl == "vicuna-orca" || + (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: "))) { + // eachadea/vicuna-13b-1.1 (and Orca variant) + for (auto message : chat) { + std::string role(message->role); + if (role == "system") { + // Orca-Vicuna variant uses a system prefix + if (tmpl == "vicuna-orca" || tmpl_contains("SYSTEM: ")) { + ss << "SYSTEM: " << message->content << "\n"; + } else { + ss << message->content << "\n\n"; + } + } else if (role == "user") { + ss << "USER: " << message->content << "\n"; + } else if (role == "assistant") { + ss << "ASSISTANT: " << message->content << "\n"; + } + } + if (add_ass) { + ss << "ASSISTANT:"; + } + } else if (tmpl == "deepseek" || + (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>"))) { + // deepseek-ai/deepseek-coder-33b-instruct + for (auto message : chat) { + std::string role(message->role); + if (role == "system") { + ss << message->content; + } else if (role == "user") { + ss << "### Instruction:\n" << message->content << "\n"; + } else if (role == "assistant") { + ss << "### Response:\n" << message->content << "\n<|EOT|>\n"; + } + } + if (add_ass) { + ss << "### Response:\n"; + } + } else if (tmpl == "command-r" || (tmpl_contains("<|START_OF_TURN_TOKEN|>") && + tmpl_contains("<|USER_TOKEN|>"))) { + // CohereForAI/c4ai-command-r-plus + for (auto message : chat) { + std::string role(message->role); + if (role == "system") { + ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" + << trim(message->content) << "<|END_OF_TURN_TOKEN|>"; + } else if (role == "user") { + ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) + << "<|END_OF_TURN_TOKEN|>"; + } else if (role == "assistant") { + ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" + << trim(message->content) << "<|END_OF_TURN_TOKEN|>"; + } + } + if (add_ass) { + ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"; + } + } else if (tmpl == "llama3" || (tmpl_contains("<|start_header_id|>") && + tmpl_contains("<|end_header_id|>"))) { + // Llama 3 + for (auto message : chat) { + std::string role(message->role); + ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" + << trim(message->content) << "<|eot_id|>"; + } + if (add_ass) { + ss << 
"<|start_header_id|>assistant<|end_header_id|>\n\n"; + } + } else if (tmpl == "chatglm3" || tmpl_contains("[gMASK]sop")) { + // chatglm3-6b + ss << "[gMASK]" << "sop"; + for (auto message : chat) { + std::string role(message->role); + ss << "<|" << role << "|>" << "\n " << message->content; + } + if (add_ass) { + ss << "<|assistant|>"; + } + } else if (tmpl == "chatglm4" || tmpl_contains("[gMASK]")) { + ss << "[gMASK]" << ""; + for (auto message : chat) { + std::string role(message->role); + ss << "<|" << role << "|>" << "\n" << message->content; + } + if (add_ass) { + ss << "<|assistant|>"; + } + } else if (tmpl == "minicpm" || tmpl_contains(LU8("<用户>"))) { + // MiniCPM-3B-OpenHermes-2.5-v2-GGUF + for (auto message : chat) { + std::string role(message->role); + if (role == "user") { + ss << LU8("<用户>"); + ss << trim(message->content); + ss << ""; + } else { + ss << trim(message->content); + } + } + } else if (tmpl == "deepseek2" || + tmpl_contains("'Assistant: ' + message['content'] + eos_token")) { + // DeepSeek-V2 + for (auto message : chat) { + std::string role(message->role); + if (role == "system") { + ss << message->content << "\n\n"; + } else if (role == "user") { + ss << "User: " << message->content << "\n\n"; + } else if (role == "assistant") { + ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>"); + } + } + if (add_ass) { + ss << "Assistant:"; + } + } else if (tmpl == "exaone3" || + (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && + tmpl_contains("[|endofturn|]"))) { + // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb + // EXAONE-3.0-7.8B-Instruct + for (auto message : chat) { + std::string role(message->role); + if (role == "system") { + ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n"; + } else if (role == "user") { + ss << "[|user|]" << trim(message->content) << "\n"; + } else if (role == "assistant") { + ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n"; + } + } + if (add_ass) { + ss << "[|assistant|]"; + } + } else { + // template not supported + return -1; + } + dest = ss.str(); + return dest.size(); +} + +int32_t llama_chat_apply_template(const char* tmpl, + const struct llama_chat_message* chat, + size_t n_msg, bool add_ass, char* buf, + int32_t length) { + std::string curr_tmpl(tmpl == nullptr ? "" : tmpl); + + // format the chat to string + std::vector chat_vec; + chat_vec.resize(n_msg); + for (size_t i = 0; i < n_msg; i++) { + chat_vec[i] = &chat[i]; + } + + std::string formatted_chat; + int32_t res = llama_chat_apply_template_internal(curr_tmpl, chat_vec, + formatted_chat, add_ass); + if (res < 0) { + return res; + } + if (buf && length > 0) { + strncpy(buf, formatted_chat.c_str(), length); + } + return res; +} + +std::string llama_chat_apply_template(const std::string& tmpl, + const std::vector& msgs, + bool add_ass) { + int alloc_size = 0; + bool fallback = false; // indicate if we must fallback to default chatml + std::vector chat; + for (auto& msg : msgs) { + chat.push_back({msg.role.c_str(), msg.content.c_str()}); + alloc_size += (msg.role.size() + msg.content.size()) * 1.25; + } + + const char* ptr_tmpl = tmpl.empty() ? 
nullptr : tmpl.c_str(); + std::vector buf(alloc_size); + + // run the first time to get the total output length + int32_t res = llama_chat_apply_template(ptr_tmpl, chat.data(), chat.size(), + add_ass, buf.data(), buf.size()); + + // error: chat template is not supported + if (res < 0) { + if (ptr_tmpl != nullptr) { + // if the custom "tmpl" is not supported, we throw an error + // this is a bit redundant (for good), since we're not sure if user validated the custom template with llama_chat_verify_template() + throw std::runtime_error("this custom template is not supported"); + } else { + // If the built-in template is not supported, we default to chatml + res = llama_chat_apply_template("chatml", chat.data(), chat.size(), + add_ass, buf.data(), buf.size()); + fallback = true; + } + } + + // if it turns out that our buffer is too small, we resize it + if ((size_t)res > buf.size()) { + buf.resize(res); + res = + llama_chat_apply_template(fallback ? "chatml" : ptr_tmpl, chat.data(), + chat.size(), add_ass, buf.data(), buf.size()); + } + + std::string formatted_chat(buf.data(), res); + return formatted_chat; +} +} // namespace config \ No newline at end of file diff --git a/engine/config/gguf_parser.cc b/engine/config/gguf_parser.cc index 160734468..178e4b652 100644 --- a/engine/config/gguf_parser.cc +++ b/engine/config/gguf_parser.cc @@ -19,7 +19,7 @@ #include // For file descriptors -#include +#include "chat_template_renderer.h" #include "gguf_parser.h" #include "trantor/utils/Logger.h" @@ -361,19 +361,10 @@ void GGUFHandler::PrintMetadata() { if (key.compare("tokenizer.chat_template") == 0) { LOG_INFO << key << ": " << "\n" << value << "\n"; - jinja2::Template chat_template; - chat_template.Load(value); - jinja2::ValuesMap params{ - {"add_generation_prompt", true}, - {"bos_token", "<|begin_of_text|>"}, - {"eos_token", "<|eot_id|>"}, - {"messages", - jinja2::ValuesList{ - jinja2::ValuesMap{{"role", "system"}, - {"content", "{system_message}"}}, - jinja2::ValuesMap{{"role", "user"}, {"content", "{prompt}"}}}}}; - std::string result = chat_template.RenderAsString(params).value(); - + std::vector messages{ + llama_chat_msg{"system", "{system_message}"}, + llama_chat_msg{"user", "{prompt}"}}; + std::string result = llama_chat_apply_template(value, messages, true); LOG_INFO << "result jinja render: " << result << "\n"; } else { LOG_INFO << key << ": " << value << "\n"; @@ -555,19 +546,10 @@ void GGUFHandler::ModelConfigFromMetadata() { ">\n\n"; } else { try { - jinja2::Template jinja2_chat_template; - jinja2_chat_template.Load(value); - jinja2::ValuesMap params{ - {"add_generation_prompt", true}, - {"bos_token", tokens[bos_token]}, - {"eos_token", tokens[eos_token]}, - {"messages", - jinja2::ValuesList{ - jinja2::ValuesMap{{"role", "system"}, - {"content", "{system_message}"}}, - jinja2::ValuesMap{{"role", "user"}, - {"content", "{prompt}"}}}}}; - chat_template = jinja2_chat_template.RenderAsString(params).value(); + std::vector messages{ + llama_chat_msg{"system", "{system_message}"}, + llama_chat_msg{"user", "{prompt}"}}; + chat_template = llama_chat_apply_template(value, messages, true); } catch (const std::exception& e) { std::cerr << "Error render chat template: " << e.what() << ". 
Using default template: \n[INST] " diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index f57efb7a2..31ace9ffd 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -331,7 +331,8 @@ void CommandLineParser::SetupEngineCommands() { }); for (auto& engine : engine_service_.kSupportEngines) { std::string engine_name{engine}; - EngineInstall(install_cmd, engine_name, cml_data_.engine_version); + EngineInstall(install_cmd, engine_name, cml_data_.engine_version, + cml_data_.engine_src); } auto uninstall_cmd = @@ -395,7 +396,7 @@ void CommandLineParser::SetupSystemCommands() { void CommandLineParser::EngineInstall(CLI::App* parent, const std::string& engine_name, - std::string& version) { + std::string& version, std::string& src) { auto install_engine_cmd = parent->add_subcommand(engine_name, ""); install_engine_cmd->usage("Usage:\n" + commands::GetCortexBinary() + " engines install " + engine_name + " [options]"); @@ -404,9 +405,12 @@ void CommandLineParser::EngineInstall(CLI::App* parent, install_engine_cmd->add_option("-v, --version", version, "Engine version to download"); - install_engine_cmd->callback([engine_name, &version] { + install_engine_cmd->add_option("-s, --source", src, + "Install engine by local path"); + + install_engine_cmd->callback([engine_name, &version, &src] { try { - commands::EngineInstallCmd().Exec(engine_name, version); + commands::EngineInstallCmd().Exec(engine_name, version, src); } catch (const std::exception& e) { CTL_ERR(e.what()); } diff --git a/engine/controllers/command_line_parser.h b/engine/controllers/command_line_parser.h index 98f437098..87a8063fd 100644 --- a/engine/controllers/command_line_parser.h +++ b/engine/controllers/command_line_parser.h @@ -21,7 +21,7 @@ class CommandLineParser { void SetupSystemCommands(); void EngineInstall(CLI::App* parent, const std::string& engine_name, - std::string& version); + std::string& version, std::string& src); void EngineUninstall(CLI::App* parent, const std::string& engine_name); @@ -35,6 +35,7 @@ class CommandLineParser { std::string model_alias; std::string model_path; std::string engine_version = "latest"; + std::string engine_src; std::string cortex_version; bool check_upd = true; int port; diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index e35002e1f..1c1466e5e 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -38,7 +38,7 @@ void Engines::InstallEngine( auto jsonResponse = json::parse(res->body); auto assets = jsonResponse["assets"]; - auto os_arch{system_info.os + "-" + system_info.arch}; + auto os_arch{system_info->os + "-" + system_info->arch}; for (auto& asset : assets) { auto assetName = asset["name"].get(); if (assetName.find(os_arch) != std::string::npos) { diff --git a/engine/e2e-test/test_cli_engine_install.py b/engine/e2e-test/test_cli_engine_install.py index dfb4e9599..b63fa6f0f 100644 --- a/engine/e2e-test/test_cli_engine_install.py +++ b/engine/e2e-test/test_cli_engine_install.py @@ -1,4 +1,5 @@ import platform +import tempfile import pytest from test_runner import run @@ -36,3 +37,16 @@ def test_engines_install_pre_release_llamacpp(self): assert "Start downloading" in output, "Should display downloading message" assert exit_code == 0, f"Install engine failed with error: {error}" + def test_engines_should_fallback_to_download_llamacpp_engine_if_not_exists(self): + exit_code, output, error = run( + "Install Engine", ["engines", "install", 
"cortex.llamacpp", "-s", tempfile.gettempdir()], timeout=None + ) + assert "Start downloading" in output, "Should display downloading message" + assert exit_code == 0, f"Install engine failed with error: {error}" + + def test_engines_should_not_perform_with_dummy_path(self): + exit_code, output, error = run( + "Install Engine", ["engines", "install", "cortex.llamacpp", "-s", "abcpod"], timeout=None + ) + assert "Folder does not exist" in output, "Should display error" + assert exit_code == 0, f"Install engine failed with error: {error}" diff --git a/engine/e2e-test/test_cortex_update.py b/engine/e2e-test/test_cortex_update.py index 0c4d3a774..2d7d652ec 100644 --- a/engine/e2e-test/test_cortex_update.py +++ b/engine/e2e-test/test_cortex_update.py @@ -1,5 +1,7 @@ import pytest from test_runner import run +import tempfile +import os class TestCortexUpdate: @@ -10,3 +12,4 @@ def test_cortex_update(self): exit_code, output, error = run("Update cortex", ["update"]) assert exit_code == 0, "Something went wrong" assert "Updated cortex sucessfully" in output + assert os.path.exists(os.path.join(tempfile.gettempdir()), 'cortex') == False diff --git a/engine/main.cc b/engine/main.cc index bdac8148c..e7fe9bd22 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -88,10 +88,10 @@ void RunServer() { int main(int argc, char* argv[]) { // Stop the program if the system is not supported auto system_info = system_info_utils::GetSystemInfo(); - if (system_info.arch == system_info_utils::kUnsupported || - system_info.os == system_info_utils::kUnsupported) { - CTL_ERR("Unsupported OS or architecture: " << system_info.os << ", " - << system_info.arch); + if (system_info->arch == system_info_utils::kUnsupported || + system_info->os == system_info_utils::kUnsupported) { + CTL_ERR("Unsupported OS or architecture: " << system_info->os << ", " + << system_info->arch); return 1; } diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 0d5cf9ac8..1b1f1d278 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -12,6 +12,32 @@ using json = nlohmann::json; +namespace { +std::string GetSuitableCudaVersion(const std::string& engine, + const std::string& cuda_driver_version) { + auto suitable_toolkit_version = ""; + if (engine == "cortex.tensorrt-llm") { + // for tensorrt-llm, we need to download cuda toolkit v12.4 + suitable_toolkit_version = "12.4"; + } else { + // llamacpp + auto cuda_driver_semver = + semantic_version_utils::SplitVersion(cuda_driver_version); + if (cuda_driver_semver.major == 11) { + suitable_toolkit_version = "11.7"; + } else if (cuda_driver_semver.major == 12) { + suitable_toolkit_version = "12.0"; + } + } + return suitable_toolkit_version; +} +} // namespace + +EngineService::EngineService() + : hw_inf_{.sys_inf = system_info_utils::GetSystemInfo(), + .cuda_driver_version = system_info_utils::GetCudaVersion()} {} +EngineService::~EngineService() {} + std::optional EngineService::GetEngineInfo( const std::string& engine) const { // if engine is not found in kSupportEngine, throw runtime error @@ -74,8 +100,101 @@ std::vector EngineService::GetEngineInfoList() const { } void EngineService::InstallEngine(const std::string& engine, - const std::string& version) { - auto system_info = system_info_utils::GetSystemInfo(); + const std::string& version, + const std::string& src) { + + if (!src.empty()) { + UnzipEngine(engine, version, src); + } else { + DownloadEngine(engine, version); + DownloadCuda(engine); + } +} + +void 
EngineService::UnzipEngine(const std::string& engine, + const std::string& version, + const std::string& path) { + bool found_cuda = false; + + CTL_INF("engine: " << engine); + CTL_INF("CUDA version: " << hw_inf_.cuda_driver_version); + std::string cuda_variant = "cuda-"; + cuda_variant += GetSuitableCudaVersion(engine, hw_inf_.cuda_driver_version) + + "-" + hw_inf_.sys_inf->os + "-" + hw_inf_.sys_inf->arch + + ".tar.gz"; + CTL_INF("cuda_variant: " << cuda_variant); + + std::vector variants; + // Loop through all files in the directory + // 1. Push all engine variants to a list + // 2. If cuda version is matched, extract it + if (std::filesystem::exists(path) && std::filesystem::is_directory(path)) { + for (const auto& entry : std::filesystem::directory_iterator(path)) { + CTL_INF("file path: " << entry.path().string()); + if (entry.is_regular_file() && (entry.path().extension() == ".tar.gz" || + entry.path().extension() == ".gz")) { + CTL_INF("file name: " << entry.path().filename().string()); + variants.push_back(entry.path().filename().string()); + if (std::string cf = entry.path().filename().string(); + cf == cuda_variant) { + CTL_INF("Found cuda variant, extract it"); + found_cuda = true; + // extract binary + auto engine_path = + file_manager_utils::GetEnginesContainerPath() / engine; + archive_utils::ExtractArchive(path + "/" + cf, engine_path.string()); + } + } + } + } else { + // Folder does not exist, throw exception + CTL_ERR("Folder does not exist: " << path); + return; + } + + auto matched_variant = GetMatchedVariant(engine, variants); + CTL_INF("Matched variant: " << matched_variant); + if (matched_variant.empty()) { + CTL_INF("No variant found for " << hw_inf_.sys_inf->os << "-" + << hw_inf_.sys_inf->arch + << ", will get engine from remote"); + // Go with the remote flow + DownloadEngine(engine, version); + } else { + auto engine_path = file_manager_utils::GetEnginesContainerPath(); + archive_utils::ExtractArchive(path + "/" + matched_variant, + engine_path.string()); + } + + // Not match any cuda binary, download from remote + if (!found_cuda) { + DownloadCuda(engine); + } +} + +void EngineService::UninstallEngine(const std::string& engine) { + // TODO: Unload the model which is currently running on engine_ + + // TODO: Unload engine if is loaded + + auto ecp = file_manager_utils::GetEnginesContainerPath(); + auto engine_path = ecp / engine; + + if (!std::filesystem::exists(engine_path)) { + throw std::runtime_error("Engine " + engine + " is not installed!"); + } + + try { + std::filesystem::remove_all(engine_path); + CTL_INF("Engine " << engine << " uninstalled successfully!"); + } catch (const std::exception& e) { + CTL_ERR("Failed to uninstall engine " << engine << ": " << e.what()); + throw; + } +} + +void EngineService::DownloadEngine(const std::string& engine, + const std::string& version) { auto get_params = [&engine, &version]() -> std::vector { if (version == "latest") { return {"repos", "janhq", engine, "releases", version}; @@ -109,11 +228,11 @@ void EngineService::InstallEngine(const std::string& engine, body = get_data(body); } if (body.empty()) { - throw std::runtime_error("No release found for " + version); + throw std::runtime_error("No release found for " + version); } auto assets = body["assets"]; - auto os_arch{system_info.os + "-" + system_info.arch}; + auto os_arch{hw_inf_.sys_inf->os + "-" + hw_inf_.sys_inf->arch}; std::vector variants; for (auto& asset : assets) { @@ -121,24 +240,9 @@ void EngineService::InstallEngine(const std::string& engine, 
variants.push_back(asset_name); } - auto cuda_driver_version = system_info_utils::GetCudaVersion(); CTL_INF("engine: " << engine); - CTL_INF("CUDA version: " << cuda_driver_version); - std::string matched_variant = ""; - - if (engine == "cortex.tensorrt-llm") { - matched_variant = engine_matcher_utils::ValidateTensorrtLlm( - variants, system_info.os, cuda_driver_version); - } else if (engine == "cortex.onnx") { - matched_variant = engine_matcher_utils::ValidateOnnx( - variants, system_info.os, system_info.arch); - } else if (engine == "cortex.llamacpp") { - cortex::cpuid::CpuInfo cpu_info; - auto suitable_avx = engine_matcher_utils::GetSuitableAvxVariant(cpu_info); - matched_variant = engine_matcher_utils::Validate( - variants, system_info.os, system_info.arch, suitable_avx, - cuda_driver_version); - } + CTL_INF("CUDA version: " << hw_inf_.cuda_driver_version); + auto matched_variant = GetMatchedVariant(engine, variants); CTL_INF("Matched variant: " << matched_variant); if (matched_variant.empty()) { CTL_ERR("No variant found for " << os_arch); @@ -195,85 +299,6 @@ void EngineService::InstallEngine(const std::string& engine, } CTL_INF("Finished!"); }); - if (system_info.os == "mac" || engine == "cortex.onnx") { - // mac and onnx engine does not require cuda toolkit - return; - } - - if (cuda_driver_version.empty()) { - CTL_WRN("No cuda driver, continue with CPU"); - return; - } - - // download cuda toolkit - const std::string jan_host = "https://catalog.jan.ai"; - const std::string cuda_toolkit_file_name = "cuda.tar.gz"; - const std::string download_id = "cuda"; - - // TODO: we don't have API to retrieve list of cuda toolkit dependencies atm because we hosting it at jan - // will have better logic after https://github.com/janhq/cortex/issues/1046 finished - // for now, assume that we have only 11.7 and 12.4 - auto suitable_toolkit_version = ""; - if (engine == "cortex.tensorrt-llm") { - // for tensorrt-llm, we need to download cuda toolkit v12.4 - suitable_toolkit_version = "12.4"; - } else { - // llamacpp - auto cuda_driver_semver = - semantic_version_utils::SplitVersion(cuda_driver_version); - if (cuda_driver_semver.major == 11) { - suitable_toolkit_version = "11.7"; - } else if (cuda_driver_semver.major == 12) { - suitable_toolkit_version = "12.0"; - } - } - - // compare cuda driver version with cuda toolkit version - // cuda driver version should be greater than toolkit version to ensure compatibility - if (semantic_version_utils::CompareSemanticVersion( - cuda_driver_version, suitable_toolkit_version) < 0) { - CTL_ERR("Your Cuda driver version " - << cuda_driver_version - << " is not compatible with cuda toolkit version " - << suitable_toolkit_version); - throw std::runtime_error( - "Cuda driver is not compatible with cuda toolkit"); - } - - std::ostringstream cuda_toolkit_url; - cuda_toolkit_url << jan_host << "/" << "dist/cuda-dependencies/" - << cuda_driver_version << "/" << system_info.os << "/" - << cuda_toolkit_file_name; - - LOG_DEBUG << "Cuda toolkit download url: " << cuda_toolkit_url.str(); - auto cuda_toolkit_local_path = - file_manager_utils::GetContainerFolderPath( - file_manager_utils::DownloadTypeToString( - DownloadType::CudaToolkit)) / - cuda_toolkit_file_name; - LOG_DEBUG << "Download to: " << cuda_toolkit_local_path.string(); - auto downloadCudaToolkitTask{DownloadTask{ - .id = download_id, - .type = DownloadType::CudaToolkit, - .items = {DownloadItem{.id = download_id, - .downloadUrl = cuda_toolkit_url.str(), - .localPath = cuda_toolkit_local_path}}, - }}; - - 
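// Worked example, illustrative and not part of this patch, of the CUDA
// toolkit selection now factored into GetSuitableCudaVersion() above:
//   - cortex.tensorrt-llm always maps to toolkit "12.4";
//   - cortex.llamacpp maps driver major 11 -> "11.7" and major 12 -> "12.0".
// DownloadCuda() then requires driver >= toolkit, so a 12.3 driver passes
// (12.3 >= 12.0) while an 11.4 driver fails the check (11.4 < 11.7) and the
// install throws "Cuda driver is not compatible with cuda toolkit".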
download_service.AddDownloadTask( - downloadCudaToolkitTask, [&](const DownloadTask& finishedTask) { - auto engine_path = - file_manager_utils::GetEnginesContainerPath() / engine; - archive_utils::ExtractArchive( - finishedTask.items[0].localPath.string(), - engine_path.string()); - - try { - std::filesystem::remove(finishedTask.items[0].localPath); - } catch (std::exception& e) { - CTL_ERR("Error removing downloaded file: " << e.what()); - } - }); return; } } @@ -282,23 +307,89 @@ void EngineService::InstallEngine(const std::string& engine, } } -void EngineService::UninstallEngine(const std::string& engine) { - // TODO: Unload the model which is currently running on engine_ +void EngineService::DownloadCuda(const std::string& engine) { + if (hw_inf_.sys_inf->os == "mac" || engine == "cortex.onnx") { + // mac and onnx engine does not require cuda toolkit + return; + } - // TODO: Unload engine if is loaded + if (hw_inf_.cuda_driver_version.empty()) { + CTL_WRN("No cuda driver, continue with CPU"); + return; + } + // download cuda toolkit + const std::string jan_host = "catalog.jan.ai"; + const std::string cuda_toolkit_file_name = "cuda.tar.gz"; + const std::string download_id = "cuda"; + + auto suitable_toolkit_version = + GetSuitableCudaVersion(engine, hw_inf_.cuda_driver_version); + + // compare cuda driver version with cuda toolkit version + // cuda driver version should be greater than toolkit version to ensure compatibility + if (semantic_version_utils::CompareSemanticVersion( + hw_inf_.cuda_driver_version, suitable_toolkit_version) < 0) { + CTL_ERR("Your Cuda driver version " + << hw_inf_.cuda_driver_version + << " is not compatible with cuda toolkit version " + << suitable_toolkit_version); + throw std::runtime_error("Cuda driver is not compatible with cuda toolkit"); + } - auto ecp = file_manager_utils::GetEnginesContainerPath(); - auto engine_path = ecp / engine; + auto url_obj = url_parser::Url{ + .protocol = "https", + .host = jan_host, + .pathParams = {"dist", "cuda-dependencies", suitable_toolkit_version, + hw_inf_.sys_inf->os, cuda_toolkit_file_name}, + }; - if (!std::filesystem::exists(engine_path)) { - throw std::runtime_error("Engine " + engine + " is not installed!"); - } + auto cuda_toolkit_url = url_parser::FromUrl(url_obj); + + LOG_DEBUG << "Cuda toolkit download url: " << cuda_toolkit_url; + auto cuda_toolkit_local_path = + file_manager_utils::GetContainerFolderPath( + file_manager_utils::DownloadTypeToString(DownloadType::CudaToolkit)) / + cuda_toolkit_file_name; + LOG_DEBUG << "Download to: " << cuda_toolkit_local_path.string(); + auto downloadCudaToolkitTask{DownloadTask{ + .id = download_id, + .type = DownloadType::CudaToolkit, + .items = {DownloadItem{.id = download_id, + .downloadUrl = cuda_toolkit_url, + .localPath = cuda_toolkit_local_path}}, + }}; + + DownloadService download_service; + download_service.AddDownloadTask( + downloadCudaToolkitTask, [&](const DownloadTask& finishedTask) { + auto engine_path = + file_manager_utils::GetEnginesContainerPath() / engine; + archive_utils::ExtractArchive(finishedTask.items[0].localPath.string(), + engine_path.string()); + + try { + std::filesystem::remove(finishedTask.items[0].localPath); + } catch (std::exception& e) { + CTL_ERR("Error removing downloaded file: " << e.what()); + } + }); +} - try { - std::filesystem::remove_all(engine_path); - CTL_INF("Engine " << engine << " uninstalled successfully!"); - } catch (const std::exception& e) { - CTL_ERR("Failed to uninstall engine " << engine << ": " << e.what()); - 
throw; +std::string EngineService::GetMatchedVariant( + const std::string& engine, const std::vector& variants) { + std::string matched_variant; + if (engine == "cortex.tensorrt-llm") { + matched_variant = engine_matcher_utils::ValidateTensorrtLlm( + variants, hw_inf_.sys_inf->os, hw_inf_.cuda_driver_version); + } else if (engine == "cortex.onnx") { + matched_variant = engine_matcher_utils::ValidateOnnx( + variants, hw_inf_.sys_inf->os, hw_inf_.sys_inf->arch); + } else if (engine == "cortex.llamacpp") { + auto suitable_avx = + engine_matcher_utils::GetSuitableAvxVariant(hw_inf_.cpu_inf); + matched_variant = engine_matcher_utils::Validate( + variants, hw_inf_.sys_inf->os, hw_inf_.sys_inf->arch, suitable_avx, + hw_inf_.cuda_driver_version); } -} + return matched_variant; +} \ No newline at end of file diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index 442923356..5e434bf24 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -1,9 +1,11 @@ #pragma once +#include #include #include #include #include +#include "utils/cpuid/cpu_info.h" struct EngineInfo { std::string name; @@ -14,6 +16,9 @@ struct EngineInfo { std::string status; }; +namespace system_info_utils { +struct SystemInfo; +} class EngineService { public: constexpr static auto kIncompatible = "Incompatible"; @@ -23,12 +28,35 @@ class EngineService { const std::vector kSupportEngines = { "cortex.llamacpp", "cortex.onnx", "cortex.tensorrt-llm"}; + EngineService(); + ~EngineService(); + std::optional GetEngineInfo(const std::string& engine) const; std::vector GetEngineInfoList() const; void InstallEngine(const std::string& engine, - const std::string& version = "latest"); + const std::string& version = "latest", + const std::string& src = ""); + + void UnzipEngine(const std::string& engine, const std::string& version, + const std::string& path); void UninstallEngine(const std::string& engine); + + private: + void DownloadEngine(const std::string& engine, + const std::string& version = "latest"); + void DownloadCuda(const std::string& engine); + + std::string GetMatchedVariant(const std::string& engine, + const std::vector& variants); + + private: + struct HardwareInfo { + std::unique_ptr sys_inf; + cortex::cpuid::CpuInfo cpu_inf; + std::string cuda_driver_version; + }; + HardwareInfo hw_inf_; }; diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 29575dfab..dc6fc3f68 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -95,12 +95,16 @@ void ModelService::DownloadModelByDirectUrl(const std::string& url) { url_obj.pathParams[2] = "resolve"; } } - + auto author{url_obj.pathParams[0]}; auto model_id{url_obj.pathParams[1]}; auto file_name{url_obj.pathParams.back()}; - auto local_path = - file_manager_utils::GetModelsContainerPath() / model_id / model_id; + if (author == "cortexso") { + return DownloadModelFromCortexso(model_id); + } + + auto local_path{file_manager_utils::GetModelsContainerPath() / + "huggingface.co" / author / model_id / file_name}; try { std::filesystem::create_directories(local_path.parent_path()); @@ -120,10 +124,10 @@ void ModelService::DownloadModelByDirectUrl(const std::string& url) { .localPath = local_path, }}}}; - auto on_finished = [](const DownloadTask& finishedTask) { + auto on_finished = [&author](const DownloadTask& finishedTask) { CLI_LOG("Model " << finishedTask.id << " downloaded successfully!") auto gguf_download_item = finishedTask.items[0]; - 
model_callback_utils::ParseGguf(gguf_download_item); + model_callback_utils::ParseGguf(gguf_download_item, author); }; download_service_.AddDownloadTask(downloadTask, on_finished); diff --git a/engine/templates/macos/create_pkg.sh b/engine/templates/macos/create_pkg.sh index 74bcc1438..bf8cbcaf1 100644 --- a/engine/templates/macos/create_pkg.sh +++ b/engine/templates/macos/create_pkg.sh @@ -14,6 +14,8 @@ cp $SOURCE_BINARY_PATH installer/$DESTINATION_BINARY_NAME export DESTINATION_BINARY_NAME cp postinstall Scripts/postinstall sed -i '' "3s/.*/DESTINATION_BINARY_NAME=$DESTINATION_BINARY_NAME/" Scripts/postinstall +sed -i '' "4s/.*/DATA_FOLDER_NAME=$DATA_FOLDER_NAME/" Scripts/postinstall +sed -i '' "5s/.*/CONFIGURATION_FILE_NAME=$CONFIGURATION_FILE_NAME/" Scripts/postinstall chmod +x Scripts/postinstall export DATA_FOLDER_NAME CONFIGURATION_FILE_NAME UNINSTALLER_FILE_NAME diff --git a/engine/templates/macos/postinstall b/engine/templates/macos/postinstall index 7382c2cd0..551f98dcf 100644 --- a/engine/templates/macos/postinstall +++ b/engine/templates/macos/postinstall @@ -1,8 +1,15 @@ #!/usr/bin/env sh set -e DESTINATION_BINARY_NAME=cortex -USER_TO_RUN_AS=${SUDO_USER:-$(whoami)} -echo "Download cortex.llamacpp engines by default" +DATA_FOLDER_NAME=.cortex +CONFIGURATION_FILE_NAME=.cortexrc + +USER_TO_RUN_AS=$(stat -f "%Su" /dev/console) + +echo "Download cortex.llamacpp engines by default for user $USER_TO_RUN_AS" sudo -u $USER_TO_RUN_AS /usr/local/bin/$DESTINATION_BINARY_NAME engines install cortex.llamacpp +sudo chown -R $USER_TO_RUN_AS:staff "/Users/$USER_TO_RUN_AS/$DATA_FOLDER_NAME" +sudo chown $USER_TO_RUN_AS:staff "/Users/$USER_TO_RUN_AS/$CONFIGURATION_FILE_NAME" + exit 0 \ No newline at end of file diff --git a/engine/test/components/CMakeLists.txt b/engine/test/components/CMakeLists.txt index fa1c5477e..f89881118 100644 --- a/engine/test/components/CMakeLists.txt +++ b/engine/test/components/CMakeLists.txt @@ -8,10 +8,9 @@ add_executable(${PROJECT_NAME} ${SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/m find_package(Drogon CONFIG REQUIRED) find_package(GTest CONFIG REQUIRED) find_package(yaml-cpp CONFIG REQUIRED) -find_package(jinja2cpp CONFIG REQUIRED) find_package(httplib CONFIG REQUIRED) -target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon GTest::gtest GTest::gtest_main yaml-cpp::yaml-cpp jinja2cpp +target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon GTest::gtest GTest::gtest_main yaml-cpp::yaml-cpp ${CMAKE_THREAD_LIBS_INIT}) target_link_libraries(${PROJECT_NAME} PRIVATE httplib::httplib) diff --git a/engine/utils/cortexso_parser.h b/engine/utils/cortexso_parser.h index d4e85bee9..af3372022 100644 --- a/engine/utils/cortexso_parser.h +++ b/engine/utils/cortexso_parser.h @@ -1,5 +1,4 @@ #include -#include #include #include @@ -7,57 +6,57 @@ #include #include "httplib.h" #include "utils/file_manager_utils.h" +#include "utils/huggingface_utils.h" #include "utils/logging_utils.h" namespace cortexso_parser { -constexpr static auto kHuggingFaceHost = "https://huggingface.co"; +constexpr static auto kHuggingFaceHost = "huggingface.co"; inline std::optional getDownloadTask( const std::string& modelId, const std::string& branch = "main") { using namespace nlohmann; - std::ostringstream oss; - oss << "/api/models/cortexso/" << modelId << "/tree/" << branch; - const std::string url = oss.str(); + url_parser::Url url = { + .protocol = "https", + .host = kHuggingFaceHost, + .pathParams = {"api", "models", "cortexso", modelId, "tree", branch}}; - std::ostringstream 
repoAndModelId; - repoAndModelId << "cortexso/" << modelId; - const std::string repoAndModelIdStr = repoAndModelId.str(); - - httplib::Client cli(kHuggingFaceHost); - if (auto res = cli.Get(url)) { + httplib::Client cli(url.GetProtocolAndHost()); + if (auto res = cli.Get(url.GetPathAndQuery())) { if (res->status == httplib::StatusCode::OK_200) { try { auto jsonResponse = json::parse(res->body); - std::vector downloadItems{}; - std::filesystem::path model_container_path = - file_manager_utils::GetModelsContainerPath() / modelId; + std::vector download_items{}; + auto model_container_path = + file_manager_utils::GetModelsContainerPath() / "cortex.so" / + modelId / branch; file_manager_utils::CreateDirectoryRecursively( model_container_path.string()); for (const auto& [key, value] : jsonResponse.items()) { - std::ostringstream downloadUrlOutput; auto path = value["path"].get(); if (path == ".gitattributes" || path == ".gitignore" || path == "README.md") { continue; } - downloadUrlOutput << kHuggingFaceHost << "/" << repoAndModelIdStr - << "/resolve/" << branch << "/" << path; - const std::string download_url = downloadUrlOutput.str(); - auto local_path = model_container_path / path; + url_parser::Url download_url = { + .protocol = "https", + .host = kHuggingFaceHost, + .pathParams = {"cortexso", modelId, "resolve", branch, path}}; - downloadItems.push_back(DownloadItem{.id = path, - .downloadUrl = download_url, - .localPath = local_path}); + auto local_path = model_container_path / path; + download_items.push_back( + DownloadItem{.id = path, + .downloadUrl = download_url.ToFullPath(), + .localPath = local_path}); } - DownloadTask downloadTask{ + DownloadTask download_tasks{ .id = branch == "main" ? modelId : modelId + "-" + branch, .type = DownloadType::Model, - .items = downloadItems}; + .items = download_items}; - return downloadTask; + return download_tasks; } catch (const json::parse_error& e) { CTL_ERR("JSON parse error: {}" << e.what()); } diff --git a/engine/utils/model_callback_utils.h b/engine/utils/model_callback_utils.h index 3a3b0f288..c6e98dd48 100644 --- a/engine/utils/model_callback_utils.h +++ b/engine/utils/model_callback_utils.h @@ -6,27 +6,14 @@ #include "config/gguf_parser.h" #include "config/yaml_config.h" #include "services/download_service.h" +#include "utils/huggingface_utils.h" #include "utils/logging_utils.h" +#include "utils/modellist_utils.h" namespace model_callback_utils { -inline void WriteYamlOutput(const DownloadItem& modelYmlDownloadItem) { - config::YamlHandler handler; - handler.ModelConfigFromFile(modelYmlDownloadItem.localPath.string()); - config::ModelConfig model_config = handler.GetModelConfig(); - model_config.id = - modelYmlDownloadItem.localPath.parent_path().filename().string(); - - CTL_INF("Updating model config in " - << modelYmlDownloadItem.localPath.string()); - handler.UpdateModelConfig(model_config); - std::string yaml_filename{model_config.id + ".yaml"}; - std::filesystem::path yaml_output = - modelYmlDownloadItem.localPath.parent_path().parent_path() / - yaml_filename; - handler.WriteYamlFile(yaml_output.string()); -} -inline void ParseGguf(const DownloadItem& ggufDownloadItem) { +inline void ParseGguf(const DownloadItem& ggufDownloadItem, + std::optional author = nullptr) { config::GGUFHandler gguf_handler; config::YamlHandler yaml_handler; gguf_handler.Parse(ggufDownloadItem.localPath.string()); @@ -36,17 +23,27 @@ inline void ParseGguf(const DownloadItem& ggufDownloadItem) { model_config.files = {ggufDownloadItem.localPath.string()}; 
diff --git a/engine/utils/model_callback_utils.h b/engine/utils/model_callback_utils.h
index 3a3b0f288..c6e98dd48 100644
--- a/engine/utils/model_callback_utils.h
+++ b/engine/utils/model_callback_utils.h
@@ -6,27 +6,14 @@
 #include "config/gguf_parser.h"
 #include "config/yaml_config.h"
 #include "services/download_service.h"
+#include "utils/huggingface_utils.h"
 #include "utils/logging_utils.h"
+#include "utils/modellist_utils.h"
 
 namespace model_callback_utils {
-inline void WriteYamlOutput(const DownloadItem& modelYmlDownloadItem) {
-  config::YamlHandler handler;
-  handler.ModelConfigFromFile(modelYmlDownloadItem.localPath.string());
-  config::ModelConfig model_config = handler.GetModelConfig();
-  model_config.id =
-      modelYmlDownloadItem.localPath.parent_path().filename().string();
-
-  CTL_INF("Updating model config in "
-          << modelYmlDownloadItem.localPath.string());
-  handler.UpdateModelConfig(model_config);
-  std::string yaml_filename{model_config.id + ".yaml"};
-  std::filesystem::path yaml_output =
-      modelYmlDownloadItem.localPath.parent_path().parent_path() /
-      yaml_filename;
-  handler.WriteYamlFile(yaml_output.string());
-}
-inline void ParseGguf(const DownloadItem& ggufDownloadItem) {
+inline void ParseGguf(const DownloadItem& ggufDownloadItem,
+                      std::optional<std::string> author = nullptr) {
   config::GGUFHandler gguf_handler;
   config::YamlHandler yaml_handler;
   gguf_handler.Parse(ggufDownloadItem.localPath.string());
@@ -36,17 +23,27 @@ inline void ParseGguf(const DownloadItem& ggufDownloadItem) {
   model_config.files = {ggufDownloadItem.localPath.string()};
   yaml_handler.UpdateModelConfig(model_config);
-  std::string yaml_filename{model_config.id + ".yaml"};
-  std::filesystem::path yaml_output =
-      ggufDownloadItem.localPath.parent_path().parent_path() / yaml_filename;
-  std::filesystem::path yaml_path(ggufDownloadItem.localPath.parent_path() /
-                                  "model.yml");
-  if (!std::filesystem::exists(yaml_output)) {  // if model.yml doesn't exist
-    yaml_handler.WriteYamlFile(yaml_output.string());
-  }
+  auto yaml_path{ggufDownloadItem.localPath};
+  auto yaml_name = yaml_path.replace_extension(".yml");
+  if (!std::filesystem::exists(yaml_path)) {
+    yaml_handler.WriteYamlFile(yaml_path.string());
+  }
+
+  auto url_obj = url_parser::FromUrlString(ggufDownloadItem.downloadUrl);
+  auto branch = url_obj.pathParams[3];
+  CTL_INF("Adding model to modellist with branch: " << branch);
+
+  auto author_id = author.has_value() ? author.value() : "cortexso";
+  modellist_utils::ModelListUtils modellist_utils_obj;
+  modellist_utils::ModelEntry model_entry{
+      .model_id = model_config.id,
+      .author_repo_id = author_id,
+      .branch_name = branch,
+      .path_to_model_yaml = yaml_name.string(),
+      .model_alias = model_config.id,
+      .status = modellist_utils::ModelStatus::READY};
+  modellist_utils_obj.AddModelEntry(model_entry);
 }
 
 inline void DownloadModelCb(const DownloadTask& finishedTask) {
@@ -67,12 +64,27 @@ inline void DownloadModelCb(const DownloadTask& finishedTask) {
     }
   }
 
-  if (model_yml_di != nullptr) {
-    WriteYamlOutput(*model_yml_di);
-  }
-
   if (need_parse_gguf && gguf_di != nullptr) {
     ParseGguf(*gguf_di);
   }
+
+  if (model_yml_di != nullptr) {
+    auto url_obj = url_parser::FromUrlString(model_yml_di->downloadUrl);
+    auto branch = url_obj.pathParams[3];
+    CTL_INF("Adding model to modellist with branch: " << branch);
+    config::YamlHandler yaml_handler;
+    yaml_handler.ModelConfigFromFile(model_yml_di->localPath.string());
+    auto mc = yaml_handler.GetModelConfig();
+
+    modellist_utils::ModelListUtils modellist_utils_obj;
+    modellist_utils::ModelEntry model_entry{
+        .model_id = mc.name,
+        .author_repo_id = "cortexso",
+        .branch_name = branch,
+        .path_to_model_yaml = model_yml_di->localPath.string(),
+        .model_alias = mc.name,
+        .status = modellist_utils::ModelStatus::READY};
+    modellist_utils_obj.AddModelEntry(model_entry);
+  }
 }
 }  // namespace model_callback_utils
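
Note (illustrative only, not part of the patch): the callbacks above now register every downloaded model in the model list. A minimal sketch of that registration, assuming "utils/modellist_utils.h" is on the include path; the id, alias, and yaml path below are hypothetical values.

#include "utils/modellist_utils.h"

int main() {
  modellist_utils::ModelListUtils model_list;
  modellist_utils::ModelEntry entry{
      .model_id = "tinyllama-1b-gguf",                            // hypothetical id
      .author_repo_id = "cortexso",
      .branch_name = "main",
      .path_to_model_yaml = "/tmp/models/tinyllama-1b-gguf.yml",  // hypothetical path
      .model_alias = "tinyllama-1b-gguf",
      .status = modellist_utils::ModelStatus::READY};
  // Adds the entry to the on-disk model list maintained by ModelListUtils.
  model_list.AddModelEntry(entry);
  return 0;
}
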
diff --git a/engine/utils/modellist_utils.cc b/engine/utils/modellist_utils.cc
index 261bf58d5..7e1a43833 100644
--- a/engine/utils/modellist_utils.cc
+++ b/engine/utils/modellist_utils.cc
@@ -3,10 +3,10 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include "file_manager_utils.h"
+
 namespace modellist_utils {
 const std::string ModelListUtils::kModelListPath =
     (file_manager_utils::GetModelsContainerPath() /
@@ -208,7 +208,8 @@ bool ModelListUtils::UpdateModelAlias(const std::string& model_id,
       });
   bool check_alias_unique = std::none_of(
       entries.begin(), entries.end(), [&](const ModelEntry& entry) {
-        return (entry.model_id == new_model_alias && entry.model_id != model_id) ||
+        return (entry.model_id == new_model_alias &&
+                entry.model_id != model_id) ||
                entry.model_alias == new_model_alias;
       });
   if (it != entries.end() && check_alias_unique) {
@@ -237,4 +238,4 @@ bool ModelListUtils::DeleteModelEntry(const std::string& identifier) {
   }
   return false;  // Entry not found or not in READY state
 }
-}  // namespace modellist_utils
\ No newline at end of file
+}  // namespace modellist_utils
diff --git a/engine/utils/modellist_utils.h b/engine/utils/modellist_utils.h
index 75a41d880..b7aaca81a 100644
--- a/engine/utils/modellist_utils.h
+++ b/engine/utils/modellist_utils.h
@@ -1,9 +1,10 @@
 #pragma once
+
 #include
 #include
 #include
 #include
-#include "logging_utils.h"
+
 namespace modellist_utils {
 enum class ModelStatus { READY, RUNNING };
@@ -22,7 +23,7 @@ class ModelListUtils {
  private:
  mutable std::mutex mutex_;  // For thread safety
-  bool IsUnique(const std::vector<ModelEntry>& entries,
+  bool IsUnique(const std::vector<ModelEntry>& entries,
                const std::string& model_id,
                const std::string& model_alias) const;
  void SaveModelList(const std::vector<ModelEntry>& entries) const;
@@ -40,6 +41,7 @@ class ModelListUtils {
  bool UpdateModelEntry(const std::string& identifier,
                        const ModelEntry& updated_entry);
  bool DeleteModelEntry(const std::string& identifier);
-  bool UpdateModelAlias(const std::string& model_id, const std::string& model_alias);
+  bool UpdateModelAlias(const std::string& model_id,
+                        const std::string& model_alias);
 };
-}  // namespace modellist_utils
\ No newline at end of file
+}  // namespace modellist_utils
diff --git a/engine/utils/scope_exit.h b/engine/utils/scope_exit.h
new file mode 100644
index 000000000..d79d0951f
--- /dev/null
+++ b/engine/utils/scope_exit.h
@@ -0,0 +1,15 @@
+#pragma once
+
+namespace utils {
+template <typename F>
+struct ScopeExit {
+  ScopeExit(F&& f) : f_(std::forward<F>(f)) {}
+  ~ScopeExit() { f_(); }
+  F f_;
+};
+
+template <typename F>
+ScopeExit<F> makeScopeExit(F&& f) {
+  return ScopeExit<F>(std::forward<F>(f));
+};
+}  // namespace utils
\ No newline at end of file
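
Note (illustrative only, not part of the patch): a minimal sketch of using the new utils::ScopeExit helper added above. The cleanup lambda runs when the guard leaves scope, including on early returns; the file name is made up.

#include <cstdio>

#include "utils/scope_exit.h"

int main() {
  std::FILE* f = std::fopen("scratch.txt", "w");
  if (f == nullptr) {
    return 1;
  }
  // Registered once, executed by ~ScopeExit on every exit path below.
  auto close_guard = utils::makeScopeExit([&] { std::fclose(f); });

  std::fputs("hello\n", f);
  return 0;  // fclose(f) runs here via the guard's destructor
}
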
diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h
index 9cdcc8f05..9dbfcc7c9 100644
--- a/engine/utils/system_info_utils.h
+++ b/engine/utils/system_info_utils.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include
+#include <memory>
 #include
 #include
 #include
@@ -22,6 +23,8 @@ constexpr static auto kGpuInfoRegex{
     R"((\d+),\s*(\d+),\s*([^,]+),\s*([\d\.]+))"};
 
 struct SystemInfo {
+  explicit SystemInfo(std::string os, std::string arch)
+      : os(std::move(os)), arch(std::move(arch)) {}
   std::string os;
   std::string arch;
 };
@@ -51,7 +54,7 @@ inline std::string GetGpuArch(const std::string& gpuName) {
   }
 }
 
-inline SystemInfo GetSystemInfo() {
+inline std::unique_ptr<SystemInfo> GetSystemInfo() {
   std::ostringstream arch;
   std::ostringstream os;
@@ -76,7 +79,7 @@ inline SystemInfo GetSystemInfo() {
 #else
   os << kUnsupported;
 #endif
-  return SystemInfo{os.str(), arch.str()};
+  return std::make_unique<SystemInfo>(os.str(), arch.str());
 }
 
 inline bool IsNvidiaSmiAvailable() {
diff --git a/engine/utils/url_parser.h b/engine/utils/url_parser.h
index 55dd557b8..97d499a97 100644
--- a/engine/utils/url_parser.h
+++ b/engine/utils/url_parser.h
@@ -7,11 +7,35 @@
 #include "exceptions/malformed_url_exception.h"
 
 namespace url_parser {
+
+struct explicit_bool {
+  bool b = false;
+  template <typename T, std::enable_if_t<std::is_same_v<T, bool>, bool> = true>
+  explicit_bool(T v) : b(v) {}
+  explicit_bool(explicit_bool const&) noexcept = default;
+  explicit_bool& operator=(explicit_bool const&) & noexcept = default;
+  explicit_bool() noexcept = default;
+  ~explicit_bool() noexcept = default;
+  bool operator!() const { return !b; }
+  explicit operator bool() const { return b; }
+};
+
+struct explicit_int {
+  int i = 0;
+  template <typename T, std::enable_if_t<std::is_same_v<T, int>, int> = true>
+  explicit_int(T v) : i(v) {}
+  explicit_int(explicit_int const&) noexcept = default;
+  explicit_int& operator=(explicit_int const&) & noexcept = default;
+  explicit_int() noexcept = default;
+  ~explicit_int() noexcept = default;
+
+  explicit operator int() const { return i; }
+};
 struct Url {
   std::string protocol;
   std::string host;
   std::vector<std::string> pathParams;
-  std::unordered_map<std::string, std::variant<std::string, int, bool>> queries;
+  std::unordered_map<std::string, std::variant<std::string, explicit_int, explicit_bool>> queries;
 
   std::string GetProtocolAndHost() const { return protocol + "://" + host; }
@@ -30,6 +54,10 @@ struct Url {
     }
     return path;
   };
+
+  std::string ToFullPath() const {
+    return GetProtocolAndHost() + GetPathAndQuery();
+  }
 };
 
 const std::regex url_regex(
@@ -102,10 +130,10 @@ inline std::string FromUrl(const Url& url) {
       std::string value_str;
       if (std::holds_alternative<std::string>(value)) {
         value_str = std::get<std::string>(value);
-      } else if (std::holds_alternative<int>(value)) {
-        value_str = std::to_string(std::get<int>(value));
-      } else if (std::holds_alternative<bool>(value)) {
-        value_str = std::get<bool>(value) ? "true" : "false";
+      } else if (std::holds_alternative<explicit_int>(value)) {
+        value_str = std::to_string(int(std::get<explicit_int>(value)));
+      } else if (std::holds_alternative<explicit_bool>(value)) {
+        value_str = std::get<explicit_bool>(value) ? "true" : "false";
       }
       if (!query_string.empty()) {
         query_string += "&";
diff --git a/engine/vcpkg.json b/engine/vcpkg.json
index 8f7729524..40abc186e 100644
--- a/engine/vcpkg.json
+++ b/engine/vcpkg.json
@@ -10,7 +10,6 @@
     ]
   },
   "drogon",
-  "jinja2cpp",
   "jsoncpp",
   "minizip",
   "nlohmann-json",
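
Note (illustrative only, not part of the patch): the url_parser.h hunk above switches the query variant from plain int/bool to the explicit_int/explicit_bool wrappers, which only accept genuine int or bool arguments and so presumably guard against values landing in the wrong variant alternative via implicit conversion. A minimal sketch, assuming "utils/url_parser.h" from this patch is on the include path; the query names are made up.

#include <iostream>

#include "utils/url_parser.h"

int main() {
  url_parser::Url url = {
      .protocol = "https",
      .host = "huggingface.co",
      .pathParams = {"api", "models", "cortexso", "tinyllama", "tree", "main"},
      .queries = {{"recursive", url_parser::explicit_bool{true}},
                  {"limit", url_parser::explicit_int{20}}}};

  // FromUrl() serializes explicit_int as digits and explicit_bool as true/false,
  // as shown in the hunk above.
  std::cout << url_parser::FromUrl(url) << "\n";
  return 0;
}
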