diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json index 81bbbf7d1..84bb7efed 100644 --- a/docs/static/openapi/cortex.json +++ b/docs/static/openapi/cortex.json @@ -22,9 +22,7 @@ "description": "The assistant has been successfully created." } }, - "tags": [ - "Assistants" - ] + "tags": ["Assistants"] }, "get": { "operationId": "AssistantsController_findAll", @@ -83,9 +81,7 @@ } } }, - "tags": [ - "Assistants" - ] + "tags": ["Assistants"] } }, "/v1/assistants/{id}": { @@ -116,9 +112,7 @@ } } }, - "tags": [ - "Assistants" - ] + "tags": ["Assistants"] }, "delete": { "operationId": "AssistantsController_remove", @@ -147,9 +141,7 @@ } } }, - "tags": [ - "Assistants" - ] + "tags": ["Assistants"] } }, "/healthz": { @@ -166,9 +158,7 @@ } } }, - "tags": [ - "Server" - ] + "tags": ["Server"] } }, "/processManager/destroy": { @@ -185,9 +175,7 @@ } } }, - "tags": [ - "Server" - ] + "tags": ["Server"] } }, "/v1/chat/completions": { @@ -227,9 +215,7 @@ } } }, - "tags": [ - "Chat" - ] + "tags": ["Chat"] } }, "/v1/models/pull": { @@ -328,14 +314,10 @@ } } }, - "tags": [ - "Pulling Models" - ] + "tags": ["Pulling Models"] }, "delete": { - "tags": [ - "Pulling Models" - ], + "tags": ["Pulling Models"], "summary": "Stop model download", "description": "Stops the download of a model with the corresponding taskId provided in the request body", "operationId": "ModelsController_stopModelDownload", @@ -351,9 +333,7 @@ "description": "The unique identifier of the download task to be stopped" } }, - "required": [ - "taskId" - ] + "required": ["taskId"] } } } @@ -448,9 +428,7 @@ } } }, - "tags": [ - "Running Models" - ] + "tags": ["Running Models"] } }, "/v1/models/start": { @@ -483,9 +461,7 @@ } } }, - "tags": [ - "Running Models" - ] + "tags": ["Running Models"] } }, "/v1/models/stop": { @@ -518,9 +494,7 @@ } } }, - "tags": [ - "Running Models" - ] + "tags": ["Running Models"] } }, "/v1/models/{id}": { @@ -551,9 +525,7 @@ } } }, - "tags": [ - "Running Models" - ] + "tags": ["Running Models"] }, "delete": { "operationId": "ModelsController_remove", @@ -582,9 +554,7 @@ } } }, - "tags": [ - "Running Models" - ] + "tags": ["Running Models"] } }, "/v1/models/{model}": { @@ -624,9 +594,7 @@ } } }, - "tags": [ - "Running Models" - ] + "tags": ["Running Models"] } }, "/v1/models/import": { @@ -667,18 +635,14 @@ } } }, - "tags": [ - "Pulling Models" - ] + "tags": ["Pulling Models"] } }, "/v1/threads": { "post": { "operationId": "ThreadsController_create", "summary": "Create thread", - "tags": [ - "Threads" - ], + "tags": ["Threads"], "description": "Creates a new thread.", "parameters": [], "requestBody": { @@ -707,9 +671,7 @@ "get": { "operationId": "ThreadsController_findAll", "summary": "List threads", - "tags": [ - "Threads" - ], + "tags": ["Threads"], "description": "Lists all the available threads along with its configurations.", "parameters": [], "responses": { @@ -733,9 +695,7 @@ "get": { "operationId": "ThreadsController_retrieveMessage", "summary": "Retrieve message", - "tags": [ - "Messages" - ], + "tags": ["Messages"], "description": "Retrieves a message.", "parameters": [ { @@ -771,9 +731,7 @@ "post": { "operationId": "ThreadsController_updateMessage", "summary": "Modify message", - "tags": [ - "Messages" - ], + "tags": ["Messages"], "description": "Modifies a message.", "responses": { "201": { @@ -820,9 +778,7 @@ "operationId": "ThreadsController_deleteMessage", "summary": "Delete message", "description": "Deletes a message.", - "tags": [ - "Messages" - ], + "tags": ["Messages"], 
"parameters": [ { "name": "thread_id", @@ -859,9 +815,7 @@ "get": { "operationId": "ThreadsController_getMessagesOfThread", "summary": "List messages", - "tags": [ - "Messages" - ], + "tags": ["Messages"], "description": "Returns a list of messages for a given thread.", "parameters": [ { @@ -929,9 +883,7 @@ "post": { "operationId": "ThreadsController_createMessageInThread", "summary": "Create message", - "tags": [ - "Messages" - ], + "tags": ["Messages"], "description": "Create a message.", "responses": { "201": { @@ -972,9 +924,7 @@ "operationId": "ThreadsController_cleanThread", "summary": "Clean thread", "description": "Deletes all messages in a thread.", - "tags": [ - "Threads" - ], + "tags": ["Threads"], "parameters": [ { "name": "thread_id", @@ -996,9 +946,7 @@ "get": { "operationId": "ThreadsController_retrieveThread", "summary": "Retrieve thread", - "tags": [ - "Threads" - ], + "tags": ["Threads"], "description": "Retrieves a thread.", "parameters": [ { @@ -1026,9 +974,7 @@ "post": { "operationId": "ThreadsController_modifyThread", "summary": "Modify thread", - "tags": [ - "Threads" - ], + "tags": ["Threads"], "description": "Modifies a thread.", "parameters": [ { @@ -1069,9 +1015,7 @@ "delete": { "operationId": "ThreadsController_remove", "summary": "Delete thread", - "tags": [ - "Threads" - ], + "tags": ["Threads"], "description": "Deletes a specific thread defined by a thread `id` .", "parameters": [ { @@ -1108,9 +1052,7 @@ "description": "" } }, - "tags": [ - "System" - ] + "tags": ["System"] }, "get": { "operationId": "SystemController_get", @@ -1122,9 +1064,7 @@ "description": "Ok" } }, - "tags": [ - "System" - ] + "tags": ["System"] } }, "/v1/system/events/download": { @@ -1145,9 +1085,7 @@ } } }, - "tags": [ - "System" - ] + "tags": ["System"] } }, "/v1/system/events/model": { @@ -1168,9 +1106,7 @@ } } }, - "tags": [ - "System" - ] + "tags": ["System"] } }, "/v1/system/events/resources": { @@ -1191,164 +1127,408 @@ } } }, - "tags": [ - "System" - ] + "tags": ["System"] } }, - "/v1/engines": { + "/v1/engines/{name}": { "get": { - "operationId": "EnginesController_findAll", - "summary": "List available engines", - "description": "Lists the currently available local engines.", - "parameters": [], + "operationId": "EnginesController_findOne", + "summary": "Get an engine", + "description": "Retrieves an engine instance, providing basic information about the engine.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["onnxruntime", "llama-cpp", "tensorrt-llm"] + }, + "description": "The type of engine" + } + ], "responses": { "200": { "description": "Successful response", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/EngineList" + "type": "array", + "items": { + "type": "object", + "properties": { + "engine": { + "type": "string", + "example": "llama-cpp" + }, + "name": { + "type": "string", + "example": "mac-arm64" + }, + "version": { + "type": "string", + "example": "0.1.35-28.10.24" + } + } + } + }, + "example": [ + { + "engine": "llama-cpp", + "name": "mac-arm64", + "version": "0.1.35-28.10.24" + }, + { + "engine": "llama-cpp", + "name": "linux-amd64-avx", + "version": "0.1.35-27.10.24" + } + ] + } + } + } + }, + "tags": ["Engines"] + }, + "post": { + "summary": "Install an engine", + "description": "Install an engine of a specific type, with optional version and variant", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": 
"string", + "enum": ["onnxruntime", "llama-cpp", "tensorrt-llm"] + }, + "description": "The type of engine" + }, + { + "name": "version", + "in": "query", + "required": false, + "schema": { + "type": "string" + }, + "description": "The version of the engine to install (optional)" + }, + { + "name": "variant", + "in": "query", + "required": false, + "schema": { + "type": "string" + }, + "description": "The variant of the engine to install (optional)" + } + ], + "responses": { + "200": { + "description": "Successful installation", + "content": { + "application/json": { + "schema": {} + } + } + } + }, + "tags": ["Engines"] + }, + "delete": { + "summary": "Uninstall an engine", + "description": "Uninstall an engine based on type, version, and variant", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["onnxruntime", "llama-cpp", "tensorrt-llm"] + }, + "description": "The type of engine" + }, + { + "name": "version", + "in": "query", + "required": false, + "schema": { + "type": "string" + }, + "description": "The version of the engine to uninstall (optional)" + }, + { + "name": "variant", + "in": "query", + "required": false, + "schema": { + "type": "string" + }, + "description": "The variant of the engine to uninstall (optional)" + } + ], + "responses": { + "200": { + "description": "Successful uninstallation", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean", + "description": "Indicates if the uninstallation was successful" + }, + "message": { + "type": "string", + "description": "Description of the uninstallation action taken" + }, + "uninstalledEngines": { + "type": "array", + "items": { + "type": "object", + "properties": { + "version": { + "type": "string" + }, + "variant": { + "type": "string" + } + } + }, + "description": "List of uninstalled engine versions and variants" + } + } + } + } + } + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "description": "Error message describing the issue with the request" + } + } } } } } }, - "tags": [ - "Engines" - ] + "tags": ["Engines"] } }, - "/v1/engines/{name}": { + "/v1/engines/{name}/default": { "get": { - "operationId": "EnginesController_findOne", - "summary": "Get an engine", - "description": "Retrieves an engine instance, providing basic information about the engine.", + "summary": "Get default engine variant", + "description": "Retrieves the default engine variant for the specified engine type.", "parameters": [ { "name": "name", - "required": true, "in": "path", - "description": "The unique identifier of the engine.", + "required": true, "schema": { "type": "string", - "enum": [ - "onnxruntime", - "llama-cpp", - "tensorrt-llm" - ] - } + "enum": ["onnxruntime", "llama-cpp", "tensorrt-llm"] + }, + "description": "The type of engine" } ], "responses": { "200": { - "description": "Ok", + "description": "Successful response", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/Engine" + "type": "object", + "properties": { + "engine": { + "type": "string", + "example": "llama-cpp" + }, + "name": { + "type": "string", + "example": "mac-arm64" + }, + "version": { + "type": "string", + "example": "0.1.35-28.10.24" + } + } } } } } }, - "tags": [ - "Engines" - ] - } - }, - "/v1/engines/install/{name}": { + "tags": ["Engines"] + }, "post": { - 
"operationId": "EnginesController_initialize", - "summary": "Install an engine", - "description": "Install an engine with the given name. It will download the engine if it is not available locally.", + "summary": "Set default engine variant", + "description": "Sets the default engine variant for the specified engine type.", "parameters": [ { "name": "name", - "required": true, "in": "path", - "description": "The unique identifier of the engine.", + "required": true, "schema": { "type": "string", - "enum": [ - "onnxruntime", - "llama-cpp", - "tensorrt-llm" - ] - } + "enum": ["onnxruntime", "llama-cpp", "tensorrt-llm"] + }, + "description": "The type of engine" + }, + { + "name": "version", + "in": "query", + "required": true, + "schema": { + "type": "string" + }, + "description": "The version of the engine variant" + }, + { + "name": "variant", + "in": "query", + "required": true, + "schema": { + "type": "string" + }, + "description": "The variant of the engine" } ], "responses": { "200": { - "description": "Ok", + "description": "Successful response", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/EngineInstallationResponseDto" + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Default engine variant set successfully" + } + } } } } - }, - "400": { - "description": "Bad Request", + } + }, + "tags": ["Engines"] + } + }, + "/v1/engines/{name}/load": { + "post": { + "summary": "Load engine", + "description": "Loads the specified engine type.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["onnxruntime", "llama-cpp", "tensorrt-llm"] + }, + "description": "The name of the engine to update" + } + ], + "responses": { + "200": { + "description": "Engine loaded successfully", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/SimpleErrorResponse" + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine loaded successfully" + } + } } } } } }, - "tags": [ - "Engines" - ] + "tags": ["Engines"] }, "delete": { - "operationId": "EnginesController_deleteEngine", - "summary": "Uninstall an engine", - "description": "Uninstall an installed engine with the given name.", + "summary": "Unload engine", + "description": "Unloads the specified engine type.", "parameters": [ { "name": "name", - "required": true, "in": "path", - "description": "The unique identifier of the engine.", + "required": true, "schema": { "type": "string", - "enum": [ - "onnxruntime", - "llama-cpp", - "tensorrt-llm" - ] - } + "enum": ["onnxruntime", "llama-cpp", "tensorrt-llm"] + }, + "description": "The name of the engine to update" } ], "responses": { "200": { - "description": "Ok", + "description": "Engine unloaded successfully", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/EngineUninstallationResponseDto" + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine unloaded successfully" + } + } } } } - }, - "400": { - "description": "Bad Request", + } + }, + "tags": ["Engines"] + } + }, + "/v1/engines/{name}/update": { + "post": { + "summary": "Update engine", + "description": "Updates the specified engine type using the engine variant currently set as default.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["onnxruntime", "llama-cpp", "tensorrt-llm"] + }, + "description": "The name of the 
engine to update" + } + ], + "responses": { + "200": { + "description": "Engine updated successfully", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/SimpleErrorResponse" + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine updated successfully" + } + } } } } } }, - "tags": [ - "Engines" - ] + "tags": ["Engines"] } } }, @@ -1582,11 +1762,7 @@ "description": "Indicates whether the assistant was successfully deleted." } }, - "required": [ - "id", - "object", - "deleted" - ] + "required": ["id", "object", "deleted"] }, "Message": { "type": "object", @@ -1603,21 +1779,14 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "name": { "type": "string", "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." } }, - "required": [ - "role" - ] + "required": ["role"] }, "SystemMessage": { "allOf": [ @@ -1646,10 +1815,7 @@ "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." } }, - "required": [ - "content", - "role" - ] + "required": ["content", "role"] } ] }, @@ -1700,10 +1866,7 @@ "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." } }, - "required": [ - "content", - "role" - ] + "required": ["content", "role"] } ] }, @@ -1815,10 +1978,7 @@ "type": "string" } }, - "required": [ - "content", - "tool_call_id" - ] + "required": ["content", "tool_call_id"] } ] }, @@ -1835,36 +1995,26 @@ "properties": { "type": { "type": "string", - "enum": [ - "text" - ] + "enum": ["text"] }, "text": { "type": "string" } }, - "required": [ - "type", - "text" - ] + "required": ["type", "text"] }, "ImageContentPart": { "type": "object", "properties": { "type": { "type": "string", - "enum": [ - "image_url" - ] + "enum": ["image_url"] }, "image_url": { "$ref": "#/components/schemas/ImageUrl" } }, - "required": [ - "type", - "image_url" - ] + "required": ["type", "image_url"] }, "AudioContentPart": { "type": "object", @@ -1877,10 +2027,7 @@ "$ref": "#/components/schemas/InputAudio" } }, - "required": [ - "type", - "input_audio" - ] + "required": ["type", "input_audio"] }, "RefusalContentPart": { "type": "object", @@ -1892,10 +2039,7 @@ "type": "string" } }, - "required": [ - "type", - "refusal" - ] + "required": ["type", "refusal"] }, "ImageUrl": { "type": "object", @@ -1910,9 +2054,7 @@ "description": "Specifies the detail level of the image. Defaults to `auto`." } }, - "required": [ - "url" - ] + "required": ["url"] }, "InputAudio": { "type": "object", @@ -1923,17 +2065,11 @@ }, "format": { "type": "string", - "enum": [ - "wav", - "mp3" - ], + "enum": ["wav", "mp3"], "description": "The format of the encoded audio data. Currently supports `wav` and `mp3`." } }, - "required": [ - "data", - "format" - ] + "required": ["data", "format"] }, "Audio": { "type": "object", @@ -1944,9 +2080,7 @@ "description": "Unique identifier for a previous audio response from the model." 
} }, - "required": [ - "id" - ] + "required": ["id"] }, "ToolCall": { "type": "object", @@ -1961,11 +2095,7 @@ "$ref": "#/components/schemas/FunctionCall" } }, - "required": [ - "id", - "type", - "function" - ] + "required": ["id", "type", "function"] }, "FunctionCall": { "type": "object", @@ -1977,10 +2107,7 @@ "type": "string" } }, - "required": [ - "name", - "arguments" - ] + "required": ["name", "arguments"] }, "CreateChatCompletionDto": { "type": "object", @@ -2034,9 +2161,7 @@ }, "stop": { "description": "Defines specific tokens or phrases that signal the model to stop producing further output.", - "example": [ - "End" - ], + "example": ["End"], "type": "array", "items": { "type": "string" @@ -2066,15 +2191,10 @@ "type": "array", "items": { "type": "string", - "enum": [ - "text", - "audio" - ] + "enum": ["text", "audio"] }, "description": "Specifies the modalities (types of input) supported by the model. Currently, cortex only support text modalities. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", - "example": [ - "text" - ] + "example": ["text"] }, "audio": { "description": "Parameters for audio output. Required when audio output is requested with `modalities: ['audio']`. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", @@ -2087,19 +2207,10 @@ "format": { "type": "string", "description": "Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`.", - "enum": [ - "mp3", - "wav", - "flac", - "opus", - "pcm16" - ] + "enum": ["mp3", "wav", "flac", "opus", "pcm16"] } }, - "required": [ - "voice", - "format" - ] + "required": ["voice", "format"] }, "store": { "type": "boolean", @@ -2146,16 +2257,10 @@ "type": { "type": "string", "description": "The format of the generated output. Must be one of `text`, `json_schema` or `json_object`.", - "enum": [ - "text", - "json_object", - "json_schema" - ] + "enum": ["text", "json_object", "json_schema"] } }, - "required": [ - "type" - ] + "required": ["type"] }, "seed": { "type": "number", @@ -2185,37 +2290,26 @@ "properties": { "type": { "type": "string", - "enum": [ - "function" - ] + "enum": ["function"] }, "function": { "$ref": "#/components/schemas/Function" } }, - "required": [ - "type", - "function" - ] + "required": ["type", "function"] }, "tool_choice": { "anyOf": [ { "type": "string", - "enum": [ - "none", - "auto", - "required" - ] + "enum": ["none", "auto", "required"] }, { "type": "object", "properties": { "type": { "type": "string", - "enum": [ - "function" - ] + "enum": ["function"] }, "function": { "type": "object", @@ -2224,15 +2318,10 @@ "type": "string" } }, - "required": [ - "name" - ] + "required": ["name"] } }, - "required": [ - "type", - "function" - ] + "required": ["type", "function"] } ] }, @@ -2307,10 +2396,7 @@ "description": "Minimum number of tokens to keep. This parameter only supported by `llama-cpp` engine." 
} }, - "required": [ - "messages", - "model" - ] + "required": ["messages", "model"] }, "Function": { "type": "object", @@ -2330,9 +2416,7 @@ "default": false } }, - "required": [ - "name" - ] + "required": ["name"] }, "MessageDto": { "type": "object", @@ -2346,10 +2430,7 @@ "description": "The role of the participant in the chat, such as 'user' or 'system', indicating who is the sender of the message." } }, - "required": [ - "content", - "role" - ] + "required": ["content", "role"] }, "ChoiceDto": { "type": "object", @@ -2371,11 +2452,7 @@ ] } }, - "required": [ - "finish_reason", - "index", - "message" - ] + "required": ["finish_reason", "index", "message"] }, "UsageDto": { "type": "object", @@ -2393,11 +2470,7 @@ "description": "The total number of tokens used in both the prompt and the completion, summarizing the entire token count of the chat operation." } }, - "required": [ - "completion_tokens", - "prompt_tokens", - "total_tokens" - ] + "required": ["completion_tokens", "prompt_tokens", "total_tokens"] }, "ChatCompletionResponseDto": { "type": "object", @@ -2424,17 +2497,11 @@ "type": "object", "properties": { "content": { - "type": [ - "string", - "null" - ], + "type": ["string", "null"], "description": "The contents of the message." }, "refusal": { - "type": [ - "string", - "null" - ], + "type": ["string", "null"], "description": "The refusal message generated by the model." }, "tool_calls": { @@ -2463,17 +2530,10 @@ "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." } }, - "required": [ - "name", - "arguments" - ] + "required": ["name", "arguments"] } }, - "required": [ - "id", - "type", - "function" - ] + "required": ["id", "type", "function"] } }, "role": { @@ -2494,10 +2554,7 @@ "description": "The name of the function to call." } }, - "required": [ - "arguments", - "name" - ] + "required": ["arguments", "name"] }, "audio": { "type": "object", @@ -2520,27 +2577,17 @@ "description": "Transcript of the audio generated by the model." } }, - "required": [ - "id", - "expires_at", - "data", - "transcript" - ] + "required": ["id", "expires_at", "data", "transcript"] } }, - "required": [ - "role" - ] + "required": ["role"] }, "logprobs": { "type": "object", "description": "Log probability information for the choice.", "properties": { "content": { - "type": [ - "array", - "null" - ], + "type": ["array", "null"], "description": "A list of message content tokens with log probability information.", "items": { "type": "object", @@ -2554,17 +2601,11 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": [ - "array", - "null" - ], + "type": ["array", "null"], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": [ - "token", - "logprob" - ] + "required": ["token", "logprob"] } }, "top_logprobs": { @@ -2582,24 +2623,15 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. 
Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": [ - "array", - "null" - ], + "type": ["array", "null"], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": [ - "token", - "logprob" - ] + "required": ["token", "logprob"] } }, "refusal": { - "type": [ - "array", - "null" - ], + "type": ["array", "null"], "description": "A list of message refusal tokens with log probability information.", "items": { "type": "object", @@ -2613,27 +2645,17 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": [ - "array", - "null" - ], + "type": ["array", "null"], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": [ - "token", - "logprob" - ] + "required": ["token", "logprob"] } } } } }, - "required": [ - "finish_reason", - "index", - "message" - ] + "required": ["finish_reason", "index", "message"] } }, "created": { @@ -2645,10 +2667,7 @@ "description": "The model used for the chat completion." }, "service_tier": { - "type": [ - "string", - "null" - ], + "type": ["string", "null"], "description": "The service tier used for processing the request. This field is only included if the service_tier parameter is specified in the request." }, "system_fingerprint": { @@ -2688,10 +2707,7 @@ "description": "Tokens generated by the model for reasoning." } }, - "required": [ - "audio_tokens", - "reasoning_tokens" - ] + "required": ["audio_tokens", "reasoning_tokens"] }, "prompt_tokens_details": { "type": "object", @@ -2706,10 +2722,7 @@ "description": "Cached tokens present in the prompt." } }, - "required": [ - "audio_tokens", - "cached_tokens" - ] + "required": ["audio_tokens", "cached_tokens"] } }, "required": [ @@ -2749,10 +2762,7 @@ "description": "A chat completion delta generated by streamed model responses.", "properties": { "content": { - "type": [ - "string", - "null" - ], + "type": ["string", "null"], "description": "The contents of the chunk message." }, "function_call": { @@ -2790,18 +2800,10 @@ "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." } }, - "required": [ - "name", - "arguments" - ] + "required": ["name", "arguments"] } }, - "required": [ - "index", - "id", - "type", - "function" - ] + "required": ["index", "id", "type", "function"] } }, "role": { @@ -2809,10 +2811,7 @@ "description": "The role of the author of this message." }, "refusal": { - "type": [ - "string", - "null" - ], + "type": ["string", "null"], "description": "The refusal message generated by the model." 
} } @@ -2822,10 +2821,7 @@ "description": "Log probability information for the choice.", "properties": { "content": { - "type": [ - "array", - "null" - ], + "type": ["array", "null"], "description": "A list of message content tokens with log probability information.", "items": { "type": "object", @@ -2839,17 +2835,11 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": [ - "array", - "null" - ], + "type": ["array", "null"], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": [ - "token", - "logprob" - ] + "required": ["token", "logprob"] } }, "top_logprobs": { @@ -2867,24 +2857,15 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": [ - "array", - "null" - ], + "type": ["array", "null"], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": [ - "token", - "logprob" - ] + "required": ["token", "logprob"] } }, "refusal": { - "type": [ - "array", - "null" - ], + "type": ["array", "null"], "description": "A list of message refusal tokens with log probability information.", "items": { "type": "object", @@ -2898,26 +2879,17 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": [ - "array", - "null" - ], + "type": ["array", "null"], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": [ - "token", - "logprob" - ] + "required": ["token", "logprob"] } } } }, "finish_reason": { - "type": [ - "string", - "null" - ], + "type": ["string", "null"], "description": "The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool, or function_call (deprecated) if the model called a function." }, "index": { @@ -2925,10 +2897,7 @@ "description": "The index of the choice in the list of choices." } }, - "required": [ - "delta", - "index" - ] + "required": ["delta", "index"] } }, "created": { @@ -2940,10 +2909,7 @@ "description": "The model used to generate the completion." }, "service_tier": { - "type": [ - "string", - "null" - ], + "type": ["string", "null"], "description": "The service tier used for processing the request. 
This field is only included if the service_tier parameter is specified in the request." }, "system_fingerprint": { @@ -2971,11 +2937,7 @@ "description": "Total number of tokens used in the request (prompt + completion)." } }, - "required": [ - "completion_tokens", - "prompt_tokens", - "total_tokens" - ] + "required": ["completion_tokens", "prompt_tokens", "total_tokens"] } }, "required": [ @@ -2996,9 +2958,7 @@ "description": "The name of the embedding model to be used." }, "input": { - "example": [ - "Hello World" - ], + "example": ["Hello World"], "description": "The text or token array(s) to be embedded. This can be a single string, an array of strings, or an array of token arrays to embed multiple inputs in one request.", "type": "array", "items": { @@ -3016,10 +2976,7 @@ "description": "Defines the number of dimensions for the output embeddings. This feature is supported by certain models only. This field is optional." } }, - "required": [ - "model", - "input" - ] + "required": ["model", "input"] }, "EmbeddingsResponseDto": { "type": "object", @@ -3048,18 +3005,11 @@ ] } }, - "required": [ - "object", - "model", - "embedding", - "usage" - ] + "required": ["object", "model", "embedding", "usage"] }, "PullModelRequest": { "type": "object", - "required": [ - "model" - ], + "required": ["model"], "properties": { "model": { "type": "string", @@ -3117,9 +3067,7 @@ }, "files": { "description": "The URL sources from which the model downloaded or accessed.", - "example": [ - "https://huggingface.co/cortexso/mistral/tree/gguf" - ], + "example": ["https://huggingface.co/cortexso/mistral/tree/gguf"], "oneOf": [ { "type": "array", @@ -3139,9 +3087,7 @@ }, "stop": { "description": "Defines specific tokens or phrases that signal the model to stop producing further output.", - "example": [ - "End" - ], + "example": ["End"], "type": "array", "items": { "type": "string" @@ -3211,10 +3157,7 @@ "default": "" } }, - "required": [ - "model", - "files" - ] + "required": ["model", "files"] }, "StartModelSuccessDto": { "type": "object", @@ -3228,10 +3171,7 @@ "description": "The unique identifier of the model." } }, - "required": [ - "message", - "modelId" - ] + "required": ["message", "modelId"] }, "ModelStartDto": { "type": "object", @@ -3278,9 +3218,7 @@ "example": "/tmp/model.gguf" } }, - "required": [ - "model" - ] + "required": ["model"] }, "ModelStopDto": { "type": "object", @@ -3291,9 +3229,7 @@ "description": "A downloaded model name." } }, - "required": [ - "model" - ] + "required": ["model"] }, "ImportModelRequest": { "type": "object", @@ -3313,16 +3249,10 @@ "option": { "type": "string", "description": "Import options such as symlink or copy.", - "enum": [ - "symlink", - "copy" - ] + "enum": ["symlink", "copy"] } }, - "required": [ - "model", - "modelPath" - ] + "required": ["model", "modelPath"] }, "ImportModelResponse": { "type": "object", @@ -3341,11 +3271,7 @@ "example": "OK" } }, - "required": [ - "message", - "modelHandle", - "result" - ] + "required": ["message", "modelHandle", "result"] }, "CommonResponseDto": { "type": "object", @@ -3355,9 +3281,7 @@ "description": "The response success or error message." 
} }, - "required": [ - "message" - ] + "required": ["message"] }, "EngineUninstallationResponseDto": { "type": "object", @@ -3413,11 +3337,7 @@ "example": "OK" } }, - "required": [ - "data", - "object", - "result" - ] + "required": ["data", "object", "result"] }, "Engine": { "type": "object", @@ -3447,12 +3367,7 @@ "example": "0.1.34" } }, - "required": [ - "description", - "name", - "productName", - "status" - ] + "required": ["description", "name", "productName", "status"] }, "ModelDto": { "type": "object", @@ -3468,9 +3383,7 @@ "description": "A predefined text or framework that guides the AI model's response generation." }, "stop": { - "example": [ - "End" - ], + "example": ["End"], "description": "Defines specific tokens or phrases that signal the model to stop producing further output.", "type": "array", "items": { @@ -3582,9 +3495,7 @@ "example": "llamacpp" } }, - "required": [ - "id" - ] + "required": ["id"] }, "ListModelsResponseDto": { "type": "object", @@ -3592,9 +3503,7 @@ "object": { "type": "string", "example": "list", - "enum": [ - "list" - ] + "enum": ["list"] }, "data": { "description": "List of models", @@ -3604,10 +3513,7 @@ } } }, - "required": [ - "object", - "data" - ] + "required": ["object", "data"] }, "UpdateModelDto": { "type": "object", @@ -3626,9 +3532,7 @@ "items": { "type": "string" }, - "example": [ - "" - ] + "example": [""] }, "stream": { "type": "boolean", @@ -3787,11 +3691,7 @@ "description": "Indicates whether the model was successfully deleted." } }, - "required": [ - "id", - "object", - "deleted" - ] + "required": ["id", "object", "deleted"] }, "CreateThreadAssistantDto": { "type": "object", @@ -3881,10 +3781,7 @@ "tool_resources": { "type": "object", "example": { - "resources": [ - "database1", - "database2" - ] + "resources": ["database1", "database2"] }, "description": "Tool resources for the assistant." } @@ -3912,9 +3809,7 @@ } } }, - "required": [ - "assistants" - ] + "required": ["assistants"] }, "ContentDto": { "type": "object", @@ -3933,10 +3828,7 @@ "description": "Text content of the message along with any annotations." } }, - "required": [ - "type", - "text" - ] + "required": ["type", "text"] }, "GetMessageResponseDto": { "type": "object", @@ -4110,13 +4002,7 @@ "description": "Indicates whether there are more messages to retrieve." } }, - "required": [ - "object", - "data", - "first_id", - "last_id", - "has_more" - ] + "required": ["object", "data", "first_id", "last_id", "has_more"] }, "CreateMessageDto": { "type": "object", @@ -4132,10 +4018,7 @@ "description": "The text contents of the message." } }, - "required": [ - "role", - "content" - ] + "required": ["role", "content"] }, "UpdateMessageDto": { "type": "object", @@ -4161,11 +4044,7 @@ "description": "Indicates whether the message was successfully deleted." } }, - "required": [ - "id", - "object", - "deleted" - ] + "required": ["id", "object", "deleted"] }, "GetThreadResponseDto": { "type": "object", @@ -4186,9 +4065,7 @@ "description": "Unix timestamp representing the creation time of the thread." }, "assistants": { - "example": [ - "assistant-001" - ], + "example": ["assistant-001"], "description": "List of assistants involved in the thread.", "type": "array", "items": { @@ -4242,12 +4119,8 @@ "description": "Indicates whether the thread was successfully deleted." 
} }, - "required": [ - "id", - "object", - "deleted" - ] + "required": ["id", "object", "deleted"] } } } -} \ No newline at end of file +} diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index dadad73a9..d4e9ac5f6 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -73,7 +73,6 @@ find_package(jsoncpp CONFIG REQUIRED) find_package(Drogon CONFIG REQUIRED) find_package(yaml-cpp CONFIG REQUIRED) find_package(httplib CONFIG REQUIRED) -find_package(nlohmann_json CONFIG REQUIRED) find_package(unofficial-minizip CONFIG REQUIRED) find_package(LibArchive REQUIRED) find_package(CURL REQUIRED) @@ -149,7 +148,6 @@ add_executable(${TARGET_NAME} main.cc target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) target_link_libraries(${TARGET_NAME} PRIVATE httplib::httplib) -target_link_libraries(${TARGET_NAME} PRIVATE nlohmann_json::nlohmann_json) target_link_libraries(${TARGET_NAME} PRIVATE unofficial::minizip::minizip) target_link_libraries(${TARGET_NAME} PRIVATE LibArchive::LibArchive) target_link_libraries(${TARGET_NAME} PRIVATE CURL::libcurl) diff --git a/engine/cli/CMakeLists.txt b/engine/cli/CMakeLists.txt index 19f206a40..be0a7dcfe 100644 --- a/engine/cli/CMakeLists.txt +++ b/engine/cli/CMakeLists.txt @@ -63,7 +63,6 @@ add_compile_definitions(CORTEX_CONFIG_FILE_PATH="${CORTEX_CONFIG_FILE_PATH}") find_package(jsoncpp CONFIG REQUIRED) find_package(yaml-cpp CONFIG REQUIRED) find_package(httplib CONFIG REQUIRED) -find_package(nlohmann_json CONFIG REQUIRED) find_package(CLI11 CONFIG REQUIRED) find_package(unofficial-minizip CONFIG REQUIRED) find_package(LibArchive REQUIRED) @@ -87,7 +86,6 @@ add_executable(${TARGET_NAME} main.cc ) target_link_libraries(${TARGET_NAME} PRIVATE httplib::httplib) -target_link_libraries(${TARGET_NAME} PRIVATE nlohmann_json::nlohmann_json) target_link_libraries(${TARGET_NAME} PRIVATE CLI11::CLI11) target_link_libraries(${TARGET_NAME} PRIVATE unofficial::minizip::minizip) target_link_libraries(${TARGET_NAME} PRIVATE LibArchive::LibArchive) @@ -128,4 +126,4 @@ set_target_properties(${TARGET_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR} RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR} RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} -) \ No newline at end of file +) diff --git a/engine/cli/command_line_parser.cc b/engine/cli/command_line_parser.cc index 1eea35196..625750248 100644 --- a/engine/cli/command_line_parser.cc +++ b/engine/cli/command_line_parser.cc @@ -1,12 +1,14 @@ #include "command_line_parser.h" #include #include +#include #include "commands/cortex_upd_cmd.h" #include "commands/engine_get_cmd.h" #include "commands/engine_install_cmd.h" #include "commands/engine_list_cmd.h" #include "commands/engine_uninstall_cmd.h" -#include "commands/model_alias_cmd.h" +#include "commands/engine_update_cmd.h" +#include "commands/engine_use_cmd.h" #include "commands/model_del_cmd.h" #include "commands/model_get_cmd.h" #include "commands/model_import_cmd.h" @@ -94,8 +96,8 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { CLI_LOG("\nNew Cortex release available: " << CORTEX_CPP_VERSION << " -> " << *latest_version); CLI_LOG("To update, run: " << commands::GetRole() - << commands::GetCortexBinary() - << " update"); + << commands::GetCortexBinary() + << " update"); } done = true; }); @@ -138,7 +140,8 @@ void CommandLineParser::SetupCommonCommands() { } }); - auto run_cmd = app_.add_subcommand("run", "Shortcut: pull, start & chat with a model"); + auto run_cmd = + app_.add_subcommand("run", "Shortcut: 
pull, start & chat with a model"); run_cmd->group(kCommonCommandsGroup); run_cmd->usage("Usage:\n" + commands::GetCortexBinary() + " run [options] [model_id]"); @@ -270,30 +273,6 @@ void CommandLineParser::SetupModelCommands() { cml_data_.model_id); }); - std::string model_alias; - auto model_alias_cmd = - models_cmd->add_subcommand("alias", "Add a model alias instead of ID"); - model_alias_cmd->usage("Usage:\n" + commands::GetCortexBinary() + - " models alias --model_id [model_id] --alias [alias]"); - model_alias_cmd->group(kSubcommands); - model_alias_cmd->add_option( - "--model_id", cml_data_.model_id, - "Can be a model ID or model alias"); - model_alias_cmd->add_option("--alias", cml_data_.model_alias, - "new alias to be set"); - model_alias_cmd->callback([this, model_alias_cmd]() { - if (std::exchange(executed_, true)) - return; - if (cml_data_.model_id.empty() || cml_data_.model_alias.empty()) { - CLI_LOG("[model_id] and [alias] are required\n"); - CLI_LOG(model_alias_cmd->help()); - return; - } - commands::ModelAliasCmd mdc; - mdc.Exec(cml_data_.config.apiServerHost, - std::stoi(cml_data_.config.apiServerPort), cml_data_.model_id, - cml_data_.model_alias); - }); // Model update parameters comment ModelUpdate(models_cmd); @@ -384,6 +363,41 @@ void CommandLineParser::SetupEngineCommands() { EngineUninstall(uninstall_cmd, engine_name); } + auto engine_upd_cmd = engines_cmd->add_subcommand("update", "Update engine"); + engine_upd_cmd->usage("Usage:\n" + commands::GetCortexBinary() + + " engines update [engine_name]"); + engine_upd_cmd->callback([this, engine_upd_cmd] { + if (std::exchange(executed_, true)) + return; + if (engine_upd_cmd->get_subcommands().empty()) { + CLI_LOG("[engine_name] is required\n"); + CLI_LOG(engine_upd_cmd->help()); + } + }); + engine_upd_cmd->group(kSubcommands); + for (auto& engine : engine_service_.kSupportEngines) { + std::string engine_name{engine}; + EngineUpdate(engine_upd_cmd, engine_name); + } + + auto engine_use_cmd = + engines_cmd->add_subcommand("use", "Set engine as default"); + engine_use_cmd->usage("Usage:\n" + commands::GetCortexBinary() + + " engines use [engine_name]"); + engine_use_cmd->callback([this, engine_use_cmd] { + if (std::exchange(executed_, true)) + return; + if (engine_use_cmd->get_subcommands().empty()) { + CLI_LOG("[engine_name] is required\n"); + CLI_LOG(engine_use_cmd->help()); + } + }); + engine_use_cmd->group(kSubcommands); + for (auto& engine : engine_service_.kSupportEngines) { + std::string engine_name{engine}; + EngineUse(engine_use_cmd, engine_name); + } + EngineGet(engines_cmd); } @@ -400,7 +414,11 @@ void CommandLineParser::SetupSystemCommands() { << " to " << cml_data_.port); auto config_path = file_manager_utils::GetConfigurationPath(); cml_data_.config.apiServerPort = std::to_string(cml_data_.port); - config_yaml_utils::DumpYamlConfig(cml_data_.config, config_path.string()); + auto result = config_yaml_utils::DumpYamlConfig(cml_data_.config, + config_path.string()); + if (result.has_error()) { + CLI_LOG("Error update " << config_path.string() << result.error()); + } } commands::ServerStartCmd ssc; ssc.Exec(cml_data_.config.apiServerHost, @@ -417,8 +435,7 @@ void CommandLineParser::SetupSystemCommands() { ssc.Exec(); }); - auto ps_cmd = - app_.add_subcommand("ps", "Show active model statuses"); + auto ps_cmd = app_.add_subcommand("ps", "Show active model statuses"); ps_cmd->group(kSystemGroup); ps_cmd->usage("Usage:\n" + commands::GetCortexBinary() + "ps"); ps_cmd->callback([&]() { @@ -460,13 +477,16 @@ void 
CommandLineParser::EngineInstall(CLI::App* parent, install_engine_cmd->add_option("-s, --source", src, "Install engine by local path"); + install_engine_cmd->add_flag("-m, --menu", cml_data_.show_menu, + "Display menu for engine variant selection"); + install_engine_cmd->callback([this, engine_name, &version, &src] { if (std::exchange(executed_, true)) return; try { - commands::EngineInstallCmd(download_service_, - cml_data_.config.apiServerHost, - std::stoi(cml_data_.config.apiServerPort)) + commands::EngineInstallCmd( + download_service_, cml_data_.config.apiServerHost, + std::stoi(cml_data_.config.apiServerPort), cml_data_.show_menu) .Exec(engine_name, version, src); } catch (const std::exception& e) { CTL_ERR(e.what()); @@ -494,6 +514,47 @@ void CommandLineParser::EngineUninstall(CLI::App* parent, }); } +void CommandLineParser::EngineUpdate(CLI::App* parent, + const std::string& engine_name) { + auto engine_update_cmd = parent->add_subcommand(engine_name, ""); + engine_update_cmd->usage("Usage:\n" + commands::GetCortexBinary() + + " engines update " + engine_name); + engine_update_cmd->group(kEngineGroup); + + engine_update_cmd->callback([this, engine_name] { + if (std::exchange(executed_, true)) + return; + try { + commands::EngineUpdateCmd().Exec( + cml_data_.config.apiServerHost, + std::stoi(cml_data_.config.apiServerPort), engine_name); + } catch (const std::exception& e) { + CTL_ERR(e.what()); + } + }); +} + +void CommandLineParser::EngineUse(CLI::App* parent, + const std::string& engine_name) { + auto engine_use_cmd = parent->add_subcommand(engine_name, ""); + engine_use_cmd->usage("Usage:\n" + commands::GetCortexBinary() + + " engines use " + engine_name); + engine_use_cmd->group(kEngineGroup); + + engine_use_cmd->callback([this, engine_name] { + if (std::exchange(executed_, true)) + return; + auto result = commands::EngineUseCmd().Exec( + cml_data_.config.apiServerHost, + std::stoi(cml_data_.config.apiServerPort), engine_name); + if (result.has_error()) { + CTL_ERR(result.error()); + } else { + CTL_INF("Engine " << engine_name << " is set as default"); + } + }); +} + void CommandLineParser::EngineGet(CLI::App* parent) { auto get_cmd = parent->add_subcommand("get", "Get engine info"); get_cmd->usage("Usage:\n" + commands::GetCortexBinary() + diff --git a/engine/cli/command_line_parser.h b/engine/cli/command_line_parser.h index f2f00ae95..9f3cdda12 100644 --- a/engine/cli/command_line_parser.h +++ b/engine/cli/command_line_parser.h @@ -27,7 +27,12 @@ class CommandLineParser { void EngineUninstall(CLI::App* parent, const std::string& engine_name); + void EngineUpdate(CLI::App* parent, const std::string& engine_name); + void EngineGet(CLI::App* parent); + + void EngineUse(CLI::App* parent, const std::string& engine_name); + void ModelUpdate(CLI::App* parent); CLI::App app_; @@ -50,6 +55,7 @@ class CommandLineParser { bool display_engine = false; bool display_version = false; std::string filter = ""; + bool show_menu = false; int port; config_yaml_utils::CortexConfig config; diff --git a/engine/cli/commands/chat_completion_cmd.cc b/engine/cli/commands/chat_completion_cmd.cc index be7b9d170..f81040bac 100644 --- a/engine/cli/commands/chat_completion_cmd.cc +++ b/engine/cli/commands/chat_completion_cmd.cc @@ -29,8 +29,10 @@ struct ChunkParser { is_done = true; } else { try { - content = nlohmann::json::parse(s)["choices"][0]["delta"]["content"]; - } catch (const nlohmann::json::parse_error& e) { + content = + json_helper::ParseJsonString(s)["choices"][0]["delta"]["content"] + 
.asString(); + } catch (const std::exception& e) { CTL_WRN("JSON parse error: " << e.what()); } } diff --git a/engine/cli/commands/cortex_upd_cmd.cc b/engine/cli/commands/cortex_upd_cmd.cc index 6983de470..5d2d7f26b 100644 --- a/engine/cli/commands/cortex_upd_cmd.cc +++ b/engine/cli/commands/cortex_upd_cmd.cc @@ -1,9 +1,9 @@ #include "cortex_upd_cmd.h" #include "httplib.h" -#include "nlohmann/json.hpp" #include "server_stop_cmd.h" #include "utils/archive_utils.h" #include "utils/file_manager_utils.h" +#include "utils/json_helper.h" #include "utils/logging_utils.h" #include "utils/scope_exit.h" #include "utils/system_info_utils.h" @@ -144,26 +144,26 @@ std::optional CheckNewUpdate( if (auto res = cli.Get(release_path)) { if (res->status == httplib::StatusCode::OK_200) { try { - auto get_latest = [](const nlohmann::json& data) -> std::string { + auto get_latest = [](const Json::Value& data) -> std::string { if (data.empty()) { return ""; } if (CORTEX_VARIANT == file_manager_utils::kBetaVariant) { - for (auto& d : data) { - if (auto tag = d["tag_name"].get(); + for (const auto& d : data) { + if (auto tag = d["tag_name"].asString(); tag.find(kBetaComp) != std::string::npos) { return tag; } } - return data[0]["tag_name"].get(); + return data[0]["tag_name"].asString(); } else { - return data["tag_name"].get(); + return data["tag_name"].asString(); } return ""; }; - auto json_res = nlohmann::json::parse(res->body); + auto json_res = json_helper::ParseJsonString(res->body); std::string latest_version = get_latest(json_res); if (latest_version.empty()) { CTL_WRN("Release not found!"); @@ -178,7 +178,7 @@ std::optional CheckNewUpdate( if (current_version != latest_version) { return latest_version; } - } catch (const nlohmann::json::parse_error& e) { + } catch (const std::exception& e) { CTL_INF("JSON parse error: " << e.what()); return std::nullopt; } @@ -321,7 +321,7 @@ bool CortexUpdCmd::GetStable(const std::string& v) { if (auto res = cli.Get(release_path)) { if (res->status == httplib::StatusCode::OK_200) { try { - auto json_data = nlohmann::json::parse(res->body); + auto json_data = json_helper::ParseJsonString(res->body); if (json_data.empty()) { CLI_LOG("Version not found: " << v); return false; @@ -333,7 +333,7 @@ bool CortexUpdCmd::GetStable(const std::string& v) { !downloaded_exe_path) { return false; } - } catch (const nlohmann::json::parse_error& e) { + } catch (const std::exception& e) { CLI_LOG_ERROR("JSON parse error: " << e.what()); return false; } @@ -377,12 +377,12 @@ bool CortexUpdCmd::GetBeta(const std::string& v) { if (auto res = cli.Get(release_path)) { if (res->status == httplib::StatusCode::OK_200) { try { - auto json_res = nlohmann::json::parse(res->body); + auto json_res = json_helper::ParseJsonString(res->body); - nlohmann::json json_data; - for (auto& jr : json_res) { + Json::Value json_data; + for (const auto& jr : json_res) { // Get the latest beta or match version - if (auto tag = jr["tag_name"].get(); + if (auto tag = jr["tag_name"].asString(); (v.empty() && tag.find(kBetaComp) != std::string::npos) || (tag == v)) { json_data = jr; @@ -401,7 +401,7 @@ bool CortexUpdCmd::GetBeta(const std::string& v) { !downloaded_exe_path) { return false; } - } catch (const nlohmann::json::parse_error& e) { + } catch (const std::exception& e) { CLI_LOG_ERROR("JSON parse error: " << e.what()); return false; } @@ -429,14 +429,13 @@ bool CortexUpdCmd::GetBeta(const std::string& v) { assert(!!downloaded_exe_path); return InstallNewVersion(dst, downloaded_exe_path.value()); - ; } 
std::optional CortexUpdCmd::HandleGithubRelease( - const nlohmann::json& assets, const std::string& os_arch) { + const Json::Value& assets, const std::string& os_arch) { std::string matched_variant = ""; - for (auto& asset : assets) { - auto asset_name = asset["name"].get(); + for (const auto& asset : assets) { + auto asset_name = asset["name"].asString(); if (asset_name.find(kCortexBinary) != std::string::npos && asset_name.find(os_arch) != std::string::npos && asset_name.find(kReleaseFormat) != std::string::npos) { @@ -451,11 +450,11 @@ std::optional CortexUpdCmd::HandleGithubRelease( } CTL_INF("Matched variant: " << matched_variant); - for (auto& asset : assets) { - auto asset_name = asset["name"].get(); + for (const auto& asset : assets) { + auto asset_name = asset["name"].asString(); if (asset_name == matched_variant) { - auto download_url = asset["browser_download_url"].get(); - auto file_name = asset["name"].get(); + auto download_url = asset["browser_download_url"].asString(); + auto file_name = asset["name"].asString(); CTL_INF("Download url: " << download_url); auto local_path = diff --git a/engine/cli/commands/cortex_upd_cmd.h b/engine/cli/commands/cortex_upd_cmd.h index 938e598ad..bd3fc51df 100644 --- a/engine/cli/commands/cortex_upd_cmd.h +++ b/engine/cli/commands/cortex_upd_cmd.h @@ -1,5 +1,6 @@ #pragma once #include +#include "services/download_service.h" #if !defined(_WIN32) #include #include @@ -57,7 +58,8 @@ inline std::string GetCortexServerBinary() { return has_exe ? kCortexServerBinary + "-nightly.exe" : kCortexServerBinary + "-nightly"; } else if (CORTEX_VARIANT == file_manager_utils::kBetaVariant) { - return has_exe ? kCortexServerBinary + "-beta.exe" : kCortexServerBinary + "-beta"; + return has_exe ? kCortexServerBinary + "-beta.exe" + : kCortexServerBinary + "-beta"; } else { return has_exe ? 
kCortexServerBinary + ".exe" : kCortexServerBinary; } @@ -104,8 +106,8 @@ class CortexUpdCmd { bool GetStable(const std::string& v); bool GetBeta(const std::string& v); - std::optional HandleGithubRelease(const nlohmann::json& assets, - const std::string& os_arch); + std::optional HandleGithubRelease(const Json::Value& assets, + const std::string& os_arch); bool GetNightly(const std::string& v); }; } // namespace commands diff --git a/engine/cli/commands/engine_get_cmd.cc b/engine/cli/commands/engine_get_cmd.cc index d1bf26641..8699c336b 100644 --- a/engine/cli/commands/engine_get_cmd.cc +++ b/engine/cli/commands/engine_get_cmd.cc @@ -2,10 +2,11 @@ #include #include #include - -#include "httplib.h" #include "server_start_cmd.h" +#include "services/engine_service.h" +#include "utils/curl_utils.h" #include "utils/logging_utils.h" +#include "utils/url_parser.h" // clang-format off #include @@ -25,30 +26,43 @@ void EngineGetCmd::Exec(const std::string& host, int port, } tabulate::Table table; - table.add_row({"Name", "Supported Formats", "Version", "Variant", "Status"}); - httplib::Client cli(host + ":" + std::to_string(port)); - auto res = cli.Get("/v1/engines/" + engine_name); - if (res) { - if (res->status == httplib::StatusCode::OK_200) { - Json::Value v; - Json::Reader reader; - reader.parse(res->body, v); - - table.add_row({v["name"].asString(), v["format"].asString(), - v["version"].asString(), v["variant"].asString(), - v["status"].asString()}); - - } else { - CLI_LOG_ERROR( - "Failed to get engine list with status code: " << res->status); + table.add_row({"#", "Name", "Version", "Variant", "Status"}); + + auto url = url_parser::Url{ + .protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "engines", engine_name}, + }; + auto result = curl_utils::SimpleGetJson(url.ToFullPath()); + if (result.has_error()) { + // TODO: refactor this + Json::Value root; + Json::Reader reader; + if (!reader.parse(result.error(), root)) { + CLI_LOG(result.error()); return; } - } else { - auto err = res.error(); - CLI_LOG_ERROR("HTTP error: " << httplib::to_string(err)); + CLI_LOG(root["message"].asString()); return; } + std::vector output; + auto installed_variants = result.value(); + for (const auto& variant : installed_variants) { + output.push_back(EngineVariantResponse{ + .name = variant["name"].asString(), + .version = variant["version"].asString(), + .engine = engine_name, + }); + } + + int count = 0; + for (auto const& v : output) { + count += 1; + table.add_row( + {std::to_string(count), v.engine, v.version, v.name, "Installed"}); + } + std::cout << table << std::endl; } }; // namespace commands diff --git a/engine/cli/commands/engine_install_cmd.cc b/engine/cli/commands/engine_install_cmd.cc index f046d89e1..9bc5c6f89 100644 --- a/engine/cli/commands/engine_install_cmd.cc +++ b/engine/cli/commands/engine_install_cmd.cc @@ -1,10 +1,10 @@ #include "engine_install_cmd.h" #include #include "server_start_cmd.h" +#include "utils/cli_selection_utils.h" #include "utils/download_progress.h" -#include "utils/engine_constants.h" -#include "utils/json_helper.h" #include "utils/logging_utils.h" +#include "utils/string_utils.h" namespace commands { bool EngineInstallCmd::Exec(const std::string& engine, @@ -32,38 +32,141 @@ bool EngineInstallCmd::Exec(const std::string& engine, } } + if (show_menu_) { + DownloadProgress dp; + dp.Connect(host_, port_); + // engine can be small, so need to start ws first + auto dp_res = std::async(std::launch::deferred, [&dp, &engine] { + return 
+ + auto versions_url = url_parser::Url{ + .protocol = "http", + .host = host_ + ":" + std::to_string(port_), + .pathParams = {"v1", "engines", engine, "versions"}, + }; + auto versions_result = curl_utils::SimpleGetJson(versions_url.ToFullPath()); + if (versions_result.has_error()) { + CTL_ERR(versions_result.error()); + return false; + } + std::vector<std::string> version_selections; + for (const auto& release_version : versions_result.value()) { + version_selections.push_back(release_version["name"].asString()); + } + + auto selected_version = + cli_selection_utils::PrintSelection(version_selections); + if (selected_version == std::nullopt) { + CTL_ERR("Invalid version selection"); + return false; + } + std::cout << "Selected version: " << selected_version.value() << std::endl; + + auto variant_url = url_parser::Url{ + .protocol = "http", + .host = host_ + ":" + std::to_string(port_), + .pathParams = + { + "v1", + "engines", + engine, + "versions", + selected_version.value(), + }, + }; + auto variant_result = curl_utils::SimpleGetJson(variant_url.ToFullPath()); + if (variant_result.has_error()) { + CTL_ERR(variant_result.error()); + return false; + } + + std::vector<std::string> variant_selections; + for (const auto& variant : variant_result.value()) { + auto v_name = variant["name"].asString(); + if (string_utils::StringContainsIgnoreCase(v_name, hw_inf_.sys_inf->os)) { + variant_selections.push_back(variant["name"].asString()); + } + } + auto selected_variant = + cli_selection_utils::PrintSelection(variant_selections); + if (selected_variant == std::nullopt) { + CTL_ERR("Invalid variant selection"); + return false; + } + std::cout << "Selected " << selected_variant.value() << " - " + << selected_version.value() << std::endl; + + auto install_url = + url_parser::Url{.protocol = "http", + .host = host_ + ":" + std::to_string(port_), + .pathParams = + { + "v1", + "engines", + engine, + }, + .queries = { + {"version", selected_version.value()}, + {"variant", selected_variant.value()}, + }}; + + auto response = curl_utils::SimplePostJson(install_url.ToFullPath()); + if (response.has_error()) { + CTL_ERR(response.error()); + return false; + } + + if (!dp_res.get()) + return false; + + bool check_cuda_download = !system_info_utils::GetCudaVersion().empty(); + if (check_cuda_download) { + if (!dp.Handle(DownloadType::CudaToolkit)) + return false; + } + + CLI_LOG("Engine " << engine << " downloaded successfully!") + return true; + } + + // default DownloadProgress dp; dp.Connect(host_, port_); // engine can be small, so need to start ws first - auto dp_res = std::async(std::launch::deferred, [&dp] { - return dp.Handle(DownloadType::Engine); - }); - CLI_LOG("Validating download items, please wait..") + auto dp_res = std::async(std::launch::deferred, + [&dp] { return dp.Handle(DownloadType::Engine); }); - httplib::Client cli(host_ + ":" + std::to_string(port_)); - Json::Value json_data; - json_data["version"] = version.empty() ? "latest" : version; - auto data_str = json_data.toStyledString(); - cli.set_read_timeout(std::chrono::seconds(60)); - auto res = cli.Post("/v1/engines/install/" + engine, httplib::Headers(), - data_str.data(), data_str.size(), "application/json"); - - if (res) { - if (res->status != httplib::StatusCode::OK_200) { - auto root = json_helper::ParseJsonString(res->body); - CLI_LOG(root["message"].asString()); - dp.ForceStop(); + auto install_url = url_parser::Url{ + .protocol = "http", + .host = host_ + ":" + std::to_string(port_), + .pathParams = + { + "v1", + "engines", + engine, + }, + }; + + if (!version.empty()) { + install_url.queries = {{"version", version}}; + } + + auto response = curl_utils::SimplePostJson(install_url.ToFullPath()); + if (response.has_error()) { + // TODO: namh refactor later + Json::Value root; + Json::Reader reader; + if (!reader.parse(response.error(), root)) { + CLI_LOG(response.error()); return false; - } else { - CLI_LOG("Start downloading.."); } - } else { - auto err = res.error(); - CTL_ERR("HTTP error: " << httplib::to_string(err)); - dp.ForceStop(); + CLI_LOG(root["message"].asString()); return false; } + CLI_LOG("Validating download items, please wait..") + if (!dp_res.get()) return false;
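Both the menu flow and the default path above end in the same POST; for reference, the pinned-version form of that request, using the url_parser::Url aggregate exactly as the surrounding code does (the host, version and variant values are copied from the e2e tests further down, purely as an example):

// Sketch: install a specific engine build non-interactively.
auto install_url = url_parser::Url{
    .protocol = "http",
    .host = "127.0.0.1:3928",
    .pathParams = {"v1", "engines", "llama-cpp"},
    .queries = {{"version", "v0.1.35-27.10.24"},
                {"variant", "linux-amd64-avx-cuda-11-7"}},
};
// Serializes to:
// http://127.0.0.1:3928/v1/engines/llama-cpp?version=v0.1.35-27.10.24&variant=linux-amd64-avx-cuda-11-7
auto response = curl_utils::SimplePostJson(install_url.ToFullPath());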
"latest" : version; - auto data_str = json_data.toStyledString(); - cli.set_read_timeout(std::chrono::seconds(60)); - auto res = cli.Post("/v1/engines/install/" + engine, httplib::Headers(), - data_str.data(), data_str.size(), "application/json"); - - if (res) { - if (res->status != httplib::StatusCode::OK_200) { - auto root = json_helper::ParseJsonString(res->body); - CLI_LOG(root["message"].asString()); - dp.ForceStop(); + auto install_url = url_parser::Url{ + .protocol = "http", + .host = host_ + ":" + std::to_string(port_), + .pathParams = + { + "v1", + "engines", + engine, + }, + }; + + if (!version.empty()) { + install_url.queries = {{"version", version}}; + } + + auto response = curl_utils::SimplePostJson(install_url.ToFullPath()); + if (response.has_error()) { + // TODO: namh refactor later + Json::Value root; + Json::Reader reader; + if (!reader.parse(response.error(), root)) { + CLI_LOG(response.error()); return false; - } else { - CLI_LOG("Start downloading.."); } - } else { - auto err = res.error(); - CTL_ERR("HTTP error: " << httplib::to_string(err)); - dp.ForceStop(); + CLI_LOG(root["message"].asString()); return false; } + CLI_LOG("Validating download items, please wait..") + if (!dp_res.get()) return false; diff --git a/engine/cli/commands/engine_install_cmd.h b/engine/cli/commands/engine_install_cmd.h index 4a22d03f7..deb9197e1 100644 --- a/engine/cli/commands/engine_install_cmd.h +++ b/engine/cli/commands/engine_install_cmd.h @@ -7,8 +7,14 @@ namespace commands { class EngineInstallCmd { public: - explicit EngineInstallCmd(std::shared_ptr download_service, const std::string& host, int port) - : engine_service_{EngineService(download_service)}, host_(host), port_(port) {}; + explicit EngineInstallCmd(std::shared_ptr download_service, + const std::string& host, int port, bool show_menu) + : engine_service_{EngineService(download_service)}, + host_(host), + port_(port), + show_menu_(show_menu), + hw_inf_{.sys_inf = system_info_utils::GetSystemInfo(), + .cuda_driver_version = system_info_utils::GetCudaVersion()} {}; bool Exec(const std::string& engine, const std::string& version = "latest", const std::string& src = ""); @@ -17,5 +23,13 @@ class EngineInstallCmd { EngineService engine_service_; std::string host_; int port_; + bool show_menu_; + + struct HardwareInfo { + std::unique_ptr sys_inf; + cortex::cpuid::CpuInfo cpu_inf; + std::string cuda_driver_version; + }; + HardwareInfo hw_inf_; }; } // namespace commands diff --git a/engine/cli/commands/engine_list_cmd.cc b/engine/cli/commands/engine_list_cmd.cc index cc2d38a02..3a2b527c9 100644 --- a/engine/cli/commands/engine_list_cmd.cc +++ b/engine/cli/commands/engine_list_cmd.cc @@ -1,10 +1,14 @@ #include "engine_list_cmd.h" -#include "httplib.h" -#include "json/json.h" +#include +#include #include "server_start_cmd.h" +#include "services/engine_service.h" +#include "utils/curl_utils.h" #include "utils/logging_utils.h" +#include "utils/url_parser.h" // clang-format off #include +#include // clang-format on namespace commands { @@ -20,36 +24,74 @@ bool EngineListCmd::Exec(const std::string& host, int port) { } tabulate::Table table; - table.add_row( - {"#", "Name", "Supported Formats", "Version", "Variant", "Status"}); - - httplib::Client cli(host + ":" + std::to_string(port)); - auto res = cli.Get("/v1/engines"); - if (res) { - if (res->status == httplib::StatusCode::OK_200) { - int count = 0; - // CLI_LOG(res->body); - Json::Value body; - Json::Reader reader; - reader.parse(res->body, body); - if (!body["data"].isNull()) { - 
for (auto const& v : body["data"]) { - count += 1; - table.add_row({std::to_string(count), v["name"].asString(), - v["format"].asString(), v["version"].asString(), - v["variant"].asString(), v["status"].asString()}); - } - } - } else { - CLI_LOG_ERROR("Failed to get engine list with status code: " << res->status); - return false; - } - } else { - auto err = res.error(); - CLI_LOG_ERROR("HTTP error: " << httplib::to_string(err)); + table.add_row({"#", "Name", "Version", "Variant", "Status"}); + + auto url = url_parser::Url{ + .protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "engines"}, + }; + auto result = curl_utils::SimpleGetJson(url.ToFullPath()); + if (result.has_error()) { + CTL_ERR(result.error()); return false; } + std::vector<std::string> engines = { + kLlamaEngine, + kOnnxEngine, + kTrtLlmEngine, + }; + + std::unordered_map<std::string, std::vector<EngineVariantResponse>> + engine_map; + + for (const auto& engine : engines) { + auto installed_variants = result.value()[engine]; + for (const auto& variant : installed_variants) { + engine_map[engine].push_back(EngineVariantResponse{ + .name = variant["name"].asString(), + .version = variant["version"].asString(), + .engine = engine, + }); + } + } + + // TODO: namh support onnx and tensorrt + auto default_engine_url = url_parser::Url{ + .protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "engines", kLlamaEngine, "default"}, + }; + auto selected_variant_result = + curl_utils::SimpleGetJson(default_engine_url.ToFullPath()); + + std::optional<std::pair<std::string, std::string>> variant_pair = + std::nullopt; + if (selected_variant_result.has_value()) { + variant_pair = std::make_pair( + selected_variant_result.value()["variant"].asString(), + selected_variant_result.value()["version"].asString()); + } + + std::vector<EngineVariantResponse> output; + for (const auto& [key, value] : engine_map) { + output.insert(output.end(), value.begin(), value.end()); + } + + int count = 0; + for (auto const& v : output) { + count += 1; + if (variant_pair.has_value() && v.name == variant_pair->first && + v.version == variant_pair->second) { + table.add_row( + {std::to_string(count), v.engine, v.version, v.name, "Default"}); + continue; + } + table.add_row( + {std::to_string(count), v.engine, v.version, v.name, "Installed"}); + } + + std::cout << table << std::endl; return true; } diff --git a/engine/cli/commands/engine_uninstall_cmd.cc b/engine/cli/commands/engine_uninstall_cmd.cc index ebd9eb869..d915b1dc5 100644 --- a/engine/cli/commands/engine_uninstall_cmd.cc +++ b/engine/cli/commands/engine_uninstall_cmd.cc @@ -1,7 +1,8 @@ #include "engine_uninstall_cmd.h" -#include "httplib.h" #include "server_start_cmd.h" +#include "utils/curl_utils.h" #include "utils/logging_utils.h" +#include "utils/url_parser.h" namespace commands { @@ -16,18 +17,16 @@ void EngineUninstallCmd::Exec(const std::string& host, int port, } } - // Call API to delete engine - httplib::Client cli(host + ":" + std::to_string(port)); - auto res = cli.Delete("/v1/engines/" + engine); - if (res) { - if (res->status == httplib::StatusCode::OK_200) { - CLI_LOG("Engine " + engine + " uninstalled successfully!"); - } else { - CTL_ERR("Engine failed to uninstall with status code: " << res->status); - } - } else { - auto err = res.error(); - CTL_ERR("HTTP error: " << httplib::to_string(err)); + auto url = url_parser::Url{.protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "engines", engine}}; + + auto result = curl_utils::SimpleDelete(url.ToFullPath()); + if (result.has_error()) { + CTL_ERR(result.error()); + return; } + + CLI_LOG("Engine " + engine + " uninstalled successfully!"); } }; // namespace commands
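The new listing logic implies a response shape that is easy to lose in the diff. Reconstructed from the fields the code actually reads (not an authoritative schema; the concrete values are placeholders):

// GET /v1/engines =>
// {
//   "llama-cpp":    [ { "name": "mac-arm64", "version": "v0.1.35" }, ... ],
//   "onnxruntime":  [ ... ],
//   "tensorrt-llm": [ ... ]
// }
//
// GET /v1/engines/llama-cpp/default =>
// { "variant": "mac-arm64", "version": "v0.1.35" }
//
// A row is labeled "Default" only when both fields of the default response
// match an installed (variant, version) pair; every other row is "Installed".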
diff --git a/engine/cli/commands/engine_update_cmd.cc b/engine/cli/commands/engine_update_cmd.cc new file mode 100644 index 000000000..b9e3acf1b --- /dev/null +++ b/engine/cli/commands/engine_update_cmd.cc @@ -0,0 +1,54 @@ +#include "engine_update_cmd.h" +#include +#include "server_start_cmd.h" +#include "utils/cli_selection_utils.h" +#include "utils/curl_utils.h" +#include "utils/download_progress.h" +#include "utils/logging_utils.h" +#include "utils/system_info_utils.h" +#include "utils/url_parser.h" + +namespace commands { +bool EngineUpdateCmd::Exec(const std::string& host, int port, + const std::string& engine) { + // Start server if server is not started yet + if (!commands::IsServerAlive(host, port)) { + CLI_LOG("Starting server ..."); + commands::ServerStartCmd ssc; + if (!ssc.Exec(host, port)) { + return false; + } + } + + DownloadProgress dp; + dp.Connect(host, port); + // engine can be small, so need to start ws first + auto dp_res = std::async(std::launch::deferred, [&dp, &engine] { + return dp.Handle(DownloadType::Engine); + }); + CLI_LOG("Validating download items, please wait..") + + auto update_url = url_parser::Url{ + .protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "engines", engine, "update"}, + }; + auto update_result = curl_utils::SimplePostJson(update_url.ToFullPath()); + if (update_result.has_error()) { + CTL_ERR(update_result.error()); + return false; + } + + if (!dp_res.get()) + return false; + + bool check_cuda_download = !system_info_utils::GetCudaVersion().empty(); + if (check_cuda_download) { + if (!dp.Handle(DownloadType::CudaToolkit)) + return false; + } + + CLI_LOG("Engine " << engine << " updated successfully!") + return true; +} +}; // namespace commands diff --git a/engine/cli/commands/engine_update_cmd.h b/engine/cli/commands/engine_update_cmd.h new file mode 100644 index 000000000..5e62d7d28 --- /dev/null +++ b/engine/cli/commands/engine_update_cmd.h @@ -0,0 +1,11 @@ +#pragma once + +#include + +namespace commands { + +class EngineUpdateCmd { + public: + bool Exec(const std::string& host, int port, const std::string& engine); +}; +} // namespace commands diff --git a/engine/cli/commands/engine_use_cmd.cc b/engine/cli/commands/engine_use_cmd.cc new file mode 100644 index 000000000..d03f9ddc0 --- /dev/null +++ b/engine/cli/commands/engine_use_cmd.cc @@ -0,0 +1,82 @@ +#include "engine_use_cmd.h" +#include "server_start_cmd.h" +#include "utils/cli_selection_utils.h" +#include "utils/curl_utils.h" +#include "utils/logging_utils.h" +#include "utils/url_parser.h" + +namespace commands { +cpp::result<bool, std::string> EngineUseCmd::Exec(const std::string& host, + int port, + const std::string& engine) { + // Start server if server is not started yet + if (!commands::IsServerAlive(host, port)) { + CLI_LOG("Starting server ..."); + commands::ServerStartCmd ssc; + if (!ssc.Exec(host, port)) { + return cpp::fail("Failed to start server"); + } + } + + auto get_installed_url = url_parser::Url{ + .protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "engines", engine}, + }; + auto installed_variants_results = + curl_utils::SimpleGetJson(get_installed_url.ToFullPath()); + if (installed_variants_results.has_error()) { + CTL_ERR(installed_variants_results.error()); + return cpp::fail("Failed to get installed variants"); + } + if (installed_variants_results.value().size() == 0) { + return cpp::fail("No installed variants found"); + }
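The code that follows folds the installed variants into a variant -> versions map and then prompts twice, first for a variant and then for one of its versions. A trimmed sketch of that grouping, minus the HTTP plumbing (GroupVariants and its input type are illustrative):

#include <map>
#include <string>
#include <utility>
#include <vector>

// Group (variant, version) pairs so the user first picks a variant and then
// one of the versions installed for that variant.
std::map<std::string, std::vector<std::string>> GroupVariants(
    const std::vector<std::pair<std::string, std::string>>& installed) {
  std::map<std::string, std::vector<std::string>> variant_map;
  for (const auto& [variant, version] : installed) {
    variant_map[variant].push_back(version);  // operator[] default-constructs
  }
  return variant_map;
}

The find/else branch in the patch is equivalent: std::map::operator[] already default-constructs the vector, so both arms collapse into the single push_back above.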
cpp::fail("No installed variants found"); + } + + std::map> variant_map; + for (const auto& variant : installed_variants_results.value()) { + auto variant_name = variant["name"].asString(); + if (variant_map.find(variant_name) == variant_map.end()) { + variant_map[variant_name] = {variant["version"].asString()}; + } else { + variant_map[variant_name].push_back(variant["version"].asString()); + } + } + + std::vector variant_selections; + for (const auto& [key, value] : variant_map) { + variant_selections.push_back(key); + } + + auto selected_variant = + cli_selection_utils::PrintSelection(variant_selections); + if (!selected_variant.has_value()) { + CTL_ERR("Invalid variant selection"); + return cpp::fail("Invalid variant selection"); + } + + auto selected_version = cli_selection_utils::PrintSelection( + variant_map[selected_variant.value()]); + if (!selected_variant.has_value()) { + CTL_ERR("Invalid version selection"); + return cpp::fail("Invalid version selection"); + } + + auto set_default_engine_variant = + url_parser::Url{.protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "engines", engine, "default"}, + .queries = {{"version", selected_version.value()}, + {"variant", selected_variant.value()}}}; + + auto response = + curl_utils::SimplePostJson(set_default_engine_variant.ToFullPath()); + if (response.has_error()) { + CTL_ERR(response.error()); + return cpp::fail("Failed to set default engine variant"); + } + + CLI_LOG("Engine " << engine << " updated successfully!"); + return {}; +} +}; // namespace commands diff --git a/engine/cli/commands/engine_use_cmd.h b/engine/cli/commands/engine_use_cmd.h new file mode 100644 index 000000000..be435e53c --- /dev/null +++ b/engine/cli/commands/engine_use_cmd.h @@ -0,0 +1,13 @@ +#pragma once + +#include +#include "utils/result.hpp" + +namespace commands { + +class EngineUseCmd { + public: + cpp::result Exec(const std::string& host, int port, + const std::string& engine); +}; +} // namespace commands diff --git a/engine/cli/commands/model_alias_cmd.cc b/engine/cli/commands/model_alias_cmd.cc deleted file mode 100644 index 05d35e023..000000000 --- a/engine/cli/commands/model_alias_cmd.cc +++ /dev/null @@ -1,42 +0,0 @@ -#include "model_alias_cmd.h" -#include "database/models.h" -#include "httplib.h" -#include "server_start_cmd.h" -#include "json/json.h" - -namespace commands { - -void ModelAliasCmd::Exec(const std::string& host, int port, - const std::string& model_handle, - const std::string& model_alias) { - // Start server if server is not started yet - if (!commands::IsServerAlive(host, port)) { - CLI_LOG("Starting server ..."); - commands::ServerStartCmd ssc; - if (!ssc.Exec(host, port)) { - return; - } - } - - // Call API to delete model - httplib::Client cli(host + ":" + std::to_string(port)); - Json::Value json_data; - json_data["model"] = model_handle; - json_data["modelAlias"] = model_alias; - auto data_str = json_data.toStyledString(); - auto res = cli.Post("/v1/models/alias", httplib::Headers(), data_str.data(), - data_str.size(), "application/json"); - if (res) { - if (res->status == httplib::StatusCode::OK_200) { - CLI_LOG("Successfully set model alias '" + model_alias + - "' for modeID '" + model_handle + "'."); - } else { - CLI_LOG_ERROR("Model failed to set alias with status code: " << res->status); - } - } else { - auto err = res.error(); - CLI_LOG_ERROR("HTTP error: " << httplib::to_string(err)); - } -} - -} // namespace commands \ No newline at end of file diff --git 
a/engine/cli/commands/model_alias_cmd.h b/engine/cli/commands/model_alias_cmd.h deleted file mode 100644 index b61dd724c..000000000 --- a/engine/cli/commands/model_alias_cmd.h +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once -#include -#include "utils/logging_utils.h" -namespace commands { - -class ModelAliasCmd { - public: - void Exec(const std::string& host, int port, const std::string& model_handle, - const std::string& model_alias); -}; -} // namespace commands \ No newline at end of file diff --git a/engine/cli/commands/model_del_cmd.cc b/engine/cli/commands/model_del_cmd.cc index 693de804d..d78fcc921 100644 --- a/engine/cli/commands/model_del_cmd.cc +++ b/engine/cli/commands/model_del_cmd.cc @@ -15,7 +15,7 @@ void ModelDelCmd::Exec(const std::string& host, int port, return; } } - + // Call API to delete model httplib::Client cli(host + ":" + std::to_string(port)); auto res = cli.Delete("/v1/models/" + model_handle); diff --git a/engine/cli/commands/model_get_cmd.cc b/engine/cli/commands/model_get_cmd.cc index 0eda66105..2c7c294e3 100644 --- a/engine/cli/commands/model_get_cmd.cc +++ b/engine/cli/commands/model_get_cmd.cc @@ -1,13 +1,6 @@ #include "model_get_cmd.h" -#include -#include -#include -#include -#include "config/yaml_config.h" -#include "database/models.h" #include "httplib.h" #include "server_start_cmd.h" -#include "utils/file_manager_utils.h" #include "utils/json_helper.h" #include "utils/logging_utils.h" @@ -40,4 +33,4 @@ void ModelGetCmd::Exec(const std::string& host, int port, } } -} // namespace commands \ No newline at end of file +} // namespace commands diff --git a/engine/cli/commands/model_pull_cmd.cc b/engine/cli/commands/model_pull_cmd.cc index ad8938146..a4bf68bea 100644 --- a/engine/cli/commands/model_pull_cmd.cc +++ b/engine/cli/commands/model_pull_cmd.cc @@ -1,16 +1,10 @@ #include "model_pull_cmd.h" -#include -#include "common/event.h" -#include "database/models.h" #include "server_start_cmd.h" #include "utils/cli_selection_utils.h" #include "utils/download_progress.h" -#include "utils/format_utils.h" -#include "utils/huggingface_utils.h" #include "utils/json_helper.h" #include "utils/logging_utils.h" #include "utils/scope_exit.h" -#include "utils/string_utils.h" #if defined(_WIN32) #include #endif diff --git a/engine/cli/commands/model_start_cmd.cc b/engine/cli/commands/model_start_cmd.cc index 1055805f5..cc8f19edc 100644 --- a/engine/cli/commands/model_start_cmd.cc +++ b/engine/cli/commands/model_start_cmd.cc @@ -1,7 +1,5 @@ #include "model_start_cmd.h" -#include "config/yaml_config.h" #include "cortex_upd_cmd.h" -#include "database/models.h" #include "httplib.h" #include "run_cmd.h" #include "server_start_cmd.h" diff --git a/engine/cli/commands/model_stop_cmd.cc b/engine/cli/commands/model_stop_cmd.cc index d84f17003..06a6acbaf 100644 --- a/engine/cli/commands/model_stop_cmd.cc +++ b/engine/cli/commands/model_stop_cmd.cc @@ -1,6 +1,6 @@ #include "model_stop_cmd.h" -#include "utils/logging_utils.h" #include "httplib.h" +#include "utils/logging_utils.h" namespace commands { diff --git a/engine/cli/commands/ps_cmd.cc b/engine/cli/commands/ps_cmd.cc index fe84031a4..ca891dab4 100644 --- a/engine/cli/commands/ps_cmd.cc +++ b/engine/cli/commands/ps_cmd.cc @@ -2,9 +2,9 @@ #include #include #include -#include "nlohmann/json.hpp" #include "utils/engine_constants.h" #include "utils/format_utils.h" +#include "utils/json_helper.h" #include "utils/logging_utils.h" #include "utils/string_utils.h" @@ -20,18 +20,17 @@ void PsCmd::Exec(const std::string& host, int port) { 
return; } - auto body = nlohmann::json::parse(res->body); - auto data = body["data"]; + auto data = json_helper::ParseJsonString(res->body)["data"]; std::vector model_status_list; try { for (const auto& item : data) { ModelLoadedStatus model_status; // TODO(sang) hardcode for now model_status.engine = kLlamaEngine; - model_status.model = item["id"]; - model_status.ram = item["ram"]; - model_status.start_time = item["start_time"]; - model_status.vram = item["vram"]; + model_status.model = item["id"].asString(); + model_status.ram = item["ram"].asUInt64(); + model_status.start_time = item["start_time"].asUInt64(); + model_status.vram = item["vram"].asUInt64(); model_status_list.push_back(model_status); } } catch (const std::exception& e) { diff --git a/engine/cli/commands/run_cmd.cc b/engine/cli/commands/run_cmd.cc index d09298cd5..174255db3 100644 --- a/engine/cli/commands/run_cmd.cc +++ b/engine/cli/commands/run_cmd.cc @@ -67,19 +67,6 @@ std::optional SelectLocalModel(std::string host, int port, return model_id; } -namespace { -std::string Repo2Engine(const std::string& r) { - if (r == kLlamaRepo) { - return kLlamaEngine; - } else if (r == kOnnxRepo) { - return kOnnxEngine; - } else if (r == kTrtLlmRepo) { - return kTrtLlmEngine; - } - return r; -}; -} // namespace - void RunCmd::Exec(bool run_detach) { std::optional model_id = SelectLocalModel(host_, port_, model_service_, model_handle_); @@ -107,59 +94,59 @@ void RunCmd::Exec(bool run_detach) { // Check if engine existed. If not, download it { - auto required_engine = - engine_service_.GetEngineInfo(Repo2Engine(mc.engine)); - - if (!required_engine.has_value()) { - throw std::runtime_error("Engine not found: " + mc.engine); - } - if (required_engine.value().status == EngineService::kIncompatible) { - throw std::runtime_error("Engine " + mc.engine + " is incompatible"); + auto is_engine_ready = engine_service_.IsEngineReady(mc.engine); + if (is_engine_ready.has_error()) { + throw std::runtime_error(is_engine_ready.error()); } - if (required_engine.value().status == EngineService::kNotInstalled) { - if (!EngineInstallCmd(download_service_, host_, port_) + + if (!is_engine_ready.value()) { + CTL_INF("Engine " << mc.engine + << " is not ready. 
Proceed to install.."); + if (!EngineInstallCmd(download_service_, host_, port_, false) .Exec(mc.engine)) { return; + } else { + CTL_INF("Engine " << mc.engine << " is ready"); } } - } - // Start server if it is not running - { - if (!commands::IsServerAlive(host_, port_)) { - CLI_LOG("Starting server ..."); - commands::ServerStartCmd ssc; - if (!ssc.Exec(host_, port_)) { - return; + // Start server if it is not running + { + if (!commands::IsServerAlive(host_, port_)) { + CLI_LOG("Starting server ..."); + commands::ServerStartCmd ssc; + if (!ssc.Exec(host_, port_)) { + return; + } } } - } - // Always start model if not llamacpp - // If it is llamacpp, then check model status first - { - if ((mc.engine.find(kLlamaRepo) == std::string::npos && - mc.engine.find(kLlamaEngine) == std::string::npos) || - !commands::ModelStatusCmd(model_service_) - .IsLoaded(host_, port_, *model_id)) { + // Always start model if not llamacpp + // If it is llamacpp, then check model status first + { + if ((mc.engine.find(kLlamaRepo) == std::string::npos && + mc.engine.find(kLlamaEngine) == std::string::npos) || + !commands::ModelStatusCmd(model_service_) + .IsLoaded(host_, port_, *model_id)) { - auto res = - commands::ModelStartCmd(model_service_) - .Exec(host_, port_, *model_id, false /*print_success_log*/); - if (!res) { - CLI_LOG("Error: Failed to start model"); - return; + auto res = + commands::ModelStartCmd(model_service_) + .Exec(host_, port_, *model_id, false /*print_success_log*/); + if (!res) { + CLI_LOG("Error: Failed to start model"); + return; + } } } - } - // Chat - if (run_detach) { - CLI_LOG(*model_id << " model started successfully. Use `" - << commands::GetCortexBinary() << " run " << *model_id - << "` for interactive chat shell"); - } else { - ChatCompletionCmd(model_service_).Exec(host_, port_, *model_id, mc, ""); + // Chat + if (run_detach) { + CLI_LOG(*model_id << " model started successfully. 
Use `" + << commands::GetCortexBinary() << " run " << *model_id + << "` for interactive chat shell"); + } else { + ChatCompletionCmd(model_service_).Exec(host_, port_, *model_id, mc, ""); + } } } catch (const std::exception& e) { CLI_LOG("Fail to run model with ID '" + model_handle_ + "': " + e.what()); diff --git a/engine/cli/commands/server_start_cmd.cc b/engine/cli/commands/server_start_cmd.cc index ca5363fa6..47a8cf320 100644 --- a/engine/cli/commands/server_start_cmd.cc +++ b/engine/cli/commands/server_start_cmd.cc @@ -97,12 +97,8 @@ bool ServerStartCmd::Exec(const std::string& host, int port) { v += g; } CTL_INF("LD_LIBRARY_PATH: " << v); - auto data_path = file_manager_utils::GetEnginesContainerPath(); - auto llamacpp_path = data_path / "cortex.llamacpp/"; - auto trt_path = data_path / "cortex.tensorrt-llm/"; - if (!std::filesystem::exists(llamacpp_path)) { - std::filesystem::create_directory(llamacpp_path); - } + auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo); + auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo); auto new_v = trt_path.string() + ":" + llamacpp_path.string() + ":" + v; setenv(name, new_v.c_str(), true); diff --git a/engine/cli/main.cc b/engine/cli/main.cc index 52c1ce457..62a88eb38 100644 --- a/engine/cli/main.cc +++ b/engine/cli/main.cc @@ -48,7 +48,7 @@ void SetupLogger(trantor::FileLogger& async_logger, bool verbose) { std::filesystem::path(config.logFolderPath) / std::filesystem::path(cortex_utils::logs_folder)); async_logger.setFileName(config.logFolderPath + "/" + - cortex_utils::logs_cli_base_name); + cortex_utils::logs_cli_base_name); async_logger.setMaxLines(config.maxLogLines); // Keep last 100000 lines async_logger.startLogging(); trantor::Logger::setOutputFunction( @@ -96,7 +96,12 @@ int main(int argc, char* argv[]) { } } - { file_manager_utils::CreateConfigFileIfNotExist(); } + { + auto result = file_manager_utils::CreateConfigFileIfNotExist(); + if (result.has_error()) { + CTL_ERR("Error creating config file: " << result.error()); + } + } RemoveBinaryTempFileIfExists(); @@ -104,7 +109,7 @@ int main(int argc, char* argv[]) { SetupLogger(async_file_logger, verbose); if (should_install_server) { - InstallServer(); + InstallServer(); return 0; } diff --git a/engine/cli/utils/download_progress.cc b/engine/cli/utils/download_progress.cc index b47b4fc9a..30d9a205d 100644 --- a/engine/cli/utils/download_progress.cc +++ b/engine/cli/utils/download_progress.cc @@ -146,4 +146,4 @@ bool DownloadProgress::Handle(const DownloadType& event_type) { if (status_ == DownloadStatus::DownloadError) return false; return true; -} \ No newline at end of file +} diff --git a/engine/common/base.h b/engine/common/base.h index 382f21b80..478cc7feb 100644 --- a/engine/common/base.h +++ b/engine/common/base.h @@ -20,9 +20,6 @@ class BaseModel { virtual void GetModels( const HttpRequestPtr& req, std::function&& callback) = 0; - virtual void GetEngines( - const HttpRequestPtr& req, - std::function&& callback) = 0; virtual void FineTuning( const HttpRequestPtr& req, std::function&& callback) = 0; @@ -48,4 +45,4 @@ class BaseEmbedding { std::function&& callback) = 0; // The derived class can also override other methods if needed -}; \ No newline at end of file +}; diff --git a/engine/common/download_task.h b/engine/common/download_task.h index 39bf03a99..94fb11a48 100644 --- a/engine/common/download_task.h +++ b/engine/common/download_task.h @@ -2,15 +2,12 @@ #include #include -#include #include #include #include enum class DownloadType { 
Model, Engine, Miscellaneous, CudaToolkit, Cortex }; -using namespace nlohmann; - struct DownloadItem { std::string id; @@ -115,23 +112,6 @@ struct DownloadTask { return root; } - - json ToJson() const { - json dl_items = json::array(); - - for (const auto& item : items) { - json dl_item{{"id", item.id}, - {"downloadUrl", item.downloadUrl}, - {"localPath", item.localPath}, - {"checksum", item.checksum.value_or("N/A")}, - {"bytes", item.bytes.value_or(0)}, - {"downloadedBytes", item.downloadedBytes.value_or(0)}}; - dl_items.push_back(dl_item); - } - - return json{ - {"id", id}, {"type", DownloadTypeToString(type)}, {"items", dl_items}}; - } }; namespace common { diff --git a/engine/common/event.h b/engine/common/event.h index c23ebea5f..0efd67968 100644 --- a/engine/common/event.h +++ b/engine/common/event.h @@ -1,13 +1,12 @@ #pragma once #include -#include #include #include "common/download_task.h" #include "eventpp/utilities/anydata.h" +#include "utils/json_helper.h" namespace cortex::event { -using namespace nlohmann; enum class EventType { DownloadEvent, @@ -64,14 +63,15 @@ inline DownloadEventType DownloadEventTypeFromString(const std::string& str) { } // namespace struct DownloadEvent : public cortex::event::Event { - std::string ToJsonString() const { - json json{{"type", DownloadEventTypeToString(type_)}, - {"task", download_task_.ToJson()}}; - return json.dump(); - } - DownloadEventType type_; DownloadTask download_task_; + + std::string ToJsonString() const { + Json::Value root; + root["type"] = DownloadEventTypeToString(type_); + root["task"] = download_task_.ToJsonCpp(); + return json_helper::DumpJsonString(root); + } }; inline DownloadEvent GetDownloadEventFromJson(const Json::Value& item_json) { diff --git a/engine/config/gguf_parser.cc b/engine/config/gguf_parser.cc index 1e217c344..acd2b9c76 100644 --- a/engine/config/gguf_parser.cc +++ b/engine/config/gguf_parser.cc @@ -2,9 +2,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -42,7 +40,6 @@ void GGUFHandler::OpenFile(const std::string& file_path) { if (file_handle_ == INVALID_HANDLE_VALUE) { throw std::runtime_error("Failed to open file"); } - // Get the file size LARGE_INTEGER file_size_struct; if (!GetFileSizeEx(file_handle_, &file_size_struct)) { diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index 065ad4565..63f86ed38 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -1,15 +1,111 @@ #include "engines.h" -#include #include "services/engine_service.h" #include "utils/archive_utils.h" #include "utils/cortex_utils.h" +#include "utils/engine_constants.h" #include "utils/logging_utils.h" +#include "utils/string_utils.h" -void Engines::InstallEngine( +namespace { +// Need to change this after we rename repositories +std::string NormalizeEngine(const std::string& engine) { + if (engine == kLlamaEngine) { + return kLlamaRepo; + } else if (engine == kOnnxEngine) { + return kOnnxRepo; + } else if (engine == kTrtLlmEngine) { + return kTrtLlmRepo; + } + return engine; +}; +} // namespace + +void Engines::ListEngine( + const HttpRequestPtr& req, + std::function&& callback) const { + std::vector supported_engines{kLlamaEngine, kOnnxEngine, + kTrtLlmEngine}; + Json::Value ret; + for (const auto& engine : supported_engines) { + auto installed_engines = + engine_service_->GetInstalledEngineVariants(engine); + if (installed_engines.has_error()) { + continue; + } + Json::Value variants(Json::arrayValue); + for (const auto& variant : 
installed_engines.value()) { + variants.append(variant.ToJson()); + } + ret[engine] = variants; + } + + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k200OK); + callback(resp); +} + +void Engines::UninstallEngine( const HttpRequestPtr& req, std::function&& callback, - const std::string& engine) { + const std::string& engine, const std::optional version, + const std::optional variant) { + + auto result = + engine_service_->UninstallEngineVariant(engine, version, variant); + + Json::Value ret; + if (result.has_error()) { + CTL_INF(result.error()); + ret["message"] = result.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); + } else { + CTL_INF("Engine uninstalled successfully"); + ret["message"] = "Engine " + engine + " uninstalled successfully!"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k200OK); + callback(resp); + } +} + +void Engines::GetEngineVersions( + const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine) const { + if (engine.empty()) { + Json::Value res; + res["message"] = "Engine name is required"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } + + // TODO: namh support pagination + auto result = engine_service_->GetEngineReleases(engine); + if (result.has_error()) { + Json::Value res; + res["message"] = "Failed to get engine releases: " + result.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } + Json::Value releases(Json::arrayValue); + for (const auto& release : result.value()) { + releases.append(release.ToApiJson()); + } + auto resp = cortex_utils::CreateCortexHttpJsonResponse(releases); + resp->setStatusCode(k200OK); + callback(resp); +} + +void Engines::GetEngineVariants( + const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine, const std::string& version) const { if (engine.empty()) { Json::Value res; res["message"] = "Engine name is required"; @@ -20,12 +116,30 @@ void Engines::InstallEngine( return; } - std::string version = "latest"; - if (auto o = req->getJsonObject(); o) { - version = (*o).get("version", "latest").asString(); + auto result = engine_service_->GetEngineVariants(engine, version); + + auto normalize_version = string_utils::RemoveSubstring(version, "v"); + Json::Value releases(Json::arrayValue); + for (const auto& release : result.value()) { + auto json = release.ToApiJson(NormalizeEngine(engine), normalize_version); + if (json != std::nullopt) { + releases.append(json.value()); + } } + auto resp = cortex_utils::CreateCortexHttpJsonResponse(releases); + resp->setStatusCode(k200OK); + callback(resp); +} + +void Engines::InstallEngine( + const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine, const std::optional version, + const std::optional variant_name) { + auto normalized_version = version.value_or("latest"); - auto result = engine_service_->InstallEngineAsync(engine, version); + auto result = engine_service_->InstallEngineAsyncV2( + engine, normalized_version, variant_name); if (result.has_error()) { Json::Value res; res["message"] = result.error(); @@ -34,85 +148,147 @@ void Engines::InstallEngine( callback(resp); } else { Json::Value res; - res["message"] = "Engine " + engine + " starts installing!"; + res["message"] = "Engine starts 
installing!"; auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(k200OK); callback(resp); } } -void Engines::ListEngine( +void Engines::GetInstalledEngineVariants( const HttpRequestPtr& req, - std::function&& callback) const { - auto status_list = engine_service_->GetEngineInfoList(); - - Json::Value ret; - ret["object"] = "list"; - Json::Value data(Json::arrayValue); - for (auto& status : status_list) { - Json::Value ret; - ret["name"] = status.name; - ret["description"] = status.description; - ret["version"] = status.version.value_or(""); - ret["variant"] = status.variant.value_or(""); - ret["productName"] = status.product_name; - ret["status"] = status.status; - ret["format"] = status.format; - - data.append(std::move(ret)); - } - - ret["data"] = data; - ret["result"] = "OK"; - auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + std::function&& callback, + const std::string& engine) const { + auto result = engine_service_->GetInstalledEngineVariants(engine); + if (result.has_error()) { + Json::Value res; + res["message"] = result.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } + Json::Value releases(Json::arrayValue); + for (const auto& variant : result.value()) { + releases.append(variant.ToJson()); + } + auto resp = cortex_utils::CreateCortexHttpJsonResponse(releases); resp->setStatusCode(k200OK); callback(resp); } -void Engines::GetEngine(const HttpRequestPtr& req, - std::function&& callback, - const std::string& engine) const { - auto status = engine_service_->GetEngineInfo(engine); - Json::Value ret; - if (status.has_value()) { - ret["name"] = status->name; - ret["description"] = status->description; - ret["version"] = status->version.value_or(""); - ret["variant"] = status->variant.value_or(""); - ret["productName"] = status->product_name; - ret["status"] = status->status; - ret["format"] = status->format; - - auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); +void Engines::UpdateEngine( + const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine) { + auto result = engine_service_->UpdateEngine(engine); + if (result.has_error()) { + Json::Value res; + res["message"] = result.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k400BadRequest); + callback(resp); + } else { + auto resp = + cortex_utils::CreateCortexHttpJsonResponse(result.value().ToJson()); resp->setStatusCode(k200OK); callback(resp); + } +} + +void Engines::GetLatestEngineVersion( + const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine) { + auto result = engine_service_->GetLatestEngineVersion(engine); + if (result.has_error()) { + Json::Value res; + res["message"] = result.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k400BadRequest); + callback(resp); } else { - Json::Value ret; - ret["message"] = "Engine not found"; - auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + auto resp = + cortex_utils::CreateCortexHttpJsonResponse(result.value().ToApiJson()); + resp->setStatusCode(k200OK); + callback(resp); + } +} + +void Engines::SetDefaultEngineVariant( + const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine, const std::string& version, + const std::string& variant) { + auto result = + engine_service_->SetDefaultEngineVariant(engine, version, variant); + if (result.has_error()) { + 
Json::Value res; + res["message"] = result.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(k400BadRequest); callback(resp); + } else { + auto resp = + cortex_utils::CreateCortexHttpJsonResponse(result.value().ToJson()); + resp->setStatusCode(k200OK); + callback(resp); } } -void Engines::UninstallEngine( +void Engines::GetDefaultEngineVariant( const HttpRequestPtr& req, std::function&& callback, - const std::string& engine) { + const std::string& engine) const { + auto result = engine_service_->GetDefaultEngineVariant(engine); + if (result.has_error()) { + Json::Value res; + res["message"] = result.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k400BadRequest); + callback(resp); + } else { + auto resp = + cortex_utils::CreateCortexHttpJsonResponse(result.value().ToJson()); + resp->setStatusCode(k200OK); + callback(resp); + } +} - auto result = engine_service_->UninstallEngine(engine); - Json::Value ret; +void Engines::LoadEngine(const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine) { + auto result = engine_service_->LoadEngine(engine); + if (result.has_error()) { + Json::Value res; + res["message"] = result.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k400BadRequest); + callback(resp); + } else { + Json::Value res; + res["message"] = "Engine " + engine + " loaded successfully!"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k200OK); + callback(resp); + } +} +void Engines::UnloadEngine( + const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine) { + auto result = engine_service_->UnloadEngine(engine); if (result.has_error()) { - CTL_INF(result.error()); - ret["message"] = result.error(); - auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + Json::Value res; + res["message"] = result.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(k400BadRequest); callback(resp); } else { - CTL_INF("Engine uninstalled successfully"); - ret["message"] = "Engine " + engine + " uninstalled successfully!"; - auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + Json::Value res; + res["message"] = "Engine " + engine + " unloaded successfully!"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(k200OK); callback(resp); } diff --git a/engine/controllers/engines.h b/engine/controllers/engines.h index 03b00d01c..de1dbf6ea 100644 --- a/engine/controllers/engines.h +++ b/engine/controllers/engines.h @@ -11,34 +11,99 @@ using namespace drogon; class Engines : public drogon::HttpController { public: METHOD_LIST_BEGIN - METHOD_ADD(Engines::InstallEngine, "/install/{1}", Post); - METHOD_ADD(Engines::UninstallEngine, "/{1}", Delete); + + METHOD_ADD(Engines::GetInstalledEngineVariants, "/{1}", Get); + METHOD_ADD(Engines::InstallEngine, "/{1}?version={2}&variant={3}", Post); + METHOD_ADD(Engines::UninstallEngine, "/{1}?version={2}&variant={3}", Delete); + METHOD_ADD(Engines::SetDefaultEngineVariant, + "/{1}/default?version={2}&variant={3}", Post); + METHOD_ADD(Engines::GetDefaultEngineVariant, "/{1}/default", Get); + + METHOD_ADD(Engines::LoadEngine, "/{1}/load", Post); + METHOD_ADD(Engines::UnloadEngine, "/{1}/load", Delete); + METHOD_ADD(Engines::UpdateEngine, "/{1}/update", Post); METHOD_ADD(Engines::ListEngine, "", Get); - METHOD_ADD(Engines::GetEngine, "/{1}", Get); + 
METHOD_ADD(Engines::GetEngineVersions, "/{1}/versions", Get); + METHOD_ADD(Engines::GetEngineVariants, "/{1}/versions/{2}", Get); + + ADD_METHOD_TO(Engines::GetInstalledEngineVariants, "/v1/engines/{1}", Get); + ADD_METHOD_TO(Engines::InstallEngine, + "/v1/engines/{1}?version={2}&variant={3}", Post); + ADD_METHOD_TO(Engines::UninstallEngine, + "/v1/engines/{1}?version={2}&variant={3}", Delete); + ADD_METHOD_TO(Engines::SetDefaultEngineVariant, + "/v1/engines/{1}/default?version={2}&variant={3}", Post); + ADD_METHOD_TO(Engines::GetDefaultEngineVariant, "/v1/engines/{1}/default", + Get); - ADD_METHOD_TO(Engines::InstallEngine, "/v1/engines/install/{1}", Post); - ADD_METHOD_TO(Engines::UninstallEngine, "/v1/engines/{1}", Delete); + ADD_METHOD_TO(Engines::LoadEngine, "/v1/engines/{1}/load", Post); + ADD_METHOD_TO(Engines::UnloadEngine, "/v1/engines/{1}/load", Delete); + ADD_METHOD_TO(Engines::UpdateEngine, "/v1/engines/{1}/update", Post); + ADD_METHOD_TO(Engines::GetEngineVersions, "/v1/engines/{1}/versions", Get); + ADD_METHOD_TO(Engines::GetEngineVariants, "/v1/engines/{1}/versions/{2}", + Get); ADD_METHOD_TO(Engines::ListEngine, "/v1/engines", Get); - ADD_METHOD_TO(Engines::GetEngine, "/v1/engines/{1}", Get); METHOD_LIST_END explicit Engines(std::shared_ptr<EngineService> engine_service) : engine_service_{engine_service} {} - void InstallEngine(const HttpRequestPtr& req, - std::function<void(const HttpResponsePtr&)>&& callback, - const std::string& engine); - void ListEngine(const HttpRequestPtr& req, std::function<void(const HttpResponsePtr&)>&& callback) const; - void GetEngine(const HttpRequestPtr& req, - std::function<void(const HttpResponsePtr&)>&& callback, - const std::string& engine) const; - void UninstallEngine(const HttpRequestPtr& req, std::function<void(const HttpResponsePtr&)>&& callback, - const std::string& engine); + void UninstallEngine(const HttpRequestPtr& req, + std::function<void(const HttpResponsePtr&)>&& callback, + const std::string& engine, + const std::optional<std::string> version, + const std::optional<std::string> variant); + + void GetEngineVersions(const HttpRequestPtr& req, + std::function<void(const HttpResponsePtr&)>&& callback, + const std::string& engine) const; + + void GetEngineVariants(const HttpRequestPtr& req, + std::function<void(const HttpResponsePtr&)>&& callback, + const std::string& engine, + const std::string& version) const; + + void InstallEngine(const HttpRequestPtr& req, + std::function<void(const HttpResponsePtr&)>&& callback, + const std::string& engine, + const std::optional<std::string> version, + const std::optional<std::string> variant_name); + + void GetInstalledEngineVariants( + const HttpRequestPtr& req, + std::function<void(const HttpResponsePtr&)>&& callback, + const std::string& engine) const; + + void GetLatestEngineVersion( + const HttpRequestPtr& req, + std::function<void(const HttpResponsePtr&)>&& callback, + const std::string& engine); + + void UpdateEngine(const HttpRequestPtr& req, + std::function<void(const HttpResponsePtr&)>&& callback, + const std::string& engine); + + void SetDefaultEngineVariant( + const HttpRequestPtr& req, + std::function<void(const HttpResponsePtr&)>&& callback, + const std::string& engine, const std::string& version, + const std::string& variant); + + void GetDefaultEngineVariant( + const HttpRequestPtr& req, + std::function<void(const HttpResponsePtr&)>&& callback, + const std::string& engine) const; + + void LoadEngine(const HttpRequestPtr& req, + std::function<void(const HttpResponsePtr&)>&& callback, + const std::string& engine); + + void UnloadEngine(const HttpRequestPtr& req, + std::function<void(const HttpResponsePtr&)>&& callback, + const std::string& engine); private: std::shared_ptr<EngineService> engine_service_; diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index b8f0740db..c205e85df 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -348,8 +348,7 @@ void Models::ImportModel( std::filesystem::path(model_yaml_path).parent_path() / std::filesystem::path(modelPath).filename(); std::filesystem::copy_file( - modelPath, file_path, - 
std::filesystem::copy_options::update_existing); + modelPath, file_path, std::filesystem::copy_options::update_existing); model_config.files.push_back(file_path.string()); auto size = std::filesystem::file_size(file_path); model_config.size = size; @@ -409,7 +408,6 @@ void Models::StartModel( std::function&& callback) { if (!http_util::HasFieldInReq(req, callback, "model")) return; - auto config = file_manager_utils::GetCortexConfig(); auto model_handle = (*(req->getJsonObject())).get("model", "").asString(); StartParameterOverride params_override; if (auto& o = (*(req->getJsonObject()))["prompt_template"]; !o.isNull()) { @@ -462,16 +460,16 @@ void Models::StartModel( std::string engine_name = params_override.bypass_model_check() ? kLlamaEngine : model_entry.value().engine; - auto engine_entry = engine_service_->GetEngineInfo(engine_name); - if (engine_entry.has_error()) { + auto engine_validate = engine_service_->IsEngineReady(engine_name); + if (engine_validate.has_error()) { Json::Value ret; - ret["message"] = "Cannot find engine: " + engine_name; + ret["message"] = engine_validate.error(); auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); resp->setStatusCode(drogon::k400BadRequest); callback(resp); return; } - if (engine_entry->status != "Ready") { + if (!engine_validate.value()) { Json::Value ret; ret["message"] = "Engine is not ready! Please install first!"; auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); @@ -480,9 +478,7 @@ void Models::StartModel( return; } - auto result = model_service_->StartModel(config.apiServerHost, - std::stoi(config.apiServerPort), - model_handle, params_override); + auto result = model_service_->StartModel(model_handle, params_override); if (result.has_error()) { Json::Value ret; ret["message"] = result.error(); @@ -500,12 +496,12 @@ void Models::StartModel( void Models::StopModel(const HttpRequestPtr& req, std::function&& callback) { - if (!http_util::HasFieldInReq(req, callback, "model")) + if (!http_util::HasFieldInReq(req, callback, "model")) { return; - auto config = file_manager_utils::GetCortexConfig(); + } + auto model_handle = (*(req->getJsonObject())).get("model", "").asString(); - auto result = model_service_->StopModel( - config.apiServerHost, std::stoi(config.apiServerPort), model_handle); + auto result = model_service_->StopModel(model_handle); if (result.has_error()) { Json::Value ret; ret["message"] = result.error(); @@ -525,10 +521,7 @@ void Models::GetModelStatus( const HttpRequestPtr& req, std::function&& callback, const std::string& model_id) { - auto config = file_manager_utils::GetCortexConfig(); - - auto result = model_service_->GetModelStatus( - config.apiServerHost, std::stoi(config.apiServerPort), model_id); + auto result = model_service_->GetModelStatus(model_id); if (result.has_error()) { Json::Value ret; ret["message"] = result.error(); diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index 29319d963..95826c6c6 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -2,16 +2,15 @@ #include "trantor/utils/Logger.h" #include "utils/cortex_utils.h" -#include "utils/cpuid/cpu_info.h" -#include "utils/engine_constants.h" -#include "utils/file_manager_utils.h" #include "utils/function_calling/common.h" + using namespace inferences; -using json = nlohmann::json; + namespace inferences { -server::server(std::shared_ptr inference_service) - : inference_svc_(inference_service) { +server::server(std::shared_ptr inference_service, + std::shared_ptr engine_service) + : 
inference_svc_(inference_service), engine_service_(engine_service) { #if defined(_WIN32) if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH")); should_use_dll_search_path) { @@ -68,7 +67,10 @@ void server::Embedding(const HttpRequestPtr& req, void server::UnloadModel( const HttpRequestPtr& req, std::function&& callback) { - auto ir = inference_svc_->UnloadModel(req->getJsonObject()); + auto engine = (*req->getJsonObject())["engine"].asString(); + auto model = (*req->getJsonObject())["model_id"].asString(); + CTL_INF("Unloading model: " + model + ", engine: " + engine); + auto ir = inference_svc_->UnloadModel(engine, model); auto resp = cortex_utils::CreateCortexHttpJsonResponse(std::get<1>(ir)); resp->setStatusCode( static_cast(std::get<0>(ir)["status_code"].asInt())); @@ -96,14 +98,6 @@ void server::GetModels(const HttpRequestPtr& req, LOG_TRACE << "Done get models"; } -void server::GetEngines( - const HttpRequestPtr& req, - std::function&& callback) { - auto ir = inference_svc_->GetEngines(req->getJsonObject()); - auto resp = cortex_utils::CreateCortexHttpJsonResponse(ir); - callback(resp); -} - void server::FineTuning( const HttpRequestPtr& req, std::function&& callback) { @@ -125,16 +119,6 @@ void server::LoadModel(const HttpRequestPtr& req, LOG_TRACE << "Done load model"; } -void server::UnloadEngine( - const HttpRequestPtr& req, - std::function&& callback) { - auto ir = inference_svc_->UnloadEngine(req->getJsonObject()); - auto resp = cortex_utils::CreateCortexHttpJsonResponse(std::get<1>(ir)); - resp->setStatusCode( - static_cast(std::get<0>(ir)["status_code"].asInt())); - callback(resp); -} - void server::ProcessStreamRes(std::function cb, std::shared_ptr q) { auto err_or_done = std::make_shared(false); diff --git a/engine/controllers/server.h b/engine/controllers/server.h index 15844b403..baba4a961 100644 --- a/engine/controllers/server.h +++ b/engine/controllers/server.h @@ -1,8 +1,7 @@ - #pragma once -#include -#include + #include +#include #if defined(_WIN32) #define NOMINMAX @@ -15,11 +14,8 @@ #define CPPHTTPLIB_NO_EXCEPTIONS 1 #endif -#include #include #include -#include - #include "common/base.h" #include "services/inference_service.h" @@ -27,8 +23,6 @@ #define SERVER_VERBOSE 1 #endif -using json = nlohmann::json; - using namespace drogon; namespace inferences { @@ -38,7 +32,8 @@ class server : public drogon::HttpController, public BaseChatCompletion, public BaseEmbedding { public: - server(std::shared_ptr inference_service); + server(std::shared_ptr inference_service, + std::shared_ptr engine_service); ~server(); METHOD_LIST_BEGIN // list path definitions here; @@ -48,24 +43,14 @@ class server : public drogon::HttpController, METHOD_ADD(server::UnloadModel, "unloadmodel", Post); METHOD_ADD(server::ModelStatus, "modelstatus", Post); METHOD_ADD(server::GetModels, "models", Get); - METHOD_ADD(server::GetEngines, "engines", Get); // cortex.python API METHOD_ADD(server::FineTuning, "finetuning", Post); // Openai compatible path ADD_METHOD_TO(server::ChatCompletion, "/v1/chat/completions", Post); - // ADD_METHOD_TO(server::GetModels, "/v1/models", Get); ADD_METHOD_TO(server::FineTuning, "/v1/fine_tuning/job", Post); - - // ADD_METHOD_TO(server::handlePrelight, "/v1/chat/completions", Options); - // NOTE: prelight will be added back when browser support is properly planned - ADD_METHOD_TO(server::Embedding, "/v1/embeddings", Post); - // ADD_METHOD_TO(server::handlePrelight, "/v1/embeddings", Options); - - // PATH_ADD("/llama/chat_completion", Post); - 
METHOD_ADD(server::UnloadEngine, "unloadengine", Post); METHOD_LIST_END void ChatCompletion( @@ -86,14 +71,9 @@ class server : public drogon::HttpController, void GetModels( const HttpRequestPtr& req, std::function&& callback) override; - void GetEngines( - const HttpRequestPtr& req, - std::function&& callback) override; void FineTuning( const HttpRequestPtr& req, std::function&& callback) override; - void UnloadEngine(const HttpRequestPtr& req, - std::function&& callback); private: void ProcessStreamRes(std::function cb, @@ -103,5 +83,6 @@ class server : public drogon::HttpController, private: std::shared_ptr inference_svc_; + std::shared_ptr engine_service_; }; }; // namespace inferences diff --git a/engine/database/models.cc b/engine/database/models.cc index 753162328..67ecb9723 100644 --- a/engine/database/models.cc +++ b/engine/database/models.cc @@ -178,7 +178,6 @@ cpp::result Models::AddModelEntry(ModelEntry new_entry, auto model_list = LoadModelListNoLock(); if (model_list.has_error()) { CTL_WRN(model_list.error()); - std::cout << "Test: " << model_list.error(); return cpp::fail(model_list.error()); } if (IsUnique(model_list.value(), new_entry.model, new_entry.model_alias)) { diff --git a/engine/e2e-test/test_api_engine_install.py b/engine/e2e-test/test_api_engine_install.py index 749b45dd3..b0fbb6c9c 100644 --- a/engine/e2e-test/test_api_engine_install.py +++ b/engine/e2e-test/test_api_engine_install.py @@ -18,5 +18,17 @@ def setup_and_teardown(self): stop_server() def test_engines_install_llamacpp_should_be_successful(self): - response = requests.post("http://localhost:3928/engines/install/llama-cpp") + response = requests.post("http://localhost:3928/v1/engines/llama-cpp") + assert response.status_code == 200 + + def test_engines_install_llamacpp_specific_version_and_variant(self): + response = requests.post( + "http://localhost:3928/v1/engines/llama-cpp?version=v0.1.35-27.10.24&variant=linux-amd64-avx-cuda-11-7" + ) + assert response.status_code == 200 + + def test_engines_install_llamacpp_specific_version_and_null_variant(self): + response = requests.post( + "http://localhost:3928/v1/engines/llama-cpp?version=v0.1.35-27.10.24" + ) assert response.status_code == 200 diff --git a/engine/e2e-test/test_api_engine_uninstall.py b/engine/e2e-test/test_api_engine_uninstall.py index c171be8ee..491bc2d27 100644 --- a/engine/e2e-test/test_api_engine_uninstall.py +++ b/engine/e2e-test/test_api_engine_uninstall.py @@ -18,5 +18,47 @@ def setup_and_teardown(self): stop_server() def test_engines_uninstall_llamacpp_should_be_successful(self): - response = requests.delete("http://localhost:3928/engines/llama-cpp") + # install first + requests.post("http://localhost:3928/v1/engines/llama-cpp") + + response = requests.delete("http://localhost:3928/v1/engines/llama-cpp") + assert response.status_code == 200 + + def test_engines_uninstall_llamacpp_with_only_version_should_be_failed(self): + # install first + install_response = requests.post( + "http://localhost:3928/v1/engines/llama-cpp?version=v0.1.35" + ) + assert install_response.status_code == 200 + + response = requests.delete( + "http://localhost:3928/v1/engines/llama-cpp?version=v0.1.35" + ) + assert response.status_code == 400 + assert response.json()["message"] == "No variant provided" + + def test_engines_uninstall_llamacpp_with_variant_should_be_successful(self): + # install first + install_response = requests.post( + "http://localhost:3928/v1/engines/llama-cpp?variant=mac-arm64" + ) + assert install_response.status_code == 200 + + 
response = requests.delete( + "http://localhost:3928/v1/engines/llama-cpp?variant=mac-arm64" + ) + assert response.status_code == 200 + + def test_engines_uninstall_llamacpp_with_specific_variant_and_version_should_be_successful( + self, + ): + # install first + install_response = requests.post( + "http://localhost:3928/v1/engines/llama-cpp?variant=mac-arm64&version=v0.1.35" + ) + assert install_response.status_code == 200 + + response = requests.delete( + "http://localhost:3928/v1/engines/llama-cpp?variant=mac-arm64&version=v0.1.35" + ) assert response.status_code == 200 diff --git a/engine/e2e-test/test_api_engine_update.py b/engine/e2e-test/test_api_engine_update.py new file mode 100644 index 000000000..23939f038 --- /dev/null +++ b/engine/e2e-test/test_api_engine_update.py @@ -0,0 +1,43 @@ +import pytest +import requests +from test_runner import ( + start_server, + stop_server, + wait_for_websocket_download_success_event, +) + + +class TestApiEngineUpdate: + + @pytest.fixture(autouse=True) + def setup_and_teardown(self): + # Setup + success = start_server() + if not success: + raise Exception("Failed to start server") + requests.delete("http://localhost:3928/v1/engines/llama-cpp") + + yield + requests.delete("http://localhost:3928/v1/engines/llama-cpp") + + # Teardown + stop_server() + + @pytest.mark.asyncio + async def test_engines_update_should_be_successfully(self): + requests.post("http://localhost:3928/v1/engines/llama-cpp?version=0.1.34") + response = requests.post("http://localhost:3928/v1/engines/llama-cpp/update") + assert response.status_code == 200 + + @pytest.mark.asyncio + async def test_engines_update_llamacpp_should_be_failed_if_already_latest(self): + requests.post("http://localhost:3928/v1/engines/llama-cpp") + await wait_for_websocket_download_success_event(timeout=None) + get_engine_response = requests.get("http://localhost:3928/v1/engines/llama-cpp") + assert len(get_engine_response.json()) > 0, "Response list should not be empty" + + response = requests.post("http://localhost:3928/v1/engines/llama-cpp/update") + assert ( + "already up-to-date" in response.json()["message"] + ), "Should display error message" + assert response.status_code == 400 diff --git a/engine/e2e-test/test_api_model_delete.py b/engine/e2e-test/test_api_model_delete.py index f45768c66..7415a3d5a 100644 --- a/engine/e2e-test/test_api_model_delete.py +++ b/engine/e2e-test/test_api_model_delete.py @@ -1,6 +1,5 @@ import pytest import requests -from test_runner import popen, run from test_runner import start_server, stop_server diff --git a/engine/e2e-test/test_api_model_start.py b/engine/e2e-test/test_api_model_start.py index 216fad570..3f4172d68 100644 --- a/engine/e2e-test/test_api_model_start.py +++ b/engine/e2e-test/test_api_model_start.py @@ -11,7 +11,7 @@ def setup_and_teardown(self): success = start_server() if not success: raise Exception("Failed to start server") - run("Install Engine", ["engines", "install", "llama-cpp"], timeout=None) + requests.post("http://localhost:3928/v1/engines/llama-cpp") run("Delete model", ["models", "delete", "tinyllama:gguf"]) run( "Pull model", diff --git a/engine/e2e-test/test_api_model_stop.py b/engine/e2e-test/test_api_model_stop.py index 00d7482fa..218331b98 100644 --- a/engine/e2e-test/test_api_model_stop.py +++ b/engine/e2e-test/test_api_model_stop.py @@ -1,6 +1,6 @@ import pytest import requests -from test_runner import run, start_server, stop_server +from test_runner import start_server, stop_server class TestApiModelStop: @@ -12,9 +12,10 @@ def 
setup_and_teardown(self):
         if not success:
             raise Exception("Failed to start server")
-        run("Install Engine", ["engines", "install", "llama-cpp"], timeout=None)
+        requests.post("http://localhost:3928/v1/engines/llama-cpp")
 
         yield
+        requests.delete("http://localhost:3928/v1/engines/llama-cpp")
 
         # Teardown
         stop_server()
diff --git a/engine/e2e-test/test_cli_engine_get.py b/engine/e2e-test/test_cli_engine_get.py
index d783c3421..c26bedfae 100644
--- a/engine/e2e-test/test_cli_engine_get.py
+++ b/engine/e2e-test/test_cli_engine_get.py
@@ -1,11 +1,11 @@
 import platform
 
 import pytest
-from test_runner import run
-from test_runner import start_server, stop_server
+from test_runner import run, start_server, stop_server
+
 
 class TestCliEngineGet:
-    
+
     @pytest.fixture(autouse=True)
     def setup_and_teardown(self):
         # Setup
@@ -20,9 +20,7 @@ def setup_and_teardown(self):
 
     @pytest.mark.skipif(platform.system() != "Windows", reason="Windows-specific test")
     def test_engines_get_tensorrt_llm_should_not_be_incompatible(self):
-        exit_code, output, error = run(
-            "Get engine", ["engines", "get", "tensorrt-llm"]
-        )
+        exit_code, output, error = run("Get engine", ["engines", "get", "tensorrt-llm"])
         assert exit_code == 0, f"Get engine failed with error: {error}"
         assert (
             "Incompatible" not in output
@@ -37,9 +35,7 @@ def test_engines_get_onnx_should_not_be_incompatible(self):
         ), "onnxruntime should be Ready or Not Installed on Windows"
 
     def test_engines_get_llamacpp_should_not_be_incompatible(self):
-        exit_code, output, error = run(
-            "Get engine", ["engines", "get", "llama-cpp"]
-        )
+        exit_code, output, error = run("Get engine", ["engines", "get", "llama-cpp"])
         assert exit_code == 0, f"Get engine failed with error: {error}"
         assert (
             "Incompatible" not in output
@@ -47,23 +43,25 @@ def test_engines_get_llamacpp_should_not_be_incompatible(self):
 
     @pytest.mark.skipif(platform.system() != "Darwin", reason="macOS-specific test")
     def test_engines_get_tensorrt_llm_should_be_incompatible_on_macos(self):
-        exit_code, output, error = run(
-            "Get engine", ["engines", "get", "tensorrt-llm"]
-        )
+        exit_code, output, error = run("Get engine", ["engines", "get", "tensorrt-llm"])
         assert exit_code == 0, f"Get engine failed with error: {error}"
         assert (
-            "Incompatible" in output
+            "is not supported on" in output
         ), "tensorrt-llm should be Incompatible on MacOS"
 
     @pytest.mark.skipif(platform.system() != "Darwin", reason="macOS-specific test")
    def test_engines_get_onnx_should_be_incompatible_on_macos(self):
         exit_code, output, error = run("Get engine", ["engines", "get", "onnxruntime"])
         assert exit_code == 0, f"Get engine failed with error: {error}"
-        assert "Incompatible" in output, "onnxruntime should be Incompatible on MacOS"
+        assert (
+            "is not supported on" in output
+        ), "onnxruntime should be Incompatible on MacOS"
 
     @pytest.mark.skipif(platform.system() != "Linux", reason="Linux-specific test")
     def test_engines_get_onnx_should_be_incompatible_on_linux(self):
         exit_code, output, error = run("Get engine", ["engines", "get", "onnxruntime"])
         print(output)
         assert exit_code == 0, f"Get engine failed with error: {error}"
-        assert "Incompatible" in output, "onnxruntime should be Incompatible on Linux"
+        assert (
+            "is not supported on" in output
+        ), "onnxruntime should be Incompatible on Linux"
diff --git a/engine/e2e-test/test_cli_engine_install.py b/engine/e2e-test/test_cli_engine_install.py
index 572e62ed9..6c8c4932b 100644
--- a/engine/e2e-test/test_cli_engine_install.py
+++ b/engine/e2e-test/test_cli_engine_install.py
@@ -1,9 +1,9 @@
 import
platform import tempfile -import os -from pathlib import Path + import pytest -from test_runner import run +import requests +from test_runner import run, start_server, stop_server class TestCliEngineInstall: @@ -20,10 +20,13 @@ def setup_and_teardown(self): def test_engines_install_llamacpp_should_be_successfully(self): exit_code, output, error = run( - "Install Engine", ["engines", "install", "llama-cpp"], timeout=None, capture = False + "Install Engine", + ["engines", "install", "llama-cpp"], + timeout=None, + capture=False, ) - root = Path.home() - assert os.path.exists(root / "cortexcpp" / "engines" / "cortex.llamacpp" / "version.txt") + response = requests.get("http://127.0.0.1:3928/v1/engines/llama-cpp") + assert len(response.json()) > 0 assert exit_code == 0, f"Install engine failed with error: {error}" @pytest.mark.skipif(platform.system() != "Darwin", reason="macOS-specific test") @@ -31,7 +34,7 @@ def test_engines_install_onnx_on_macos_should_be_failed(self): exit_code, output, error = run( "Install Engine", ["engines", "install", "onnxruntime"] ) - assert "No variant found" in output, "Should display error message" + assert "is not supported on" in output, "Should display error message" assert exit_code == 0, f"Install engine failed with error: {error}" @pytest.mark.skipif(platform.system() != "Darwin", reason="macOS-specific test") @@ -39,27 +42,46 @@ def test_engines_install_onnx_on_tensorrt_should_be_failed(self): exit_code, output, error = run( "Install Engine", ["engines", "install", "tensorrt-llm"] ) - assert "No variant found" in output, "Should display error message" + assert "is not supported on" in output, "Should display error message" assert exit_code == 0, f"Install engine failed with error: {error}" - + def test_engines_install_pre_release_llamacpp(self): + engine_version = "v0.1.29" exit_code, output, error = run( - "Install Engine", ["engines", "install", "llama-cpp", "-v", "v0.1.29"], timeout=None, capture = False + "Install Engine", + ["engines", "install", "llama-cpp", "-v", engine_version], + timeout=None, + capture=False, ) - root = Path.home() - assert os.path.exists(root / "cortexcpp" / "engines" / "cortex.llamacpp" / "version.txt") + response = requests.get("http://127.0.0.1:3928/v1/engines/llama-cpp") + assert len(response.json()) > 0 + is_engine_version_exist = False + for item in response.json(): + # Check if 'version' key exists and matches target + if "version" in item and item["version"] == engine_version: + is_engine_version_exist = True + break + + # loop through all the installed response, expect we find + assert is_engine_version_exist, f"Engine version {engine_version} is not found" assert exit_code == 0, f"Install engine failed with error: {error}" def test_engines_should_fallback_to_download_llamacpp_engine_if_not_exists(self): exit_code, output, error = run( - "Install Engine", ["engines", "install", "llama-cpp", "-s", tempfile.gettempdir()], timeout=None + "Install Engine", + ["engines", "install", "llama-cpp", "-s", tempfile.gettempdir()], + timeout=None, ) - assert "Start downloading" in output, "Should display downloading message" + # response = requests.get("http://127.0.0.1:3928/v1/engines/llama-cpp") + # assert len(response.json()) > 0 + assert "downloaded successfully" in output assert exit_code == 0, f"Install engine failed with error: {error}" - + def test_engines_should_not_perform_with_dummy_path(self): exit_code, output, error = run( - "Install Engine", ["engines", "install", "llama-cpp", "-s", "abcpod"], timeout=None + "Install 
Engine", + ["engines", "install", "llama-cpp", "-s", "abcpod"], + timeout=None, ) assert "Folder does not exist" in output, "Should display error" assert exit_code == 0, f"Install engine failed with error: {error}" diff --git a/engine/e2e-test/test_cli_engine_list.py b/engine/e2e-test/test_cli_engine_list.py index ede2879d9..5cd9a92fe 100644 --- a/engine/e2e-test/test_cli_engine_list.py +++ b/engine/e2e-test/test_cli_engine_list.py @@ -1,11 +1,11 @@ import platform import pytest -from test_runner import run -from test_runner import start_server, stop_server +from test_runner import run, start_server, stop_server + class TestCliEngineList: - + @pytest.fixture(autouse=True) def setup_and_teardown(self): # Setup @@ -17,21 +17,18 @@ def setup_and_teardown(self): # Teardown stop_server() - + @pytest.mark.skipif(platform.system() != "Windows", reason="Windows-specific test") def test_engines_list_run_successfully_on_windows(self): exit_code, output, error = run("List engines", ["engines", "list"]) assert exit_code == 0, f"List engines failed with error: {error}" - assert "llama-cpp" in output @pytest.mark.skipif(platform.system() != "Darwin", reason="macOS-specific test") def test_engines_list_run_successfully_on_macos(self): exit_code, output, error = run("List engines", ["engines", "list"]) assert exit_code == 0, f"List engines failed with error: {error}" - assert "llama-cpp" in output @pytest.mark.skipif(platform.system() != "Linux", reason="Linux-specific test") def test_engines_list_run_successfully_on_linux(self): exit_code, output, error = run("List engines", ["engines", "list"]) assert exit_code == 0, f"List engines failed with error: {error}" - assert "llama-cpp" in output \ No newline at end of file diff --git a/engine/e2e-test/test_cli_engine_uninstall.py b/engine/e2e-test/test_cli_engine_uninstall.py index 23b621b0e..d95e21e7b 100644 --- a/engine/e2e-test/test_cli_engine_uninstall.py +++ b/engine/e2e-test/test_cli_engine_uninstall.py @@ -1,6 +1,12 @@ import pytest -from test_runner import run -from test_runner import start_server, stop_server +import requests +from test_runner import ( + run, + start_server, + stop_server, + wait_for_websocket_download_success_event, +) + class TestCliEngineUninstall: @@ -11,17 +17,14 @@ def setup_and_teardown(self): if not success: raise Exception("Failed to start server") - # Preinstall llamacpp engine - run("Install Engine", ["engines", "install", "llama-cpp"],timeout = None, capture = False) - yield - # Teardown - # Clean up, removing installed engine - run("Uninstall Engine", ["engines", "uninstall", "llama-cpp"]) stop_server() - def test_engines_uninstall_llamacpp_should_be_successfully(self): + @pytest.mark.asyncio + async def test_engines_uninstall_llamacpp_should_be_successfully(self): + requests.post("http://127.0.0.1:3928/v1/engines/llama-cpp") + await wait_for_websocket_download_success_event(timeout=None) exit_code, output, error = run( "Uninstall engine", ["engines", "uninstall", "llama-cpp"] ) diff --git a/engine/e2e-test/test_cortex_update.py b/engine/e2e-test/test_cortex_update.py index 2d7d652ec..8f6f8d7f8 100644 --- a/engine/e2e-test/test_cortex_update.py +++ b/engine/e2e-test/test_cortex_update.py @@ -1,7 +1,8 @@ +import os +import tempfile + import pytest from test_runner import run -import tempfile -import os class TestCortexUpdate: @@ -12,4 +13,4 @@ def test_cortex_update(self): exit_code, output, error = run("Update cortex", ["update"]) assert exit_code == 0, "Something went wrong" assert "Updated cortex sucessfully" in output 
-        assert os.path.exists(os.path.join(tempfile.gettempdir()), 'cortex') == False
+        assert not os.path.exists(os.path.join(tempfile.gettempdir(), "cortex"))
diff --git a/engine/main.cc b/engine/main.cc
index afe843817..f8c20410f 100644
--- a/engine/main.cc
+++ b/engine/main.cc
@@ -47,7 +47,11 @@ void RunServer(std::optional port) {
   if (port.has_value() && *port != std::stoi(config.apiServerPort)) {
     auto config_path = file_manager_utils::GetConfigurationPath();
     config.apiServerPort = std::to_string(*port);
-    config_yaml_utils::DumpYamlConfig(config, config_path.string());
+    auto result =
+        config_yaml_utils::DumpYamlConfig(config, config_path.string());
+    if (result.has_error()) {
+      CTL_ERR("Error updating " << config_path.string() << ": "
+                                << result.error());
+    }
   }
   std::cout << "Host: " << config.apiServerHost
             << " Port: " << config.apiServerPort << "\n";
@@ -89,10 +93,11 @@ void RunServer(std::optional port) {
   auto event_queue_ptr = std::make_shared();
   cortex::event::EventProcessor event_processor(event_queue_ptr);
 
-  auto inference_svc = std::make_shared();
   auto download_service = std::make_shared(event_queue_ptr);
   auto engine_service = std::make_shared(download_service);
+  auto inference_svc =
+      std::make_shared(engine_service);
   auto model_service =
       std::make_shared(download_service, inference_svc);
 
@@ -101,7 +106,8 @@ void RunServer(std::optional port) {
   auto model_ctl = std::make_shared(model_service, engine_service);
   auto event_ctl = std::make_shared(event_queue_ptr);
   auto pm_ctl = std::make_shared();
-  auto server_ctl = std::make_shared(inference_svc);
+  auto server_ctl =
+      std::make_shared(inference_svc, engine_service);
 
   drogon::app().registerController(engine_ctl);
   drogon::app().registerController(model_ctl);
@@ -148,7 +154,12 @@ int main(int argc, char* argv[]) {
     }
   }
 
-  { file_manager_utils::CreateConfigFileIfNotExist(); }
+  {
+    auto result = file_manager_utils::CreateConfigFileIfNotExist();
+    if (result.has_error()) {
+      LOG_ERROR << "Error creating config file: " << result.error();
+    }
+  }
 
   // Delete temporary file if it exists
   auto temp =
diff --git a/engine/services/download_service.cc b/engine/services/download_service.cc
index 0e163bfdd..43f0d731b 100644
--- a/engine/services/download_service.cc
+++ b/engine/services/download_service.cc
@@ -1,84 +1,26 @@
 #include "download_service.h"
 
 #include
-#include
 #include
 #include
 #include
 #include
 #include
 #include
+#include "utils/curl_utils.h"
 #include "utils/format_utils.h"
-#include "utils/huggingface_utils.h"
 #include "utils/logging_utils.h"
 #include "utils/result.hpp"
-#include "utils/url_parser.h"
-
-#ifdef _WIN32
-#define ftell64(f) _ftelli64(f)
-#define fseek64(f, o, w) _fseeki64(f, o, w)
-#else
-#define ftell64(f) ftello(f)
-#define fseek64(f, o, w) fseeko(f, o, w)
-#endif
 
 namespace {
 size_t WriteCallback(char* ptr, size_t size, size_t nmemb, void* userdata) {
   size_t written = fwrite(ptr, size, nmemb, (FILE*)userdata);
   return written;
 }
-
-inline curl_slist* CreateHeaders(const std::string& url) {
-  try {
-    auto url_obj = url_parser::FromUrlString(url);
-    if (url_obj.host == huggingface_utils::kHuggingfaceHost) {
-      return huggingface_utils::CreateCurlHfHeaders();
-    } else {
-      return nullptr;
-    }
-  } catch (const std::exception& e) {
-    CTL_WRN(e.what());
-    return nullptr;
-  }
-}
 }  // namespace
 
-cpp::result DownloadService::VerifyDownloadTask(
-    DownloadTask& task) const noexcept {
-  CLI_LOG("Validating download items, please wait..");
-
-  auto total_download_size{0};
-  std::optional err_msg = std::nullopt;
-
-  for (auto& item :
task.items) { - auto file_size = GetFileSize(item.downloadUrl); - if (file_size.has_error()) { - err_msg = file_size.error(); - break; - } - - item.bytes = file_size.value(); - total_download_size += file_size.value(); - } - - if (err_msg.has_value()) { - CTL_ERR(err_msg.value()); - return cpp::fail(err_msg.value()); - } - - return {}; -} - cpp::result DownloadService::AddDownloadTask( DownloadTask& task, std::optional callback) noexcept { - auto validating_result = VerifyDownloadTask(task); - if (validating_result.has_error()) { - return cpp::fail(validating_result.error()); - } - - // all items are valid, start downloading - // if any item from the task failed to download, the whole task will be - // considered failed std::optional dl_err_msg = std::nullopt; bool has_task_done = false; for (const auto& item : task.items) { @@ -92,7 +34,6 @@ cpp::result DownloadService::AddDownloadTask( } } if (dl_err_msg.has_value()) { - // CTL_ERR(dl_err_msg.value()); return cpp::fail(dl_err_msg.value()); } @@ -104,9 +45,8 @@ cpp::result DownloadService::AddDownloadTask( cpp::result DownloadService::GetFileSize( const std::string& url) const noexcept { - CURL* curl; - curl = curl_easy_init(); + auto curl = curl_easy_init(); if (!curl) { return cpp::fail(static_cast("Failed to init CURL")); } @@ -114,10 +54,19 @@ cpp::result DownloadService::GetFileSize( curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); - if (auto headers = CreateHeaders(url); headers) { - curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + + auto headers = curl_utils::GetHeaders(url); + if (headers.has_value()) { + curl_slist* curl_headers = nullptr; + + for (const auto& [key, value] : headers.value()) { + auto header = key + ": " + value; + curl_headers = curl_slist_append(curl_headers, header.c_str()); + } + + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, curl_headers); } - CURLcode res = curl_easy_perform(curl); + auto res = curl_easy_perform(curl); if (res != CURLE_OK) { return cpp::fail(static_cast( @@ -135,11 +84,7 @@ cpp::result DownloadService::Download( const DownloadItem& download_item) noexcept { CTL_INF("Absolute file output: " << download_item.localPath.string()); - CURL* curl; - FILE* file; - CURLcode res; - - curl = curl_easy_init(); + auto curl = curl_easy_init(); if (!curl) { return cpp::fail(static_cast("Failed to init CURL")); } @@ -190,15 +135,23 @@ cpp::result DownloadService::Download( } } - file = fopen(download_item.localPath.string().c_str(), mode.c_str()); + auto file = fopen(download_item.localPath.string().c_str(), mode.c_str()); if (!file) { return cpp::fail("Failed to open output file " + download_item.localPath.string()); } curl_easy_setopt(curl, CURLOPT_URL, download_item.downloadUrl.c_str()); - if (auto headers = CreateHeaders(download_item.downloadUrl); headers) { - curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + auto headers = curl_utils::GetHeaders(download_item.downloadUrl); + if (headers.has_value()) { + curl_slist* curl_headers = nullptr; + + for (const auto& [key, value] : headers.value()) { + auto header = key + ": " + value; + curl_headers = curl_slist_append(curl_headers, header.c_str()); + } + + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, curl_headers); } curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &WriteCallback); curl_easy_setopt(curl, CURLOPT_WRITEDATA, file); @@ -215,7 +168,7 @@ cpp::result DownloadService::Download( } } - res = curl_easy_perform(curl); + auto res = 
curl_easy_perform(curl); if (res != CURLE_OK) { return cpp::fail("Download failed! Error: " + @@ -250,16 +203,15 @@ void DownloadService::ProcessTask(DownloadTask& task) { active_task_ = std::make_shared(task); - for (auto& item : task.items) { - CURL* handle = curl_easy_init(); + for (const auto& item : task.items) { + auto handle = curl_easy_init(); if (handle == nullptr) { // skip the task CTL_ERR("Failed to init curl!"); return; } - FILE* file; - file = fopen(item.localPath.string().c_str(), "wb"); + auto file = fopen(item.localPath.string().c_str(), "wb"); if (!file) { CTL_ERR("Failed to open output file " + item.localPath.string()); return; @@ -271,9 +223,18 @@ void DownloadService::ProcessTask(DownloadTask& task) { }); downloading_data_map_.insert(std::make_pair(item.id, dl_data_ptr)); - if (auto headers = CreateHeaders(item.downloadUrl); headers) { - curl_easy_setopt(handle, CURLOPT_HTTPHEADER, headers); + auto headers = curl_utils::GetHeaders(item.downloadUrl); + if (headers.has_value()) { + curl_slist* curl_headers = nullptr; + + for (const auto& [key, value] : headers.value()) { + auto header = key + ": " + value; + curl_headers = curl_slist_append(curl_headers, header.c_str()); + } + + curl_easy_setopt(handle, CURLOPT_HTTPHEADER, curl_headers); } + curl_easy_setopt(handle, CURLOPT_URL, item.downloadUrl.c_str()); curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, WriteCallback); curl_easy_setopt(handle, CURLOPT_WRITEDATA, file); @@ -292,8 +253,8 @@ void DownloadService::ProcessTask(DownloadTask& task) { DownloadEvent{.type_ = DownloadEventType::DownloadStarted, .download_task_ = task}); - int still_running = 0; - bool is_terminated = false; + auto still_running = 0; + auto is_terminated = false; do { curl_multi_perform(multi_handle_, &still_running); curl_multi_wait(multi_handle_, NULL, 0, MAX_WAIT_MSECS, NULL); @@ -321,7 +282,7 @@ void DownloadService::ProcessTask(DownloadTask& task) { } ProcessCompletedTransfers(); - for (auto pair : task_handles) { + for (const auto& pair : task_handles) { curl_multi_remove_handle(multi_handle_, pair.first); curl_easy_cleanup(pair.first); fclose(pair.second); @@ -389,11 +350,6 @@ void DownloadService::ProcessCompletedTransfers() { cpp::result DownloadService::AddTask( DownloadTask& task, std::function callback) { - auto validate_result = VerifyDownloadTask(task); - if (validate_result.has_error()) { - return cpp::fail(validate_result.error()); - } - { std::lock_guard lock(callbacks_mutex_); callbacks_[task.id] = std::move(callback); diff --git a/engine/services/download_service.h b/engine/services/download_service.h index 17270b0be..2fd39bef0 100644 --- a/engine/services/download_service.h +++ b/engine/services/download_service.h @@ -2,7 +2,6 @@ #include #include -#include #include #include #include @@ -83,9 +82,6 @@ class DownloadService { DownloadService* download_service; }; - cpp::result VerifyDownloadTask( - DownloadTask& task) const noexcept; - cpp::result Download( const std::string& download_id, const DownloadItem& download_item) noexcept; diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 9d2ef42c0..ac5b55a4b 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -1,18 +1,19 @@ #include "engine_service.h" -#include +#include +#include #include #include "algorithm" #include "utils/archive_utils.h" #include "utils/engine_constants.h" #include "utils/engine_matcher_utils.h" #include "utils/file_manager_utils.h" +#include "utils/github_release_utils.h" +#include 
"utils/logging_utils.h" #include "utils/result.hpp" #include "utils/semantic_version_utils.h" #include "utils/system_info_utils.h" #include "utils/url_parser.h" -using json = nlohmann::json; - namespace { std::string GetSuitableCudaVersion(const std::string& engine, const std::string& cuda_driver_version) { @@ -55,81 +56,43 @@ std::string Repo2Engine(const std::string& r) { } return r; }; -} // namespace - -cpp::result EngineService::GetEngineInfo( - const std::string& engine) const { - if (std::find(kSupportEngines.begin(), kSupportEngines.end(), engine) == - kSupportEngines.end()) { - return cpp::fail("Engine " + engine + " is not supported!"); +std::string GetEnginePath(std::string_view e) { + if (e == kLlamaRepo) { + return kLlamaLibPath; + } else if (e == kOnnxRepo) { + return kOnnxLibPath; + } else if (e == kTrtLlmRepo) { + return kTensorrtLlmPath; } + return kLlamaLibPath; +}; +} // namespace - auto engine_status_list = GetEngineInfoList(); - - return *std::find_if( - engine_status_list.begin(), engine_status_list.end(), - [&engine](const EngineInfo& e) { return e.name == engine; }); -} - -std::vector EngineService::GetEngineInfoList() const { - auto ecp = file_manager_utils::GetEnginesContainerPath(); - - std::string onnx_status{kIncompatible}; - std::string llamacpp_status = - std::filesystem::exists(ecp / kLlamaRepo) ? kReady : kNotInstalled; - std::string tensorrt_status{kIncompatible}; - -#ifdef _WIN32 - onnx_status = - std::filesystem::exists(ecp / kOnnxRepo) ? kReady : kNotInstalled; - tensorrt_status = - std::filesystem::exists(ecp / kTrtLlmRepo) ? kReady : kNotInstalled; -#elif defined(__linux__) - tensorrt_status = - std::filesystem::exists(ecp / kTrtLlmRepo) ? kReady : kNotInstalled; -#endif - std::vector engines = { - {.name = kOnnxEngine, - .description = "This extension enables chat completion API calls using " - "the Onnx engine", - .format = "ONNX", - .product_name = kOnnxEngine, - .status = onnx_status}, - {.name = kLlamaEngine, - .description = "This extension enables chat completion API calls using " - "the LlamaCPP engine", - .format = "GGUF", - .product_name = kLlamaEngine, - .status = llamacpp_status}, - {.name = kTrtLlmEngine, - .description = "This extension enables chat completion API calls using " - "the TensorrtLLM engine", - .format = "TensorRT Engines", - .product_name = kTrtLlmEngine, - .status = tensorrt_status}, - }; +cpp::result EngineService::InstallEngineAsyncV2( + const std::string& engine, const std::string& version, + const std::optional variant_name) { + auto ne = NormalizeEngine(engine); + CTL_INF("InstallEngineAsyncV2: " << ne << ", " << version << ", " + << variant_name.value_or("")); + auto os = hw_inf_.sys_inf->os; + if (os == kMacOs && (ne == kOnnxRepo || ne == kTrtLlmRepo)) { + return cpp::fail("Engine " + ne + " is not supported on macOS"); + } - for (auto& engine : engines) { - if (engine.status == kReady) { - // try to read the version.txt - auto engine_info_path = file_manager_utils::GetEnginesContainerPath() / - NormalizeEngine(engine.name) / "version.txt"; - if (!std::filesystem::exists(engine_info_path)) { - continue; - } - try { - auto node = YAML::LoadFile(engine_info_path.string()); - engine.version = node["version"].as(); - engine.variant = node["name"].as(); - } catch (const YAML::Exception& e) { - CTL_ERR("Error reading version.txt: " << e.what()); - continue; - } - } + if (os == kLinuxOs && ne == kOnnxRepo) { + return cpp::fail("Engine " + ne + " is not supported on Linux"); } - return engines; + auto result = 
DownloadEngineV2(ne, version, variant_name); + if (result.has_error()) { + return cpp::fail(result.error()); + } + auto cuda_res = DownloadCuda(ne, true); + if (cuda_res.has_error()) { + return cpp::fail(cuda_res.error()); + } + return {}; } cpp::result EngineService::InstallEngineAsync( @@ -214,18 +177,36 @@ cpp::result EngineService::UnzipEngine( return true; } -cpp::result EngineService::UninstallEngine( - const std::string& engine) { +cpp::result EngineService::UninstallEngineVariant( + const std::string& engine, const std::optional version, + const std::optional variant) { auto ne = NormalizeEngine(engine); - auto ecp = file_manager_utils::GetEnginesContainerPath(); - auto engine_path = ecp / ne; - if (!std::filesystem::exists(engine_path)) { - return cpp::fail("Engine " + ne + " is not installed!"); + std::optional path_to_remove = std::nullopt; + if (version == std::nullopt && variant == std::nullopt) { + // if no version and variant provided, remove all engines variant of that engine + path_to_remove = file_manager_utils::GetEnginesContainerPath() / ne; + } else if (version != std::nullopt && variant != std::nullopt) { + // if both version and variant are provided, we only remove that variant + path_to_remove = file_manager_utils::GetEnginesContainerPath() / ne / + variant.value() / version.value(); + } else if (version == std::nullopt) { + // if only have variant, we remove all of that variant + path_to_remove = + file_manager_utils::GetEnginesContainerPath() / ne / variant.value(); + } else { + return cpp::fail("No variant provided"); + } + + if (path_to_remove == std::nullopt) { + return cpp::fail("Uninstall engine variant failed!"); + } + if (!std::filesystem::exists(path_to_remove.value())) { + return cpp::fail("Engine variant does not exist!"); } try { - std::filesystem::remove_all(engine_path); + std::filesystem::remove_all(path_to_remove.value()); CTL_INF("Engine " << ne << " uninstalled successfully!"); return true; } catch (const std::exception& e) { @@ -234,139 +215,198 @@ cpp::result EngineService::UninstallEngine( } } -cpp::result EngineService::DownloadEngine( - const std::string& engine, const std::string& version, bool async) { +cpp::result EngineService::DownloadEngineV2( + const std::string& engine, const std::string& version, + const std::optional variant_name) { + auto normalized_version = version == "latest" + ? "latest" + : string_utils::RemoveSubstring(version, "v"); + + auto res = GetEngineVariants(engine, version); + if (res.has_error()) { + return cpp::fail("Failed to fetch engine releases: " + res.error()); + } + + if (res.value().empty()) { + return cpp::fail("No release found for " + version); + } - // Check if GITHUB_TOKEN env exist - const char* github_token = std::getenv("GITHUB_TOKEN"); + std::optional selected_variant = std::nullopt; - auto get_params = [&engine, &version]() -> std::vector { - if (version == "latest") { - return {"repos", "janhq", engine, "releases", version}; - } else { - return {"repos", "janhq", engine, "releases"}; + if (variant_name.has_value()) { + auto latest_version_semantic = normalized_version == "latest" + ? 
res.value()[0].version + : normalized_version; + auto merged_variant_name = engine + "-" + latest_version_semantic + "-" + + variant_name.value() + ".tar.gz"; + + for (const auto& asset : res.value()) { + if (asset.name == merged_variant_name) { + selected_variant = asset; + break; + } + } + } else { + std::vector variants; + for (const auto& asset : res.value()) { + variants.push_back(asset.name); } - }; - auto url_obj = url_parser::Url{ - .protocol = "https", - .host = "api.github.com", - .pathParams = get_params(), - }; + auto matched_variant_name = GetMatchedVariant(engine, variants); + for (const auto& v : res.value()) { + if (v.name == matched_variant_name) { + selected_variant = v; + break; + } + } + } - httplib::Client cli(url_obj.GetProtocolAndHost()); + if (selected_variant == std::nullopt) { + return cpp::fail("Failed to find a suitable variant for " + engine); + } + auto normalize_version = "v" + selected_variant->version; - httplib::Headers headers; + auto variant_folder_name = engine_matcher_utils::GetVariantFromNameAndVersion( + selected_variant->name, engine, selected_variant->version); - if (github_token) { - std::string auth_header = "token " + std::string(github_token); - headers.insert({"Authorization", auth_header}); - CTL_INF("Using authentication with GitHub token."); - } else { - CTL_INF("No GitHub token found. Sending request without authentication."); - } - - if (auto res = cli.Get(url_obj.GetPathAndQuery(), headers); - res->status == httplib::StatusCode::OK_200) { - auto body = json::parse(res->body); - auto get_data = - [&version](const nlohmann::json& json_data) -> nlohmann::json { - for (auto& jr : json_data) { - // Get the latest or match version - if (auto tag = jr["tag_name"].get(); tag == version) { - return jr; - } - } - return nlohmann::json(); - }; + auto variant_folder_path = file_manager_utils::GetEnginesContainerPath() / + engine / variant_folder_name.value() / + normalize_version; - if (version != "latest") { - body = get_data(body); - } - if (body.empty()) { - return cpp::fail("No release found for " + version); - } + auto variant_path = variant_folder_path / selected_variant->name; + std::filesystem::create_directories(variant_folder_path); + CLI_LOG("variant_folder_path: " + variant_folder_path.string()); + auto on_finished = [this, engine, selected_variant, + normalize_version](const DownloadTask& finishedTask) { + // try to unzip the downloaded file + CLI_LOG("Engine zip path: " << finishedTask.items[0].localPath.string()); + CLI_LOG("Version: " + normalize_version); - auto assets = body["assets"]; - auto os_arch{hw_inf_.sys_inf->os + "-" + hw_inf_.sys_inf->arch}; + auto extract_path = finishedTask.items[0].localPath.parent_path(); - std::vector variants; - for (auto& asset : assets) { - auto asset_name = asset["name"].get(); - variants.push_back(asset_name); + archive_utils::ExtractArchive(finishedTask.items[0].localPath.string(), + extract_path.string(), true); + + auto variant = engine_matcher_utils::GetVariantFromNameAndVersion( + selected_variant->name, engine, normalize_version); + CLI_LOG("Extracted variant: " + variant.value()); + // set as default + auto res = + SetDefaultEngineVariant(engine, normalize_version, variant.value()); + if (res.has_error()) { + CTL_ERR("Failed to set default engine variant: " << res.error()); + } else { + CTL_INF("Set default engine variant: " << res.value().variant); } - CTL_INF("engine: " << engine); - CTL_INF("CUDA version: " << hw_inf_.cuda_driver_version); - auto matched_variant = 
GetMatchedVariant(engine, variants);
-    CTL_INF("Matched variant: " << matched_variant);
-    if (matched_variant.empty()) {
-      CTL_ERR("No variant found for " << os_arch);
-      return cpp::fail("No variant found for " + os_arch);
+    // remove the downloaded file
+    try {
+      std::filesystem::remove(finishedTask.items[0].localPath);
+    } catch (const std::exception& e) {
+      CTL_WRN("Could not delete file: " << e.what());
+    }
+    CTL_INF("Finished!");
+  };
 
-    for (auto& asset : assets) {
-      auto assetName = asset["name"].get();
-      if (assetName == matched_variant) {
-        auto download_url = asset["browser_download_url"].get();
-        auto file_name = asset["name"].get();
-        CTL_INF("Download url: " << download_url);
-
-        std::filesystem::path engine_folder_path =
-            file_manager_utils::GetContainerFolderPath(
-                file_manager_utils::DownloadTypeToString(
-                    DownloadType::Engine)) /
-            engine;
-
-        if (!std::filesystem::exists(engine_folder_path)) {
-          CTL_INF("Creating " << engine_folder_path.string());
-          std::filesystem::create_directories(engine_folder_path);
+  auto downloadTask{
+      DownloadTask{.id = engine,
+                   .type = DownloadType::Engine,
+                   .items = {DownloadItem{
+                       .id = engine,
+                       .downloadUrl = selected_variant->browser_download_url,
+                       .localPath = variant_path,
+                   }}}};
+
+  auto add_task_result = download_service_->AddTask(downloadTask, on_finished);
+  if (add_task_result.has_error()) {
+    return cpp::fail(add_task_result.error());
+  }
+  return {};
+}
+
+cpp::result EngineService::DownloadEngine(
+    const std::string& engine, const std::string& version, bool async) {
+  auto res = GetEngineVariants(engine, version);
+  if (res.has_error()) {
+    return cpp::fail("Failed to fetch engine releases: " + res.error());
+  }
+
+  if (res.value().empty()) {
+    return cpp::fail("No release found for " + version);
+  }
+
+  auto os_arch{hw_inf_.sys_inf->os + "-" + hw_inf_.sys_inf->arch};
+
+  std::vector variants;
+  for (const auto& asset : res.value()) {
+    variants.push_back(asset.name);
+  }
+
+  CTL_INF("engine: " << engine);
+  CTL_INF("CUDA version: " << hw_inf_.cuda_driver_version);
+  auto matched_variant = GetMatchedVariant(engine, variants);
+  CTL_INF("Matched variant: " << matched_variant);
+  if (matched_variant.empty()) {
+    CTL_ERR("No variant found for " << os_arch);
+    return cpp::fail("No variant found for " + os_arch);
+  }
+
+  for (const auto& asset : res.value()) {
+    if (asset.name == matched_variant) {
+      CTL_INF("Download url: " << asset.browser_download_url);
+
+      std::filesystem::path engine_folder_path =
+          file_manager_utils::GetContainerFolderPath(
+              file_manager_utils::DownloadTypeToString(DownloadType::Engine)) /
+          engine;
+
+      if (!std::filesystem::exists(engine_folder_path)) {
+        CTL_INF("Creating " << engine_folder_path.string());
+        std::filesystem::create_directories(engine_folder_path);
+      }
+      CTL_INF("Engine folder path: " << engine_folder_path.string() << "\n");
+      auto local_path = engine_folder_path / asset.name;
+      auto downloadTask{
+          DownloadTask{.id = engine,
+                       .type = DownloadType::Engine,
+                       .items = {DownloadItem{
+                           .id = engine,
+                           .downloadUrl = asset.browser_download_url,
+                           .localPath = local_path,
+                       }}}};
+
+      auto on_finished = [](const DownloadTask& finishedTask) {
+        // try to unzip the downloaded file
+        CTL_INF(
+            "Engine zip path: " << finishedTask.items[0].localPath.string());
+
+        std::filesystem::path extract_path =
+            finishedTask.items[0].localPath.parent_path().parent_path();
+
+        archive_utils::ExtractArchive(finishedTask.items[0].localPath.string(),
+                                      extract_path.string());
+
+        // remove the downloaded file
+        try {
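+          // best-effort cleanup: a failure to delete the archive is only logged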
std::filesystem::remove(finishedTask.items[0].localPath); + } catch (const std::exception& e) { + CTL_WRN("Could not delete file: " << e.what()); } + CTL_INF("Finished!"); + }; - CTL_INF("Engine folder path: " << engine_folder_path.string() << "\n"); - auto local_path = engine_folder_path / file_name; - auto downloadTask{DownloadTask{.id = Repo2Engine(engine), - .type = DownloadType::Engine, - .items = {DownloadItem{ - .id = Repo2Engine(engine), - .downloadUrl = download_url, - .localPath = local_path, - }}}}; - - auto on_finished = [](const DownloadTask& finishedTask) { - // try to unzip the downloaded file - CTL_INF( - "Engine zip path: " << finishedTask.items[0].localPath.string()); - - std::filesystem::path extract_path = - finishedTask.items[0].localPath.parent_path().parent_path(); - - archive_utils::ExtractArchive( - finishedTask.items[0].localPath.string(), extract_path.string()); - - // remove the downloaded file - try { - std::filesystem::remove(finishedTask.items[0].localPath); - } catch (const std::exception& e) { - CTL_WRN("Could not delete file: " << e.what()); - } - CTL_INF("Finished!"); - }; - if (async) { - auto res = download_service_->AddTask(downloadTask, on_finished); - if (res.has_error()) { - return cpp::fail(res.error()); - } - return true; - } else { - return download_service_->AddDownloadTask(downloadTask, on_finished); + if (async) { + auto res = download_service_->AddTask(downloadTask, on_finished); + if (res.has_error()) { + return cpp::fail(res.error()); } + return true; + } else { + return download_service_->AddDownloadTask(downloadTask, on_finished); } } - return true; - } else { - return cpp::fail("Failed to fetch engine release: " + engine); } + return true; } cpp::result EngineService::DownloadCuda( @@ -409,12 +449,12 @@ cpp::result EngineService::DownloadCuda( auto cuda_toolkit_url = url_parser::FromUrl(url_obj); - LOG_DEBUG << "Cuda toolkit download url: " << cuda_toolkit_url; + CTL_DBG("Cuda toolkit download url: " << cuda_toolkit_url); auto cuda_toolkit_local_path = file_manager_utils::GetContainerFolderPath( file_manager_utils::DownloadTypeToString(DownloadType::CudaToolkit)) / cuda_toolkit_file_name; - LOG_DEBUG << "Download to: " << cuda_toolkit_local_path.string(); + CTL_DBG("Download to: " << cuda_toolkit_local_path.string()); auto downloadCudaToolkitTask{DownloadTask{ .id = download_id, .type = DownloadType::CudaToolkit, @@ -424,10 +464,9 @@ cpp::result EngineService::DownloadCuda( }}; auto on_finished = [engine](const DownloadTask& finishedTask) { - auto engine_path = file_manager_utils::GetEnginesContainerPath() / engine; + auto engine_path = file_manager_utils::GetCudaToolkitPath(engine); archive_utils::ExtractArchive(finishedTask.items[0].localPath.string(), engine_path.string()); - try { std::filesystem::remove(finishedTask.items[0].localPath); } catch (std::exception& e) { @@ -464,3 +503,458 @@ std::string EngineService::GetMatchedVariant( } return matched_variant; } + +cpp::result, std::string> +EngineService::GetEngineReleases(const std::string& engine) const { + auto ne = NormalizeEngine(engine); + return github_release_utils::GetReleases("janhq", ne); +} + +cpp::result, std::string> +EngineService::GetEngineVariants(const std::string& engine, + const std::string& version) const { + auto ne = NormalizeEngine(engine); + auto engine_release = + github_release_utils::GetReleaseByVersion("janhq", ne, version); + + if (engine_release.has_error()) { + return cpp::fail("Failed to get engine release: " + engine_release.error()); + } + + 
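+  // keep only uploaded gzip archives; other release assets are not engine builds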
std::vector compatible_variants;
+  for (const auto& variant : engine_release.value().assets) {
+    if (variant.content_type != "application/gzip") {
+      continue;
+    }
+    if (variant.state != "uploaded") {
+      continue;
+    }
+    compatible_variants.push_back(variant);
+  }
+
+  if (compatible_variants.empty()) {
+    return cpp::fail("No compatible variants found for " + engine);
+  }
+
+  return compatible_variants;
+}
+
+cpp::result
+EngineService::SetDefaultEngineVariant(const std::string& engine,
+                                       const std::string& version,
+                                       const std::string& variant) {
+  auto ne = NormalizeEngine(engine);
+  auto is_installed = IsEngineVariantReady(engine, version, variant);
+  if (is_installed.has_error()) {
+    return cpp::fail(is_installed.error());
+  }
+
+  if (!is_installed.value()) {
+    return cpp::fail("Engine variant " + version + "-" + variant +
+                     " is not installed yet!");
+  }
+
+  if (IsEngineLoaded(ne)) {
+    CTL_INF("Engine " << ne << " is already loaded, unloading it");
+    auto unload_res = UnloadEngine(ne);
+    if (unload_res.has_error()) {
+      CTL_INF("Failed to unload engine: " << unload_res.error());
+      return cpp::fail(unload_res.error());
+    } else {
+      CTL_INF("Engine " << ne << " unloaded successfully");
+    }
+  }
+
+  auto normalized_version = string_utils::RemoveSubstring(version, "v");
+
+  auto config = file_manager_utils::GetCortexConfig();
+  config.llamacppVersion = "v" + normalized_version;
+  config.llamacppVariant = variant;
+  auto result = file_manager_utils::UpdateCortexConfig(config);
+  if (result.has_error()) {
+    return cpp::fail(result.error());
+  }
+
+  return DefaultEngineVariant{
+      .engine = engine,
+      .version = normalized_version,
+      .variant = variant,
+  };
+}
+
+cpp::result EngineService::IsEngineVariantReady(
+    const std::string& engine, const std::string& version,
+    const std::string& variant) {
+  auto ne = NormalizeEngine(engine);
+  auto normalized_version = string_utils::RemoveSubstring(version, "v");
+  auto installed_engines = GetInstalledEngineVariants(ne);
+  if (installed_engines.has_error()) {
+    return cpp::fail(installed_engines.error());
+  }
+
+  CLI_LOG("IsEngineVariantReady: " << ne << ", " << normalized_version << ", "
+                                   << variant);
+  for (const auto& installed_engine : installed_engines.value()) {
+    CLI_LOG("Installed: name: " + installed_engine.name +
+            ", version: " + installed_engine.version);
+    if (installed_engine.name == variant &&
+        (installed_engine.version == normalized_version ||
+         installed_engine.version == "v" + normalized_version)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+cpp::result
+EngineService::GetDefaultEngineVariant(const std::string& engine) {
+  auto ne = NormalizeEngine(engine);
+  // currently we only support the llama-cpp engine
+  if (ne != kLlamaRepo) {
+    return cpp::fail("Engine " + engine + " is not supported yet!");
+  }
+
+  auto config = file_manager_utils::GetCortexConfig();
+  auto variant = config.llamacppVariant;
+  auto version = config.llamacppVersion;
+
+  if (variant.empty() || version.empty()) {
+    return cpp::fail("Default engine variant for " + engine +
+                     " is not set yet!");
+  }
+
+  return DefaultEngineVariant{
+      .engine = engine,
+      .version = version,
+      .variant = variant,
+  };
+}
+
+cpp::result, std::string>
+EngineService::GetInstalledEngineVariants(const std::string& engine) const {
+  auto ne = NormalizeEngine(engine);
+  auto os = hw_inf_.sys_inf->os;
+  if (os == kMacOs && (ne == kOnnxRepo || ne == kTrtLlmRepo)) {
+    return cpp::fail("Engine " + engine + " is not supported on macOS");
+  }
+
+  if (os == kLinuxOs && ne == kOnnxRepo) {
+    return cpp::fail("Engine " + engine + " is not supported on Linux");
+  }
+
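+  // expected on-disk layout scanned below (illustrative example):
+  //   engines/<engine>/<variant>/<version>/version.txt
+  //   e.g. engines/cortex.llamacpp/mac-arm64/v0.1.35/version.txt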
cpp::fail("Engine " + engine + " is not supported on Linux"); + } + + auto engines_variants_dir = + file_manager_utils::GetEnginesContainerPath() / ne; + + if (!std::filesystem::exists(engines_variants_dir)) { + return {}; + } + + std::vector variants; + for (const auto& entry : + std::filesystem::directory_iterator(engines_variants_dir)) { + if (entry.is_directory()) { + // epectation is each directory is a variant + for (const auto& version_entry : + std::filesystem::directory_iterator(entry.path())) { + // try to find version.txt + auto version_txt_path = version_entry.path() / "version.txt"; + if (!std::filesystem::exists(version_txt_path)) { + continue; + } + + try { + auto node = YAML::LoadFile(version_txt_path.string()); + auto ev = EngineVariantResponse{ + .name = node["name"].as(), + .version = "v" + node["version"].as(), + .engine = engine, + }; + variants.push_back(ev); + } catch (const YAML::Exception& e) { + CTL_ERR("Error reading version.txt: " << e.what()); + continue; + } + } + } + } + + return variants; +} + +bool EngineService::IsEngineLoaded(const std::string& engine) const { + auto ne = NormalizeEngine(engine); + return engines_.find(ne) != engines_.end(); +} + +cpp::result EngineService::GetLoadedEngine( + const std::string& engine_name) { + auto ne = NormalizeEngine(engine_name); + if (engines_.find(ne) == engines_.end()) { + return cpp::fail("Engine " + engine_name + " is not loaded yet!"); + } + + return engines_[ne].engine; +} + +cpp::result EngineService::LoadEngine( + const std::string& engine_name) { + auto ne = NormalizeEngine(engine_name); + + if (IsEngineLoaded(ne)) { + CTL_INF("Engine " << ne << " is already loaded"); + return {}; + } + + CTL_INF("Loading engine: " << ne); + + auto selected_engine_variant = GetDefaultEngineVariant(ne); + + if (selected_engine_variant.has_error()) { + // TODO: namh need to fallback + return cpp::fail(selected_engine_variant.error()); + } + + CTL_INF("Selected engine variant: " + << json_helper::DumpJsonString(selected_engine_variant->ToJson())); + + auto user_defined_engine_path = getenv("ENGINE_PATH"); + const std::filesystem::path engine_dir_path = [&] { + if (user_defined_engine_path != nullptr) { + // for backward compatible + return std::filesystem::path(user_defined_engine_path + + GetEnginePath(ne)); + } else { + return file_manager_utils::GetEnginesContainerPath() / ne / + selected_engine_variant->variant / + selected_engine_variant->version; + } + }(); + + if (!std::filesystem::exists(engine_dir_path)) { + CTL_ERR("Directory " + engine_dir_path.string() + " is not exist!"); + return cpp::fail("Directory " + engine_dir_path.string() + + " is not exist!"); + } + + CTL_INF("Engine path: " << engine_dir_path.string()); + + try { +#if defined(_WIN32) + // TODO(?) If we only allow to load an engine at a time, the logic is simpler. + // We would like to support running multiple engines at the same time. Therefore, + // the adding/removing dll directory logic is quite complicated: + // 1. If llamacpp is loaded and new requested engine is tensorrt-llm: + // Unload the llamacpp dll directory then load the tensorrt-llm + // 2. If tensorrt-llm is loaded and new requested engine is llamacpp: + // Do nothing, llamacpp can re-use tensorrt-llm dependencies (need to be tested careful) + // 3. 
Add dll directory if met other conditions + + auto add_dll = [this](const std::string& e_type, const std::string& p) { + auto ws = std::wstring(p.begin(), p.end()); + if (auto cookie = AddDllDirectory(ws.c_str()); cookie != 0) { + CTL_DBG("Added dll directory: " << p); + engines_[e_type].cookie = cookie; + } else { + CTL_WRN("Could not add dll directory: " << p); + } + + auto cuda_path = file_manager_utils::GetCudaToolkitPath(e_type); + if (auto cuda_cookie = AddDllDirectory(cuda_path.c_str()); + cuda_cookie != 0) { + CTL_DBG("Added cuda dll directory: " << p); + engines_[e_type].cuda_cookie = cuda_cookie; + } else { + CTL_WRN("Could not add cuda dll directory: " << p); + } + }; + + if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH")); + should_use_dll_search_path) { + if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo && + should_use_dll_search_path) { + // Remove llamacpp dll directory + if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) { + CTL_WRN("Could not remove dll directory: " << kLlamaRepo); + } else { + CTL_DBG("Removed dll directory: " << kLlamaRepo); + } + if (!RemoveDllDirectory(engines_[kLlamaRepo].cuda_cookie)) { + CTL_WRN("Could not remove cuda dll directory: " << kLlamaRepo); + } else { + CTL_DBG("Removed cuda dll directory: " << kLlamaRepo); + } + + add_dll(ne, engine_dir_path.string()); + } else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) { + // Do nothing + } else { + add_dll(ne, engine_dir_path.string()); + } + } +#endif + engines_[ne].dl = + std::make_unique(engine_dir_path.string(), "engine"); +#if defined(__linux__) + const char* name = "LD_LIBRARY_PATH"; + auto data = getenv(name); + std::string v; + if (auto g = getenv(name); g) { + v += g; + } + CTL_INF("LD_LIBRARY_PATH: " << v); + auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo); + CTL_INF("llamacpp_path: " << llamacpp_path); + // tensorrt is not supported for now + // auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo); + + auto new_v = llamacpp_path.string() + ":" + v; + setenv(name, new_v.c_str(), true); + CTL_INF("LD_LIBRARY_PATH: " << getenv(name)); +#endif + + } catch (const cortex_cpp::dylib::load_error& e) { + CTL_ERR("Could not load engine: " << e.what()); + engines_.erase(ne); + return cpp::fail("Could not load engine " + ne + ": " + e.what()); + } + + auto func = engines_[ne].dl->get_function("get_engine"); + engines_[ne].engine = func(); + + auto& en = std::get(engines_[ne].engine); + if (ne == kLlamaRepo) { //fix for llamacpp engine first + auto config = file_manager_utils::GetCortexConfig(); + if (en->IsSupported("SetFileLogger")) { + en->SetFileLogger(config.maxLogLines, + (std::filesystem::path(config.logFolderPath) / + std::filesystem::path(config.logLlamaCppPath)) + .string()); + } else { + CTL_WRN("Method SetFileLogger is not supported yet"); + } + } + CTL_DBG("Loaded engine: " << ne); + return {}; +} + +cpp::result EngineService::UnloadEngine( + const std::string& engine) { + auto ne = NormalizeEngine(engine); + if (!IsEngineLoaded(ne)) { + return cpp::fail("Engine " + ne + " is not loaded yet!"); + } + EngineI* e = std::get(engines_[ne].engine); + delete e; +#if defined(_WIN32) + if (!RemoveDllDirectory(engines_[ne].cookie)) { + CTL_WRN("Could not remove dll directory: " << ne); + } else { + CTL_DBG("Removed dll directory: " << ne); + } + if (!RemoveDllDirectory(engines_[ne].cuda_cookie)) { + CTL_WRN("Could not remove cuda dll directory: " << ne); + } else { + CTL_DBG("Removed cuda dll directory: " << ne); + } +#endif + 
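+  // erasing the map entry also releases the dylib handle loaded for this engine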
engines_.erase(ne); + CTL_DBG("Unloaded engine " + ne); + return {}; +} + +std::vector EngineService::GetLoadedEngines() { + std::vector loaded_engines; + for (const auto& [key, value] : engines_) { + loaded_engines.push_back(value.engine); + } + return loaded_engines; +} + +cpp::result +EngineService::GetLatestEngineVersion(const std::string& engine) const { + auto ne = NormalizeEngine(engine); + auto res = github_release_utils::GetReleaseByVersion("janhq", ne, "latest"); + if (res.has_error()) { + return cpp::fail("Failed to fetch engine " + engine + " latest version!"); + } + return res.value(); +} + +cpp::result EngineService::IsEngineReady( + const std::string& engine) const { + auto ne = NormalizeEngine(engine); + + auto os = hw_inf_.sys_inf->os; + if (os == kMacOs && (ne == kOnnxRepo || ne == kTrtLlmRepo)) { + return cpp::fail("Engine " + engine + " is not supported on macOS"); + } + + if (os == kLinuxOs && ne == kOnnxRepo) { + return cpp::fail("Engine " + engine + " is not supported on Linux"); + } + auto installed_variants = GetInstalledEngineVariants(engine); + if (installed_variants.has_error()) { + return cpp::fail(installed_variants.error()); + } + + return installed_variants->size() > 0; +} + +cpp::result EngineService::UpdateEngine( + const std::string& engine) { + auto ne = NormalizeEngine(engine); + auto default_variant = GetDefaultEngineVariant(ne); + + if (default_variant.has_error()) { + // if we don't have a default variant, just stop + CTL_INF("No default variant found for " << ne << ". Exit update engine"); + return cpp::fail(default_variant.error()); + } + CTL_INF("Default variant: " << default_variant->variant + << ", version: " + default_variant->version); + + auto latest_version = GetLatestEngineVersion(ne); + if (latest_version.has_error()) { + // if can't get latest version, stop + CTL_INF("Can't get latest version for " + << ne << " error: " << latest_version.error()); + return cpp::fail("Failed to get latest version: " + latest_version.error()); + } + CTL_INF("Latest version: " + latest_version.value().name); + + // check if local engines variants if latest version already exist + auto installed_variants = GetInstalledEngineVariants(ne); + + bool latest_version_installed = false; + for (const auto& v : installed_variants.value()) { + CTL_INF("Installed version: " + v.version); + CTL_INF(json_helper::DumpJsonString(v.ToJson())); + if (default_variant->variant == v.name && + string_utils::RemoveSubstring(v.version, "v") == + latest_version.value().name) { + latest_version_installed = true; + break; + } + } + + if (latest_version_installed) { + CTL_INF("Engine " + ne + ", " + default_variant->variant + + " is already up-to-date! Version " + + latest_version.value().tag_name); + return cpp::fail("Engine " + ne + ", " + default_variant->variant + + " is already up-to-date! Version " + + latest_version.value().tag_name); + } + + CTL_INF("Engine variant " + << default_variant->variant << " is not up-to-date! 
Current: " + << default_variant->version << ", latest: " << latest_version->name); + + auto res = InstallEngineAsyncV2(engine, latest_version->tag_name, + default_variant->variant); + + return EngineUpdateResult{.engine = engine, + .variant = default_variant->variant, + .from = default_variant->version, + .to = latest_version->tag_name}; +} diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index 0f491edc7..4e58fccfd 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -1,35 +1,88 @@ #pragma once #include -#include #include #include #include +#include "cortex-common/EngineI.h" +#include "cortex-common/cortexpythoni.h" #include "services/download_service.h" #include "utils/cpuid/cpu_info.h" +#include "utils/dylib.h" #include "utils/engine_constants.h" +#include "utils/github_release_utils.h" #include "utils/result.hpp" #include "utils/system_info_utils.h" -struct EngineInfo { +// TODO: namh think of the other name +struct DefaultEngineVariant { + std::string engine; + std::string version; + std::string variant; + + Json::Value ToJson() const { + Json::Value root; + root["engine"] = engine; + root["version"] = version; + root["variant"] = variant; + return root; + } +}; + +// TODO: namh think of the other name +struct EngineVariantResponse { std::string name; - std::string description; - std::string format; - std::optional version; - std::string product_name; - std::string status; - std::optional variant; + std::string version; + std::string engine; + + Json::Value ToJson() const { + Json::Value root; + root["name"] = name; + root["version"] = version; + root["engine"] = engine; + return root; + } +}; + +struct EngineUpdateResult { + std::string engine; + std::string variant; + std::string from; + std::string to; + + Json::Value ToJson() const { + Json::Value root; + root["engine"] = engine; + root["variant"] = variant; + root["from"] = from; + root["to"] = to; + return root; + } }; namespace system_info_utils { struct SystemInfo; } + +using EngineV = std::variant; + class EngineService { - public: - constexpr static auto kIncompatible = "Incompatible"; - constexpr static auto kReady = "Ready"; - constexpr static auto kNotInstalled = "Not Installed"; + private: + using EngineRelease = github_release_utils::GitHubRelease; + using EngineVariant = github_release_utils::GitHubAsset; + + struct EngineInfo { + std::unique_ptr dl; + EngineV engine; +#if defined(_WIN32) + DLL_DIRECTORY_COOKIE cookie; + DLL_DIRECTORY_COOKIE cuda_cookie; +#endif + }; + + std::unordered_map engines_{}; + public: const std::vector kSupportEngines = { kLlamaEngine, kOnnxEngine, kTrtLlmEngine}; @@ -38,33 +91,87 @@ class EngineService { hw_inf_{.sys_inf = system_info_utils::GetSystemInfo(), .cuda_driver_version = system_info_utils::GetCudaVersion()} {} - cpp::result GetEngineInfo( - const std::string& engine) const; - std::vector GetEngineInfoList() const; + /** + * Check if an engines is ready (have at least one variant installed) + */ + cpp::result IsEngineReady(const std::string& engine) const; + cpp::result InstallEngineAsync( const std::string& engine, const std::string& version = "latest", const std::string& src = ""); - cpp::result UninstallEngine(const std::string& engine); + /** + * Handling install engine variant. + * + * If no version provided, choose `latest`. + * If no variant provided, automatically pick the best variant. 
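+   *
+   * e.g. InstallEngineAsyncV2("llama-cpp", "latest", std::nullopt) kicks off an
+   * async install of the best-matching variant (illustrative call).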
+ */ + cpp::result InstallEngineAsyncV2( + const std::string& engine, const std::string& version, + const std::optional variant_name); + + cpp::result UninstallEngineVariant( + const std::string& engine, const std::optional version, + const std::optional variant); + + cpp::result, std::string> GetEngineReleases( + const std::string& engine) const; + + cpp::result, std::string> GetEngineVariants( + const std::string& engine, const std::string& version) const; + + cpp::result SetDefaultEngineVariant( + const std::string& engine, const std::string& version, + const std::string& variant); + + cpp::result GetDefaultEngineVariant( + const std::string& engine); + + cpp::result, std::string> + GetInstalledEngineVariants(const std::string& engine) const; + + bool IsEngineLoaded(const std::string& engine) const; + + cpp::result GetLoadedEngine( + const std::string& engine_name); + + std::vector GetLoadedEngines(); + + cpp::result LoadEngine(const std::string& engine_name); + + cpp::result UnloadEngine(const std::string& engine_name); + + cpp::result + GetLatestEngineVersion(const std::string& engine) const; cpp::result UnzipEngine(const std::string& engine, const std::string& version, const std::string& path); + cpp::result UpdateEngine( + const std::string& engine); + private: cpp::result DownloadEngine( const std::string& engine, const std::string& version = "latest", bool async = false); + cpp::result DownloadEngineV2( + const std::string& engine, const std::string& version = "latest", + const std::optional variant_name = std::nullopt); + cpp::result DownloadCuda(const std::string& engine, bool async = false); std::string GetMatchedVariant(const std::string& engine, const std::vector& variants); - private: + cpp::result IsEngineVariantReady( + const std::string& engine, const std::string& version, + const std::string& variant); + std::shared_ptr download_service_; struct HardwareInfo { diff --git a/engine/services/inference_service.cc b/engine/services/inference_service.cc index c29f75e4a..e7eca3755 100644 --- a/engine/services/inference_service.cc +++ b/engine/services/inference_service.cc @@ -1,30 +1,9 @@ #include "inference_service.h" -#include "utils/cpuid/cpu_info.h" +#include #include "utils/engine_constants.h" -#include "utils/file_manager_utils.h" #include "utils/function_calling/common.h" namespace services { - -namespace { -// Need to change this after we rename repositories -std::string NormalizeEngine(const std::string& engine) { - if (engine == kLlamaEngine) { - return kLlamaRepo; - } else if (engine == kOnnxEngine) { - return kOnnxRepo; - } else if (engine == kTrtLlmEngine) { - return kTrtLlmRepo; - } - return engine; -}; - -constexpr const int k200OK = 200; -constexpr const int k400BadRequest = 400; -constexpr const int k409Conflict = 409; -constexpr const int k500InternalServerError = 500; -} // namespace - cpp::result InferenceService::HandleChatCompletion( std::shared_ptr q, std::shared_ptr json_body) { std::string engine_type; @@ -33,26 +12,26 @@ cpp::result InferenceService::HandleChatCompletion( } else { engine_type = (*(json_body)).get("engine", kLlamaRepo).asString(); } - auto ne = NormalizeEngine(engine_type); - if (!IsEngineLoaded(ne)) { + function_calling_utils::PreprocessRequest(json_body); + auto tool_choice = json_body->get("tool_choice", Json::Value::null); + auto engine_result = engine_service_->GetLoadedEngine(engine_type); + if (engine_result.has_error()) { Json::Value res; res["message"] = "Engine is not loaded yet"; Json::Value stt; - stt["status_code"] = 
k409Conflict; + stt["status_code"] = drogon::k400BadRequest; LOG_WARN << "Engine is not loaded yet"; return cpp::fail(std::make_pair(stt, res)); } - function_calling_utils::PreprocessRequest(json_body); - Json::Value tool_choice = json_body->get("tool_choice", Json::Value::null); - std::get(engines_[ne].engine) - ->HandleChatCompletion( - json_body, [q, tool_choice](Json::Value status, Json::Value res) { - if (!tool_choice.isNull()) { - res["tool_choice"] = tool_choice; - } - q->push(std::make_pair(status, res)); - }); + auto engine = std::get(engine_result.value()); + engine->HandleChatCompletion( + json_body, [q, tool_choice](Json::Value status, Json::Value res) { + if (!tool_choice.isNull()) { + res["tool_choice"] = tool_choice; + } + q->push(std::make_pair(status, res)); + }); return {}; } @@ -65,19 +44,19 @@ cpp::result InferenceService::HandleEmbedding( engine_type = (*(json_body)).get("engine", kLlamaRepo).asString(); } - auto ne = NormalizeEngine(engine_type); - if (!IsEngineLoaded(ne)) { + auto engine_result = engine_service_->GetLoadedEngine(engine_type); + if (engine_result.has_error()) { Json::Value res; - res["message"] = "Engine is not loaded yet"; Json::Value stt; - stt["status_code"] = k409Conflict; + res["message"] = "Engine is not loaded yet"; + stt["status_code"] = drogon::k400BadRequest; LOG_WARN << "Engine is not loaded yet"; return cpp::fail(std::make_pair(stt, res)); } - std::get(engines_["llama-cpp"].engine) - ->HandleEmbedding(json_body, [q](Json::Value status, Json::Value res) { - q->push(std::make_pair(status, res)); - }); + auto engine = std::get(engine_result.value()); + engine->HandleEmbedding(json_body, [q](Json::Value status, Json::Value res) { + q->push(std::make_pair(status, res)); + }); return {}; } @@ -90,134 +69,53 @@ InferResult InferenceService::LoadModel( engine_type = (*(json_body)).get("engine", kLlamaRepo).asString(); } - auto ne = NormalizeEngine(engine_type); Json::Value r; Json::Value stt; - // We have not loaded engine yet, should load it before using it - if (engines_.find(ne) == engines_.end()) { - auto get_engine_path = [](std::string_view e) { - if (e == kLlamaRepo) { - return kLlamaLibPath; - } else if (e == kOnnxRepo) { - return kOnnxLibPath; - } else if (e == kTrtLlmRepo) { - return kTensorrtLlmPath; - } - return kLlamaLibPath; - }; - try { - if (ne == kLlamaRepo) { - cortex::cpuid::CpuInfo cpu_info; - LOG_INFO << "CPU instruction set: " << cpu_info.to_string(); - } - - std::string abs_path = - (getenv("ENGINE_PATH") - ? getenv("ENGINE_PATH") - : file_manager_utils::GetCortexDataPath().string()) + - get_engine_path(ne); - LOG_INFO << "engine path: " << abs_path; -#if defined(_WIN32) - // TODO(?) If we only allow to load an engine at a time, the logic is simpler. - // We would like to support running multiple engines at the same time. Therefore, - // the adding/removing dll directory logic is quite complicated: - // 1. If llamacpp is loaded and new requested engine is tensorrt-llm: - // Unload the llamacpp dll directory then load the tensorrt-llm - // 2. If tensorrt-llm is loaded and new requested engine is llamacpp: - // Do nothing, llamacpp can re-use tensorrt-llm dependencies (need to be tested careful) - // 3. 
Add dll directory if met other conditions - - auto add_dll = [this](const std::string& e_type, const std::string& p) { - auto ws = std::wstring(p.begin(), p.end()); - if (auto cookie = AddDllDirectory(ws.c_str()); cookie != 0) { - LOG_INFO << "Added dll directory: " << p; - engines_[e_type].cookie = cookie; - } else { - LOG_WARN << "Could not add dll directory: " << p; - } - }; - - if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH")); - should_use_dll_search_path) { - if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo && - should_use_dll_search_path) { - // Remove llamacpp dll directory - if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) { - LOG_WARN << "Could not remove dll directory: " << kLlamaRepo; - } else { - LOG_INFO << "Removed dll directory: " << kLlamaRepo; - } - - add_dll(ne, abs_path); - } else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) { - // Do nothing - } else { - add_dll(ne, abs_path); - } - } -#endif - engines_[ne].dl = std::make_unique(abs_path, "engine"); - - } catch (const cortex_cpp::dylib::load_error& e) { - LOG_ERROR << "Could not load engine: " << e.what(); - engines_.erase(ne); + auto load_engine_result = engine_service_->LoadEngine(engine_type); + if (load_engine_result.has_error()) { + LOG_ERROR << "Could not load engine: " << load_engine_result.error(); - r["message"] = "Could not load engine " + ne + ": " + e.what(); - stt["status_code"] = k500InternalServerError; - return std::make_pair(stt, r); - } - - auto func = engines_[ne].dl->get_function("get_engine"); - engines_[ne].engine = func(); - - auto& en = std::get(engines_[ne].engine); - if (ne == kLlamaRepo) { //fix for llamacpp engine first - auto config = file_manager_utils::GetCortexConfig(); - if (en->IsSupported("SetFileLogger")) { - en->SetFileLogger(config.maxLogLines, - (std::filesystem::path(config.logFolderPath) / - std::filesystem::path(config.logLlamaCppPath)) - .string()); - } else { - LOG_WARN << "Method SetFileLogger is not supported yet"; - } - } - LOG_INFO << "Loaded engine: " << ne; + r["message"] = "Could not load engine " + engine_type + ": " + + load_engine_result.error(); + stt["status_code"] = drogon::k500InternalServerError; + return std::make_pair(stt, r); } - // LOG_TRACE << "Load model"; - auto& en = std::get(engines_[ne].engine); - en->LoadModel(json_body, [&stt, &r](Json::Value status, Json::Value res) { + // might need mutex here + auto engine_result = engine_service_->GetLoadedEngine(engine_type); + auto engine = std::get(engine_result.value()); + engine->LoadModel(json_body, [&stt, &r](Json::Value status, Json::Value res) { stt = status; r = res; }); return std::make_pair(stt, r); } -InferResult InferenceService::UnloadModel( - std::shared_ptr json_body) { - std::string engine_type; - if (!HasFieldInReq(json_body, "engine")) { - engine_type = kLlamaRepo; - } else { - engine_type = (*(json_body)).get("engine", kLlamaRepo).asString(); - } - - auto ne = NormalizeEngine(engine_type); +InferResult InferenceService::UnloadModel(const std::string& engine_name, + const std::string& model_id) { Json::Value r; Json::Value stt; - if (!IsEngineLoaded(ne)) { - r["message"] = "Engine is not loaded yet"; - stt["status_code"] = k409Conflict; + auto engine_result = engine_service_->GetLoadedEngine(engine_name); + if (engine_result.has_error()) { + Json::Value res; + res["message"] = "Engine is not loaded yet"; + Json::Value stt; + stt["status_code"] = drogon::k400BadRequest; LOG_WARN << "Engine is not loaded yet"; - return std::make_pair(stt, r); + return 
std::make_pair(stt, res); } + + Json::Value json_body; + json_body["engine"] = engine_name; + json_body["model"] = model_id; + LOG_TRACE << "Start unload model"; - std::get(engines_[ne].engine) - ->UnloadModel(json_body, [&r, &stt](Json::Value status, Json::Value res) { - stt = status; - r = res; - }); + auto engine = std::get(engine_result.value()); + engine->UnloadModel(std::make_shared(json_body), + [&r, &stt](Json::Value status, Json::Value res) { + stt = status; + r = res; + }); return std::make_pair(stt, r); } @@ -230,24 +128,25 @@ InferResult InferenceService::GetModelStatus( engine_type = (*(json_body)).get("engine", kLlamaRepo).asString(); } - auto ne = NormalizeEngine(engine_type); Json::Value r; Json::Value stt; - - if (!IsEngineLoaded(ne)) { - r["message"] = "Engine is not loaded yet"; - stt["status_code"] = k409Conflict; + auto engine_result = engine_service_->GetLoadedEngine(engine_type); + if (engine_result.has_error()) { + Json::Value res; + res["message"] = "Engine is not loaded yet"; + Json::Value stt; + stt["status_code"] = drogon::k400BadRequest; LOG_WARN << "Engine is not loaded yet"; - return std::make_pair(stt, r); + return std::make_pair(stt, res); } LOG_TRACE << "Start to get model status"; - std::get(engines_[ne].engine) - ->GetModelStatus(json_body, - [&stt, &r](Json::Value status, Json::Value res) { - stt = status; - r = res; - }); + auto engine = std::get(engine_result.value()); + engine->GetModelStatus(json_body, + [&stt, &r](Json::Value status, Json::Value res) { + stt = status; + r = res; + }); return std::make_pair(stt, r); } @@ -255,16 +154,18 @@ InferResult InferenceService::GetModels( std::shared_ptr json_body) { Json::Value r; Json::Value stt; - if (engines_.empty()) { - r["message"] = "Engine is not loaded yet"; - stt["status_code"] = k409Conflict; + + auto loaded_engines = engine_service_->GetLoadedEngines(); + if (loaded_engines.empty()) { + r["message"] = "No engine is loaded yet"; + stt["status_code"] = drogon::k400BadRequest; return std::make_pair(stt, r); } LOG_TRACE << "Start to get models"; Json::Value resp_data(Json::arrayValue); - for (auto const& [k, v] : engines_) { - auto e = std::get(v.engine); + for (const auto& loaded_engine : loaded_engines) { + auto e = std::get(loaded_engine); if (e->IsSupported("GetModels")) { e->GetModels(json_body, [&resp_data](Json::Value status, Json::Value res) { @@ -274,28 +175,12 @@ InferResult InferenceService::GetModels( }); } } + Json::Value root; root["data"] = resp_data; root["object"] = "list"; - stt["status_code"] = k200OK; + stt["status_code"] = drogon::k200OK; return std::make_pair(stt, root); - // LOG_TRACE << "Done get models"; -} - -Json::Value InferenceService::GetEngines( - std::shared_ptr json_body) { - Json::Value res; - Json::Value engine_array(Json::arrayValue); - for (const auto& [s, _] : engines_) { - Json::Value val; - val["id"] = s; - val["object"] = "engine"; - engine_array.append(val); - } - - res["object"] = "list"; - res["data"] = engine_array; - return res; } InferResult InferenceService::FineTuning( @@ -304,92 +189,50 @@ InferResult InferenceService::FineTuning( Json::Value r; Json::Value stt; - if (engines_.find(ne) == engines_.end()) { - try { - std::string abs_path = - (getenv("ENGINE_PATH") - ? 
getenv("ENGINE_PATH") - : file_manager_utils::GetCortexDataPath().string()) + - kPythonRuntimeLibPath; - engines_[ne].dl = std::make_unique(abs_path, "engine"); - } catch (const cortex_cpp::dylib::load_error& e) { - - LOG_ERROR << "Could not load engine: " << e.what(); - engines_.erase(ne); - - Json::Value res; - r["message"] = "Could not load engine " + ne; - stt["status_code"] = k500InternalServerError; - return std::make_pair(stt, r); - } - - auto func = - engines_[ne].dl->get_function("get_engine"); - engines_[ne].engine = func(); - LOG_INFO << "Loaded engine: " << ne; - } - - LOG_TRACE << "Start to fine-tuning"; - auto& en = std::get(engines_[ne].engine); - if (en->IsSupported("HandlePythonFileExecutionRequest")) { - en->HandlePythonFileExecutionRequest( - json_body, [&r, &stt](Json::Value status, Json::Value res) { - r = res; - stt = status; - }); - } else { - LOG_WARN << "Method is not supported yet"; - r["message"] = "Method is not supported yet"; - stt["status_code"] = k500InternalServerError; - return std::make_pair(stt, r); - } - LOG_TRACE << "Done fine-tuning"; - return std::make_pair(stt, r); -} - -InferResult InferenceService::UnloadEngine( - std::shared_ptr json_body) { - std::string engine_type; - if (!HasFieldInReq(json_body, "engine")) { - engine_type = kLlamaRepo; - } else { - engine_type = (*(json_body)).get("engine", kLlamaRepo).asString(); - } - - auto ne = NormalizeEngine(engine_type); - Json::Value r; - Json::Value stt; - - if (!IsEngineLoaded(ne)) { - r["message"] = "Engine is not loaded yet"; - stt["status_code"] = k409Conflict; - LOG_WARN << "Engine is not loaded yet"; - return std::make_pair(stt, r); - } - - EngineI* e = std::get(engines_[ne].engine); - delete e; -#if defined(_WIN32) - if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH")); - should_use_dll_search_path) { - if (!RemoveDllDirectory(engines_[ne].cookie)) { - LOG_WARN << "Could not remove dll directory: " << ne; - } else { - LOG_INFO << "Removed dll directory: " << ne; - } - } -#endif - engines_.erase(ne); - LOG_INFO << "Unloaded engine " + ne; - r["message"] = "Unloaded engine " + ne; - stt["status_code"] = k200OK; + // TODO: namh refactor this + // if (engines_.find(ne) == engines_.end()) { + // try { + // std::string abs_path = + // (getenv("ENGINE_PATH") + // ? 
getenv("ENGINE_PATH") + // : file_manager_utils::GetCortexDataPath().string()) + + // kPythonRuntimeLibPath; + // engines_[ne].dl = std::make_unique(abs_path, "engine"); + // } catch (const cortex_cpp::dylib::load_error& e) { + // + // LOG_ERROR << "Could not load engine: " << e.what(); + // engines_.erase(ne); + // + // Json::Value res; + // r["message"] = "Could not load engine " + ne; + // stt["status_code"] = drogon::k500InternalServerError; + // return std::make_pair(stt, r); + // } + // + // auto func = + // engines_[ne].dl->get_function("get_engine"); + // engines_[ne].engine = func(); + // LOG_INFO << "Loaded engine: " << ne; + // } + // + // LOG_TRACE << "Start to fine-tuning"; + // auto& en = std::get(engines_[ne].engine); + // if (en->IsSupported("HandlePythonFileExecutionRequest")) { + // en->HandlePythonFileExecutionRequest( + // json_body, [&r, &stt](Json::Value status, Json::Value res) { + // r = res; + // stt = status; + // }); + // } else { + // LOG_WARN << "Method is not supported yet"; + r["message"] = "Method is not supported yet"; + stt["status_code"] = drogon::k500InternalServerError; + // return std::make_pair(stt, r); + // } + // LOG_TRACE << "Done fine-tuning"; return std::make_pair(stt, r); } -bool InferenceService::IsEngineLoaded(const std::string& e) { - return engines_.find(e) != engines_.end(); -} - bool InferenceService::HasFieldInReq(std::shared_ptr json_body, const std::string& field) { if (!json_body || (*json_body)[field].isNull()) { diff --git a/engine/services/inference_service.h b/engine/services/inference_service.h index 26cee5157..7c09156ff 100644 --- a/engine/services/inference_service.h +++ b/engine/services/inference_service.h @@ -2,13 +2,8 @@ #include #include -#include #include -#include -#include -#include "cortex-common/EngineI.h" -#include "cortex-common/cortexpythoni.h" -#include "utils/dylib.h" +#include "services/engine_service.h" #include "utils/result.hpp" namespace services { @@ -37,6 +32,9 @@ struct SyncQueue { class InferenceService { public: + explicit InferenceService(std::shared_ptr engine_service) + : engine_service_{engine_service} {} + cpp::result HandleChatCompletion( std::shared_ptr q, std::shared_ptr json_body); @@ -45,34 +43,19 @@ class InferenceService { InferResult LoadModel(std::shared_ptr json_body); - InferResult UnloadModel(std::shared_ptr json_body); + InferResult UnloadModel(const std::string& engine, + const std::string& model_id); InferResult GetModelStatus(std::shared_ptr json_body); InferResult GetModels(std::shared_ptr json_body); - Json::Value GetEngines(std::shared_ptr json_body); - InferResult FineTuning(std::shared_ptr json_body); - InferResult UnloadEngine(std::shared_ptr json_body); - private: - bool IsEngineLoaded(const std::string& e); - bool HasFieldInReq(std::shared_ptr json_body, const std::string& field); - private: - using EngineV = std::variant; - struct EngineInfo { - std::unique_ptr dl; - EngineV engine; -#if defined(_WIN32) - DLL_DIRECTORY_COOKIE cookie; -#endif - }; - // TODO(sang) move engines_ into engine service? 
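The removed `TODO(sang)` above is effectively what this patch resolves: the engine map moves out of `InferenceService` into `EngineService`, and inference only borrows loaded engines through an injected pointer. A hedged wiring sketch; the `shared_ptr`-based constructors are taken from the signatures in this diff, while the construction order and names are illustrative:

```cpp
// Sketch only: how the refactored services plausibly fit together.
#include <memory>
#include "services/engine_service.h"
#include "services/inference_service.h"

void WireServices() {
  auto download_service = std::make_shared<DownloadService>();
  auto engine_service = std::make_shared<EngineService>(download_service);
  auto inference_service =
      std::make_shared<services::InferenceService>(engine_service);

  // Model lifecycle calls now resolve engines through engine_service_;
  // note UnloadModel's new signature takes engine name and model id directly.
  inference_service->UnloadModel("llama-cpp", "tinyllama");
}
```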
- std::unordered_map<std::string, EngineInfo> engines_; + std::shared_ptr<EngineService> engine_service_; }; -} // namespace services \ No newline at end of file +} // namespace services diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index d9656073e..387346f6d 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -47,7 +47,12 @@ void ParseGguf(const DownloadItem& ggufDownloadItem, } auto url_obj = url_parser::FromUrlString(ggufDownloadItem.downloadUrl); - auto branch = url_obj.pathParams[3]; + if (url_obj.has_error()) { + CTL_WRN("Error parsing url: " << ggufDownloadItem.downloadUrl); + return; + } + + auto branch = url_obj->pathParams[3]; CTL_INF("Adding model to modellist with branch: " << branch); auto rel = file_manager_utils::ToRelativeCortexDataPath(yaml_name); @@ -68,34 +73,30 @@ cpp::result<DownloadTask, std::string> GetDownloadTask( const std::string& modelId, const std::string& branch = "main") { - using namespace nlohmann; url_parser::Url url = { .protocol = "https", - .host = ModelService::kHuggingFaceHost, + .host = kHuggingFaceHost, .pathParams = {"api", "models", "cortexso", modelId, "tree", branch}}; - httplib::Client cli(url.GetProtocolAndHost()); - auto res = - cli.Get(url.GetPathAndQuery(), huggingface_utils::CreateHttpHfHeaders()); - if (res->status != httplib::StatusCode::OK_200) { + auto result = curl_utils::SimpleGetJson(url.ToFullPath()); + if (result.has_error()) { return cpp::fail("Model " + modelId + " not found"); } - auto jsonResponse = json::parse(res->body); std::vector<DownloadItem> download_items{}; auto model_container_path = file_manager_utils::GetModelsContainerPath() / "cortex.so" / modelId / branch; file_manager_utils::CreateDirectoryRecursively(model_container_path.string()); - for (const auto& [key, value] : jsonResponse.items()) { - auto path = value["path"].get<std::string>(); + for (const auto& value : result.value()) { + auto path = value["path"].asString(); if (path == ".gitattributes" || path == ".gitignore" || path == "README.md") { continue; } url_parser::Url download_url = { .protocol = "https", - .host = ModelService::kHuggingFaceHost, + .host = kHuggingFaceHost, .pathParams = {"cortexso", modelId, "resolve", branch, path}}; auto local_path = model_container_path / path; @@ -105,12 +106,9 @@ cpp::result<DownloadTask, std::string> GetDownloadTask( .localPath = local_path}); } - DownloadTask download_tasks{ - .id = branch == "main" ? modelId : modelId + "-" + branch, - .type = DownloadType::Model, - .items = download_items}; - - return download_tasks; + return DownloadTask{.id = branch == "main" ? modelId : modelId + "-" + branch, + .type = DownloadType::Model, + .items = download_items}; } } // namespace
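`url_parser::FromUrlString` now returns a `cpp::result` instead of a bare `Url`, so every call site in this file switches from `.` to `->` behind an error check. A condensed restatement of the pattern, mirroring `ParseGguf` above; the branch index assumes a cortexso resolve URL of the shape `https://huggingface.co/cortexso/<model>/resolve/<branch>/<file>`:

```cpp
// Sketch: the result-typed URL parsing pattern this hunk applies everywhere.
#include <string>
#include "utils/logging_utils.h"
#include "utils/url_parser.h"

void LogBranch(const std::string& url) {
  auto url_obj = url_parser::FromUrlString(url);
  if (url_obj.has_error()) {
    CTL_WRN("Error parsing url: " << url);
    return;
  }
  // operator-> reaches into the parsed Url held by the cpp::result;
  // for resolve URLs the branch sits at pathParams[3].
  CTL_INF("branch: " << url_obj->pathParams[3]);
}
```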
@@ -231,21 +229,24 @@ cpp::result<std::string, std::string> ModelService::HandleDownloadUrlAsync( const std::string& url, std::optional<std::string> temp_model_id, std::optional<std::string> temp_name) { auto url_obj = url_parser::FromUrlString(url); + if (url_obj.has_error()) { + return cpp::fail("Invalid url: " + url); + } - if (url_obj.host == kHuggingFaceHost) { - if (url_obj.pathParams[2] == "blob") { - url_obj.pathParams[2] = "resolve"; + if (url_obj->host == kHuggingFaceHost) { + if (url_obj->pathParams[2] == "blob") { + url_obj->pathParams[2] = "resolve"; } } - auto author{url_obj.pathParams[0]}; - auto model_id{url_obj.pathParams[1]}; - auto file_name{url_obj.pathParams.back()}; + auto author{url_obj->pathParams[0]}; + auto model_id{url_obj->pathParams[1]}; + auto file_name{url_obj->pathParams.back()}; if (author == "cortexso") { - return DownloadModelFromCortexsoAsync(model_id, url_obj.pathParams[3]); + return DownloadModelFromCortexsoAsync(model_id, url_obj->pathParams[3]); } - if (url_obj.pathParams.size() < 5) { + if (url_obj->pathParams.size() < 5) { return cpp::fail("Invalid url: " + url); } @@ -265,7 +266,7 @@ } auto local_path{file_manager_utils::GetModelsContainerPath() / - "huggingface.co" / author / model_id / file_name}; + kHuggingFaceHost / author / model_id / file_name}; try { std::filesystem::create_directories(local_path.parent_path()); @@ -275,7 +276,7 @@ std::filesystem::create_directories(local_path.parent_path()); } - auto download_url = url_parser::FromUrl(url_obj); + auto download_url = url_parser::FromUrl(url_obj.value()); // this assumes that the model being downloaded is a single gguf file auto downloadTask{DownloadTask{.id = model_id, .type = DownloadType::Model, @@ -302,22 +303,25 @@ cpp::result<std::string, std::string> ModelService::HandleUrl( const std::string& url) { auto url_obj = url_parser::FromUrlString(url); + if (url_obj.has_error()) { + return cpp::fail("Invalid url: " + url); + } - if (url_obj.host == kHuggingFaceHost) { - if (url_obj.pathParams[2] == "blob") { - url_obj.pathParams[2] = "resolve"; + if (url_obj->host == kHuggingFaceHost) { + if (url_obj->pathParams[2] == "blob") { + url_obj->pathParams[2] = "resolve"; } } - auto author{url_obj.pathParams[0]}; - auto model_id{url_obj.pathParams[1]}; - auto file_name{url_obj.pathParams.back()}; + auto author{url_obj->pathParams[0]}; + auto model_id{url_obj->pathParams[1]}; + auto file_name{url_obj->pathParams.back()}; if (author == "cortexso") { return DownloadModelFromCortexso(model_id); } - if (url_obj.pathParams.size() < 5) { - if (url_obj.pathParams.size() < 2) { + if (url_obj->pathParams.size() < 5) { + if (url_obj->pathParams.size() < 2) { return cpp::fail("Invalid url: " + url); } return DownloadHuggingFaceGgufModel(author, model_id, std::nullopt); @@ -335,7 +339,7 @@ cpp::result<std::string, std::string> ModelService::HandleUrl( } auto local_path{file_manager_utils::GetModelsContainerPath() / - "huggingface.co" / author / model_id / file_name}; + kHuggingFaceHost / author / model_id / file_name}; try { std::filesystem::create_directories(local_path.parent_path()); @@ -345,7 +349,7 @@ } - auto download_url = url_parser::FromUrl(url_obj); + auto download_url = url_parser::FromUrl(url_obj.value()); // this assumes that the model being 
downloaded is a single gguf file auto downloadTask{DownloadTask{.id = model_id, .type = DownloadType::Model, @@ -541,6 +545,14 @@ cpp::result ModelService::DeleteModel( cortex::db::Models modellist_handler; config::YamlHandler yaml_handler; + auto result = StopModel(model_handle); + if (result.has_error()) { + CTL_INF("Failed to stop model " << model_handle + << ", error: " << result.error()); + } else { + CTL_INF("Model " << model_handle << " stopped successfully"); + } + try { auto model_entry = modellist_handler.GetModelInfo(model_handle); if (model_entry.has_error()) { @@ -585,7 +597,7 @@ cpp::result ModelService::DeleteModel( } cpp::result ModelService::StartModel( - const std::string& host, int port, const std::string& model_handle, + const std::string& model_handle, const StartParameterOverride& params_override) { namespace fs = std::filesystem; namespace fmu = file_manager_utils; @@ -622,7 +634,6 @@ cpp::result ModelService::StartModel( } else { bypass_stop_check_set_.insert(model_handle); } - httplib::Client cli(host + ":" + std::to_string(port)); json_data["model"] = model_handle; if (auto& cpt = params_override.custom_prompt_template; @@ -669,7 +680,7 @@ cpp::result ModelService::StartModel( } cpp::result ModelService::StopModel( - const std::string& host, int port, const std::string& model_handle) { + const std::string& model_handle) { namespace fs = std::filesystem; namespace fmu = file_manager_utils; cortex::db::Models modellist_handler; @@ -678,7 +689,7 @@ cpp::result ModelService::StopModel( try { auto bypass_check = (bypass_stop_check_set_.find(model_handle) != bypass_stop_check_set_.end()); - Json::Value json_data; + std::string engine_name = ""; if (!bypass_check) { auto model_entry = modellist_handler.GetModelInfo(model_handle); if (model_entry.has_error()) { @@ -690,18 +701,13 @@ cpp::result ModelService::StopModel( fs::path(model_entry.value().path_to_model_yaml)) .string()); auto mc = yaml_handler.GetModelConfig(); - json_data["engine"] = mc.engine; + engine_name = mc.engine; } - - httplib::Client cli(host + ":" + std::to_string(port)); - json_data["model"] = model_handle; if (bypass_check) { - json_data["engine"] = kLlamaEngine; + engine_name = kLlamaEngine; } - CTL_INF(json_data.toStyledString()); assert(inference_svc_); - auto ir = - inference_svc_->UnloadModel(std::make_shared(json_data)); + auto ir = inference_svc_->UnloadModel(engine_name, model_handle); auto status = std::get<0>(ir)["status_code"].asInt(); auto data = std::get<1>(ir); if (status == httplib::StatusCode::OK_200) { @@ -720,7 +726,7 @@ cpp::result ModelService::StopModel( } cpp::result ModelService::GetModelStatus( - const std::string& host, int port, const std::string& model_handle) { + const std::string& model_handle) { namespace fs = std::filesystem; namespace fmu = file_manager_utils; cortex::db::Models modellist_handler; @@ -738,28 +744,20 @@ cpp::result ModelService::GetModelStatus( .string()); auto mc = yaml_handler.GetModelConfig(); - httplib::Client cli(host + ":" + std::to_string(port)); - nlohmann::json json_data; - json_data["model"] = model_handle; - json_data["engine"] = mc.engine; - - auto data_str = json_data.dump(); + Json::Value root; + root["model"] = model_handle; + root["engine"] = mc.engine; - auto res = cli.Post("/inferences/server/modelstatus", httplib::Headers(), - data_str.data(), data_str.size(), "application/json"); - if (res) { - if (res->status == httplib::StatusCode::OK_200) { - return true; - } else { - CTL_INF("Model failed to get model status with status code: " - 
<< res->status); - return cpp::fail("Model failed to get model status with status code: " + - std::to_string(res->status)); - } + auto ir = + inference_svc_->GetModelStatus(std::make_shared(root)); + auto status = std::get<0>(ir)["status_code"].asInt(); + auto data = std::get<1>(ir); + if (status == httplib::StatusCode::OK_200) { + return true; } else { - auto err = res.error(); - CTL_WRN("HTTP error: " << httplib::to_string(err)); - return cpp::fail("HTTP error: " + httplib::to_string(err)); + CTL_ERR("Model failed to get model status with status code: " << status); + return cpp::fail("Model failed to get model status: " + + data["message"].asString()); } } catch (const std::exception& e) { return cpp::fail("Fail to get model status with ID '" + model_handle + @@ -777,25 +775,29 @@ cpp::result ModelService::GetModelPullInfo( if (string_utils::StartsWith(input, "https://")) { auto url_obj = url_parser::FromUrlString(input); - - if (url_obj.host == kHuggingFaceHost) { - if (url_obj.pathParams[2] == "blob") { - url_obj.pathParams[2] = "resolve"; + if (url_obj.has_error()) { + return cpp::fail("Invalid url: " + input); + } + if (url_obj->host == kHuggingFaceHost) { + if (url_obj->pathParams[2] == "blob") { + url_obj->pathParams[2] = "resolve"; } } - auto author{url_obj.pathParams[0]}; - auto model_id{url_obj.pathParams[1]}; - auto file_name{url_obj.pathParams.back()}; + + auto author{url_obj->pathParams[0]}; + auto model_id{url_obj->pathParams[1]}; + auto file_name{url_obj->pathParams.back()}; if (author == "cortexso") { - return ModelPullInfo{.id = model_id + ":" + url_obj.pathParams[3], - .downloaded_models = {}, - .available_models = {}, - .download_url = url_parser::FromUrl(url_obj)}; + return ModelPullInfo{ + .id = model_id + ":" + url_obj->pathParams[3], + .downloaded_models = {}, + .available_models = {}, + .download_url = url_parser::FromUrl(url_obj.value())}; } return ModelPullInfo{.id = author + ":" + model_id + ":" + file_name, .downloaded_models = {}, .available_models = {}, - .download_url = url_parser::FromUrl(url_obj)}; + .download_url = url_parser::FromUrl(url_obj.value())}; } if (input.find(":") != std::string::npos) { diff --git a/engine/services/model_service.h b/engine/services/model_service.h index c1600e2a6..2800606ef 100644 --- a/engine/services/model_service.h +++ b/engine/services/model_service.h @@ -30,8 +30,6 @@ struct StartParameterOverride { class ModelService { public: - constexpr auto static kHuggingFaceHost = "huggingface.co"; - explicit ModelService(std::shared_ptr download_service) : download_service_{download_service} {}; @@ -39,7 +37,7 @@ class ModelService { std::shared_ptr download_service, std::shared_ptr inference_service) : download_service_{download_service}, - inference_svc_(inference_service){}; + inference_svc_(inference_service) {}; /** * Return model id if download successfully @@ -66,14 +64,13 @@ class ModelService { cpp::result DeleteModel(const std::string& model_handle); cpp::result StartModel( - const std::string& host, int port, const std::string& model_handle, + const std::string& model_handle, const StartParameterOverride& params_override); - cpp::result StopModel(const std::string& host, int port, - const std::string& model_handle); + cpp::result StopModel(const std::string& model_handle); cpp::result GetModelStatus( - const std::string& host, int port, const std::string& model_handle); + const std::string& model_handle); cpp::result GetModelPullInfo( const std::string& model_handle); diff --git a/engine/test/components/test_event.cc 
b/engine/test/components/test_event.cc index d10933f52..baa5fd16b 100644 --- a/engine/test/components/test_event.cc +++ b/engine/test/components/test_event.cc @@ -36,15 +36,14 @@ TEST_F(EventTest, EventFromString) { })"; // clang-format on auto root = json_helper::ParseJsonString(ev_str); - std::cout << root.toStyledString() << std::endl; - auto download_item = common::GetDownloadItemFromJson(root["task"]["items"][0]); - EXPECT_EQ(download_item.downloadUrl, root["task"]["items"][0]["downloadUrl"].asString()); - std::cout << download_item.ToString() << std::endl; + auto download_item = + common::GetDownloadItemFromJson(root["task"]["items"][0]); + EXPECT_EQ(download_item.downloadUrl, + root["task"]["items"][0]["downloadUrl"].asString()); auto download_task = common::GetDownloadTaskFromJson(root["task"]); - std::cout << download_task.ToString() << std::endl; auto ev = cortex::event::GetDownloadEventFromJson(root); EXPECT_EQ(ev.type_, cortex::event::DownloadEventType::DownloadStarted); -} \ No newline at end of file +} diff --git a/engine/test/components/test_github_release_utils.cc b/engine/test/components/test_github_release_utils.cc new file mode 100644 index 000000000..284aed868 --- /dev/null +++ b/engine/test/components/test_github_release_utils.cc @@ -0,0 +1,20 @@ +#include "gtest/gtest.h" +#include "utils/github_release_utils.h" + +class GitHubReleaseUtilsTest : public ::testing::Test {}; + +TEST_F(GitHubReleaseUtilsTest, AbleToGetReleaseByVersion) { + auto version{"v0.1.36"}; + auto result = github_release_utils::GetReleaseByVersion( + "janhq", "cortex.llamacpp", version); + + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result->tag_name, version); +} + +TEST_F(GitHubReleaseUtilsTest, AbleToGetReleaseList) { + auto result = github_release_utils::GetReleases("janhq", "cortex.llamacpp"); + + ASSERT_TRUE(result.has_value()); + ASSERT_TRUE(result->size() > 0); +} diff --git a/engine/test/components/test_huggingface_utils.cc b/engine/test/components/test_huggingface_utils.cc index 88f768111..afa3092a1 100644 --- a/engine/test/components/test_huggingface_utils.cc +++ b/engine/test/components/test_huggingface_utils.cc @@ -19,9 +19,8 @@ TEST_F(HuggingFaceUtilTestSuite, TestGetModelRepositoryBranches) { TEST_F(HuggingFaceUtilTestSuite, TestGetHuggingFaceModelRepoInfoSuccessfully) { auto model_info = huggingface_utils::GetHuggingFaceModelRepoInfo("cortexso", "tinyllama"); - auto not_null = model_info.has_value(); - EXPECT_TRUE(not_null); + EXPECT_TRUE(model_info.has_value()); EXPECT_EQ(model_info->id, "cortexso/tinyllama"); EXPECT_EQ(model_info->modelId, "cortexso/tinyllama"); EXPECT_EQ(model_info->author, "cortexso"); @@ -44,9 +43,8 @@ TEST_F(HuggingFaceUtilTestSuite, TestGetHuggingFaceModelRepoInfoReturnNullGgufInfoWhenNotAGgufModel) { auto model_info = huggingface_utils::GetHuggingFaceModelRepoInfo( "BAAI", "bge-reranker-v2-m3"); - auto not_null = model_info.has_value(); - EXPECT_TRUE(not_null); + EXPECT_TRUE(model_info.has_value()); EXPECT_EQ(model_info->disabled, false); EXPECT_EQ(model_info->gated, false); diff --git a/engine/test/components/test_string_utils.cc b/engine/test/components/test_string_utils.cc index 0269f0d4a..71ab78a78 100644 --- a/engine/test/components/test_string_utils.cc +++ b/engine/test/components/test_string_utils.cc @@ -2,6 +2,7 @@ #include "utils/string_utils.h" class StringUtilsTestSuite : public ::testing::Test {}; +using namespace string_utils; TEST_F(StringUtilsTestSuite, ParsePrompt) { { @@ -9,7 +10,7 @@ TEST_F(StringUtilsTestSuite, ParsePrompt) { 
"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{" "system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{" "prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"; - auto result = string_utils::ParsePrompt(prompt); + auto result = ParsePrompt(prompt); EXPECT_EQ(result.user_prompt, "<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"); EXPECT_EQ(result.ai_prompt, @@ -23,7 +24,7 @@ TEST_F(StringUtilsTestSuite, ParsePrompt) { TEST_F(StringUtilsTestSuite, TestSplitBy) { auto input = "this is a test"; std::string delimiter{' '}; - auto result = string_utils::SplitBy(input, delimiter); + auto result = SplitBy(input, delimiter); EXPECT_EQ(result.size(), 4); EXPECT_EQ(result[0], "this"); @@ -35,7 +36,7 @@ TEST_F(StringUtilsTestSuite, TestSplitBy) { TEST_F(StringUtilsTestSuite, TestSplitByWithEmptyString) { auto input = ""; std::string delimiter{' '}; - auto result = string_utils::SplitBy(input, delimiter); + auto result = SplitBy(input, delimiter); EXPECT_EQ(result.size(), 0); } @@ -43,7 +44,7 @@ TEST_F(StringUtilsTestSuite, TestSplitByWithEmptyString) { TEST_F(StringUtilsTestSuite, TestSplitModelHandle) { auto input = "cortexso/tinyllama"; std::string delimiter{'/'}; - auto result = string_utils::SplitBy(input, delimiter); + auto result = SplitBy(input, delimiter); EXPECT_EQ(result.size(), 2); EXPECT_EQ(result[0], "cortexso"); @@ -53,7 +54,7 @@ TEST_F(StringUtilsTestSuite, TestSplitModelHandle) { TEST_F(StringUtilsTestSuite, TestSplitModelHandleWithEmptyModelName) { auto input = "cortexso/"; std::string delimiter{'/'}; - auto result = string_utils::SplitBy(input, delimiter); + auto result = SplitBy(input, delimiter); EXPECT_EQ(result.size(), 1); EXPECT_EQ(result[0], "cortexso"); @@ -62,167 +63,223 @@ TEST_F(StringUtilsTestSuite, TestSplitModelHandleWithEmptyModelName) { TEST_F(StringUtilsTestSuite, TestStartsWith) { auto input = "this is a test"; auto prefix = "this"; - EXPECT_TRUE(string_utils::StartsWith(input, prefix)); + EXPECT_TRUE(StartsWith(input, prefix)); } TEST_F(StringUtilsTestSuite, TestStartsWithWithEmptyString) { auto input = ""; auto prefix = "this"; - EXPECT_FALSE(string_utils::StartsWith(input, prefix)); + EXPECT_FALSE(StartsWith(input, prefix)); } TEST_F(StringUtilsTestSuite, TestStartsWithWithEmptyPrefix) { auto input = "this is a test"; auto prefix = ""; - EXPECT_TRUE(string_utils::StartsWith(input, prefix)); + EXPECT_TRUE(StartsWith(input, prefix)); } TEST_F(StringUtilsTestSuite, TestEndsWith) { auto input = "this is a test"; auto suffix = "test"; - EXPECT_TRUE(string_utils::EndsWith(input, suffix)); + EXPECT_TRUE(EndsWith(input, suffix)); } TEST_F(StringUtilsTestSuite, TestEndsWithWithEmptyString) { auto input = ""; auto suffix = "test"; - EXPECT_FALSE(string_utils::EndsWith(input, suffix)); + EXPECT_FALSE(EndsWith(input, suffix)); } TEST_F(StringUtilsTestSuite, TestEndsWithWithEmptySuffix) { auto input = "this is a test"; auto suffix = ""; - EXPECT_TRUE(string_utils::EndsWith(input, suffix)); + EXPECT_TRUE(EndsWith(input, suffix)); } TEST_F(StringUtilsTestSuite, EmptyString) { std::string s = ""; - string_utils::Trim(s); + Trim(s); EXPECT_EQ(s, ""); } TEST_F(StringUtilsTestSuite, NoWhitespace) { std::string s = "hello"; - string_utils::Trim(s); + Trim(s); EXPECT_EQ(s, "hello"); } TEST_F(StringUtilsTestSuite, LeadingWhitespace) { std::string s = " hello"; - string_utils::Trim(s); + Trim(s); EXPECT_EQ(s, "hello"); } TEST_F(StringUtilsTestSuite, TrailingWhitespace) { std::string s = "hello "; - string_utils::Trim(s); + Trim(s); 
EXPECT_EQ(s, "hello"); } TEST_F(StringUtilsTestSuite, BothEndsWhitespace) { std::string s = " hello "; - string_utils::Trim(s); + Trim(s); EXPECT_EQ(s, "hello"); } TEST_F(StringUtilsTestSuite, ExitString) { std::string s = "exit() "; - string_utils::Trim(s); + Trim(s); EXPECT_EQ(s, "exit()"); } TEST_F(StringUtilsTestSuite, AllWhitespace) { std::string s = " "; - string_utils::Trim(s); + Trim(s); EXPECT_EQ(s, ""); } TEST_F(StringUtilsTestSuite, MixedWhitespace) { std::string s = " \t\n hello world \r\n "; - string_utils::Trim(s); + Trim(s); EXPECT_EQ(s, "hello world"); } TEST_F(StringUtilsTestSuite, EqualStrings) { - EXPECT_TRUE(string_utils::EqualsIgnoreCase("hello", "hello")); - EXPECT_TRUE(string_utils::EqualsIgnoreCase("WORLD", "WORLD")); + EXPECT_TRUE(EqualsIgnoreCase("hello", "hello")); + EXPECT_TRUE(EqualsIgnoreCase("WORLD", "WORLD")); } TEST_F(StringUtilsTestSuite, DifferentCaseStrings) { - EXPECT_TRUE(string_utils::EqualsIgnoreCase("Hello", "hElLo")); - EXPECT_TRUE(string_utils::EqualsIgnoreCase("WORLD", "world")); - EXPECT_TRUE(string_utils::EqualsIgnoreCase("MiXeD", "mIxEd")); + EXPECT_TRUE(EqualsIgnoreCase("Hello", "hElLo")); + EXPECT_TRUE(EqualsIgnoreCase("WORLD", "world")); + EXPECT_TRUE(EqualsIgnoreCase("MiXeD", "mIxEd")); } TEST_F(StringUtilsTestSuite, EmptyStrings) { - EXPECT_TRUE(string_utils::EqualsIgnoreCase("", "")); + EXPECT_TRUE(EqualsIgnoreCase("", "")); } TEST_F(StringUtilsTestSuite, DifferentStrings) { - EXPECT_FALSE(string_utils::EqualsIgnoreCase("hello", "world")); - EXPECT_FALSE(string_utils::EqualsIgnoreCase("HELLO", "hello world")); + EXPECT_FALSE(EqualsIgnoreCase("hello", "world")); + EXPECT_FALSE(EqualsIgnoreCase("HELLO", "hello world")); } TEST_F(StringUtilsTestSuite, DifferentLengthStrings) { - EXPECT_FALSE(string_utils::EqualsIgnoreCase("short", "longer string")); - EXPECT_FALSE(string_utils::EqualsIgnoreCase("LONG STRING", "long")); + EXPECT_FALSE(EqualsIgnoreCase("short", "longer string")); + EXPECT_FALSE(EqualsIgnoreCase("LONG STRING", "long")); } TEST_F(StringUtilsTestSuite, SpecialCharacters) { - EXPECT_TRUE(string_utils::EqualsIgnoreCase("Hello!", "hElLo!")); - EXPECT_TRUE(string_utils::EqualsIgnoreCase("123 ABC", "123 abc")); - EXPECT_FALSE(string_utils::EqualsIgnoreCase("Hello!", "Hello")); + EXPECT_TRUE(EqualsIgnoreCase("Hello!", "hElLo!")); + EXPECT_TRUE(EqualsIgnoreCase("123 ABC", "123 abc")); + EXPECT_FALSE(EqualsIgnoreCase("Hello!", "Hello")); } TEST_F(StringUtilsTestSuite, BasicMatching) { - EXPECT_TRUE(string_utils::StringContainsIgnoreCase("Hello, World!", "world")); - EXPECT_TRUE(string_utils::StringContainsIgnoreCase("Hello, World!", "Hello")); - EXPECT_TRUE( - string_utils::StringContainsIgnoreCase("Hello, World!", "lo, wo")); + EXPECT_TRUE(StringContainsIgnoreCase("Hello, World!", "world")); + EXPECT_TRUE(StringContainsIgnoreCase("Hello, World!", "Hello")); + EXPECT_TRUE(StringContainsIgnoreCase("Hello, World!", "lo, wo")); } TEST_F(StringUtilsTestSuite, CaseSensitivity) { - EXPECT_TRUE(string_utils::StringContainsIgnoreCase("HELLO", "hello")); - EXPECT_TRUE(string_utils::StringContainsIgnoreCase("hello", "HELLO")); - EXPECT_TRUE(string_utils::StringContainsIgnoreCase("HeLLo", "ELL")); + EXPECT_TRUE(StringContainsIgnoreCase("HELLO", "hello")); + EXPECT_TRUE(StringContainsIgnoreCase("hello", "HELLO")); + EXPECT_TRUE(StringContainsIgnoreCase("HeLLo", "ELL")); } TEST_F(StringUtilsTestSuite, EdgeCases) { - EXPECT_TRUE(string_utils::StringContainsIgnoreCase("", "")); - EXPECT_TRUE(string_utils::StringContainsIgnoreCase("Hello", "")); - 
EXPECT_FALSE(string_utils::StringContainsIgnoreCase("", "Hello")); + EXPECT_TRUE(StringContainsIgnoreCase("", "")); + EXPECT_TRUE(StringContainsIgnoreCase("Hello", "")); + EXPECT_FALSE(StringContainsIgnoreCase("", "Hello")); } TEST_F(StringUtilsTestSuite, NoMatch) { - EXPECT_FALSE( - string_utils::StringContainsIgnoreCase("Hello, World!", "Goodbye")); - EXPECT_FALSE(string_utils::StringContainsIgnoreCase("Hello", "HelloWorld")); + EXPECT_FALSE(StringContainsIgnoreCase("Hello, World!", "Goodbye")); + EXPECT_FALSE(StringContainsIgnoreCase("Hello", "HelloWorld")); } TEST_F(StringUtilsTestSuite, StringContainsWithSpecialCharacters) { - EXPECT_TRUE(string_utils::StringContainsIgnoreCase("Hello, World!", "o, W")); - EXPECT_TRUE(string_utils::StringContainsIgnoreCase("Hello! @#$%", "@#$")); + EXPECT_TRUE(StringContainsIgnoreCase("Hello, World!", "o, W")); + EXPECT_TRUE(StringContainsIgnoreCase("Hello! @#$%", "@#$")); } TEST_F(StringUtilsTestSuite, StringContainsWithModelId) { - EXPECT_TRUE(string_utils::StringContainsIgnoreCase( + EXPECT_TRUE(StringContainsIgnoreCase( "TheBloke:TinyLlama-1.1B-Chat-v0.3-GGUF:tinyllama-1.1b-chat-v0.3.Q2_K." "gguf", "thebloke")); } TEST_F(StringUtilsTestSuite, RepeatingPatterns) { - EXPECT_TRUE(string_utils::StringContainsIgnoreCase("Mississippi", "ssi")); - EXPECT_TRUE(string_utils::StringContainsIgnoreCase("Mississippi", "ssippi")); + EXPECT_TRUE(StringContainsIgnoreCase("Mississippi", "ssi")); + EXPECT_TRUE(StringContainsIgnoreCase("Mississippi", "ssippi")); } TEST_F(StringUtilsTestSuite, LongStrings) { - EXPECT_TRUE(string_utils::StringContainsIgnoreCase( - "This is a very long string to test our " - "function's performance with larger inputs", - "PERFORMANCE")); - EXPECT_FALSE(string_utils::StringContainsIgnoreCase( - "This is a very long string to test our " - "function's performance with larger inputs", - "not here")); + EXPECT_TRUE( + StringContainsIgnoreCase("This is a very long string to test our " + "function's performance with larger inputs", + "PERFORMANCE")); + EXPECT_FALSE( + StringContainsIgnoreCase("This is a very long string to test our " + "function's performance with larger inputs", + "not here")); +} + +TEST_F(StringUtilsTestSuite, BasicRemoval) { + EXPECT_EQ(RemoveSubstring("hello world", "o"), "hell wrld"); + EXPECT_EQ(RemoveSubstring("hello world", "l"), "heo word"); +} + +TEST_F(StringUtilsTestSuite, MultipleOccurrences) { + EXPECT_EQ(RemoveSubstring("banana", "a"), "bnn"); + EXPECT_EQ(RemoveSubstring("hello hello", "hello"), " "); +} + +TEST_F(StringUtilsTestSuite, NoOccurrences) { + EXPECT_EQ(RemoveSubstring("hello world", "x"), "hello world"); + EXPECT_EQ(RemoveSubstring("test", "xyz"), "test"); +} + +TEST_F(StringUtilsTestSuite, RemoveEmptyStrings) { + EXPECT_EQ(RemoveSubstring("", ""), ""); + EXPECT_EQ(RemoveSubstring("hello", ""), "hello"); + EXPECT_EQ(RemoveSubstring("", "hello"), ""); +} + +TEST_F(StringUtilsTestSuite, EntireStringMatch) { + EXPECT_EQ(RemoveSubstring("hello", "hello"), ""); + EXPECT_EQ(RemoveSubstring("test", "test"), ""); +} + +TEST_F(StringUtilsTestSuite, OverlappingPatterns) { + EXPECT_EQ(RemoveSubstring("aaaa", "aa"), ""); // Should remove "aa" twice + EXPECT_EQ(RemoveSubstring("aaa", "aa"), "a"); // Should remove first "aa" +} + +TEST_F(StringUtilsTestSuite, RemoveSubstringCaseSensitivity) { + EXPECT_EQ(RemoveSubstring("Hello World", "hello"), "Hello World"); + EXPECT_EQ(RemoveSubstring("Hello World", "Hello"), " World"); +} + +TEST_F(StringUtilsTestSuite, RemoveSubstringSpecialCharacters) { + 
EXPECT_EQ(RemoveSubstring("hello\nworld", "\n"), "helloworld"); + EXPECT_EQ(RemoveSubstring("hello\tworld", "\t"), "helloworld"); + EXPECT_EQ(RemoveSubstring("hello world", " "), "helloworld"); +} + +TEST_F(StringUtilsTestSuite, RemoveSubstringLongStrings) { + std::string long_string(1000, 'a'); // String of 1000 'a' characters + std::string expected(""); // Everything is removed, leaving an empty string + EXPECT_EQ(RemoveSubstring(long_string, std::string(100, 'a')), expected); +} + +// Performance test (optional, might want to move to a benchmark suite) +TEST_F(StringUtilsTestSuite, LargeInputPerformance) { + std::string large_input = std::string(1000000, 'x'); // 1M characters + std::string to_remove = "x"; + + // This test mainly ensures the function completes in a reasonable time + // and doesn't crash with large inputs + EXPECT_EQ(RemoveSubstring(large_input, to_remove), ""); } diff --git a/engine/test/components/test_url_parser.cc b/engine/test/components/test_url_parser.cc index cee6cb6ed..25769bc6f 100644 --- a/engine/test/components/test_url_parser.cc +++ b/engine/test/components/test_url_parser.cc @@ -9,9 +9,9 @@ class UrlParserTestSuite : public ::testing::Test { TEST_F(UrlParserTestSuite, TestParseUrlCorrectly) { auto url = url_parser::FromUrlString(kValidUrlWithOnlyPaths); - EXPECT_EQ(url.host, "jan.ai"); - EXPECT_EQ(url.protocol, "https"); - EXPECT_EQ(url.pathParams.size(), 2); + EXPECT_EQ(url->host, "jan.ai"); + EXPECT_EQ(url->protocol, "https"); + EXPECT_EQ(url->pathParams.size(), 2); } TEST_F(UrlParserTestSuite, ConstructUrlCorrectly) { diff --git a/engine/utils/archive_utils.h b/engine/utils/archive_utils.h index fb52bdb9e..dba698731 100644 --- a/engine/utils/archive_utils.h +++ b/engine/utils/archive_utils.h @@ -11,15 +11,17 @@ namespace archive_utils { inline bool UnzipFile(const std::string& input_zip_path, const std::string& destination_path); inline bool UntarFile(const std::string& input_tar_path, - const std::string& destination_path); + const std::string& destination_path, + bool ignore_parent_dir = false); inline bool ExtractArchive(const std::string& input_path, - const std::string& destination_path) { + const std::string& destination_path, + bool ignore_parent_dir = false) { if (input_path.find(".zip") != std::string::npos) { return UnzipFile(input_path, destination_path); } else if (input_path.find(".tar") != std::string::npos || input_path.find(".tar.gz") != std::string::npos) { - return UntarFile(input_path, destination_path); + return UntarFile(input_path, destination_path, ignore_parent_dir); } else { LOG_ERROR << "Unsupported file type: " << input_path << "\n"; return false; @@ -94,10 +96,11 @@ inline bool UnzipFile(const std::string& input_zip_path, } inline bool UntarFile(const std::string& input_tar_path, - const std::string& destination_path) { + const std::string& destination_path, + bool ignore_parent_dir) { struct archive* tar_archive = archive_read_new(); archive_read_support_format_tar(tar_archive); - archive_read_support_compression_gzip(tar_archive); + archive_read_support_filter_gzip(tar_archive); if (archive_read_open_filename(tar_archive, input_tar_path.c_str(), 10240) != ARCHIVE_OK) { @@ -110,15 +113,21 @@ inline bool UntarFile(const std::string& input_tar_path, struct archive_entry* entry; while (archive_read_next_header(tar_archive, &entry) == ARCHIVE_OK) { const char* current_file = archive_entry_pathname(entry); + auto file_in_tar_path = + std::filesystem::path(destination_path) / current_file; + auto file_name = std::filesystem::path(file_in_tar_path).filename(); + auto output_path = std::filesystem::path(destination_path) / file_name; std::string full_path = destination_path + "/" + current_file; if (archive_entry_filetype(entry) == AE_IFDIR) { - std::filesystem::create_directories(full_path); + if (!ignore_parent_dir) { + std::filesystem::create_directories(full_path); + } } else { - std::filesystem::create_directories( - std::filesystem::path(full_path).parent_path()); + auto final_output_path = + ignore_parent_dir ? output_path.string() : full_path; - std::ofstream out_file(full_path, std::ios::binary); + std::ofstream out_file(final_output_path, std::ios::binary); if (!out_file.is_open()) { LOG_ERROR << "Failed to create file: " << full_path << "\n"; archive_read_free(tar_archive); @@ -141,7 +150,7 @@ archive_read_free(tar_archive); CTL_INF("Extracted successfully " << input_tar_path << " to " - << destination_path << "\n"); + << destination_path << "\n"); return true; } -} // namespace archive_utils \ No newline at end of file +} // namespace archive_utils
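The new `ignore_parent_dir` flag flattens a tarball's leading directory on extraction: file entries are written directly under the destination instead of under the archive's top-level folder. A usage sketch; the paths are illustrative, not from the patch:

```cpp
// Sketch: extracting an engine release tarball flat into its destination.
// With ignore_parent_dir = true the archive's parent folder is dropped,
// so files land directly under dest.
#include "utils/archive_utils.h"

bool ExtractEngineTarball() {
  const std::string tarball = "/tmp/cortex.llamacpp-0.1.36-mac-arm64.tar.gz";
  const std::string dest = "/tmp/engines/cortex.llamacpp";
  return archive_utils::ExtractArchive(tarball, dest,
                                       /*ignore_parent_dir=*/true);
}
```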
diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h index f61640db3..87a114d25 100644 --- a/engine/utils/config_yaml_utils.h +++ b/engine/utils/config_yaml_utils.h @@ -4,6 +4,7 @@ #include #include #include "utils/logging_utils.h" +#include "utils/result.hpp" #include "yaml-cpp/yaml.h" namespace config_yaml_utils { @@ -13,23 +14,31 @@ struct CortexConfig { std::string logTensorrtLLMPath; std::string logOnnxPath; std::string dataFolderPath; + int maxLogLines; std::string apiServerHost; std::string apiServerPort; uint64_t checkedForUpdateAt; std::string latestRelease; + std::string huggingFaceToken; + /** + * GitHub's API requires a user-agent string. 
+ */ + std::string gitHubUserAgent; + std::string gitHubToken; + std::string llamacppVariant; + std::string llamacppVersion; }; -const std::string kCortexFolderName = "cortexcpp"; const std::string kDefaultHost{"127.0.0.1"}; const std::string kDefaultPort{"39281"}; const int kDefaultMaxLines{100000}; constexpr const uint64_t kDefaultCheckedForUpdateAt = 0u; constexpr const auto kDefaultLatestRelease = "default_version"; -inline void DumpYamlConfig(const CortexConfig& config, - const std::string& path) { +inline cpp::result DumpYamlConfig(const CortexConfig& config, + const std::string& path) { std::filesystem::path config_file_path{path}; try { @@ -49,12 +58,17 @@ inline void DumpYamlConfig(const CortexConfig& config, node["checkedForUpdateAt"] = config.checkedForUpdateAt; node["latestRelease"] = config.latestRelease; node["huggingFaceToken"] = config.huggingFaceToken; + node["gitHubUserAgent"] = config.gitHubUserAgent; + node["gitHubToken"] = config.gitHubToken; + node["llamacppVariant"] = config.llamacppVariant; + node["llamacppVersion"] = config.llamacppVersion; out_file << node; out_file.close(); + return {}; } catch (const std::exception& e) { CTL_ERR("Error writing to file: " << e.what()); - throw; + return cpp::fail("Error writing to file: " + std::string(e.what())); } } @@ -73,7 +87,9 @@ inline CortexConfig FromYaml(const std::string& path, !node["apiServerPort"] || !node["checkedForUpdateAt"] || !node["latestRelease"] || !node["logLlamaCppPath"] || !node["logOnnxPath"] || !node["logTensorrtLLMPath"] || - !node["huggingFaceToken"]); + !node["huggingFaceToken"] || !node["gitHubUserAgent"] || + !node["gitHubToken"] || !node["llamacppVariant"] || + !node["llamacppVersion"]); CortexConfig config = { .logFolderPath = node["logFolderPath"] @@ -105,10 +121,26 @@ inline CortexConfig FromYaml(const std::string& path, .latestRelease = node["latestRelease"] ? node["latestRelease"].as() : default_cfg.latestRelease, - .huggingFaceToken = node["huggingFaceToken"] ? node["huggingFaceToken"].as() : "", + .huggingFaceToken = node["huggingFaceToken"] + ? node["huggingFaceToken"].as() + : "", + .gitHubUserAgent = node["gitHubUserAgent"] + ? node["gitHubUserAgent"].as() + : "", + .gitHubToken = + node["gitHubToken"] ? node["gitHubToken"].as() : "", + .llamacppVariant = node["llamacppVariant"] + ? node["llamacppVariant"].as() + : "", + .llamacppVersion = node["llamacppVersion"] + ? 
node["llamacppVersion"].as() + : "", }; if (should_update_config) { - DumpYamlConfig(config, path); + auto result = DumpYamlConfig(config, path); + if (result.has_error()) { + CTL_ERR("Failed to update config file: " << result.error()); + } } return config; } catch (const YAML::BadFile& e) { diff --git a/engine/utils/curl_utils.h b/engine/utils/curl_utils.h index 2c847e17f..88b05828a 100644 --- a/engine/utils/curl_utils.h +++ b/engine/utils/curl_utils.h @@ -1,9 +1,16 @@ +#pragma once + #include +#include +#include #include #include -#include #include +#include "utils/engine_constants.h" +#include "utils/file_manager_utils.h" +#include "utils/logging_utils.h" #include "utils/result.hpp" +#include "utils/url_parser.h" namespace curl_utils { namespace { @@ -15,44 +22,159 @@ size_t WriteCallback(void* contents, size_t size, size_t nmemb, } } // namespace -inline cpp::result SimpleGet( - const std::string& url, curl_slist* headers = nullptr) { - CURL* curl; - CURLcode res; - std::string readBuffer; +inline std::optional> GetHeaders( + const std::string& url); +inline cpp::result SimpleGet(const std::string& url) { // Initialize libcurl curl_global_init(CURL_GLOBAL_DEFAULT); - curl = curl_easy_init(); + auto curl = curl_easy_init(); if (!curl) { return cpp::fail("Failed to init CURL"); } + + auto headers = GetHeaders(url); + curl_slist* curl_headers = nullptr; + if (headers.has_value()) { + for (const auto& [key, value] : headers.value()) { + auto header = key + ": " + value; + curl_headers = curl_slist_append(curl_headers, header.c_str()); + } + + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, curl_headers); + } + + std::string readBuffer; + curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); - if(headers) { - curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &readBuffer); + + // Perform the request + auto res = curl_easy_perform(curl); + + curl_slist_free_all(curl_headers); + curl_easy_cleanup(curl); + if (res != CURLE_OK) { + return cpp::fail("CURL request failed: " + + static_cast(curl_easy_strerror(res))); + } + auto http_code = 0; + curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code); + if (http_code >= 400) { + CTL_ERR("HTTP request failed with status code: " + + std::to_string(http_code)); + return cpp::fail(readBuffer); } - // Set write function callback and data buffer + return readBuffer; +} + +inline cpp::result SimplePost( + const std::string& url, const std::string& body = "") { + curl_global_init(CURL_GLOBAL_DEFAULT); + auto curl = curl_easy_init(); + + if (!curl) { + return cpp::fail("Failed to init CURL"); + } + + auto headers = GetHeaders(url); + curl_slist* curl_headers = nullptr; + if (headers.has_value()) { + + for (const auto& [key, value] : headers.value()) { + auto header = key + ": " + value; + curl_headers = curl_slist_append(curl_headers, header.c_str()); + } + + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, curl_headers); + } + + std::string readBuffer; + + curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl, CURLOPT_POST, 1L); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body.c_str()); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); curl_easy_setopt(curl, CURLOPT_WRITEDATA, &readBuffer); // Perform the request - res = curl_easy_perform(curl); + auto res = curl_easy_perform(curl); + curl_slist_free_all(curl_headers); + curl_easy_cleanup(curl); if (res != CURLE_OK) { + 
CTL_ERR("CURL request failed: " + std::string(curl_easy_strerror(res))); return cpp::fail("CURL request failed: " + static_cast(curl_easy_strerror(res))); } + auto http_code = 0; + curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code); + if (http_code >= 400) { + CTL_ERR("HTTP request failed with status code: " + + std::to_string(http_code)); + return cpp::fail(readBuffer); + } + + return readBuffer; +} + +inline cpp::result SimpleDelete( + const std::string& url) { + std::string readBuffer; + auto curl = curl_easy_init(); + + if (!curl) { + throw std::runtime_error("Failed to initialize CURL"); + } + + auto headers = GetHeaders(url); + curl_slist* curl_headers = nullptr; + if (headers.has_value()) { + for (const auto& [key, value] : headers.value()) { + auto header = key + ": " + value; + curl_headers = curl_slist_append(curl_headers, header.c_str()); + } + + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, curl_headers); + } + + curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE"); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &readBuffer); + + // Perform the request + auto res = curl_easy_perform(curl); + + long responseCode; + curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &responseCode); + + curl_slist_free_all(curl_headers); curl_easy_cleanup(curl); + + if (res != CURLE_OK) { + throw std::runtime_error(std::string("Delete request failed: ") + + curl_easy_strerror(res)); + } + + if (responseCode >= 400) { + throw std::runtime_error("HTTP error: " + std::to_string(responseCode) + + "\nResponse: " + readBuffer); + } + return readBuffer; } inline cpp::result ReadRemoteYaml( - const std::string& url, curl_slist* headers = nullptr) { - auto result = SimpleGet(url, headers); + const std::string& url) { + auto result = SimpleGet(url); if (result.has_error()) { + CTL_ERR("Failed to get Yaml from " + url + ": " + result.error()); return cpp::fail(result.error()); } @@ -64,18 +186,93 @@ inline cpp::result ReadRemoteYaml( } } -inline cpp::result SimpleGetJson( - const std::string& url, curl_slist* headers = nullptr) { - auto result = SimpleGet(url, headers); +inline cpp::result SimpleGetJson( + const std::string& url) { + auto result = SimpleGet(url); if (result.has_error()) { + CTL_ERR("Failed to get JSON from " + url + ": " + result.error()); return cpp::fail(result.error()); } - try { - return nlohmann::json::parse(result.value()); - } catch (const std::exception& e) { + Json::Value root; + Json::Reader reader; + if (!reader.parse(result.value(), root)) { return cpp::fail("JSON from " + url + - " parsing error: " + std::string(e.what())); + " parsing error: " + reader.getFormattedErrorMessages()); } + + return root; +} + +inline cpp::result SimplePostJson( + const std::string& url, const std::string& body = "") { + auto result = SimplePost(url, body); + if (result.has_error()) { + CTL_ERR("Failed to get JSON from " + url + ": " + result.error()); + return cpp::fail(result.error()); + } + + CTL_INF("Response: " + result.value()); + Json::Value root; + Json::Reader reader; + if (!reader.parse(result.value(), root)) { + return cpp::fail("JSON from " + url + + " parsing error: " + reader.getFormattedErrorMessages()); + } + + return root; +} + +inline std::optional> GetHeaders( + const std::string& url) { + auto url_obj = url_parser::FromUrlString(url); + if (url_obj.has_error()) { + return std::nullopt; + } + + if (url_obj->host == kHuggingFaceHost) { + std::unordered_map 
+inline std::optional<std::unordered_map<std::string, std::string>> GetHeaders(
+    const std::string& url) {
+  auto url_obj = url_parser::FromUrlString(url);
+  if (url_obj.has_error()) {
+    return std::nullopt;
+  }
+
+  if (url_obj->host == kHuggingFaceHost) {
+    std::unordered_map<std::string, std::string> headers{};
+    headers["Content-Type"] = "application/json";
+    auto const& token = file_manager_utils::GetCortexConfig().huggingFaceToken;
+    if (!token.empty()) {
+      headers["Authorization"] = "Bearer " + token;
+
+      // For debugging, log only the token's last characters, never the full value.
+      const size_t min_token_size = 6;
+      if (token.size() < min_token_size) {
+        CTL_WRN("Hugging Face token is too short");
+      } else {
+        CTL_INF("Using authentication with Hugging Face token: " +
+                token.substr(token.size() - min_token_size));
+      }
+    }
+
+    return headers;
+  }
+
+  if (url_obj->host == kGitHubHost) {
+    std::unordered_map<std::string, std::string> headers{};
+    headers["Accept"] = "application/vnd.github.v3+json";
+    // The GitHub API requires a User-Agent header:
+    // https://docs.github.com/en/rest/using-the-rest-api/getting-started-with-the-rest-api?apiVersion=2022-11-28#user-agent
+    auto user_agent = file_manager_utils::GetCortexConfig().gitHubUserAgent;
+    auto gh_token = file_manager_utils::GetCortexConfig().gitHubToken;
+    headers["User-Agent"] =
+        user_agent.empty() ? kDefaultGHUserAgent : user_agent;
+    if (!gh_token.empty()) {
+      headers["Authorization"] = "Bearer " + gh_token;
+
+      // For debugging, log only the token's last characters, never the full value.
+      const size_t min_token_size = 6;
+      if (gh_token.size() < min_token_size) {
+        CTL_WRN("GitHub token is too short");
+      } else {
+        CTL_INF("Using authentication with GitHub token: " +
+                gh_token.substr(gh_token.size() - min_token_size));
+      }
+    }
+    return headers;
+  }
+
+  return std::nullopt;
+}
 }  // namespace curl_utils
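A small sketch of the host-based header resolution, assuming a default config with no tokens set (the expectations follow from the code above, not from a test in this PR):

#include <cassert>

#include "utils/curl_utils.h"

int main() {
  // GitHub requests always carry Accept and User-Agent, token or not.
  auto gh = curl_utils::GetHeaders(
      "https://api.github.com/repos/janhq/cortex.llamacpp/releases");
  assert(gh.has_value());
  assert(gh->count("Accept") == 1);
  assert(gh->count("User-Agent") == 1);

  // Hosts other than huggingface.co / api.github.com get no injected headers.
  auto other = curl_utils::GetHeaders("https://example.com/file.bin");
  assert(!other.has_value());
  return 0;
}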
diff --git a/engine/utils/engine_constants.h b/engine/utils/engine_constants.h
index dbc1e223b..72d1af815 100644
--- a/engine/utils/engine_constants.h
+++ b/engine/utils/engine_constants.h
@@ -12,4 +12,15 @@ constexpr const auto kPythonRuntimeRepo = "cortex.python";
 constexpr const auto kLlamaLibPath = "/engines/cortex.llamacpp";
 constexpr const auto kPythonRuntimeLibPath = "/engines/cortex.python";
 constexpr const auto kOnnxLibPath = "/engines/cortex.onnx";
-constexpr const auto kTensorrtLlmPath = "/engines/cortex.tensorrt-llm";
\ No newline at end of file
+constexpr const auto kTensorrtLlmPath = "/engines/cortex.tensorrt-llm";
+
+// other constants
+constexpr auto static kHuggingFaceHost = "huggingface.co";
+constexpr auto static kGitHubHost = "api.github.com";
+constexpr auto static kCortexFolderName = "cortexcpp";
+constexpr auto static kDefaultGHUserAgent = "cortexcpp";
+
+constexpr auto static kWindowsOs = "windows";
+constexpr auto static kMacOs = "mac";
+constexpr auto static kLinuxOs = "linux";
+constexpr auto static kUnsupportedOs = "Unsupported OS";
diff --git a/engine/utils/engine_matcher_utils.h b/engine/utils/engine_matcher_utils.h
index 287304f02..652e13486 100644
--- a/engine/utils/engine_matcher_utils.h
+++ b/engine/utils/engine_matcher_utils.h
@@ -1,3 +1,5 @@
+#pragma once
+
 #include <algorithm>
 #include <regex>
 #include <string>
@@ -6,8 +8,31 @@
 #include <vector>
 #include "utils/cpuid/cpu_info.h"
 #include "utils/logging_utils.h"
+#include "utils/result.hpp"
+#include "utils/string_utils.h"

 namespace engine_matcher_utils {
+/**
+ * Extract the engine variant (the OS/arch suffix) from a release file name,
+ * given the engine name and version.
+ */
+inline cpp::result<std::string, std::string> GetVariantFromNameAndVersion(
+    const std::string& engine_file_name, const std::string& engine,
+    const std::string& version) {
+  if (engine_file_name.empty()) {
+    return cpp::fail("Engine file name is empty");
+  }
+  if (engine.empty()) {
+    return cpp::fail("Engine name is empty");
+  }
+  using namespace string_utils;
+  // Strip the leading "v" from the version, then the archive extension and
+  // the "<engine>-<version>-" prefix; what remains is the variant.
+  auto nv = RemoveSubstring(version, "v");
+  auto removed_extension = RemoveSubstring(engine_file_name, ".tar.gz");
+  auto version_and_variant = RemoveSubstring(removed_extension, engine + "-");
+
+  auto variant = RemoveSubstring(version_and_variant, nv + "-");
+  return variant;
+}
+
 inline std::string GetSuitableAvxVariant(cortex::cpuid::CpuInfo& cpu_info) {
   CTL_INF("GetSuitableAvxVariant:" << "\n" << cpu_info.to_string());
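A worked example of the extraction, using a file name that follows the cortex.llamacpp release naming scheme (the exact name is illustrative):

#include <iostream>

#include "utils/engine_matcher_utils.h"

int main() {
  auto variant = engine_matcher_utils::GetVariantFromNameAndVersion(
      "cortex.llamacpp-0.1.35-27.10.24-linux-amd64-avx.tar.gz",
      "cortex.llamacpp", "v0.1.35-27.10.24");
  if (variant.has_value()) {
    // ".tar.gz", "cortex.llamacpp-" and "0.1.35-27.10.24-" are stripped,
    // leaving only the variant.
    std::cout << variant.value() << '\n';  // prints "linux-amd64-avx"
  }
  return 0;
}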
diff --git a/engine/utils/file_manager_utils.h b/engine/utils/file_manager_utils.h
index d4b635312..b6d1f1c5a 100644
--- a/engine/utils/file_manager_utils.h
+++ b/engine/utils/file_manager_utils.h
@@ -2,9 +2,11 @@
 #include <filesystem>
 #include <iostream>
 #include <string>
+#include "common/download_task.h"
 #include "logging_utils.h"
-#include "services/download_service.h"
 #include "utils/config_yaml_utils.h"
+#include "utils/engine_constants.h"
+#include "utils/result.hpp"

 #if defined(__APPLE__) && defined(__MACH__)
 #include <mach-o/dyld.h>
@@ -125,7 +127,7 @@ inline std::string GetDefaultDataFolderName() {
 #ifndef CORTEX_VARIANT
 #define CORTEX_VARIANT "prod"
 #endif
-  std::string default_data_folder_name{config_yaml_utils::kCortexFolderName};
+  std::string default_data_folder_name{kCortexFolderName};
   std::string variant{CORTEX_VARIANT};
   std::string env_postfix{""};
   if (variant == kBetaVariant) {
@@ -137,11 +139,22 @@ inline std::string GetDefaultDataFolderName() {
   return default_data_folder_name;
 }

-inline void CreateConfigFileIfNotExist() {
+inline cpp::result<void, std::string> UpdateCortexConfig(
+    const config_yaml_utils::CortexConfig& config) {
+  auto config_path = GetConfigurationPath();
+  if (!std::filesystem::exists(config_path)) {
+    CTL_ERR("Config file not found: " << config_path.string());
+    return cpp::fail("Config file not found: " + config_path.string());
+  }
+
+  return DumpYamlConfig(config, config_path.string());
+}
+
+inline cpp::result<void, std::string> CreateConfigFileIfNotExist() {
   auto config_path = GetConfigurationPath();
   if (std::filesystem::exists(config_path)) {
-    // already exists
-    return;
+    // already exists, no need to create
+    return {};
   }

   auto default_data_folder_name = GetDefaultDataFolderName();
@@ -164,7 +177,7 @@
       .apiServerHost = config_yaml_utils::kDefaultHost,
       .apiServerPort = config_yaml_utils::kDefaultPort,
   };
-  DumpYamlConfig(config, config_path.string());
+  return DumpYamlConfig(config, config_path.string());
 }

 inline config_yaml_utils::CortexConfig GetCortexConfig() {
@@ -189,14 +202,19 @@ inline config_yaml_utils::CortexConfig GetCortexConfig() {
 }

 inline std::filesystem::path GetCortexDataPath() {
-  CreateConfigFileIfNotExist();
+  auto result = CreateConfigFileIfNotExist();
+  if (result.has_error()) {
+    CTL_ERR("Error creating config file: " << result.error());
+    return std::filesystem::path{};
+  }
+
   auto config = GetCortexConfig();
   std::filesystem::path data_folder_path;
   if (!config.dataFolderPath.empty()) {
     data_folder_path = std::filesystem::path(config.dataFolderPath);
   } else {
     auto home_path = GetHomeDirectoryPath();
-    data_folder_path = home_path / config_yaml_utils::kCortexFolderName;
+    data_folder_path = home_path / kCortexFolderName;
   }

   if (!std::filesystem::exists(data_folder_path)) {
@@ -217,7 +235,7 @@ inline std::filesystem::path GetCortexLogPath() {
     log_folder_path = std::filesystem::path(config.logFolderPath);
   } else {
     auto home_path = GetHomeDirectoryPath();
-    log_folder_path = home_path / config_yaml_utils::kCortexFolderName;
+    log_folder_path = home_path / kCortexFolderName;
   }

   if (!std::filesystem::exists(log_folder_path)) {
@@ -238,7 +256,10 @@ inline void CreateDirectoryRecursively(const std::string& path) {
 }

 inline std::filesystem::path GetModelsContainerPath() {
-  CreateConfigFileIfNotExist();
+  auto result = CreateConfigFileIfNotExist();
+  if (result.has_error()) {
+    CTL_ERR("Error creating config file: " << result.error());
+  }
   auto cortex_path = GetCortexDataPath();
   auto models_container_path = cortex_path / "models";

@@ -251,6 +272,19 @@ inline std::filesystem::path GetModelsContainerPath() {
   return models_container_path;
 }

+inline std::filesystem::path GetCudaToolkitPath(const std::string& engine) {
+  auto engine_path = getenv("ENGINE_PATH")
+                         ? std::filesystem::path(getenv("ENGINE_PATH"))
+                         : GetCortexDataPath();
+
+  auto cuda_path = engine_path / "engines" / engine / "deps";
+  if (!std::filesystem::exists(cuda_path)) {
+    std::filesystem::create_directories(cuda_path);
+  }
+
+  return cuda_path;
+}
+
 inline std::filesystem::path GetEnginesContainerPath() {
   auto cortex_path = getenv("ENGINE_PATH")
                          ? std::filesystem::path(getenv("ENGINE_PATH"))
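With UpdateCortexConfig returning cpp::result, a read-modify-write of cortex.yaml can surface I/O failures; a minimal sketch (the token value is a placeholder):

#include <iostream>

#include "utils/file_manager_utils.h"

int main() {
  auto config = file_manager_utils::GetCortexConfig();
  config.huggingFaceToken = "hf_xxxxxxxx";  // placeholder, not a real token

  auto result = file_manager_utils::UpdateCortexConfig(config);
  if (result.has_error()) {
    std::cerr << "Config update failed: " << result.error() << '\n';
    return 1;
  }
  return 0;
}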
root["updated_at"] = updated_at; + root["browser_download_url"] = browser_download_url; + root["version"] = version; + return root; + } + + std::optional ToApiJson(const std::string& engine, + const std::string& version) const { + auto variant_name_result = + engine_matcher_utils::GetVariantFromNameAndVersion(name, engine, + version); + if (variant_name_result.has_error()) { + CTL_ERR("Failed to get variant name: " << variant_name_result.error()); + return std::nullopt; + } + + Json::Value root; + root["name"] = variant_name_result.value(); + root["download_count"] = download_count; + root["size"] = size; + root["created_at"] = created_at; + return root; + } +}; + +struct GitHubRelease { + std::string url; + int id; + std::string tag_name; + std::string name; + bool draft; + + bool prerelease; + std::string created_at; + std::string published_at; + std::vector assets; + + static GitHubRelease FromJson(const Json::Value& json) { + std::vector assets = {}; + if (json["assets"].isArray()) { + for (const auto& asset : json["assets"]) { + assets.push_back(GitHubAsset::FromJson(asset, json["name"].asString())); + } + } + + return GitHubRelease{ + .url = json["url"].asString(), + .id = json["id"].asInt(), + .tag_name = json["tag_name"].asString(), + .name = json["name"].asString(), + .draft = json["draft"].asBool(), + .prerelease = json["prerelease"].asBool(), + .created_at = json["created_at"].asString(), + .published_at = json["published_at"].asString(), + .assets = assets, + }; + } + + Json::Value ToApiJson() const { + Json::Value root; + root["url"] = url; + root["name"] = tag_name; + root["draft"] = draft; + root["prerelease"] = prerelease; + root["published_at"] = published_at; + return root; + } + + Json::Value ToJson() const { + Json::Value assetsArray(Json::arrayValue); + for (const auto& asset : assets) { + assetsArray.append(asset.ToJson()); + } + Json::Value root; + root["url"] = url; + root["id"] = id; + root["tag_name"] = tag_name; + root["name"] = name; + root["draft"] = draft; + root["prerelease"] = prerelease; + root["created_at"] = created_at; + root["published_at"] = published_at; + root["assets"] = assetsArray; + return root; + } +}; + +// TODO: (namh) support pagination for this api +inline cpp::result, std::string> GetReleases( + const std::string& author, const std::string& repo, + const bool allow_prerelease = true) { + auto url = url_parser::Url{ + .protocol = "https", + .host = kGitHubHost, + .pathParams = {"repos", author, repo, "releases"}, + }; + + auto result = curl_utils::SimpleGetJson(url_parser::FromUrl(url)); + + if (result.has_error()) { + return cpp::fail(result.error()); + } + + if (!result.value().isArray()) { + return cpp::fail("Releases returned is not an array!"); + } + + std::vector releases{}; + for (const auto& release : result.value()) { + releases.push_back(GitHubRelease::FromJson(release)); + } + return releases; +} + +inline cpp::result GetReleaseByVersion( + const std::string& author, const std::string& repo, + const std::string& tag) { + + std::vector path_params{"repos", author, repo, "releases"}; + if (tag != "latest") { + path_params.push_back("tags"); + + if (!string_utils::StartsWith(tag, "v")) { + path_params.push_back("v" + tag); + } + + path_params.push_back(tag); + } else { + path_params.push_back("latest"); + } + + auto url = url_parser::Url{ + .protocol = "https", + .host = kGitHubHost, + .pathParams = path_params, + }; + + CTL_INF("GetReleaseByVersion: " << url.ToFullPath()); + auto result = 
+inline cpp::result<GitHubRelease, std::string> GetReleaseByVersion(
+    const std::string& author, const std::string& repo,
+    const std::string& tag) {
+
+  std::vector<std::string> path_params{"repos", author, repo, "releases"};
+  if (tag != "latest") {
+    path_params.push_back("tags");
+
+    // GitHub release tags are "v"-prefixed; normalize before querying so
+    // exactly one tag segment is appended.
+    if (!string_utils::StartsWith(tag, "v")) {
+      path_params.push_back("v" + tag);
+    } else {
+      path_params.push_back(tag);
+    }
+  } else {
+    path_params.push_back("latest");
+  }
+
+  auto url = url_parser::Url{
+      .protocol = "https",
+      .host = kGitHubHost,
+      .pathParams = path_params,
+  };
+
+  CTL_INF("GetReleaseByVersion: " << url.ToFullPath());
+  auto result = curl_utils::SimpleGetJson(url_parser::FromUrl(url));
+
+  if (result.has_error()) {
+    return cpp::fail(result.error());
+  }
+  if (result.value() == Json::nullValue) {
+    return cpp::fail("No release found for " + tag);
+  }
+
+  return GitHubRelease::FromJson(result.value());
+}
+}  // namespace github_release_utils
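And the single-release variant; both "latest" and explicit tags are accepted, with un-prefixed tags normalized to the "v" form (again a live request):

#include <iostream>

#include "utils/github_release_utils.h"

int main() {
  auto release = github_release_utils::GetReleaseByVersion(
      "janhq", "cortex.llamacpp", "latest");
  if (release.has_error()) {
    std::cerr << release.error() << '\n';
    return 1;
  }
  std::cout << release->tag_name << '\n';
  return 0;
}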
diff --git a/engine/utils/huggingface_utils.h b/engine/utils/huggingface_utils.h
index 9f78f59d3..99df2aa77 100644
--- a/engine/utils/huggingface_utils.h
+++ b/engine/utils/huggingface_utils.h
@@ -2,17 +2,15 @@
 #include <optional>
 #include <string>
+#include <unordered_map>
 #include <vector>
-#include "httplib.h"
 #include "utils/curl_utils.h"
-#include "utils/file_manager_utils.h"
+#include "utils/json_parser_utils.h"
 #include "utils/result.hpp"
 #include "utils/url_parser.h"

 namespace huggingface_utils {

-constexpr static auto kHuggingfaceHost{"huggingface.co"};
-
 struct HuggingFaceBranch {
   std::string name;
   std::string ref;
@@ -26,6 +24,28 @@ struct HuggingFaceFileSibling {

 struct HuggingFaceGgufInfo {
   uint64_t total;
   std::string architecture;
+
+  static cpp::result<HuggingFaceGgufInfo, std::string> FromJson(
+      const Json::Value& json) {
+    if (json.isNull() || json.type() == Json::ValueType::nullValue) {
+      return cpp::fail("gguf info is null");
+    }
+    try {
+      return HuggingFaceGgufInfo{
+          .total = json["total"].asUInt64(),
+          .architecture = json["architecture"].asString(),
+      };
+    } catch (const std::exception& e) {
+      return cpp::fail("Failed to parse gguf info: " + std::string(e.what()));
+    }
+  }
+
+  Json::Value ToJson() {
+    Json::Value root;
+    root["total"] = total;
+    root["architecture"] = architecture;
+    return root;
+  }
 };

 struct HuggingFaceModelRepoInfo {
@@ -46,34 +66,52 @@ struct HuggingFaceModelRepoInfo {
   std::vector<HuggingFaceFileSibling> siblings;
   std::vector<std::string> spaces;
   std::string createdAt;
-};

-inline std::optional<std::string> GetHuggingFaceToken() {
-  auto const& token = file_manager_utils::GetCortexConfig().huggingFaceToken;
-  if (token.empty())
-    return std::nullopt;
-  return token;
-}
-
-inline curl_slist* CreateCurlHfHeaders() {
-  struct curl_slist* headers = nullptr;
-  auto hf_token = GetHuggingFaceToken();
-  if (hf_token) {
-    std::string auth_header = "Authorization: Bearer " + hf_token.value();
-    headers = curl_slist_append(headers, auth_header.c_str());
-    headers = curl_slist_append(headers, "Content-Type: application/json");
-  }
-  return headers;
-}
-
-inline httplib::Headers CreateHttpHfHeaders() {
-  httplib::Headers headers;
-  auto token = GetHuggingFaceToken();
-  if (token) {
-    headers.emplace("Authorization", "Bearer " + token.value());
-  }
-  return headers;
-}
+  static cpp::result<HuggingFaceModelRepoInfo, std::string> FromJson(
+      const Json::Value& body) {
+    std::optional<HuggingFaceGgufInfo> gguf = std::nullopt;
+    auto gguf_result = HuggingFaceGgufInfo::FromJson(body["gguf"]);
+    if (gguf_result.has_value()) {
+      gguf = gguf_result.value();
+    }
+
+    std::vector<HuggingFaceFileSibling> siblings{};
+    auto siblings_info = body["siblings"];
+    for (const auto& sibling : siblings_info) {
+      auto sibling_info = HuggingFaceFileSibling{
+          .rfilename = sibling["rfilename"].asString(),
+      };
+      siblings.push_back(sibling_info);
+    }
+
+    return HuggingFaceModelRepoInfo{
+        .id = body["id"].asString(),
+        .modelId = body["modelId"].asString(),
+        .author = body["author"].asString(),
+        .sha = body["sha"].asString(),
+        .lastModified = body["lastModified"].asString(),
+
+        .isPrivate = body["private"].asBool(),
+        .disabled = body["disabled"].asBool(),
+        .gated = body["gated"].asBool(),
+        .tags = json_parser_utils::ParseJsonArray<std::string>(body["tags"]),
+        .downloads = body["downloads"].asInt(),
+
+        .likes = body["likes"].asInt(),
+        .gguf = gguf,
+        .siblings = siblings,
+        .spaces =
+            json_parser_utils::ParseJsonArray<std::string>(body["spaces"]),
+        .createdAt = body["createdAt"].asString(),
+    };
+  }
+
+  Json::Value ToJson() {
+    Json::Value root;
+    // Guard the optional; dereferencing an empty gguf would be undefined.
+    if (gguf.has_value()) {
+      root["gguf"] = gguf->ToJson();
+    }
+    return root;
+  }
+};

 inline cpp::result<std::unordered_map<std::string, HuggingFaceBranch>,
                    std::string>
 GetModelRepositoryBranches(const std::string& author,
@@ -84,11 +122,10 @@ GetModelRepositoryBranches(const std::string& author,
   }
   auto url_obj = url_parser::Url{
       .protocol = "https",
-      .host = kHuggingfaceHost,
+      .host = kHuggingFaceHost,
       .pathParams = {"api", "models", author, modelName, "refs"}};

-  auto result =
-      curl_utils::SimpleGetJson(url_obj.ToFullPath(), CreateCurlHfHeaders());
+  auto result = curl_utils::SimpleGetJson(url_obj.ToFullPath());
   if (result.has_error()) {
     return cpp::fail("Failed to get model repository branches: " + author +
                      "/" + modelName);
@@ -98,10 +135,10 @@ GetModelRepositoryBranches(const std::string& author,
   std::unordered_map<std::string, HuggingFaceBranch> branches{};

   for (const auto& branch : branches_json) {
-    branches[branch["name"]] = HuggingFaceBranch{
-        .name = branch["name"],
-        .ref = branch["ref"],
-        .targetCommit = branch["targetCommit"],
+    branches[branch["name"].asString()] = HuggingFaceBranch{
+        .name = branch["name"].asString(),
+        .ref = branch["ref"].asString(),
+        .targetCommit = branch["targetCommit"].asString(),
     };
   }

@@ -117,63 +154,22 @@ GetHuggingFaceModelRepoInfo(const std::string& author,
   }
   auto url_obj = url_parser::Url{.protocol = "https",
-                                 .host = kHuggingfaceHost,
+                                 .host = kHuggingFaceHost,
                                  .pathParams = {"api", "models", author,
                                                 modelName}};

-  auto result =
-      curl_utils::SimpleGetJson(url_obj.ToFullPath(), CreateCurlHfHeaders());
+  auto result = curl_utils::SimpleGetJson(url_obj.ToFullPath());
   if (result.has_error()) {
     return cpp::fail("Failed to get model repository info: " + author + "/" +
                      modelName);
   }

-  auto body = result.value();
-
-  std::optional<HuggingFaceGgufInfo> gguf = std::nullopt;
-  auto gguf_info = body["gguf"];
-  if (!gguf_info.is_null()) {
-    gguf = HuggingFaceGgufInfo{
-        .total = gguf_info["total"],
-        .architecture = gguf_info["architecture"],
-    };
-  }
-
-  std::vector<HuggingFaceFileSibling> siblings{};
-  auto siblings_info = body["siblings"];
-  for (const auto& sibling : siblings_info) {
-    auto sibling_info = HuggingFaceFileSibling{
-        .rfilename = sibling["rfilename"],
-    };
-    siblings.push_back(sibling_info);
-  }
-
-  auto model_repo_info = HuggingFaceModelRepoInfo{
-      .id = body["id"],
-      .modelId = body["modelId"],
-      .author = body["author"],
-      .sha = body["sha"],
-      .lastModified = body["lastModified"],
-
-      .isPrivate = body["private"],
-      .disabled = body["disabled"],
-      .gated = body["gated"],
-      .tags = body["tags"],
-      .downloads = body["downloads"],
-
-      .likes = body["likes"],
-      .gguf = gguf,
-      .siblings = siblings,
-      .spaces = body["spaces"],
-      .createdAt = body["createdAt"],
-  };
-
-  return model_repo_info;
+  return HuggingFaceModelRepoInfo::FromJson(result.value());
 }

 inline std::string GetMetadataUrl(const std::string& model_id) {
   auto url_obj = url_parser::Url{
       .protocol = "https",
-      .host = kHuggingfaceHost,
+      .host = kHuggingFaceHost,
       .pathParams = {"cortexso", model_id, "resolve", "main", "metadata.yml"}};

   return url_obj.ToFullPath();
@@ -185,7 +181,7 @@ inline std::string GetDownloadableUrl(const std::string& author,
                                       const std::string& branch = "main") {
   auto url_obj = url_parser::Url{
       .protocol = "https",
-      .host = kHuggingfaceHost,
+      .host = kHuggingFaceHost,
       .pathParams = {author, modelName, "resolve", branch, fileName},
   };
   return url_parser::FromUrl(url_obj);
 }
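GetDownloadableUrl simply assembles the resolve URL; a quick sketch (model and file names are illustrative):

#include <iostream>

#include "utils/huggingface_utils.h"

int main() {
  auto url = huggingface_utils::GetDownloadableUrl("cortexso", "tinyllama",
                                                   "model.gguf", "main");
  // https://huggingface.co/cortexso/tinyllama/resolve/main/model.gguf
  std::cout << url << '\n';
  return 0;
}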
@@ -194,8 +190,8 @@ inline std::string GetDownloadableUrl(const std::string& author,
 inline std::optional<std::string> GetDefaultBranch(
     const std::string& model_name) {
   try {
-    auto default_model_branch = curl_utils::ReadRemoteYaml(
-        GetMetadataUrl(model_name), CreateCurlHfHeaders());
+    auto default_model_branch =
+        curl_utils::ReadRemoteYaml(GetMetadataUrl(model_name));

     if (default_model_branch.has_error()) {
       return std::nullopt;
diff --git a/engine/utils/json_helper.h b/engine/utils/json_helper.h
index 3a2023fef..82f994751 100644
--- a/engine/utils/json_helper.h
+++ b/engine/utils/json_helper.h
@@ -1,6 +1,8 @@
 #pragma once
+
 #include <json/json.h>
 #include <string>
+
 namespace json_helper {
 inline Json::Value ParseJsonString(const std::string& json_str) {
   Json::Value root;
@@ -8,4 +10,10 @@ inline Json::Value ParseJsonString(const std::string& json_str) {
   reader.parse(json_str, root);
   return root;
 }
-}
\ No newline at end of file
+
+inline std::string DumpJsonString(const Json::Value& json) {
+  Json::StreamWriterBuilder builder;
+  builder["indentation"] = "";
+  return Json::writeString(builder, json);
+}
+}  // namespace json_helper
diff --git a/engine/utils/json_parser_utils.h b/engine/utils/json_parser_utils.h
new file mode 100644
index 000000000..3ebd2c546
--- /dev/null
+++ b/engine/utils/json_parser_utils.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#include <json/json.h>
+#include <vector>
+#include "utils/logging_utils.h"
+
+namespace json_parser_utils {
+
+template <typename T>
+T jsonToValue(const Json::Value& value);
+
+// Full specializations in a header must be inline to satisfy the ODR.
+template <>
+inline std::string jsonToValue(const Json::Value& value) {
+  return value.asString();
+}
+
+template <typename T>
+std::vector<T> ParseJsonArray(const Json::Value& array) {
+  try {
+    std::vector<T> result;
+    if (array.isArray()) {
+      result.reserve(array.size());
+      for (const Json::Value& element : array) {
+        result.push_back(jsonToValue<T>(element));
+      }
+    }
+    return result;
+  } catch (const std::exception& e) {
+    CTL_ERR("Error parsing json array: " << e.what());
+    return {};
+  }
+}
+}  // namespace json_parser_utils
diff --git a/engine/utils/string_utils.h b/engine/utils/string_utils.h
index 99373a3ce..9e40e423b 100644
--- a/engine/utils/string_utils.h
+++ b/engine/utils/string_utils.h
@@ -25,6 +25,29 @@ inline void Trim(std::string& s) {
           s.end());
 }

+inline std::string RemoveSubstring(std::string_view full_str,
+                                   std::string_view to_remove) {
+  if (to_remove.empty()) {
+    return std::string(full_str);
+  }
+  std::string result;
+  result.reserve(full_str.length());
+
+  size_t pos = 0;
+  size_t prev = 0;
+
+  // Find each occurrence and copy only the parts we want to keep
+  while ((pos = full_str.find(to_remove, prev)) != std::string_view::npos) {
+    result.append(full_str.substr(prev, pos - prev));
+    prev = pos + to_remove.length();
+  }
+
+  // Append the remaining part
+  result.append(full_str.substr(prev));
+
+  return result;
+}
+
 inline bool StringContainsIgnoreCase(const std::string& haystack,
                                      const std::string& needle) {
   if (needle.empty()) {
diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h
index 9dbfcc7c9..01c105422 100644
--- a/engine/utils/system_info_utils.h
+++ b/engine/utils/system_info_utils.h
@@ -6,6 +6,7 @@
 #include <sstream>
 #include <string>
 #include <vector>
 #include "utils/command_executor.h"
+#include "utils/engine_constants.h"
 #include "utils/logging_utils.h"
 #ifdef _WIN32
 #include <windows.h>
@@ -71,13 +72,13 @@ inline std::unique_ptr<SystemInfo> GetSystemInfo() {
 #endif

 #if defined(__APPLE__) && defined(__MACH__)
-  os << "mac";
+  os << kMacOs;
 #elif defined(__linux__)
-  os << "linux";
+  os << kLinuxOs;
 #elif defined(_WIN32)
-  os << "windows";
+  os << kWindowsOs;
 #else
-  os << kUnsupported;
+  os << kUnsupportedOs;
 #endif
   return std::make_unique<SystemInfo>(os.str(), arch.str());
 }
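RemoveSubstring removes every occurrence of the needle, which is what GetVariantFromNameAndVersion above relies on; a few self-checks:

#include <cassert>

#include "utils/string_utils.h"

int main() {
  using string_utils::RemoveSubstring;
  assert(RemoveSubstring("v0.1.35-v2", "v") == "0.1.35-2");  // all "v"s go
  assert(RemoveSubstring("model.tar.gz", ".tar.gz") == "model");
  assert(RemoveSubstring("abc", "") == "abc");  // empty needle is a no-op
  return 0;
}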
diff --git a/engine/utils/url_parser.h b/engine/utils/url_parser.h
index 90b62143e..244b13719 100644
--- a/engine/utils/url_parser.h
+++ b/engine/utils/url_parser.h
@@ -7,6 +7,7 @@
 #include <string>
 #include <vector>
 #include "exceptions/malformed_url_exception.h"
+#include "utils/result.hpp"

 namespace url_parser {

@@ -37,7 +38,9 @@ struct Url {
   std::string protocol;
   std::string host;
   std::vector<std::string> pathParams;
-  std::unordered_map<std::string, std::variant<std::string, int, bool>> queries;
+  std::unordered_map<std::string, std::variant<std::string, int, bool>>
+      queries;

   std::string GetProtocolAndHost() const { return protocol + "://" + host; }

@@ -79,7 +82,8 @@ inline void SplitPathParams(const std::string& input,
   }
 }

-inline Url FromUrlString(const std::string& urlString) {
+inline cpp::result<Url, std::string> FromUrlString(
+    const std::string& urlString) {
   Url url = {
       .protocol = "",
       .host = "",
@@ -108,8 +112,7 @@
       counter++;
     }
   } else {
-    auto message{"Malformed URL: " + urlString};
-    throw MalformedUrlException(message);
+    return cpp::fail("Malformed URL: " + urlString);
   }
   return url;
 }
diff --git a/engine/vcpkg.json b/engine/vcpkg.json
index 1f8d31bcc..64e6f6d26 100644
--- a/engine/vcpkg.json
+++ b/engine/vcpkg.json
@@ -10,7 +10,6 @@
     "drogon",
     "jsoncpp",
     "minizip",
-    "nlohmann-json",
     "yaml-cpp",
     "libarchive",
     "tabulate",
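Since FromUrlString no longer throws MalformedUrlException, callers branch on the result instead of wrapping the call in try/catch; a closing sketch:

#include <iostream>

#include "utils/url_parser.h"

int main() {
  auto url = url_parser::FromUrlString(
      "https://api.github.com/repos/janhq/cortex.llamacpp/releases");
  if (url.has_error()) {
    std::cerr << url.error() << '\n';
    return 1;
  }
  std::cout << url->host << '\n';  // "api.github.com"
  return 0;
}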