|
3 | 3 | #include "nitro_utils.h"
|
4 | 4 | #include <chrono>
|
5 | 5 | #include <cstring>
|
6 |
| -#include <thread> |
| 6 | +#include <drogon/HttpResponse.h> |
7 | 7 | #include <regex>
|
| 8 | +#include <thread> |
| 9 | + |
| 10 | +using namespace inferences; |
8 | 11 |
|
9 | 12 | std::string create_return_json(const std::string &id, const std::string &model,
|
10 | 13 | const std::string &content,
|
@@ -35,7 +38,7 @@ std::string create_return_json(const std::string &id, const std::string &model,
|
35 | 38 | return Json::writeString(writer, root);
|
36 | 39 | }
|
37 | 40 |
|
38 |
| -void llamaCPP::asyncHandleHttpRequest( |
| 41 | +void llamaCPP::chatCompletion( |
39 | 42 | const HttpRequestPtr &req,
|
40 | 43 | std::function<void(const HttpResponsePtr &)> &&callback) {
|
41 | 44 | const auto &jsonBody = req->getJsonObject();
|
@@ -196,3 +199,29 @@ void llamaCPP::asyncHandleHttpRequest(
|
196 | 199 | "chat_completions.txt");
|
197 | 200 | callback(resp);
|
198 | 201 | }
|
| 202 | + |
| 203 | +void llamaCPP::embedding( |
| 204 | + const HttpRequestPtr &req, |
| 205 | + std::function<void(const HttpResponsePtr &)> &&callback) { |
| 206 | + auto lock = llama.lock(); |
| 207 | + |
| 208 | + const auto &jsonBody = req->getJsonObject(); |
| 209 | + |
| 210 | + llama.rewind(); |
| 211 | + llama_reset_timings(llama.ctx); |
| 212 | + if (jsonBody->isMember("content") != 0) { |
| 213 | + llama.prompt = (*jsonBody)["content"].asString(); |
| 214 | + } else { |
| 215 | + llama.prompt = ""; |
| 216 | + } |
| 217 | + llama.params.n_predict = 0; |
| 218 | + llama.loadPrompt(); |
| 219 | + llama.beginCompletion(); |
| 220 | + llama.doCompletion(); |
| 221 | + |
| 222 | + const json data = format_embedding_response(llama); |
| 223 | + auto resp = drogon::HttpResponse::newHttpResponse(); |
| 224 | + resp->setBody(data.dump()); |
| 225 | + resp->setContentTypeString("application/json"); |
| 226 | + callback(resp); |
| 227 | +} |
0 commit comments