
Commit 93ee354

Merge pull request #65 from janhq/37-api-with-llamacpp-for-embedding
37 api with llamacpp for embedding
2 parents a777b3c + bb0feab

File tree

3 files changed: +1163 -1204 lines

config.json

Lines changed: 2 additions & 1 deletion

@@ -8,6 +8,7 @@
   "custom_config": {
     "llama_model_path": "/Users/alandao/Documents/codes/nitro.cpp_temp/models/llama2_7b_chat_uncensored.Q4_0.gguf",
     "ctx_len": 2048,
-    "ngl": 100
+    "ngl": 100,
+    "embedding":true
   }
 }
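
The new "embedding":true flag mirrors llama.cpp's embedding parameter: with it set, the model context exposes an embedding vector after prompt evaluation instead of only serving token generation. Below is a minimal sketch, assuming jsoncpp (which ships with Drogon) for parsing, of how such a flag could be mapped onto llama.cpp's load-time parameters. GptParamsStub is a stand-in for llama.cpp's gpt_params, and the ngl -> n_gpu_layers mapping is an assumption; none of this loading code appears in the diff itself.

// Sketch only: reading the custom_config block and mapping it onto
// llama.cpp-style parameters. GptParamsStub stands in for gpt_params.
#include <json/json.h>
#include <iostream>
#include <sstream>
#include <string>

struct GptParamsStub {
  int n_ctx = 512;          // context length
  int n_gpu_layers = 0;     // layers offloaded to GPU (assumed meaning of "ngl")
  bool embedding = false;   // embedding mode: evaluate prompt, expose the vector
};

int main() {
  const std::string raw = R"({
    "custom_config": {
      "ctx_len": 2048,
      "ngl": 100,
      "embedding": true
    }
  })";

  Json::Value root;
  std::istringstream stream(raw);
  stream >> root;  // jsoncpp stream parsing

  const Json::Value &cfg = root["custom_config"];
  GptParamsStub params;
  params.n_ctx = cfg.get("ctx_len", 512).asInt();
  params.n_gpu_layers = cfg.get("ngl", 0).asInt();
  params.embedding = cfg.get("embedding", false).asBool();

  std::cout << "embedding mode: " << std::boolalpha << params.embedding << "\n";
  return 0;
}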

controllers/llamaCPP.cc

Lines changed: 31 additions & 2 deletions

@@ -3,8 +3,11 @@
 #include "nitro_utils.h"
 #include <chrono>
 #include <cstring>
-#include <thread>
+#include <drogon/HttpResponse.h>
 #include <regex>
+#include <thread>
+
+using namespace inferences;
 
 std::string create_return_json(const std::string &id, const std::string &model,
                                const std::string &content,
@@ -35,7 +38,7 @@ std::string create_return_json(const std::string &id, const std::string &model,
   return Json::writeString(writer, root);
 }
 
-void llamaCPP::asyncHandleHttpRequest(
+void llamaCPP::chatCompletion(
     const HttpRequestPtr &req,
     std::function<void(const HttpResponsePtr &)> &&callback) {
   const auto &jsonBody = req->getJsonObject();
@@ -196,3 +199,29 @@ void llamaCPP::asyncHandleHttpRequest(
                      "chat_completions.txt");
   callback(resp);
 }
+
+void llamaCPP::embedding(
+    const HttpRequestPtr &req,
+    std::function<void(const HttpResponsePtr &)> &&callback) {
+  auto lock = llama.lock();
+
+  const auto &jsonBody = req->getJsonObject();
+
+  llama.rewind();
+  llama_reset_timings(llama.ctx);
+  if (jsonBody->isMember("content") != 0) {
+    llama.prompt = (*jsonBody)["content"].asString();
+  } else {
+    llama.prompt = "";
+  }
+  llama.params.n_predict = 0;
+  llama.loadPrompt();
+  llama.beginCompletion();
+  llama.doCompletion();
+
+  const json data = format_embedding_response(llama);
+  auto resp = drogon::HttpResponse::newHttpResponse();
+  resp->setBody(data.dump());
+  resp->setContentTypeString("application/json");
+  callback(resp);
+}
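
The handler takes the model lock, resets the llama.cpp context, loads the request's "content" field as the prompt (falling back to an empty string), and runs one evaluation pass with n_predict = 0, so the completion path is reused to evaluate the prompt without sampling any tokens; format_embedding_response, which comes from llama.cpp's server example, then packages the context's embedding vector, typically as {"embedding": [...]}. A hypothetical Drogon client for exercising the endpoint is sketched below. The route /inferences/llamacpp/embedding and port 3928 are assumptions here, since the METHOD_LIST registration in llamaCPP.h is not part of this diff.

// Hypothetical client for the new endpoint; check llamaCPP.h's METHOD_LIST
// for the real route before relying on the path used here.
#include <drogon/drogon.h>
#include <iostream>

int main() {
  auto client = drogon::HttpClient::newHttpClient("http://127.0.0.1:3928");

  Json::Value body;
  body["content"] = "The quick brown fox";  // text to embed; handler uses "" if omitted

  auto req = drogon::HttpRequest::newHttpJsonRequest(body);
  req->setMethod(drogon::Post);
  req->setPath("/inferences/llamacpp/embedding");  // assumed route

  client->sendRequest(
      req, [](drogon::ReqResult result, const drogon::HttpResponsePtr &resp) {
        if (result == drogon::ReqResult::Ok && resp) {
          std::cout << resp->getBody() << std::endl;  // e.g. {"embedding": [...]}
        } else {
          std::cerr << "request failed" << std::endl;
        }
        drogon::app().quit();  // stop the event loop once the response arrives
      });

  drogon::app().run();  // sendRequest is async; the loop drives the request
  return 0;
}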
