Update llama.cpp submodule to latest release b3943 (#257)
* Update submodule to latest release b3943

* fix: API changes

* fix: build

* fix: more

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: vansangpfiev <sang@jan.ai>
3 people authored Oct 23, 2024
1 parent f430ead commit 86146f0
Showing 12 changed files with 219 additions and 208 deletions.
56 changes: 28 additions & 28 deletions .github/workflows/build.yml

56 changes: 28 additions & 28 deletions .github/workflows/nightly-build.yml

10 changes: 5 additions & 5 deletions .github/workflows/template-e2e-weekend-test.yml
@@ -33,7 +33,7 @@ jobs:
- os: "linux"
name: "amd64-avx2"
runs-on: "ubuntu-20-04"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
run-e2e: true
vulkan: false
sccache: true
@@ -42,7 +42,7 @@ jobs:
- os: "linux"
name: "amd64-noavx-cuda-12-0"
runs-on: "ubuntu-20-04-cuda-12-0-gpu"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_FMA=OFF -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_FMA=OFF -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
run-e2e: true
vulkan: false
sccache: true
@@ -51,23 +51,23 @@ jobs:
- os: "mac"
name: "amd64"
runs-on: "macos-12"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DGGML_METAL=OFF"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DGGML_METAL=OFF"
run-e2e: true
vulkan: false
sccache: false
sccache-conf-path: ""
- os: "mac"
name: "arm64"
runs-on: "macos-silicon"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DGGML_METAL_EMBED_LIBRARY=ON"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DGGML_METAL_EMBED_LIBRARY=ON"
run-e2e: true
vulkan: false
sccache: false
sccache-conf-path: ""
- os: "windows"
name: "amd64-avx2"
runs-on: "windows-cuda-11-7"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
run-e2e: true
vulkan: false
sccache: false
56 changes: 28 additions & 28 deletions .github/workflows/template-quality-gate-pr.yml

56 changes: 28 additions & 28 deletions .github/workflows/template-quality-gate-submodule.yml

2 changes: 1 addition & 1 deletion llama.cpp
2 changes: 1 addition & 1 deletion src/chat_completion_request.h
@@ -35,7 +35,7 @@ struct ChatCompletionRequest {

inline ChatCompletionRequest fromJson(std::shared_ptr<Json::Value> jsonBody) {
ChatCompletionRequest completion;
-gpt_sampler_params default_params;
+common_sampler_params default_params;
if (jsonBody) {
completion.stream = (*jsonBody).get("stream", false).asBool();
completion.max_tokens = (*jsonBody).get("max_tokens", 500).asInt();
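
Note on the hunk above: this llama.cpp release renames its common-library symbols from the gpt_ prefix to common_, so the request parser now pulls sampler defaults from common_sampler_params. A minimal self-contained sketch of that fallback pattern follows; the field subset and include path are assumptions of the sketch, not the actual cortex.llamacpp source.

#include <json/value.h>   // jsoncpp
#include "sampling.h"     // llama.cpp common; defines common_sampler_params (path assumed)

// Simplified request struct for illustration only.
struct ChatCompletionRequestSketch {
  bool stream = false;
  int max_tokens = 500;
  float top_p = 0.95f;
  float temperature = 0.8f;
};

inline ChatCompletionRequestSketch FromJsonSketch(const Json::Value& body) {
  ChatCompletionRequestSketch completion;
  common_sampler_params default_params;  // renamed from gpt_sampler_params in this release
  completion.stream = body.get("stream", false).asBool();
  completion.max_tokens = body.get("max_tokens", 500).asInt();
  // Fields missing from the request fall back to llama.cpp's own sampler defaults.
  completion.top_p = body.get("top_p", default_params.top_p).asFloat();
  completion.temperature = body.get("temperature", default_params.temp).asFloat();
  return completion;
}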
2 changes: 1 addition & 1 deletion src/llama_client_slot.cc
@@ -26,7 +26,7 @@ void LlamaClientSlot::Reset() {
images.clear();
}

-bool LlamaClientSlot::HasBudget(gpt_params& global_params) {
+bool LlamaClientSlot::HasBudget(common_params& global_params) {
n_remaining = -1;
if (params.n_predict != -1) {
n_remaining = params.n_predict - n_decoded;
6 changes: 3 additions & 3 deletions src/llama_client_slot.h
@@ -133,8 +133,8 @@ struct LlamaClientSlot {
std::string stopping_word;

// sampling
-struct gpt_sampler_params sparams;
-struct gpt_sampler* smpl = nullptr;
+struct common_sampler_params sparams;
+struct common_sampler* smpl = nullptr;

// multimodal
std::vector<SlotImage> images;
@@ -154,7 +154,7 @@ struct LlamaClientSlot {

void Reset();

-bool HasBudget(gpt_params& global_params);
+bool HasBudget(common_params& global_params);

bool Available() const;

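
The gpt_params → common_params rename in this file only changes the type name in HasBudget's signature; the budget arithmetic itself is untouched. For reference, a reduced sketch of that check against the renamed struct (member names follow upstream llama.cpp; the slot state is trimmed to the relevant counters, so this is not the full LlamaClientSlot):

#include "common.h"  // llama.cpp common; defines common_params (path assumed)

// Trimmed-down slot: only the counters the budget check needs.
struct SlotBudgetSketch {
  int n_predict = -1;   // per-request cap; -1 defers to the global setting
  int n_decoded = 0;    // tokens generated so far in this slot
  int n_remaining = -1;

  bool HasBudget(common_params& global_params) {  // formerly gpt_params
    n_remaining = -1;
    if (n_predict != -1) {
      n_remaining = n_predict - n_decoded;
    } else if (global_params.n_predict != -1) {
      n_remaining = global_params.n_predict - n_decoded;
    }
    // n_remaining == -1 means no cap was configured anywhere (unlimited).
    return n_remaining > 0 || n_remaining == -1;
  }
};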
8 changes: 4 additions & 4 deletions src/llama_engine.cc
@@ -182,7 +182,7 @@ LlamaEngine::LlamaEngine(int log_option) {
asynce_file_logger_ = std::make_unique<trantor::FileLogger>();
}

-gpt_log_pause(gpt_log_main());
+common_log_pause(common_log_main());

llama_log_set(
[](ggml_log_level level, const char* text, void* user_data) {
@@ -403,7 +403,7 @@ void LlamaEngine::SetFileLogger(int max_log_lines,
}

bool LlamaEngine::LoadModelImpl(std::shared_ptr<Json::Value> json_body) {
-gpt_params params;
+common_params params;
std::string model_type;
auto model_id = llama_utils::GetModelId(*json_body);
// By default will setting based on number of handlers
@@ -515,11 +515,11 @@ bool LlamaEngine::LoadModelImpl(std::shared_ptr<Json::Value> json_body) {
LOG_DEBUG << "stop: " << server_map_[model_id].stop_words.toStyledString();

if (!json_body->operator[]("llama_log_folder").isNull()) {
-gpt_log_resume(gpt_log_main());
+common_log_resume(common_log_main());
std::string llama_log_folder =
json_body->operator[]("llama_log_folder").asString();
llama_log_folder += "llama.log";
-gpt_log_set_file(gpt_log_main(), llama_log_folder.c_str());
+common_log_set_file(common_log_main(), llama_log_folder.c_str());
} // Set folder for llama log
}
if (params.model_alias == "unknown") {
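
The logging calls follow the same prefix rename (gpt_log_* → common_log_*): the logger is paused at engine construction and only resumed once a llama_log_folder is supplied. A rough sketch of that flow; the include path and the wrapper function are assumptions of this sketch, not cortex.llamacpp code.

#include <string>
#include "log.h"  // llama.cpp common logger; declares common_log_* (path assumed)

// Keep llama.cpp's logger quiet until a log folder is provided, then route
// output to <folder>llama.log and resume logging.
void ConfigureLlamaLogSketch(const std::string& llama_log_folder) {
  common_log_pause(common_log_main());  // renamed from gpt_log_pause/gpt_log_main

  if (!llama_log_folder.empty()) {
    const std::string log_file = llama_log_folder + "llama.log";
    common_log_set_file(common_log_main(), log_file.c_str());
    common_log_resume(common_log_main());  // renamed from gpt_log_resume
  }
}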