Fix for quantize fail on init due to undefined static initialization of complex objects #927

Merged · 3 commits · Apr 17, 2023 · Changes from all commits
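
The failure this PR fixes is an instance of C++'s static initialization order problem: the MEM_REQ_* tables were non-trivially-constructed globals, and the order in which globals from different translation units are constructed before main() is unspecified. The quantize path could therefore read one of these std::map objects before its constructor had run. The fix converts each global into a function-local static behind an accessor (the construct-on-first-use idiom), which the language guarantees is initialized on first use, thread-safely since C++11. A minimal sketch of the idiom, with hypothetical names:

    #include <map>
    #include <string>

    // Before: a namespace-scope global. Code that runs during static
    // initialization in another translation unit may observe this map
    // before its constructor has executed (undefined behavior).
    //
    //     static const std::map<std::string, int> k_table = { { "a", 1 } };

    // After: construct-on-first-use. The local static is constructed the
    // first time the accessor is entered, so every caller, including other
    // static initializers, sees a fully constructed map.
    static const std::map<std::string, int> & k_table()
    {
        static std::map<std::string, int> _k_table = { { "a", 1 }, { "b", 2 } };
        return _k_table;
    }

    int lookup_b()
    {
        return k_table().at("b"); // safe even before main() begins
    }

The known caveat of the idiom is destruction order at program exit, which does not matter here because the tables live for the whole process.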
llama.cpp — 44 additions, 28 deletions
@@ -42,35 +42,51 @@ static const size_t MB = 1024*1024;
 // TODO: dynamically determine these sizes
 // needs modifications in ggml

-static const std::map<e_model, size_t> MEM_REQ_SCRATCH0 = {
-    { MODEL_7B, 512ull*MB },
-    { MODEL_13B, 512ull*MB },
-    { MODEL_30B, 512ull*MB },
-    { MODEL_65B, 512ull*MB },
-};
+static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0()
+{
+    static std::map<e_model, size_t> _MEM_REQ_SCRATCH0 = {
+        { MODEL_7B, 512ull * MB },
+        { MODEL_13B, 512ull * MB },
+        { MODEL_30B, 512ull * MB },
+        { MODEL_65B, 512ull * MB },
+    };
+    return _MEM_REQ_SCRATCH0;
+}

-static const std::map<e_model, size_t> MEM_REQ_SCRATCH1 = {
-    { MODEL_7B, 512ull*MB },
-    { MODEL_13B, 512ull*MB },
-    { MODEL_30B, 512ull*MB },
-    { MODEL_65B, 512ull*MB },
-};
+static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
+{
+    static std::map<e_model, size_t> _MEM_REQ_SCRATCH1 = {
+        { MODEL_7B, 512ull * MB },
+        { MODEL_13B, 512ull * MB },
+        { MODEL_30B, 512ull * MB },
+        { MODEL_65B, 512ull * MB },
+    };
+    return _MEM_REQ_SCRATCH1;
+};

 // 2*n_embd*n_ctx*n_layer*sizeof(float16)
-static const std::map<e_model, size_t> MEM_REQ_KV_SELF = {
-    { MODEL_7B, 1026ull*MB },
-    { MODEL_13B, 1608ull*MB },
-    { MODEL_30B, 3124ull*MB },
-    { MODEL_65B, 5120ull*MB },
-};
+static const std::map<e_model, size_t> & MEM_REQ_KV_SELF()
+{
+    static std::map<e_model, size_t> _MEM_REQ_KV_SELF = {
+        { MODEL_7B, 1026ull * MB },
+        { MODEL_13B, 1608ull * MB },
+        { MODEL_30B, 3124ull * MB },
+        { MODEL_65B, 5120ull * MB },
+    };
+    return _MEM_REQ_KV_SELF;
+};

 // this is mostly needed for temporary mul_mat buffers to dequantize the data
 // not actually needed if BLAS is disabled
-static const std::map<e_model, size_t> MEM_REQ_EVAL = {
-    { MODEL_7B, 768ull*MB },
-    { MODEL_13B, 1024ull*MB },
-    { MODEL_30B, 1280ull*MB },
-    { MODEL_65B, 1536ull*MB },
-};
+static const std::map<e_model, size_t> & MEM_REQ_EVAL()
+{
+    static std::map<e_model, size_t> _MEM_REQ_EVAL = {
+        { MODEL_7B, 768ull * MB },
+        { MODEL_13B, 1024ull * MB },
+        { MODEL_30B, 1280ull * MB },
+        { MODEL_65B, 1536ull * MB },
+    };
+    return _MEM_REQ_EVAL;
+};

 // default hparams (LLaMA 7B)
@@ -909,13 +925,13 @@ static void llama_model_load_internal(
         const size_t mem_required =
             ctx_size +
             mmapped_size +
-            MEM_REQ_SCRATCH0.at(model.type) +
-            MEM_REQ_SCRATCH1.at(model.type) +
-            MEM_REQ_EVAL.at (model.type);
+            MEM_REQ_SCRATCH0().at(model.type) +
+            MEM_REQ_SCRATCH1().at(model.type) +
+            MEM_REQ_EVAL().at(model.type);

         // this is the memory required by one llama_state
         const size_t mem_required_state =
-            scale*MEM_REQ_KV_SELF.at(model.type);
+            scale*MEM_REQ_KV_SELF().at(model.type);

         fprintf(stderr, "%s: mem required = %7.2f MB (+ %7.2f MB per state)\n", __func__,
                 mem_required / 1024.0 / 1024.0, mem_required_state / 1024.0 / 1024.0);
@@ -1742,10 +1758,10 @@ struct llama_context * llama_init_from_file(
             ctx->embedding.resize(hparams.n_embd);
         }

-        ctx->buf_compute.resize(MEM_REQ_EVAL.at(ctx->model.type));
+        ctx->buf_compute.resize(MEM_REQ_EVAL().at(ctx->model.type));

-        ctx->buf_scratch[0].resize(MEM_REQ_SCRATCH0.at(ctx->model.type));
-        ctx->buf_scratch[1].resize(MEM_REQ_SCRATCH1.at(ctx->model.type));
+        ctx->buf_scratch[0].resize(MEM_REQ_SCRATCH0().at(ctx->model.type));
+        ctx->buf_scratch[1].resize(MEM_REQ_SCRATCH1().at(ctx->model.type));
     }

     return ctx;
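
The MEM_REQ_KV_SELF entries follow the 2*n_embd*n_ctx*n_layer*sizeof(float16) comment in the first hunk above. As a sanity check, assuming LLaMA-7B dimensions (n_embd = 4096, n_layer = 32) and a 2048-token context — values that are assumptions here, not stated in this diff:

    #include <cstdio>

    // Evaluates the KV-self-memory formula from the diff's comment for the
    // 7B model. The n_embd/n_ctx/n_layer values are assumed, not taken
    // from this PR.
    int main()
    {
        const unsigned long long n_embd = 4096, n_ctx = 2048, n_layer = 32;
        const unsigned long long bytes = 2 * n_embd * n_ctx * n_layer * 2 /* sizeof(float16) */;
        printf("%llu MB\n", bytes / (1024 * 1024)); // prints 1024
        return 0;
    }

This lands just under the table's 1026ull*MB entry for MODEL_7B, i.e. the constants appear to be the formula plus a small margin.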
tests/test-tokenizer-0.cpp — 12 additions, 8 deletions
@@ -5,13 +5,17 @@
 #include <map>
 #include <vector>

-static const std::map<std::string, std::vector<llama_token>> k_tests = {
-    { "Hello World", { 1, 10994, 2787, }, },
-    { " Hello World", { 1, 15043, 2787, }, },
-    { " Hello World!", { 1, 15043, 2787, 29991, }, },
-    { " this is 🦙.cpp", { 1, 445, 338, 29871, 243, 162, 169, 156, 29889, 8223, }, },
-    { "w048 7tuijk dsdfhu", { 1, 29893, 29900, 29946, 29947, 29871, 29955, 9161, 13535, 18031, 2176, 6905, }, },
-    { "нещо на Български", { 1, 821, 4851, 665, 1386, 29713, 1305, }, },
-};
+static const std::map<std::string, std::vector<llama_token>> & k_tests()
+{
+    static std::map<std::string, std::vector<llama_token>> _k_tests = {
+        { "Hello World", { 1, 10994, 2787, }, },
+        { " Hello World", { 1, 15043, 2787, }, },
+        { " Hello World!", { 1, 15043, 2787, 29991, }, },
+        { " this is 🦙.cpp", { 1, 445, 338, 29871, 243, 162, 169, 156, 29889, 8223, }, },
+        { "w048 7tuijk dsdfhu", { 1, 29893, 29900, 29946, 29947, 29871, 29955, 9161, 13535, 18031, 2176, 6905, }, },
+        { "нещо на Български", { 1, 821, 4851, 665, 1386, 29713, 1305, }, },
+    };
+    return _k_tests;
+};

 int main(int argc, char **argv) {
@@ -47,7 +51,7 @@ int main(int argc, char **argv) {
         return 2;
     }

-    for (const auto & test_kv : k_tests) {
+    for (const auto & test_kv : k_tests()) {
         std::vector<llama_token> res(test_kv.first.size());
         const int n = llama_tokenize(ctx, test_kv.first.c_str(), res.data(), res.size(), true);
         res.resize(n);
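
The diff cuts off before the test's verification step. Presumably the loop then compares the tokenizer output in res against the expected IDs in test_kv.second; a hypothetical sketch of such a check (not this file's actual code, and requiring #include <algorithm> and <cstdio>) continuing inside the loop:

    // Compare actual token IDs against the expected sequence for this input.
    const bool correct = res.size() == test_kv.second.size() &&
        std::equal(res.begin(), res.end(), test_kv.second.begin());
    if (!correct) {
        fprintf(stderr, "tokenization mismatch for \"%s\"\n", test_kv.first.c_str());
        return 3;
    }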