@@ -42,35 +42,51 @@ static const size_t MB = 1024*1024;
 // TODO: dynamically determine these sizes
 // needs modifications in ggml
 
-static const std::map<e_model, size_t> MEM_REQ_SCRATCH0 = {
-    { MODEL_7B,   512ull*MB },
-    { MODEL_13B,  512ull*MB },
-    { MODEL_30B,  512ull*MB },
-    { MODEL_65B,  512ull*MB },
-};
+static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0()
+{
+    static std::map<e_model, size_t> _MEM_REQ_SCRATCH0 = {
+        { MODEL_7B,   512ull * MB },
+        { MODEL_13B,  512ull * MB },
+        { MODEL_30B,  512ull * MB },
+        { MODEL_65B,  512ull * MB },
+    };
+    return _MEM_REQ_SCRATCH0;
+}
 
-static const std::map<e_model, size_t> MEM_REQ_SCRATCH1 = {
-    { MODEL_7B,   512ull*MB },
-    { MODEL_13B,  512ull*MB },
-    { MODEL_30B,  512ull*MB },
-    { MODEL_65B,  512ull*MB },
+static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
+{
+    static std::map<e_model, size_t> _MEM_REQ_SCRATCH1 = {
+        { MODEL_7B,   512ull * MB },
+        { MODEL_13B,  512ull * MB },
+        { MODEL_30B,  512ull * MB },
+        { MODEL_65B,  512ull * MB },
+    };
+    return _MEM_REQ_SCRATCH1;
 };
 
 // 2*n_embd*n_ctx*n_layer*sizeof(float16)
-static const std::map<e_model, size_t> MEM_REQ_KV_SELF = {
-    { MODEL_7B,  1026ull*MB },
-    { MODEL_13B, 1608ull*MB },
-    { MODEL_30B, 3124ull*MB },
-    { MODEL_65B, 5120ull*MB },
+static const std::map<e_model, size_t> & MEM_REQ_KV_SELF()
+{
+    static std::map<e_model, size_t> _MEM_REQ_KV_SELF = {
+        { MODEL_7B,  1026ull * MB },
+        { MODEL_13B, 1608ull * MB },
+        { MODEL_30B, 3124ull * MB },
+        { MODEL_65B, 5120ull * MB },
+    };
+    return _MEM_REQ_KV_SELF;
 };
 
 // this is mostly needed for temporary mul_mat buffers to dequantize the data
 // not actually needed if BLAS is disabled
-static const std::map<e_model, size_t> MEM_REQ_EVAL = {
-    { MODEL_7B,   768ull*MB },
-    { MODEL_13B, 1024ull*MB },
-    { MODEL_30B, 1280ull*MB },
-    { MODEL_65B, 1536ull*MB },
+static const std::map<e_model, size_t> & MEM_REQ_EVAL()
+{
+    static std::map<e_model, size_t> _MEM_REQ_EVAL = {
+        { MODEL_7B,   768ull * MB },
+        { MODEL_13B, 1024ull * MB },
+        { MODEL_30B, 1280ull * MB },
+        { MODEL_65B, 1536ull * MB },
+    };
+    return _MEM_REQ_EVAL;
 };
 
 // default hparams (LLaMA 7B)
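
Reviewer note: the hunk above swaps namespace-scope map globals for accessors that build a function-local static on first call, i.e. the construct-on-first-use idiom. Globals in different translation units are constructed in an unspecified order before main(), so code running during static initialization could observe a MEM_REQ_* map before its constructor has finished; routing every access through a function removes that hazard. A minimal standalone sketch of the same pattern (names here are illustrative, not from llama.cpp):

    #include <cstddef>
    #include <cstdio>
    #include <map>
    #include <string>

    // Construct-on-first-use: the map is built the first time the accessor
    // runs, not during pre-main static initialization, so no caller can
    // ever observe it in a partially constructed state.
    static const std::map<std::string, std::size_t> & mem_table()
    {
        static std::map<std::string, std::size_t> table = {
            { "7B",  512 },
            { "13B", 512 },
        };
        return table;
    }

    int main()
    {
        std::printf("7B scratch: %zu MB\n", mem_table().at("7B"));
        return 0;
    }
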
@@ -899,13 +915,13 @@ static void llama_model_load_internal(
     const size_t mem_required =
         ctx_size +
         mmapped_size +
-        MEM_REQ_SCRATCH0.at(model.type) +
-        MEM_REQ_SCRATCH1.at(model.type) +
-        MEM_REQ_EVAL.at(model.type);
+        MEM_REQ_SCRATCH0().at(model.type) +
+        MEM_REQ_SCRATCH1().at(model.type) +
+        MEM_REQ_EVAL().at(model.type);
 
     // this is the memory required by one llama_state
     const size_t mem_required_state =
-        scale*MEM_REQ_KV_SELF.at(model.type);
+        scale*MEM_REQ_KV_SELF().at(model.type);
 
     fprintf(stderr, "%s: mem required  = %7.2f MB (+ %7.2f MB per state)\n", __func__,
             mem_required / 1024.0 / 1024.0, mem_required_state / 1024.0 / 1024.0);
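
Reviewer note on the figures being summed here: the comment 2*n_embd*n_ctx*n_layer*sizeof(float16) above MEM_REQ_KV_SELF reproduces the table almost exactly if the entries were sized for n_ctx = 2048 (that context length is inferred from the arithmetic, not stated in the diff). A quick check with the published LLaMA-7B shapes:

    #include <cstdio>

    int main()
    {
        // LLaMA-7B hyperparameters; n_ctx = 2048 is an assumption that
        // makes the arithmetic line up with the 1026 MB table entry.
        const unsigned long long n_embd  = 4096;
        const unsigned long long n_layer = 32;
        const unsigned long long n_ctx   = 2048;
        const unsigned long long f16     = 2;   // bytes per fp16 element

        // K and V caches: one fp16 value per (position, channel, layer),
        // times 2 for the two caches.
        const unsigned long long kv_bytes = 2 * n_embd * n_ctx * n_layer * f16;
        std::printf("KV self size: %llu MB\n", kv_bytes / (1024 * 1024));
        return 0;
    }

This prints 1024 MB; the table reserves 1026 MB, i.e. roughly 2 MB of headroom. The 13B/30B/65B entries check out the same way (1600+8, 3120+4, and 5120+0 MB respectively).
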
@@ -1732,10 +1748,10 @@ struct llama_context * llama_init_from_file(
         ctx->embedding.resize(hparams.n_embd);
     }
 
-        ctx->buf_compute.resize(MEM_REQ_EVAL.at(ctx->model.type));
+        ctx->buf_compute.resize(MEM_REQ_EVAL().at(ctx->model.type));
 
-        ctx->buf_scratch[0].resize(MEM_REQ_SCRATCH0.at(ctx->model.type));
-        ctx->buf_scratch[1].resize(MEM_REQ_SCRATCH1.at(ctx->model.type));
+        ctx->buf_scratch[0].resize(MEM_REQ_SCRATCH0().at(ctx->model.type));
+        ctx->buf_scratch[1].resize(MEM_REQ_SCRATCH1().at(ctx->model.type));
     }
 
     return ctx;
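
One property the accessor form picks up for free, though the diff does not mention it: since C++11 the initialization of a function-local static is guaranteed thread-safe (the compiler emits a one-time guard), so concurrent first calls to the accessors are fine. A toy illustration, not code from the patch:

    #include <cstdio>
    #include <map>
    #include <thread>

    static const std::map<int, int> & table()
    {
        // C++11 "magic statics": this initializer runs exactly once even
        // if several threads reach the accessor simultaneously.
        static std::map<int, int> t = { { 1, 10 }, { 2, 20 } };
        return t;
    }

    int main()
    {
        std::thread a([] { std::printf("%d\n", table().at(1)); });
        std::thread b([] { std::printf("%d\n", table().at(2)); });
        a.join();
        b.join();
        return 0;
    }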