
Commit 287e8c2

llama : move load tensors to llama_model
ggml-ci
1 parent 53e61c6 commit 287e8c2

7 files changed (+2745, -2737)

src/llama-adapter.cpp (+1, -1)
@@ -257,7 +257,7 @@ static void llama_lora_adapter_init_impl(struct llama_model & model, const char
         }

         // device buft and device ctx
-        const auto * model_tensor = model.get_tensor( name.c_str());
+        const auto * model_tensor = model.get_tensor(name.c_str());
         if (!model_tensor) {
             throw std::runtime_error("LoRA tensor '" + name + "' does not exist in base model");
         }

src/llama-model-loader.cpp (+61)
@@ -7,6 +7,10 @@
 #include <cstring>
 #include <future>

+static const size_t kiB = 1024;
+static const size_t MiB = 1024*kiB;
+static const size_t GiB = 1024*MiB;
+
 const char * llama_file_version_name(llama_fver version) {
     switch (version) {
         case GGUF_FILE_VERSION_V1: return "GGUF V1 (support until nov 2023)";
@@ -17,6 +21,49 @@ const char * llama_file_version_name(llama_fver version) {
     return "unknown";
 }

+static std::string llama_model_ftype_name(llama_ftype ftype) {
+    if (ftype & LLAMA_FTYPE_GUESSED) {
+        return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
+    }
+
+    switch (ftype) {
+        case LLAMA_FTYPE_ALL_F32:        return "all F32";
+        case LLAMA_FTYPE_MOSTLY_F16:     return "F16";
+        case LLAMA_FTYPE_MOSTLY_BF16:    return "BF16";
+        case LLAMA_FTYPE_MOSTLY_Q4_0:    return "Q4_0";
+        case LLAMA_FTYPE_MOSTLY_Q4_1:    return "Q4_1";
+        case LLAMA_FTYPE_MOSTLY_Q5_0:    return "Q5_0";
+        case LLAMA_FTYPE_MOSTLY_Q5_1:    return "Q5_1";
+        case LLAMA_FTYPE_MOSTLY_Q8_0:    return "Q8_0";
+        case LLAMA_FTYPE_MOSTLY_Q2_K:    return "Q2_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q2_K_S:  return "Q2_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q3_K_S:  return "Q3_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q3_K_M:  return "Q3_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q3_K_L:  return "Q3_K - Large";
+        case LLAMA_FTYPE_MOSTLY_Q4_K_S:  return "Q4_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q4_K_M:  return "Q4_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q5_K_S:  return "Q5_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q5_K_M:  return "Q5_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q6_K:    return "Q6_K";
+        case LLAMA_FTYPE_MOSTLY_TQ1_0:   return "TQ1_0 - 1.69 bpw ternary";
+        case LLAMA_FTYPE_MOSTLY_TQ2_0:   return "TQ2_0 - 2.06 bpw ternary";
+        case LLAMA_FTYPE_MOSTLY_IQ2_XXS: return "IQ2_XXS - 2.0625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_XS:  return "IQ2_XS - 2.3125 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_S:   return "IQ2_S - 2.5 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_M:   return "IQ2_M - 2.7 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_XS:  return "IQ3_XS - 3.3 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_XXS: return "IQ3_XXS - 3.0625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ1_S:   return "IQ1_S - 1.5625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ1_M:   return "IQ1_M - 1.75 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ4_NL:  return "IQ4_NL - 4.5 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ4_XS:  return "IQ4_XS - 4.25 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_S:   return "IQ3_S - 3.4375 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_M:   return "IQ3_S mix - 3.66 bpw";
+
+        default: return "unknown, may not work";
+    }
+}
+
 namespace GGUFMeta {
     template <typename T, gguf_type gt_, T (*gfun)(const gguf_context *, const int)>
     struct GKV_Base_Type {
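
The LLAMA_FTYPE_GUESSED branch above uses a common flag-in-enum pattern: the flag bit is masked off, the function recurses once to resolve the base name, and the annotation is appended. A minimal standalone sketch of that pattern (the enum and its values here are illustrative stand-ins, not the real llama_ftype constants):

    #include <cstdio>
    #include <string>

    // Illustrative stand-in for llama_ftype: a few base types plus a flag bit.
    enum my_ftype {
        MY_FTYPE_F16     = 1,
        MY_FTYPE_Q4_0    = 2,
        MY_FTYPE_GUESSED = 1024, // flag OR'ed onto a base type
    };

    static std::string my_ftype_name(my_ftype ftype) {
        if (ftype & MY_FTYPE_GUESSED) {
            // strip the flag bit, resolve the base name, then annotate
            return my_ftype_name((my_ftype) (ftype & ~MY_FTYPE_GUESSED)) + " (guessed)";
        }
        switch (ftype) {
            case MY_FTYPE_F16:  return "F16";
            case MY_FTYPE_Q4_0: return "Q4_0";
            default:            return "unknown";
        }
    }

    int main() {
        // prints "Q4_0 (guessed)"
        printf("%s\n", my_ftype_name((my_ftype) (MY_FTYPE_Q4_0 | MY_FTYPE_GUESSED)).c_str());
    }
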
@@ -1008,3 +1055,17 @@ bool llama_model_loader::load_all_data(

     return true;
 }
+
+std::string llama_model_loader::ftype_name() const {
+    return llama_model_ftype_name(ftype);
+}
+
+void llama_model_loader::print_info() const {
+    LLAMA_LOG_INFO("%s: file format = %s\n", __func__, llama_file_version_name(fver));
+    LLAMA_LOG_INFO("%s: file type   = %s\n", __func__, llama_model_ftype_name(ftype).c_str());
+    if (n_bytes < GiB) {
+        LLAMA_LOG_INFO("%s: file size   = %.2f MiB (%.2f BPW) \n", __func__, n_bytes/1024.0/1024.0, n_bytes*8.0/n_elements);
+    } else {
+        LLAMA_LOG_INFO("%s: file size   = %.2f GiB (%.2f BPW) \n", __func__, n_bytes/1024.0/1024.0/1024.0, n_bytes*8.0/n_elements);
+    }
+}
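
The file-size line added to print_info() reports bits per weight as n_bytes*8.0/n_elements and switches from MiB to GiB at one GiB, using the constants added at the top of the file. A small self-contained sketch of that arithmetic; the sample values below are made up, whereas in the diff n_bytes and n_elements are fields of the loader:

    #include <cstdio>

    static const size_t kiB = 1024;
    static const size_t MiB = 1024*kiB;
    static const size_t GiB = 1024*MiB;

    int main() {
        // hypothetical sample values: a ~4.2 GB file holding 7B weights
        const size_t n_bytes    = 4200000000ULL;
        const size_t n_elements = 7000000000ULL;

        const double bpw = n_bytes*8.0/n_elements; // bits per weight, ~4.80 here
        if (n_bytes < GiB) {
            printf("file size = %.2f MiB (%.2f BPW)\n", n_bytes/1024.0/1024.0, bpw);
        } else {
            printf("file size = %.2f GiB (%.2f BPW)\n", n_bytes/1024.0/1024.0/1024.0, bpw);
        }
    }
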

src/llama-model-loader.h (+4)
@@ -155,4 +155,8 @@ struct llama_model_loader {
         llama_mlocks * lmlocks,
         llama_progress_callback progress_callback,
         void * progress_callback_user_data);
+
+    std::string ftype_name() const;
+
+    void print_info() const;
 };
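
Consistent with the commit title, these two public accessors let the model code ask the loader for display strings instead of reaching into its fields. A hypothetical call-site sketch using a simplified mock of the loader interface; only the ftype_name()/print_info() signatures come from the diff, everything else is illustrative:

    #include <cstdio>
    #include <string>

    // Simplified stand-in for llama_model_loader, just enough to show the call shape.
    struct mock_loader {
        std::string ftype_name() const { return "Q4_K - Medium"; }
        void print_info() const { printf("file type = %s\n", ftype_name().c_str()); }
    };

    int main() {
        mock_loader ml;
        ml.print_info();                    // one-stop logging during model load
        std::string desc = ml.ftype_name(); // or fetch the string for other uses
        printf("desc: %s\n", desc.c_str());
    }
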
