#include <cstring>
#include <future>

+static const size_t kiB = 1024;
+static const size_t MiB = 1024*kiB;
+static const size_t GiB = 1024*MiB;
+
const char * llama_file_version_name(llama_fver version) {
    switch (version) {
        case GGUF_FILE_VERSION_V1: return "GGUF V1 (support until nov 2023)";
@@ -17,6 +21,49 @@ const char * llama_file_version_name(llama_fver version) {
    return "unknown";
}

+static std::string llama_model_ftype_name(llama_ftype ftype) {
+    if (ftype & LLAMA_FTYPE_GUESSED) {
+        return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
+    }
+
+    switch (ftype) {
+        case LLAMA_FTYPE_ALL_F32:        return "all F32";
+        case LLAMA_FTYPE_MOSTLY_F16:     return "F16";
+        case LLAMA_FTYPE_MOSTLY_BF16:    return "BF16";
+        case LLAMA_FTYPE_MOSTLY_Q4_0:    return "Q4_0";
+        case LLAMA_FTYPE_MOSTLY_Q4_1:    return "Q4_1";
+        case LLAMA_FTYPE_MOSTLY_Q5_0:    return "Q5_0";
+        case LLAMA_FTYPE_MOSTLY_Q5_1:    return "Q5_1";
+        case LLAMA_FTYPE_MOSTLY_Q8_0:    return "Q8_0";
+        case LLAMA_FTYPE_MOSTLY_Q2_K:    return "Q2_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q2_K_S:  return "Q2_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q3_K_S:  return "Q3_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q3_K_M:  return "Q3_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q3_K_L:  return "Q3_K - Large";
+        case LLAMA_FTYPE_MOSTLY_Q4_K_S:  return "Q4_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q4_K_M:  return "Q4_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q5_K_S:  return "Q5_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q5_K_M:  return "Q5_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q6_K:    return "Q6_K";
+        case LLAMA_FTYPE_MOSTLY_TQ1_0:   return "TQ1_0 - 1.69 bpw ternary";
+        case LLAMA_FTYPE_MOSTLY_TQ2_0:   return "TQ2_0 - 2.06 bpw ternary";
+        case LLAMA_FTYPE_MOSTLY_IQ2_XXS: return "IQ2_XXS - 2.0625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_XS:  return "IQ2_XS - 2.3125 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_S:   return "IQ2_S - 2.5 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_M:   return "IQ2_M - 2.7 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_XS:  return "IQ3_XS - 3.3 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_XXS: return "IQ3_XXS - 3.0625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ1_S:   return "IQ1_S - 1.5625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ1_M:   return "IQ1_M - 1.75 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ4_NL:  return "IQ4_NL - 4.5 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ4_XS:  return "IQ4_XS - 4.25 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_S:   return "IQ3_S - 3.4375 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_M:   return "IQ3_S mix - 3.66 bpw";
+
+        default: return "unknown, may not work";
+    }
+}
+
namespace GGUFMeta {
    template <typename T, gguf_type gt_, T (*gfun)(const gguf_context *, const int)>
    struct GKV_Base_Type {
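
Aside on the GUESSED branch in llama_model_ftype_name above: LLAMA_FTYPE_GUESSED is a flag bit OR'd into the ftype value (declared in llama.h), so the function strips the bit, resolves the base name by calling itself once, and appends " (guessed)". A minimal sketch of how the flag composes — illustrative only, not part of the diff:

    // illustrative only: a Q4_K_M ftype with the guessed flag set
    llama_ftype ft = (llama_ftype) (LLAMA_FTYPE_MOSTLY_Q4_K_M | LLAMA_FTYPE_GUESSED);
    // llama_model_ftype_name(ft) yields "Q4_K - Medium (guessed)"
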
@@ -1008,3 +1055,17 @@ bool llama_model_loader::load_all_data(
    return true;
}
+
+std::string llama_model_loader::ftype_name() const {
+    return llama_model_ftype_name(ftype);
+}
+
+void llama_model_loader::print_info() const {
+    LLAMA_LOG_INFO("%s: file format = %s\n", __func__, llama_file_version_name(fver));
+    LLAMA_LOG_INFO("%s: file type   = %s\n", __func__, llama_model_ftype_name(ftype).c_str());
+    if (n_bytes < GiB) {
+        LLAMA_LOG_INFO("%s: file size   = %.2f MiB (%.2f BPW)\n", __func__, n_bytes/1024.0/1024.0,        n_bytes*8.0/n_elements);
+    } else {
+        LLAMA_LOG_INFO("%s: file size   = %.2f GiB (%.2f BPW)\n", __func__, n_bytes/1024.0/1024.0/1024.0, n_bytes*8.0/n_elements);
+    }
+}
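
For context on the size line in print_info: BPW is bits per weight, i.e. n_bytes*8.0/n_elements, and the formatting switches from MiB to GiB once the file reaches one GiB. A self-contained sketch of the same math — the n_bytes and n_elements values here are hypothetical, not taken from the diff:

    #include <cstdint>
    #include <cstdio>

    int main() {
        const double MiB = 1024.0*1024.0, GiB = 1024.0*MiB;
        uint64_t n_bytes    = 4368439584ull; // hypothetical ~4.07 GiB model file
        uint64_t n_elements = 8030261248ull; // hypothetical total tensor elements
        double bpw = n_bytes*8.0/n_elements; // ~4.35 bits per weight
        if (n_bytes < GiB) {
            printf("file size = %.2f MiB (%.2f BPW)\n", n_bytes/MiB, bpw);
        } else {
            printf("file size = %.2f GiB (%.2f BPW)\n", n_bytes/GiB, bpw);
        }
        return 0;
    }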