
Commit c587c42

ggerganov authored and jordankanter committed

llama : fix quantization when tensors are missing (ggml-org#5423)

1 parent 56360e7 commit c587c42

File tree

1 file changed: +24 -8 lines changed

llama.cpp: +24 -8
@@ -772,22 +772,37 @@ struct LLM_TN {
     llm_arch arch;
 
     std::string operator()(llm_tensor tensor) const {
+        if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) {
+            return "__missing__";
+        }
         return LLM_TENSOR_NAMES[arch].at(tensor);
     }
 
     std::string operator()(llm_tensor tensor, const std::string & suffix) const {
+        if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) {
+            return "__missing__";
+        }
         return LLM_TENSOR_NAMES[arch].at(tensor) + "." + suffix;
     }
 
     std::string operator()(llm_tensor tensor, int bid) const {
+        if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) {
+            return "__missing__";
+        }
         return ::format(LLM_TENSOR_NAMES[arch].at(tensor).c_str(), bid);
     }
 
     std::string operator()(llm_tensor tensor, const std::string & suffix, int bid) const {
+        if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) {
+            return "__missing__";
+        }
         return ::format(LLM_TENSOR_NAMES[arch].at(tensor).c_str(), bid) + "." + suffix;
     }
 
     std::string operator()(llm_tensor tensor, const std::string & suffix, int bid, int xid) const {
+        if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) {
+            return "__missing__";
+        }
         return ::format(LLM_TENSOR_NAMES[arch].at(tensor).c_str(), bid, xid) + "." + suffix;
     }
 };
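
Note on the hunk above: previously each operator() called LLM_TENSOR_NAMES[arch].at(tensor) directly, so requesting a tensor name that an architecture does not define threw std::out_of_range and aborted quantization. The fix returns the sentinel string "__missing__" instead, so callers can detect and skip tensors that are absent for the current architecture. A minimal self-contained sketch of the pattern, where the enum, map, and caller are illustrative stand-ins rather than the actual llama.cpp definitions:

#include <cstdio>
#include <initializer_list>
#include <map>
#include <string>

// Illustrative stand-ins for llm_tensor / LLM_TENSOR_NAMES in llama.cpp.
enum llm_tensor_demo { TENSOR_OUTPUT, TENSOR_ROPE_FREQS };

static const std::map<llm_tensor_demo, std::string> tensor_names_demo = {
    { TENSOR_OUTPUT, "output.weight" },
    // TENSOR_ROPE_FREQS intentionally absent: not every arch defines it.
};

// Before the fix: .at() on a missing key throws std::out_of_range.
// After the fix: a sentinel is returned and the caller can skip the tensor.
static std::string tensor_name(llm_tensor_demo t) {
    if (tensor_names_demo.find(t) == tensor_names_demo.end()) {
        return "__missing__";
    }
    return tensor_names_demo.at(t);
}

int main() {
    for (auto t : { TENSOR_OUTPUT, TENSOR_ROPE_FREQS }) {
        const std::string name = tensor_name(t);
        if (name == "__missing__") {
            printf("skipping tensor not defined for this arch\n");
            continue; // quantization proceeds without it
        }
        printf("quantizing %s\n", name.c_str());
    }
    return 0;
}

A sentinel string keeps the operator() signatures unchanged; an alternative would be returning std::optional<std::string>, at the cost of touching every call site.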
@@ -10227,6 +10242,7 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
         }
         ++qs.i_ffn_up;
     }
+
     //    if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q3_K;
     //}
     // IK: let's remove this, else Q2_K is almost the same as Q3_K_S
@@ -10286,19 +10302,19 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 
         // K-quants
         case LLAMA_FTYPE_MOSTLY_Q2_K_S:
-        case LLAMA_FTYPE_MOSTLY_Q2_K:   quantized_type = GGML_TYPE_Q2_K; break;
+        case LLAMA_FTYPE_MOSTLY_Q2_K:    quantized_type = GGML_TYPE_Q2_K;    break;
         case LLAMA_FTYPE_MOSTLY_Q3_K_XS:
         case LLAMA_FTYPE_MOSTLY_Q3_K_S:
         case LLAMA_FTYPE_MOSTLY_Q3_K_M:
-        case LLAMA_FTYPE_MOSTLY_Q3_K_L: quantized_type = GGML_TYPE_Q3_K; break;
+        case LLAMA_FTYPE_MOSTLY_Q3_K_L:  quantized_type = GGML_TYPE_Q3_K;    break;
         case LLAMA_FTYPE_MOSTLY_Q4_K_S:
-        case LLAMA_FTYPE_MOSTLY_Q4_K_M: quantized_type = GGML_TYPE_Q4_K; break;
+        case LLAMA_FTYPE_MOSTLY_Q4_K_M:  quantized_type = GGML_TYPE_Q4_K;    break;
         case LLAMA_FTYPE_MOSTLY_Q5_K_S:
-        case LLAMA_FTYPE_MOSTLY_Q5_K_M: quantized_type = GGML_TYPE_Q5_K; break;
-        case LLAMA_FTYPE_MOSTLY_Q6_K:   quantized_type = GGML_TYPE_Q6_K; break;
-        case LLAMA_FTYPE_MOSTLY_IQ2_XXS:quantized_type = GGML_TYPE_IQ2_XXS; break;
-        case LLAMA_FTYPE_MOSTLY_IQ2_XS :quantized_type = GGML_TYPE_IQ2_XS; break;
-        case LLAMA_FTYPE_MOSTLY_IQ3_XXS:quantized_type = GGML_TYPE_IQ3_XXS; break;
+        case LLAMA_FTYPE_MOSTLY_Q5_K_M:  quantized_type = GGML_TYPE_Q5_K;    break;
+        case LLAMA_FTYPE_MOSTLY_Q6_K:    quantized_type = GGML_TYPE_Q6_K;    break;
+        case LLAMA_FTYPE_MOSTLY_IQ2_XXS: quantized_type = GGML_TYPE_IQ2_XXS; break;
+        case LLAMA_FTYPE_MOSTLY_IQ2_XS:  quantized_type = GGML_TYPE_IQ2_XS;  break;
+        case LLAMA_FTYPE_MOSTLY_IQ3_XXS: quantized_type = GGML_TYPE_IQ3_XXS; break;
 
         default: throw std::runtime_error(format("invalid output file type %d\n", ftype));
     }
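
This last hunk is alignment-only: the case labels are respaced so each quantized_type assignment and break sits in the same column, with no behavioral change. For readers unfamiliar with the grouping, a compact sketch of the fall-through dispatch style used here, with placeholder enums rather than the real llama_ftype/ggml_type values:

#include <stdexcept>

// Placeholder enums standing in for llama_ftype and ggml_type.
enum ftype_demo { FTYPE_Q4_K_S, FTYPE_Q4_K_M, FTYPE_Q6_K };
enum gtype_demo { GTYPE_Q4_K, GTYPE_Q6_K };

// Adjacent case labels fall through to a shared result, so several
// file-type variants map onto one quantized element type.
static gtype_demo quantized_type_for(ftype_demo ftype) {
    switch (ftype) {
        case FTYPE_Q4_K_S:
        case FTYPE_Q4_K_M: return GTYPE_Q4_K;
        case FTYPE_Q6_K:   return GTYPE_Q6_K;
        default: throw std::runtime_error("invalid output file type");
    }
}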
