2 files changed: 16 additions, 0 deletions.
@@ -370,6 +370,14 @@ extern "C" {
370370 enum llama_ftype ftype; // quantize to this llama_ftype
371371 enum ggml_type output_tensor_type; // output tensor type
372372 enum ggml_type token_embedding_type; // token embeddings tensor type
373+ enum ggml_type attn_q_type; // attention query tensor type
374+ enum ggml_type attn_k_type; // attention key tensor type
375+ enum ggml_type attn_v_type; // attention value tensor type
376+ enum ggml_type attn_qkv_type; // attention query-key-value tensor type
377+ enum ggml_type attn_output_type; // attention output tensor type
378+ enum ggml_type ffn_gate_type; // feedforward network gate type
379+ enum ggml_type ffn_down_type; // feedforward network down type
380+ enum ggml_type ffn_up_type; // feedforward network up type
373381 bool allow_requantize; // allow quantizing non-f32/f16 tensors
374382 bool quantize_output_tensor; // quantize output.weight
375383 bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
@@ -19981,6 +19981,14 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
1998119981 /*.ftype =*/ LLAMA_FTYPE_MOSTLY_Q5_1,
1998219982 /*.output_tensor_type =*/ GGML_TYPE_COUNT,
1998319983 /*.token_embedding_type =*/ GGML_TYPE_COUNT,
19984+ /*.attn_q_type =*/ GGML_TYPE_COUNT,
19985+ /*.attn_k_type =*/ GGML_TYPE_COUNT,
19986+ /*.attn_v_type =*/ GGML_TYPE_COUNT,
19987+ /*.attn_qkv_type =*/ GGML_TYPE_COUNT,
19988+ /*.attn_output_type =*/ GGML_TYPE_COUNT,
19989+ /*.ffn_gate_type =*/ GGML_TYPE_COUNT,
19990+ /*.ffn_down_type =*/ GGML_TYPE_COUNT,
19991+ /*.ffn_up_type =*/ GGML_TYPE_COUNT,
1998419992 /*.allow_requantize =*/ false,
1998519993 /*.quantize_output_tensor =*/ true,
1998619994 /*.only_copy =*/ false,
You can’t perform that action at this time.
0 commit comments