@@ -3331,13 +3331,18 @@ static void ggml_vk_load_shaders(vk_device& device) {
33313331 // the number of rows computed per shader depends on GPU model and quant
33323332 uint32_t rm_stdq = 1;
33333333 uint32_t rm_kq = 2;
3334+ uint32_t rm_stdq_int = 1;
3335+ uint32_t rm_kq_int = 1;
33343336 if (device->vendor_id == VK_VENDOR_ID_AMD) {
33353337 if (device->architecture == AMD_GCN) {
33363338 rm_stdq = 2;
33373339 rm_kq = 4;
3340+ rm_stdq_int = 4;
33383341 }
3339- } else if (device->vendor_id == VK_VENDOR_ID_INTEL)
3342+ } else if (device->vendor_id == VK_VENDOR_ID_INTEL) {
33403343 rm_stdq = 2;
3344+ rm_stdq_int = 2;
3345+ }
33413346 uint32_t rm_iq = 2 * rm_kq;
33423347
33433348 const bool use_subgroups = device->subgroup_arithmetic && device->architecture != vk_device_architecture::AMD_GCN;
@@ -3416,19 +3421,19 @@ static void ggml_vk_load_shaders(vk_device& device) {
34163421 const uint32_t subgroup_size_int = (device->vendor_id == VK_VENDOR_ID_INTEL && device->subgroup_size_control) ? device->subgroup_min_size : device->subgroup_size;
34173422 const uint32_t wg_size_subgroup_int = (w == DMMV_WG_SIZE_SUBGROUP) ? subgroup_size_int : (subgroup_size_int * 4);
34183423
3419- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q4_0][i], "mul_mat_vec_q4_0_q8_1_f32", arr_dmmv_q4_0_q8_1_f32_len[reduc], arr_dmmv_q4_0_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {2*rm_stdq , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3420- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q4_1][i], "mul_mat_vec_q4_1_q8_1_f32", arr_dmmv_q4_1_q8_1_f32_len[reduc], arr_dmmv_q4_1_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {2*rm_stdq , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3421- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q5_0][i], "mul_mat_vec_q5_0_q8_1_f32", arr_dmmv_q5_0_q8_1_f32_len[reduc], arr_dmmv_q5_0_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {2*rm_stdq , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3422- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q5_1][i], "mul_mat_vec_q5_1_q8_1_f32", arr_dmmv_q5_1_q8_1_f32_len[reduc], arr_dmmv_q5_1_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {2*rm_stdq , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3423- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q8_0][i], "mul_mat_vec_q8_0_q8_1_f32", arr_dmmv_q8_0_q8_1_f32_len[reduc], arr_dmmv_q8_0_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {1*rm_stdq , 1, 1}, {wg_size_subgroup_int, 1*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3424+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q4_0][i], "mul_mat_vec_q4_0_q8_1_f32", arr_dmmv_q4_0_q8_1_f32_len[reduc], arr_dmmv_q4_0_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {1*rm_stdq_int , 1, 1}, {wg_size_subgroup_int, 1*rm_stdq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
3425+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q4_1][i], "mul_mat_vec_q4_1_q8_1_f32", arr_dmmv_q4_1_q8_1_f32_len[reduc], arr_dmmv_q4_1_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {1*rm_stdq_int , 1, 1}, {wg_size_subgroup_int, 1*rm_stdq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
3426+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q5_0][i], "mul_mat_vec_q5_0_q8_1_f32", arr_dmmv_q5_0_q8_1_f32_len[reduc], arr_dmmv_q5_0_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {1*rm_stdq_int , 1, 1}, {wg_size_subgroup_int, 1*rm_stdq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
3427+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q5_1][i], "mul_mat_vec_q5_1_q8_1_f32", arr_dmmv_q5_1_q8_1_f32_len[reduc], arr_dmmv_q5_1_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {1*rm_stdq_int , 1, 1}, {wg_size_subgroup_int, 1*rm_stdq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
3428+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q8_0][i], "mul_mat_vec_q8_0_q8_1_f32", arr_dmmv_q8_0_q8_1_f32_len[reduc], arr_dmmv_q8_0_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {1*rm_stdq_int , 1, 1}, {wg_size_subgroup_int, 1*rm_stdq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
34243429
3425- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_MXFP4][i], "mul_mat_vec_mxfp4_q8_1_f32", arr_dmmv_mxfp4_q8_1_f32_len[reduc], arr_dmmv_mxfp4_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {1*rm_stdq , 1, 1}, {wg_size_subgroup_int, 1*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3430+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_MXFP4][i], "mul_mat_vec_mxfp4_q8_1_f32", arr_dmmv_mxfp4_q8_1_f32_len[reduc], arr_dmmv_mxfp4_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {2*rm_stdq_int , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
34263431
3427- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q2_K][i], "mul_mat_vec_q2_k_q8_1_f32", arr_dmmv_q2_k_q8_1_f32_len[reduc], arr_dmmv_q2_k_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {2*rm_stdq , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3428- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q3_K][i], "mul_mat_vec_q3_k_q8_1_f32", arr_dmmv_q3_k_q8_1_f32_len[reduc], arr_dmmv_q3_k_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {2*rm_stdq , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3429- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q4_K][i], "mul_mat_vec_q4_k_q8_1_f32", arr_dmmv_q4_k_q8_1_f32_len[reduc], arr_dmmv_q4_k_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {2*rm_stdq , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3430- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q5_K][i], "mul_mat_vec_q5_k_q8_1_f32", arr_dmmv_q5_k_q8_1_f32_len[reduc], arr_dmmv_q5_k_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {2*rm_stdq , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3431- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q6_K][i], "mul_mat_vec_q6_k_q8_1_f32", arr_dmmv_q6_k_q8_1_f32_len[reduc], arr_dmmv_q6_k_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {2*rm_stdq , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3432+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q2_K][i], "mul_mat_vec_q2_k_q8_1_f32", arr_dmmv_q2_k_q8_1_f32_len[reduc], arr_dmmv_q2_k_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {2*rm_kq_int , 1, 1}, {wg_size_subgroup_int, 2*rm_kq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
3433+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q3_K][i], "mul_mat_vec_q3_k_q8_1_f32", arr_dmmv_q3_k_q8_1_f32_len[reduc], arr_dmmv_q3_k_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {1*rm_kq_int , 1, 1}, {wg_size_subgroup_int, 1*rm_kq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
3434+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q4_K][i], "mul_mat_vec_q4_k_q8_1_f32", arr_dmmv_q4_k_q8_1_f32_len[reduc], arr_dmmv_q4_k_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {1*rm_kq_int , 1, 1}, {wg_size_subgroup_int, 1*rm_kq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
3435+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q5_K][i], "mul_mat_vec_q5_k_q8_1_f32", arr_dmmv_q5_k_q8_1_f32_len[reduc], arr_dmmv_q5_k_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {1*rm_kq_int , 1, 1}, {wg_size_subgroup_int, 1*rm_kq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
3436+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q6_K][i], "mul_mat_vec_q6_k_q8_1_f32", arr_dmmv_q6_k_q8_1_f32_len[reduc], arr_dmmv_q6_k_q8_1_f32_data[reduc], "main", 4, sizeof(vk_mat_vec_push_constants), {1*rm_kq_int , 1, 1}, {wg_size_subgroup_int, 1*rm_kq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
34323437 }
34333438#endif // GGML_VULKAN_INTEGER_DOT_GLSLC_SUPPORT
34343439 }
0 commit comments