@@ -30,34 +30,34 @@ void ggml_cuda_op_mul_mat_q(
3030
3131 switch (src0->type ) {
3232 case GGML_TYPE_Q4_0:
33- mul_mat_q_case<GGML_TYPE_Q4_0>(args, stream);
33+ mul_mat_q_case<GGML_TYPE_Q4_0>(ctx, args, stream);
3434 break ;
3535 case GGML_TYPE_Q4_1:
36- mul_mat_q_case<GGML_TYPE_Q4_1>(args, stream);
36+ mul_mat_q_case<GGML_TYPE_Q4_1>(ctx, args, stream);
3737 break ;
3838 case GGML_TYPE_Q5_0:
39- mul_mat_q_case<GGML_TYPE_Q5_0>(args, stream);
39+ mul_mat_q_case<GGML_TYPE_Q5_0>(ctx, args, stream);
4040 break ;
4141 case GGML_TYPE_Q5_1:
42- mul_mat_q_case<GGML_TYPE_Q5_1>(args, stream);
42+ mul_mat_q_case<GGML_TYPE_Q5_1>(ctx, args, stream);
4343 break ;
4444 case GGML_TYPE_Q8_0:
45- mul_mat_q_case<GGML_TYPE_Q8_0>(args, stream);
45+ mul_mat_q_case<GGML_TYPE_Q8_0>(ctx, args, stream);
4646 break ;
4747 case GGML_TYPE_Q2_K:
48- mul_mat_q_case<GGML_TYPE_Q2_K>(args, stream);
48+ mul_mat_q_case<GGML_TYPE_Q2_K>(ctx, args, stream);
4949 break ;
5050 case GGML_TYPE_Q3_K:
51- mul_mat_q_case<GGML_TYPE_Q3_K>(args, stream);
51+ mul_mat_q_case<GGML_TYPE_Q3_K>(ctx, args, stream);
5252 break ;
5353 case GGML_TYPE_Q4_K:
54- mul_mat_q_case<GGML_TYPE_Q4_K>(args, stream);
54+ mul_mat_q_case<GGML_TYPE_Q4_K>(ctx, args, stream);
5555 break ;
5656 case GGML_TYPE_Q5_K:
57- mul_mat_q_case<GGML_TYPE_Q5_K>(args, stream);
57+ mul_mat_q_case<GGML_TYPE_Q5_K>(ctx, args, stream);
5858 break ;
5959 case GGML_TYPE_Q6_K:
60- mul_mat_q_case<GGML_TYPE_Q6_K>(args, stream);
60+ mul_mat_q_case<GGML_TYPE_Q6_K>(ctx, args, stream);
6161 break ;
6262 default :
6363 GGML_ASSERT (false );
0 commit comments