Skip to content

Commit 6961c4b

Browse files
committed
batched-bench : print params at start
1 parent cc44877 commit 6961c4b

File tree

2 files changed

+10 -6 lines changed

2 files changed

+10 -6 lines changed

examples/batched-bench/batched-bench.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,10 @@ int main(int argc, char ** argv) {
154154
}
155155
}
156156

157+
LOG_TEE("\n");
158+
LOG_TEE("%s: n_kv_max = %d, is_pp_shared = %d, n_gpu_layers = %d, mmq = %d\n", __func__, n_kv_max, is_pp_shared, n_gpu_layers, mmq);
159+
LOG_TEE("\n");
160+
157161
LOG_TEE("|%6s | %6s | %4s | %6s | %8s | %8s | %8s | %8s | %8s | %8s |\n", "PP", "TG", "B", "N_KV", "T_PP s", "S_PP t/s", "T_TG s", "S_TG t/s", "T s", "S t/s");
158162
LOG_TEE("|%6s-|-%6s-|-%4s-|-%6s-|-%8s-|-%8s-|-%8s-|-%8s-|-%8s-|-%8s-|\n", "------", "------", "----", "------", "--------", "--------", "--------", "--------", "--------", "--------");
159163

ggml-cuda.cu

+6 -6
Original file line numberDiff line numberDiff line change
@@ -6254,16 +6254,15 @@ inline void ggml_cuda_op_mul_mat_cublas(
62546254
const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols,
62556255
const int64_t src1_padded_row_size, const cudaStream_t & stream) {
62566256

6257-
GGML_ASSERT(src0_dd_i != nullptr);
6257+
GGML_ASSERT(src0_dd_i != nullptr);
62586258
GGML_ASSERT(src1_ddf_i != nullptr);
6259-
GGML_ASSERT(dst_dd_i != nullptr);
6260-
6259+
GGML_ASSERT(dst_dd_i != nullptr);
62616260

62626261
const int64_t ne00 = src0->ne[0];
6263-
62646262
const int64_t ne10 = src1->ne[0];
62656263

62666264
const int64_t ne0 = dst->ne[0];
6265+
62676266
const int64_t row_diff = row_high - row_low;
62686267

62696268
int id;
@@ -7223,12 +7222,13 @@ static void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1
72237222
//printf("src1 is contiguous %d, transposed %d, type = %s, name = %s\n", ggml_is_contiguous(src1), ggml_is_transposed(src1), ggml_type_name(src1->type), src1->name);
72247223

72257224
if (all_on_device && src0->type == GGML_TYPE_F16 && ggml_is_permuted(src0) && ggml_is_permuted(src1) && src1->ne[1] == 1) {
7226-
// KQ
7225+
// KQ single-batch
72277226
ggml_cuda_mul_mat_vec_p021(src0, src1, dst);
72287227
} else if (all_on_device && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && src1->ne[1] == 1) {
7229-
// KQV
7228+
// KQV single-batch
72307229
ggml_cuda_mul_mat_vec_nc(src0, src1, dst);
72317230
} else if (all_on_device && src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32 && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) {
7231+
// KQ + KQV multi-batch
72327232
ggml_cuda_mul_mat_mat_batched_cublas(src0, src1, dst);
72337233
} else if (src0->type == GGML_TYPE_F32) {
72347234
ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_mul_mat_cublas, false);

0 commit comments

Comments
 (0)