@@ -318,6 +318,8 @@ static bool isinf_or_max(float f) {
318
318
return _isinf (f) || f == FLT_MAX || f == -FLT_MAX;
319
319
}
320
320
321
+ using extra_buffer_map_t = std::unordered_map<ggml_backend_buffer_type_t , ggml_backend_buffer_t >;
322
+
321
323
static bool ggml_is_view_op (enum ggml_op op) {
322
324
return op == GGML_OP_VIEW || op == GGML_OP_RESHAPE || op == GGML_OP_PERMUTE || op == GGML_OP_TRANSPOSE;
323
325
}
@@ -1155,6 +1157,33 @@ struct test_case {
1155
1157
}
1156
1158
}
1157
1159
1160
+ static void try_assign_extra_buffer (struct ggml_tensor * node_copy, const extra_buffer_map_t & extra_buf_map) {
1161
+ struct ggml_tensor * src0_copy = node_copy->src [0 ];
1162
+ ggml_backend_buffer_t org_buf = src0_copy->buffer ;
1163
+
1164
+ for (const auto & [buft, buf] : extra_buf_map) {
1165
+ // Initialize the tensor in the extra buffer
1166
+ if (ggml_backend_buffer_init_tensor (buf, src0_copy) != GGML_STATUS_SUCCESS) {
1167
+ continue ;
1168
+ }
1169
+
1170
+ if (!src0_copy->extra ) {
1171
+ continue ;
1172
+ }
1173
+
1174
+ // Temporarily assign buffer so we can call ggml_backend_dev_supports_op
1175
+ src0_copy->buffer = buf;
1176
+
1177
+ ggml_backend_dev_t dev = ggml_backend_buft_get_device (buft);
1178
+ // Check if extra buffer type supports the operation
1179
+ if (dev && ggml_backend_dev_supports_op (dev, node_copy)) {
1180
+ return ;
1181
+ } else {
1182
+ src0_copy->buffer = org_buf; // Restore original buffer if not supported
1183
+ }
1184
+ }
1185
+ }
1186
+
1158
1187
struct ggml_backend_graph_copy ggml_backend_graph_copy (ggml_backend_t backend, struct ggml_cgraph * graph,
1159
1188
std::unordered_map<ggml_backend_buffer_type_t , ggml_backend_buffer_t > extra_buf_map) {
1160
1189
GGML_ASSERT (graph);
@@ -1219,24 +1248,10 @@ struct test_case {
1219
1248
for (int i = 0 ; i < graph->n_nodes ; i++) {
1220
1249
struct ggml_tensor * node = graph->nodes [i];
1221
1250
1222
- if (node->op != GGML_OP_NONE && node->src [0 ]) {
1223
- for (const auto & [buft, buf] : extra_buf_map) {
1224
- size_t id = ggml_hash_find (&hash_set, node);
1225
- ggml_status status = ggml_backend_buffer_init_tensor (buf, node_copies[id]->src [0 ]);
1226
- if (status == GGML_STATUS_SUCCESS) {
1227
- if (node_copies[id]->src [0 ]->extra != nullptr ) {
1228
- if (strcmp (ggml_backend_buft_name (buft)," CPU_REPACK" ) == 0 ) {
1229
- if (node_copies[id]->op == GGML_OP_MUL_MAT || node_copies[id]->op == GGML_OP_MUL_MAT_ID) {
1230
- if (ggml_n_dims (node_copies[id]->src [1 ]) == 2 ) {
1231
- node_copies[id]->src [0 ]->buffer = buf;
1232
- }
1233
- }
1234
- }
1235
- }
1236
- } else {
1237
- GGML_LOG_ERROR (" %s: failed to initialize tensor in extra buffer type '%s' for graph copy\n " , __func__, ggml_backend_buft_name (buft));
1238
- }
1239
- }
1251
+ // Handle extra buffer types (before graph_copy_init_tensor)
1252
+ if (node->op != GGML_OP_NONE && !ggml_is_view_op (node->op ) && node->src [0 ]) {
1253
+ size_t id = ggml_hash_find (&hash_set, node);
1254
+ try_assign_extra_buffer (node_copies[id], extra_buf_map);
1240
1255
}
1241
1256
1242
1257
graph_copy_init_tensor (&hash_set, node_copies, node_init, node);
@@ -7351,6 +7366,25 @@ static void print_backend_features(ggml_backend_t backend) {
7351
7366
}
7352
7367
}
7353
7368
7369
+ static extra_buffer_map_t load_cpu_extra_bufts () {
7370
+ auto * cpu_dev = ggml_backend_dev_by_type (GGML_BACKEND_DEVICE_TYPE_CPU);
7371
+ auto * cpu_reg = ggml_backend_dev_backend_reg (cpu_dev);
7372
+
7373
+ std::unordered_map<ggml_backend_buffer_type_t , ggml_backend_buffer_t > extra_buf_map;
7374
+ auto ggml_backend_dev_get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t )
7375
+ ggml_backend_reg_get_proc_address (cpu_reg, " ggml_backend_dev_get_extra_bufts" );
7376
+ if (ggml_backend_dev_get_extra_bufts_fn) {
7377
+ ggml_backend_buffer_type_t * extra_bufts = ggml_backend_dev_get_extra_bufts_fn (cpu_dev);
7378
+ while (extra_bufts && *extra_bufts) {
7379
+ // TODO: What should the size be here? Do extra buffer types need a size even?
7380
+ // We need to have a value larger than 0 to avoid the dummy backend buffer to be used.
7381
+ extra_buf_map[*extra_bufts] = ggml_backend_buft_alloc_buffer (*extra_bufts, 1 );
7382
+ ++extra_bufts;
7383
+ }
7384
+ }
7385
+ return extra_buf_map;
7386
+ }
7387
+
7354
7388
static bool test_cpu_variant (const char * variant_name, const char * op_names_filter,
7355
7389
const char * params_filter, printer * output_printer) {
7356
7390
// Load the variant first so that extra buffer types created only use that
@@ -7359,23 +7393,7 @@ static bool test_cpu_variant(const char * variant_name, const char * op_names_fi
7359
7393
ggml_backend_load_variant (" cpu" , std::string (variant_name).substr (4 ).c_str ());
7360
7394
7361
7395
// Load extra buffer types and allocate a buffer from each type.
7362
- std::unordered_map<ggml_backend_buffer_type_t , ggml_backend_buffer_t > extra_buf_map;
7363
- {
7364
- auto * cpu_dev = ggml_backend_dev_by_type (GGML_BACKEND_DEVICE_TYPE_CPU);
7365
- auto * cpu_reg = ggml_backend_dev_backend_reg (cpu_dev);
7366
-
7367
- auto ggml_backend_dev_get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t )
7368
- ggml_backend_reg_get_proc_address (cpu_reg, " ggml_backend_dev_get_extra_bufts" );
7369
- if (ggml_backend_dev_get_extra_bufts_fn) {
7370
- ggml_backend_buffer_type_t * extra_bufts = ggml_backend_dev_get_extra_bufts_fn (cpu_dev);
7371
- while (extra_bufts && *extra_bufts) {
7372
- // TODO: What should the size be here? Do extra buffer types need a size even?
7373
- // We need to have a value larger than 0 to avoid the dummy backend buffer to be used.
7374
- extra_buf_map[*extra_bufts] = ggml_backend_buft_alloc_buffer (*extra_bufts, 1 );
7375
- ++extra_bufts;
7376
- }
7377
- }
7378
- }
7396
+ auto extra_buf_map = load_cpu_extra_bufts ();
7379
7397
7380
7398
printf (" \n " );
7381
7399
for (auto buft : extra_buf_map) {
0 commit comments