metal : disable concurrency optimization

ggerganov · ggerganov · commit 86c90e34f5ef · 2023-09-18T18:00:01.000+03:00
diff --git a/llama.cpp b/llama.cpp
@@ -6605,8 +6605,8 @@ struct llama_context * llama_new_context_with_model(
                     llama_free(ctx);
                     return NULL;
                 }
-                ggml_metal_graph_find_concurrency(ctx->ctx_metal, gf, false);
-                ggml_allocr_set_parse_seq(ctx->alloc, ggml_metal_get_concur_list(ctx->ctx_metal), ggml_metal_if_optimized(ctx->ctx_metal));
+                //ggml_metal_graph_find_concurrency(ctx->ctx_metal, gf, false);
+                //ggml_allocr_set_parse_seq(ctx->alloc, ggml_metal_get_concur_list(ctx->ctx_metal), ggml_metal_if_optimized(ctx->ctx_metal));
             }
 #endif
             // measure memory requirements for the graph
@@ -6621,7 +6621,7 @@ struct llama_context * llama_new_context_with_model(
             ctx->alloc = ggml_allocr_new(ctx->buf_alloc.data, ctx->buf_alloc.size, tensor_alignment);
 #ifdef GGML_USE_METAL
             if (ctx->ctx_metal) {
-                ggml_allocr_set_parse_seq(ctx->alloc, ggml_metal_get_concur_list(ctx->ctx_metal), ggml_metal_if_optimized(ctx->ctx_metal));
+                //ggml_allocr_set_parse_seq(ctx->alloc, ggml_metal_get_concur_list(ctx->ctx_metal), ggml_metal_if_optimized(ctx->ctx_metal));
             }
 #endif
 #ifdef GGML_USE_CUBLAS

Original file line number	Diff line number	Diff line change
`@@ -6605,8 +6605,8 @@ struct llama_context * llama_new_context_with_model(`
`6605`	`6605`	`llama_free(ctx);`
`6606`	`6606`	`return NULL;`
`6607`	`6607`	`}`
`6608`		`- ggml_metal_graph_find_concurrency(ctx->ctx_metal, gf, false);`
`6609`		`- ggml_allocr_set_parse_seq(ctx->alloc, ggml_metal_get_concur_list(ctx->ctx_metal), ggml_metal_if_optimized(ctx->ctx_metal));`
	`6608`	`+ //ggml_metal_graph_find_concurrency(ctx->ctx_metal, gf, false);`
	`6609`	`+ //ggml_allocr_set_parse_seq(ctx->alloc, ggml_metal_get_concur_list(ctx->ctx_metal), ggml_metal_if_optimized(ctx->ctx_metal));`
`6610`	`6610`	`}`
`6611`	`6611`	`#endif`
`6612`	`6612`	`// measure memory requirements for the graph`
`@@ -6621,7 +6621,7 @@ struct llama_context * llama_new_context_with_model(`
`6621`	`6621`	`ctx->alloc = ggml_allocr_new(ctx->buf_alloc.data, ctx->buf_alloc.size, tensor_alignment);`
`6622`	`6622`	`#ifdef GGML_USE_METAL`
`6623`	`6623`	`if (ctx->ctx_metal) {`
`6624`		`- ggml_allocr_set_parse_seq(ctx->alloc, ggml_metal_get_concur_list(ctx->ctx_metal), ggml_metal_if_optimized(ctx->ctx_metal));`
	`6624`	`+ //ggml_allocr_set_parse_seq(ctx->alloc, ggml_metal_get_concur_list(ctx->ctx_metal), ggml_metal_if_optimized(ctx->ctx_metal));`
`6625`	`6625`	`}`
`6626`	`6626`	`#endif`
`6627`	`6627`	`#ifdef GGML_USE_CUBLAS`