@@ -1311,7 +1311,6 @@ struct ggml_vk_garbage_collector {
     std::vector<vk_semaphore> tl_semaphores;
     std::vector<vk_semaphore> semaphores;
     std::vector<vk::Event> events;
-    std::vector<vk_buffer> temp_buffers;
     std::vector<vk_context> contexts;
 };
 
@@ -5144,40 +5143,6 @@ static vk_pipeline ggml_vk_get_dequantize_mul_mat_vec_id(ggml_backend_vk_context
     return ctx->device->pipeline_dequant_mul_mat_vec_id_f32[a_type];
 }
 
-static vk_buffer ggml_vk_pool_malloc(ggml_backend_vk_context * ctx, size_t size) {
-    VK_LOG_DEBUG("ggml_vk_pool_malloc(" << size << ")");
-    VK_LOG_MEMORY("ggml_vk_pool_malloc");
-
-    int best_i = -1;
-    size_t best_size = std::numeric_limits<size_t>::max(); //smallest unused buffer that fits our needs
-    int worst_i = -1;
-    size_t worst_size = 0; //largest unused buffer seen so far
-    for (int i = 0; i < MAX_VK_BUFFERS; ++i) {
-        vk_buffer &b = ctx->buffer_pool[i];
-        if (b != nullptr && b->size >= size && b->size < best_size) {
-            best_i = i;
-            best_size = b->size;
-        }
-        if (b != nullptr && b->size > worst_size) {
-            worst_i = i;
-            worst_size = b->size;
-        }
-    }
-    if(best_i != -1) {
-        //found the smallest buffer that fits our needs
-        vk_buffer b = ctx->buffer_pool[best_i];
-        ctx->buffer_pool[best_i].reset();
-        return b;
-    }
-    if(worst_i != -1) {
-        //no buffer that fits our needs, resize largest one to save memory
-        vk_buffer& b = ctx->buffer_pool[worst_i];
-        ggml_vk_destroy_buffer(b);
-    }
-
-    return ggml_vk_create_buffer_device(ctx->device, size);
-}
-
 static void ggml_vk_pool_free(ggml_backend_vk_context * ctx, vk_buffer& buffer) {
     VK_LOG_DEBUG("ggml_vk_pool_free(" << buffer->size << ")");
     for (int i = 0; i < MAX_VK_BUFFERS; ++i) {
@@ -5191,24 +5156,6 @@ static void ggml_vk_pool_free(ggml_backend_vk_context * ctx, vk_buffer& buffer)
     ggml_vk_destroy_buffer(buffer);
 }
 
-// Returns an available temporary buffer that may only be used temporarily, it will be reused
-static vk_buffer ggml_vk_create_buffer_temp(ggml_backend_vk_context * ctx, size_t size) {
-    // Try to find existing temp buffer with enough capacity
-    for (auto& buffer : ctx->gc.temp_buffers) {
-        if (buffer->size >= size) {
-            return buffer;
-        }
-    }
-
-    VK_LOG_MEMORY("ggml_vk_create_buffer_temp(" << size << ")");
-
-    // Otherwise create new buffer
-    vk_buffer buf = ggml_vk_pool_malloc(ctx, size);
-    ctx->gc.temp_buffers.push_back(buf);
-
-    return buf;
-}
-
 static void * ggml_vk_host_malloc(vk_device& device, size_t size) {
     VK_LOG_MEMORY("ggml_vk_host_malloc(" << size << ")");
     vk_buffer buf = ggml_vk_create_buffer(device, size,
@@ -11789,10 +11736,6 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_cgraph *
 // Clean up after graph processing is done
 static void ggml_vk_graph_cleanup(ggml_backend_vk_context * ctx) {
     VK_LOG_DEBUG("ggml_vk_graph_cleanup()");
-    for (auto& buffer : ctx->gc.temp_buffers) {
-        ggml_vk_pool_free(ctx, buffer);
-    }
-    ctx->gc.temp_buffers.clear();
     ctx->prealloc_y_last_pipeline_used = {};
 
     ctx->unsynced_nodes_written.clear();