From aeafca437efa7fb28166703f845e321176aa62ab Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sat, 4 Oct 2025 08:17:25 +0200 Subject: [PATCH 1/6] vad : fix memory leak by storing ggml_context in vad context struct This commit addresses a memory leak issue in the voice activity detection (VAD) where the ggml_context is not stored within the vad context structure. The motivation for this change is that not storing the context leaves its memory allocated, with the tensors still pointing to that memory, and this memory is never freed. Resolves: https://github.com/ggml-org/whisper.cpp/issues/3452 --- src/whisper.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/whisper.cpp b/src/whisper.cpp index d99dd7be68c..e9c2a786a91 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -4402,6 +4402,7 @@ struct whisper_vad_context { std::vector backends; ggml_backend_buffer_t buffer = nullptr; whisper_context_params params; + ggml_context * ctx = nullptr; std::vector ctx_buf; whisper_sched sched; @@ -4661,21 +4662,21 @@ static bool whisper_vad_init_context(whisper_vad_context * vctx) { /*.no_alloc =*/ true, }; - ggml_context * ctx = ggml_init(params); - if (!ctx) { + vctx->ctx = ggml_init(params); + if (!vctx->ctx) { WHISPER_LOG_ERROR("%s: failed to init LSTM state ggml context\n", __func__); return false; } // LSTM Hidden state - vctx->h_state = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, lstm_hidden_size); + vctx->h_state = ggml_new_tensor_1d(vctx->ctx, GGML_TYPE_F32, lstm_hidden_size); ggml_set_name(vctx->h_state, "h_state"); // LSTM Cell state - vctx->c_state = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, lstm_hidden_size); + vctx->c_state = ggml_new_tensor_1d(vctx->ctx, GGML_TYPE_F32, lstm_hidden_size); ggml_set_name(vctx->c_state, "c_state"); - vctx->buffer = ggml_backend_alloc_ctx_tensors(ctx, vctx->backends[0]); + vctx->buffer = ggml_backend_alloc_ctx_tensors(vctx->ctx, vctx->backends[0]); if (!vctx->buffer) { WHISPER_LOG_ERROR("%s: failed to allocate 
memory for the VAD state\n", __func__); return false; @@ -5433,7 +5434,7 @@ void whisper_vad_free(whisper_vad_context * ctx) { for (auto & backend : ctx->backends) { ggml_backend_free(backend); } - + ggml_free(ctx->ctx); delete ctx; } From fd95ecce35874c7cce00ed52ff260bc4d7cd7105 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sat, 4 Oct 2025 08:27:28 +0200 Subject: [PATCH 2/6] vad : free memory allocated for VAD hparams This commit frees the model hyperparameters allocated for the VAD context in the `whisper_vad_free` function. Specifically, it deletes the `encoder_in_channels`, `encoder_out_channels`, and `kernel_sizes` arrays allocated with `new[]` in the `whisper_vad_init` function. The motivation for this is to prevent memory leaks when the VAD context is freed. Resolves: https://github.com/ggml-org/whisper.cpp/issues/3452 --- src/whisper.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/whisper.cpp b/src/whisper.cpp index e9c2a786a91..35d717eb5b1 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -5434,8 +5434,13 @@ void whisper_vad_free(whisper_vad_context * ctx) { for (auto & backend : ctx->backends) { ggml_backend_free(backend); } + ggml_free(ctx->ctx); + delete[] ctx->model.hparams.encoder_in_channels; + delete[] ctx->model.hparams.encoder_out_channels; + delete[] ctx->model.hparams.kernel_sizes; + delete ctx; } } From f3c65a6d6e58082a66134e7d5e9d47dd63528260 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sat, 4 Oct 2025 08:30:54 +0200 Subject: [PATCH 3/6] vad: free ggml buffer in whisper_vad_free This commit frees the ggml buffer in the whisper_vad_free function to prevent memory leaks. 
Resolves: https://github.com/ggml-org/whisper.cpp/issues/3452 --- src/whisper.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/whisper.cpp b/src/whisper.cpp index 35d717eb5b1..5f466a18ccf 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -5421,6 +5421,9 @@ struct whisper_vad_segments * whisper_vad_segments_from_samples( void whisper_vad_free(whisper_vad_context * ctx) { if (ctx) { + if (ctx->buffer) { + ggml_backend_buffer_free(ctx->buffer); + } for (ggml_context * context : ctx->model.ctxs) { ggml_free(context); } From a16c6626a835ce041b4fd241d9135e4f5efe50f7 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Mon, 6 Oct 2025 12:35:58 +0200 Subject: [PATCH 4/6] Revert "vad : fix memory leak by storing ggml_context in vad context struct" This reverts commit aeafca437efa7fb28166703f845e321176aa62ab. --- src/whisper.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/whisper.cpp b/src/whisper.cpp index 5f466a18ccf..08c1b9cfdbb 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -4402,7 +4402,6 @@ struct whisper_vad_context { std::vector backends; ggml_backend_buffer_t buffer = nullptr; whisper_context_params params; - ggml_context * ctx = nullptr; std::vector ctx_buf; whisper_sched sched; @@ -4662,21 +4661,21 @@ static bool whisper_vad_init_context(whisper_vad_context * vctx) { /*.no_alloc =*/ true, }; - vctx->ctx = ggml_init(params); - if (!vctx->ctx) { + ggml_context * ctx = ggml_init(params); + if (!ctx) { WHISPER_LOG_ERROR("%s: failed to init LSTM state ggml context\n", __func__); return false; } // LSTM Hidden state - vctx->h_state = ggml_new_tensor_1d(vctx->ctx, GGML_TYPE_F32, lstm_hidden_size); + vctx->h_state = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, lstm_hidden_size); ggml_set_name(vctx->h_state, "h_state"); // LSTM Cell state - vctx->c_state = ggml_new_tensor_1d(vctx->ctx, GGML_TYPE_F32, lstm_hidden_size); + vctx->c_state = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, lstm_hidden_size); 
ggml_set_name(vctx->c_state, "c_state"); - vctx->buffer = ggml_backend_alloc_ctx_tensors(vctx->ctx, vctx->backends[0]); + vctx->buffer = ggml_backend_alloc_ctx_tensors(ctx, vctx->backends[0]); if (!vctx->buffer) { WHISPER_LOG_ERROR("%s: failed to allocate memory for the VAD state\n", __func__); return false; @@ -5438,8 +5437,6 @@ void whisper_vad_free(whisper_vad_context * ctx) { ggml_backend_free(backend); } - ggml_free(ctx->ctx); - delete[] ctx->model.hparams.encoder_in_channels; delete[] ctx->model.hparams.encoder_out_channels; delete[] ctx->model.hparams.kernel_sizes; From 46a039add9ed4710558bcf3c151709510fd8ada3 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Mon, 6 Oct 2025 12:40:17 +0200 Subject: [PATCH 5/6] whisper : free ggml context in whisper_vad_init_context This commit frees the ggml_context after initializing the VAD context in the whisper_vad_init_context function. The motivation for this is to prevent memory leaks. --- src/whisper.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/whisper.cpp b/src/whisper.cpp index 08c1b9cfdbb..be161f19997 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -4694,6 +4694,7 @@ static bool whisper_vad_init_context(whisper_vad_context * vctx) { WHISPER_LOG_INFO("%s: compute buffer (VAD) = %7.2f MB\n", __func__, whisper_sched_size(vctx->sched) / 1e6); } + ggml_free(ctx); return true; } From 36ac9d72a3f8d6b41d18282e4954b0450b544bde Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Mon, 6 Oct 2025 13:26:11 +0200 Subject: [PATCH 6/6] squash! whisper : free ggml context in whisper_vad_init_context Move ggml_free(ctx) to before returns to avoid memory leaks. 
--- src/whisper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/whisper.cpp b/src/whisper.cpp index be161f19997..39c53ba233a 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -4676,6 +4676,7 @@ static bool whisper_vad_init_context(whisper_vad_context * vctx) { ggml_set_name(vctx->c_state, "c_state"); vctx->buffer = ggml_backend_alloc_ctx_tensors(ctx, vctx->backends[0]); + ggml_free(ctx); if (!vctx->buffer) { WHISPER_LOG_ERROR("%s: failed to allocate memory for the VAD state\n", __func__); return false; @@ -4694,7 +4695,6 @@ static bool whisper_vad_init_context(whisper_vad_context * vctx) { WHISPER_LOG_INFO("%s: compute buffer (VAD) = %7.2f MB\n", __func__, whisper_sched_size(vctx->sched) / 1e6); } - ggml_free(ctx); return true; }