From aeafca437efa7fb28166703f845e321176aa62ab Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Sat, 4 Oct 2025 08:17:25 +0200
Subject: [PATCH 1/6] vad : fix memory leak by storing ggml_context in vad
 context struct

This commit addresses a memory leak issue in the voice activity
detection (VAD) where the ggml_context is not stored within the vad
context structure.

The motivation for this change is that the context memory stays
allocated and the tensors still point to that memory, but the memory is
never freed.

Resolves: https://github.com/ggml-org/whisper.cpp/issues/3452
---
 src/whisper.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/whisper.cpp b/src/whisper.cpp
index d99dd7be68c..e9c2a786a91 100644
--- a/src/whisper.cpp
+++ b/src/whisper.cpp
@@ -4402,6 +4402,7 @@ struct whisper_vad_context {
     std::vector<ggml_backend_t> backends;
     ggml_backend_buffer_t       buffer = nullptr;
     whisper_context_params      params;
+    ggml_context *              ctx = nullptr;
     std::vector<uint8_t>        ctx_buf;
     whisper_sched               sched;
 
@@ -4661,21 +4662,21 @@ static bool whisper_vad_init_context(whisper_vad_context * vctx) {
         /*.no_alloc   =*/ true,
     };
 
-    ggml_context * ctx = ggml_init(params);
-    if (!ctx) {
+    vctx->ctx = ggml_init(params);
+    if (!vctx->ctx) {
         WHISPER_LOG_ERROR("%s: failed to init LSTM state ggml context\n", __func__);
         return false;
     }
 
     // LSTM Hidden state
-    vctx->h_state = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, lstm_hidden_size);
+    vctx->h_state = ggml_new_tensor_1d(vctx->ctx, GGML_TYPE_F32, lstm_hidden_size);
     ggml_set_name(vctx->h_state, "h_state");
 
     // LSTM Cell state
-    vctx->c_state = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, lstm_hidden_size);
+    vctx->c_state = ggml_new_tensor_1d(vctx->ctx, GGML_TYPE_F32, lstm_hidden_size);
     ggml_set_name(vctx->c_state, "c_state");
 
-    vctx->buffer = ggml_backend_alloc_ctx_tensors(ctx, vctx->backends[0]);
+    vctx->buffer = ggml_backend_alloc_ctx_tensors(vctx->ctx, vctx->backends[0]);
     if (!vctx->buffer) {
         WHISPER_LOG_ERROR("%s: failed to allocate memory for the VAD state\n", __func__);
         return false;
@@ -5433,7 +5434,7 @@ void whisper_vad_free(whisper_vad_context * ctx) {
         for (auto & backend : ctx->backends) {
             ggml_backend_free(backend);
         }
-
+        ggml_free(ctx->ctx);
 
         delete ctx;
     }

From fd95ecce35874c7cce00ed52ff260bc4d7cd7105 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Sat, 4 Oct 2025 08:27:28 +0200
Subject: [PATCH 2/6] vad : free memory allocated for VAD hparams

This commit frees the model hyperparameters allocated for the VAD
context in the `whisper_vad_free` function. Specifically, it deletes the
`encoder_in_channels`, `encoder_out_channels`, and `kernel_sizes` arrays
allocated with `new[]` in the `whisper_vad_init` function.

The motivation for this is to prevent memory leaks when the VAD context
is freed.

Resolves: https://github.com/ggml-org/whisper.cpp/issues/3452
---
 src/whisper.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/whisper.cpp b/src/whisper.cpp
index e9c2a786a91..35d717eb5b1 100644
--- a/src/whisper.cpp
+++ b/src/whisper.cpp
@@ -5434,8 +5434,13 @@ void whisper_vad_free(whisper_vad_context * ctx) {
         for (auto & backend : ctx->backends) {
             ggml_backend_free(backend);
         }
+
         ggml_free(ctx->ctx);
 
+        delete[] ctx->model.hparams.encoder_in_channels;
+        delete[] ctx->model.hparams.encoder_out_channels;
+        delete[] ctx->model.hparams.kernel_sizes;
+
         delete ctx;
     }
 }

From f3c65a6d6e58082a66134e7d5e9d47dd63528260 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Sat, 4 Oct 2025 08:30:54 +0200
Subject: [PATCH 3/6] vad: free ggml buffer in whisper_vad_free

This commit frees the ggml buffer in the whisper_vad_free function to
prevent memory leaks.

Resolves: https://github.com/ggml-org/whisper.cpp/issues/3452
---
 src/whisper.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/whisper.cpp b/src/whisper.cpp
index 35d717eb5b1..5f466a18ccf 100644
--- a/src/whisper.cpp
+++ b/src/whisper.cpp
@@ -5421,6 +5421,9 @@ struct whisper_vad_segments * whisper_vad_segments_from_samples(
 
 void whisper_vad_free(whisper_vad_context * ctx) {
     if (ctx) {
+        if (ctx->buffer) {
+            ggml_backend_buffer_free(ctx->buffer);
+        }
         for (ggml_context * context : ctx->model.ctxs) {
             ggml_free(context);
         }

From a16c6626a835ce041b4fd241d9135e4f5efe50f7 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Mon, 6 Oct 2025 12:35:58 +0200
Subject: [PATCH 4/6] Revert "vad : fix memory leak by storing ggml_context in
 vad context struct"

This reverts commit aeafca437efa7fb28166703f845e321176aa62ab.
---
 src/whisper.cpp | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/src/whisper.cpp b/src/whisper.cpp
index 5f466a18ccf..08c1b9cfdbb 100644
--- a/src/whisper.cpp
+++ b/src/whisper.cpp
@@ -4402,7 +4402,6 @@ struct whisper_vad_context {
     std::vector<ggml_backend_t> backends;
     ggml_backend_buffer_t       buffer = nullptr;
     whisper_context_params      params;
-    ggml_context *              ctx = nullptr;
     std::vector<uint8_t>        ctx_buf;
     whisper_sched               sched;
 
@@ -4662,21 +4661,21 @@ static bool whisper_vad_init_context(whisper_vad_context * vctx) {
         /*.no_alloc   =*/ true,
     };
 
-    vctx->ctx = ggml_init(params);
-    if (!vctx->ctx) {
+    ggml_context * ctx = ggml_init(params);
+    if (!ctx) {
         WHISPER_LOG_ERROR("%s: failed to init LSTM state ggml context\n", __func__);
         return false;
     }
 
     // LSTM Hidden state
-    vctx->h_state = ggml_new_tensor_1d(vctx->ctx, GGML_TYPE_F32, lstm_hidden_size);
+    vctx->h_state = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, lstm_hidden_size);
     ggml_set_name(vctx->h_state, "h_state");
 
     // LSTM Cell state
-    vctx->c_state = ggml_new_tensor_1d(vctx->ctx, GGML_TYPE_F32, lstm_hidden_size);
+    vctx->c_state = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, lstm_hidden_size);
     ggml_set_name(vctx->c_state, "c_state");
 
-    vctx->buffer = ggml_backend_alloc_ctx_tensors(vctx->ctx, vctx->backends[0]);
+    vctx->buffer = ggml_backend_alloc_ctx_tensors(ctx, vctx->backends[0]);
     if (!vctx->buffer) {
         WHISPER_LOG_ERROR("%s: failed to allocate memory for the VAD state\n", __func__);
         return false;
@@ -5438,8 +5437,6 @@ void whisper_vad_free(whisper_vad_context * ctx) {
             ggml_backend_free(backend);
         }
 
-        ggml_free(ctx->ctx);
-
         delete[] ctx->model.hparams.encoder_in_channels;
         delete[] ctx->model.hparams.encoder_out_channels;
         delete[] ctx->model.hparams.kernel_sizes;

From 46a039add9ed4710558bcf3c151709510fd8ada3 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Mon, 6 Oct 2025 12:40:17 +0200
Subject: [PATCH 5/6] whisper : free ggml context in whisper_vad_init_context

This commit frees the ggml_context after initializing the VAD context in
the whisper_vad_init_context function.

The motivation for this is to prevent memory leaks.
---
 src/whisper.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/whisper.cpp b/src/whisper.cpp
index 08c1b9cfdbb..be161f19997 100644
--- a/src/whisper.cpp
+++ b/src/whisper.cpp
@@ -4694,6 +4694,7 @@ static bool whisper_vad_init_context(whisper_vad_context * vctx) {
 
         WHISPER_LOG_INFO("%s: compute buffer (VAD)   = %7.2f MB\n", __func__, whisper_sched_size(vctx->sched) / 1e6);
     }
+    ggml_free(ctx);
 
     return true;
 }

From 36ac9d72a3f8d6b41d18282e4954b0450b544bde Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Mon, 6 Oct 2025 13:26:11 +0200
Subject: [PATCH 6/6] squash! whisper : free ggml context in
 whisper_vad_init_context

Move ggml_free(ctx) ahead of the early returns to avoid memory leaks.
---
 src/whisper.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/whisper.cpp b/src/whisper.cpp
index be161f19997..39c53ba233a 100644
--- a/src/whisper.cpp
+++ b/src/whisper.cpp
@@ -4676,6 +4676,7 @@ static bool whisper_vad_init_context(whisper_vad_context * vctx) {
     ggml_set_name(vctx->c_state, "c_state");
 
     vctx->buffer = ggml_backend_alloc_ctx_tensors(ctx, vctx->backends[0]);
+    ggml_free(ctx);
     if (!vctx->buffer) {
         WHISPER_LOG_ERROR("%s: failed to allocate memory for the VAD state\n", __func__);
         return false;
@@ -4694,7 +4695,6 @@ static bool whisper_vad_init_context(whisper_vad_context * vctx) {
 
         WHISPER_LOG_INFO("%s: compute buffer (VAD)   = %7.2f MB\n", __func__, whisper_sched_size(vctx->sched) / 1e6);
     }
-    ggml_free(ctx);
 
     return true;
 }