From d54e7d86fe8e7e3868efbec00379732a67acf5d6 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 21 Oct 2025 13:43:02 +0000 Subject: [PATCH 1/3] Add comprehensive memory leak testing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add test-memory-leaks.cpp with dedicated leak regression tests - Test model/context/sampler lifecycle patterns - Test error condition cleanup - Test concurrent usage with multiple threads - Test batch operations and KV cache clearing - Add optional Valgrind integration via CMake target Tests cover: - Repeated model load/free cycles (10 iterations) - Context creation/destruction patterns (10 iterations) - Multiple contexts sharing same model (5 contexts) - Sampler lifecycle with chain operations - Backend initialization cleanup - Error path cleanup validation (failed model load) - Concurrent model usage (3 threads) - Batch allocation/deallocation - KV cache memory clearing operations - Model load cancellation (via progress callback) The test follows existing patterns from test-autorelease.cpp and test-thread-safety.cpp, using the get-model.cpp helper for model paths and the proper cleanup order (sampler → context → model → backend). Run with: cmake -DLLAMA_SANITIZE_ADDRESS=ON && ctest -R test-memory-leaks Optional: make test-valgrind (requires Valgrind installed) Related to the disabled test-opt.cpp, which has a known memory leak at line 300 (ggml_opt_alloc called in a loop without cleanup between iterations). Co-Authored-By: Stephen Cornwell --- tests/CMakeLists.txt | 21 +++ tests/test-memory-leaks.cpp | 353 ++++++++++++++++++++++++++++++++++++ 2 files changed, 374 insertions(+) create mode 100644 tests/test-memory-leaks.cpp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 91719577564a9..48de63d87fca0 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -201,6 +201,7 @@ llama_build_and_test(test-backend-ops.cpp) llama_build_and_test(test-model-load-cancel.cpp LABEL "model") llama_build_and_test(test-autorelease.cpp LABEL "model") +llama_build_and_test(test-memory-leaks.cpp LABEL "model") if (NOT GGML_BACKEND_DL) # these tests use the backends directly and cannot be built with dynamic loading @@ -219,3 +220,23 @@ target_link_libraries(${LLAMA_TEST_NAME} PRIVATE mtmd) get_filename_component(TEST_TARGET test-c.c NAME_WE) add_executable(${TEST_TARGET} test-c.c) target_link_libraries(${TEST_TARGET} PRIVATE llama) + +# Optional Valgrind target for memory leak checking +find_program(VALGRIND_EXECUTABLE valgrind) +if(VALGRIND_EXECUTABLE) + add_custom_target(test-valgrind + COMMAND ${VALGRIND_EXECUTABLE} + --leak-check=full + --show-leak-kinds=all + --track-origins=yes + --error-exitcode=1 + ${CMAKE_CURRENT_BINARY_DIR}/test-memory-leaks + DEPENDS test-memory-leaks + COMMENT "Running memory leak tests with Valgrind" + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + message(STATUS "Valgrind found: ${VALGRIND_EXECUTABLE}") + message(STATUS "Run 'make test-valgrind' to check for memory leaks with Valgrind") +else() + message(STATUS "Valgrind not found - install it for additional leak checking") +endif() diff --git a/tests/test-memory-leaks.cpp b/tests/test-memory-leaks.cpp new file mode 100644 index 0000000000000..96f027893a8a7 --- /dev/null +++ b/tests/test-memory-leaks.cpp @@ -0,0 +1,353 @@ +// +// + +#include "llama.h" +#include "get-model.h" +#include <cstdio> +#include <cstdlib> +#include <atomic> +#include <thread> +#include <vector> + +static void test_model_load_unload_cycles(const char * 
model_path) { + fprintf(stderr, "test_model_load_unload_cycles: "); + + for (int i = 0; i < 10; i++) { + llama_backend_init(); + + auto params = llama_model_default_params(); + auto * model = llama_model_load_from_file(model_path, params); + if (model == nullptr) { + fprintf(stderr, "FAILED (model load failed on iteration %d)\n", i); + return; + } + + llama_model_free(model); + llama_backend_free(); + } + + fprintf(stderr, "OK\n"); +} + +static void test_context_lifecycle(const char * model_path) { + fprintf(stderr, "test_context_lifecycle: "); + + llama_backend_init(); + + auto model_params = llama_model_default_params(); + auto * model = llama_model_load_from_file(model_path, model_params); + if (model == nullptr) { + fprintf(stderr, "FAILED (model load failed)\n"); + llama_backend_free(); + return; + } + + for (int i = 0; i < 10; i++) { + auto ctx_params = llama_context_default_params(); + ctx_params.n_ctx = 512; + + auto * ctx = llama_init_from_model(model, ctx_params); + if (ctx == nullptr) { + fprintf(stderr, "FAILED (context creation failed on iteration %d)\n", i); + llama_model_free(model); + llama_backend_free(); + return; + } + + llama_free(ctx); + } + + llama_model_free(model); + llama_backend_free(); + + fprintf(stderr, "OK\n"); +} + +static void test_multiple_contexts_same_model(const char * model_path) { + fprintf(stderr, "test_multiple_contexts_same_model: "); + + llama_backend_init(); + + auto model_params = llama_model_default_params(); + auto * model = llama_model_load_from_file(model_path, model_params); + if (model == nullptr) { + fprintf(stderr, "FAILED (model load failed)\n"); + llama_backend_free(); + return; + } + + const int num_contexts = 5; + std::vector<llama_context *> contexts(num_contexts); + + auto ctx_params = llama_context_default_params(); + ctx_params.n_ctx = 512; + + for (int i = 0; i < num_contexts; i++) { + contexts[i] = llama_init_from_model(model, ctx_params); + if (contexts[i] == nullptr) { + fprintf(stderr, "FAILED (context %d creation failed)\n", i); + for (int j = 0; j < i; j++) { + llama_free(contexts[j]); + } + llama_model_free(model); + llama_backend_free(); + return; + } + } + + for (auto * ctx : contexts) { + llama_free(ctx); + } + + llama_model_free(model); + llama_backend_free(); + + fprintf(stderr, "OK\n"); +} + +static void test_sampler_lifecycle(const char * model_path) { + fprintf(stderr, "test_sampler_lifecycle: "); + + llama_backend_init(); + + auto model_params = llama_model_default_params(); + auto * model = llama_model_load_from_file(model_path, model_params); + if (model == nullptr) { + fprintf(stderr, "FAILED (model load failed)\n"); + llama_backend_free(); + return; + } + + auto ctx_params = llama_context_default_params(); + ctx_params.n_ctx = 512; + auto * ctx = llama_init_from_model(model, ctx_params); + if (ctx == nullptr) { + fprintf(stderr, "FAILED (context creation failed)\n"); + llama_model_free(model); + llama_backend_free(); + return; + } + + for (int i = 0; i < 10; i++) { + auto sparams = llama_sampler_chain_default_params(); + auto * smpl = llama_sampler_chain_init(sparams); + if (smpl == nullptr) { + fprintf(stderr, "FAILED (sampler creation failed on iteration %d)\n", i); + llama_free(ctx); + llama_model_free(model); + llama_backend_free(); + return; + } + + llama_sampler_chain_add(smpl, llama_sampler_init_greedy()); + llama_sampler_free(smpl); + } + + llama_free(ctx); + llama_model_free(model); + llama_backend_free(); + + fprintf(stderr, "OK\n"); +} + +static void test_error_condition_cleanup(const char * /* model_path */) { + 
fprintf(stderr, "test_error_condition_cleanup: "); + + llama_backend_init(); + + auto params = llama_model_default_params(); + auto * model = llama_model_load_from_file("/nonexistent/path/to/model.gguf", params); + if (model != nullptr) { + fprintf(stderr, "FAILED (expected nullptr for nonexistent model)\n"); + llama_model_free(model); + llama_backend_free(); + return; + } + + llama_backend_free(); + + fprintf(stderr, "OK\n"); +} + +static void test_model_load_cancel(const char * model_path) { + fprintf(stderr, "test_model_load_cancel: "); + + llama_backend_init(); + + auto params = llama_model_default_params(); + params.use_mmap = false; + params.progress_callback = [](float progress, void * ctx) { + (void) ctx; + return progress > 0.50f; + }; + + auto * model = llama_model_load_from_file(model_path, params); + + if (model != nullptr) { + llama_model_free(model); + } + + llama_backend_free(); + + fprintf(stderr, "OK\n"); +} + +static void test_batch_operations(const char * model_path) { + fprintf(stderr, "test_batch_operations: "); + + llama_backend_init(); + + auto model_params = llama_model_default_params(); + auto * model = llama_model_load_from_file(model_path, model_params); + if (model == nullptr) { + fprintf(stderr, "FAILED (model load failed)\n"); + llama_backend_free(); + return; + } + + auto ctx_params = llama_context_default_params(); + ctx_params.n_ctx = 512; + auto * ctx = llama_init_from_model(model, ctx_params); + if (ctx == nullptr) { + fprintf(stderr, "FAILED (context creation failed)\n"); + llama_model_free(model); + llama_backend_free(); + return; + } + + for (int i = 0; i < 10; i++) { + llama_batch batch = llama_batch_init(32, 0, 1); + + llama_batch_free(batch); + } + + llama_free(ctx); + llama_model_free(model); + llama_backend_free(); + + fprintf(stderr, "OK\n"); +} + +static void test_backend_init_free_cycles() { + fprintf(stderr, "test_backend_init_free_cycles: "); + + for (int i = 0; i < 10; i++) { + llama_backend_init(); + llama_backend_free(); + } + + fprintf(stderr, "OK\n"); +} + +static void test_threaded_contexts(const char * model_path) { + fprintf(stderr, "test_threaded_contexts: "); + + llama_backend_init(); + + auto model_params = llama_model_default_params(); + auto * model = llama_model_load_from_file(model_path, model_params); + if (model == nullptr) { + fprintf(stderr, "FAILED (model load failed)\n"); + llama_backend_free(); + return; + } + + std::atomic<bool> failed = false; + std::vector<std::thread> threads; + const int num_threads = 3; + + for (int t = 0; t < num_threads; t++) { + threads.emplace_back([&, t, model]() { + auto ctx_params = llama_context_default_params(); + ctx_params.n_ctx = 512; + + auto * ctx = llama_init_from_model(model, ctx_params); + if (ctx == nullptr) { + failed.store(true); + return; + } + + auto sparams = llama_sampler_chain_default_params(); + auto * smpl = llama_sampler_chain_init(sparams); + if (smpl == nullptr) { + llama_free(ctx); + failed.store(true); + return; + } + + llama_sampler_chain_add(smpl, llama_sampler_init_greedy()); + + llama_sampler_free(smpl); + llama_free(ctx); + }); + } + + for (auto & thread : threads) { + thread.join(); + } + + llama_model_free(model); + llama_backend_free(); + + if (failed) { + fprintf(stderr, "FAILED (thread error)\n"); + } else { + fprintf(stderr, "OK\n"); + } +} + +static void test_kv_cache_clear_operations(const char * model_path) { + fprintf(stderr, "test_kv_cache_clear_operations: "); + + llama_backend_init(); + + auto model_params = llama_model_default_params(); + auto * model = 
llama_model_load_from_file(model_path, model_params); + if (model == nullptr) { + fprintf(stderr, "FAILED (model load failed)\n"); + llama_backend_free(); + return; + } + + auto ctx_params = llama_context_default_params(); + ctx_params.n_ctx = 512; + auto * ctx = llama_init_from_model(model, ctx_params); + if (ctx == nullptr) { + fprintf(stderr, "FAILED (context creation failed)\n"); + llama_model_free(model); + llama_backend_free(); + return; + } + + for (int i = 0; i < 10; i++) { + llama_memory_t mem = llama_get_memory(ctx); + llama_memory_clear(mem, false); + } + + llama_free(ctx); + llama_model_free(model); + llama_backend_free(); + + fprintf(stderr, "OK\n"); +} + +int main(int argc, char ** argv) { + auto * model_path = get_model_or_exit(argc, argv); + + fprintf(stderr, "Running memory leak regression tests...\n\n"); + + test_backend_init_free_cycles(); + test_model_load_unload_cycles(model_path); + test_context_lifecycle(model_path); + test_multiple_contexts_same_model(model_path); + test_sampler_lifecycle(model_path); + test_batch_operations(model_path); + test_kv_cache_clear_operations(model_path); + test_threaded_contexts(model_path); + test_model_load_cancel(model_path); + test_error_condition_cleanup(model_path); + + fprintf(stderr, "\nAll memory leak tests completed successfully!\n"); + + return 0; +} From 8a597139a0f985ea2cbcbb3c939e5d8562a9b7fe Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 21 Oct 2025 13:51:31 +0000 Subject: [PATCH 2/3] docs: document memory leak testing in README - Add comprehensive Testing section with Memory Leak Testing subsection - Document how to run tests with AddressSanitizer - Document Valgrind integration (make test-valgrind) - List all 10 test scenarios covered by test-memory-leaks.cpp - Include build commands, test execution instructions - Document environment variables and CI integration - Reference known issues (test-opt.cpp leak) Addresses GitHub comment from @stephencornwell requesting README update. Co-Authored-By: Stephen Cornwell --- README.md | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/README.md b/README.md index 17f59e988e3d1..d2fbc760c6a40 100644 --- a/README.md +++ b/README.md @@ -542,6 +542,81 @@ To learn more about model quantization, [read this documentation](tools/quantize - [Performance troubleshooting](docs/development/token_generation_performance_tips.md) - [GGML tips & tricks](https://github.com/ggml-org/llama.cpp/wiki/GGML-Tips-&-Tricks) +#### Testing + +##### Memory Leak Testing + +The repository includes comprehensive memory leak regression tests to ensure proper memory management across various lifecycle scenarios. These tests go beyond the existing AddressSanitizer (ASan) integration by providing dedicated leak detection test suites. 
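+
+For example, assuming the tests have already been built, a minimal run might look like the following. The model path is only an illustrative placeholder; any small GGUF model works, and the tests read it from the `LLAMACPP_TEST_MODELFILE` environment variable (see Environment Variables below):
+
+```bash
+# illustrative path: point this at any small GGUF model available locally
+export LLAMACPP_TEST_MODELFILE=/path/to/model.gguf
+cd build && ctest -R test-memory-leaks --output-on-failure
+```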
+ +**Running with AddressSanitizer:** + +The primary memory leak detection mechanism uses AddressSanitizer, which is configured as a build option: + +```bash +# Build with AddressSanitizer enabled +cmake -B build -DLLAMA_SANITIZE_ADDRESS=ON -DCMAKE_BUILD_TYPE=Debug +cmake --build build + +# Run the memory leak regression tests +cd build +ctest -R test-memory-leaks --output-on-failure + +# Or run directly +./bin/test-memory-leaks +``` + +Other available sanitizers: +- `LLAMA_SANITIZE_THREAD=ON` - Detects data races (note: runs without OpenMP) +- `LLAMA_SANITIZE_UNDEFINED=ON` - Detects undefined behavior + +**Running with Valgrind:** + +Optional Valgrind integration is available for additional leak checking: + +```bash +# Build the tests (Valgrind target is automatically configured if valgrind is installed) +cmake -B build +cmake --build build + +# Run memory leak tests with Valgrind +cd build +make test-valgrind +``` + +The Valgrind target runs with comprehensive leak detection flags: +- `--leak-check=full` - Detailed leak information +- `--show-leak-kinds=all` - Reports all leak types +- `--track-origins=yes` - Tracks origin of uninitialized values + +**Test Coverage:** + +The `test-memory-leaks.cpp` suite includes 10 comprehensive tests covering: + +1. **Backend initialization cycles** - Repeated `llama_backend_init()` / `llama_backend_free()` cycles +2. **Model load/unload cycles** - Repeated model loading and cleanup (10 iterations) +3. **Context lifecycle** - Context creation and destruction patterns (10 iterations) +4. **Multiple contexts per model** - Creating multiple contexts from the same model (5 contexts) +5. **Sampler lifecycle** - Sampler creation, chain operations, and cleanup +6. **Batch operations** - Batch allocation and deallocation patterns +7. **KV cache clearing** - Memory clearing operations on contexts +8. **Threaded contexts** - Concurrent model usage with multiple threads +9. **Model load cancellation** - Cleanup when canceling model loading mid-process +10. **Error condition cleanup** - Proper cleanup when operations fail (e.g., invalid model path) + +All tests follow proper cleanup order: sampler → context → model → backend. + +**Environment Variables:** + +- `LLAMACPP_TEST_MODELFILE` - Path to test model file (required for running tests) + +**Continuous Integration:** + +The GitHub Actions CI automatically runs all tests with all three sanitizers (ADDRESS, THREAD, UNDEFINED) on every pull request to catch memory issues before they reach production. + +**Known Issues:** + +- `test-opt.cpp` is currently disabled with `LLAMA_SANITIZE_ADDRESS` due to a known memory leak in `ggml_opt_alloc()` called within a loop (see `tests/test-opt.cpp:300`) + #### Seminal papers and background on the models If your issue is with model generation quality, then please at least scan the following links and papers to understand the limitations of LLaMA models. 
This is especially important when choosing an appropriate model size and appreciating both the significant and subtle differences between LLaMA models and ChatGPT: From eedc2d8aca0bb284a9d5bab440d6621263421af0 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 21 Oct 2025 14:18:23 +0000 Subject: [PATCH 3/3] fix: remove trailing whitespace from test files Fixes editorconfig check failures: - Remove 67 trailing whitespace errors from test-memory-leaks.cpp - Remove 4 trailing whitespace errors from CMakeLists.txt (lines 228-231) This resolves the editorconfig CI check failure. Co-Authored-By: Stephen Cornwell --- tests/CMakeLists.txt | 8 +-- tests/test-memory-leaks.cpp | 134 ++++++++++++++++++------------------ 2 files changed, 71 insertions(+), 71 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 48de63d87fca0..ca754d18cd7d8 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -225,10 +225,10 @@ target_link_libraries(${TEST_TARGET} PRIVATE llama) find_program(VALGRIND_EXECUTABLE valgrind) if(VALGRIND_EXECUTABLE) add_custom_target(test-valgrind - COMMAND ${VALGRIND_EXECUTABLE} - --leak-check=full - --show-leak-kinds=all - --track-origins=yes + COMMAND ${VALGRIND_EXECUTABLE} + --leak-check=full + --show-leak-kinds=all + --track-origins=yes --error-exitcode=1 ${CMAKE_CURRENT_BINARY_DIR}/test-memory-leaks DEPENDS test-memory-leaks COMMENT "Running memory leak tests with Valgrind" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} diff --git a/tests/test-memory-leaks.cpp b/tests/test-memory-leaks.cpp index 96f027893a8a7..247dfebfcee51 100644 --- a/tests/test-memory-leaks.cpp +++ b/tests/test-memory-leaks.cpp @@ -1,4 +1,4 @@ -// +// // #include "llama.h" @@ -11,29 +11,29 @@ static void test_model_load_unload_cycles(const char * model_path) { fprintf(stderr, "test_model_load_unload_cycles: "); - + for (int i = 0; i < 10; i++) { llama_backend_init(); - + auto params = llama_model_default_params(); auto * model = llama_model_load_from_file(model_path, params); if (model == nullptr) { fprintf(stderr, "FAILED (model load failed on iteration %d)\n", i); return; } - + llama_model_free(model); llama_backend_free(); } - + fprintf(stderr, "OK\n"); } static void test_context_lifecycle(const char * model_path) { fprintf(stderr, "test_context_lifecycle: "); - + llama_backend_init(); - + auto model_params = llama_model_default_params(); auto * model = llama_model_load_from_file(model_path, model_params); if (model == nullptr) { @@ -41,11 +41,11 @@ static void test_context_lifecycle(const char * model_path) { llama_backend_free(); return; } - + for (int i = 0; i < 10; i++) { auto ctx_params = llama_context_default_params(); ctx_params.n_ctx = 512; - + auto * ctx = llama_init_from_model(model, ctx_params); if (ctx == nullptr) { fprintf(stderr, "FAILED (context creation failed on iteration %d)\n", i); @@ -53,21 +53,21 @@ static void test_context_lifecycle(const char * model_path) { llama_backend_free(); return; } - + llama_free(ctx); } - + llama_model_free(model); llama_backend_free(); - + fprintf(stderr, "OK\n"); } static void test_multiple_contexts_same_model(const char * model_path) { fprintf(stderr, "test_multiple_contexts_same_model: "); - + llama_backend_init(); - + auto model_params = llama_model_default_params(); auto * model = llama_model_load_from_file(model_path, model_params); if (model == nullptr) { @@ -75,13 +75,13 @@ static void test_multiple_contexts_same_model(const char * model_path) { llama_backend_free(); return; } - + const int num_contexts = 5; std::vector<llama_context *> contexts(num_contexts); - + auto 
ctx_params = llama_context_default_params(); ctx_params.n_ctx = 512; - + for (int i = 0; i < num_contexts; i++) { contexts[i] = llama_init_from_model(model, ctx_params); if (contexts[i] == nullptr) { @@ -94,22 +94,22 @@ static void test_multiple_contexts_same_model(const char * model_path) { return; } } - + for (auto * ctx : contexts) { llama_free(ctx); } - + llama_model_free(model); llama_backend_free(); - + fprintf(stderr, "OK\n"); } static void test_sampler_lifecycle(const char * model_path) { fprintf(stderr, "test_sampler_lifecycle: "); - + llama_backend_init(); - + auto model_params = llama_model_default_params(); auto * model = llama_model_load_from_file(model_path, model_params); if (model == nullptr) { @@ -117,7 +117,7 @@ static void test_sampler_lifecycle(const char * model_path) { llama_backend_free(); return; } - + auto ctx_params = llama_context_default_params(); ctx_params.n_ctx = 512; auto * ctx = llama_init_from_model(model, ctx_params); @@ -127,7 +127,7 @@ static void test_sampler_lifecycle(const char * model_path) { llama_backend_free(); return; } - + for (int i = 0; i < 10; i++) { auto sparams = llama_sampler_chain_default_params(); auto * smpl = llama_sampler_chain_init(sparams); @@ -138,23 +138,23 @@ static void test_sampler_lifecycle(const char * model_path) { llama_backend_free(); return; } - + llama_sampler_chain_add(smpl, llama_sampler_init_greedy()); llama_sampler_free(smpl); } - + llama_free(ctx); llama_model_free(model); llama_backend_free(); - + fprintf(stderr, "OK\n"); } static void test_error_condition_cleanup(const char * /* model_path */) { fprintf(stderr, "test_error_condition_cleanup: "); - + llama_backend_init(); - + auto params = llama_model_default_params(); auto * model = llama_model_load_from_file("/nonexistent/path/to/model.gguf", params); if (model != nullptr) { @@ -163,40 +163,40 @@ static void test_error_condition_cleanup(const char * /* model_path */) { llama_backend_free(); return; } - + llama_backend_free(); - + fprintf(stderr, "OK\n"); } static void test_model_load_cancel(const char * model_path) { fprintf(stderr, "test_model_load_cancel: "); - + llama_backend_init(); - + auto params = llama_model_default_params(); params.use_mmap = false; params.progress_callback = [](float progress, void * ctx) { (void) ctx; return progress > 0.50f; }; - + auto * model = llama_model_load_from_file(model_path, params); - + if (model != nullptr) { llama_model_free(model); } - + llama_backend_free(); - + fprintf(stderr, "OK\n"); } static void test_batch_operations(const char * model_path) { fprintf(stderr, "test_batch_operations: "); - + llama_backend_init(); - + auto model_params = llama_model_default_params(); auto * model = llama_model_load_from_file(model_path, model_params); if (model == nullptr) { @@ -204,7 +204,7 @@ static void test_batch_operations(const char * model_path) { llama_backend_free(); return; } - + auto ctx_params = llama_context_default_params(); ctx_params.n_ctx = 512; auto * ctx = llama_init_from_model(model, ctx_params); @@ -214,36 +214,36 @@ static void test_batch_operations(const char * model_path) { llama_backend_free(); return; } - + for (int i = 0; i < 10; i++) { llama_batch batch = llama_batch_init(32, 0, 1); - + llama_batch_free(batch); } - + llama_free(ctx); llama_model_free(model); llama_backend_free(); - + fprintf(stderr, "OK\n"); } static void test_backend_init_free_cycles() { fprintf(stderr, "test_backend_init_free_cycles: "); - + for (int i = 0; i < 10; i++) { llama_backend_init(); llama_backend_free(); } - + fprintf(stderr, 
"OK\n"); } static void test_threaded_contexts(const char * model_path) { fprintf(stderr, "test_threaded_contexts: "); - + llama_backend_init(); - + auto model_params = llama_model_default_params(); auto * model = llama_model_load_from_file(model_path, model_params); if (model == nullptr) { @@ -251,22 +251,22 @@ static void test_threaded_contexts(const char * model_path) { llama_backend_free(); return; } - + std::atomic failed = false; std::vector threads; const int num_threads = 3; - + for (int t = 0; t < num_threads; t++) { threads.emplace_back([&, t, model]() { auto ctx_params = llama_context_default_params(); ctx_params.n_ctx = 512; - + auto * ctx = llama_init_from_model(model, ctx_params); if (ctx == nullptr) { failed.store(true); return; } - + auto sparams = llama_sampler_chain_default_params(); auto * smpl = llama_sampler_chain_init(sparams); if (smpl == nullptr) { @@ -274,21 +274,21 @@ static void test_threaded_contexts(const char * model_path) { failed.store(true); return; } - + llama_sampler_chain_add(smpl, llama_sampler_init_greedy()); - + llama_sampler_free(smpl); llama_free(ctx); }); } - + for (auto & thread : threads) { thread.join(); } - + llama_model_free(model); llama_backend_free(); - + if (failed) { fprintf(stderr, "FAILED (thread error)\n"); } else { @@ -298,9 +298,9 @@ static void test_threaded_contexts(const char * model_path) { static void test_kv_cache_clear_operations(const char * model_path) { fprintf(stderr, "test_kv_cache_clear_operations: "); - + llama_backend_init(); - + auto model_params = llama_model_default_params(); auto * model = llama_model_load_from_file(model_path, model_params); if (model == nullptr) { @@ -308,7 +308,7 @@ static void test_kv_cache_clear_operations(const char * model_path) { llama_backend_free(); return; } - + auto ctx_params = llama_context_default_params(); ctx_params.n_ctx = 512; auto * ctx = llama_init_from_model(model, ctx_params); @@ -318,24 +318,24 @@ static void test_kv_cache_clear_operations(const char * model_path) { llama_backend_free(); return; } - + for (int i = 0; i < 10; i++) { llama_memory_t mem = llama_get_memory(ctx); llama_memory_clear(mem, false); } - + llama_free(ctx); llama_model_free(model); llama_backend_free(); - + fprintf(stderr, "OK\n"); } int main(int argc, char ** argv) { auto * model_path = get_model_or_exit(argc, argv); - + fprintf(stderr, "Running memory leak regression tests...\n\n"); - + test_backend_init_free_cycles(); test_model_load_unload_cycles(model_path); test_context_lifecycle(model_path); @@ -346,8 +346,8 @@ int main(int argc, char ** argv) { test_threaded_contexts(model_path); test_model_load_cancel(model_path); test_error_condition_cleanup(model_path); - + fprintf(stderr, "\nAll memory leak tests completed successfully!\n"); - + return 0; }