From c8457572fab5272d3951140d819598fd59aeff34 Mon Sep 17 00:00:00 2001
From: Erik Schultheis <erik.schultheis@aalto.fi>
Date: Wed, 24 Jul 2024 16:55:09 +0200
Subject: [PATCH] just try to allocate on device; fall back if that fails

---
 llmc/cuda_utils.cuh | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/llmc/cuda_utils.cuh b/llmc/cuda_utils.cuh
index 22cdc6b3d..81e4066ee 100644
--- a/llmc/cuda_utils.cuh
+++ b/llmc/cuda_utils.cuh
@@ -210,18 +210,20 @@ void global_sum_deterministic(float* result, const Float* values, int count, cud
 
 // allocate memory, preferrably on the
 void cudaMallocConditionallyManaged(void** out, size_t bytes, const char *file, int line) {
-    size_t free, total;
-    cudaCheck(cudaMemGetInfo(&free, &total));
-    // check if we have enough space to pin the memory to device (with 1% slack)
-    if(100 * free < 99 * bytes) {
-        cudaCheck_(cudaMalloc((void**)out, bytes), file, line);
-    } else {
-        // if not, fallback to a managed allocation. It will be slower, but at least
+    // try to allocate `bytes` on device
+    cudaError_t err = cudaMalloc(out, bytes);
+    if(err == cudaErrorMemoryAllocation) {
+        // if that fails, fall back to a managed allocation. It will be slower, but at least
         // it won't crash.
-        fprintf(stderr, "[WARN] Not enough space to allocate %zu bytes on device.\n"
-                        "      Falling back to managed allocation.\n      Speed may be negatively affected.",
-                        bytes);
-        cudaCheck_(cudaMallocManaged((void**)out, bytes), file, line);
+        fprintf(stderr, "[WARN] Not enough space to allocate %zu MiB on device.\n"
+                        "       Falling back to managed allocation.\n"
+                        "       Speed may be negatively affected.\n",
+                bytes / 1024 / 1024);
+        // reset the error before the next API call
+        cudaGetLastError();
+        cudaCheck_(cudaMallocManaged(out, bytes), file, line);
+    } else {
+        cudaCheck_(err, file, line);
     }
 }