From c8457572fab5272d3951140d819598fd59aeff34 Mon Sep 17 00:00:00 2001 From: Erik Schultheis Date: Wed, 24 Jul 2024 16:55:09 +0200 Subject: [PATCH] just try to allocate on device; fallback if that fails --- llmc/cuda_utils.cuh | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/llmc/cuda_utils.cuh b/llmc/cuda_utils.cuh index 22cdc6b3d..81e4066ee 100644 --- a/llmc/cuda_utils.cuh +++ b/llmc/cuda_utils.cuh @@ -210,18 +210,20 @@ void global_sum_deterministic(float* result, const Float* values, int count, cud // allocate memory, preferrably on the void cudaMallocConditionallyManaged(void** out, size_t bytes, const char *file, int line) { - size_t free, total; - cudaCheck(cudaMemGetInfo(&free, &total)); - // check if we have enough space to pin the memory to device (with 1% slack) - if(100 * free < 99 * bytes) { - cudaCheck_(cudaMalloc((void**)out, bytes), file, line); - } else { - // if not, fallback to a managed allocation. It will be slower, but at least + // try to allocate `bytes` on device + cudaError_t err = cudaMalloc(out, bytes); + if(err == cudaErrorMemoryAllocation) { + // if that fails, fallback to a managed allocation. It will be slower, but at least // it won't crash. - fprintf(stderr, "[WARN] Not enough space to allocate %zu bytes on device.\n" - " Falling back to managed allocation.\n Speed may be negatively affected.", - bytes); - cudaCheck_(cudaMallocManaged((void**)out, bytes), file, line); + fprintf(stderr, "[WARN] Not enough space to allocate %zu MiB on device.\n" + " Falling back to managed allocation.\n" + " Speed may be negatively affected.\n", + bytes / 1024 / 1024); + // reset the error before the next API call + cudaGetLastError(); + cudaCheck_(cudaMallocManaged(out, bytes), file, line); + } else { + cudaCheck_(err, file, line); } }