Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update logging and reporting #305

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions brian2cuda/brianlib/cudaVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <cstdio>
#include <assert.h>
#include "cuda_utils.h"

/*
* current memory allocation strategy:
Expand Down Expand Up @@ -35,7 +36,8 @@ class cudaVector
}
else
{
printf("ERROR while creating cudaVector with size %ld in cudaVector.h (constructor)\n", sizeof(scalar)*INITIAL_SIZE);
LOG_CUDA_ERROR("While creating cudaVector with size %ld in cudaVector.h"
"(constructor)\n", sizeof(scalar)*INITIAL_SIZE);
assert(m_data != NULL);
}
}
Expand All @@ -56,7 +58,8 @@ class cudaVector
if (index < 0 || index >= m_size)
{
// TODO: check for proper exception throwing in cuda kernels
printf("ERROR returning a reference to index %d in cudaVector::at() (size = %u)\n", index, m_size);
LOG_CUDA_ERROR("Returning a reference to index %d in cudaVector::at()"
"(size = %u)\n", index, m_size);
assert(index < m_size);
}
return m_data[index];
Expand Down Expand Up @@ -85,7 +88,7 @@ class cudaVector
}
else
{
printf("ERROR invalid index %d, must be in range 0 - %d\n", pos, m_size);
LOG_CUDA_ERROR("Invalid index %d, must be in range 0 - %d\n", pos, m_size);
assert(pos <= m_size);
}
};
Expand Down Expand Up @@ -134,7 +137,8 @@ class cudaVector
}
else
{
printf("ERROR while allocating %ld bytes in cudaVector.h/reserve()\n", sizeof(scalar)*new_capacity);
LOG_CUDA_ERROR("While allocating %ld bytes in cudaVector.h/reserve()\n",
sizeof(scalar)*new_capacity);
assert(new_data != NULL);
}
}
Expand Down
122 changes: 98 additions & 24 deletions brian2cuda/brianlib/cuda_utils.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
#ifndef BRIAN2CUDA_ERROR_CHECK_H
#define BRIAN2CUDA_ERROR_CHECK_H
#include <stdio.h>
#include <thrust/system_error.h>
#include "objects.h"
#include "curand.h"

// Define this to turn on error checking
#define BRIAN2CUDA_ERROR_CHECK
// Define this to synchronize device before checking errors
Expand All @@ -15,6 +10,80 @@
// Define this to synchronize device before checking memory
//#define BRIAN2CUDA_MEMORY_CHECK_BLOCKING

// Choose which LOG macros to define based on LOG_LEVEL_<level> macro defined
// during compilation, <level> is one of DIAGNOSTIC, DEBUG, INFO, WARNING,
// ERROR, CRITICAL. Each level also enables all macros of higher severity
// (e.g. LOG_LEVEL_INFO enables the INFO, WARNING and ERROR macros as well as
// the device-side LOG_CUDA_ERROR macro).
// For now we treat CRITICAL the same as ERROR.
// Only the first matching branch applies: if several LOG_LEVEL_* macros are
// defined at the same time, the most severe one wins.

// TODO Could make this with less code with if !defined? Though this is easier
// to understand.
#if defined LOG_LEVEL_CRITICAL || defined LOG_LEVEL_ERROR
#define DEF_LOG_CUDA_ERROR
#define DEF_LOG_ERROR
#elif defined LOG_LEVEL_WARNING
#define DEF_LOG_CUDA_ERROR
#define DEF_LOG_ERROR
#define DEF_LOG_WARNING
#elif defined LOG_LEVEL_INFO
#define DEF_LOG_CUDA_ERROR
#define DEF_LOG_ERROR
#define DEF_LOG_WARNING
#define DEF_LOG_INFO
#elif defined LOG_LEVEL_DEBUG
#define DEF_LOG_CUDA_ERROR
#define DEF_LOG_ERROR
#define DEF_LOG_WARNING
#define DEF_LOG_INFO
#define DEF_LOG_DEBUG
#elif defined LOG_LEVEL_DIAGNOSTIC
#define DEF_LOG_CUDA_ERROR
#define DEF_LOG_ERROR
#define DEF_LOG_WARNING
#define DEF_LOG_INFO
#define DEF_LOG_DEBUG
#define DEF_LOG_DIAGNOSTIC
#endif

// DEFINE the LOG macros as printf statements or no-ops if not defined.
// LOG_CUDA_ERROR is the only macro usable in device code currently and will
// be printed to stdout when CUDA ring buffer is flushed at host/device
// serialization (this sometimes does not happen when the program crashes).
// TODO: All other LOG macros could in principle be redirected to the Brian2
// log file via fprintf (not implemented yet)
//
// Each enabled host-side macro is wrapped in do { ... } while(0) so that it
// expands to exactly one statement and is safe inside unbraced if/else
// branches (the bare `printf(...); fflush(stdout);` form would flush
// unconditionally and mis-attach a following `else`).
// `##__VA_ARGS__` deletes the trailing comma when a macro is invoked with a
// format string only (GNU extension; accepted by nvcc, gcc, clang and MSVC),
// so no `LOG_XXX("%s", "msg")` workaround is needed.
#ifdef DEF_LOG_CUDA_ERROR
// Single expression, no fflush: must stay callable from device code, where
// printf goes to the device ring buffer and fflush is unavailable.
#define LOG_CUDA_ERROR(fmt, ...) printf("GPU ERROR\t" fmt, ##__VA_ARGS__)
#else
#define LOG_CUDA_ERROR(...) do {} while(0)
#endif

#ifdef DEF_LOG_ERROR
#define LOG_ERROR(fmt, ...) do { printf("CUDA ERROR\t" fmt, ##__VA_ARGS__); fflush(stdout); } while(0)
#else
#define LOG_ERROR(...) do {} while(0)
#endif

#ifdef DEF_LOG_WARNING
#define LOG_WARNING(fmt, ...) do { printf("CUDA WARNING\t" fmt, ##__VA_ARGS__); fflush(stdout); } while(0)
#else
#define LOG_WARNING(...) do {} while(0)
#endif

#ifdef DEF_LOG_INFO
#define LOG_INFO(fmt, ...) do { printf("CUDA INFO\t" fmt, ##__VA_ARGS__); fflush(stdout); } while(0)
#else
#define LOG_INFO(...) do {} while(0)
#endif

#ifdef DEF_LOG_DEBUG
#define LOG_DEBUG(fmt, ...) do { printf("CUDA DEBUG\t" fmt, ##__VA_ARGS__); fflush(stdout); } while(0)
#else
#define LOG_DEBUG(...) do {} while(0)
#endif

#ifdef DEF_LOG_DIAGNOSTIC
#define LOG_DIAGNOSTIC(fmt, ...) do { printf("CUDA DIAGNOSTIC\t" fmt, ##__VA_ARGS__); fflush(stdout); } while(0)
#else
#define LOG_DIAGNOSTIC(...) do {} while(0)
#endif

// partly adapted from https://gist.github.com/ashwin/2652488
#define CUDA_SAFE_CALL(err) _cudaSafeCall(err, __FILE__, __LINE__, #err)
Expand All @@ -23,6 +92,11 @@
#define THRUST_CHECK_ERROR(code) { try {code;} \
catch(...) {_thrustCheckError(__FILE__, __LINE__, #code);} }

// Place includes after macro definitions to avoid circular includes
#include <stdio.h>
#include <thrust/system_error.h>
#include "objects.h"
#include "curand.h"

// adapted from NVIDIA cuda samples, shipped with cuda 10.1 (common/inc/helper_cuda.h)
#ifdef CURAND_H_
Expand Down Expand Up @@ -79,8 +153,8 @@ inline void _cudaSafeCall(cudaError err, const char *file, const int line, const
#ifdef BRIAN2CUDA_ERROR_CHECK
if (cudaSuccess != err)
{
fprintf(stderr, "ERROR: %s failed at %s:%i : %s\n",
call, file, line, cudaGetErrorString(err));
LOG_ERROR("%s failed at %s:%i : %s\n",
call, file, line, cudaGetErrorString(err));
exit(-1);
}
#endif
Expand All @@ -94,8 +168,8 @@ inline void _cudaSafeCall(curandStatus_t err, const char *file, const int line,
#ifdef BRIAN2CUDA_ERROR_CHECK
if (CURAND_STATUS_SUCCESS != err)
{
fprintf(stderr, "ERROR: %s failed at %s:%i : %s\n",
call, file, line, _curandGetErrorEnum(err));
LOG_ERROR("%s failed at %s:%i : %s\n",
call, file, line, _curandGetErrorEnum(err));
exit(-1);
}
#endif
Expand All @@ -111,17 +185,17 @@ inline void _cudaCheckError(const char *file, const int line, const char *msg)
cudaError err = cudaDeviceSynchronize();
if(cudaSuccess != err)
{
fprintf(stderr, "ERROR: CUDA_CHECK_ERROR() failed after %s at %s:%i : %s\n",
msg, file, line, cudaGetErrorString(err));
LOG_ERROR("CUDA_CHECK_ERROR() failed after %s at %s:%i : %s\n",
msg, file, line, cudaGetErrorString(err));
exit(-1);
}
#else
#ifdef BRIAN2CUDA_ERROR_CHECK
cudaError err = cudaGetLastError();
if (cudaSuccess != err)
{
fprintf(stderr, "ERROR: CUDA_CHECK_ERROR() failed at %s:%i : %s\n",
file, line, cudaGetErrorString(err));
LOG_ERROR("CUDA_CHECK_ERROR() failed at %s:%i : %s\n",
file, line, cudaGetErrorString(err));
exit(-1);
}

Expand Down Expand Up @@ -153,16 +227,16 @@ inline void _cudaCheckMemory(const char *file, const int line)
// newly requested allocation.
if (diff > 0)
{
fprintf(stdout, "INFO: cuda device memory usage in %s:%i\n"
"\t used: \t %f MB\n"
"\t avail: \t %f MB\n"
"\t total: \t %f MB\n"
"\t diff: \t %f MB \t (%zu bytes)\n",
file, line,
double(used) * to_MB,
double(avail) * to_MB,
double(total) * to_MB,
double(diff) * to_MB, diff);
LOG_DEBUG("CUDA device memory usage in %s:%i\n"
"\t\t\t used: \t %f MB\n"
"\t\t\t avail: \t %f MB\n"
"\t\t\t total: \t %f MB\n"
"\t\t\t diff: \t %f MB \t (%zu bytes)\n",
file, line,
double(used) * to_MB,
double(avail) * to_MB,
double(total) * to_MB,
double(diff) * to_MB, diff);
brian::used_device_memory = used;
}
#endif
Expand All @@ -172,7 +246,7 @@ inline void _cudaCheckMemory(const char *file, const int line)
// Log an exception caught around a Thrust call, then rethrow it.
// Must only be invoked from inside a catch block (it is called from the
// catch(...) handler of the THRUST_CHECK_ERROR macro): a bare `throw;` with
// no exception currently being handled terminates the program.
//   file, line: call site, passed as __FILE__ / __LINE__ by the macro
//   code:       stringified expression that threw (from #code in the macro)
inline void _thrustCheckError(const char *file, const int line,
                              const char *code)
{
    LOG_ERROR("THRUST_CHECK_ERROR() caught an exception from %s at %s:%i\n",
              code, file, line);
    // Rethrow the currently handled exception so callers can still see it.
    throw;
}
Expand Down
32 changes: 16 additions & 16 deletions brian2cuda/brianlib/curand_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ class CurandBuffer
{
if (current_idx != buffer_size && memory_allocated)
{
printf("WARNING: CurandBuffer::generate_numbers() called before "
"buffer was empty (current_idx = %u, buffer_size = %u)",
current_idx, buffer_size);
LOG_WARNING("CurandBuffer::generate_numbers() called before "
"buffer was empty (current_idx = %u, buffer_size = %u)",
current_idx, buffer_size);
}
// TODO: should we allocate the memory in the constructor (even if we end up not using it)?
if (!memory_allocated)
Expand All @@ -50,16 +50,16 @@ class CurandBuffer
host_data = new randomNumber_t[buffer_size];
if (!host_data)
{
printf("ERROR allocating host_data for CurandBuffer (size %ld)\n", sizeof(randomNumber_t)*buffer_size);
LOG_ERROR("Allocating host_data for CurandBuffer (size %ld)\n", sizeof(randomNumber_t)*buffer_size);
exit(EXIT_FAILURE);
}
// allocate device memory
cudaError_t status = cudaMalloc((void **)&dev_data, buffer_size*sizeof(randomNumber_t));
if (status != cudaSuccess)
{
printf("ERROR allocating memory on device (size = %ld) in %s(%d):\n\t%s\n",
buffer_size*sizeof(randomNumber_t), __FILE__, __LINE__,
cudaGetErrorString(status));
LOG_ERROR("Allocating memory on device (size = %ld) in %s(%d):\n\t\t\t%s\n",
buffer_size*sizeof(randomNumber_t), __FILE__, __LINE__,
cudaGetErrorString(status));
exit(EXIT_FAILURE);
}
memory_allocated = true;
Expand All @@ -70,7 +70,7 @@ class CurandBuffer
curandStatus_t status = generateUniform(*generator, dev_data, buffer_size);
if (status != CURAND_STATUS_SUCCESS)
{
printf("ERROR generating random numbers in %s(%d):\n", __FILE__, __LINE__);
LOG_ERROR("Generating random numbers in %s(%d):\n", __FILE__, __LINE__);
exit(EXIT_FAILURE);
}
}
Expand All @@ -79,18 +79,18 @@ class CurandBuffer
curandStatus_t status = generateNormal(*generator, dev_data, buffer_size, 0, 1);
if (status != CURAND_STATUS_SUCCESS)
{
printf("ERROR generating normal distributed random numbers in %s(%d):\n",
__FILE__, __LINE__);
LOG_ERROR("Generating normal distributed random numbers in %s(%d):\n",
__FILE__, __LINE__);
exit(EXIT_FAILURE);
}
}
// copy random numbers to host
cudaError_t status = cudaMemcpy(host_data, dev_data, buffer_size*sizeof(randomNumber_t), cudaMemcpyDeviceToHost);
if (status != cudaSuccess)
{
printf("ERROR copying device to host memory (size = %ld) in %s(%d):\n\t%s\n",
buffer_size*sizeof(randomNumber_t), __FILE__, __LINE__,
cudaGetErrorString(status));
LOG_ERROR("Copying device to host memory (size = %ld) in %s(%d):\n\t\t\t%s\n",
buffer_size*sizeof(randomNumber_t), __FILE__, __LINE__,
cudaGetErrorString(status));
exit(EXIT_FAILURE);
}
// reset buffer index
Expand All @@ -99,14 +99,14 @@ class CurandBuffer

curandStatus_t generateUniform(curandGenerator_t generator, randomNumber_t *outputPtr, size_t num)
{
printf("ERROR curand can only generate random numbers as 'float' or 'double' types.\n");
LOG_ERROR("%s", "Curand can only generate random numbers as 'float' or 'double' types.\n");
exit(EXIT_FAILURE);
}

curandStatus_t generateNormal(curandGenerator_t generator, randomNumber_t *outputPtr,
size_t n, randomNumber_t mean, randomNumber_t stddev)
{
printf("ERROR curand can only generate random numbers as 'float' or 'double' types.\n");
LOG_ERROR("%s", "Curand can only generate random numbers as 'float' or 'double' types.\n");
exit(EXIT_FAILURE);
}

Expand Down Expand Up @@ -140,7 +140,7 @@ class CurandBuffer
cudaError_t status = cudaFree(dev_data);
if (status != cudaSuccess)
{
printf("ERROR freeing device memory in %s(%d):%s\n",
LOG_ERROR("Freeing device memory in %s(%d):%s\n",
__FILE__, __LINE__, cudaGetErrorString(status));
exit(EXIT_FAILURE);
}
Expand Down
12 changes: 6 additions & 6 deletions brian2cuda/brianlib/spikequeue.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,9 @@ class CudaSpikeQueue
synapses_queue = new cuda_vector*[required_num_queues];
if (!synapses_queue)
{
printf("ERROR while allocating memory with size %ld in"
" spikequeue.h/prepare()\n",
sizeof(cuda_vector*) * required_num_queues);
LOG_CUDA_ERROR("While allocating memory with size %ld in"
" spikequeue.h/prepare()\n",
sizeof(cuda_vector*) * required_num_queues);
}
// only reset queue offset if we require new queues, in which
// case we copy the old queues such that the offset is reset
Expand Down Expand Up @@ -191,9 +191,9 @@ class CudaSpikeQueue
synapses_queue[i] = new cuda_vector[num_blocks];
if (!synapses_queue[i])
{
printf("ERROR while allocating memory with size %ld in"
" spikequeue.h/prepare()\n",
sizeof(cuda_vector)*num_blocks);
LOG_CUDA_ERROR("While allocating memory with size %ld in"
" spikequeue.h/prepare()\n",
sizeof(cuda_vector)*num_blocks);
}
}
}
Expand Down
5 changes: 5 additions & 0 deletions brian2cuda/cuda_prefs.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ def validate_bundle_size_expression(string):
'devices.cuda_standalone',
'Brian2CUDA preferences',

helpful=BrianPreference(
default=False,
docs="Give basic information message to help new Brian2CUDA users",
validator=lambda v: v in [True, False]),

SM_multiplier = BrianPreference(
default=1,
docs='The number of blocks per SM. By default, this value is set to 1.',
Expand Down
Loading