diff --git a/nvbench/cuda_call.cuh b/nvbench/cuda_call.cuh
index ca8e765..5b2ae36 100644
--- a/nvbench/cuda_call.cuh
+++ b/nvbench/cuda_call.cuh
@@ -24,12 +24,14 @@
 #include <string>
 
 /// Throws a std::runtime_error if `call` doesn't return `cudaSuccess`.
+/// On failure, calls cudaGetLastError() before throwing to reset the CUDA runtime's last error.
 #define NVBENCH_CUDA_CALL(call)                                                                    \
   do                                                                                               \
   {                                                                                                \
     const cudaError_t nvbench_cuda_call_error = call;                                              \
     if (nvbench_cuda_call_error != cudaSuccess)                                                    \
     {                                                                                              \
+      cudaGetLastError();                                                                          \
       nvbench::cuda_call::throw_error(__FILE__, __LINE__, #call, nvbench_cuda_call_error);         \
     }                                                                                              \
   } while (false)
diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt
index 4a03000..f407258 100644
--- a/testing/CMakeLists.txt
+++ b/testing/CMakeLists.txt
@@ -16,6 +16,7 @@ set(test_srcs
   named_values.cu
   option_parser.cu
   range.cu
+  reset_error.cu
   ring_buffer.cu
   runner.cu
   state.cu
diff --git a/testing/reset_error.cu b/testing/reset_error.cu
new file mode 100644
index 0000000..8fece93
--- /dev/null
+++ b/testing/reset_error.cu
@@ -0,0 +1,45 @@
+#include <nvbench/cuda_call.cuh>
+
+#include "test_asserts.cuh"
+
+#include <cstdint>
+#include <stdexcept>
+
+namespace
+{
+// Writes 5 * a[id] to b[id] for each launched thread. There is deliberately no
+// bounds or null check: main() launches it with null pointers so that the
+// kernel is expected to fail and the next synchronizing call to report it.
+__global__ void multiply5(const int32_t *__restrict__ a, int32_t *__restrict__ b)
+{
+  const auto id = blockIdx.x * blockDim.x + threadIdx.x;
+  b[id] = 5 * a[id];
+}
+} // namespace
+
+// Checks that NVBENCH_CUDA_CALL throws on a failed call and that no CUDA error
+// is still reported by cudaGetLastError() afterwards.
+int main()
+{
+  // Provoke an error on purpose: both device pointers are null. The failure is
+  // expected to be reported by the stream synchronization below.
+  multiply5<<<256, 256>>>(nullptr, nullptr);
+
+  // Record the outcome in a flag and assert after the try/catch. ASSERT comes
+  // from test_asserts.cuh; should it signal failure by throwing
+  // std::runtime_error, an ASSERT inside the try block would be swallowed by
+  // the handler and the test would pass without the macro ever throwing.
+  bool threw = false;
+  try
+  {
+    NVBENCH_CUDA_CALL(cudaStreamSynchronize(0));
+  }
+  catch (const std::runtime_error &)
+  {
+    threw = true;
+  }
+
+  ASSERT(threw);
+  // The macro must already have consumed the error before it threw.
+  ASSERT(cudaGetLastError() == cudaError_t::cudaSuccess);
+
+  return 0;
+}