chore(gpu): pass over all cuda bind #1514

Merged · 1 commit · Sep 6, 2024
10 changes: 5 additions & 5 deletions backends/tfhe-cuda-backend/cuda/include/device.h
```diff
@@ -39,10 +39,6 @@ void *cuda_malloc_async(uint64_t size, cudaStream_t stream, uint32_t gpu_index);

 void cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index);

-bool cuda_check_support_cooperative_groups();
-
-bool cuda_check_support_thread_block_clusters();
-
 void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
                               cudaStream_t stream, uint32_t gpu_index);

@@ -62,9 +58,13 @@ void cuda_synchronize_device(uint32_t gpu_index);
 void cuda_drop(void *ptr, uint32_t gpu_index);

 void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index);
-}

 int cuda_get_max_shared_memory(uint32_t gpu_index);
+}
+
+bool cuda_check_support_cooperative_groups();
+
+bool cuda_check_support_thread_block_clusters();

 template <typename Torus>
 void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
```
2 changes: 1 addition & 1 deletion backends/tfhe-cuda-backend/cuda/include/helper_multi_gpu.h
```diff
@@ -8,7 +8,7 @@ extern std::mutex m;
 extern bool p2p_enabled;

 extern "C" {
-int cuda_setup_multi_gpu();
+int32_t cuda_setup_multi_gpu();
 }

 // Define a variant type that can be either a vector or a single pointer
```

Review comment on `int32_t cuda_setup_multi_gpu();`:

Contributor: Out of curiosity, why do we want `int32_t` instead of `int` here?

Contributor (author): Just to be sure about the types on any architecture, I prefer to always specify the size of the integer (as we do on the Rust side as well).
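The author's rationale — fixed-width types on both sides of the FFI boundary — can be illustrated with a small self-contained Rust sketch. The `extern` declaration below is a hypothetical illustration of how such a binding is typically written, not the actual tfhe-rs binding code:

```rust
// Hypothetical sketch of the Rust side of such a binding: a C function
// declared as `int32_t cuda_setup_multi_gpu()` maps unambiguously to
// `i32`, which is exactly 32 bits on every target. A bare C `int` is
// only guaranteed to be at least 16 bits by the C standard.
extern "C" {
    #[allow(dead_code)]
    fn cuda_setup_multi_gpu() -> i32;
}

fn main() {
    // i32 is 4 bytes regardless of architecture, so the Rust and C
    // declarations cannot silently drift apart across platforms.
    assert_eq!(std::mem::size_of::<i32>(), 4);
    println!("i32 is always {} bytes", std::mem::size_of::<i32>());
}
```

Pinning the size in the header keeps the C signature in lockstep with the Rust declaration without relying on the platform's definition of `int`.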
4 changes: 2 additions & 2 deletions backends/tfhe-cuda-backend/cuda/src/utils/helper_multi_gpu.cu
```diff
@@ -6,7 +6,7 @@
 std::mutex m;
 bool p2p_enabled = false;

-int cuda_setup_multi_gpu() {
+int32_t cuda_setup_multi_gpu() {
   int num_gpus = cuda_get_number_of_gpus();
   if (num_gpus == 0)
     PANIC("GPU error: the number of GPUs should be > 0.")
@@ -32,7 +32,7 @@ int cuda_setup_multi_gpu() {
     }
     m.unlock();
   }
-  return num_used_gpus;
+  return (int32_t)(num_used_gpus);
 }

 int get_active_gpu_count(int num_inputs, int gpu_count) {
```