Fix bug when compiling with caffe2 (#41868)

Summary: Pull Request resolved: pytorch/pytorch#41868. Fix bug when compiling with caffe2. Reviewed By: jianyuh. Differential Revision: D22670707. fbshipit-source-id: aa654d7b9004257e0288c8ae8819ca5752eea443
TonyTangYu · Jul 23, 2020 · 30ce7b3 · 30ce7b3
1 parent 0ec7ba4
commit 30ce7b3
Showing 1 changed file with 7 additions and 4 deletions.
diff --git a/caffe2/sgd/adagrad_fused_op_gpu.cuh b/caffe2/sgd/adagrad_fused_op_gpu.cuh
@@ -15,10 +15,13 @@
 #define SEGREDUCE_MINBLOCKS 16
 #endif
 
-#ifdef REDUCE_BLOCK
-#define REDUCE_SIZE REDUCE_BLOCK
+// Whoever includes this header should define REDUCE_BLOCK_SIZE,
+// which is the maximum row-wise length.
+// The default is 1024 (the maximum number of threads per block on Volta GPUs).
+#ifdef REDUCE_BLOCK_SIZE
+#define REDUCE_SIZE REDUCE_BLOCK_SIZE
 #else
-#define REDUCE_SIZE CAFFE_CUDA_NUM_THREADS
+#define REDUCE_SIZE 1024
 #endif
 
 namespace caffe2 {
@@ -183,7 +186,7 @@ __global__ void rowwise_sparse_adagrad_fused_length_sum_gradient_kernel(
     }
   } else {
     // TODO: Tuning NumThreads for sum_squares
-    // TODO: Not compatible with embedding dim larger than maxThread, set Volta as default
+    // TODO: Not compatible with embedding dim larger than maxThread
     typedef cub::BlockReduce<float, REDUCE_SIZE> BlockReduce;
     __shared__ BlockReduce::TempStorage temp_storage;
     int valid = min(block_size, blockDim.x);