Bugfix for attention CUDA backends (hack to increase scratch size by 1.5)
dje-dev committed Mar 31, 2022
1 parent 0218fb5 commit da03904
Showing 1 changed file with 5 additions and 1 deletion.
src/Ceres.Chess/NNBackends/CUDA/NNBackendLC0_CUDA.cs (5 additions, 1 deletion)
@@ -460,7 +460,11 @@ private void AllocateGPUMemory(Net net, LC0LegacyWeights weights)
  // (We also split the allocations into two parts, so need 2x)
  int attentionSize = MaxAttentionSize(net, weights, MaxBatchSize);
  scratchSizeBytes = System.Math.Max(scratchSizeBytes, 2 * attentionSize);
-
+ if (weights.encoder != null && weights.encoder.Length > 0) // is attention body?
+ {
+   // HACK: workaround for insufficient scratch size with attention body
+   scratchSizeBytes = (int)(scratchSizeBytes * 1.5f);
+ }
  long scratchSizeElements = scratchSizeBytes / Marshal.SizeOf<FP16>();

  // =========================================================================
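For context, the sizing logic after this commit reduces to the arithmetic below. This is a minimal standalone sketch, not the Ceres source: the method name ComputeScratchElements, the hasAttentionBody parameter, and the sample byte counts are illustrative assumptions, and FP16 is assumed to occupy 2 bytes (the real code uses Marshal.SizeOf<FP16>() and an int scratchSizeBytes).

using System;

class ScratchSizingSketch
{
  // Sketch of the scratch-buffer sizing after this commit (assumed names and values).
  static long ComputeScratchElements(long baseScratchBytes, long attentionSizeBytes, bool hasAttentionBody)
  {
    // Attention layers have their own scratch estimate; allocations are split into two parts, hence 2x.
    long scratchSizeBytes = Math.Max(baseScratchBytes, 2 * attentionSizeBytes);

    if (hasAttentionBody)
    {
      // The commit's workaround: grow the scratch area by 50% for attention-body nets.
      scratchSizeBytes = (long)(scratchSizeBytes * 1.5f);
    }

    const int fp16SizeBytes = 2; // stands in for Marshal.SizeOf<FP16>()
    return scratchSizeBytes / fp16SizeBytes;
  }

  static void Main()
  {
    // Example: 64 MB base scratch, 48 MB attention estimate, attention body present.
    // 2 * 48 MB = 96 MB > 64 MB, then * 1.5 = 144 MB, / 2 bytes = 75,497,472 FP16 elements.
    Console.WriteLine(ComputeScratchElements(64L << 20, 48L << 20, hasAttentionBody: true));
  }
}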
