Bugfix for attention CUDA backends (hack to increase scratch size by 1.5)
dje-dev committed Mar 31, 2022
1 parent 0218fb5 commit da03904
Showing 1 changed file with 5 additions and 1 deletion.
src/Ceres.Chess/NNBackends/CUDA/NNBackendLC0_CUDA.cs (5 additions, 1 deletion)
@@ -460,7 +460,11 @@ private void AllocateGPUMemory(Net net, LC0LegacyWeights weights)
  // (We also split the allocations into two parts, so need 2x)
  int attentionSize = MaxAttentionSize(net, weights, MaxBatchSize);
  scratchSizeBytes = System.Math.Max(scratchSizeBytes, 2 * attentionSize);
-
+ if (weights.encoder != null && weights.encoder.Length > 0) // is attention body?
+ {
+   // HACK: workaround for insufficient scratch size with attention body
+   scratchSizeBytes = (int)(scratchSizeBytes * 1.5f);
+ }
  long scratchSizeElements = scratchSizeBytes / Marshal.SizeOf<FP16>();

  // =========================================================================
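For context, the sizing logic after this commit reduces to the arithmetic below. This is a minimal standalone sketch, not the Ceres source: the method name ComputeScratchElements, the hasAttentionBody parameter, and the sample byte counts are illustrative assumptions, and FP16 is assumed to occupy 2 bytes (the real code uses Marshal.SizeOf<FP16>() and an int scratchSizeBytes).

using System;

class ScratchSizingSketch
{
  // Sketch of the scratch-buffer sizing after this commit (assumed names and values).
  static long ComputeScratchElements(long baseScratchBytes, long attentionSizeBytes, bool hasAttentionBody)
  {
    // Attention layers have their own scratch estimate; allocations are split into two parts, hence 2x.
    long scratchSizeBytes = Math.Max(baseScratchBytes, 2 * attentionSizeBytes);

    if (hasAttentionBody)
    {
      // The commit's workaround: grow the scratch area by 50% for attention-body nets.
      scratchSizeBytes = (long)(scratchSizeBytes * 1.5f);
    }

    const int fp16SizeBytes = 2; // stands in for Marshal.SizeOf<FP16>()
    return scratchSizeBytes / fp16SizeBytes;
  }

  static void Main()
  {
    // Example: 64 MB base scratch, 48 MB attention estimate, attention body present.
    // 2 * 48 MB = 96 MB > 64 MB, then * 1.5 = 144 MB, / 2 bytes = 75,497,472 FP16 elements.
    Console.WriteLine(ComputeScratchElements(64L << 20, 48L << 20, hasAttentionBody: true));
  }
}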
