1 parent e3f00d1 commit a90c97d
cacheflow/models/sample.py
@@ -36,10 +36,11 @@ def forward(
     # Use in-place division to avoid creating a new tensor.
     logits.div_(t.unsqueeze(dim=1))

+    # We use float32 for probabilities and log probabilities.
     # Compute the probabilities.
     probs = torch.softmax(logits, dim=-1, dtype=torch.float)
     # Compute the log probabilities (before applying top-p).
-    logprobs = torch.log(probs, out=logits)
+    logprobs = torch.log(probs)

     # Apply top-p truncation.
     top_ps = _get_top_ps(input_metadata)
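The change replaces torch.log(probs, out=logits) with torch.log(probs), so the log probabilities are stored in a fresh float32 tensor rather than written back into the logits buffer, which may hold a lower-precision dtype. A minimal sketch of the resulting dtype behavior, using hypothetical shapes, temperatures t, and a float16 logits tensor (none of these values come from the commit):

import torch

# Hypothetical inputs: 2 sequences, a vocabulary of 8 tokens, half-precision logits.
logits = torch.randn(2, 8, dtype=torch.float16)
t = torch.tensor([0.8, 1.2], dtype=torch.float16)  # hypothetical per-sequence temperatures

# Use in-place division to avoid creating a new tensor.
logits.div_(t.unsqueeze(dim=1))

# Compute the probabilities in float32, as in the patched code.
probs = torch.softmax(logits, dim=-1, dtype=torch.float)

# New behavior: torch.log allocates a new tensor that keeps float32 precision,
# instead of writing the result back into the float16 logits buffer.
logprobs = torch.log(probs)
assert probs.dtype == torch.float32 and logprobs.dtype == torch.float32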