Fix Bug #5: Add debug logging for smoothing check

yuz207 · yuz207 · commit fcea47f07331 · 2025-09-27T18:39:31.000-07:00
Add diagnostic logging to verify the value of draft_mix_lambda_max and
whether smoothing will execute.

This will help diagnose whether smoothing is running (smoothing prevents q
from becoming exactly 1.0 in corner cases).

Expected log output (written to stderr):
[SMOOTH_DEBUG] lambda_max from config: 0.02, will run smoothing: True

If we see 'will run smoothing: False', smoothing is not being applied and
q can still collapse to 1.0 in ultracold regimes.
diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py
@@ -284,6 +284,8 @@ def _sample_draft_tokens(
 
             # --- tiny smoothing over kept set (prevents q==1.0 in ultracold corners) ---
             lam = float(getattr(self.opt_config, "draft_mix_lambda_max", 0.0))
+            print(f"[SMOOTH_DEBUG] lambda_max from config: {lam}, will run smoothing: {lam > 0.0}",
+                  file=sys.stderr, flush=True)
             if lam > 0.0:
                 K = keep.sum(dim=-1, keepdim=True).clamp_min(1)
                 uniform = keep.to(x.dtype) / K