# SPDX-License-Identifier: Apache-2.0
import math

import pytest
import torch

from vllm.platforms import current_platform
from vllm.v1.sample.ops.topk_topp_sampler import apply_top_k_top_p_tpu

# This module exercises the TPU-specific top-k/top-p kernel; skip the whole
# module at collection time on non-TPU hosts so importing torch_xla below
# (which requires an XLA device) never runs there.
if not current_platform.is_tpu():
    pytest.skip("This test needs a TPU.", allow_module_level=True)
import torch_xla.core.xla_model as xm

BATCH_SIZE = 1024
VOCAB_SIZE = 128 * 1024
# Slack for floating-point comparisons of probability sums against p.
TOLERANCE = 1e-4
1718
def test_topp_result_sums_past_p():
    """Tokens kept by top-p masking must carry at least probability ``p``.

    Runs with ``k = VOCAB_SIZE`` (a no-op top-k) so only the top-p filter is
    active, then checks per-row that the probability mass of the surviving
    tokens is >= the requested ``p`` (within TOLERANCE).
    """
    with torch.device(xm.xla_device()):
        # Fixed RNG seed so the random logits/p values are reproducible.
        xm.set_rng_state(seed=33)

        logits = torch.rand((BATCH_SIZE, VOCAB_SIZE))
        probs = logits.softmax(dim=-1)

        # Random top-p values between 0 and 1.
        p = torch.rand((BATCH_SIZE, ))

        # Set p=1 for ~50% of requests in the batch (top-p disabled).
        p.masked_fill_(torch.randint(0, 2, (BATCH_SIZE, ), dtype=bool), 1)

        # k equal to the vocabulary size keeps every token: top-k is a no-op.
        no_op_k = torch.tensor([VOCAB_SIZE])
        logits_masked = apply_top_k_top_p_tpu(logits=logits.clone(),
                                              k=no_op_k,
                                              p=p)

        # Verify that the masked logits' probability sums to at least p.
        # Filtered-out entries come back as -inf, so zero their probability
        # before summing the surviving mass per row.
        probs.masked_fill_(logits_masked.isinf(), 0)
        masked_prob_sum = probs.sum(dim=-1)
        assert torch.all(torch.ge(masked_prob_sum + TOLERANCE, p))
4441
4542
4643def test_topp_basic ():
47- with torch .device (DEVICE ):
44+ with torch .device (xm . xla_device () ):
4845 logits = torch .tensor ([[math .log (0.2 ),
4946 math .log (0.3 ),
5047 math .log (0.5 )],
@@ -64,7 +61,7 @@ def test_topp_basic():
6461
6562
6663def test_topp_select_all ():
67- with torch .device (DEVICE ):
64+ with torch .device (xm . xla_device () ):
6865 logits = torch .tensor ([[math .log (0.2 ),
6966 math .log (0.3 ),
7067 math .log (0.5 )],
@@ -80,7 +77,7 @@ def test_topp_select_all():
8077
8178
8279def test_topp_with_ties ():
83- with torch .device (DEVICE ):
80+ with torch .device (xm . xla_device () ):
8481 # Input has multiple math.log(0.3).
8582 logits = torch .tensor (
8683 [[math .log (0.3 ),
@@ -98,7 +95,7 @@ def test_topp_with_ties():
9895
9996
10097def test_both_topk_topp ():
101- with torch .device (DEVICE ):
98+ with torch .device (xm . xla_device () ):
10299 logits = torch .tensor ([[math .log (0.2 ),
103100 math .log (0.3 ),
104101 math .log (0.5 )],
0 commit comments