From 75ac6c0e43bf133c33d42ca1e8c0f0593d9ce3a8 Mon Sep 17 00:00:00 2001
From: Chen Zhang <zhangch99@outlook.com>
Date: Thu, 11 Sep 2025 00:19:03 -0700
Subject: [PATCH 1/2] test: use fixed stride orders in test_kv_cache_stride_order

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
---
 tests/v1/worker/test_gpu_model_runner.py | 47 ++++++++++++------------
 1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/tests/v1/worker/test_gpu_model_runner.py b/tests/v1/worker/test_gpu_model_runner.py
index 6d99029e404e..6b9f09f410e8 100644
--- a/tests/v1/worker/test_gpu_model_runner.py
+++ b/tests/v1/worker/test_gpu_model_runner.py
@@ -1,8 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import random
-
 import numpy as np
 import pytest
 import torch
@@ -409,29 +407,30 @@ def test_kv_cache_stride_order(monkeypatch, model_runner):
         model_runner.model_config.get_head_size()
     ]
     # TODO mla test
-    default_stride = list(range(5))
+    default_stride = tuple(range(5))
     # Permutation that gets you back to expected kv shape
-    rnd_stride = tuple(random.sample(default_stride, len(default_stride)))
-
-    def rnd_stride_order():
-        return rnd_stride
-
-    # Patch the attention backend class and re-trigger the KV cache creation.
-    for attn_group in model_runner._attn_group_iterator():
-        attn_backend = attn_group.backend
-        monkeypatch.setattr(attn_backend, "get_kv_cache_stride_order",
-                            rnd_stride_order)
-
-    model_runner.attn_groups = []
-    model_runner.initialize_kv_cache(model_runner.kv_cache_config)
-
-    # Shape is unchanged, but layout may differ
-    kv_cache_shape = model_runner.kv_caches[0].shape
-    assert list(kv_cache_shape) == expected_kv_cache_shape
-    if default_stride == rnd_stride:
-        assert all(kv.is_contiguous() for kv in model_runner.kv_caches)
-    else:
-        assert all(not kv.is_contiguous() for kv in model_runner.kv_caches)
+    for test_stride in ((1, 4, 0, 2, 3), (0, 1, 2, 3, 4)):
+
+        def rnd_stride_order():
+            return test_stride
+
+        # Patch the attention backend class and re-trigger the KV cache creation
+        for attn_group in model_runner._attn_group_iterator():
+            attn_backend = attn_group.backend
+            monkeypatch.setattr(attn_backend, "get_kv_cache_stride_order",
+                                rnd_stride_order)
+
+        model_runner.attn_groups = []
+        model_runner.kv_caches = []
+        model_runner.initialize_kv_cache(model_runner.kv_cache_config)
+
+        # Shape is unchanged, but layout may differ
+        kv_cache_shape = model_runner.kv_caches[0].shape
+        assert list(kv_cache_shape) == expected_kv_cache_shape
+        if default_stride == test_stride:
+            assert all(kv.is_contiguous() for kv in model_runner.kv_caches)
+        else:
+            assert all(not kv.is_contiguous() for kv in model_runner.kv_caches)
 
 
 def test_update_config(model_runner):

From 25686a26ac84bb8bf4cc2fcab5e599c884e58d91 Mon Sep 17 00:00:00 2001
From: Chen Zhang <zhangch99@outlook.com>
Date: Thu, 11 Sep 2025 23:25:24 -0700
Subject: [PATCH 2/2] fix pre-commit: bind test_stride as a default argument

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
---
 tests/v1/worker/test_gpu_model_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/v1/worker/test_gpu_model_runner.py b/tests/v1/worker/test_gpu_model_runner.py
index 6b9f09f410e8..8e27c14b7405 100644
--- a/tests/v1/worker/test_gpu_model_runner.py
+++ b/tests/v1/worker/test_gpu_model_runner.py
@@ -411,7 +411,7 @@ def test_kv_cache_stride_order(monkeypatch, model_runner):
     # Permutation that gets you back to expected kv shape
     for test_stride in ((1, 4, 0, 2, 3), (0, 1, 2, 3, 4)):
 
-        def rnd_stride_order():
+        def rnd_stride_order(test_stride=test_stride):
             return test_stride
 
         # Patch the attention backend class and re-trigger the KV cache creation