From 75ac6c0e43bf133c33d42ca1e8c0f0593d9ce3a8 Mon Sep 17 00:00:00 2001 From: Chen Zhang Date: Thu, 11 Sep 2025 00:19:03 -0700 Subject: [PATCH 1/2] update test Signed-off-by: Chen Zhang --- tests/v1/worker/test_gpu_model_runner.py | 47 ++++++++++++------------ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/tests/v1/worker/test_gpu_model_runner.py b/tests/v1/worker/test_gpu_model_runner.py index 6d99029e404e..6b9f09f410e8 100644 --- a/tests/v1/worker/test_gpu_model_runner.py +++ b/tests/v1/worker/test_gpu_model_runner.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import random - import numpy as np import pytest import torch @@ -409,29 +407,30 @@ def test_kv_cache_stride_order(monkeypatch, model_runner): model_runner.model_config.get_head_size() ] # TODO mla test - default_stride = list(range(5)) + default_stride = tuple(range(5)) # Permutation that gets you back to expected kv shape - rnd_stride = tuple(random.sample(default_stride, len(default_stride))) - - def rnd_stride_order(): - return rnd_stride - - # Patch the attention backend class and re-trigger the KV cache creation. - for attn_group in model_runner._attn_group_iterator(): - attn_backend = attn_group.backend - monkeypatch.setattr(attn_backend, "get_kv_cache_stride_order", - rnd_stride_order) - - model_runner.attn_groups = [] - model_runner.initialize_kv_cache(model_runner.kv_cache_config) - - # Shape is unchanged, but layout may differ - kv_cache_shape = model_runner.kv_caches[0].shape - assert list(kv_cache_shape) == expected_kv_cache_shape - if default_stride == rnd_stride: - assert all(kv.is_contiguous() for kv in model_runner.kv_caches) - else: - assert all(not kv.is_contiguous() for kv in model_runner.kv_caches) + for test_stride in ((1, 4, 0, 2, 3), (0, 1, 2, 3, 4)): + + def rnd_stride_order(): + return test_stride + + # Patch the attention backend class and re-trigger the KV cache creation + for attn_group in model_runner._attn_group_iterator(): + attn_backend = attn_group.backend + monkeypatch.setattr(attn_backend, "get_kv_cache_stride_order", + rnd_stride_order) + + model_runner.attn_groups = [] + model_runner.kv_caches = [] + model_runner.initialize_kv_cache(model_runner.kv_cache_config) + + # Shape is unchanged, but layout may differ + kv_cache_shape = model_runner.kv_caches[0].shape + assert list(kv_cache_shape) == expected_kv_cache_shape + if default_stride == test_stride: + assert all(kv.is_contiguous() for kv in model_runner.kv_caches) + else: + assert all(not kv.is_contiguous() for kv in model_runner.kv_caches) def test_update_config(model_runner): From 25686a26ac84bb8bf4cc2fcab5e599c884e58d91 Mon Sep 17 00:00:00 2001 From: Chen Zhang Date: Thu, 11 Sep 2025 23:25:24 -0700 Subject: [PATCH 2/2] fix precommit Signed-off-by: Chen Zhang --- tests/v1/worker/test_gpu_model_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v1/worker/test_gpu_model_runner.py b/tests/v1/worker/test_gpu_model_runner.py index 6b9f09f410e8..8e27c14b7405 100644 --- a/tests/v1/worker/test_gpu_model_runner.py +++ b/tests/v1/worker/test_gpu_model_runner.py @@ -411,7 +411,7 @@ def test_kv_cache_stride_order(monkeypatch, model_runner): # Permutation that gets you back to expected kv shape for test_stride in ((1, 4, 0, 2, 3), (0, 1, 2, 3, 4)): - def rnd_stride_order(): + def rnd_stride_order(test_stride=test_stride): return test_stride # Patch the attention backend class and re-trigger the KV cache creation