We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 068e169 commit 1dab650 — Copy full SHA for 1dab650
tests/v1/tpu/test_pallas.py
@@ -47,7 +47,7 @@ class FakeAttentionLayer:
47
key = torch.zeros(num_tokens, num_kv_heads * head_size)
48
value = torch.zeros(num_tokens, num_kv_heads * head_size)
49
kv_cache = torch.zeros(num_blocks, block_size, num_kv_heads * 2, head_size)
50
- slot_mapping = torch.zeros(num_tokens, dtype=torch.int64)
+ slot_mapping = torch.zeros((3, num_tokens), dtype=torch.int64)
51
max_num_reqs = 8
52
max_num_blocks_per_req = 8
53
block_tables = torch.zeros((max_num_reqs, max_num_blocks_per_req),
0 commit comments