1 parent 33d5969 commit a48be76
tests/kernels/attention/test_cache.py
@@ -239,6 +239,10 @@ def test_reshape_and_cache_flash(
     current_platform.seed_everything(seed)
     torch.set_default_device(device)

+    # fp8 conversion requires a contiguous memory buffer. Reduce the number
+    # of blocks and tokens to consume less memory.
+    num_tokens = num_tokens // 2
+    num_blocks = num_blocks // 2
     # Create a random slot mapping.
     num_slots = block_size * num_blocks
     slot_mapping_lst = random.sample(range(num_slots), num_tokens)
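A rough sketch of why halving the sizes helps: casting a cache tensor to fp8 typically stages it through a contiguous temporary, so peak memory during the test scales with num_blocks and num_tokens. The shapes and the .to(torch.float8_e4m3fn) call below are illustrative assumptions, not the kernel under test.

import torch

# Illustrative shapes only; the real test parametrizes these values.
num_blocks, block_size, num_heads, head_size = 512, 16, 8, 64
cache = torch.randn(num_blocks, block_size, num_heads, head_size,
                    dtype=torch.float16)

# A non-contiguous view (e.g. after a transpose) must be materialized into
# a contiguous buffer before the fp8 cast, briefly doubling the footprint;
# halving num_blocks and num_tokens halves that peak allocation.
view = cache.transpose(1, 2)        # non-contiguous view
staging = view.contiguous()         # extra full-size allocation
fp8_cache = staging.to(torch.float8_e4m3fn)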