88from neuronxcc import nki
99from neuronxcc .nki .language import par_dim
1010
11-
12- def ceil_div (a , b ):
13- return (a + b - 1 ) // b
11+ from vllm .utils import cdiv
1412
1513
1614def is_power_of_2 (x ):
@@ -35,11 +33,10 @@ def load_block_tables(block_tables_hbm, num_tiles, num_blocks_per_tile):
3533 (num_tiles , num_blocks_per_tile ))
3634
3735 block_tables_sbuf = nl .zeros (
38- (ceil_div (num_tiles ,
39- B_P_SIZE ), par_dim (B_P_SIZE ), num_blocks_per_tile ),
36+ (cdiv (num_tiles , B_P_SIZE ), par_dim (B_P_SIZE ), num_blocks_per_tile ),
4037 dtype = nl .int32 ,
4138 )
42- for i in nl .affine_range (ceil_div (num_tiles , B_P_SIZE )):
39+ for i in nl .affine_range (cdiv (num_tiles , B_P_SIZE )):
4340 i_p = nl .arange (B_P_SIZE )[:, None ]
4441 i_f = nl .arange (num_blocks_per_tile )[None , :]
4542 block_tables_sbuf [i , i_p , i_f ] = nl .load (
@@ -83,7 +80,7 @@ def transform_block_tables_for_indirect_load(
8380 assert is_power_of_2 (
8481 num_blocks_per_tile ), f"{ num_blocks_per_tile = } is not power of 2"
8582
86- num_loads = ceil_div (num_blocks_per_tile , B_P_SIZE )
83+ num_loads = cdiv (num_blocks_per_tile , B_P_SIZE )
8784 block_tables_transposed = nl .ndarray (
8885 (
8986 num_loads ,
@@ -165,7 +162,7 @@ def load_kv_tile_from_cache(
165162 equivalent to (par_dim(B_P_SIZE), seqlen_kv // B_P_SIZE * B_D_SIZE)
166163 """
167164 # load key cache
168- num_loads = ceil_div (num_blocks_per_large_tile , B_P_SIZE )
165+ num_loads = cdiv (num_blocks_per_large_tile , B_P_SIZE )
169166 for load_idx in nl .affine_range (num_loads ):
170167 i_p = nl .arange (B_P_SIZE )[:, None ]
171168 i_f = nl .arange (tiled_block_size * B_D_SIZE )[None , :]
@@ -605,7 +602,7 @@ def flash_paged_attention(
605602 )
606603
607604 for large_k_tile_idx in nl .sequential_range (0 , num_large_k_tile ):
608- num_loads = ceil_div (num_blocks_per_large_tile , B_P_SIZE )
605+ num_loads = cdiv (num_blocks_per_large_tile , B_P_SIZE )
609606 cur_k_tile = nl .ndarray (
610607 (par_dim (B_D_SIZE ), LARGE_TILE_SZ ),
611608 dtype = kernel_dtype ,
0 commit comments