Skip to content

Commit

Permalink
llvm: Implement range integer generation to match old Numpy's
Browse files Browse the repository at this point in the history
Uses bit masked rejection sampling of lower bits.
Matches to Numpy's Random.randint API call.

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
  • Loading branch information
jvesely committed Nov 20, 2024
1 parent 014ca35 commit 157d139
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 6 deletions.
47 changes: 43 additions & 4 deletions psyneulink/core/llvm/builtins.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,10 +658,11 @@ def _setup_mt_rand_init(ctx, state_ty, init_scalar):
return builder.function


def _setup_mt_rand_integer(ctx, state_ty):
def _setup_mt_rand_int32(ctx, state_ty):
int64_ty = ir.IntType(64)

# Generate random number generator function.
# It produces random 32bit numberin a 64bit word
# It produces random 32bit number in a 64bit word
builder = _setup_builtin_func_builder(ctx, "mt_rand_int32", (state_ty.as_pointer(), int64_ty.as_pointer()))
state, out = builder.function.args

Expand Down Expand Up @@ -759,6 +760,43 @@ def _setup_mt_rand_integer(ctx, state_ty):

return builder.function


def _setup_rand_bounded_int32(ctx, state_ty, gen_int32):

out_ty = gen_int32.args[1].type.pointee
builder = _setup_builtin_func_builder(ctx, gen_int32.name + "_bounded", (state_ty.as_pointer(), ctx.int32_ty, ctx.int32_ty, out_ty.as_pointer()))
state, lower, upper, out_ptr = builder.function.args

rand_range_excl = builder.sub(upper, lower)
rand_range_excl = builder.zext(rand_range_excl, out_ty)

range_leading_zeros = builder.ctlz(rand_range_excl, ctx.bool_ty(1))
mask = builder.lshr(range_leading_zeros.type(-1), range_leading_zeros)

loop_block = builder.append_basic_block("bounded_loop_block")
out_block = builder.append_basic_block("bounded_out_block")

builder.branch(loop_block)

# Loop:
# do:
# r = random() & mask
# while r >= limit
builder.position_at_end(loop_block)

builder.call(gen_int32, [state, out_ptr])
val = builder.load(out_ptr)
val = builder.and_(val, mask)

is_above_limit = builder.icmp_unsigned(">=", val, rand_range_excl)
builder.cbranch(is_above_limit, loop_block, out_block)

builder.position_at_end(out_block)
offset = builder.zext(lower, val.type)
result = builder.add(val, offset)
builder.store(result, out_ptr)
builder.ret_void()

def _setup_mt_rand_float(ctx, state_ty, gen_int):
"""
Mersenne Twister double prcision random number generation.
Expand Down Expand Up @@ -893,8 +931,9 @@ def setup_mersenne_twister(ctx):
init_scalar = _setup_mt_rand_init_scalar(ctx, state_ty)
_setup_mt_rand_init(ctx, state_ty, init_scalar)

gen_int = _setup_mt_rand_integer(ctx, state_ty)
gen_float = _setup_mt_rand_float(ctx, state_ty, gen_int)
gen_int32 = _setup_mt_rand_int32(ctx, state_ty)
_setup_rand_bounded_int32(ctx, state_ty, gen_int32)
gen_float = _setup_mt_rand_float(ctx, state_ty, gen_int32)
_setup_mt_rand_normal(ctx, state_ty, gen_float)
_setup_rand_binomial(ctx, state_ty, gen_float, prefix="mt")

Expand Down
63 changes: 61 additions & 2 deletions tests/llvm/test_builtins_mt_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,70 @@

SEED = 0

@pytest.mark.benchmark(group="Mersenne Twister bounded integer PRNG")
@pytest.mark.parametrize('mode', ['numpy',
pytest.param('LLVM', marks=pytest.mark.llvm),
pytest.helpers.cuda_param('PTX')])
@pytest.mark.parametrize("bounds, expected",
[((0xffffffff,), [3626764237, 1654615998, 3255389356, 3823568514, 1806341205]),
((14,), [13, 12, 2, 5, 4]),
((0,14), [13, 12, 2, 5, 4]),
((5,0xffff), [2002, 28611, 19633, 1671, 37978]),
], ids=lambda x: str(x) if len(x) != 5 else "")
# Python uses sampling of upper bits (vs. lower bits in Numpy). Skip it in this test.
def test_random_int32_bounded(benchmark, mode, bounds, expected):

if mode == 'numpy':
# Numpy promotes elements to int64
state = np.random.RandomState([SEED])

def f():
return state.randint(*bounds, dtype=np.uint32)

elif mode == 'LLVM':
init_fun = pnlvm.LLVMBinaryFunction.get('__pnl_builtin_mt_rand_init')
state = init_fun.np_buffer_for_arg(0)

init_fun(state, SEED)

gen_fun = pnlvm.LLVMBinaryFunction.get('__pnl_builtin_mt_rand_int32_bounded')

def f():
lower, upper = bounds if len(bounds) == 2 else (0, bounds[0])
out = gen_fun.np_buffer_for_arg(3)
gen_fun(state, lower, upper, out)
return out

elif mode == 'PTX':
init_fun = pnlvm.LLVMBinaryFunction.get('__pnl_builtin_mt_rand_init')

state_size = init_fun.np_buffer_for_arg(0).nbytes
gpu_state = pnlvm.jit_engine.pycuda.driver.mem_alloc(state_size)

init_fun.cuda_call(gpu_state, np.int32(SEED))

gen_fun = pnlvm.LLVMBinaryFunction.get('__pnl_builtin_mt_rand_int32_bounded')
out = gen_fun.np_buffer_for_arg(3)
gpu_out = pnlvm.jit_engine.pycuda.driver.Out(out)

def f():
lower, upper = bounds if len(bounds) == 2 else (0, bounds[0])
gen_fun.cuda_call(gpu_state, np.uint32(lower), np.uint32(upper), gpu_out)
return out.copy()

else:
assert False, "Unknown mode: {}".format(mode)

res = [f(), f(), f(), f(), f()]
np.testing.assert_allclose(res, expected)
benchmark(f)

@pytest.mark.benchmark(group="Mersenne Twister integer PRNG")
@pytest.mark.parametrize('mode', ['Python', 'numpy',
pytest.param('LLVM', marks=pytest.mark.llvm),
pytest.helpers.cuda_param('PTX')])
def test_random_int32(benchmark, mode):
res = []

if mode == 'Python':
state = random.Random(SEED)

Expand Down Expand Up @@ -67,7 +125,7 @@ def f():
pytest.param('LLVM', marks=pytest.mark.llvm),
pytest.helpers.cuda_param('PTX')])
def test_random_float(benchmark, mode):
res = []

if mode == 'Python':
# Python treats every seed as array
state = random.Random(SEED)
Expand Down Expand Up @@ -124,6 +182,7 @@ def f():
pytest.helpers.cuda_param('PTX')])
# Python uses different algorithm so skip it in this test
def test_random_normal(benchmark, mode):

if mode == 'numpy':
# numpy promotes elements to int64
state = np.random.RandomState([SEED])
Expand Down

0 comments on commit 157d139

Please sign in to comment.