forked from vyasr/cudf_benchmarks
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
51 lines (40 loc) · 1.76 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from numbers import Real
from config import cudf, cupy as cp
def make_frame(ncols, nkey_cols, nrows, low=0, high=100):
    """Build a DataFrame of random key and value columns.

    The first `nkey_cols` columns are named ``key0``, ``key1``, ... and are
    filled with ``randint(low, high, nrows)``; the remaining
    ``ncols - nkey_cols`` columns are named ``val0``, ``val1``, ... and are
    filled with ``rand(nrows)``. A generator seeded with 0 is created on
    every call, so repeated calls with the same arguments draw the same
    sequence.
    """
    rng = cp.random.RandomState(seed=0)
    columns = {}
    # Key columns are drawn first so the random stream is consumed in the
    # same order regardless of how many value columns follow.
    for idx in range(nkey_cols):
        columns[f"key{idx}"] = rng.randint(low, high, nrows)
    for idx in range(ncols - nkey_cols):
        columns[f"val{idx}"] = rng.rand(nrows)
    return cudf.DataFrame(columns)
def make_col(nrows, has_nulls=True):
    """Build a column of `nrows` values drawn with ``randn``.

    The generator is seeded with 0 on every call. When `has_nulls` is
    true, every other row starting at row 0 is set to null.
    """
    values = cp.random.RandomState(seed=0).randn(nrows)
    column = cudf.core.column.as_column(values)
    if not has_nulls:
        return column
    # Which rows become null is arbitrary; every second row is used.
    column[::2] = None
    return column
def make_gather_map(len_gather_map: Real, len_column: Real, how: str):
    """Create a gather map based on `how` you'd like to gather from input.

    Supported values of `how`:

    - "sequence": gather the first `len_gather_map` rows; the first thread
      collects the first element.
    - "reverse": gather the last `len_gather_map` rows; the first thread
      collects the last element.
    - "random": create a pseudorandom gather map (generator seeded with 0)
      via ``randint(0, len_column, len_gather_map)``.

    `len_gather_map` and `len_column` are converted to integers with the
    built-in `round` before use.

    Returns the ``_column`` of a `cudf.Series` holding the gather indices.

    Raises `ValueError` if `how` is not one of the supported values
    (previously an unknown value silently returned ``None``).
    """
    # Validate up front so a typo in `how` fails loudly instead of handing
    # the caller a None gather map.
    if how not in ("sequence", "reverse", "random"):
        raise ValueError(
            f"how must be 'sequence', 'reverse' or 'random', got {how!r}"
        )

    len_gather_map = round(len_gather_map)
    len_column = round(len_column)

    if how == "sequence":
        indices = cp.arange(0, len_gather_map)
    elif how == "reverse":
        # Count down from the last row for `len_gather_map` steps.
        indices = cp.arange(
            len_column - 1, len_column - len_gather_map - 1, -1
        )
    else:
        # Only the random mode needs a generator, so build it here.
        rstate = cp.random.RandomState(seed=0)
        indices = rstate.randint(0, len_column, len_gather_map)
    return cudf.Series(indices)._column
def make_boolean_mask_column(size):
    """Build a boolean column of length `size` from pseudorandom 0/1 draws.

    Values come from ``randint(0, 2, size)`` on a generator seeded with 0
    and are cast to ``bool`` before being wrapped as a column.
    """
    rng = cp.random.RandomState(seed=0)
    bits = rng.randint(0, 2, size)
    mask = bits.astype(bool)
    return cudf.core.column.as_column(mask)