forked from vyasr/cudf_benchmarks
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bench_get_dummies.py
34 lines (30 loc) · 1.14 KB
/
bench_get_dummies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from config import cudf, cupy as cp
import pytest
@pytest.mark.parametrize("size", [10_000, 100_000])
@pytest.mark.parametrize("cardinality", [10, 100, 1000])
@pytest.mark.parametrize("dtype", [cp.bool_, cp.float64])
def test_get_dummies_high_cardinality(benchmark, size, cardinality, dtype):
    """Benchmark ``get_dummies`` when the encoded column has many categories.

    The input frame holds a single column, ``"col"``, built from ``size``
    random integers generated with ``low=0`` and ``high=cardinality`` and
    then cast to ``category``. The parametrized ``dtype`` is passed straight
    through to ``cudf.get_dummies``.
    """
    # Build the input outside the timed call so only get_dummies is measured.
    codes = cp.random.randint(low=0, high=cardinality, size=size)
    categorical = cudf.Series(codes).astype("category")
    frame = cudf.DataFrame({"col": categorical})

    benchmark(cudf.get_dummies, frame, columns=["col"], dtype=dtype)
@pytest.mark.parametrize("prefix", [None, "pre"])
def test_get_dummies_simple(benchmark, prefix):
    """Benchmark ``get_dummies`` on a tiny frame to gauge the API itself.

    The frame has three ten-row columns: plain integers, single-character
    strings, and a categorical series. All three are encoded, once for each
    parametrized ``prefix`` (``None`` and ``"pre"``).
    """
    # Build the input outside the timed call so only get_dummies is measured.
    column_data = {
        "col1": list(range(10)),
        "col2": list("abcdefghij"),
        "col3": cudf.Series(list(range(100, 110)), dtype="category"),
    }
    frame = cudf.DataFrame(column_data)
    encode_columns = ["col1", "col2", "col3"]

    benchmark(cudf.get_dummies, frame, columns=encode_columns, prefix=prefix)