Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closes #2156: Bigint stream benchmark #2157

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions benchmarks/bigint_bitwise_binops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#!/usr/bin/env python3

import argparse
import time

import numpy as np

import arkouda as ak


def time_ak_bitwise_binops(N_per_locale, trials, max_bits, seed):
    """Time bigint ``&``, ``|`` and ``>> 10`` on arkouda arrays and print averages.

    Two bigint operands are each assembled from a pair of random uint64
    arrays. Every trial times the three operations one after another; once
    all trials are done, the mean time and the derived GiB/sec rate of each
    operation are printed.

    Parameters
    ----------
    N_per_locale : int
        Elements per locale; multiplied by ``numLocales`` from
        ``ak.get_config()`` to obtain the array length.
    trials : int
        Number of timed repetitions; also the divisor of each average.
    max_bits : int
        Forwarded to ``ak.bigint_from_uint_arrays``.
    seed : int or None
        Forwarded to every ``ak.randint`` call.

    Notes
    -----
    NOTE(review): all four ``ak.randint`` calls receive the same ``seed``, so
    with a fixed seed the operands are presumably identical -- confirm that
    this is intended.
    """
    print(">>> arkouda bigint bitwise binops")
    num_locales = ak.get_config()["numLocales"]
    total_len = N_per_locale * num_locales
    print("numLocales = {}, N = {:,}".format(num_locales, total_len))

    a1 = ak.randint(0, 2**32, total_len, dtype=ak.uint64, seed=seed)
    a2 = ak.randint(0, 2**32, total_len, dtype=ak.uint64, seed=seed)
    a = ak.bigint_from_uint_arrays([a1, a2], max_bits=max_bits)
    b1 = ak.randint(0, 2**32, total_len, dtype=ak.uint64, seed=seed)
    b2 = ak.randint(0, 2**32, total_len, dtype=ak.uint64, seed=seed)
    b = ak.bigint_from_uint_arrays([b1, b2], max_bits=max_bits)

    # One bigint operand is built from two uint64 arrays: 2 * 8 bytes per element.
    operand_bytes = total_len * 8 * 2

    and_times, or_times, shift_times = [], [], []
    for _ in range(trials):
        tic = time.time()
        result = a & b
        toc = time.time()
        and_times.append(toc - tic)

        tic = time.time()
        result = a | b
        toc = time.time()
        or_times.append(toc - tic)

        tic = time.time()
        result = a >> 10
        toc = time.time()
        shift_times.append(toc - tic)

    avg_and = sum(and_times) / trials
    avg_or = sum(or_times) / trials
    avg_shift = sum(shift_times) / trials

    # AND / OR are credited with two operands' worth of bytes, the shift with one.
    summary = (
        (
            "Average bigint AND time = {:.4f} sec",
            "Average bigint AND rate = {:.2f} GiB/sec",
            avg_and,
            operand_bytes * 2,
        ),
        (
            "Average bigint OR time = {:.4f} sec",
            "Average bigint OR rate = {:.2f} GiB/sec",
            avg_or,
            operand_bytes * 2,
        ),
        (
            "Average bigint SHIFT time = {:.4f} sec",
            "Average bigint SHIFT rate = {:.2f} GiB/sec",
            avg_shift,
            operand_bytes,
        ),
    )
    for position, (time_fmt, rate_fmt, avg, moved) in enumerate(summary):
        if position > 0:
            # Blank separator line between operations (none after the last).
            print()
        print(time_fmt.format(avg))
        print(rate_fmt.format(moved / avg / 2**30))


def check_correctness(max_bits, seed):
    """Assert that bigint ``&``, ``|`` and ``>> 10`` agree with NumPy.

    Draws two NumPy integer arrays of 10**4 values in ``[0, 2**32)``, converts
    them to arkouda bigint arrays and compares each arkouda result with the
    NumPy result for the same operation.

    Parameters
    ----------
    max_bits : int
        Passed to ``ak.array``; unless it is ``-1`` the NumPy reference is
        reduced modulo ``2**max_bits`` before the comparison.
    seed : int or None
        When not ``None``, seeds NumPy's global random state first.

    Raises
    ------
    AssertionError
        If any element of any result differs from the reference.
    """
    sample_size = 10**4
    if seed is not None:
        np.random.seed(seed)
    np_a = np.random.randint(0, 2**32, sample_size)
    np_b = np.random.randint(0, 2**32, sample_size)
    ak_a = ak.array(np_a, dtype=ak.bigint, max_bits=max_bits)
    ak_b = ak.array(np_b, dtype=ak.bigint, max_bits=max_bits)

    expected_results = [np_a & np_b, np_a | np_b, np_a >> 10]
    actual_results = [ak_a & ak_b, ak_a | ak_b, ak_a >> 10]

    for expected, actual in zip(expected_results, actual_results):
        if max_bits != -1:
            # Mirror the wrap-around applied when max_bits is set.
            expected = (expected % (2**max_bits)).astype(np.uint)
        actual_np = actual.to_ndarray().astype(np.uint)
        assert np.all(expected == actual_np)


def create_parser():
    """Build the command-line parser for the bigint bitwise binops benchmark.

    Returns
    -------
    argparse.ArgumentParser
        Parser with positional ``hostname`` and ``port`` plus the optional
        ``--size``, ``--trials``, ``--max-bits``, ``--correctness-only`` and
        ``--seed`` arguments.
    """
    cli = argparse.ArgumentParser(description="Run the bigint bitwise binops benchmarks")

    # (flags, keyword options) in the order they should appear in --help.
    argument_specs = [
        (("hostname",), {"help": "Hostname of arkouda server"}),
        (("port",), {"type": int, "help": "Port of arkouda server"}),
        (
            ("-n", "--size"),
            {"type": int, "default": 10**8, "help": "Problem size: length of arrays A and B"},
        ),
        (
            ("-t", "--trials"),
            {"type": int, "default": 6, "help": "Number of times to run the benchmark"},
        ),
        (
            ("--max-bits",),
            {
                "type": int,
                "default": -1,
                "help": "Maximum number of bits, so values > 2**max_bits will wraparound. -1 is interpreted as no maximum",
            },
        ),
        (
            ("--correctness-only",),
            {
                "default": False,
                "action": "store_true",
                "help": "Only check correctness, not performance.",
            },
        ),
        (
            ("-s", "--seed"),
            {"default": None, "type": int, "help": "Value to initialize random number generator"},
        ),
    ]
    for flags, options in argument_specs:
        cli.add_argument(*flags, **options)
    return cli


if __name__ == "__main__":
    import sys

    # Script entry point: parse the CLI, connect to the arkouda server, then
    # run either the correctness check or the timed benchmark.
    args = create_parser().parse_args()
    ak.verbose = False
    ak.connect(server=args.hostname, port=args.port)

    if not args.correctness_only:
        print("array size = {:,}".format(args.size))
        print("number of trials = ", args.trials)
        time_ak_bitwise_binops(args.size, args.trials, args.max_bits, args.seed)
    else:
        check_correctness(args.max_bits, args.seed)

    sys.exit(0)
22 changes: 18 additions & 4 deletions benchmarks/bigint_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,15 @@ def time_bigint_conversion(N_per_locale, trials, seed, max_bits):
(2 * a.size * a.itemsize) / 2**30 / avg_conversion
)
)
assert ak.all(a == u_arrays[0])
assert ak.all(b == u_arrays[1])
if max_bits == -1 or max_bits > 128:
assert ak.all(a == u_arrays[0])
assert ak.all(b == u_arrays[1])
elif max_bits <= 64:
assert ak.all(b % (2**max_bits - 1) == u_arrays[0])
else:
max_bits -= 64
assert ak.all(a & (2**max_bits - 1) == u_arrays[0])
assert ak.all(b == u_arrays[1])


def check_correctness(seed, max_bits):
Expand All @@ -54,8 +61,15 @@ def check_correctness(seed, max_bits):
b = ak.randint(0, N, N, dtype=ak.uint64, seed=seed)
u_arrays = ak.bigint_from_uint_arrays([a, b], max_bits=max_bits).bigint_to_uint_arrays()

assert ak.all(a == u_arrays[0])
assert ak.all(b == u_arrays[1])
if max_bits == -1 or max_bits > 128:
assert ak.all(a == u_arrays[0])
assert ak.all(b == u_arrays[1])
elif max_bits <= 64:
assert ak.all(b % (2**max_bits - 1) == u_arrays[0])
else:
max_bits -= 64
assert ak.all(a & (2**max_bits - 1) == u_arrays[0])
assert ak.all(b == u_arrays[1])


def create_parser():
Expand Down
117 changes: 117 additions & 0 deletions benchmarks/bigint_stream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#!/usr/bin/env python3

import argparse
import time

import numpy as np

import arkouda as ak


def time_ak_stream(N_per_locale, trials, alpha, max_bits, random, seed):
    """Time ``a + b * alpha`` on arkouda bigint arrays and print the average.

    The operands are either bigints built from two random uint64 arrays each
    (when ``random`` is truthy or a ``seed`` is given) or bigints built from a
    single uint64 array of ones. The byte count used for the rate is
    ``N * 8 * 3``, doubled in the random case.

    Parameters
    ----------
    N_per_locale : int
        Elements per locale; multiplied by ``numLocales`` from
        ``ak.get_config()`` to obtain the array length.
    trials : int
        Number of timed repetitions; also the divisor of the average.
    alpha : int
        Scalar multiplied with ``b``.
    max_bits : int
        Forwarded to ``ak.bigint_from_uint_arrays``.
    random : bool
        Request random operands instead of ones.
    seed : int or None
        Forwarded to every ``ak.randint`` call; a non-``None`` value also
        selects random operands.

    Notes
    -----
    NOTE(review): all four ``ak.randint`` calls receive the same ``seed``, so
    with a fixed seed the operands are presumably identical -- confirm that
    this is intended.
    """
    print(">>> arkouda bigint stream")
    num_locales = ak.get_config()["numLocales"]
    total_len = N_per_locale * num_locales
    print("numLocales = {}, N = {:,}".format(num_locales, total_len))

    use_random = random or seed is not None
    if not use_random:
        # Ones case: each bigint is backed by a single uint64 array.
        uint_arrays_per_operand = 1
        a = ak.bigint_from_uint_arrays([ak.ones(total_len, dtype=ak.uint64)], max_bits=max_bits)
        b = ak.bigint_from_uint_arrays([ak.ones(total_len, dtype=ak.uint64)], max_bits=max_bits)
    else:
        # Random case: each bigint is backed by two uint64 arrays.
        uint_arrays_per_operand = 2
        a1 = ak.randint(0, 2**32, total_len, dtype=ak.uint64, seed=seed)
        a2 = ak.randint(0, 2**32, total_len, dtype=ak.uint64, seed=seed)
        a = ak.bigint_from_uint_arrays([a1, a2], max_bits=max_bits)
        b1 = ak.randint(0, 2**32, total_len, dtype=ak.uint64, seed=seed)
        b2 = ak.randint(0, 2**32, total_len, dtype=ak.uint64, seed=seed)
        b = ak.bigint_from_uint_arrays([b1, b2], max_bits=max_bits)

    # Three arrays (a, b and the result) of 8-byte words per backing array.
    bytes_moved = total_len * 8 * 3 * uint_arrays_per_operand

    elapsed = []
    for _ in range(trials):
        tic = time.time()
        result = a + b * alpha
        toc = time.time()
        elapsed.append(toc - tic)
    average = sum(elapsed) / trials

    print("Average bigint stream time = {:.4f} sec".format(average))
    rate = bytes_moved / average
    print("Average bigint stream rate = {:.2f} GiB/sec".format(rate / 2**30))


def check_correctness(alpha, max_bits, random, seed):
    """Assert that the bigint stream result ``a + b * alpha`` agrees with NumPy.

    Uses 10**4 elements: random integers in ``[0, 2**32)`` when ``random`` is
    truthy or a ``seed`` is given, otherwise arrays of ones.

    Parameters
    ----------
    alpha : int
        Scalar multiplied with ``b``.
    max_bits : int
        Passed to ``ak.array``; unless it is ``-1`` the NumPy reference is
        reduced modulo ``2**max_bits`` before the comparison.
    random : bool
        Request random operands instead of ones.
    seed : int or None
        When not ``None``, seeds NumPy's global random state and selects
        random operands.

    Raises
    ------
    AssertionError
        If any element differs from the reference.

    Notes
    -----
    NOTE(review): the reference is computed with NumPy integer arrays, so a
    large ``alpha`` could presumably overflow there while the bigint result
    does not -- confirm the supported range of ``alpha``.
    """
    sample_size = 10**4
    if seed is not None:
        np.random.seed(seed)

    if not random and seed is None:
        a = np.ones(sample_size, dtype=np.uint)
        b = np.ones(sample_size, dtype=np.uint)
    else:
        a = np.random.randint(0, 2**32, sample_size)
        b = np.random.randint(0, 2**32, sample_size)

    expected = a + b * alpha
    actual = (
        ak.array(a, dtype=ak.bigint, max_bits=max_bits)
        + ak.array(b, dtype=ak.bigint, max_bits=max_bits) * alpha
    )

    if max_bits != -1:
        # Mirror the wrap-around applied when max_bits is set.
        expected = (expected % (2**max_bits)).astype(np.uint)
    actual_np = actual.to_ndarray().astype(np.uint)
    assert np.all(expected == actual_np)


def create_parser():
    """Build the command-line parser for the bigint stream benchmark.

    Returns
    -------
    argparse.ArgumentParser
        Parser with positional ``hostname`` and ``port`` plus the optional
        ``--size``, ``--trials``, ``--max-bits``, ``--randomize``, ``--alpha``,
        ``--correctness-only`` and ``--seed`` arguments. Every numeric option,
        including ``--alpha``, is converted to ``int`` by the parser.
    """
    parser = argparse.ArgumentParser(description="Run the bigint stream benchmark: C = A + alpha*B")
    parser.add_argument("hostname", help="Hostname of arkouda server")
    parser.add_argument("port", type=int, help="Port of arkouda server")
    parser.add_argument(
        "-n", "--size", type=int, default=10**8, help="Problem size: length of arrays A and B"
    )
    parser.add_argument(
        "-t", "--trials", type=int, default=6, help="Number of times to run the benchmark"
    )
    parser.add_argument(
        "--max-bits",
        type=int,
        default=-1,
        help="Maximum number of bits, so values > 2**max_bits will wraparound. -1 is interpreted as no maximum",
    )
    parser.add_argument(
        "-r",
        "--randomize",
        default=False,
        action="store_true",
        help="Fill arrays with random values instead of ones",
    )
    # type=int keeps alpha consistent with the other numeric options and makes
    # argparse reject a non-integer value with a usage error instead of handing
    # a string to the caller.
    parser.add_argument("-a", "--alpha", type=int, default=1, help="Scalar multiple")
    parser.add_argument(
        "--correctness-only",
        default=False,
        action="store_true",
        help="Only check correctness, not performance.",
    )
    parser.add_argument(
        "-s", "--seed", default=None, type=int, help="Value to initialize random number generator"
    )
    return parser


if __name__ == "__main__":
    import sys

    # Script entry point: parse the CLI, connect to the arkouda server, then
    # run either the correctness check or the timed benchmark.
    args = create_parser().parse_args()
    # Make sure alpha is an integer before it is used in arithmetic.
    args.alpha = int(args.alpha)
    ak.verbose = False
    ak.connect(server=args.hostname, port=args.port)

    if not args.correctness_only:
        print("array size = {:,}".format(args.size))
        print("number of trials = ", args.trials)
        time_ak_stream(
            args.size, args.trials, args.alpha, args.max_bits, args.randomize, args.seed
        )
    else:
        check_correctness(args.alpha, args.max_bits, args.randomize, args.seed)

    sys.exit(0)
12 changes: 12 additions & 0 deletions benchmarks/graph_infra/arkouda.graph
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,15 @@ graphkeys: bigint_from_uint_arrays GiB/s, bigint_to_uint_arrays GiB/s
files: bigint_conversion.dat, bigint_conversion.dat
graphtitle: Bigint Conversion Performance
ylabel: Performance (GiB/s)

perfkeys: Average bigint stream rate =
graphkeys: bigint stream GiB/s
files: bigint_stream.dat
graphtitle: Bigint Stream Performance
ylabel: Performance (GiB/s)

perfkeys: Average bigint AND rate =, Average bigint OR rate =, Average bigint SHIFT rate =
graphkeys: bigint AND GiB/s, bigint OR GiB/s, bigint SHIFT GiB/s
files: bigint_bitwise_binops.dat, bigint_bitwise_binops.dat, bigint_bitwise_binops.dat
graphtitle: Bigint Bitwise Binops Performance
ylabel: Performance (GiB/s)
6 changes: 6 additions & 0 deletions benchmarks/graph_infra/bigint_bitwise_binops.perfkeys
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Average bigint AND time =
Average bigint AND rate =
Average bigint OR time =
Average bigint OR rate =
Average bigint SHIFT time =
Average bigint SHIFT rate =
2 changes: 2 additions & 0 deletions benchmarks/graph_infra/bigint_stream.perfkeys
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Average bigint stream time =
Average bigint stream rate =
2 changes: 2 additions & 0 deletions benchmarks/run_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
"dataframe",
"encode",
"bigint_conversion",
"bigint_stream",
"bigint_bitwise_binops",
]

if os.getenv("ARKOUDA_SERVER_PARQUET_SUPPORT"):
Expand Down
9 changes: 5 additions & 4 deletions src/BigIntMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,11 @@ module BigIntMsg {

if max_bits != -1 {
// modBy should always be non-zero since we start at 1 and left shift
var modBy = 1:bigint;
modBy <<= max_bits;
forall bA in bigIntArray with (var local_modBy = modBy) {
bA.mod(bA, local_modBy);
var max_size = 1:bigint;
max_size <<= max_bits;
max_size -= 1;
forall bA in bigIntArray with (var local_max_size = max_size) {
bA &= local_max_size;
}
}

Expand Down
Loading