Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: add p2p benchmark code #6907

Merged
merged 6 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,6 @@
[submodule "modules/yaksa"]
path = modules/yaksa
url = https://github.com/pmodels/yaksa
[submodule "modules/mydef_boot"]
path = modules/mydef_boot
url = https://github.com/pmodels/mydef_boot
15 changes: 13 additions & 2 deletions autogen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ do_hydra=yes
do_romio=yes
do_pmi=yes
do_doc=no
do_mydef=yes

yaksa_depth=

Expand Down Expand Up @@ -536,6 +537,14 @@ fn_json_gen() {
echo "done"
}

# Regenerate the benchmark sources in test/mpi/bench by running that
# directory's autogen.sh with the MyDef tools from the modules/mydef_boot
# submodule placed on PATH, PERL5LIB and MYDEFLIB.
fn_mydef() {
    MYDEF_BOOT="$PWD/modules/mydef_boot"
    # Quote every expansion: some /bin/sh implementations word-split the
    # argument of `export VAR=$value`, which breaks a checkout whose path
    # contains whitespace.
    export PATH="$MYDEF_BOOT/bin:$PATH"
    export PERL5LIB="$MYDEF_BOOT/lib/perl5"
    export MYDEFLIB="$MYDEF_BOOT/lib/MyDef"
    # Run in a subshell so the directory change does not leak to the caller.
    (cd test/mpi/bench && ./autogen.sh)
}

# internal
_patch_libtool() {
_file=$1
Expand Down Expand Up @@ -731,9 +740,9 @@ EOF
echo ">= $ver"
else
echo "bad autoconf installation"
echo "--- autoreconf diagnositcs ---"
echo "--- autoreconf diagnostics ---"
$(cat autoreconf.err)
echo "--- autoreconf diagnositcs ---"
echo "--- autoreconf diagnostics ---"
cat <<EOF
You either do not have autoconf in your path or it is too old (version
$ver or higher required). You may be able to use
Expand Down Expand Up @@ -1102,3 +1111,5 @@ fn_build_configure
fn_ch4_api

fn_json_gen

fn_mydef
1 change: 1 addition & 0 deletions modules/mydef_boot
Submodule mydef_boot added at ea2d68
3 changes: 3 additions & 0 deletions test/mpi/bench/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/*.c
/p2p_bw
/p2p_latency
17 changes: 17 additions & 0 deletions test/mpi/bench/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
##
## Copyright (C) by Argonne National Laboratory
## See COPYRIGHT in top-level directory
##

include $(top_srcdir)/Makefile_single.mtest
LDADD += -lm

## for all programs that are just built from the single corresponding source
## file, we don't need per-target _SOURCES rules, automake will infer them
## correctly
noinst_PROGRAMS = \
p2p_latency \
p2p_bw

.def.c:
mydef_page $<
3 changes: 3 additions & 0 deletions test/mpi/bench/autogen.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/sh
# Run mydef_page on every MyDef page (*.def) in the current directory.
for a in *.def ; do
    # When nothing matches, the glob is left as the literal string "*.def";
    # skip it instead of handing a non-existent file to mydef_page.
    [ -e "$a" ] || continue
    mydef_page "$a"
done
3 changes: 3 additions & 0 deletions test/mpi/bench/config
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module: c
CC: mpicc
run: mpirun -n 2
107 changes: 107 additions & 0 deletions test/mpi/bench/macros/bench_frame.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/*
* bench_frame : boilerplate for mpi program
* measure(iter) : measures `tf_dur` for $(iter) iterations
* run_stat(N, var) : run N measurements and obtain (avg, std) in sum1, sum2
* warm_up(iter, dur): repeat until measurements (iter, dur) stabilize
* report_latency(msgsize, MULTIPLICITY) : print a line of latency result
*/

# bench_frame: page frame that emits the includes and main() of a benchmark.
#   1. Includes stdio and stdlib, plus mpitest.h when HAS_MTEST is set, mpi otherwise.
#   2. Initializes with MTest_Init (HAS_MTEST) or MPI_Init.
#   3. Reads the rank (grank) and size (gsize) of MPI_COMM_WORLD.
#   4. When MIN_PROCS is set and gsize is smaller, prints a message and returns 1.
#   5. Allocates `buf` of MAX_BUFSIZE bytes: via the mtest_malloc subcode with
#      HAS_MTEST, otherwise via malloc (returning 1 if malloc fails).
#   6. Rank 0 prints "TEST <page name>:"; then report_header and the `main`
#      subcode are called; rank 0 prints a trailing blank line.
#   7. Finalizes with MTest_Finalize(0) or MPI_Finalize.
# NOTE(review): both `return 1` paths leave main before the finalize call.
# NOTE(review): the `@` in `$call @report_header` presumably makes the call
# optional when the subcode is not defined - confirm against MyDef docs.
subcode: bench_frame
$include stdio
$include stdlib
$(if:HAS_MTEST)
$include mpitest.h
$(else)
$include mpi

$function main
$(if:HAS_MTEST)
MTest_Init(NULL, NULL);
$(else)
MPI_Init(NULL, NULL);

$my grank, gsize: int
MPI_Comm_rank(MPI_COMM_WORLD, &grank);
MPI_Comm_size(MPI_COMM_WORLD, &gsize);
$(if:MIN_PROCS)
$if gsize < $(MIN_PROCS)
printf("! Test $(_pagename) requires $(MIN_PROCS) processes !\n");
return 1

MPI_Comm comm = MPI_COMM_WORLD;

$my void *buf
$(if:HAS_MTEST)
$call mtest_malloc, MAX_BUFSIZE
$(else)
buf = malloc(MAX_BUFSIZE)
$if !buf
printf("! Failed to allocate buffer (size=%d)\n", MAX_BUFSIZE)
return 1

$if grank == 0
printf("TEST $(_pagename):\n")
$call @report_header
$call main
$if grank == 0
printf("\n")

$(if:HAS_MTEST)
MTest_Finalize(0);
$(else)
MPI_Finalize();

macros:
use_double: 1

#----------------------------------------
# Type registration for this macro file.
# NOTE(review): presumably lets variables whose name starts with `comm` be
# typed as MPI_Comm without an explicit declaration - confirm against the
# MyDef $register_prefix documentation.
subcode: _autoload
$register_prefix(comm) MPI_Comm

# foreach_size: run BLOCK for size = 0, 1, 2, 4, ... while size < $(MAX_MSG),
# with the macro MSG_SIZE set to the loop variable `size`.
# NOTE(review): MAX_MSG is not defined in the sources visible here; the
# including page presumably has to provide it - confirm.
subcode: foreach_size
$for int size = 0; size < $(MAX_MSG); size = (size==0)?1:size*2
$(set:MSG_SIZE=size)
BLOCK

# measure(iter): time $(iter) executions of BLOCK.
# Stores MPI_Wtime() in tf_start, runs BLOCK $(iter) times, then stores the
# elapsed MPI_Wtime() difference in tf_dur (the total, not a per-iteration value).
subcode: measure(iter)
tf_start = MPI_Wtime()
$for 0:$(iter)
BLOCK
tf_dur = MPI_Wtime() - tf_start

# run_stat(N, var): run BLOCK $(N) times and summarize the value of $(var)
# that each run leaves behind.
# On exit:
#   sum1 - mean of the $(N) samples
#   sum2 - standard deviation of the samples, computed as sqrt(E[x^2] - E[x]^2)
subcode: run_stat(N, var)
    $my double sum1=0, double sum2=0
    $for 0:$(N)
        BLOCK
        sum1 += $(var)
        sum2 += $(var) * $(var)
    sum1 /= $(N)
    sum2 /= $(N)
    # E[x^2] - E[x]^2 can come out slightly negative through floating-point
    # cancellation when the samples are nearly identical; clamp it so sqrt()
    # yields 0 rather than NaN.
    sum2 -= sum1 * sum1
    $if sum2 < 0
        sum2 = 0
    sum2 = sqrt(sum2)

# warm_up(iter, dur): repeat BLOCK until the measurement it produces settles.
# BLOCK is expected to run a measurement using $(iter) and leave its duration
# in $(dur).
#   - MIN_ITER scales $(iter) by 0.001 / $(dur), i.e. the iteration count at
#     which the measured duration would be 0.001 (1 ms if $(dur) is in seconds).
#   - $(iter) starts at 2; whenever it is below MIN_ITER it is raised to
#     MIN_ITER, num_best is reset and the measurement is repeated.
#   - Otherwise num_best counts measurements that were longer than the
#     previous one; the loop ends when num_best reaches 10.
# NOTE(review): MIN_ITER divides by $(dur); a measured duration of 0 looks
# unhandled - confirm whether the timer resolution makes that possible.
subcode: warm_up(iter, dur)
$(set:MIN_ITER=(int) ($(iter) * 0.001 / $(dur)))
$(iter) = 2
$my double last_dur = 1.0
$my int num_best = 0
$while num_best < 10
BLOCK
$if $(iter) < $(MIN_ITER)
$(iter) = $(MIN_ITER)
num_best = 0
continue
# check that the measured duration is no longer monotonically decreasing
$if $(dur) > last_dur
num_best++
last_dur = $(dur)

# header_latency: print the column titles (msgsize, latency, sigma, bandwidth)
# for the lines produced by report_latency.
subcode: header_latency
printf("%12s %10s(us) %6s(us) %12s(MB/s)\n", "msgsize", "latency", "sigma", "bandwidth")

# report_latency(MSGSIZE, MULTIPLICITY): print one result line.
# Reads sum1 and sum2 (the mean and standard deviation left by run_stat),
# divides each by $(MULTIPLICITY) and multiplies by 1e6 to get tf_latency and
# tf_sigma, then computes tf_bw as $(MSGSIZE) / tf_latency.
# NOTE(review): with MPI_Wtime-based samples in seconds the latency is in
# microseconds, so tf_bw is bytes per microsecond (the header labels it MB/s).
subcode: report_latency(MSGSIZE, MULTIPLICITY)
$my tf_latency, tf_sigma, tf_bw
tf_latency = sum1 / ($(MULTIPLICITY)) * 1e6
tf_sigma = sum2 / ($(MULTIPLICITY)) * 1e6
tf_bw = $(MSGSIZE) / tf_latency
printf("%12d %10.3f %6.3f %12.3f\n", $(MSGSIZE), tf_latency, tf_sigma, tf_bw)

79 changes: 79 additions & 0 deletions test/mpi/bench/macros/bench_p2p.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Defines following functions:
* bench_p2p
* bench_send, bench_warmup
* bench_recv
*
* For each measurement -
* First sender tells receiver the `iter` parameter. `iter = 0` means to quit.
* For each iteration runs `send_side` and `recv_side` assuming the measurement on sender side represents a latency measurement.
*
* Caller page defines -
* subcode: send_side, recv_side
* macro:
* MULTIPLICITY: divisor for each measurement
*/

macros:
MIN_PROCS: 2
MAX_BUFSIZE: 5000000 # 5 MB

# Declarations shared by the p2p benchmark functions.
# NOTE(review): $register_name presumably fixes the C type of the named
# parameters (src, dst, size: int; buf: void *) so the fncode signatures can
# omit types - confirm against the MyDef documentation.
# The $define lines provide the message tag (TAG), the tag used for the
# sender/receiver iteration handshake (SYNC_TAG), the buffer size
# (MAX_BUFSIZE) and the number of repeated measurements (NUM_REPEAT).
subcode: _autoload
$register_name(src) int
$register_name(dst) int
$register_name(buf) void *
$register_name(size) int
$define TAG 0
$define SYNC_TAG 100
$define MAX_BUFSIZE 5000000
$define NUM_REPEAT 20

# report_header: hook called by bench_frame before the page's main code;
# prints the latency table header.
subcode: report_header
$call header_latency

# bench_p2p: measure and report one message size between two ranks of `comm`.
#   src rank: bench_warmup picks `iter`; run_stat then takes NUM_REPEAT
#             samples, each being bench_send's duration divided by iter;
#             report_latency prints the line and send_stop tells the
#             receiver to quit.
#   dst rank: runs bench_recv until it is told to stop.
#   any other rank: does nothing.
# MULTIPLICITY defaults to 1 when the page does not set it.
fncode: bench_p2p(comm, src, dst, buf, size)
int rank;
MPI_Comm_rank(comm, &rank)

$(if:!MULTIPLICITY)
$(set:MULTIPLICITY=1)

$if rank == src
iter = bench_warmup(comm, dst, buf, size)
&call run_stat, NUM_REPEAT, tf_latency
tf_latency = bench_send(iter, comm, dst, buf, size)
tf_latency /= iter
$call report_latency, size, $(MULTIPLICITY)
$call send_stop
$elif rank == dst
bench_recv(comm, src, buf, size)

# send_stop: send iter = 0 to dst on SYNC_TAG, which bench_recv treats as the
# signal to leave its loop.
subcode: send_stop
iter = 0;
MPI_Send(&iter, 1, MPI_INT, dst, SYNC_TAG, comm)

#----------------------------------------
# bench_send: sender half of one timed measurement.
# Sends `iter` to dst on SYNC_TAG so the receiver knows how many iterations to
# run, then times `iter` executions of the page's send_side subcode with
# `measure` and returns the total elapsed time tf_dur.
fncode: bench_send(int iter, comm, dst, buf, size)
# synchronize with receiver
MPI_Send(&iter, 1, MPI_INT, dst, SYNC_TAG, comm);

&call measure, iter
$call @send_side

return tf_dur

# bench_recv: receiver half of the benchmark.
# Loops forever: receives an iteration count from src on SYNC_TAG, leaves the
# loop when that count is 0, otherwise runs the page's recv_side subcode that
# many times.
fncode: bench_recv(comm, src, buf, size)
$while 1
int iter;
# synchronize with sender
MPI_Recv(&iter, 1, MPI_INT, src, SYNC_TAG, comm, MPI_STATUS_IGNORE);
$if iter == 0
# time to quit
break
$for i=0:iter
$call @recv_side

# bench_warmup: choose the iteration count for the real measurements.
# Runs bench_send inside the warm_up loop, which adjusts `iter` from the
# measured tf_dur, and returns the final `iter` as an int.
fncode: bench_warmup(comm, dst, buf, size): int
&call warm_up, iter, tf_dur
tf_dur = bench_send(iter, comm, dst, buf, size)
return iter
14 changes: 14 additions & 0 deletions test/mpi/bench/macros/mtest.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
macros:
HAS_MTEST: 1

# mtest_malloc(size): allocate `buf` through the MTest helpers.
# Builds an MTest argument list from argc/argv. On rank 0 (send) and rank 1
# (recv) it reads the memory type from the "sendmem"/"recvmem" argument and
# the device from "senddev"/"recvdev" (default 0), allocates $(size) bytes
# into buf with MTestMalloc and logs the allocation via MTestPrintfMsg.
# The argument list is destroyed afterwards.
# NOTE(review): ranks other than 0 and 1 do not allocate buf here.
subcode: mtest_malloc(size)
MTestArgList *head = MTestArgListCreate(argc, argv)
int send_rank = 0, recv_rank = 1;
$(for:a in send,recv)
$if grank == $(a)_rank
$my mtest_mem_type_e $(a)_memtype, int $(a)_device
$(a)_memtype = MTestArgListGetMemType(head, "$(a)mem")
$(a)_device = MTestArgListGetInt_with_default(head, "$(a)dev", 0)
MTestMalloc($(size), $(a)_memtype, NULL, &buf, $(a)_device)
MTestPrintfMsg(1, "Allocating buffer: memtype=%s, device=%d, size=%d\n", MTest_memtype_name($(a)_memtype), $(a)_device, $(size))
MTestArgListDestroy(head)
27 changes: 27 additions & 0 deletions test/mpi/bench/p2p_bw.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
include: macros/bench_frame.def
include: macros/bench_p2p.def
include: macros/mtest.def

# WINDOW_SIZE: number of nonblocking messages posted per bandwidth iteration;
# also used as this page's MULTIPLICITY.
subcode: _autoload
$define WINDOW_SIZE 64

# p2p_bw: bandwidth benchmark page built on the bench_frame frame.
# MULTIPLICITY is WINDOW_SIZE and `data` expands to the (buffer, count,
# datatype) triple used by send_side/recv_side. The main code calls bench_p2p
# between ranks 0 and 1 for size = 1, 2, 4, ... while size < MAX_BUFSIZE.
page: p2p_bw, bench_frame
MULTIPLICITY: WINDOW_SIZE
data: buf, size, MPI_CHAR

$for int size = 1; size < MAX_BUFSIZE; size *= 2
bench_p2p(comm, 0, 1, buf, size)

# send_side: sender half of one bandwidth iteration.
# Posts WINDOW_SIZE nonblocking sends of $(data) to dst, waits for all of
# them, then blocks on a zero-count acknowledgement from the receiver.
subcode: send_side
    $my MPI_Request reqs[WINDOW_SIZE]
    $for j=0:WINDOW_SIZE
        MPI_Isend($(data), dst, TAG, comm, &reqs[j])
    MPI_Waitall(WINDOW_SIZE, reqs, MPI_STATUSES_IGNORE)
    # Zero-count acknowledgement. Use a valid datatype: MPI_DATATYPE_NULL is
    # a null handle and is not a valid datatype argument even when count is 0,
    # so some MPI implementations reject it. A zero-count message matches
    # whatever datatype the peer uses.
    MPI_Recv(NULL, 0, MPI_CHAR, dst, TAG, comm, MPI_STATUS_IGNORE)

# recv_side: receiver half of one bandwidth iteration.
# Posts WINDOW_SIZE nonblocking receives of $(data) from src, waits for all
# of them, then sends a zero-count acknowledgement back to the sender.
subcode: recv_side
    $my MPI_Request reqs[WINDOW_SIZE]
    $for j=0:WINDOW_SIZE
        MPI_Irecv($(data), src, TAG, comm, &reqs[j])
    MPI_Waitall(WINDOW_SIZE, reqs, MPI_STATUSES_IGNORE)
    # Zero-count acknowledgement. Use a valid datatype: MPI_DATATYPE_NULL is
    # a null handle and is not a valid datatype argument even when count is 0,
    # so some MPI implementations reject it. A zero-count message matches
    # whatever datatype the peer uses.
    MPI_Send(NULL, 0, MPI_CHAR, src, TAG, comm)
19 changes: 19 additions & 0 deletions test/mpi/bench/p2p_latency.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
include: macros/bench_frame.def
include: macros/bench_p2p.def
include: macros/mtest.def

# p2p_latency: ping-pong latency benchmark page built on the bench_frame frame.
# MULTIPLICITY is 2 (each send_side iteration is a send followed by a
# receive) and `data` expands to the (buffer, count, datatype) triple used by
# send_side/recv_side. The main code calls bench_p2p between ranks 0 and 1
# for size 0 and then for size = 1, 2, 4, ... while size < MAX_BUFSIZE.
page: p2p_latency, bench_frame
MULTIPLICITY: 2
data: buf, size, MPI_CHAR

bench_p2p(comm, 0, 1, buf, 0)
$for int size = 1; size < MAX_BUFSIZE; size *= 2
bench_p2p(comm, 0, 1, buf, size)

# send_side: one ping-pong round trip on the sender - send $(data) to dst,
# then receive the reply into the same buffer.
subcode: send_side
MPI_Send($(data), dst, TAG, comm);
MPI_Recv($(data), dst, TAG, comm, MPI_STATUS_IGNORE);

# recv_side: one ping-pong round trip on the receiver - receive $(data) from
# src, then send it back.
subcode: recv_side
MPI_Recv($(data), src, TAG, comm, MPI_STATUS_IGNORE);
MPI_Send($(data), src, TAG, comm);
2 changes: 2 additions & 0 deletions test/mpi/bench/testlist
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
p2p_latency 2 resultTest=TestBench
p2p_bw 2 resultTest=TestBench
1 change: 1 addition & 0 deletions test/mpi/configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -1904,5 +1904,6 @@ AC_OUTPUT(maint/testmerge \
impls/mpich/ulfm/Makefile \
impls/mpich/info/Makefile \
impls/mpich/info/testlist \
bench/Makefile \
)

18 changes: 18 additions & 0 deletions test/mpi/runtests
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,8 @@ sub get_resultTest {
return \&TestStatusNoErrors;
} elsif ($resultTest eq "TestErrFatal") {
return \&TestErrFatal;
} elsif ($resultTest eq "TestBench") {
return \&TestBench;
} else {
die "resultTest $resultTest not defined!\n";
}
Expand Down Expand Up @@ -1112,6 +1114,22 @@ sub TestErrFatal {
return ($found_error, $inline);
}

# TestBench - result checker for benchmark programs.
#
# Only the exit code is checked: 0 means success, non-zero means failure.
# The program's output is echoed to STDOUT and not inspected.
#
# Arguments:
#   $MPIOUT      - read handle carrying the program's output
#   $programname - program name, passed on to expect_status_zero on failure
# Returns:
#   ($found_error, $inline) - $found_error is 1 when close() on the handle
#   reports failure and 0 otherwise; $inline is always the empty string
#   because the output is printed rather than collected.
sub TestBench {
    my ($MPIOUT, $programname) = @_;
    # Start from defined values so callers never see undef on success.
    my $found_error = 0;
    my $inline = "";

    # Read into a lexical so the caller's $_ is left untouched.
    while (my $line = <$MPIOUT>) {
        print STDOUT $line;
    }

    # close() returns false on failure, leaving the status in $?.
    if (!close($MPIOUT)) {
        expect_status_zero($programname, $?);
        $found_error = 1;
    }
    return ($found_error, $inline);
}

# ----------------------------------------------------------------------------
# Output routines:
# OpenOutputs - Open report files and print initial lines
Expand Down