Skip to content

Commit

Permalink
Merge pull request #6907 from hzhou/2311_bench
Browse files Browse the repository at this point in the history
test: add p2p benchmark code

Approved-by: Ken Raffenetti
  • Loading branch information
hzhou authored Oct 2, 2024
2 parents 9c907a4 + 99c6adf commit 1f359fe
Show file tree
Hide file tree
Showing 15 changed files with 310 additions and 2 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,6 @@
[submodule "modules/yaksa"]
path = modules/yaksa
url = https://github.com/pmodels/yaksa
[submodule "modules/mydef_boot"]
path = modules/mydef_boot
url = https://github.com/pmodels/mydef_boot
15 changes: 13 additions & 2 deletions autogen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ do_hydra=yes
do_romio=yes
do_pmi=yes
do_doc=no
do_mydef=yes

yaksa_depth=

Expand Down Expand Up @@ -536,6 +537,14 @@ fn_json_gen() {
echo "done"
}

# Generate the benchmark sources under test/mpi/bench using the MyDef
# bootstrap checked out at modules/mydef_boot.
# Side effects: exports PATH, PERL5LIB and MYDEFLIB for the rest of the script.
# Returns the exit status of test/mpi/bench/autogen.sh (or of the failed cd).
fn_mydef() {
    # Quote every expansion so a checkout path containing spaces still works.
    MYDEF_BOOT="$PWD/modules/mydef_boot"
    export PATH="$MYDEF_BOOT/bin:$PATH"
    # Put the bootstrap first, but keep any PERL5LIB the user already has set.
    export PERL5LIB="$MYDEF_BOOT/lib/perl5${PERL5LIB:+:$PERL5LIB}"
    export MYDEFLIB="$MYDEF_BOOT/lib/MyDef"
    # Run in a subshell so the caller's working directory is unchanged.
    (cd test/mpi/bench && ./autogen.sh)
}

# internal
_patch_libtool() {
_file=$1
Expand Down Expand Up @@ -731,9 +740,9 @@ EOF
echo ">= $ver"
else
echo "bad autoconf installation"
echo "--- autoreconf diagnositcs ---"
echo "--- autoreconf diagnostics ---"
$(cat autoreconf.err)
echo "--- autoreconf diagnositcs ---"
echo "--- autoreconf diagnostics ---"
cat <<EOF
You either do not have autoconf in your path or it is too old (version
$ver or higher required). You may be able to use
Expand Down Expand Up @@ -1102,3 +1111,5 @@ fn_build_configure
fn_ch4_api

fn_json_gen

# Honor the do_mydef switch defined with the other do_* defaults; without
# this check the variable is set but never consulted.
if [ "$do_mydef" = "yes" ] ; then
    fn_mydef
fi
1 change: 1 addition & 0 deletions modules/mydef_boot
Submodule mydef_boot added at ea2d68
3 changes: 3 additions & 0 deletions test/mpi/bench/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/*.c
/p2p_bw
/p2p_latency
17 changes: 17 additions & 0 deletions test/mpi/bench/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
##
## Copyright (C) by Argonne National Laboratory
## See COPYRIGHT in top-level directory
##

include $(top_srcdir)/Makefile_single.mtest
LDADD += -lm

## for all programs that are just built from the single corresponding source
## file, we don't need per-target _SOURCES rules, automake will infer them
## correctly
noinst_PROGRAMS = \
p2p_latency \
p2p_bw

.def.c:
mydef_page $<
3 changes: 3 additions & 0 deletions test/mpi/bench/autogen.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#! /bin/sh
# Generate the benchmark sources: run mydef_page on every .def page in
# this directory. Exits non-zero as soon as one page fails to generate.
for a in *.def ; do
    # When nothing matches, the shell leaves the literal pattern "*.def"
    # in $a; skip it rather than handing a bogus name to mydef_page.
    [ -f "$a" ] || continue
    mydef_page "$a" || exit 1
done
3 changes: 3 additions & 0 deletions test/mpi/bench/config
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module: c
CC: mpicc
run: mpirun -n 2
107 changes: 107 additions & 0 deletions test/mpi/bench/macros/bench_frame.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/*
* bench_frame : boilerplate for mpi program
* measure(iter) : measures `tf_dur` for $(iter) iterations
* run_stat(N, var) : run N measurements and obtain (avg, std) in sum1, sum2
* warm_up(iter, dur): repeat until measurements (iter, dur) stabilize
* report_latency(msgsize, MULTIPLICITY) : print a line of latency result
*/

subcode: bench_frame
$include stdio
$include stdlib
$(if:HAS_MTEST)
$include mpitest.h
$(else)
$include mpi

$function main
$(if:HAS_MTEST)
MTest_Init(NULL, NULL);
$(else)
MPI_Init(NULL, NULL);

$my grank, gsize: int
MPI_Comm_rank(MPI_COMM_WORLD, &grank);
MPI_Comm_size(MPI_COMM_WORLD, &gsize);
$(if:MIN_PROCS)
$if gsize < $(MIN_PROCS)
printf("! Test $(_pagename) requires $(MIN_PROCS) processes !\n");
return 1

MPI_Comm comm = MPI_COMM_WORLD;

$my void *buf
$(if:HAS_MTEST)
$call mtest_malloc, MAX_BUFSIZE
$(else)
buf = malloc(MAX_BUFSIZE)
$if !buf
printf("! Failed to allocate buffer (size=%d)\n", MAX_BUFSIZE)
return 1

$if grank == 0
printf("TEST $(_pagename):\n")
$call @report_header
$call main
$if grank == 0
printf("\n")

$(if:HAS_MTEST)
MTest_Finalize(0);
$(else)
MPI_Finalize();

macros:
use_double: 1

#----------------------------------------
subcode: _autoload
$register_prefix(comm) MPI_Comm

subcode: foreach_size
$for int size = 0; size < $(MAX_MSG); size = (size==0)?1:size*2
$(set:MSG_SIZE=size)
BLOCK

subcode: measure(iter)
tf_start = MPI_Wtime()
$for 0:$(iter)
BLOCK
tf_dur = MPI_Wtime() - tf_start

# run_stat(N, var): run BLOCK N times, accumulating $(var) after each run.
# Afterwards sum1 holds the mean of $(var) and sum2 its standard deviation.
subcode: run_stat(N, var)
    $my double sum1=0, double sum2=0
    $for 0:$(N)
        BLOCK
        sum1 += $(var)
        sum2 += $(var) * $(var)
    sum1 /= $(N)
    sum2 /= $(N)
    # variance = E[x^2] - E[x]^2; with nearly identical samples rounding can
    # push it slightly below zero, so clamp before sqrt() to avoid NaN
    sum2 -= sum1 * sum1
    $if sum2 < 0
        sum2 = 0
    sum2 = sqrt(sum2)

# warm_up(iter, dur): repeat BLOCK until the measurement stabilizes.
# BLOCK is expected to run a measurement with $(iter) iterations and store
# the result in $(dur). $(iter) starts at 2 and the loop ends once num_best
# reaches 10.
subcode: warm_up(iter, dur)
# MIN_ITER: $(iter) scaled by 0.001 / $(dur)
# NOTE(review): presumably the iteration count for a ~1 ms measurement, if
# $(dur) is in seconds -- confirm against the caller
$(set:MIN_ITER=(int) ($(iter) * 0.001 / $(dur)))
$(iter) = 2
$my double last_dur = 1.0
$my int num_best = 0
$while num_best < 10
BLOCK
# too few iterations: raise the count, reset the streak and measure again
$if $(iter) < $(MIN_ITER)
$(iter) = $(MIN_ITER)
num_best = 0
continue
# check that the measured duration is no longer monotonically decreasing
$if $(dur) > last_dur
num_best++
last_dur = $(dur)

subcode: header_latency
printf("%12s %10s(us) %6s(us) %12s(MB/s)\n", "msgsize", "latency", "sigma", "bandwidth")

subcode: report_latency(MSGSIZE, MULTIPLICITY)
$my tf_latency, tf_sigma, tf_bw
tf_latency = sum1 / ($(MULTIPLICITY)) * 1e6
tf_sigma = sum2 / ($(MULTIPLICITY)) * 1e6
tf_bw = $(MSGSIZE) / tf_latency
printf("%12d %10.3f %6.3f %12.3f\n", $(MSGSIZE), tf_latency, tf_sigma, tf_bw)

79 changes: 79 additions & 0 deletions test/mpi/bench/macros/bench_p2p.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Defines following functions:
* bench_p2p
* bench_send, bench_warmup
* bench_recv
*
* For each measurement -
* First sender tells receiver the `iter` parameter. `iter = 0` means to quit.
* For each iteration runs `send_side` and `recv_side` assuming the measurement on sender side represents a latency measurement.
*
* Caller page defines -
* subcode: send_side, recv_side
* macro:
* MULTIPLICITY: divisor for each measurement
*/

macros:
MIN_PROCS: 2
MAX_BUFSIZE: 5000000 # 5 MB

subcode: _autoload
$register_name(src) int
$register_name(dst) int
$register_name(buf) void *
$register_name(size) int
$define TAG 0
$define SYNC_TAG 100
$define MAX_BUFSIZE 5000000
$define NUM_REPEAT 20

subcode: report_header
$call header_latency

fncode: bench_p2p(comm, src, dst, buf, size)
int rank;
MPI_Comm_rank(comm, &rank)

$(if:!MULTIPLICITY)
$(set:MULTIPLICITY=1)

$if rank == src
iter = bench_warmup(comm, dst, buf, size)
&call run_stat, NUM_REPEAT, tf_latency
tf_latency = bench_send(iter, comm, dst, buf, size)
tf_latency /= iter
$call report_latency, size, $(MULTIPLICITY)
$call send_stop
$elif rank == dst
bench_recv(comm, src, buf, size)

subcode: send_stop
iter = 0;
MPI_Send(&iter, 1, MPI_INT, dst, SYNC_TAG, comm)

#----------------------------------------
# bench_send: sender half of one measurement.
# Sends iter to dst on SYNC_TAG, then times iter repetitions of the
# page-provided send_side code via the measure subcode.
# Returns tf_dur, the elapsed MPI_Wtime() difference over all iterations
# (callers divide by iter themselves).
fncode: bench_send(int iter, comm, dst, buf, size)
# synchronize with receiver
MPI_Send(&iter, 1, MPI_INT, dst, SYNC_TAG, comm);

&call measure, iter
$call @send_side

return tf_dur

# bench_recv: receiver half of the benchmark.
# Loops receiving an iteration count from src on SYNC_TAG; a count of 0
# ends the loop, any other count runs the page-provided recv_side code
# that many times.
fncode: bench_recv(comm, src, buf, size)
$while 1
int iter;
# synchronize with sender
MPI_Recv(&iter, 1, MPI_INT, src, SYNC_TAG, comm, MPI_STATUS_IGNORE);
$if iter == 0
# time to quit
break
$for i=0:iter
$call @recv_side

fncode: bench_warmup(comm, dst, buf, size): int
&call warm_up, iter, tf_dur
tf_dur = bench_send(iter, comm, dst, buf, size)
return iter
14 changes: 14 additions & 0 deletions test/mpi/bench/macros/mtest.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
macros:
HAS_MTEST: 1

subcode: mtest_malloc(size)
MTestArgList *head = MTestArgListCreate(argc, argv)
int send_rank = 0, recv_rank = 1;
$(for:a in send,recv)
$if grank == $(a)_rank
$my mtest_mem_type_e $(a)_memtype, int $(a)_device
$(a)_memtype = MTestArgListGetMemType(head, "$(a)mem")
$(a)_device = MTestArgListGetInt_with_default(head, "$(a)dev", 0)
MTestMalloc($(size), $(a)_memtype, NULL, &buf, $(a)_device)
MTestPrintfMsg(1, "Allocating buffer: memtype=%s, device=%d, size=%d\n", MTest_memtype_name($(a)_memtype), $(a)_device, $(size))
MTestArgListDestroy(head)
27 changes: 27 additions & 0 deletions test/mpi/bench/p2p_bw.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
include: macros/bench_frame.def
include: macros/bench_p2p.def
include: macros/mtest.def

subcode: _autoload
$define WINDOW_SIZE 64

page: p2p_bw, bench_frame
MULTIPLICITY: WINDOW_SIZE
data: buf, size, MPI_CHAR

$for int size = 1; size < MAX_BUFSIZE; size *= 2
bench_p2p(comm, 0, 1, buf, size)

subcode: send_side
$my MPI_Request reqs[WINDOW_SIZE]
$for j=0:WINDOW_SIZE
MPI_Isend($(data), dst, TAG, comm, &reqs[j])
MPI_Waitall(WINDOW_SIZE, reqs, MPI_STATUSES_IGNORE)
MPI_Recv(NULL, 0, MPI_DATATYPE_NULL, dst, TAG, comm, MPI_STATUS_IGNORE)

subcode: recv_side
$my MPI_Request reqs[WINDOW_SIZE]
$for j=0:WINDOW_SIZE
MPI_Irecv($(data), src, TAG, comm, &reqs[j])
MPI_Waitall(WINDOW_SIZE, reqs, MPI_STATUSES_IGNORE)
MPI_Send(NULL, 0, MPI_DATATYPE_NULL, src, TAG, comm)
19 changes: 19 additions & 0 deletions test/mpi/bench/p2p_latency.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
include: macros/bench_frame.def
include: macros/bench_p2p.def
include: macros/mtest.def

page: p2p_latency, bench_frame
MULTIPLICITY: 2
data: buf, size, MPI_CHAR

bench_p2p(comm, 0, 1, buf, 0)
$for int size = 1; size < MAX_BUFSIZE; size *= 2
bench_p2p(comm, 0, 1, buf, size)

subcode: send_side
MPI_Send($(data), dst, TAG, comm);
MPI_Recv($(data), dst, TAG, comm, MPI_STATUS_IGNORE);

subcode: recv_side
MPI_Recv($(data), src, TAG, comm, MPI_STATUS_IGNORE);
MPI_Send($(data), src, TAG, comm);
2 changes: 2 additions & 0 deletions test/mpi/bench/testlist
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
p2p_latency 2 resultTest=TestBench
p2p_bw 2 resultTest=TestBench
1 change: 1 addition & 0 deletions test/mpi/configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -1904,5 +1904,6 @@ AC_OUTPUT(maint/testmerge \
impls/mpich/ulfm/Makefile \
impls/mpich/info/Makefile \
impls/mpich/info/testlist \
bench/Makefile \
)

18 changes: 18 additions & 0 deletions test/mpi/runtests
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,8 @@ sub get_resultTest {
return \&TestStatusNoErrors;
} elsif ($resultTest eq "TestErrFatal") {
return \&TestErrFatal;
} elsif ($resultTest eq "TestBench") {
return \&TestBench;
} else {
die "resultTest $resultTest not defined!\n";
}
Expand Down Expand Up @@ -1112,6 +1114,22 @@ sub TestErrFatal {
return ($found_error, $inline);
}

# Result check for benchmark programs. Echoes the program's output to STDOUT
# and only checks the exit code: 0 means success, non-zero means failure.
#
# Arguments:
#   $MPIOUT      - read handle on the program's output
#   $programname - test name, handed to expect_status_zero on failure
# Returns ($found_error, $inline): $found_error is 1 when close() fails and
# 0 otherwise; $inline is the output that was read from $MPIOUT.
sub TestBench {
    my ($MPIOUT, $programname) = @_;
    # Start from defined values so callers never get undef back.
    my ($found_error, $inline) = (0, "");

    # Read into a lexical instead of the global $_ so the caller's $_ is
    # left untouched.
    while (my $line = <$MPIOUT>) {
        print STDOUT $line;
        $inline .= $line;
    }
    # For a piped handle close() returns false when the child exited
    # non-zero and leaves the status in $? (presumably $MPIOUT is such a
    # pipe; it is opened by the caller).
    my $rc = close($MPIOUT);
    if (!$rc) {
        expect_status_zero($programname, $?);
        $found_error = 1;
    }
    return ($found_error, $inline);
}

# ----------------------------------------------------------------------------
# Output routines:
# OpenOutputs - Open report files and print initial lines
Expand Down

0 comments on commit 1f359fe

Please sign in to comment.