-
Notifications
You must be signed in to change notification settings - Fork 281
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #6907 from hzhou/2311_bench
test: add p2p benchmark code Approved-by: Ken Raffenetti
- Loading branch information
Showing
15 changed files
with
310 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Submodule mydef_boot
added at
ea2d68
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
/*.c | ||
/p2p_bw | ||
/p2p_latency |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
## | ||
## Copyright (C) by Argonne National Laboratory | ||
## See COPYRIGHT in top-level directory | ||
## | ||
|
||
include $(top_srcdir)/Makefile_single.mtest | ||
## append the math library to the link line (the benchmark statistics code calls sqrt) | ||
LDADD += -lm | ||
|
||
## for all programs that are just built from the single corresponding source | ||
## file, we don't need per-target _SOURCES rules, automake will infer them | ||
## correctly | ||
noinst_PROGRAMS = \ | ||
p2p_latency \ | ||
p2p_bw | ||
|
||
## suffix rule: a .c file is made from the .def file of the same name by | ||
## running mydef_page on that .def file ($<) | ||
.def.c: | ||
mydef_page $< |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Run mydef_page on every .def file in the current directory. | ||
for a in *.def ; do | ||
# quote the name so a path with spaces or glob characters stays one argument | ||
mydef_page "$a" | ||
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
module: c | ||
CC: mpicc | ||
run: mpirun -n 2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
/* | ||
* bench_frame : boilerplate for mpi program | ||
* measure(iter) : measures `tf_dur` for $(iter) iterations | ||
* run_stat(N, var) : run N measurements and obtain (avg, std) in sum1, sum2 | ||
* warm_up(iter, dur): repeat until measurements (iter, dur) stabilize | ||
* report_latency(msgsize, MULTIPLICITY) : print a line of latency result | ||
*/ | ||
|
||
# bench_frame: boilerplate wrapped around a benchmark page (pages name it in their `page:` line). | ||
# Emits the includes and a main function that initializes MPI, optionally checks the | ||
# process count, allocates `buf`, runs the page's `main` code and finalizes. | ||
subcode: bench_frame | ||
$include stdio | ||
$include stdlib | ||
# with HAS_MTEST defined, mpitest.h is included instead of mpi | ||
$(if:HAS_MTEST) | ||
$include mpitest.h | ||
$(else) | ||
$include mpi | ||
|
||
$function main | ||
$(if:HAS_MTEST) | ||
MTest_Init(NULL, NULL); | ||
$(else) | ||
MPI_Init(NULL, NULL); | ||
|
||
$my grank, gsize: int | ||
MPI_Comm_rank(MPI_COMM_WORLD, &grank); | ||
MPI_Comm_size(MPI_COMM_WORLD, &gsize); | ||
# the process-count check is only generated when the macro MIN_PROCS is defined | ||
$(if:MIN_PROCS) | ||
$if gsize < $(MIN_PROCS) | ||
printf("! Test $(_pagename) requires $(MIN_PROCS) processes !\n"); | ||
# NOTE(review): this return comes before the finalize calls at the end of the frame -- confirm that is intended | ||
return 1 | ||
|
||
MPI_Comm comm = MPI_COMM_WORLD; | ||
|
||
$my void *buf | ||
# buffer of size MAX_BUFSIZE: through mtest_malloc under HAS_MTEST, plain malloc otherwise | ||
$(if:HAS_MTEST) | ||
$call mtest_malloc, MAX_BUFSIZE | ||
$(else) | ||
buf = malloc(MAX_BUFSIZE) | ||
$if !buf | ||
printf("! Failed to allocate buffer (size=%d)\n", MAX_BUFSIZE) | ||
return 1 | ||
|
||
# rank 0 prints the test name; report_header is called with the `@` prefix | ||
# (presumably making the call optional when the page defines no such subcode -- confirm) | ||
$if grank == 0 | ||
printf("TEST $(_pagename):\n") | ||
$call @report_header | ||
# the page's own code goes here | ||
$call main | ||
$if grank == 0 | ||
printf("\n") | ||
|
||
$(if:HAS_MTEST) | ||
MTest_Finalize(0); | ||
$(else) | ||
MPI_Finalize(); | ||
|
||
# NOTE(review): use_double is not referenced by any code shown here; presumably a MyDef setting -- confirm | ||
macros: | ||
use_double: 1 | ||
|
||
#---------------------------------------- | ||
# _autoload: presumably pulled in automatically by MyDef for pages that include this file -- confirm | ||
subcode: _autoload | ||
# presumably lets untyped names starting with `comm` take the C type MPI_Comm | ||
# (the bench_* functions declare their `comm` parameter without a type) -- confirm | ||
$register_prefix(comm) MPI_Comm | ||
|
||
# foreach_size: runs BLOCK for size = 0, 1, 2, 4, ... while size < $(MAX_MSG), | ||
# with the macro MSG_SIZE set to the text `size`. | ||
# NOTE(review): MAX_MSG is not defined in the code shown here -- the caller has to supply it. | ||
subcode: foreach_size | ||
$for int size = 0; size < $(MAX_MSG); size = (size==0)?1:size*2 | ||
$(set:MSG_SIZE=size) | ||
BLOCK | ||
|
||
# measure(iter): time $(iter) repetitions of BLOCK (the code supplied at the `&call` site). | ||
# Leaves the MPI_Wtime() difference for the whole loop, not per repetition, in tf_dur. | ||
subcode: measure(iter) | ||
tf_start = MPI_Wtime() | ||
$for 0:$(iter) | ||
BLOCK | ||
tf_dur = MPI_Wtime() - tf_start | ||
|
||
# run_stat(N, var): execute BLOCK $(N) times; BLOCK is expected to set $(var). | ||
# On exit sum1 holds the mean of $(var) and sum2 its standard deviation, | ||
# computed as sqrt(mean of squares - square of mean). | ||
subcode: run_stat(N, var) | ||
$my double sum1=0, double sum2=0 | ||
$for 0:$(N) | ||
BLOCK | ||
sum1 += $(var) | ||
sum2 += $(var) * $(var) | ||
sum1 /= $(N) | ||
sum2 /= $(N) | ||
# NOTE(review): rounding can make this argument slightly negative when the samples are | ||
# (nearly) identical, which would yield NaN -- confirm whether a clamp is wanted | ||
sum2 = sqrt(sum2 - sum1 * sum1) | ||
|
||
# warm_up(iter, dur): choose an iteration count for the measurement in BLOCK. | ||
# BLOCK is expected to run a measurement of $(iter) iterations and store its duration in $(dur). | ||
# The loop ends once num_best, which counts measurements slower than the previous one, reaches 10. | ||
subcode: warm_up(iter, dur) | ||
# iteration count that scales the latest measurement to a duration of 0.001 | ||
# (1 ms when $(dur) is in seconds, as with an MPI_Wtime difference) | ||
$(set:MIN_ITER=(int) ($(iter) * 0.001 / $(dur))) | ||
$(iter) = 2 | ||
$my double last_dur = 1.0 | ||
$my int num_best = 0 | ||
$while num_best < 10 | ||
BLOCK | ||
# too few iterations: raise the count to MIN_ITER and restart the counter | ||
$if $(iter) < $(MIN_ITER) | ||
$(iter) = $(MIN_ITER) | ||
num_best = 0 | ||
continue | ||
# check that $(dur) is no longer monotonically decreasing | ||
$if $(dur) > last_dur | ||
num_best++ | ||
last_dur = $(dur) | ||
|
||
# header_latency: print the column titles (msgsize, latency, sigma, bandwidth) for the result rows | ||
subcode: header_latency | ||
printf("%12s %10s(us) %6s(us) %12s(MB/s)\n", "msgsize", "latency", "sigma", "bandwidth") | ||
|
||
# report_latency(MSGSIZE, MULTIPLICITY): print one result row from sum1/sum2 (as left by run_stat). | ||
# sum1 and sum2 are divided by MULTIPLICITY and scaled by 1e6 | ||
# (seconds to microseconds, assuming the samples came from MPI_Wtime differences). | ||
# Bandwidth is MSGSIZE / latency, i.e. bytes per microsecond, printed under the MB/s heading. | ||
subcode: report_latency(MSGSIZE, MULTIPLICITY) | ||
$my tf_latency, tf_sigma, tf_bw | ||
tf_latency = sum1 / ($(MULTIPLICITY)) * 1e6 | ||
tf_sigma = sum2 / ($(MULTIPLICITY)) * 1e6 | ||
tf_bw = $(MSGSIZE) / tf_latency | ||
printf("%12d %10.3f %6.3f %12.3f\n", $(MSGSIZE), tf_latency, tf_sigma, tf_bw) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
/* | ||
* Defines the following functions: | ||
* bench_p2p | ||
* bench_send, bench_warmup | ||
* bench_recv | ||
* | ||
* For each measurement - | ||
* First the sender tells the receiver the `iter` parameter. `iter = 0` means to quit. | ||
* Each iteration then runs `send_side` on the sender and `recv_side` on the receiver, assuming the time measured on the sender side represents the latency. | ||
* | ||
* Caller page defines - | ||
* subcode: send_side, recv_side | ||
* macro: | ||
* MULTIPLICITY: divisor for each measurement | ||
*/ | ||
|
||
# MIN_PROCS feeds the process-count check in bench_frame; | ||
# MAX_BUFSIZE carries the same value as the $define in _autoload | ||
macros: | ||
MIN_PROCS: 2 | ||
MAX_BUFSIZE: 5000000 # 5 MB | ||
|
||
# _autoload: type registrations and constants shared by the p2p benchmark functions | ||
subcode: _autoload | ||
# presumably gives the untyped parameters named src/dst/buf/size these C types -- confirm | ||
$register_name(src) int | ||
$register_name(dst) int | ||
$register_name(buf) void * | ||
$register_name(size) int | ||
# TAG is used for the benchmark messages, SYNC_TAG for the `iter` handshake | ||
$define TAG 0 | ||
$define SYNC_TAG 100 | ||
$define MAX_BUFSIZE 5000000 | ||
# number of measurements bench_p2p passes to run_stat for each message size | ||
$define NUM_REPEAT 20 | ||
|
||
# report_header: hook called from bench_frame before the page's main code; prints the latency column titles | ||
subcode: report_header | ||
$call header_latency | ||
|
||
# bench_p2p: benchmark one message size between ranks `src` and `dst` of `comm`. | ||
# The src rank warms up, takes NUM_REPEAT measurements, prints one result row and tells dst to stop; | ||
# the dst rank serves the requests in bench_recv. | ||
fncode: bench_p2p(comm, src, dst, buf, size) | ||
int rank; | ||
MPI_Comm_rank(comm, &rank) | ||
|
||
# default divisor when the page does not define MULTIPLICITY | ||
$(if:!MULTIPLICITY) | ||
$(set:MULTIPLICITY=1) | ||
|
||
$if rank == src | ||
iter = bench_warmup(comm, dst, buf, size) | ||
&call run_stat, NUM_REPEAT, tf_latency | ||
# one sample: time of a batch of `iter` iterations, divided by `iter` | ||
tf_latency = bench_send(iter, comm, dst, buf, size) | ||
tf_latency /= iter | ||
$call report_latency, size, $(MULTIPLICITY) | ||
$call send_stop | ||
$elif rank == dst | ||
bench_recv(comm, src, buf, size) | ||
|
||
# send_stop: send iter = 0 on SYNC_TAG, the value on which bench_recv leaves its loop | ||
subcode: send_stop | ||
iter = 0; | ||
MPI_Send(&iter, 1, MPI_INT, dst, SYNC_TAG, comm) | ||
|
||
#---------------------------------------- | ||
# bench_send: sender half of one measurement. | ||
# Announces `iter` to dst, then times `iter` executions of the page's send_side code | ||
# and returns the elapsed time for the whole batch (tf_dur). | ||
fncode: bench_send(int iter, comm, dst, buf, size) | ||
# synchronize with receiver | ||
MPI_Send(&iter, 1, MPI_INT, dst, SYNC_TAG, comm); | ||
|
||
&call measure, iter | ||
$call @send_side | ||
|
||
return tf_dur | ||
|
||
# bench_recv: receiver loop. | ||
# Repeatedly receives an iteration count from src on SYNC_TAG; 0 ends the loop, | ||
# any other value runs the page's recv_side code that many times. | ||
fncode: bench_recv(comm, src, buf, size) | ||
$while 1 | ||
int iter; | ||
# synchronize with sender | ||
MPI_Recv(&iter, 1, MPI_INT, src, SYNC_TAG, comm, MPI_STATUS_IGNORE); | ||
$if iter == 0 | ||
# time to quit | ||
break | ||
$for i=0:iter | ||
$call @recv_side | ||
|
||
# bench_warmup: let warm_up pick the iteration count, using bench_send as the timed measurement; | ||
# returns that count. | ||
# NOTE(review): `iter` and `tf_dur` are not declared here; presumably MyDef declares them -- confirm | ||
fncode: bench_warmup(comm, dst, buf, size): int | ||
&call warm_up, iter, tf_dur | ||
tf_dur = bench_send(iter, comm, dst, buf, size) | ||
return iter |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# HAS_MTEST selects the mpitest.h / MTest_* branches in bench_frame | ||
macros: | ||
HAS_MTEST: 1 | ||
|
||
# mtest_malloc(size): allocate `buf` through the MTest helpers. | ||
# The body below is generated twice, once with a=send (rank 0) and once with a=recv (rank 1); | ||
# each copy reads the "<a>mem" and "<a>dev" arguments and passes them to MTestMalloc. | ||
# NOTE(review): relies on argc, argv, grank and buf being in scope where the subcode is expanded -- confirm | ||
subcode: mtest_malloc(size) | ||
MTestArgList *head = MTestArgListCreate(argc, argv) | ||
int send_rank = 0, recv_rank = 1; | ||
$(for:a in send,recv) | ||
$if grank == $(a)_rank | ||
$my mtest_mem_type_e $(a)_memtype, int $(a)_device | ||
$(a)_memtype = MTestArgListGetMemType(head, "$(a)mem") | ||
# device argument defaults to 0 when not given | ||
$(a)_device = MTestArgListGetInt_with_default(head, "$(a)dev", 0) | ||
MTestMalloc($(size), $(a)_memtype, NULL, &buf, $(a)_device) | ||
MTestPrintfMsg(1, "Allocating buffer: memtype=%s, device=%d, size=%d\n", MTest_memtype_name($(a)_memtype), $(a)_device, $(size)) | ||
MTestArgListDestroy(head) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
include: macros/bench_frame.def | ||
include: macros/bench_p2p.def | ||
include: macros/mtest.def | ||
|
||
# WINDOW_SIZE: number of nonblocking messages posted per iteration in send_side / recv_side | ||
subcode: _autoload | ||
$define WINDOW_SIZE 64 | ||
|
||
# p2p_bw: windowed transfer test between ranks 0 and 1, wrapped in bench_frame | ||
page: p2p_bw, bench_frame | ||
# each iteration moves WINDOW_SIZE messages, so report_latency divides the results by WINDOW_SIZE | ||
MULTIPLICITY: WINDOW_SIZE | ||
# (buffer, count, datatype) arguments spliced into the MPI calls as $(data) | ||
data: buf, size, MPI_CHAR | ||
|
||
# message sizes 1, 2, 4, ... while below MAX_BUFSIZE | ||
$for int size = 1; size < MAX_BUFSIZE; size *= 2 | ||
bench_p2p(comm, 0, 1, buf, size) | ||
|
||
# send_side: one iteration on the sender -- post WINDOW_SIZE nonblocking sends of the same buffer, | ||
# wait for all of them, then wait for a zero-length message back from dst | ||
subcode: send_side | ||
$my MPI_Request reqs[WINDOW_SIZE] | ||
$for j=0:WINDOW_SIZE | ||
MPI_Isend($(data), dst, TAG, comm, &reqs[j]) | ||
MPI_Waitall(WINDOW_SIZE, reqs, MPI_STATUSES_IGNORE) | ||
MPI_Recv(NULL, 0, MPI_DATATYPE_NULL, dst, TAG, comm, MPI_STATUS_IGNORE) | ||
|
||
# recv_side: one iteration on the receiver -- post WINDOW_SIZE nonblocking receives into the same buffer, | ||
# wait for all of them, then send a zero-length message back to src | ||
subcode: recv_side | ||
$my MPI_Request reqs[WINDOW_SIZE] | ||
$for j=0:WINDOW_SIZE | ||
MPI_Irecv($(data), src, TAG, comm, &reqs[j]) | ||
MPI_Waitall(WINDOW_SIZE, reqs, MPI_STATUSES_IGNORE) | ||
MPI_Send(NULL, 0, MPI_DATATYPE_NULL, src, TAG, comm) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
include: macros/bench_frame.def | ||
include: macros/bench_p2p.def | ||
include: macros/mtest.def | ||
|
||
# p2p_latency: ping-pong test between ranks 0 and 1, wrapped in bench_frame | ||
page: p2p_latency, bench_frame | ||
# each iteration is a send followed by a receive (two messages), so report_latency divides the results by 2 | ||
MULTIPLICITY: 2 | ||
# (buffer, count, datatype) arguments spliced into the MPI calls as $(data) | ||
data: buf, size, MPI_CHAR | ||
|
||
# zero-byte case first, then sizes 1, 2, 4, ... while below MAX_BUFSIZE | ||
bench_p2p(comm, 0, 1, buf, 0) | ||
$for int size = 1; size < MAX_BUFSIZE; size *= 2 | ||
bench_p2p(comm, 0, 1, buf, size) | ||
|
||
# send_side: one iteration on the sender -- send the message to dst, then receive the reply | ||
subcode: send_side | ||
MPI_Send($(data), dst, TAG, comm); | ||
MPI_Recv($(data), dst, TAG, comm, MPI_STATUS_IGNORE); | ||
|
||
# recv_side: one iteration on the receiver -- receive the message from src, then send it back | ||
subcode: recv_side | ||
MPI_Recv($(data), src, TAG, comm, MPI_STATUS_IGNORE); | ||
MPI_Send($(data), src, TAG, comm); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
p2p_latency 2 resultTest=TestBench | ||
p2p_bw 2 resultTest=TestBench |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters