Skip to content

Commit

Permalink
Merge pull request #12827 from burlen/dynamic_decision_alltoall_max_r…
Browse files Browse the repository at this point in the history
…equests

coll tuned dynamic rules file alltoall_algorithm_max_requests
  • Loading branch information
bosilca authored Nov 7, 2024
2 parents 25feb3b + f6387a4 commit 0f68484
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 42 deletions.
23 changes: 23 additions & 0 deletions ompi/mca/coll/base/coll_base_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
* Copyright (c) 2014-2020 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved.
*
* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -482,6 +485,26 @@ int ompi_coll_base_file_peek_next_char_is(FILE *fptr, int *fileline, int expecte
} while (1);
}

/**
 * Peek at the next non-blank character of the stream.
 *
 * Spaces and tabs are read and thrown away. The first character that is
 * neither (including a newline, so the scan never leaves the current line)
 * is pushed back onto the stream so that the next read sees it again.
 *
 * @param fptr  stream to inspect
 *
 * @return the result of isdigit() on the character that was pushed back:
 *         non-zero if it is a decimal digit, 0 otherwise (end of file
 *         also yields 0).
 */
int ompi_coll_base_file_peek_next_char_isdigit(FILE *fptr)
{
    int c;

    /* consume leading blanks; stop on the first character that is not one */
    do {
        c = fgetc(fptr);
    } while ((' ' == c) || ('\t' == c));

    /* hand the character back to the stream; the caller only wanted a look */
    ungetc(c, fptr);

    return isdigit(c);
}

/**
* There are certainly simpler implementation for this function when performance
* is not a critical point. But, as this function is used during the collective
Expand Down
2 changes: 2 additions & 0 deletions ompi/mca/coll/base/coll_base_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
* All rights reserved.
* Copyright (c) 2014-2020 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -195,6 +196,7 @@ int ompi_coll_base_file_getnext_string(FILE *fptr, int *fileline, char** val);
* eat the value, otherwise put it back into the file.
*/
int ompi_coll_base_file_peek_next_char_is(FILE *fptr, int *fileline, int expected);
/**
 * Skip spaces and tabs, then report whether the next character in the file
 * is a decimal digit. That character is put back into the file, so it is not
 * consumed. Returns non-zero if it is a digit, 0 otherwise.
 */
int ompi_coll_base_file_peek_next_char_isdigit(FILE *fptr);

/* Miscellaneous function */
const char* mca_coll_base_colltype_to_str(int collid);
Expand Down
131 changes: 89 additions & 42 deletions ompi/mca/coll/tuned/coll_tuned_dynamic_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved.
* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -42,13 +43,24 @@
static int fileline=0; /* used for verbose error messages */

/* shorthand for ompi_coll_base_file_getnext_long that always passes the
 * file-scope line counter; callers in this file treat a negative return
 * as a read failure */
#define getnext(fptr, pval) ompi_coll_base_file_getnext_long(fptr, &fileline, pval)
/* shorthand for ompi_coll_base_file_peek_next_char_isdigit: non-zero when the
 * next non-blank character to be read from fptr is a digit */
#define isnext_digit(fptr) ompi_coll_base_file_peek_next_char_isdigit(fptr)

/*
* Reads a rule file called fname
* Builds the algorithm rule table for a max of n_collectives
* The rule file defines a set of sets of rules. The outer set is keyed on
* communicator size while the inner set is keyed on message size. When a
* communicator is constructed its size is used to look up the nested set of
* message size keyed rules. When a collective is called the message size
* determined from its call arguments are used to lookup a specific rule in the
* inner set.
*
* Rules for communicator and message sizes 0 and N (where N is larger than the
* largest key you provide) can be specified to fall back to the fixed decision
* framework above and below the communicator and message size ranges of
* interest.
*
* If an error occurs it removes rule table and then exits with a very verbose
* error message (this stops the user using a half baked rule table
* error message. this stops the user using a half baked rule table.
*
* Returns the number of actual collectives that a rule exists for
* (note 0 is NOT an error)
Expand All @@ -57,9 +69,18 @@ static int fileline=0; /* used for verbose error messages */

int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules, int n_collectives)
{
long CI, NCS, CS, ALG, NMS, FANINOUT, X, MS, SS;
long NCOL = 0, /* number of collectives for which rules are provided */
COLID = 0, /* identifies the collective type to associate the rules with */
NCOMSIZES = 0, /* number of sets of message size rules. the key is communicator size */
COMSIZE = 0, /* communicator size, the key identifying a specific set of message size rules. */
NMSGSIZES = 0, /* number of message size rules in the set. */
MSGSIZE = 0, /* message size, the key identifying a specific rule in the set. */
ALG = 0, /* the collective specific algorithm to use */
FANINOUT = 0, /* algorithm specific tuning parameter */
SEGSIZE = 0, /* algorithm specific tuning parameter */
MAXREQ = 0; /* algorithm specific tuning parameter */
FILE *fptr = (FILE*) NULL;
int x, ncs, nms;
int x, ncs, nms, version;

ompi_coll_alg_rule_t *alg_rules = (ompi_coll_alg_rule_t*) NULL; /* complete table of rules */

Expand Down Expand Up @@ -103,106 +124,131 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t**
goto on_file_error;
}

if( (getnext(fptr, &X) < 0) || (X < 0) ) {
/* consume the optional version identifier */
if (0 == fscanf(fptr, "rule-file-version-%u", &version)) {
version = 1;
}

/* get the number of collectives for which rules are provided in the file */
if( (getnext(fptr, &NCOL) < 0) || (NCOL < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of collectives in configuration file around line %d\n", fileline));
goto on_file_error;
}
if (X>n_collectives) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of collectives in configuration file %ld is greater than number of MPI collectives possible %d ??? error around line %d\n", X, n_collectives, fileline));
if (NCOL>n_collectives) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of collectives in configuration file %ld is greater than number of MPI collectives possible %d ??? error around line %d\n", NCOL, n_collectives, fileline));
goto on_file_error;
}

for (x=0;x<X;x++) { /* for each collective */
for (x=0;x<NCOL;x++) { /* for each collective */

if( (getnext(fptr, &CI) < 0) || (CI < 0) ) {
/* get the collective for which rules are being provided */
if( (getnext(fptr, &COLID) < 0) || (COLID < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read next Collective id in configuration file around line %d\n", fileline));
goto on_file_error;
}
if (CI>=n_collectives) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Collective id in configuration file %ld is greater than MPI collectives possible %d. Error around line %d\n", CI, n_collectives, fileline));
if (COLID>=n_collectives) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Collective id in configuration file %ld is greater than MPI collectives possible %d. Error around line %d\n", COLID, n_collectives, fileline));
goto on_file_error;
}

if (alg_rules[CI].alg_rule_id != CI) {
OPAL_OUTPUT((ompi_coll_tuned_stream, "Internal error in handling collective ID %ld\n", CI));
if (alg_rules[COLID].alg_rule_id != COLID) {
OPAL_OUTPUT((ompi_coll_tuned_stream, "Internal error in handling collective ID %ld\n", COLID));
goto on_file_error;
}
OPAL_OUTPUT((ompi_coll_tuned_stream, "Reading dynamic rule for collective ID %ld\n", CI));
alg_p = &alg_rules[CI];
OPAL_OUTPUT((ompi_coll_tuned_stream, "Reading dynamic rule for collective ID %ld\n", COLID));
alg_p = &alg_rules[COLID];

alg_p->alg_rule_id = CI;
alg_p->alg_rule_id = COLID;
alg_p->n_com_sizes = 0;
alg_p->com_rules = (ompi_coll_com_rule_t *) NULL;

if( (getnext (fptr, &NCS) < 0) || (NCS < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read count of communicators for collective ID %ld at around line %d\n", CI, fileline));
/* get the number of communicator sizes for which a set of rules are to be provided */
if( (getnext (fptr, &NCOMSIZES) < 0) || (NCOMSIZES < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read count of communicators for collective ID %ld at around line %d\n", COLID, fileline));
goto on_file_error;
}
OPAL_OUTPUT((ompi_coll_tuned_stream, "Read communicator count %ld for dynamic rule for collective ID %ld\n", NCS, CI));
alg_p->n_com_sizes = NCS;
alg_p->com_rules = ompi_coll_tuned_mk_com_rules (NCS, CI);
OPAL_OUTPUT((ompi_coll_tuned_stream, "Read communicator count %ld for dynamic rule for collective ID %ld\n", NCOMSIZES, COLID));
alg_p->n_com_sizes = NCOMSIZES;
alg_p->com_rules = ompi_coll_tuned_mk_com_rules (NCOMSIZES, COLID);
if (NULL == alg_p->com_rules) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Cannot allocate com rules for file [%s]\n", fname));
goto on_file_error;
}

for (ncs=0;ncs<NCS;ncs++) { /* for each comm size */
for (ncs=0;ncs<NCOMSIZES;ncs++) { /* for each comm size */

com_p = &(alg_p->com_rules[ncs]);

if( (getnext (fptr, &CS) < 0) || (CS < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read communicator size for collective ID %ld com rule %d at around line %d\n", CI, ncs, fileline));
/* get the communicator size to associate the set of rules with */
if( (getnext (fptr, &COMSIZE) < 0) || (COMSIZE < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read communicator size for collective ID %ld com rule %d at around line %d\n", COLID, ncs, fileline));
goto on_file_error;
}

com_p->mpi_comsize = CS;
com_p->mpi_comsize = COMSIZE;

if( (getnext (fptr, &NMS) < 0) || (NMS < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of message sizes for collective ID %ld com rule %d at around line %d\n", CI, ncs, fileline));
/* get the number of message sizes to specify rules for. inner set size */
if( (getnext (fptr, &NMSGSIZES) < 0) || (NMSGSIZES < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of message sizes for collective ID %ld com rule %d at around line %d\n", COLID, ncs, fileline));
goto on_file_error;
}
OPAL_OUTPUT((ompi_coll_tuned_stream, "Read message count %ld for dynamic rule for collective ID %ld and comm size %ld\n",
NMS, CI, CS));
com_p->n_msg_sizes = NMS;
com_p->msg_rules = ompi_coll_tuned_mk_msg_rules (NMS, CI, ncs, CS);
NMSGSIZES, COLID, COMSIZE));
com_p->n_msg_sizes = NMSGSIZES;
com_p->msg_rules = ompi_coll_tuned_mk_msg_rules (NMSGSIZES, COLID, ncs, COMSIZE);
if (NULL == com_p->msg_rules) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Cannot allocate msg rules for file [%s]\n", fname));
goto on_file_error;
}

msg_p = com_p->msg_rules;

for (nms=0;nms<NMS;nms++) { /* for each msg size */
for (nms=0;nms<NMSGSIZES;nms++) { /* for each msg size */

msg_p = &(com_p->msg_rules[nms]);

if( (getnext (fptr, &MS) < 0) || (MS < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read message size for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
/* read the message size to associate the rule with */
if( (getnext (fptr, &MSGSIZE) < 0) || (MSGSIZE < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read message size for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
goto on_file_error;
}
msg_p->msg_size = (size_t)MS;
msg_p->msg_size = (size_t)MSGSIZE;

/* read the collective specific algorithm identifier */
if( (getnext (fptr, &ALG) < 0) || (ALG < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target algorithm method for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target algorithm method for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
goto on_file_error;
}
msg_p->result_alg = ALG;

/* read faninout tuning parameter. required */
if( (getnext (fptr, &FANINOUT) < 0) || (FANINOUT < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read fan in/out topo for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read fan in/out topo for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
goto on_file_error;
}
msg_p->result_topo_faninout = FANINOUT;

if( (getnext (fptr, &SS) < 0) || (SS < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target segment size for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
/* read segsize tuning parameter. required */
if( (getnext (fptr, &SEGSIZE) < 0) || (SEGSIZE < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target segment size for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
goto on_file_error;
}
msg_p->result_segsize = SS;
msg_p->result_segsize = SEGSIZE;

/* read the max requests tuning parameter. optional */
msg_p->result_max_requests = ompi_coll_tuned_alltoall_max_requests;
if( (version > 1) && isnext_digit(fptr) ) {
if( (getnext (fptr, &MAXREQ) < 0) || (MAXREQ < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read max requests for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
goto on_file_error;
}
msg_p->result_max_requests = MAXREQ;
}

if (!nms && MS) {
/* check the first rule is for 0 size. look-up depends on this */
if (!nms && MSGSIZE) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"All algorithms must specify a rule for message size of zero upwards always first!\n"));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Message size was %lu for collective ID %ld com rule %d msg rule %d at around line %d\n", MS, CI, ncs, nms, fileline));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Message size was %lu for collective ID %ld com rule %d msg rule %d at around line %d\n", MSGSIZE, COLID, ncs, nms, fileline));
goto on_file_error;
}

Expand All @@ -219,13 +265,14 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t**
} /* comm size */

total_alg_count++;
OPAL_OUTPUT((ompi_coll_tuned_stream, "Done reading dynamic rule for collective ID %ld\n", CI));
OPAL_OUTPUT((ompi_coll_tuned_stream, "Done reading dynamic rule for collective ID %ld\n", COLID));

} /* per collective */

fclose (fptr);

OPAL_OUTPUT((ompi_coll_tuned_stream,"\nConfigure file Stats\n"));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Version\t\t\t\t\t: %5u\n", version));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Collectives with rules\t\t\t: %5d\n", total_alg_count));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Communicator sizes with rules\t\t: %5d\n", total_com_count));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Message sizes with rules\t\t: %5d\n", total_msg_count));
Expand Down

0 comments on commit 0f68484

Please sign in to comment.