From 33f8c74114b1e04c32552a74841f19d177f35852 Mon Sep 17 00:00:00 2001 From: Burlen Loring Date: Fri, 13 Sep 2024 12:09:51 -0700 Subject: [PATCH 1/2] coll tuned dynamic rules file alltoall_algorithm_max_requests Teach the dynamic rules file reader to look for the alltoall_algorithm_max_requests tuning parameter. To keep the dynamic rules file format backward compatible the alltoall_algorithm_max_requests is optional. When not present in the rule definition the value of the corresponding MCA variable is used instead. Resolves #12589 Signed-off-by: Burlen Loring --- ompi/mca/coll/base/coll_base_util.c | 23 ++++ ompi/mca/coll/base/coll_base_util.h | 2 + ompi/mca/coll/tuned/coll_tuned_dynamic_file.c | 123 ++++++++++++------ 3 files changed, 107 insertions(+), 41 deletions(-) diff --git a/ompi/mca/coll/base/coll_base_util.c b/ompi/mca/coll/base/coll_base_util.c index ae9010497d7..ba74aa01350 100644 --- a/ompi/mca/coll/base/coll_base_util.c +++ b/ompi/mca/coll/base/coll_base_util.c @@ -12,6 +12,9 @@ * Copyright (c) 2014-2020 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved. + * + * Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -482,6 +485,26 @@ int ompi_coll_base_file_peek_next_char_is(FILE *fptr, int *fileline, int expecte } while (1); } +/** + * return non-zero if the next non-space to read on the current line is a digit. + * otherwise return 0. + */ +int ompi_coll_base_file_peek_next_char_isdigit(FILE *fptr) +{ + do { + int next = fgetc(fptr); + + if ((' ' == next) || ('\t' == next)) { + continue; /* discard space and tab. 
keep everything else */ + } + + ungetc(next, fptr); /* put the char back into the stream */ + + return isdigit(next); /* report back whether or not next is a digit */ + + } while (1); +} + /** * There are certainly simpler implementation for this function when performance * is not a critical point. But, as this function is used during the collective diff --git a/ompi/mca/coll/base/coll_base_util.h b/ompi/mca/coll/base/coll_base_util.h index 852abcedefa..7bceaa7dcc0 100644 --- a/ompi/mca/coll/base/coll_base_util.h +++ b/ompi/mca/coll/base/coll_base_util.h @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2014-2020 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -195,6 +196,7 @@ int ompi_coll_base_file_getnext_string(FILE *fptr, int *fileline, char** val); * eat the value, otherwise put it back into the file. */ int ompi_coll_base_file_peek_next_char_is(FILE *fptr, int *fileline, int expected); +int ompi_coll_base_file_peek_next_char_isdigit(FILE *fptr); /* Miscellaneous function */ const char* mca_coll_base_colltype_to_str(int collid); diff --git a/ompi/mca/coll/tuned/coll_tuned_dynamic_file.c b/ompi/mca/coll/tuned/coll_tuned_dynamic_file.c index e56ece1d0b4..cfaf1f6730e 100644 --- a/ompi/mca/coll/tuned/coll_tuned_dynamic_file.c +++ b/ompi/mca/coll/tuned/coll_tuned_dynamic_file.c @@ -12,6 +12,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved. + * Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -42,13 +43,24 @@ static int fileline=0; /* used for verbose error messages */ #define getnext(fptr, pval) ompi_coll_base_file_getnext_long(fptr, &fileline, pval) +#define isnext_digit(fptr) ompi_coll_base_file_peek_next_char_isdigit(fptr) /* * Reads a rule file called fname - * Builds the algorithm rule table for a max of n_collectives + * The rule file defines a set of sets of rules. The outer set is keyed on + * communicator size while the inner set is keyed on message size. When a + * communicator is constructed its size is used to look up the nested set of + * message size keyed rules. When a collective is called the message size + * determined from its call arguments is used to look up a specific rule in the + * inner set. + * + * Rules for communicator and message sizes 0 and N (where N is larger than the + * largest key you provide) can be specified to fall back to the fixed decision + * framework above and below the communicator and message size ranges of + * interest. * * If an error occurs it removes rule table and then exits with a very verbose - * error message (this stops the user using a half baked rule table + * error message. this stops the user using a half baked rule table. * * Returns the number of actual collectives that a rule exists for * (note 0 is NOT an error) @@ -57,7 +69,16 @@ static int fileline=0; /* used for verbose error messages */ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules, int n_collectives) { - long CI, NCS, CS, ALG, NMS, FANINOUT, X, MS, SS; + long NCOL = 0, /* number of collectives for which rules are provided */ + COLID = 0, /* identifies the collective type to associate the rules with */ + NCOMSIZES = 0, /* number of sets of message size rules. the key is communicator size */ + COMSIZE = 0, /* communicator size, the key identifying a specific set of message size rules. */ + NMSGSIZES = 0, /* number of message size rules in the set. 
*/ + MSGSIZE = 0, /* message size, the key identifying a specific rule in the set. */ + ALG = 0, /* the collective specific algorithm to use */ + FANINOUT = 0, /* algorithm specific tuning parameter */ + SEGSIZE = 0, /* algorithm specific tuning parameter */ + MAXREQ = 0; /* algorithm specific tuning parameter */ FILE *fptr = (FILE*) NULL; int x, ncs, nms; @@ -103,68 +124,73 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** goto on_file_error; } - if( (getnext(fptr, &X) < 0) || (X < 0) ) { + /* get the number of collectives for which rules are provided in the file */ + if( (getnext(fptr, &NCOL) < 0) || (NCOL < 0) ) { OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of collectives in configuration file around line %d\n", fileline)); goto on_file_error; } - if (X>n_collectives) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of collectives in configuration file %ld is greater than number of MPI collectives possible %d ??? error around line %d\n", X, n_collectives, fileline)); + if (NCOL>n_collectives) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of collectives in configuration file %ld is greater than number of MPI collectives possible %d ??? error around line %d\n", NCOL, n_collectives, fileline)); goto on_file_error; } - for (x=0;x=n_collectives) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Collective id in configuration file %ld is greater than MPI collectives possible %d. Error around line %d\n", CI, n_collectives, fileline)); + if (COLID>=n_collectives) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Collective id in configuration file %ld is greater than MPI collectives possible %d. 
Error around line %d\n", COLID, n_collectives, fileline)); goto on_file_error; } - if (alg_rules[CI].alg_rule_id != CI) { - OPAL_OUTPUT((ompi_coll_tuned_stream, "Internal error in handling collective ID %ld\n", CI)); + if (alg_rules[COLID].alg_rule_id != COLID) { + OPAL_OUTPUT((ompi_coll_tuned_stream, "Internal error in handling collective ID %ld\n", COLID)); goto on_file_error; } - OPAL_OUTPUT((ompi_coll_tuned_stream, "Reading dynamic rule for collective ID %ld\n", CI)); - alg_p = &alg_rules[CI]; + OPAL_OUTPUT((ompi_coll_tuned_stream, "Reading dynamic rule for collective ID %ld\n", COLID)); + alg_p = &alg_rules[COLID]; - alg_p->alg_rule_id = CI; + alg_p->alg_rule_id = COLID; alg_p->n_com_sizes = 0; alg_p->com_rules = (ompi_coll_com_rule_t *) NULL; - if( (getnext (fptr, &NCS) < 0) || (NCS < 0) ) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read count of communicators for collective ID %ld at around line %d\n", CI, fileline)); + /* get the number of communicator sizes for which a set of rules are to be provided */ + if( (getnext (fptr, &NCOMSIZES) < 0) || (NCOMSIZES < 0) ) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read count of communicators for collective ID %ld at around line %d\n", COLID, fileline)); goto on_file_error; } - OPAL_OUTPUT((ompi_coll_tuned_stream, "Read communicator count %ld for dynamic rule for collective ID %ld\n", NCS, CI)); - alg_p->n_com_sizes = NCS; - alg_p->com_rules = ompi_coll_tuned_mk_com_rules (NCS, CI); + OPAL_OUTPUT((ompi_coll_tuned_stream, "Read communicator count %ld for dynamic rule for collective ID %ld\n", NCOMSIZES, COLID)); + alg_p->n_com_sizes = NCOMSIZES; + alg_p->com_rules = ompi_coll_tuned_mk_com_rules (NCOMSIZES, COLID); if (NULL == alg_p->com_rules) { OPAL_OUTPUT((ompi_coll_tuned_stream,"Cannot allocate com rules for file [%s]\n", fname)); goto on_file_error; } - for (ncs=0;ncscom_rules[ncs]); - if( (getnext (fptr, &CS) < 0) || (CS < 0) ) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read communicator 
size for collective ID %ld com rule %d at around line %d\n", CI, ncs, fileline)); + /* get the communicator size to associate the set of rules with */ + if( (getnext (fptr, &COMSIZE) < 0) || (COMSIZE < 0) ) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read communicator size for collective ID %ld com rule %d at around line %d\n", COLID, ncs, fileline)); goto on_file_error; } - com_p->mpi_comsize = CS; + com_p->mpi_comsize = COMSIZE; - if( (getnext (fptr, &NMS) < 0) || (NMS < 0) ) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of message sizes for collective ID %ld com rule %d at around line %d\n", CI, ncs, fileline)); + /* get the number of message sizes to specify rules for. inner set size */ + if( (getnext (fptr, &NMSGSIZES) < 0) || (NMSGSIZES < 0) ) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of message sizes for collective ID %ld com rule %d at around line %d\n", COLID, ncs, fileline)); goto on_file_error; } OPAL_OUTPUT((ompi_coll_tuned_stream, "Read message count %ld for dynamic rule for collective ID %ld and comm size %ld\n", - NMS, CI, CS)); - com_p->n_msg_sizes = NMS; - com_p->msg_rules = ompi_coll_tuned_mk_msg_rules (NMS, CI, ncs, CS); + NMSGSIZES, COLID, COMSIZE)); + com_p->n_msg_sizes = NMSGSIZES; + com_p->msg_rules = ompi_coll_tuned_mk_msg_rules (NMSGSIZES, COLID, ncs, COMSIZE); if (NULL == com_p->msg_rules) { OPAL_OUTPUT((ompi_coll_tuned_stream,"Cannot allocate msg rules for file [%s]\n", fname)); goto on_file_error; @@ -172,37 +198,52 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** msg_p = com_p->msg_rules; - for (nms=0;nmsmsg_rules[nms]); - if( (getnext (fptr, &MS) < 0) || (MS < 0) ) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read message size for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline)); + /* read the message size to associate the rule with */ + if( (getnext (fptr, &MSGSIZE) < 0) || (MSGSIZE < 0) ) { + 
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read message size for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline)); goto on_file_error; } - msg_p->msg_size = (size_t)MS; + msg_p->msg_size = (size_t)MSGSIZE; + /* read the collective specific algorithm identifier */ if( (getnext (fptr, &ALG) < 0) || (ALG < 0) ) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target algorithm method for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target algorithm method for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline)); goto on_file_error; } msg_p->result_alg = ALG; + /* read faninout tuning parameter. required */ if( (getnext (fptr, &FANINOUT) < 0) || (FANINOUT < 0) ) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read fan in/out topo for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read fan in/out topo for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline)); goto on_file_error; } msg_p->result_topo_faninout = FANINOUT; - if( (getnext (fptr, &SS) < 0) || (SS < 0) ) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target segment size for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline)); + /* read segsize tuning parameter. required */ + if( (getnext (fptr, &SEGSIZE) < 0) || (SEGSIZE < 0) ) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target segment size for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline)); goto on_file_error; } - msg_p->result_segsize = SS; + msg_p->result_segsize = SEGSIZE; + + /* read the max requests tuning parameter. 
optional */ + msg_p->result_max_requests = ompi_coll_tuned_alltoall_max_requests; + if( isnext_digit(fptr) ) { + if( (getnext (fptr, &MAXREQ) < 0) || (MAXREQ < 0) ) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read max requests for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline)); + goto on_file_error; + } + msg_p->result_max_requests = MAXREQ; + } - if (!nms && MS) { + /* check the first rule is for 0 size. look-up depends on this */ + if (!nms && MSGSIZE) { OPAL_OUTPUT((ompi_coll_tuned_stream,"All algorithms must specify a rule for message size of zero upwards always first!\n")); - OPAL_OUTPUT((ompi_coll_tuned_stream,"Message size was %lu for collective ID %ld com rule %d msg rule %d at around line %d\n", MS, CI, ncs, nms, fileline)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Message size was %lu for collective ID %ld com rule %d msg rule %d at around line %d\n", MSGSIZE, COLID, ncs, nms, fileline)); goto on_file_error; } @@ -219,7 +260,7 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** } /* comm size */ total_alg_count++; - OPAL_OUTPUT((ompi_coll_tuned_stream, "Done reading dynamic rule for collective ID %ld\n", CI)); + OPAL_OUTPUT((ompi_coll_tuned_stream, "Done reading dynamic rule for collective ID %ld\n", COLID)); } /* per collective */ From f6387a4a9ee61e6c0f01a12fa1f204aa99ec4f34 Mon Sep 17 00:00:00 2001 From: Burlen Loring Date: Sat, 2 Nov 2024 15:18:45 -0500 Subject: [PATCH 2/2] coll tuned add version identifier to the rules file The version identifier is optional, but when provided it must appear on the first line and have the following format: `rule-file-version-N`, where N is an unsigned integer. Older versions of the parser will fall back to the fixed decision mechanism when this line is present. Version 1 is the original format; version 2 adds support for the optional coll_tuned_alltoall_algorithm_max_requests specification. 
Signed-off-by: Burlen Loring --- ompi/mca/coll/tuned/coll_tuned_dynamic_file.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ompi/mca/coll/tuned/coll_tuned_dynamic_file.c b/ompi/mca/coll/tuned/coll_tuned_dynamic_file.c index cfaf1f6730e..5eb8ef4317e 100644 --- a/ompi/mca/coll/tuned/coll_tuned_dynamic_file.c +++ b/ompi/mca/coll/tuned/coll_tuned_dynamic_file.c @@ -80,7 +80,7 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** SEGSIZE = 0, /* algorithm specific tuning parameter */ MAXREQ = 0; /* algorithm specific tuning parameter */ FILE *fptr = (FILE*) NULL; - int x, ncs, nms; + int x, ncs, nms, version; ompi_coll_alg_rule_t *alg_rules = (ompi_coll_alg_rule_t*) NULL; /* complete table of rules */ @@ -124,6 +124,11 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** goto on_file_error; } + /* consume the optional version identifier */ + if (0 == fscanf(fptr, "rule-file-version-%u", &version)) { + version = 1; + } + /* get the number of collectives for which rules are provided in the file */ if( (getnext(fptr, &NCOL) < 0) || (NCOL < 0) ) { OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of collectives in configuration file around line %d\n", fileline)); @@ -232,7 +237,7 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** /* read the max requests tuning parameter. 
optional */ msg_p->result_max_requests = ompi_coll_tuned_alltoall_max_requests; - if( isnext_digit(fptr) ) { + if( (version > 1) && isnext_digit(fptr) ) { if( (getnext (fptr, &MAXREQ) < 0) || (MAXREQ < 0) ) { OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read max requests for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline)); goto on_file_error; @@ -267,6 +272,7 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** fclose (fptr); OPAL_OUTPUT((ompi_coll_tuned_stream,"\nConfigure file Stats\n")); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Version\t\t\t\t\t: %5u\n", version)); OPAL_OUTPUT((ompi_coll_tuned_stream,"Collectives with rules\t\t\t: %5d\n", total_alg_count)); OPAL_OUTPUT((ompi_coll_tuned_stream,"Communicator sizes with rules\t\t: %5d\n", total_com_count)); OPAL_OUTPUT((ompi_coll_tuned_stream,"Message sizes with rules\t\t: %5d\n", total_msg_count));