 
 #include "coll_xhc.h"
 
-static int xhc_make_comms(ompi_communicator_t *ompi_comm,
+static int xhc_comms_make(ompi_communicator_t *ompi_comm,
     xhc_peer_info_t *peer_info, xhc_comm_t **comms_dst,
     int *comm_count_dst, xhc_loc_t *hierarchy, int hierarchy_len);
-static void xhc_destroy_comms(xhc_comm_t *comms, int comm_count);
+static void xhc_comms_destroy(xhc_comm_t *comms, int comm_count);
 
-static void xhc_print_info(xhc_module_t *module,
+static int xhc_print_info(xhc_module_t *module,
     ompi_communicator_t *comm, xhc_data_t *data);
 
 static void *xhc_shmem_create(opal_shmem_ds_t *seg_ds, size_t size,
     ompi_communicator_t *ompi_comm, const char *name_chr_s, int name_chr_i);
 static void *xhc_shmem_attach(opal_shmem_ds_t *seg_ds);
 static mca_smsc_endpoint_t *xhc_smsc_ep(xhc_peer_info_t *peer_info);
 
-int xhc_lazy_init(xhc_module_t *module, ompi_communicator_t *comm) {
+// ------------------------------------------------
+
+int mca_coll_xhc_lazy_init(xhc_module_t *module, ompi_communicator_t *comm) {
 
     int comm_size = ompi_comm_size(comm);
     int rank = ompi_comm_rank(comm);
@@ -118,7 +120,7 @@ int xhc_lazy_init(xhc_module_t *module, ompi_communicator_t *comm) {
     /* An XHC communicator is created for each level of the hierarchy.
      * The hierarchy must be in an order of most-specific to most-general. */
 
-    ret = xhc_make_comms(comm, peer_info, &data->comms, &data->comm_count,
+    ret = xhc_comms_make(comm, peer_info, &data->comms, &data->comm_count,
         module->hierarchy, module->hierarchy_len);
     if (ret != OMPI_SUCCESS) {
         RETURN_WITH_ERROR(return_code, ret, end);
@@ -144,7 +146,10 @@ int xhc_lazy_init(xhc_module_t *module, ompi_communicator_t *comm) {
     // ----
 
     if (mca_coll_xhc_component.print_info) {
-        xhc_print_info(module, comm, data);
+        ret = xhc_print_info(module, comm, data);
+        if (ret != OMPI_SUCCESS) {
+            RETURN_WITH_ERROR(return_code, ret, end);
+        }
     }
 
     // ----
@@ -162,18 +167,18 @@ int xhc_lazy_init(xhc_module_t *module, ompi_communicator_t *comm) {
         opal_show_help("help-coll-xhc.txt", "xhc-init-failed", true,
             return_code, errno, strerror(errno));
 
-        xhc_deinit(module);
+        xhc_fini(module);
     }
 
     return return_code;
 }
 
-void xhc_deinit(mca_coll_xhc_module_t *module) {
+void mca_coll_xhc_fini(mca_coll_xhc_module_t *module) {
     if (module->data) {
         xhc_data_t *data = module->data;
 
         if (data->comm_count >= 0) {
-            xhc_destroy_comms(data->comms, data->comm_count);
+            xhc_comms_destroy(data->comms, data->comm_count);
         }
 
         free(data->comms);
@@ -198,7 +203,20 @@ void xhc_deinit(mca_coll_xhc_module_t *module) {
     }
 }
 
-static int xhc_make_comms(ompi_communicator_t *ompi_comm,
+// ------------------------------------------------
+
+/* This method is where the hierarchy of XHC is constructed; it receives
+ * the hierarchy specifications (hierarchy param) and groups ranks together
+ * among them. The process begins with the first locality in the list. All
+ * ranks that share this locality (determined via the relative peer to peer
+ * distances) become siblings. The one amongst them with the lowest rank
+ * number becomes the manager/leader of the group. The members don't really
+ * need to keep track of the actual ranks of their siblings -- only the rank
+ * of the group's leader/manager, the size of the group, and their own member
+ * ID. The process continues with the next locality, except that now only the
+ * ranks that became leaders in the previous level are eligible (determined
+ * via comm_candidate, see inline comments). */
+static int xhc_comms_make(ompi_communicator_t *ompi_comm,
         xhc_peer_info_t *peer_info, xhc_comm_t **comms_dst,
         int *comm_count_dst, xhc_loc_t *hierarchy, int hierarchy_len) {
 
@@ -257,6 +275,10 @@ static int xhc_make_comms(ompi_communicator_t *ompi_comm,
 
         // ----
 
+        /* Only ranks that were leaders in the previous level are candidates
+         * for this one. Every rank advertises whether others may consider
+         * it for inclusion via an Allgather. */
+
         bool is_candidate = (comm_count == 0
             || comms[comm_count - 1].manager_rank == ompi_rank);
 
@@ -305,14 +327,16 @@ static int xhc_make_comms(ompi_communicator_t *ompi_comm,
 
         assert(xc->size > 0);
 
+        /* If there are no local peers with regard to this locality, no
+         * XHC comm is created for this process on this level. */
         if (xc->size == 1) {
             opal_output_verbose(MCA_BASE_VERBOSE_WARN,
                 ompi_coll_base_framework.framework_output,
                 "coll:xhc: Warning: Locality 0x%04x does not result "
                 "in any new groupings; skipping it", xc->locality);
 
-            /* Must participate in this allgather, even if useless
-             * to this rank, since it's necessary for the rest */
+            /* All ranks must participate in the "control struct sharing"
+             * allgather, even if some of them are useless to this rank */
 
             ret = ompi_comm->c_coll->coll_allgather(&xc->ctrl_ds,
                 sizeof(opal_shmem_ds_t), MPI_BYTE, comm_ctrl_ds,
@@ -322,7 +346,7 @@ static int xhc_make_comms(ompi_communicator_t *ompi_comm,
                 RETURN_WITH_ERROR(return_code, ret, comm_error);
             }
 
-            xhc_destroy_comms(xc, 1);
+            xhc_comms_destroy(xc, 1);
             continue;
         }
 
@@ -372,6 +396,12 @@ static int xhc_make_comms(ompi_communicator_t *ompi_comm,
                 + sizeof(xhc_comm_ctrl_t) + smsc_reg_size);
         }
 
+        /* The comm's managers share the details of the communication structs
+         * with their children, so that they may attach to them. Because no
+         * MPI communicator is formed that includes (only) the members of the
+         * XHC comm, the sharing is achieved with a single Allgather, instead
+         * of a Broadcast inside each XHC comm. */
+
         ret = ompi_comm->c_coll->coll_allgather(&xc->ctrl_ds,
             sizeof(opal_shmem_ds_t), MPI_BYTE, comm_ctrl_ds,
             sizeof(opal_shmem_ds_t), MPI_BYTE, ompi_comm,
@@ -404,7 +434,7 @@ static int xhc_make_comms(ompi_communicator_t *ompi_comm,
         continue;
 
         comm_error: {
-            xhc_destroy_comms(comms, comm_count + 1);
+            xhc_comms_destroy(comms, comm_count + 1);
             comm_count = -1;
 
             goto end;
@@ -428,7 +458,7 @@ static int xhc_make_comms(ompi_communicator_t *ompi_comm,
     return return_code;
 }
 
-static void xhc_destroy_comms(xhc_comm_t *comms, int comm_count) {
+static void xhc_comms_destroy(xhc_comm_t *comms, int comm_count) {
     bool is_manager = true;
 
     for (int i = 0; i < comm_count; i++) {
@@ -458,16 +488,17 @@ static void xhc_destroy_comms(xhc_comm_t *comms, int comm_count) {
     }
 }
 
-static void xhc_print_info(xhc_module_t *module,
+static int xhc_print_info(xhc_module_t *module,
         ompi_communicator_t *comm, xhc_data_t *data) {
 
     int rank = ompi_comm_rank(comm);
-
-    char *drval_str = NULL;
-    char *lb_rla_str = NULL;
-    char *un_min_str = NULL;
+    int ret;
 
     if (rank == 0) {
+        char *drval_str;
+        char *lb_rla_str;
+        char *un_min_str;
+
         switch (mca_coll_xhc_component.dynamic_reduce) {
             case OMPI_XHC_DYNAMIC_REDUCE_DISABLED:
                 drval_str = "OFF"; break;
@@ -492,8 +523,11 @@ static void xhc_print_info(xhc_module_t *module,
                 lb_rla_str = "???";
         }
 
-        opal_asprintf(&un_min_str, " (min '%zu' bytes)",
+        ret = opal_asprintf(&un_min_str, " (min '%zu' bytes)",
             mca_coll_xhc_component.uniform_chunks_min);
+        if (ret < 0) {
+            return OMPI_ERR_OUT_OF_RESOURCE;
+        }
 
         printf("------------------------------------------------\n"
             "OMPI coll/xhc @ %s, priority %d\n"
@@ -508,39 +542,52 @@ static void xhc_print_info(xhc_module_t *module,
             (mca_coll_xhc_component.uniform_chunks ? "ON" : "OFF"),
             (mca_coll_xhc_component.uniform_chunks ? un_min_str : ""),
             mca_coll_xhc_component.cico_max);
+
+        free(un_min_str);
     }
 
-    // TODO convert to opal_asprintf?
     for (int i = 0; i < data->comm_count; i++) {
-        char buf[BUFSIZ] = {0};
-        size_t buf_idx = 0;
+        char *mlist = NULL;
+        char *tmp;
 
-        buf_idx += snprintf(buf + buf_idx, sizeof(buf) - buf_idx,
-            "%d", data->comms[i].manager_rank);
+        ret = opal_asprintf(&mlist, "%d", data->comms[i].manager_rank);
+        if (ret < 0) {
+            return OMPI_ERR_OUT_OF_RESOURCE;
+        }
+
+        for (int m = 1; m < data->comms[i].size; m++) {
+            char c;
 
-        for (int j = 1; j < data->comms[i].size; j++) {
-            if (j == data->comms[i].member_id) {
+            if (m == data->comms[i].member_id) {
                 if (i == 0 || data->comms[i - 1].manager_rank == rank) {
-                    buf_idx += snprintf(buf + buf_idx,
-                        sizeof(buf) - buf_idx, " %d", rank);
+                    ret = opal_asprintf(&tmp, "%s %d", mlist, rank);
                 } else {
-                    buf_idx += snprintf(buf + buf_idx,
-                        sizeof(buf) - buf_idx, " _");
+                    ret = opal_asprintf(&tmp, "%s _", mlist);
                 }
             } else {
-                buf_idx += snprintf(buf + buf_idx,
-                    sizeof(buf) - buf_idx, " x");
+                ret = opal_asprintf(&tmp, "%s x", mlist);
+            }
+
+            free(mlist);
+            mlist = tmp;
+
+            if (ret < 0) {
+                return OMPI_ERR_OUT_OF_RESOURCE;
+            }
             }
         }
 
         printf("XHC comm loc=0x%08x chunk_size=%zu with %d members [%s]\n",
             data->comms[i].locality, data->comms[i].chunk_size,
-            data->comms[i].size, buf);
+            data->comms[i].size, mlist);
+
+        free(mlist);
     }
 
-    free(un_min_str);
+    return OMPI_SUCCESS;
 }
 
+// ------------------------------------------------
+
 static void *xhc_shmem_create(opal_shmem_ds_t *seg_ds, size_t size,
         ompi_communicator_t *ompi_comm, const char *name_chr_s, int name_chr_i) {
 
@@ -594,18 +641,6 @@ static void *xhc_shmem_attach(opal_shmem_ds_t *seg_ds) {
     return addr;
 }
 
-void *xhc_get_cico(xhc_peer_info_t *peer_info, int rank) {
-    if (OMPI_XHC_CICO_MAX == 0) {
-        return NULL;
-    }
-
-    if (peer_info[rank].cico_buffer == NULL) {
-        peer_info[rank].cico_buffer = xhc_shmem_attach(&peer_info[rank].cico_ds);
-    }
-
-    return peer_info[rank].cico_buffer;
-}
-
 static mca_smsc_endpoint_t *xhc_smsc_ep(xhc_peer_info_t *peer_info) {
     if (!peer_info->smsc_ep) {
         peer_info->smsc_ep = MCA_SMSC_CALL(get_endpoint, &peer_info->proc->super);
@@ -622,7 +657,21 @@ static mca_smsc_endpoint_t *xhc_smsc_ep(xhc_peer_info_t *peer_info) {
     return peer_info->smsc_ep;
 }
 
-int xhc_copy_expose_region(void *base, size_t len, xhc_copy_data_t **region_data) {
+// ------------------------------------------------
+
+void *mca_coll_xhc_get_cico(xhc_peer_info_t *peer_info, int rank) {
+    if (OMPI_XHC_CICO_MAX == 0) {
+        return NULL;
+    }
+
+    if (peer_info[rank].cico_buffer == NULL) {
+        peer_info[rank].cico_buffer = xhc_shmem_attach(&peer_info[rank].cico_ds);
+    }
+
+    return peer_info[rank].cico_buffer;
+}
+
+int mca_coll_xhc_copy_expose_region(void *base, size_t len, xhc_copy_data_t **region_data) {
     if (mca_smsc_base_has_feature(MCA_SMSC_FEATURE_REQUIRE_REGISTATION)) {
         void *data = MCA_SMSC_CALL(register_region, base, len);
 
@@ -640,11 +689,11 @@ int xhc_copy_expose_region(void *base, size_t len, xhc_copy_data_t **region_data
     return 0;
 }
 
-void xhc_copy_region_post(void *dst, xhc_copy_data_t *region_data) {
+void mca_coll_xhc_copy_region_post(void *dst, xhc_copy_data_t *region_data) {
     memcpy(dst, region_data, mca_smsc_base_registration_data_size());
 }
 
-int xhc_copy_from(xhc_peer_info_t *peer_info,
+int mca_coll_xhc_copy_from(xhc_peer_info_t *peer_info,
         void *dst, void *src, size_t size, void *access_token) {
 
     mca_smsc_endpoint_t *smsc_ep = xhc_smsc_ep(peer_info);
@@ -659,12 +708,12 @@ int xhc_copy_from(xhc_peer_info_t *peer_info,
     return (status == OPAL_SUCCESS ? 0 : -1);
 }
 
-void xhc_copy_close_region(xhc_copy_data_t *region_data) {
+void mca_coll_xhc_copy_close_region(xhc_copy_data_t *region_data) {
     if (mca_smsc_base_has_feature(MCA_SMSC_FEATURE_REQUIRE_REGISTATION))
         MCA_SMSC_CALL(deregister_region, region_data);
 }
 
-void *xhc_get_registration(xhc_peer_info_t *peer_info,
+void *mca_coll_xhc_get_registration(xhc_peer_info_t *peer_info,
         void *peer_vaddr, size_t size, xhc_reg_t **reg) {
 
     mca_smsc_endpoint_t *smsc_ep = xhc_smsc_ep(peer_info);
@@ -695,6 +744,6 @@ void *xhc_get_registration(xhc_peer_info_t *peer_info,
 
 /* Won't actually unmap/detach, since we've set
  * the "persist" flag while creating the mapping */
-void xhc_return_registration(xhc_reg_t *reg) {
+void mca_coll_xhc_return_registration(xhc_reg_t *reg) {
     MCA_SMSC_CALL(unmap_peer_region, reg);
 }
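
A note on the renaming pattern above: the externally visible definitions gain the full mca_coll_xhc_ prefix (matching the usual OMPI convention for a component's public symbols), yet call sites in this same file, such as xhc_fini(module), keep the short spellings. That only compiles if the short names are aliased to the prefixed ones, presumably via macros in coll_xhc.h. A minimal sketch of that aliasing follows; the exact macro list is an assumption based on the functions renamed in this commit, not copied from the actual header.

/* Hypothetical short-name aliases, as coll_xhc.h would be expected to provide.
 * Each macro maps an internal xhc_* spelling to the exported, prefixed symbol. */
#define xhc_lazy_init           mca_coll_xhc_lazy_init
#define xhc_fini                mca_coll_xhc_fini
#define xhc_get_cico            mca_coll_xhc_get_cico
#define xhc_copy_expose_region  mca_coll_xhc_copy_expose_region
#define xhc_copy_region_post    mca_coll_xhc_copy_region_post
#define xhc_copy_from           mca_coll_xhc_copy_from
#define xhc_copy_close_region   mca_coll_xhc_copy_close_region
#define xhc_get_registration    mca_coll_xhc_get_registration
#define xhc_return_registration mca_coll_xhc_return_registration

With aliases like these in place, internal callers keep using the short xhc_* names while the symbols actually exported by the component carry the fully qualified names; the static helpers (xhc_comms_make, xhc_comms_destroy, xhc_print_info, etc.) need no such treatment and are simply renamed in place.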