25
25
*/
26
26
27
27
#include "opal_config.h"
28
+ #include "opal/util/show_help.h"
28
29
29
30
#include "btl_vader.h"
30
31
#include "btl_vader_endpoint.h"
@@ -77,6 +78,28 @@ mca_btl_vader_t mca_btl_vader = {
77
78
}
78
79
};
79
80
81
+ /*
82
+ * Exit function copied from btl_usnic_util.c
83
+ *
84
+ * The following comment tells Coverity that this function does not return.
85
+ * See https://scan.coverity.com/tune.
86
+ */
87
+
88
+ /* coverity[+kill] */
89
+ static void vader_btl_exit (mca_btl_vader_t * btl )
90
+ {
91
+ if (NULL != btl && NULL != btl -> error_cb ) {
92
+ btl -> error_cb (& btl -> super , MCA_BTL_ERROR_FLAGS_FATAL ,
93
+ (opal_proc_t * ) opal_proc_local_get (),
94
+ "The vader BTL is aborting the MPI job (via PML error callback)." );
95
+ }
96
+
97
+ /* If the PML error callback returns (or if there wasn't one), just exit. Shrug. */
98
+ fprintf (stderr , "*** The Open MPI vader BTL is aborting the MPI job (via exit(3)).\n" );
99
+ fflush (stderr );
100
+ exit (1 );
101
+ }
102
+
80
103
static int vader_btl_first_time_init (mca_btl_vader_t * vader_btl , int n )
81
104
{
82
105
mca_btl_vader_component_t * component = & mca_btl_vader_component ;
@@ -158,6 +181,7 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
158
181
static int init_vader_endpoint (struct mca_btl_base_endpoint_t * ep , struct opal_proc_t * proc , int remote_rank ) {
159
182
mca_btl_vader_component_t * component = & mca_btl_vader_component ;
160
183
union vader_modex_t * modex ;
184
+ ino_t my_user_ns_id ;
161
185
size_t msg_size ;
162
186
int rc ;
163
187
@@ -182,17 +206,59 @@ static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_
182
206
} else {
183
207
#endif
184
208
/* store a copy of the segment information for detach */
185
- ep -> segment_data .other .seg_ds = malloc (msg_size );
209
+ ep -> segment_data .other .seg_ds = malloc (modex -> other . seg_ds_size );
186
210
if (NULL == ep -> segment_data .other .seg_ds ) {
187
211
return OPAL_ERR_OUT_OF_RESOURCE ;
188
212
}
189
213
190
- memcpy (ep -> segment_data .other .seg_ds , & modex -> seg_ds , msg_size );
214
+ memcpy (ep -> segment_data .other .seg_ds , & modex -> other . seg_ds , modex -> other . seg_ds_size );
191
215
192
216
ep -> segment_base = opal_shmem_segment_attach (ep -> segment_data .other .seg_ds );
193
217
if (NULL == ep -> segment_base ) {
194
218
return OPAL_ERROR ;
195
219
}
220
+
221
+ if (MCA_BTL_VADER_CMA == mca_btl_vader_component .single_copy_mechanism ) {
222
+ my_user_ns_id = mca_btl_vader_get_user_ns_id ();
223
+ if (my_user_ns_id != modex -> other .user_ns_id ) {
224
+ mca_base_var_source_t source ;
225
+ int vari ;
226
+ rc = mca_base_var_find_by_name ("btl_vader_single_copy_mechanism" , & vari );
227
+ if (OPAL_ERROR == rc ) {
228
+ return OPAL_ERROR ;
229
+ }
230
+ rc = mca_base_var_get_value (vari , NULL , & source , NULL );
231
+ if (OPAL_ERROR == rc ) {
232
+ return OPAL_ERROR ;
233
+ }
234
+ /*
235
+ * CMA is not possible as different user namespaces are in use.
236
+ * Currently the kernel does not allow process_vm_{read,write}v()
237
+ * for processes running in different user namespaces even if
238
+ * all involved user IDs are mapped to the same user ID.
239
+ *
240
+ * Fallback to MCA_BTL_VADER_NONE.
241
+ */
242
+ if (MCA_BASE_VAR_SOURCE_DEFAULT != source ) {
243
+ /* If CMA has been explicitly selected we want to error out */
244
+ opal_show_help ("help-btl-vader.txt" , "cma-different-user-namespace-error" ,
245
+ true, opal_process_info .nodename );
246
+ vader_btl_exit (& mca_btl_vader );
247
+ }
248
+ /*
249
+ * If CMA has been selected because it is the default or
250
+ * some fallback, this falls back even further.
251
+ */
252
+ opal_show_help ("help-btl-vader.txt" , "cma-different-user-namespace-warning" ,
253
+ true, opal_process_info .nodename );
254
+ mca_btl_vader_component .single_copy_mechanism = MCA_BTL_VADER_NONE ;
255
+ mca_btl_vader .super .btl_flags &= ~MCA_BTL_FLAGS_RDMA ;
256
+ mca_btl_vader .super .btl_get = NULL ;
257
+ mca_btl_vader .super .btl_put = NULL ;
258
+ mca_btl_vader .super .btl_put_limit = 0 ;
259
+ mca_btl_vader .super .btl_get_limit = 0 ;
260
+ }
261
+ }
196
262
#if OPAL_BTL_VADER_HAVE_XPMEM
197
263
}
198
264
#endif
0 commit comments