diff --git a/opal/mca/btl/openib/btl_openib.c b/opal/mca/btl/openib/btl_openib.c index 05c15e11ee..0b2412edb5 100644 --- a/opal/mca/btl/openib/btl_openib.c +++ b/opal/mca/btl/openib/btl_openib.c @@ -857,6 +857,14 @@ static int init_ib_proc_nolock(mca_btl_openib_module_t* openib_btl, mca_btl_open matching_port = j; } rem_port_cnt++; + } else { + if (mca_btl_openib_component.allow_different_subnets) { /* opt-in: still count index j, and select it as matching_port when the running count equals btl_rank */ + BTL_VERBOSE(("Using different subnets!")); + if (rem_port_cnt == btl_rank) { + matching_port = j; + } + rem_port_cnt++; + } } } @@ -923,6 +931,13 @@ static int init_ib_proc_nolock(mca_btl_openib_module_t* openib_btl, mca_btl_open break; else rem_port_cnt ++; + } else { + if (mca_btl_openib_component.allow_different_subnets) { /* opt-in: stop once the running count equals btl_rank, otherwise keep counting */ + if (rem_port_cnt == btl_rank) + break; + else + rem_port_cnt ++; + } } } @@ -989,6 +1004,13 @@ static int get_openib_btl_params(mca_btl_openib_module_t* openib_btl, int *port_ rank = port_cnt; } port_cnt++; + } else { + if (mca_btl_openib_component.allow_different_subnets) { /* opt-in: still count entry j, and record the count as rank when openib_btls[j] is this module */ + if (openib_btl == mca_btl_openib_component.openib_btls[j]) { + rank = port_cnt; + } + port_cnt++; + } } } *port_cnt_ptr = port_cnt; diff --git a/opal/mca/btl/openib/btl_openib.h b/opal/mca/btl/openib/btl_openib.h index 00ae4b3d95..b7637eb78a 100644 --- a/opal/mca/btl/openib/btl_openib.h +++ b/opal/mca/btl/openib/btl_openib.h @@ -299,6 +299,9 @@ struct mca_btl_openib_component_t { char* default_recv_qps; /** GID index to use */ int gid_index; + /** Whether to allow connecting processes that are on different subnets.
+ * Disabled ('no') by default. */ + bool allow_different_subnets; /** Whether we want a dynamically resizing srq, enabled by default */ bool enable_srq_resize; bool allow_max_memory_registration; diff --git a/opal/mca/btl/openib/btl_openib_mca.c b/opal/mca/btl/openib/btl_openib_mca.c index 8782331e11..ed74780797 100644 --- a/opal/mca/btl/openib/btl_openib_mca.c +++ b/opal/mca/btl/openib/btl_openib_mca.c @@ -703,6 +703,11 @@ int btl_openib_register_mca_params(void) 0, &mca_btl_openib_component.gid_index, REGINT_GE_ZERO)); + CHECK(reg_bool("allow_different_subnets", NULL, + "Allow connecting processes from different IB subnets." /* NOTE(review): no space separates this literal from the next one once they are concatenated */ + "(0 = do not allow; 1 = allow)", + false, &mca_btl_openib_component.allow_different_subnets)); + #if MEMORY_LINUX_MALLOC_ALIGN_ENABLED tmp = mca_base_var_find ("opal", "memory", "linux", "memalign"); if (0 <= tmp) { diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c b/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c index 2e2ce6f8d0..9e3dd08153 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c @@ -2072,7 +2072,8 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, opal_btl_ sin.sin_addr.s_addr = rdmacm_addr; sin.sin_port = (uint16_t) rdmacm_port; #else - rc = ibv_query_gid(openib_btl->device->ib_pd->context, openib_btl->port_num, 0, &server->gid); + rc = ibv_query_gid(openib_btl->device->ib_pd->context, openib_btl->port_num, + mca_btl_openib_component.gid_index, &server->gid); /* query the GID at the configured gid_index instead of a fixed index 0 */ if (0 != rc) { BTL_ERROR(("local gid query failed")); goto out4;