From bbcbe3cacd688fe2f841b706146227d1e6b36b1f Mon Sep 17 00:00:00 2001 From: Alina Sklarevich Date: Wed, 16 Mar 2016 10:52:06 +0200 Subject: [PATCH] btl/openib: enable connecting processes from different subnets. + Added an mca parameter to allow connecting processes from different subnets. Its current default value is 'false' - don't allow, to keep the current flow the way it is now. + rdmacm: when calling ibv_query_gid, use the gid index from btl_openib_gid_index. --- opal/mca/btl/openib/btl_openib.c | 22 +++++++++++++++++++ opal/mca/btl/openib/btl_openib.h | 3 +++ opal/mca/btl/openib/btl_openib_mca.c | 5 +++++ .../connect/btl_openib_connect_rdmacm.c | 3 ++- 4 files changed, 32 insertions(+), 1 deletion(-) diff --git a/opal/mca/btl/openib/btl_openib.c b/opal/mca/btl/openib/btl_openib.c index ad11ed8b481..53d8c81e874 100644 --- a/opal/mca/btl/openib/btl_openib.c +++ b/opal/mca/btl/openib/btl_openib.c @@ -849,6 +849,14 @@ static int init_ib_proc_nolock(mca_btl_openib_module_t* openib_btl, mca_btl_open matching_port = j; } rem_port_cnt++; + } else { + if (mca_btl_openib_component.allow_different_subnets) { + BTL_VERBOSE(("Using different subnets!")); + if (rem_port_cnt == btl_rank) { + matching_port = j; + } + rem_port_cnt++; + } } } @@ -915,6 +923,13 @@ static int init_ib_proc_nolock(mca_btl_openib_module_t* openib_btl, mca_btl_open break; else rem_port_cnt ++; + } else { + if (mca_btl_openib_component.allow_different_subnets) { + if (rem_port_cnt == btl_rank) + break; + else + rem_port_cnt ++; + } } } @@ -981,6 +996,13 @@ static int get_openib_btl_params(mca_btl_openib_module_t* openib_btl, int *port_ rank = port_cnt; } port_cnt++; + } else { + if (mca_btl_openib_component.allow_different_subnets) { + if (openib_btl == mca_btl_openib_component.openib_btls[j]) { + rank = port_cnt; + } + port_cnt++; + } } } *port_cnt_ptr = port_cnt; diff --git a/opal/mca/btl/openib/btl_openib.h b/opal/mca/btl/openib/btl_openib.h index 3956f39acf7..5c87d30c32a 100644 --- 
a/opal/mca/btl/openib/btl_openib.h +++ b/opal/mca/btl/openib/btl_openib.h @@ -295,6 +295,9 @@ struct mca_btl_openib_component_t { char* default_recv_qps; /** GID index to use */ int gid_index; + /* Whether we want to allow connecting processes from different subnets. + * set to 'no' by default */ + bool allow_different_subnets; /** Whether we want a dynamically resizing srq, enabled by default */ bool enable_srq_resize; bool allow_max_memory_registration; diff --git a/opal/mca/btl/openib/btl_openib_mca.c b/opal/mca/btl/openib/btl_openib_mca.c index 05f01b035a1..7427994aec5 100644 --- a/opal/mca/btl/openib/btl_openib_mca.c +++ b/opal/mca/btl/openib/btl_openib_mca.c @@ -706,6 +706,11 @@ int btl_openib_register_mca_params(void) 0, &mca_btl_openib_component.gid_index, REGINT_GE_ZERO)); + CHECK(reg_bool("allow_different_subnets", NULL, + "Allow connecting processes from different IB subnets." + "(0 = do not allow; 1 = allow)", + false, &mca_btl_openib_component.allow_different_subnets)); + #if MEMORY_LINUX_MALLOC_ALIGN_ENABLED tmp = mca_base_var_find ("opal", "memory", "linux", "memalign"); if (0 <= tmp) { diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c b/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c index 67a4fb2954d..95af1e3de4c 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c @@ -2109,7 +2109,8 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, opal_btl_ sin.sin_addr.s_addr = rdmacm_addr; sin.sin_port = (uint16_t) rdmacm_port; #else - rc = ibv_query_gid(openib_btl->device->ib_pd->context, openib_btl->port_num, 0, &server->gid); + rc = ibv_query_gid(openib_btl->device->ib_pd->context, openib_btl->port_num, + mca_btl_openib_component.gid_index, &server->gid); if (0 != rc) { BTL_ERROR(("local gid query failed")); goto out4;