Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

pml/ob1: be more selective when using rdma capable btls #1315

Merged
merged 1 commit into from
Aug 16, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion ompi/mca/pml/ob1/pml_ob1.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
Expand Down Expand Up @@ -61,6 +61,7 @@ struct mca_pml_ob1_t {
int max_rdma_per_request;
int max_send_per_range;
bool leave_pinned;
bool use_all_rdma;
int leave_pinned_pipeline;

/* lock queue access */
Expand Down
6 changes: 6 additions & 0 deletions ompi/mca/pml/ob1/pml_ob1_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,12 @@ static int mca_pml_ob1_component_register(void)

mca_pml_ob1_param_register_uint("unexpected_limit", 128, &mca_pml_ob1.unexpected_limit);

mca_pml_ob1.use_all_rdma = false;
(void) mca_base_component_var_register(&mca_pml_ob1_component.pmlm_version, "use_all_rdma",
"Use all available RDMA btls for the RDMA and RDMA pipeline protocols "
"(default: false)", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_GROUP, &mca_pml_ob1.use_all_rdma);

mca_pml_ob1.allocator_name = "bucket";
(void) mca_base_component_var_register(&mca_pml_ob1_component.pmlm_version, "allocator",
"Name of allocator component for unexpected messages",
Expand Down
79 changes: 70 additions & 9 deletions ompi/mca/pml/ob1/pml_ob1_rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
Expand Down Expand Up @@ -42,6 +42,7 @@ size_t mca_pml_ob1_rdma_btls(
mca_pml_ob1_com_btl_t* rdma_btls)
{
int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
int num_eager_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_eager);
double weight_total = 0;
int num_btls_used = 0;

Expand All @@ -57,6 +58,22 @@ size_t mca_pml_ob1_rdma_btls(
(bml_endpoint->btl_rdma_index + n) % num_btls);
mca_btl_base_registration_handle_t *reg_handle = NULL;
mca_btl_base_module_t *btl = bml_btl->btl;
/* NTH: go ahead and use an rdma btl if it is the only one */
bool ignore = !mca_pml_ob1.use_all_rdma;

/* do not use rdma btls that are not in the eager list. this is necessary to avoid using
* btls that exist on the endpoint only to support RMA. */
for (int i = 0 ; i < num_eager_btls && ignore ; ++i) {
mca_bml_base_btl_t *eager_btl = mca_bml_base_btl_array_get_index (&bml_endpoint->btl_eager, i);
if (eager_btl->btl_endpoint == bml_btl->btl_endpoint) {
ignore = false;
break;
}
}

if (ignore) {
continue;
}

if (btl->btl_register_mem) {
/* do not use the RDMA protocol with this btl if 1) leave pinned is disabled,
Expand Down Expand Up @@ -95,22 +112,66 @@ size_t mca_pml_ob1_rdma_btls(
return num_btls_used;
}

/**
 * Count the rdma btls on an endpoint that qualify for the RDMA pipeline
 * protocol.
 *
 * At most min(size of btl_rdma, mca_pml_ob1.max_rdma_per_request) btls are
 * examined, each obtained through mca_bml_base_btl_array_get_next(). A btl
 * qualifies when mca_pml_ob1.use_all_rdma is set, or when some entry of the
 * endpoint's btl_eager array has the same btl_endpoint.
 *
 * @param bml_endpoint  endpoint whose btl_rdma and btl_eager arrays are read
 *
 * @return number of qualifying rdma btls
 */
size_t mca_pml_ob1_rdma_pipeline_btls_count (mca_bml_base_endpoint_t* bml_endpoint)
{
    const int rdma_total = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_rdma);
    const int eager_total = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_eager);
    int usable = 0;

    for (int n = 0 ; n < rdma_total && n < mca_pml_ob1.max_rdma_per_request ; ++n) {
        mca_bml_base_btl_t *candidate = mca_bml_base_btl_array_get_next (&bml_endpoint->btl_rdma);
        /* with use_all_rdma set every rdma btl is accepted without a lookup */
        bool selected = mca_pml_ob1.use_all_rdma;

        /* otherwise accept the btl only if its endpoint also appears in the eager list */
        for (int e = 0 ; !selected && e < eager_total ; ++e) {
            mca_bml_base_btl_t *eager_btl = mca_bml_base_btl_array_get_index (&bml_endpoint->btl_eager, e);
            selected = (eager_btl->btl_endpoint == candidate->btl_endpoint);
        }

        if (selected) {
            ++usable;
        }
    }

    return usable;
}

/**
 * Select the rdma btls to use for the RDMA pipeline protocol and split the
 * message length across them by weight.
 *
 * At most min(size of btl_rdma, mca_pml_ob1.max_rdma_per_request) btls are
 * examined, each obtained through mca_bml_base_btl_array_get_next(). Unless
 * mca_pml_ob1.use_all_rdma is set, a btl is skipped when no entry of the
 * endpoint's btl_eager array shares its btl_endpoint; this avoids using btls
 * that exist on the endpoint only to support RMA.
 *
 * @param bml_endpoint  endpoint whose btl_rdma and btl_eager arrays are read
 * @param size          length handed to mca_pml_ob1_calc_weighted_length()
 * @param rdma_btls     output array; selected btls are stored contiguously
 *                      from index 0 with btl_reg set to NULL
 *                      (NOTE(review): presumably sized for at least
 *                      max_rdma_per_request entries -- confirm against callers)
 *
 * @return number of entries written to rdma_btls
 */
size_t mca_pml_ob1_rdma_pipeline_btls( mca_bml_base_endpoint_t* bml_endpoint,
                                       size_t size,
                                       mca_pml_ob1_com_btl_t* rdma_btls )
{
    int num_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_rdma);
    int num_eager_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_eager);
    double weight_total = 0;
    int rdma_count = 0;

    for (int i = 0 ; i < num_btls && i < mca_pml_ob1.max_rdma_per_request ; ++i) {
        mca_bml_base_btl_t *bml_btl = mca_bml_base_btl_array_get_next (&bml_endpoint->btl_rdma);
        /* skip the eager-list lookup entirely when all rdma btls are allowed */
        bool ignore = !mca_pml_ob1.use_all_rdma;

        /* do not use rdma btls that are not in the eager list */
        for (int j = 0 ; j < num_eager_btls && ignore ; ++j) {
            mca_bml_base_btl_t *eager_btl = mca_bml_base_btl_array_get_index (&bml_endpoint->btl_eager, j);
            if (eager_btl->btl_endpoint == bml_btl->btl_endpoint) {
                ignore = false;
                break;
            }
        }

        if (ignore) {
            continue;
        }

        /* rdma_count, not i, indexes the output so skipped btls leave no holes */
        rdma_btls[rdma_count].bml_btl = bml_btl;
        rdma_btls[rdma_count++].btl_reg = NULL;

        weight_total += bml_btl->btl_weight;
    }

    mca_pml_ob1_calc_weighted_length (rdma_btls, rdma_count, size, weight_total);

    return rdma_count;
}
6 changes: 6 additions & 0 deletions ompi/mca/pml/ob1/pml_ob1_rdma.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
Expand All @@ -9,6 +10,8 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -37,5 +40,8 @@ size_t mca_pml_ob1_rdma_btls(struct mca_bml_base_endpoint_t* endpoint,
* bandwidth */
size_t mca_pml_ob1_rdma_pipeline_btls(struct mca_bml_base_endpoint_t* endpoint,
size_t size, mca_pml_ob1_com_btl_t* rdma_btls);

/*
 * Count the rdma btls on the endpoint that qualify for the RDMA pipeline
 * protocol: those whose btl endpoint also appears in the endpoint's eager
 * btl list, or every examined rdma btl when the use_all_rdma parameter is
 * set. No more than max_rdma_per_request btls are examined. */
size_t mca_pml_ob1_rdma_pipeline_btls_count (mca_bml_base_endpoint_t* bml_endpoint);

#endif

2 changes: 1 addition & 1 deletion ompi/mca/pml/ob1/pml_ob1_recvreq.c
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ static int mca_pml_ob1_recv_request_ack(
/* by default copy everything */
recvreq->req_send_offset = bytes_received;
if(hdr->hdr_msg_length > bytes_received) {
size_t rdma_num = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
size_t rdma_num = mca_pml_ob1_rdma_pipeline_btls_count (bml_endpoint);
/*
* lookup request buffer to determine if memory is already
* registered.
Expand Down