Skip to content
This repository has been archived by the owner on Sep 30, 2022. It is now read-only.

usnic updates: MPI_THREAD_MULTIPLE, libfabric v1.4, ...etc. #1326

Merged
merged 7 commits into from
Sep 14, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions opal/mca/btl/usnic/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -335,3 +335,40 @@ libfabric abstractions:
fi_fabric: corresponds to a VIC PF
fi_domain: corresponds to a VIC VF
fi_endpoint: resources inside the VIC VF (basically a QP)

======================================

MPI_THREAD_MULTIPLE support

In order to make the usnic BTL thread-safe, mutex locks are used to
protect the critical path, i.e., libfabric routines, bookkeeping, etc.

The lock is btl_usnic_lock. It is a RECURSIVE lock, meaning that the
same thread can take the lock again even if it already holds it. This
allows a callback function to post another segment right away if we know
that the current segment has completed inline (i.e., we can call send
from within send without deadlocking).

The following two functions take care of hotel checkin/checkout, which
has to be protected, so we take the mutex lock before entering them:

- opal_btl_usnic_check_rts()
- opal_btl_usnic_handle_ack()

We also have to protect the calls to libfabric routines:

- opal_btl_usnic_endpoint_send_segment() (fi_send)
- opal_btl_usnic_recv_call() (fi_recvmsg)

Also, the cclient connection checking (opal_btl_usnic_connectivity_ping)
has to be protected. This happens only at the beginning, but the cclient
communicates with the cagent through opal_fd_read/write(), and if two or
more clients call opal_fd_write() at the same time, the data might be
corrupted.

With this concept, many functions in btl/usnic that call the functions
listed above are protected by the OPAL_THREAD_LOCK macro, which is only
active if the user calls MPI_Init_thread() requesting
MPI_THREAD_MULTIPLE support.
4 changes: 4 additions & 0 deletions opal/mca/btl/usnic/btl_usnic.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ BEGIN_C_DECLS
* at other times as needed or as tuning dictates.
*/
extern uint64_t opal_btl_usnic_ticks;

/* Lock for MPI_THREAD_MULTIPLE support */
extern opal_recursive_mutex_t btl_usnic_lock;

static inline uint64_t
get_nsec(void)
{
Expand Down
8 changes: 7 additions & 1 deletion opal/mca/btl/usnic/btl_usnic_cclient.c
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ int opal_btl_usnic_connectivity_listen(opal_btl_usnic_module_t *module)
/* Ensure to NULL-terminate the passed strings */
strncpy(cmd.nodename, opal_process_info.nodename,
CONNECTIVITY_NODENAME_LEN - 1);
strncpy(cmd.usnic_name, module->fabric_info->fabric_attr->name,
strncpy(cmd.usnic_name, module->linux_device_name,
CONNECTIVITY_IFNAME_LEN - 1);

if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(cmd), &cmd)) {
Expand Down Expand Up @@ -234,6 +234,9 @@ int opal_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port,
return OPAL_SUCCESS;
}

/* Protect opal_fd_write for multithreaded case */
OPAL_THREAD_LOCK(&btl_usnic_lock);

/* Send the PING command */
int id = CONNECTIVITY_AGENT_CMD_PING;
if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(id), &id)) {
Expand All @@ -260,6 +263,9 @@ int opal_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port,
/* Will not return */
}

/* Unlock and return */
OPAL_THREAD_UNLOCK(&btl_usnic_lock);

return OPAL_SUCCESS;
}

Expand Down
8 changes: 5 additions & 3 deletions opal/mca/btl/usnic/btl_usnic_compat.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
Expand Down Expand Up @@ -509,6 +509,7 @@ opal_btl_usnic_prepare_src(
size_t* size,
uint32_t flags)
{
OPAL_THREAD_LOCK(&btl_usnic_lock);
opal_btl_usnic_module_t *module = (opal_btl_usnic_module_t*) base_module;
opal_btl_usnic_send_frag_t *frag;
uint32_t payload_len;
Expand All @@ -535,7 +536,7 @@ opal_btl_usnic_prepare_src(

#if MSGDEBUG2
opal_output(0, "prep_src: %s %s frag %p, size=%d+%u (was %u), conv=%p\n",
module->fabric_info->fabric_attr->name,
module->linux_device_name,
(reserve + *size) <= module->max_frag_payload?"small":"large",
(void *)frag, (int)reserve, (unsigned)*size, (unsigned)osize,
(void *)convertor);
Expand All @@ -552,6 +553,7 @@ opal_btl_usnic_prepare_src(
#endif
#endif

OPAL_THREAD_UNLOCK(&btl_usnic_lock);
return &frag->sf_base.uf_base;
}

Expand Down Expand Up @@ -721,7 +723,7 @@ opal_btl_usnic_prepare_src(struct mca_btl_base_module_t *base_module,

#if MSGDEBUG2
opal_output(0, "prep_src: %s %s frag %p, size=%d+%u (was %u), conv=%p\n",
module->fabric_info->fabric_attr->name,
module->linux_device_name,
(reserve + *size) <= module->max_frag_payload?"small":"large",
(void *)frag, (int)reserve, (unsigned)*size, (unsigned)osize,
(void *)convertor);
Expand Down
Loading