Skip to content

Commit

Permalink
rework server bootstrap completion
Browse files Browse the repository at this point in the history
* replace server pids pthread mutex/cond with ABT versions
* add margo_state_dump() on client-server or server-server failures
  (currently commented out)
* add a 'bootstrap complete' broadcast rpc after rank 0 sees all
  servers have reported
* fix function declaration for unifyfs_invoke_broadcast_extents()
  • Loading branch information
MichaelBrim committed Oct 24, 2023
1 parent bf7cb38 commit f09156d
Show file tree
Hide file tree
Showing 12 changed files with 470 additions and 250 deletions.
8 changes: 4 additions & 4 deletions client/src/margo_client.c
Original file line number Diff line number Diff line change
Expand Up @@ -224,14 +224,14 @@ static hg_handle_t create_handle(hg_id_t id)
return handle;
}

static int forward_to_server(
hg_handle_t hdl,
void* input_ptr,
double timeout_msec)
/*
 * Forward the RPC associated with hdl to the server.
 *
 * The request is sent with margo_forward_timed(), passing input_ptr as the
 * RPC input and timeout_msec as the timeout value.
 *
 * Returns UNIFYFS_SUCCESS when margo_forward_timed() reports HG_SUCCESS;
 * otherwise logs the Mercury error string and returns UNIFYFS_ERROR_MARGO.
 */
static int forward_to_server(hg_handle_t hdl,
                             void* input_ptr,
                             double timeout_msec)
{
    hg_return_t rc = margo_forward_timed(hdl, input_ptr, timeout_msec);
    if (rc == HG_SUCCESS) {
        return UNIFYFS_SUCCESS;
    }

    /* forwarding failed: report why, then map to the UnifyFS error code */
    LOGERR("margo_forward_timed() failed - %s", HG_Error_to_string(rc));
    //margo_state_dump(client_rpc_context->mid, "-", 0, NULL);
    return UNIFYFS_ERROR_MARGO;
}
Expand Down
9 changes: 8 additions & 1 deletion common/src/unifyfs_server_rpcs.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ typedef enum {
UNIFYFS_SERVER_RPC_LAMINATE,
UNIFYFS_SERVER_RPC_METAGET,
UNIFYFS_SERVER_RPC_METASET,
UNIFYFS_SERVER_RPC_PID_REPORT,
UNIFYFS_SERVER_RPC_TRANSFER,
UNIFYFS_SERVER_RPC_TRUNCATE,
UNIFYFS_SERVER_BCAST_RPC_BOOTSTRAP,
UNIFYFS_SERVER_BCAST_RPC_EXTENTS,
UNIFYFS_SERVER_BCAST_RPC_FILEATTR,
UNIFYFS_SERVER_BCAST_RPC_LAMINATE,
Expand Down Expand Up @@ -186,6 +186,13 @@ MERCURY_GEN_PROC(bcast_progress_out_t,
((int32_t)(ret)))
DECLARE_MARGO_RPC_HANDLER(bcast_progress_rpc)

/* Broadcast 'bootstrap complete' to all servers
 *
 * Per the commit description, this broadcast is issued after rank 0 sees
 * that all servers have reported.
 *
 * The input struct carries a single int32_t field 'root' and the output
 * struct a single int32_t field 'ret'.
 * NOTE(review): 'root' is presumably the rank of the server that started
 * the broadcast and 'ret' a status code - confirm against the handler. */
MERCURY_GEN_PROC(bootstrap_complete_bcast_in_t,
((int32_t)(root)))
MERCURY_GEN_PROC(bootstrap_complete_bcast_out_t,
((int32_t)(ret)))
DECLARE_MARGO_RPC_HANDLER(bootstrap_complete_bcast_rpc)

/* Broadcast file extents to all servers */
MERCURY_GEN_PROC(extent_bcast_in_t,
((int32_t)(root))
Expand Down
8 changes: 7 additions & 1 deletion server/src/margo_server.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,12 @@ static void register_server_server_rpcs(margo_instance_id mid)
bcast_progress_in_t, bcast_progress_out_t,
bcast_progress_rpc);

unifyfsd_rpc_context->rpcs.bootstrap_complete_bcast_id =
MARGO_REGISTER(mid, "bootstrap_complete_bcast_rpc",
bootstrap_complete_bcast_in_t,
bootstrap_complete_bcast_out_t,
bootstrap_complete_bcast_rpc);

unifyfsd_rpc_context->rpcs.chunk_read_request_id =
MARGO_REGISTER(mid, "chunk_read_request_rpc",
chunk_read_request_in_t, chunk_read_request_out_t,
Expand Down Expand Up @@ -534,7 +540,7 @@ int margo_connect_servers(void)

/* allocate array of structs to record address for each server */
server_infos = (server_info_t*) calloc(glb_num_servers,
sizeof(server_info_t));
sizeof(server_info_t));
if (NULL == server_infos) {
LOGERR("failed to allocate server_info array");
return ENOMEM;
Expand Down
1 change: 1 addition & 0 deletions server/src/margo_server.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
typedef struct ServerRpcIds {
/* server-server rpcs */
hg_id_t bcast_progress_id;
hg_id_t bootstrap_complete_bcast_id;
hg_id_t chunk_read_request_id;
hg_id_t chunk_read_response_id;
hg_id_t extent_add_id;
Expand Down
8 changes: 6 additions & 2 deletions server/src/unifyfs_global.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,12 @@ bool check_pending_metaget(int gfid);
unifyfs_rc clear_pending_metaget(int gfid);


/* publish the pids of all servers to a shared file */
int unifyfs_publish_server_pids(void);

/* notify local server main thread that bootstrap is complete */
int unifyfs_signal_bootstrap_complete(void);

/* participate in collective server bootstrap completion process */
int unifyfs_complete_bootstrap(void);

/* report the pid for a server with given rank */
int unifyfs_report_server_pid(int rank, int pid);
Expand Down
Loading

0 comments on commit f09156d

Please sign in to comment.