Skip to content

Commit

Permalink
Rework MPI Info FAPL preserve PR to use VFD 'ctl' operations (#3782)
Browse files Browse the repository at this point in the history
  • Loading branch information
jhendersonHDF authored Nov 3, 2023
1 parent 61982b6 commit 7404b57
Show file tree
Hide file tree
Showing 9 changed files with 95 additions and 29 deletions.
48 changes: 43 additions & 5 deletions src/H5FDmpi.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,12 @@ H5FD_mpi_get_size(H5FD_t *file)
} /* end H5FD_mpi_get_size() */

/*-------------------------------------------------------------------------
* Function: H5FD_mpi_get_comm
* Function: H5FD_mpi_get_comm
*
* Purpose: Retrieves the file's communicator
* Purpose: Retrieves the file's MPI_Comm communicator object
*
* Return: Success: The communicator (non-negative)
*
* Failure: Negative
* Return: Success: The communicator object
* Failure: MPI_COMM_NULL
*
*-------------------------------------------------------------------------
*/
Expand Down Expand Up @@ -143,6 +142,45 @@ H5FD_mpi_get_comm(H5FD_t *file)
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5FD_mpi_get_comm() */

/*-------------------------------------------------------------------------
 * Function:    H5FD_mpi_get_info
 *
 * Purpose:     Retrieves the file's MPI_Info info object by issuing an
 *              H5FD_CTL_GET_MPI_INFO_OPCODE request to the 'ctl' callback
 *              of the file's driver class.
 *
 *              The handle handed back is whatever the driver wrote
 *              through the output pointer; this routine does not
 *              duplicate it.
 *              NOTE(review): ownership presumably stays with the driver,
 *              so callers should not free the returned handle -- confirm
 *              against the individual drivers' 'ctl' implementations.
 *
 * Parameters:  file - Open file whose driver is queried. Must be non-NULL
 *                     and its class must provide a 'ctl' callback (both
 *                     are checked with assert() only).
 *
 * Return:      Success:    The info object
 *              Failure:    MPI_INFO_NULL (either the 'ctl' request failed
 *                          or the driver reported MPI_INFO_NULL)
 *
 *-------------------------------------------------------------------------
 */
MPI_Info
H5FD_mpi_get_info(H5FD_t *file)
{
    const H5FD_class_t *cls;
    uint64_t            flags     = H5FD_CTL_FAIL_IF_UNKNOWN_FLAG | H5FD_CTL_ROUTE_TO_TERMINAL_VFD_FLAG;
    MPI_Info            info      = MPI_INFO_NULL;
    void               *info_ptr  = (void *)(&info);
    MPI_Info            ret_value = MPI_INFO_NULL; /* Start at the failure sentinel so no exit path can
                                                    * return an indeterminate value */

    FUNC_ENTER_NOAPI(MPI_INFO_NULL)

    assert(file);
    cls = (const H5FD_class_t *)(file->cls);
    assert(cls);
    assert(cls->ctl); /* All MPI drivers must implement this */

    /* Dispatch to driver. The callback takes a 'void **' output argument,
     * so it is given the address of a pointer to the local 'info'; the
     * driver's answer lands in 'info'.
     */
    if ((cls->ctl)(file, H5FD_CTL_GET_MPI_INFO_OPCODE, flags, NULL, &info_ptr) < 0)
        HGOTO_ERROR(H5E_VFL, H5E_CANTGET, MPI_INFO_NULL, "driver get_info request failed");

    /* 'info' was pre-set to MPI_INFO_NULL, so an untouched or null result
     * is reported as a failure.
     * NOTE(review): MPI_INFO_NULL doubles as the error sentinel here, so a
     * driver that legitimately holds a null info object cannot be told
     * apart from a failed request -- confirm drivers never store
     * MPI_INFO_NULL.
     */
    if (info == MPI_INFO_NULL)
        HGOTO_ERROR(H5E_VFL, H5E_CANTGET, MPI_INFO_NULL, "driver get_info request failed -- bad info object");

    ret_value = info;

done:
    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5FD_mpi_get_info() */

/*-------------------------------------------------------------------------
* Function: H5FD_mpi_MPIOff_to_haddr
*
Expand Down
7 changes: 7 additions & 0 deletions src/H5FDmpio.c
Original file line number Diff line number Diff line change
Expand Up @@ -3795,6 +3795,7 @@ H5FD__mpio_delete(const char *filename, hid_t fapl_id)
* At present, the supported op codes are:
*
* H5FD_CTL_GET_MPI_COMMUNICATOR_OPCODE
* H5FD_CTL_GET_MPI_INFO_OPCODE
* H5FD_CTL_GET_MPI_RANK_OPCODE
* H5FD_CTL_GET_MPI_SIZE_OPCODE
* H5FD_CTL_GET_MPI_FILE_SYNC_OPCODE
Expand Down Expand Up @@ -3827,6 +3828,12 @@ H5FD__mpio_ctl(H5FD_t *_file, uint64_t op_code, uint64_t flags, const void H5_AT
**((MPI_Comm **)output) = file->comm;
break;

case H5FD_CTL_GET_MPI_INFO_OPCODE:
assert(output);
assert(*output);
**((MPI_Info **)output) = file->info;
break;

case H5FD_CTL_GET_MPI_RANK_OPCODE:
assert(output);
assert(*output);
Expand Down
1 change: 1 addition & 0 deletions src/H5FDprivate.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ H5_DLL herr_t H5FD_get_mpio_atomicity(H5FD_t *file, bool *flag);
H5_DLL int H5FD_mpi_get_rank(H5FD_t *file);
H5_DLL int H5FD_mpi_get_size(H5FD_t *file);
H5_DLL MPI_Comm H5FD_mpi_get_comm(H5FD_t *file);
H5_DLL MPI_Info H5FD_mpi_get_info(H5FD_t *file);
H5_DLL herr_t H5FD_mpi_get_file_sync_required(H5FD_t *file, bool *file_sync_required);
#endif /* H5_HAVE_PARALLEL */

Expand Down
1 change: 1 addition & 0 deletions src/H5FDpublic.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@
#define H5FD_CTL_INVALID_OPCODE 0
#define H5FD_CTL_TEST_OPCODE 1
#define H5FD_CTL_GET_MPI_COMMUNICATOR_OPCODE 2
#define H5FD_CTL_GET_MPI_INFO_OPCODE 9
#define H5FD_CTL_GET_MPI_RANK_OPCODE 3
#define H5FD_CTL_GET_MPI_SIZE_OPCODE 4
#define H5FD_CTL_MEM_ALLOC 5
Expand Down
6 changes: 6 additions & 0 deletions src/H5FDsubfiling/H5FDsubfiling.c
Original file line number Diff line number Diff line change
Expand Up @@ -2551,6 +2551,12 @@ H5FD__subfiling_ctl(H5FD_t *_file, uint64_t op_code, uint64_t flags, const void
**((MPI_Comm **)output) = file->ext_comm;
break;

case H5FD_CTL_GET_MPI_INFO_OPCODE:
assert(output);
assert(*output);
**((MPI_Info **)output) = file->info;
break;

case H5FD_CTL_GET_MPI_RANK_OPCODE:
assert(output);
assert(*output);
Expand Down
25 changes: 6 additions & 19 deletions src/H5Fint.c
Original file line number Diff line number Diff line change
Expand Up @@ -402,16 +402,19 @@ H5F_get_access_plist(H5F_t *f, bool app_ref)
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set collective metadata read flag");
if (H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
MPI_Comm mpi_comm;
MPI_Info mpi_info;

/* Retrieve and set MPI communicator */
if (MPI_COMM_NULL == (mpi_comm = H5F_mpi_get_comm(f)))
HGOTO_ERROR(H5E_FILE, H5E_CANTGET, H5I_INVALID_HID, "can't get MPI communicator");
if (H5P_set(new_plist, H5F_ACS_MPI_PARAMS_COMM_NAME, &mpi_comm) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set MPI communicator");

/* Retrieve MPI info object */
if (H5P_set(new_plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &(f->shared->mpi_info)) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set MPI info object");
/* Retrieve and set MPI info */
if (MPI_INFO_NULL == (mpi_info = H5F_mpi_get_info(f)))
HGOTO_ERROR(H5E_FILE, H5E_CANTGET, H5I_INVALID_HID, "can't get MPI info");
if (H5P_set(new_plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &mpi_info) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set MPI info");
}
#endif /* H5_HAVE_PARALLEL */
if (H5P_set(new_plist, H5F_ACS_META_CACHE_INIT_IMAGE_CONFIG_NAME, &(f->shared->mdc_initCacheImageCfg)) <
Expand Down Expand Up @@ -1130,12 +1133,6 @@ H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5F
/* initialize point of no return */
f->shared->point_of_no_return = false;

#ifdef H5_HAVE_PARALLEL
/* Initialize this just in case we fail before setting this field and */
/* we try to call H5_mpi_info_free() on uninitialized memory in H5F__dest() */
f->shared->mpi_info = MPI_INFO_NULL;
#endif /* H5_HAVE_PARALLEL */

/* Copy the file creation and file access property lists into the
* new file handle. We do this early because some values might need
* to change as the file is being opened.
Expand Down Expand Up @@ -1212,8 +1209,6 @@ H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5F
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get collective metadata read flag");
if (H5P_get(plist, H5F_ACS_COLL_MD_WRITE_FLAG_NAME, &(f->shared->coll_md_write)) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get collective metadata write flag");
if (H5P_get(plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &(f->shared->mpi_info)) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't set MPI info object");
#endif /* H5_HAVE_PARALLEL */
if (H5P_get(plist, H5F_ACS_META_CACHE_INIT_IMAGE_CONFIG_NAME, &(f->shared->mdc_initCacheImageCfg)) <
0)
Expand Down Expand Up @@ -1419,14 +1414,6 @@ H5F__dest(H5F_t *f, bool flush, bool free_on_failure)
f->shared->efc = NULL;
} /* end if */

#ifdef H5_HAVE_PARALLEL
if (f->shared->mpi_info != MPI_INFO_NULL) {
/* Free MPI info saved in the file struct */
if (H5_mpi_info_free(&f->shared->mpi_info) < 0)
HDONE_ERROR(H5E_FILE, H5E_CANTRELEASE, FAIL, "can't free MPI info");
}
#endif

/* With the shutdown modifications, the contents of the metadata cache
* should be clean at this point, with the possible exception of the
* the superblock and superblock extension.
Expand Down
34 changes: 30 additions & 4 deletions src/H5Fmpi.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,10 @@ H5F_mpi_get_rank(const H5F_t *f)
/*-------------------------------------------------------------------------
* Function: H5F_mpi_get_comm
*
* Purpose: Retrieves the file's communicator
* Purpose: Retrieves the file's MPI_Comm communicator object
*
* Return: Success: The communicator (non-negative)
*
* Failure: Negative
* Return: Success: The communicator object
* Failure: MPI_COMM_NULL
*
*-------------------------------------------------------------------------
*/
Expand All @@ -122,6 +121,33 @@ H5F_mpi_get_comm(const H5F_t *f)
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5F_mpi_get_comm() */

/*-------------------------------------------------------------------------
 * Function:    H5F_mpi_get_info
 *
 * Purpose:     Returns the MPI_Info object associated with a file by
 *              forwarding the request to H5FD_mpi_get_info() on the
 *              file's low-level driver handle (f->shared->lf).
 *
 * Parameters:  f - File to query. Both 'f' and 'f->shared' must be
 *                  non-NULL (checked with assert() only).
 *
 * Return:      Success:    The info object
 *              Failure:    MPI_INFO_NULL
 *
 *-------------------------------------------------------------------------
 */
MPI_Info
H5F_mpi_get_info(const H5F_t *f)
{
    MPI_Info file_info = MPI_INFO_NULL; /* Result of the driver query */
    MPI_Info ret_value = MPI_INFO_NULL; /* Return value */

    FUNC_ENTER_NOAPI(MPI_INFO_NULL)

    assert(f && f->shared);

    /* Ask the virtual file driver layer for the info object */
    file_info = H5FD_mpi_get_info(f->shared->lf);

    /* A null handle is the driver layer's failure indication */
    if (MPI_INFO_NULL == file_info)
        HGOTO_ERROR(H5E_FILE, H5E_CANTGET, MPI_INFO_NULL, "driver get_info request failed");

    ret_value = file_info;

done:
    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5F_mpi_get_info() */

/*-------------------------------------------------------------------------
* Function: H5F_shared_mpi_get_size
*
Expand Down
1 change: 0 additions & 1 deletion src/H5Fpkg.h
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,6 @@ struct H5F_shared_t {
#ifdef H5_HAVE_PARALLEL
H5P_coll_md_read_flag_t coll_md_read; /* Do all metadata reads collectively */
bool coll_md_write; /* Do all metadata writes collectively */
MPI_Info mpi_info; /* MPI info */
#endif /* H5_HAVE_PARALLEL */
};

Expand Down
1 change: 1 addition & 0 deletions src/H5Fprivate.h
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,7 @@ H5_DLL herr_t H5F_eoa_dirty(H5F_t *f);
#ifdef H5_HAVE_PARALLEL
H5_DLL int H5F_mpi_get_rank(const H5F_t *f);
H5_DLL MPI_Comm H5F_mpi_get_comm(const H5F_t *f);
H5_DLL MPI_Info H5F_mpi_get_info(const H5F_t *f);
H5_DLL int H5F_shared_mpi_get_size(const H5F_shared_t *f_sh);
H5_DLL int H5F_mpi_get_size(const H5F_t *f);
H5_DLL herr_t H5F_mpi_retrieve_comm(hid_t loc_id, hid_t acspl_id, MPI_Comm *mpi_comm);
Expand Down

0 comments on commit 7404b57

Please sign in to comment.