Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework MPI Info FAPL preserve PR to use VFD 'ctl' operations #3782

Merged
merged 2 commits into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 43 additions & 5 deletions src/H5FDmpi.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,12 @@ H5FD_mpi_get_size(H5FD_t *file)
} /* end H5FD_mpi_get_size() */

/*-------------------------------------------------------------------------
 * Function:    H5FD_mpi_get_comm
 *
 * Purpose:     Retrieves the file's MPI_Comm communicator object
 *
 * Return:      Success:    The communicator object
 *              Failure:    MPI_COMM_NULL
 *
 *-------------------------------------------------------------------------
 */
Expand Down Expand Up @@ -143,6 +142,45 @@ H5FD_mpi_get_comm(H5FD_t *file)
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5FD_mpi_get_comm() */

/*-------------------------------------------------------------------------
 * Function:    H5FD_mpi_get_info
 *
 * Purpose:     Retrieves the file's MPI_Info info object by issuing an
 *              H5FD_CTL_GET_MPI_INFO_OPCODE request to the 'ctl' callback
 *              of the file's driver class.
 *
 * Return:      Success:    The info object
 *              Failure:    MPI_INFO_NULL
 *
 *-------------------------------------------------------------------------
 */
MPI_Info
H5FD_mpi_get_info(H5FD_t *file)
{
    const H5FD_class_t *cls;
    uint64_t            flags     = H5FD_CTL_FAIL_IF_UNKNOWN_FLAG | H5FD_CTL_ROUTE_TO_TERMINAL_VFD_FLAG;
    MPI_Info            info      = MPI_INFO_NULL;
    void               *info_ptr  = (void *)(&info);
    MPI_Info            ret_value = MPI_INFO_NULL; /* Defined on every path to 'done' */

    FUNC_ENTER_NOAPI(MPI_INFO_NULL)

    assert(file);
    cls = (const H5FD_class_t *)(file->cls);
    assert(cls);
    assert(cls->ctl); /* All MPI drivers must implement this */

    /* Dispatch to driver. The callback is handed the address of 'info_ptr',
     * which itself points at the local 'info', so the driver's result lands
     * in 'info'.
     */
    if ((cls->ctl)(file, H5FD_CTL_GET_MPI_INFO_OPCODE, flags, NULL, &info_ptr) < 0)
        HGOTO_ERROR(H5E_VFL, H5E_CANTGET, MPI_INFO_NULL, "driver get_info request failed");

    /* NOTE(review): MPI_INFO_NULL doubles as this function's failure
     * sentinel, so a driver that reports MPI_INFO_NULL without failing is
     * treated as an error here. Confirm that drivers never hold
     * MPI_INFO_NULL as a legitimate value.
     */
    if (info == MPI_INFO_NULL)
        HGOTO_ERROR(H5E_VFL, H5E_CANTGET, MPI_INFO_NULL, "driver get_info request failed -- bad info object");

    ret_value = info;

done:
    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5FD_mpi_get_info() */

/*-------------------------------------------------------------------------
* Function: H5FD_mpi_MPIOff_to_haddr
*
Expand Down
7 changes: 7 additions & 0 deletions src/H5FDmpio.c
Original file line number Diff line number Diff line change
Expand Up @@ -3795,6 +3795,7 @@ H5FD__mpio_delete(const char *filename, hid_t fapl_id)
* At present, the supported op codes are:
*
* H5FD_CTL_GET_MPI_COMMUNICATOR_OPCODE
* H5FD_CTL_GET_MPI_INFO_OPCODE
* H5FD_CTL_GET_MPI_RANK_OPCODE
* H5FD_CTL_GET_MPI_SIZE_OPCODE
* H5FD_CTL_GET_MPI_FILE_SYNC_OPCODE
Expand Down Expand Up @@ -3827,6 +3828,12 @@ H5FD__mpio_ctl(H5FD_t *_file, uint64_t op_code, uint64_t flags, const void H5_AT
**((MPI_Comm **)output) = file->comm;
break;

case H5FD_CTL_GET_MPI_INFO_OPCODE:
assert(output);
assert(*output);
**((MPI_Info **)output) = file->info;
break;

case H5FD_CTL_GET_MPI_RANK_OPCODE:
assert(output);
assert(*output);
Expand Down
1 change: 1 addition & 0 deletions src/H5FDprivate.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ H5_DLL herr_t H5FD_get_mpio_atomicity(H5FD_t *file, bool *flag);
H5_DLL int H5FD_mpi_get_rank(H5FD_t *file);
H5_DLL int H5FD_mpi_get_size(H5FD_t *file);
H5_DLL MPI_Comm H5FD_mpi_get_comm(H5FD_t *file);
H5_DLL MPI_Info H5FD_mpi_get_info(H5FD_t *file);
H5_DLL herr_t H5FD_mpi_get_file_sync_required(H5FD_t *file, bool *file_sync_required);
#endif /* H5_HAVE_PARALLEL */

Expand Down
1 change: 1 addition & 0 deletions src/H5FDpublic.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@
/* Operation codes passed as the op_code argument of a driver's 'ctl' callback */
#define H5FD_CTL_INVALID_OPCODE 0
#define H5FD_CTL_TEST_OPCODE 1
#define H5FD_CTL_GET_MPI_COMMUNICATOR_OPCODE 2
/* NOTE(review): 9 is out of numeric sequence with its neighbors. Presumably
 * the intermediate values are already assigned to other opcodes (5 is used
 * just below) and existing values were left unchanged -- confirm against the
 * complete opcode list before assigning further values.
 */
#define H5FD_CTL_GET_MPI_INFO_OPCODE 9
#define H5FD_CTL_GET_MPI_RANK_OPCODE 3
#define H5FD_CTL_GET_MPI_SIZE_OPCODE 4
#define H5FD_CTL_MEM_ALLOC 5
Expand Down
6 changes: 6 additions & 0 deletions src/H5FDsubfiling/H5FDsubfiling.c
Original file line number Diff line number Diff line change
Expand Up @@ -2551,6 +2551,12 @@ H5FD__subfiling_ctl(H5FD_t *_file, uint64_t op_code, uint64_t flags, const void
**((MPI_Comm **)output) = file->ext_comm;
break;

case H5FD_CTL_GET_MPI_INFO_OPCODE:
assert(output);
assert(*output);
**((MPI_Info **)output) = file->info;
break;

case H5FD_CTL_GET_MPI_RANK_OPCODE:
assert(output);
assert(*output);
Expand Down
25 changes: 6 additions & 19 deletions src/H5Fint.c
Original file line number Diff line number Diff line change
Expand Up @@ -402,16 +402,19 @@ H5F_get_access_plist(H5F_t *f, bool app_ref)
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set collective metadata read flag");
if (H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
MPI_Comm mpi_comm;
MPI_Info mpi_info;

/* Retrieve and set MPI communicator */
if (MPI_COMM_NULL == (mpi_comm = H5F_mpi_get_comm(f)))
HGOTO_ERROR(H5E_FILE, H5E_CANTGET, H5I_INVALID_HID, "can't get MPI communicator");
if (H5P_set(new_plist, H5F_ACS_MPI_PARAMS_COMM_NAME, &mpi_comm) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set MPI communicator");

/* Retrieve MPI info object */
if (H5P_set(new_plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &(f->shared->mpi_info)) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set MPI info object");
/* Retrieve and set MPI info */
if (MPI_INFO_NULL == (mpi_info = H5F_mpi_get_info(f)))
HGOTO_ERROR(H5E_FILE, H5E_CANTGET, H5I_INVALID_HID, "can't get MPI info");
if (H5P_set(new_plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &mpi_info) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set MPI info");
}
#endif /* H5_HAVE_PARALLEL */
if (H5P_set(new_plist, H5F_ACS_META_CACHE_INIT_IMAGE_CONFIG_NAME, &(f->shared->mdc_initCacheImageCfg)) <
Expand Down Expand Up @@ -1130,12 +1133,6 @@ H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5F
/* initialize point of no return */
f->shared->point_of_no_return = false;

#ifdef H5_HAVE_PARALLEL
/* Initialize this just in case we fail before setting this field and */
/* we try to call H5_mpi_info_free() on uninitialized memory in H5F__dest() */
f->shared->mpi_info = MPI_INFO_NULL;
#endif /* H5_HAVE_PARALLEL */

/* Copy the file creation and file access property lists into the
* new file handle. We do this early because some values might need
* to change as the file is being opened.
Expand Down Expand Up @@ -1212,8 +1209,6 @@ H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5F
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get collective metadata read flag");
if (H5P_get(plist, H5F_ACS_COLL_MD_WRITE_FLAG_NAME, &(f->shared->coll_md_write)) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get collective metadata write flag");
if (H5P_get(plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &(f->shared->mpi_info)) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't set MPI info object");
#endif /* H5_HAVE_PARALLEL */
if (H5P_get(plist, H5F_ACS_META_CACHE_INIT_IMAGE_CONFIG_NAME, &(f->shared->mdc_initCacheImageCfg)) <
0)
Expand Down Expand Up @@ -1419,14 +1414,6 @@ H5F__dest(H5F_t *f, bool flush, bool free_on_failure)
f->shared->efc = NULL;
} /* end if */

#ifdef H5_HAVE_PARALLEL
if (f->shared->mpi_info != MPI_INFO_NULL) {
/* Free MPI info saved in the file struct */
if (H5_mpi_info_free(&f->shared->mpi_info) < 0)
HDONE_ERROR(H5E_FILE, H5E_CANTRELEASE, FAIL, "can't free MPI info");
}
#endif

/* With the shutdown modifications, the contents of the metadata cache
* should be clean at this point, with the possible exception of the
* the superblock and superblock extension.
Expand Down
34 changes: 30 additions & 4 deletions src/H5Fmpi.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,10 @@ H5F_mpi_get_rank(const H5F_t *f)
/*-------------------------------------------------------------------------
 * Function:    H5F_mpi_get_comm
 *
 * Purpose:     Retrieves the file's MPI_Comm communicator object
 *
 * Return:      Success:    The communicator object
 *              Failure:    MPI_COMM_NULL
 *
 *-------------------------------------------------------------------------
 */
Expand All @@ -122,6 +121,33 @@ H5F_mpi_get_comm(const H5F_t *f)
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5F_mpi_get_comm() */

/*-------------------------------------------------------------------------
 * Function:    H5F_mpi_get_info
 *
 * Purpose:     Looks up the MPI_Info object for a file by forwarding the
 *              request to H5FD_mpi_get_info() on the file's low-level
 *              driver handle (f->shared->lf).
 *
 * Return:      Success:    The MPI_Info object reported by the driver layer
 *              Failure:    MPI_INFO_NULL
 *
 *-------------------------------------------------------------------------
 */
MPI_Info
H5F_mpi_get_info(const H5F_t *f)
{
    MPI_Info ret_value = MPI_INFO_NULL;

    FUNC_ENTER_NOAPI(MPI_INFO_NULL)

    assert(f && f->shared);

    /* Ask the file driver layer for the info object */
    ret_value = H5FD_mpi_get_info(f->shared->lf);

    /* MPI_INFO_NULL coming back from the driver layer means the query failed */
    if (MPI_INFO_NULL == ret_value)
        HGOTO_ERROR(H5E_FILE, H5E_CANTGET, MPI_INFO_NULL, "driver get_info request failed");

done:
    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5F_mpi_get_info() */

/*-------------------------------------------------------------------------
* Function: H5F_shared_mpi_get_size
*
Expand Down
1 change: 0 additions & 1 deletion src/H5Fpkg.h
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,6 @@ struct H5F_shared_t {
#ifdef H5_HAVE_PARALLEL
H5P_coll_md_read_flag_t coll_md_read; /* Do all metadata reads collectively */
bool coll_md_write; /* Do all metadata writes collectively */
MPI_Info mpi_info; /* MPI info */
#endif /* H5_HAVE_PARALLEL */
};

Expand Down
1 change: 1 addition & 0 deletions src/H5Fprivate.h
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,7 @@ H5_DLL herr_t H5F_eoa_dirty(H5F_t *f);
#ifdef H5_HAVE_PARALLEL
H5_DLL int H5F_mpi_get_rank(const H5F_t *f);
H5_DLL MPI_Comm H5F_mpi_get_comm(const H5F_t *f);
H5_DLL MPI_Info H5F_mpi_get_info(const H5F_t *f);
H5_DLL int H5F_shared_mpi_get_size(const H5F_shared_t *f_sh);
H5_DLL int H5F_mpi_get_size(const H5F_t *f);
H5_DLL herr_t H5F_mpi_retrieve_comm(hid_t loc_id, hid_t acspl_id, MPI_Comm *mpi_comm);
Expand Down