Skip to content

Commit

Permalink
Cache tidy (HDFGroup#2693)
Browse files Browse the repository at this point in the history
* Correct concurrency bugs when running tests, along with a bugfix & small
warning cleanup.

* Committing clang-format changes

* Allow spaces (and tabs) in VOL connector info string from environment variable.

* Parse connector name from HDF5_PLUGIN_PATH environment variable better

* Correct H5VLquery_optional to use H5VL routine instead of H5I.  Also add an
error message to the failure return value from not finding a plugin.

* Play nice with existing plugin paths

* Use API routine to determine if native connector is terminal.

* Committing clang-format changes

* Make string size larger, to allow for connectors with longer names.

* Be more flexible about testing external pass through connectors, especially if
they have registered new optional operations.

* Bring style closer to library's agreed coding style

* Committing clang-format changes

---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: AWS ParallelCluster user <ec2-user@ip-10-0-0-65.us-east-2.compute.internal>
Co-authored-by: Koziol <qkoziol@88665a374c70.ant.amazon.com>
  • Loading branch information
4 people authored and byrnHDF committed Apr 16, 2023
1 parent 8681fa3 commit 40cca93
Show file tree
Hide file tree
Showing 7 changed files with 524 additions and 1,189 deletions.
65 changes: 18 additions & 47 deletions src/H5AC.c
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ H5AC_create(const H5F_t *f, H5AC_cache_config_t *config_ptr, H5AC_cache_image_co
HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, "can't create cleaned entry list")
} /* end if */

/* construct the candidate slist for all processes.
/* construct the candidate skip list for all processes.
* when the distributed strategy is selected as all processes
* will use it in the case of a flush.
*/
Expand Down Expand Up @@ -439,34 +439,25 @@ H5AC_dest(H5F_t *f)

/* Check if log messages are being emitted */
if (H5C_get_logging_status(f->shared->cache, &log_enabled, &curr_logging) < 0)

HGOTO_ERROR(H5E_CACHE, H5E_LOGGING, FAIL, "unable to get logging status")
if (log_enabled && curr_logging) {

if (H5C_log_write_destroy_cache_msg(f->shared->cache) < 0)

HDONE_ERROR(H5E_CACHE, H5E_LOGGING, FAIL, "unable to emit log message")
}

/* Tear down logging */
if (log_enabled) {
if (curr_logging)
if (H5C_log_write_destroy_cache_msg(f->shared->cache) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_LOGGING, FAIL, "unable to emit log message")

if (H5C_log_tear_down(f->shared->cache) < 0)

HGOTO_ERROR(H5E_CACHE, H5E_LOGGING, FAIL, "mdc logging tear-down failed")
}
HGOTO_ERROR(H5E_CACHE, H5E_LOGGING, FAIL, "metadata cache logging tear-down failed")
} /* end if */

#ifdef H5_HAVE_PARALLEL

/* destroying the cache, so clear all collective entries */
if (H5C_clear_coll_entries(f->shared->cache, FALSE) < 0)

HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_clear_coll_entries() failed")
HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't clear collective entries")

aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(f->shared->cache);

if (aux_ptr) {

/* Sanity check */
HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);

Expand All @@ -480,67 +471,53 @@ H5AC_dest(H5F_t *f)
* H5AC__flush_entries() and disable it afterwards, as the
* skip list will be disabled after the previous flush.
*
* Note that H5C_dest() does slist setup and take down as well.
* Note that H5C_dest() does skip list setup and take down as well.
* Unfortunately, we can't do the setup and take down just once,
* as H5C_dest() is called directly in the test code.
*
* Fortunately, the cache should be clean or close to it at this
* point, so the overhead should be minimal.
*/
if (H5F_ACC_RDWR & H5F_INTENT(f)) {

/* enable and load the slist */
/* enable and load the skip list */
if (H5C_set_slist_enabled(f->shared->cache, TRUE, FALSE) < 0)

HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "set slist enabled failed")
HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "can't enable skip list")

if (H5AC__flush_entries(f) < 0)

HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush")

/* disable the slist -- should be empty */
/* disable the skip list -- should be empty */
if (H5C_set_slist_enabled(f->shared->cache, FALSE, FALSE) < 0)

HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "disable slist failed")
}
}
#endif /* H5_HAVE_PARALLEL */
HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "can't disable skip list")
} /* end if */
} /* end if */
#endif /* H5_HAVE_PARALLEL */

/* Destroy the cache */
if (H5C_dest(f) < 0)

HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "can't destroy cache")

f->shared->cache = NULL;

#ifdef H5_HAVE_PARALLEL

if (aux_ptr != NULL) {

if (aux_ptr->d_slist_ptr != NULL) {

HDassert(H5SL_count(aux_ptr->d_slist_ptr) == 0);
H5SL_close(aux_ptr->d_slist_ptr);

} /* end if */

if (aux_ptr->c_slist_ptr != NULL) {

HDassert(H5SL_count(aux_ptr->c_slist_ptr) == 0);
H5SL_close(aux_ptr->c_slist_ptr);

} /* end if */

if (aux_ptr->candidate_slist_ptr != NULL) {

HDassert(H5SL_count(aux_ptr->candidate_slist_ptr) == 0);
H5SL_close(aux_ptr->candidate_slist_ptr);

} /* end if */

aux_ptr->magic = 0;
aux_ptr = H5FL_FREE(H5AC_aux_t, aux_ptr);

} /* end if */
#endif /* H5_HAVE_PARALLEL */

Expand Down Expand Up @@ -1215,13 +1192,10 @@ H5AC_prep_for_file_flush(H5F_t *f)
HDassert(f->shared->cache);

if (H5C_set_slist_enabled(f->shared->cache, TRUE, FALSE) < 0)

HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "slist enabled failed")
HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "can't enable skip list")

done:

FUNC_LEAVE_NOAPI(ret_value)

} /* H5AC_prep_for_file_flush() */

/*-------------------------------------------------------------------------
Expand All @@ -1235,7 +1209,7 @@ H5AC_prep_for_file_flush(H5F_t *f)
* to do any necessary necessary cleanup work after a cache
* flush.
*
* Initially, this means taking down the slist after the
* Initially, this means taking down the skip list after the
* flush. We do this in a separate call because
* H5F__flush_phase2() make repeated calls to H5AC_flush().
* Handling this detail in separate calls allows us to avoid
Expand All @@ -1262,13 +1236,10 @@ H5AC_secure_from_file_flush(H5F_t *f)
HDassert(f->shared->cache);

if (H5C_set_slist_enabled(f->shared->cache, FALSE, FALSE) < 0)

HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "slist enabled failed")
HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "can't disable skip list")

done:

FUNC_LEAVE_NOAPI(ret_value)

} /* H5AC_secure_from_file_flush() */

/*-------------------------------------------------------------------------
Expand Down
25 changes: 9 additions & 16 deletions src/H5ACmpio.c
Original file line number Diff line number Diff line change
Expand Up @@ -1888,12 +1888,9 @@ H5AC__rsp__p0_only__flush(H5F_t *f)
* However, when flushing from within the close operation from a file,
* it's possible to skip this barrier (on the second flush of the cache).
*/
if (!H5CX_get_mpi_file_flushing()) {

if (!H5CX_get_mpi_file_flushing())
if (MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))

HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
}

/* Flush data to disk, from rank 0 process */
if (aux_ptr->mpi_rank == 0) {
Expand Down Expand Up @@ -2102,31 +2099,28 @@ H5AC__run_sync_point(H5F_t *f, int sync_point_op)

/* Sanity checks */
HDassert(f != NULL);

cache_ptr = f->shared->cache;

HDassert(cache_ptr != NULL);

aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);

HDassert(aux_ptr != NULL);
HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
HDassert((sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) ||
(sync_point_op == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED));

#ifdef H5AC_DEBUG_DIRTY_BYTES_CREATION
HDfprintf(stdout, "%d:H5AC_propagate...:%u: (u/uu/i/iu/m/mu) = %zu/%u/%zu/%u/%zu/%u\n", aux_ptr->mpi_rank,
aux_ptr->dirty_bytes_propagations, aux_ptr->unprotect_dirty_bytes,
HDfprintf(stdout, "%d:%s...:%u: (u/uu/i/iu/m/mu) = %zu/%u/%zu/%u/%zu/%u\n", aux_ptr->mpi_rank,
__func__ aux_ptr->dirty_bytes_propagations, aux_ptr->unprotect_dirty_bytes,
aux_ptr->unprotect_dirty_bytes_updates, aux_ptr->insert_dirty_bytes,
aux_ptr->insert_dirty_bytes_updates, aux_ptr->move_dirty_bytes,
aux_ptr->move_dirty_bytes_updates);
#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */

/* clear collective access flag on half of the entries in the
cache and mark them as independent in case they need to be
evicted later. All ranks are guaranteed to mark the same entries
since we don't modify the order of the collectively accessed
entries except through collective access. */
/* Clear collective access flag on half of the entries in the cache and
* mark them as independent in case they need to be evicted later. All
* ranks are guaranteed to mark the same entries since we don't modify the
* order of the collectively accessed entries except through collective
* access.
*/
if (H5C_clear_coll_entries(cache_ptr, TRUE) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_clear_coll_entries() failed.")

Expand Down Expand Up @@ -2188,7 +2182,6 @@ H5AC__run_sync_point(H5F_t *f, int sync_point_op)
#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */

done:

FUNC_LEAVE_NOAPI(ret_value)
} /* H5AC__run_sync_point() */

Expand Down
Loading

0 comments on commit 40cca93

Please sign in to comment.