Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove cached datatype conversion path table entries on file close #3942

Merged
merged 2 commits into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/H5Fint.c
Original file line number Diff line number Diff line change
Expand Up @@ -1615,6 +1615,18 @@ H5F__dest(H5F_t *f, bool flush, bool free_on_failure)
if (vol_wrap_ctx && (NULL == H5VL_object_unwrap(f->vol_obj)))
HDONE_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't unwrap VOL object");

/*
* Clean up any cached type conversion path table entries that
* may have been keeping a reference to the file's VOL object
* in order to prevent the file from being closed out from
* underneath other places that may access the conversion path
* or its src/dst datatypes later on (currently, conversions on
* variable-length and reference datatypes involve this)
*/
if (H5T_unregister(H5T_PERS_SOFT, NULL, NULL, NULL, f->vol_obj, NULL) < 0)
HDONE_ERROR(H5E_FILE, H5E_CANTRELEASE, FAIL,
"unable to free cached type conversion path table entries");

if (H5VL_free_object(f->vol_obj) < 0)
HDONE_ERROR(H5E_FILE, H5E_CANTDEC, FAIL, "unable to free VOL object");
f->vol_obj = NULL;
Expand Down
52 changes: 42 additions & 10 deletions src/H5T.c
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,6 @@ typedef H5T_t *(*H5T_copy_func_t)(H5T_t *old_dt);
static herr_t H5T__register_int(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst,
H5T_lib_conv_t func);
static herr_t H5T__register(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, H5T_conv_func_t *conv);
static herr_t H5T__unregister(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, H5T_conv_t func);
static htri_t H5T__compiler_conv(H5T_t *src, H5T_t *dst);
static herr_t H5T__set_size(H5T_t *dt, size_t size);
static herr_t H5T__close_cb(H5T_t *dt, void **request);
Expand Down Expand Up @@ -2672,7 +2671,7 @@ H5Tregister(H5T_pers_t pers, const char *name, hid_t src_id, hid_t dst_id, H5T_c
} /* end H5Tregister() */

/*-------------------------------------------------------------------------
* Function: H5T__unregister
* Function: H5T_unregister
*
* Purpose: Removes conversion paths that match the specified criteria.
* All arguments are optional. Missing arguments are wild cards.
Expand All @@ -2683,15 +2682,16 @@ H5Tregister(H5T_pers_t pers, const char *name, hid_t src_id, hid_t dst_id, H5T_c
*
*-------------------------------------------------------------------------
*/
static herr_t
H5T__unregister(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, H5T_conv_t func)
herr_t
H5T_unregister(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, H5VL_object_t *owned_vol_obj,
H5T_conv_t func)
{
H5T_path_t *path = NULL; /*conversion path */
H5T_soft_t *soft = NULL; /*soft conversion information */
int nprint = 0; /*number of paths shut down */
int i; /*counter */

FUNC_ENTER_PACKAGE_NOERR
FUNC_ENTER_NOAPI_NOERR

/* Remove matching entries from the soft list */
if (H5T_PERS_DONTCARE == pers || H5T_PERS_SOFT == pers) {
Expand All @@ -2704,6 +2704,8 @@ H5T__unregister(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, H5T_c
continue;
if (dst && dst->shared->type != soft->dst)
continue;
if (owned_vol_obj)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't want to remove the vlen conversion from the soft conversions table here, so always continue on if the owned_vol_obj parameter is specified. Ideally, the logic for removing the soft conversion functions would be separate from removing the cached conversion pathway table entries, but this seems minimally invasive.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The entire for loop can be skipped if owned_vol_obj is non-NULL. (So you could add that check to the surrounding if statement, which will reduce the performance impact a little)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very true; will do

continue;
if (func && func != soft->conv.u.app_func)
continue;

Expand All @@ -2714,13 +2716,20 @@ H5T__unregister(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, H5T_c

/* Remove matching conversion paths, except no-op path */
for (i = H5T_g.npaths - 1; i > 0; --i) {
bool nomatch;

path = H5T_g.path[i];
assert(path);

nomatch = ((H5T_PERS_SOFT == pers && path->is_hard) || (H5T_PERS_HARD == pers && !path->is_hard)) ||
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has become increasingly unwieldy - a small "match" routine is justified?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed that it's become unwieldy, though is the point of a "match" routine just to move the visual ugliness out of this routine?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mostly, and possibly to re-use

(name && *name && strcmp(name, path->name) != 0) ||
(src && H5T_cmp(src, path->src, false)) || (dst && H5T_cmp(dst, path->dst, false)) ||
(owned_vol_obj && (owned_vol_obj != path->src->shared->owned_vol_obj) &&
(owned_vol_obj != path->dst->shared->owned_vol_obj)) ||
(func && func != path->conv.u.app_func);

/* Not a match */
if (((H5T_PERS_SOFT == pers && path->is_hard) || (H5T_PERS_HARD == pers && !path->is_hard)) ||
(name && *name && strcmp(name, path->name) != 0) || (src && H5T_cmp(src, path->src, false)) ||
(dst && H5T_cmp(dst, path->dst, false)) || (func && func != path->conv.u.app_func)) {
if (nomatch) {
/*
* Notify all other functions to recalculate private data since some
* functions might cache a list of conversion functions. For
Expand Down Expand Up @@ -2769,7 +2778,7 @@ H5T__unregister(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, H5T_c
} /* end for */

FUNC_LEAVE_NOAPI(SUCCEED)
} /* end H5T__unregister() */
} /* end H5T_unregister() */

/*-------------------------------------------------------------------------
* Function: H5Tunregister
Expand Down Expand Up @@ -2799,7 +2808,7 @@ H5Tunregister(H5T_pers_t pers, const char *name, hid_t src_id, hid_t dst_id, H5T
if (dst_id > 0 && (NULL == (dst = (H5T_t *)H5I_object_verify(dst_id, H5I_DATATYPE))))
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "dst is not a data type");

if (H5T__unregister(pers, name, src, dst, func) < 0)
if (H5T_unregister(pers, name, src, dst, NULL, func) < 0)
HGOTO_ERROR(H5E_DATATYPE, H5E_CANTDELETE, FAIL, "internal unregister function failed");

done:
Expand Down Expand Up @@ -6096,3 +6105,26 @@ H5T_own_vol_obj(H5T_t *dt, H5VL_object_t *vol_obj)
done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5T_own_vol_obj() */

/*-------------------------------------------------------------------------
* Function: H5T__get_path_table_npaths
*
* Purpose: Testing function to return the number of type conversion
* paths currently stored in the type conversion path table
* cache.
*
* Return: Number of type conversion paths (can't fail)
*
*-------------------------------------------------------------------------
*/
int
H5T__get_path_table_npaths(void)
{
int ret_value = 0;

FUNC_ENTER_PACKAGE_NOERR

ret_value = H5T_g.npaths;

FUNC_LEAVE_NOAPI(ret_value)
}
3 changes: 3 additions & 0 deletions src/H5Tpkg.h
Original file line number Diff line number Diff line change
Expand Up @@ -877,4 +877,7 @@ H5_DLL herr_t H5T__sort_name(const H5T_t *dt, int *map);
/* Debugging functions */
H5_DLL herr_t H5T__print_stats(H5T_path_t *path, int *nprint /*in,out*/);

/* Testing functions */
H5_DLL int H5T__get_path_table_npaths(void);

#endif /* H5Tpkg_H */
2 changes: 2 additions & 0 deletions src/H5Tprivate.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ H5_DLL H5T_path_t *H5T_path_find(const H5T_t *src, const H5T_t *dst);
H5_DLL bool H5T_path_noop(const H5T_path_t *p);
H5_DLL H5T_bkg_t H5T_path_bkg(const H5T_path_t *p);
H5_DLL H5T_subset_info_t *H5T_path_compound_subset(const H5T_path_t *p);
H5_DLL herr_t H5T_unregister(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst,
H5VL_object_t *owned_vol_obj, H5T_conv_t func);
H5_DLL herr_t H5T_convert(H5T_path_t *tpath, hid_t src_id, hid_t dst_id, size_t nelmts, size_t buf_stride,
size_t bkg_stride, void *buf, void *bkg);
H5_DLL herr_t H5T_reclaim(hid_t type_id, struct H5S_t *space, void *buf);
Expand Down
190 changes: 190 additions & 0 deletions test/tmisc.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@
*************************************************************/

#define H5D_FRIEND /*suppress error about including H5Dpkg */
#define H5T_FRIEND /*suppress error about including H5Tpkg */

/* Define this macro to indicate that the testing APIs should be available */
#define H5D_TESTING

#include "testhdf5.h"
#include "H5srcdir.h"
#include "H5Dpkg.h" /* Datasets */
#include "H5Tpkg.h" /* Datatypes */
#include "H5MMprivate.h" /* Memory */

/* Definitions for misc. test #1 */
Expand Down Expand Up @@ -335,6 +337,8 @@ typedef struct {
See https://nvd.nist.gov/vuln/detail/CVE-2020-10812 */
#define CVE_2020_10812_FILENAME "cve_2020_10812.h5"

#define MISC38_FILE "type_conversion_path_table_issue.h5"

/****************************************************************
**
** test_misc1(): test unlinking a dataset from a group and immediately
Expand Down Expand Up @@ -6257,6 +6261,190 @@ test_misc37(void)

} /* end test_misc37() */

/****************************************************************
**
** test_misc38():
** Test for issue where the type conversion path table cache
** would grow continuously when variable-length datatypes
** are involved due to file VOL object comparisons causing
** the library not to reuse type conversion paths
**
****************************************************************/
static void
test_misc38(void)
{
H5VL_object_t *file_vol_obj = NULL;
const char *buf[] = {"attr_value"};
herr_t ret = SUCCEED;
hid_t file_id = H5I_INVALID_HID;
hid_t attr_id = H5I_INVALID_HID;
hid_t str_type = H5I_INVALID_HID;
hid_t space_id = H5I_INVALID_HID;
int init_npaths = 0;
int *irbuf = NULL;
char **rbuf = NULL;
bool vol_is_native;

/* Output message about test being performed */
MESSAGE(5, ("Fix for type conversion path table issue"));

/*
* Get the initial number of type conversion path table
* entries that are currently defined
*/
init_npaths = H5T__get_path_table_npaths();

file_id = H5Fcreate(MISC38_FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
CHECK(file_id, H5I_INVALID_HID, "H5Fcreate");

/* Check if native VOL is being used */
CHECK(h5_using_native_vol(H5P_DEFAULT, file_id, &vol_is_native), FAIL, "h5_using_native_vol");
if (!vol_is_native) {
CHECK(H5Fclose(file_id), FAIL, "H5Fclose");
MESSAGE(5, (" -- SKIPPED --\n"));
return;
}

/* Retrieve file's VOL object field for further use */
file_vol_obj = H5F_VOL_OBJ((H5F_t *)H5VL_object(file_id));

/*
* Check reference count of file's VOL object field. At this point,
* the object should have a reference count of 1 since the file
* was just created.
*/
VERIFY(file_vol_obj->rc, 1, "checking reference count");

str_type = H5Tcopy(H5T_C_S1);
CHECK(str_type, H5I_INVALID_HID, "H5Tcopy");

ret = H5Tset_size(str_type, H5T_VARIABLE);
CHECK(ret, FAIL, "H5Tset_size");

space_id = H5Screate(H5S_SCALAR);
CHECK(space_id, H5I_INVALID_HID, "H5Screate");

/*
* Check the number of type conversion path table entries currently
* stored in the cache. It shouldn't have changed yet.
*/
VERIFY(H5T__get_path_table_npaths(), init_npaths,
"checking number of type conversion path table entries");

attr_id = H5Acreate2(file_id, "attribute", str_type, space_id, H5P_DEFAULT, H5P_DEFAULT);
CHECK(attr_id, H5I_INVALID_HID, "H5Acreate2");

/*
* Check the number of type conversion path table entries currently
* stored in the cache. It shouldn't have changed yet.
*/
VERIFY(H5T__get_path_table_npaths(), init_npaths,
"checking number of type conversion path table entries");

/*
* Check reference count of file's VOL object field. At this point,
* the object should have a reference count of 2. Creating the
* attribute on the dataset will have caused a H5T_set_loc call that
* associates the attribute's datatype with the file's VOL object
* and will have incremented the reference count by 1.
*/
VERIFY(file_vol_obj->rc, 2, "checking reference count");

ret = H5Awrite(attr_id, str_type, buf);
CHECK(ret, FAIL, "H5Awrite");

/*
* Check the number of type conversion path table entries currently
* stored in the cache. The H5Awrite call should have added a new
* type conversion path. Note that if another test in this file uses
* the same conversion path, this check may fail and need to be
* refactored.
*/
VERIFY(H5T__get_path_table_npaths(), init_npaths + 1,
"checking number of type conversion path table entries");

/*
* Check reference count of file's VOL object field. At this point,
* the object should have a reference count of 3. Writing to the
* variable-length typed attribute will have caused an H5T_convert
* call that ends up incrementing the reference count of the
* associated file's VOL object.
*/
VERIFY(file_vol_obj->rc, 3, "checking reference count");

ret = H5Aclose(attr_id);
CHECK(ret, FAIL, "H5Aclose");
ret = H5Fclose(file_id);
CHECK(ret, FAIL, "H5Fclose");

irbuf = malloc(100 * 100 * sizeof(int));
CHECK_PTR(irbuf, "int read buf allocation");
rbuf = malloc(sizeof(char *));
CHECK_PTR(rbuf, "varstr read buf allocation");

for (size_t i = 0; i < 10; i++) {
file_id = H5Fopen(MISC38_FILE, H5F_ACC_RDONLY, H5P_DEFAULT);
CHECK(file_id, H5I_INVALID_HID, "H5Fopen");

/* Retrieve file's VOL object field for further use */
file_vol_obj = H5F_VOL_OBJ((H5F_t *)H5VL_object(file_id));

/*
* Check reference count of file's VOL object field. At this point,
* the object should have a reference count of 1 since the file
* was just opened.
*/
VERIFY(file_vol_obj->rc, 1, "checking reference count");

attr_id = H5Aopen(file_id, "attribute", H5P_DEFAULT);
CHECK(attr_id, H5I_INVALID_HID, "H5Aopen");

/*
* Check reference count of file's VOL object field. At this point,
* the object should have a reference count of 2 since opening
* the attribute will also have associated its type with the file's
* VOL object.
*/
VERIFY(file_vol_obj->rc, 2, "checking reference count");

ret = H5Aread(attr_id, str_type, rbuf);
CHECK(ret, FAIL, "H5Aread");

/*
* Check the number of type conversion path table entries currently
* stored in the cache. Each H5Aread call shouldn't cause this number
* to go up, as the library should have removed the cached conversion
* paths on file close.
*/
VERIFY(H5T__get_path_table_npaths(), init_npaths + 1,
"checking number of type conversion path table entries");

/*
* Check reference count of file's VOL object field. At this point,
* the object should have a reference count of 3. Writing to the
* variable-length typed attribute will have caused an H5T_convert
* call that ends up incrementing the reference count of the
* associated file's VOL object.
*/
VERIFY(file_vol_obj->rc, 3, "checking reference count");

ret = H5Treclaim(str_type, space_id, H5P_DEFAULT, rbuf);

ret = H5Aclose(attr_id);
CHECK(ret, FAIL, "H5Aclose");
ret = H5Fclose(file_id);
CHECK(ret, FAIL, "H5Fclose");
}

free(irbuf);
free(rbuf);

ret = H5Tclose(str_type);
CHECK(ret, FAIL, "H5Tclose");
ret = H5Sclose(space_id);
CHECK(ret, FAIL, "H5Sclose");
}

/****************************************************************
**
** test_misc(): Main misc. test routine.
Expand Down Expand Up @@ -6325,6 +6513,7 @@ test_misc(void)
test_misc35(); /* Test behavior of free-list & allocation statistics API calls */
test_misc36(); /* Exercise H5atclose and H5is_library_terminating */
test_misc37(); /* Test for seg fault failure at file close */
test_misc38(); /* Test for type conversion path table issue */

} /* test_misc() */

Expand Down Expand Up @@ -6380,6 +6569,7 @@ cleanup_misc(void)
#ifndef H5_NO_DEPRECATED_SYMBOLS
H5Fdelete(MISC31_FILE, H5P_DEFAULT);
#endif /* H5_NO_DEPRECATED_SYMBOLS */
H5Fdelete(MISC38_FILE, H5P_DEFAULT);
}
H5E_END_TRY
} /* end cleanup_misc() */
Loading