From cf6a56b4aaefc6b68a753ea37770e192351ce7be Mon Sep 17 00:00:00 2001 From: Dave Allured Date: Fri, 7 Jan 2022 17:55:59 -0700 Subject: [PATCH] HDF5 format compatibility for existing files This PR selects the best HDF5 format compatibility options when re-opening an existing netCDF-4 file for writing, such as appending, or adding new groups or variables. This change selects the optimal HDF5 v1.8 compatibility when possible, and otherwise falls back to the adequate v1.6 compatibility. Format compatibility is a transient property of the HDF5 library, rather than baked in at file creation time. Therefore, format compatibility options must be re-selected every time a netCDF-4 file is re-opened for writing. This builds on the similar update for initial file creation, PR #1931, by @brtnfld, released in netcdf-c version 4.8.1. In particular, this PR moves the compatibility controls into a single central location, a new common function that is shared by both create and open functions. For more details, see issue #951, and documentation at the top of libhdf5/hdf5set_format_compatibility.c. --- libhdf5/CMakeLists.txt | 6 +-- libhdf5/Makefile.am | 11 ++-- libhdf5/hdf5create.c | 37 ++++++------- libhdf5/hdf5open.c | 16 ++++-- libhdf5/hdf5set_format_compatibility.c | 74 ++++++++++++++++++++++++++ 5 files changed, 107 insertions(+), 37 deletions(-) create mode 100644 libhdf5/hdf5set_format_compatibility.c diff --git a/libhdf5/CMakeLists.txt b/libhdf5/CMakeLists.txt index ad9b67a234..5cf4aa9267 100644 --- a/libhdf5/CMakeLists.txt +++ b/libhdf5/CMakeLists.txt @@ -1,5 +1,5 @@ ## This is a CMake file, part of Unidata's netCDF package. -# Copyright 2018, see the COPYRIGHT file for more information. +# Copyright 2018-2022, see the COPYRIGHT file for more information. # # This builds the HDF5 dispatch layer. 
# @@ -9,13 +9,13 @@ SET(libnchdf5_SOURCES nc4hdf.c nc4info.c hdf5file.c hdf5attr.c hdf5dim.c hdf5grp.c hdf5type.c hdf5internal.c hdf5create.c hdf5open.c hdf5var.c nc4mem.c nc4memcb.c hdf5dispatch.c hdf5filter.c -hdf5debug.c) +hdf5set_format_compatibility.c hdf5debug.c) IF(ENABLE_BYTERANGE) SET(libnchdf5_SOURCES ${libnchdf5_SOURCES} H5FDhttp.c) ENDIF() -# Build the HDF4 dispatch layer as a library that will be included in +# Build the HDF5 dispatch layer as a library that will be included in # the netCDF library. add_library(netcdfhdf5 OBJECT ${libnchdf5_SOURCES}) diff --git a/libhdf5/Makefile.am b/libhdf5/Makefile.am index 3e5d3359dc..e7aa2164f1 100644 --- a/libhdf5/Makefile.am +++ b/libhdf5/Makefile.am @@ -1,5 +1,5 @@ -# This is part of Unidata's netCDF package. Copyright 2018, see the -# COPYRIGHT file for more information. +# This is part of Unidata's netCDF package. Copyright 2018-2022, +# see the COPYRIGHT file for more information. # Build the HDF5 dispatch layer. @@ -16,7 +16,7 @@ noinst_LTLIBRARIES = libnchdf5.la libnchdf5_la_SOURCES = nc4hdf.c nc4info.c hdf5file.c hdf5attr.c \ hdf5dim.c hdf5grp.c hdf5type.c hdf5internal.c hdf5create.c hdf5open.c \ hdf5var.c nc4mem.c nc4memcb.c hdf5dispatch.c hdf5filter.c \ -hdf5debug.c hdf5debug.h hdf5err.h +hdf5set_format_compatibility.c hdf5debug.c hdf5debug.h hdf5err.h if ENABLE_BYTERANGE libnchdf5_la_SOURCES += H5FDhttp.c H5FDhttp.h @@ -24,8 +24,3 @@ endif # Package this for cmake build. EXTRA_DIST = CMakeLists.txt - - - - - diff --git a/libhdf5/hdf5create.c b/libhdf5/hdf5create.c index 0475c525c7..139c431c5c 100644 --- a/libhdf5/hdf5create.c +++ b/libhdf5/hdf5create.c @@ -117,9 +117,10 @@ nc4_create_file(const char *path, int cmode, size_t initialsz, } } - /* Need this access plist to control how HDF5 handles open objects - * on file close. (Setting H5F_CLOSE_WEAK will cause H5Fclose not to - * fail if there are any open objects in the file. 
This may happen when virtual + /* Need this FILE ACCESS plist to control how HDF5 handles open + * objects on file close; as well as for other controls below. + * (Setting H5F_CLOSE_WEAK will cause H5Fclose not to fail if there + * are any open objects in the file. This may happen when virtual * datasets are opened). */ if ((fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0) BAIL(NC_EHDFERR); @@ -127,8 +128,8 @@ nc4_create_file(const char *path, int cmode, size_t initialsz, BAIL(NC_EHDFERR); #ifdef USE_PARALLEL4 - /* If this is a parallel file create, set up the file creation - property list. */ + /* If this is a parallel file create, set up the file access + property list for MPI/IO. */ if (mpiinfo != NULL) { nc4_info->parallel = NC_TRUE; LOG((4, "creating parallel file with MPI/IO")); @@ -164,21 +165,14 @@ nc4_create_file(const char *path, int cmode, size_t initialsz, nc4_chunk_cache_preemption)); } -#if H5_VERSION_GE(1,10,2) - /* lib versions 1.10.2 and higher */ - if (H5Pset_libver_bounds(fapl_id, H5F_LIBVER_V18, H5F_LIBVER_LATEST) < 0) -#else -#if H5_VERSION_GE(1,10,0) - /* lib versions 1.10.0, 1.10.1 */ - if (H5Pset_libver_bounds(fapl_id, H5F_LIBVER_EARLIEST, H5F_LIBVER_LATEST) < 0) -#else - /* all HDF5 1.8 lib versions */ - if (H5Pset_libver_bounds(fapl_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) < 0) -#endif -#endif - BAIL(NC_EHDFERR); + /* Set HDF5 format compatibility in the FILE ACCESS property list. + * Compatibility is transient and must be reselected every time + * a file is opened for writing. */ + retval = hdf5set_format_compatibility(fapl_id); + if (retval != NC_NOERR) + BAIL(retval); - /* Create the property list. */ + /* Begin setup for the FILE CREATION property list. 
*/ if ((fcpl_id = H5Pcreate(H5P_FILE_CREATE)) < 0) BAIL(NC_EHDFERR); @@ -186,9 +180,8 @@ nc4_create_file(const char *path, int cmode, size_t initialsz, if (H5Pset_obj_track_times(fcpl_id,0)<0) BAIL(NC_EHDFERR); - /* Set latest_format in access propertly list and - * H5P_CRT_ORDER_TRACKED in the creation property list. This turns - * on HDF5 creation ordering. */ + /* Set H5P_CRT_ORDER_TRACKED in the creation property list. + * This turns on HDF5 creation ordering. */ if (H5Pset_link_creation_order(fcpl_id, (H5P_CRT_ORDER_TRACKED | H5P_CRT_ORDER_INDEXED)) < 0) BAIL(NC_EHDFERR); diff --git a/libhdf5/hdf5open.c b/libhdf5/hdf5open.c index f3ede3ede4..09a251b127 100644 --- a/libhdf5/hdf5open.c +++ b/libhdf5/hdf5open.c @@ -1,4 +1,4 @@ -/* Copyright 2003-2018, University Corporation for Atmospheric +/* Copyright 2003-2022, University Corporation for Atmospheric * Research. See COPYRIGHT file for copying and redistribution * conditions. */ /** @@ -765,9 +765,10 @@ nc4_open_file(const char *path, int mode, void* parameters, int ncid) mpiinfo = (NC_MPI_INFO *)parameters; /* assume, may be changed if inmemory is true */ #endif /* !USE_PARALLEL4 */ - /* Need this access plist to control how HDF5 handles open objects - * on file close. (Setting H5F_CLOSE_WEAK will cause H5Fclose not to - * fail if there are any open objects in the file. This may happen when virtual + /* Need this FILE ACCESS plist to control how HDF5 handles open + * objects on file close; as well as for other controls below. + * (Setting H5F_CLOSE_WEAK will cause H5Fclose not to fail if there + * are any open objects in the file. This may happen when virtual * datasets are opened). */ if ((fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0) BAIL(NC_EHDFERR); @@ -820,6 +821,13 @@ nc4_open_file(const char *path, int mode, void* parameters, int ncid) nc4_chunk_cache_preemption)); } + /* Set HDF5 format compatibility in the FILE ACCESS property list. 
+ * Compatibility is transient and must be reselected every time + * a file is opened for writing. */ + retval = hdf5set_format_compatibility(fapl_id); + if (retval != NC_NOERR) + BAIL(retval); + /* Process NC_INMEMORY */ if(nc4_info->mem.inmemory) { NC_memio* memio; diff --git a/libhdf5/hdf5set_format_compatibility.c b/libhdf5/hdf5set_format_compatibility.c new file mode 100644 index 0000000000..f6e7421147 --- /dev/null +++ b/libhdf5/hdf5set_format_compatibility.c @@ -0,0 +1,74 @@ +/* Copyright 2022, University Corporation for Atmospheric Research. + * See COPYRIGHT file for copying and redistribution conditions. */ +/** + * @file + * @internal This function selects the best HDF5 file format options + * to create netCDF-4 files that can be read and written by older + * library versions. + * + * Format compatibility is transient, not baked in to an HDF5 file + * at creation time. Therefore the desired compatibility options + * must be selected every time a file is opened for writing. + * + * This function should be called before every call to create a new + * netCDF-4 file, or to open an existing netCDF-4 file for writing. + * This function has no effect when opening a file for read only. + * + * This function should work correctly with all HDF5 library versions + * from 1.8.0 through 1.13.0 and beyond, with no further changes. + * This assumes that HDF5 versioning controls remain consistent + * into the future. + * + * The basic functionality is to select the traditional HDF5 v1.8 + * format compatibility, whenever possible. The less desirable + * v1.6 compatibility is selected in a few strange cases when it is + * not possible to select v1.8. + * + * Files created with v1.6 compatibility have superblock version 0. + * Files created with v1.8 compatibility have superblock version 2. + * + * The superblock version is locked in when a file is first created. 
+ * It is then possible to get a mix of v1.6 and v1.8 internal + * object versions, when an existing netCDF-4 file is modified by + * a different software version than the one that originally + * created the file. Mixed-object files of this nature are common + * and do not suffer any serious problems. + * + * See netcdf-c github issues #250 and #951 for more details about + * the rationale and evolution of netCDF-4 format compatibility. + */ + +#include "config.h" +#include "hdf5internal.h" + +/** + * @internal Function to set HDF5 file access options for backward + * format compatibility. Call this before every call to H5Fcreate + * or H5Fopen. + * + * @param fapl_id Identifier for valid file access property list to + * be used in the next call to H5Fcreate or H5Fopen. + * + * @return ::NC_NOERR No error; ::NC_EHDFERR General failure in HDF5. + */ +int +hdf5set_format_compatibility(hid_t fapl_id) +{ +#if H5_VERSION_GE(1,10,2) + /* lib versions 1.10.2 and higher: request bounds H5F_LIBVER_V18 through H5F_LIBVER_LATEST */ + if (H5Pset_libver_bounds(fapl_id, H5F_LIBVER_V18, H5F_LIBVER_LATEST) < 0) + +#else +#if H5_VERSION_GE(1,10,0) + /* lib versions 1.10.0, 1.10.1: request bounds H5F_LIBVER_EARLIEST through H5F_LIBVER_LATEST */ + if (H5Pset_libver_bounds(fapl_id, H5F_LIBVER_EARLIEST, H5F_LIBVER_LATEST) < 0) + +#else + /* all HDF5 1.8 lib versions: request H5F_LIBVER_LATEST for both bounds */ + if (H5Pset_libver_bounds(fapl_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) < 0) +#endif +#endif + return NC_EHDFERR; /* failure exit: shared body of whichever if statement was compiled in above */ + + return NC_NOERR; /* normal exit */ +}