Skip to content

Commit

Permalink
Merge pull request #1589 from NOAA-GSD/ejh_szip
Browse files Browse the repository at this point in the history
re-implement the nc_def_var_szip() function, including for parallel I/O
  • Loading branch information
WardF authored Jan 22, 2020
2 parents e1e20ef + 0735a45 commit aadd5a2
Show file tree
Hide file tree
Showing 13 changed files with 777 additions and 521 deletions.
16 changes: 13 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ IF(USE_HDF5 OR ENABLE_NETCDF_4)
# Accommodate developers who have hdf5 libraries and
# headers on their system, but do not have the hdf
# .cmake files. If this is the case, they should
# specify HDF5_HL_LIB, HDF5_LIB, HDF5_INCLUDE_DIR manually.
# specify HDF5_HL_LIBRARY, HDF5_LIBRARY, HDF5_INCLUDE_DIR manually.
##
IF(HDF5_C_LIBRARY AND HDF5_HL_LIBRARY AND HDF5_INCLUDE_DIR)
SET(HDF5_LIBRARIES ${HDF5_C_LIBRARY} ${HDF5_HL_LIBRARY})
Expand Down Expand Up @@ -631,6 +631,7 @@ IF(USE_HDF5 OR ENABLE_NETCDF_4)
# Assert HDF5 version meets minimum required version.
##
SET(HDF5_VERSION_REQUIRED 1.8.10)
SET(HDF5_PAR_FILTER_VERSION 1.10.3)

IF(HDF5_VERSION_STRING AND NOT HDF5_VERSION)
SET(HDF5_VERSION ${HDF5_VERSION_STRING})
Expand All @@ -647,6 +648,13 @@ IF(USE_HDF5 OR ENABLE_NETCDF_4)
ENDIF()
ENDIF()

# Determine whether parallel filter operation is supported, i.e.
# whether the detected HDF5 version is at least
# HDF5_PAR_FILTER_VERSION.
#
# The version operands are quoted so that an empty HDF5_VERSION does
# not produce a malformed IF(); an empty version compares as "less"
# and therefore yields "no".
#
# CMake arguments are separated by whitespace, not commas: writing
# SET(HDF5_HAS_PAR_FILTERS, "no") would set a variable whose name ends
# in a comma and leave HDF5_HAS_PAR_FILTERS itself undefined.
IF("${HDF5_VERSION}" VERSION_LESS "${HDF5_PAR_FILTER_VERSION}")
  SET(HDF5_HAS_PAR_FILTERS "no")
ELSE()
  SET(HDF5_HAS_PAR_FILTERS "yes")
ENDIF()

##
# Include the HDF5 include directory.
##
Expand Down Expand Up @@ -743,8 +751,8 @@ IF(USE_HDF5 OR ENABLE_NETCDF_4)
SET(HDF5_CC h5cc)
ENDIF()

# Check to see if this is hdf5-1.10.2 or later.
CHECK_LIBRARY_EXISTS(${HDF5_C_LIBRARY_hdf5} H5DOread_chunk "" HDF5_SUPPORTS_PAR_FILTERS)
# Check to see if this is hdf5-1.10.3 or later.
CHECK_LIBRARY_EXISTS(${HDF5_C_LIBRARY_hdf5} H5Dread_chunk "" HDF5_SUPPORTS_PAR_FILTERS)

SET(H5_USE_16_API 1)
OPTION(NC_ENABLE_HDF_16_API "Enable HDF5 1.6.x Compatibility(Required)" ON)
Expand Down Expand Up @@ -1937,6 +1945,7 @@ is_enabled(ENABLE_NETCDF_4 HAS_NC4)
is_enabled(ENABLE_HDF4 HAS_HDF4)
is_enabled(ENABLE_NETCDF_4 HAS_HDF5)
is_enabled(USE_SZIP HAS_SZIP)
is_enabled(USE_SZIP HAS_SZIP_WRITE)
is_enabled(STATUS_PNETCDF HAS_PNETCDF)
is_enabled(STATUS_PARALLEL HAS_PARALLEL)
is_enabled(ENABLE_PARALLEL4 HAS_PARALLEL4)
Expand All @@ -1950,6 +1959,7 @@ is_enabled(JNA HAS_JNA)
is_enabled(ENABLE_ZERO_LENGTH_COORD_BOUND RELAX_COORD_BOUND)
is_enabled(USE_CDF5 HAS_CDF5)
is_enabled(ENABLE_ERANGE_FILL HAS_ERANGE_FILL)
is_enabled(HDF5_HAS_PAR_FILTERS HAS_PAR_FILTERS)

# Generate file from template.
CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/libnetcdf.settings.in"
Expand Down
19 changes: 12 additions & 7 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -1023,6 +1023,7 @@ if test "x$enable_netcdf_4" = xyes; then
fi

hdf5_parallel=no
hdf5_supports_par_filters=no
if test "x$enable_hdf5" = xyes; then

AC_DEFINE([USE_HDF5], [1], [if true, use HDF5])
Expand All @@ -1039,7 +1040,7 @@ if test "x$enable_hdf5" = xyes; then

# H5Pset_fapl_mpiposix and H5Pget_fapl_mpiposix have been removed since HDF5 1.8.12.
# Use H5Pset_fapl_mpio and H5Pget_fapl_mpio, instead.
AC_CHECK_FUNCS([H5Pget_fapl_mpio H5Pset_deflate H5Z_SZIP H5free_memory H5resize_memory H5allocate_memory H5Pset_libver_bounds H5Pset_all_coll_metadata_ops H5Z_SZIP H5DOread_chunk])
AC_CHECK_FUNCS([H5Pget_fapl_mpio H5Pset_deflate H5Z_SZIP H5free_memory H5resize_memory H5allocate_memory H5Pset_libver_bounds H5Pset_all_coll_metadata_ops H5Z_SZIP H5Dread_chunk])

# Check to see if HDF5 library has collective metadata APIs, (HDF5 >= 1.10.0)
if test "x$ac_cv_func_H5Pset_all_coll_metadata_ops" = xyes; then
Expand All @@ -1054,14 +1055,16 @@ if test "x$enable_hdf5" = xyes; then
AC_MSG_CHECKING([whether parallel io is enabled in hdf5])
AC_MSG_RESULT([$hdf5_parallel])

# Check to see if HDF5 library is 1.10.2 or greater. If so, allows parallel_zip.
if test "x$ac_cv_func_H5DOread_chunk" = xyes; then
AC_DEFINE([HDF5_SUPPORTS_PAR_FILTERS], [1], [if true, HDF5 is at least version 1.10.2 and allows parallel I/O with zip])
# Check to see if HDF5 library is 1.10.3 or greater. If so, allows parallel_zip.
if test "x$ac_cv_func_H5Dread_chunk" = xyes; then
AC_DEFINE([HDF5_SUPPORTS_PAR_FILTERS], [1], [if true, HDF5 is at least version 1.10.3 and allows parallel I/O with zip])
hdf5_supports_par_filters=yes
fi
AC_MSG_CHECKING([whether HDF5 is version 1.10.2 or greater])
AC_MSG_RESULT([$ac_cv_func_H5DOread_chunk])
AC_MSG_CHECKING([whether HDF5 is version 1.10.3 or greater])
AC_MSG_RESULT([$ac_cv_func_H5Dread_chunk])
AC_SUBST([HAS_PAR_FILTERS], [$hdf5_supports_par_filters])

# Check to see if we need to search for and link against szlib.
# Check to see if we need to search for and link against szlib.
if test "x$ac_cv_func_H5Z_SZIP" = xyes; then
AC_SEARCH_LIBS([SZ_BufftoBuffCompress], [szip sz], [],
[AC_MSG_ERROR([libhdf5 installed with szip support, but cannot find or link to the szip library.])])
Expand All @@ -1084,6 +1087,7 @@ if test "x$enable_hdf5" = xyes; then
AC_MSG_RESULT([$enable_szlib])

fi
AM_CONDITIONAL(HAS_PAR_FILTERS, [test x$hdf5_supports_par_filters = xyes ])

# If the user wants hdf4 built in, check it out.
if test "x$enable_hdf4" = xyes; then
Expand Down Expand Up @@ -1490,6 +1494,7 @@ AX_SET_META([NC_HAS_PARALLEL],[$enable_parallel],[yes])
AX_SET_META([NC_HAS_PARALLEL4],[$enable_parallel4],[yes])
AX_SET_META([NC_HAS_CDF5],[$enable_cdf5],[yes])
AX_SET_META([NC_HAS_ERANGE_FILL], [$enable_erange_fill],[yes])
AX_SET_META([NC_HAS_PAR_FILTERS], [$hdf5_supports_par_filters],[yes])
AX_SET_META([NC_HAS_BYTERANGE],[$enable_byterange],[yes])
AC_SUBST([NC_DISPATCH_VERSION], [1])
#####
Expand Down
5 changes: 4 additions & 1 deletion include/nc4internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,9 +210,12 @@ typedef struct NC_VAR_INFO
nc_bool_t dimscale; /**< True if var is a dimscale */
nc_bool_t *dimscale_attached; /**< Array of flags that are true if dimscale is attached for that dim index */
nc_bool_t deflate; /**< True if var has deflate filter applied */
int deflate_level;
int deflate_level; /**< If deflate is true, this is the deflate level, between 0 and 9. */
nc_bool_t shuffle; /**< True if var has shuffle filter applied */
nc_bool_t fletcher32; /**< True if var has fletcher32 filter applied */
nc_bool_t szip; /**< True if szip filter is in use. */
int options_mask; /**< Setting for szip filter, NC_SZIP_EC or NC_SZIP_NN. */
int pixels_per_block; /**< Setting for szip filter, even and <= 32. */
size_t chunk_cache_size, chunk_cache_nelems;
float chunk_cache_preemption;
void *format_var_info; /**< Pointer to any binary format info. */
Expand Down
2 changes: 1 addition & 1 deletion include/nc_tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
#include <string.h>
#include <stdlib.h>
#include "netcdf.h"
#include "netcdf_filter.h"
#include "nc_logging.h"
#ifdef USE_PARALLEL
#include "netcdf_par.h"
#endif


/** NC_MAX_DIMS for tests. Allows different NC_MAX_DIMS values
* without breaking this test with a heap or stack overflow. */
#define NC_TESTS_MAX_DIMS 1024
Expand Down
6 changes: 6 additions & 0 deletions include/netcdf_filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
#define H5Z_FILTER_SZIP 4
#endif

#define NC_SZIP_EC 4 /**< Selects entropy coding method for szip. */
#define NC_SZIP_NN 32 /**< Selects nearest neighbor coding method for szip. */

/* Define the known filter formats */
#define NC_FILTER_FORMAT_HDF5 1 /* Use the H5Z_class2_t format */

Expand Down Expand Up @@ -51,6 +54,9 @@ EXTERNL int nc_filter_register(NC_FILTER_INFO* filter_info);
EXTERNL int nc_filter_unregister(int format, int id);
EXTERNL int nc_filter_inq(int format, int id, NC_FILTER_INFO* filter_info);

/* Set szip compression for a variable.
 * options_mask selects the coding method (NC_SZIP_EC or NC_SZIP_NN);
 * pixels_per_block is the szip block size, which must be even and
 * no greater than 32. */
EXTERNL int nc_def_var_szip(int ncid, int varid, int options_mask, int pixels_per_block);

#if defined(__cplusplus)
}
#endif
Expand Down
3 changes: 2 additions & 1 deletion include/netcdf_meta.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#define NC_HAS_HDF4 @NC_HAS_HDF4@ /*!< HDF4 support. */
#define NC_HAS_HDF5 @NC_HAS_HDF5@ /*!< HDF5 support. */
#define NC_HAS_SZIP @NC_HAS_SZIP@ /*!< szip support (HDF5 only) */
#define NC_HAS_SZIP_WRITE @NC_HAS_SZIP@ /*!< szip write support (HDF5 only) */
#define NC_HAS_DAP2 @NC_HAS_DAP2@ /*!< DAP2 support. */
#define NC_HAS_DAP4 @NC_HAS_DAP4@ /*!< DAP4 support. */
#define NC_HAS_BYTERANGE @HAS_BYTERANGE@
Expand All @@ -58,5 +59,5 @@
#define NC_HAS_ERANGE_FILL @NC_HAS_ERANGE_FILL@ /*!< ERANGE_FILL Support */
#define NC_RELAX_COORD_BOUND 1 /*!< RELAX_COORD_BOUND */
#define NC_DISPATCH_VERSION @NC_DISPATCH_VERSION@ /*!< Dispatch table version */

#define NC_HAS_PAR_FILTERS @NC_HAS_PAR_FILTERS@ /* Parallel I/O with filter support. */
#endif
119 changes: 111 additions & 8 deletions libhdf5/hdf5var.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@
/** Number of bytes in 64 KB. */
#define SIXTY_FOUR_KB (65536)

/** Number of parameters needed when turning on szip filter. */
#define NUM_SZIP_PARAM 2

/** The HDF5 ID for the szip filter. */
#define HDF5_FILTER_SZIP 4

#ifdef LOGGING
/**
* Report the chunksizes selected for a variable.
Expand All @@ -49,7 +55,7 @@ reportchunking(const char *title, NC_VAR_INFO_T *var)
char digits[64];
if(i > 0) strlcat(buf,",",sizeof(buf));
snprintf(digits,sizeof(digits),"%ld",(unsigned long)var->chunksizes[i]);
strlcat(buf,digits,sizeof(buf));
strlcat(buf,digits,sizeof(buf));
}
LOG((3,"%s",buf));
}
Expand Down Expand Up @@ -661,6 +667,7 @@ nc_def_var_extra(int ncid, int varid, int *shuffle, int *deflate,
return NC_ENOTVAR;
assert(var && var->hdr.id == varid);


/* Can't turn on parallel and deflate/fletcher32/szip/shuffle
* before HDF5 1.10.3. */
#ifndef HDF5_SUPPORTS_PAR_FILTERS
Expand Down Expand Up @@ -713,10 +720,10 @@ nc_def_var_extra(int ncid, int varid, int *shuffle, int *deflate,
}

#ifdef USE_PARALLEL
/* If deflate, shuffle, or fletcher32 was turned on with
* parallel I/O writes, then switch to collective access. HDF5
* requires collevtive access for filter use with parallel
* I/O. */
/* If deflate, shuffle, or fletcher32 was turned on with
* parallel I/O writes, then switch to collective access. HDF5
* requires collective access for filter use with parallel
* I/O. */
if (deflate || shuffle || fletcher32)
{
if (h5->parallel && (var->deflate || var->shuffle || var->fletcher32))
Expand Down Expand Up @@ -872,8 +879,8 @@ nc_def_var_extra(int ncid, int varid, int *shuffle, int *deflate,
}

/**
* @internal Set compression settings on a variable. This is called by
* nc_def_var_deflate().
* @internal Set zlib compression settings on a variable. This is
* called by nc_def_var_deflate().
*
* @param ncid File ID.
* @param varid Variable ID.
Expand All @@ -900,6 +907,102 @@ NC4_def_var_deflate(int ncid, int varid, int shuffle, int deflate,
&deflate_level, NULL, NULL, NULL, NULL, NULL, NULL);
}

/**
* Set szip compression settings on a variable. Szip is an
* implementation of the extended-Rice lossless compression
* algorithm. Szip is reported to provide fast and effective
* compression.
*
* SZIP compression cannot be applied to variables with any
* user-defined type.
*
* @note The options_mask parameter may be either NC_SZIP_EC (entropy
* coding) or NC_SZIP_NN (nearest neighbor):
* * The entropy coding method is best suited for data that has been
* processed. The EC method works best for small numbers.
* * The nearest neighbor coding method preprocesses the data then the
* applies EC method as above.
*
* @param ncid File ID.
* @param varid Variable ID.
* @param options_mask The options mask. Can be NC_SZIP_EC or
* NC_SZIP_NN.
* @param pixels_per_block Pixels per block. Must be even and not
* greater than 32, with typical values being 8, 10, 16, or 32. This
* parameter affects compression ratio; the more pixel values vary,
* the smaller this number should be to achieve better performance. If
* pixels_per_block is bigger than the total number of elements in a
* dataset chunk, H5Pset_szip will succeed but the subsequent call to
* H5Dcreate will fail; the conflict can be detected only when the
* property list is used.
*
* @returns ::NC_NOERR No error.
* @returns ::NC_ENOTBUILT This HDF5 install was not built with szip.
* @returns ::NC_EBADID Bad ncid.
* @returns ::NC_ENOTVAR Invalid variable ID.
* @returns ::NC_ENOTNC4 Attempting netcdf-4 operation on file that is
* not netCDF-4/HDF5.
* @returns ::NC_ELATEDEF Too late to change settings for this variable.
* @returns ::NC_ENOTINDEFINE Not in define mode.
* @returns ::NC_EINVAL Invalid input
* @author Ed Hartnett
*/
int
nc_def_var_szip(int ncid, int varid, int options_mask, int pixels_per_block)
{
NC_GRP_INFO_T *grp;
NC_FILE_INFO_T *h5;
NC_VAR_INFO_T *var;
int built = 0;
int ret;

LOG((2, "%s: ncid 0x%x varid %d", __func__, ncid, varid));

/* If HDF5 was not built with szip, then return error. */
#ifdef HAVE_H5Z_SZIP
built = 1;
#endif /* HAVE_H5Z_SZIP */
if (!built)
return NC_EFILTER;

/* Find info for this file and group, and set pointer to each. */
if ((ret = nc4_find_nc_grp_h5(ncid, NULL, &grp, &h5)))
return ret;
assert(grp && h5);

/* Trying to write to a read-only file? No way, Jose! */
if (h5->no_write)
return NC_EPERM;

/* Can't turn on parallel and szip before HDF5 1.10.2. */
#ifdef USE_PARALLEL
#ifndef HDF5_SUPPORTS_PAR_FILTERS
if (h5->parallel == NC_TRUE)
return NC_EINVAL;
#endif /* HDF5_SUPPORTS_PAR_FILTERS */
#endif /* USE_PARALLEL */

/* Find the var. */
if (!(var = (NC_VAR_INFO_T *)ncindexith(grp->vars, varid)))
return NC_ENOTVAR;
assert(var && var->hdr.id == varid);

#ifdef USE_PARALLEL
/* Switch to collective access. HDF5 requires collevtive access
* for filter use with parallel I/O. */
if (h5->parallel)
var->parallel_access = NC_COLLECTIVE;
#endif /* USE_PARALLEL */

/* This will cause H5Pset_szip to be called when the var is created. */
var->szip = 1;
var->contiguous = NC_FALSE;
var->options_mask = options_mask;
var->pixels_per_block = pixels_per_block;

return NC_NOERR;
}

/**
* @internal Set checksum on a variable. This is called by
* nc_def_var_fletcher32().
Expand Down Expand Up @@ -1149,7 +1252,7 @@ NC4_def_var_filter(int ncid, int varid, unsigned int id, size_t nparams,
/* Determine default chunksizes for this variable unless already specified */
if(var->chunksizes && !var->chunksizes[0]) {
if((retval = nc4_find_default_chunksizes2(grp, var)))
return retval;
return retval;
/* Adjust the cache. */
if ((retval = nc4_adjust_var_cache(grp, var)))
return retval;
Expand Down
Loading

0 comments on commit aadd5a2

Please sign in to comment.