diff --git a/Changelog b/Changelog index 6fb7a676c..6b3946e48 100644 --- a/Changelog +++ b/Changelog @@ -3,6 +3,8 @@ * add Dataset methods has__filter (where =zstd,blosc,bzip2,szip) to check for availability of extra compression filters. * release GIL for all C-lib calls (issue #1180). + * Add support for nc_set_alignment and nc_get_alignment to control alignment + of data within HDF5 files. version 1.6.0 (tag v1.6.0rel) ============================== diff --git a/include/netCDF4.pxi b/include/netCDF4.pxi index c6c6ac134..60afc1538 100644 --- a/include/netCDF4.pxi +++ b/include/netCDF4.pxi @@ -441,6 +441,11 @@ IF HAS_PARALLEL4_SUPPORT or HAS_PNETCDF_SUPPORT: NC_MPIPOSIX NC_PNETCDF +IF HAS_SET_ALIGNMENT: + cdef extern from "netcdf.h": + int nc_set_alignment(int threshold, int alignment) + int nc_get_alignment(int *threshold, int *alignment) + # taken from numpy.pxi in numpy 1.0rc2. cdef extern from "numpy/arrayobject.h": ctypedef int npy_intp diff --git a/setup.py b/setup.py index 2eea22e1c..ec1dc2bc8 100644 --- a/setup.py +++ b/setup.py @@ -71,6 +71,7 @@ def check_api(inc_dirs,netcdf_lib_version): has_zstandard = False has_bzip2 = False has_blosc = False + has_set_alignment = False for d in inc_dirs: try: @@ -92,6 +93,8 @@ def check_api(inc_dirs,netcdf_lib_version): has_cdf5_format = True if line.startswith('nc_def_var_quantize'): has_quantize = True + if line.startswith('nc_set_alignment'): + has_set_alignment = True if has_nc_open_mem: try: @@ -141,7 +144,7 @@ def check_api(inc_dirs,netcdf_lib_version): return has_rename_grp, has_nc_inq_path, has_nc_inq_format_extended, \ has_cdf5_format, has_nc_open_mem, has_nc_create_mem, \ has_parallel4_support, has_pnetcdf_support, has_szip_support, has_quantize, \ - has_zstandard, has_bzip2, has_blosc + has_zstandard, has_bzip2, has_blosc, has_set_alignment def getnetcdfvers(libdirs): @@ -228,7 +231,7 @@ def extract_version(CYTHON_FNAME): setup_cfg = 'setup.cfg' # contents of setup.cfg will override env vars, unless -# 
USE_SETUPCFG evaluates to False. +# USE_SETUPCFG evaluates to False. ncconfig = None use_ncconfig = None if USE_SETUPCFG and os.path.exists(setup_cfg): @@ -338,7 +341,7 @@ def extract_version(CYTHON_FNAME): elif USE_NCCONFIG is None: # if nc-config exists, and USE_NCCONFIG not set, try to use it. if HAS_NCCONFIG: USE_NCCONFIG=True -#elif USE_NCCONFIG is None: +#elif USE_NCCONFIG is None: # USE_NCCONFIG = False # don't try to use nc-config if USE_NCCONFIG not set try: @@ -555,7 +558,7 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs): has_rename_grp, has_nc_inq_path, has_nc_inq_format_extended, \ has_cdf5_format, has_nc_open_mem, has_nc_create_mem, \ has_parallel4_support, has_pnetcdf_support, has_szip_support, has_quantize, \ - has_zstandard, has_bzip2, has_blosc = \ + has_zstandard, has_bzip2, has_blosc, has_set_alignment = \ check_api(inc_dirs,netcdf_lib_version) # for netcdf 4.4.x CDF5 format is always enabled. if netcdf_lib_version is not None and\ @@ -662,6 +665,13 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs): sys.stdout.write('netcdf lib does not have szip compression functions\n') f.write('DEF HAS_SZIP_SUPPORT = 0\n') + if has_set_alignment: + sys.stdout.write('netcdf lib has nc_set_alignment function\n') + f.write('DEF HAS_SET_ALIGNMENT = 1\n') + else: + sys.stdout.write('netcdf lib does not have nc_set_alignment function\n') + f.write('DEF HAS_SET_ALIGNMENT = 0\n') + f.close() if has_parallel4_support or has_pnetcdf_support: diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx index 0efc54df3..ba7db81f8 100644 --- a/src/netCDF4/_netCDF4.pyx +++ b/src/netCDF4/_netCDF4.pyx @@ -1324,6 +1324,52 @@ details.""" ierr = nc_set_chunk_cache(sizep,nelemsp, preemptionp) _ensure_nc_success(ierr) +IF HAS_SET_ALIGNMENT: + def get_alignment(): + """ + **`get_alignment()`** + + return current netCDF alignment within HDF5 files in a tuple + (threshold,alignment). 
See netcdf C library documentation for + `nc_get_alignment` for details. Values can be reset with + `set_alignment`. + + This function was added in netcdf 4.9.0.""" + cdef int ierr + cdef int thresholdp, alignmentp + ierr = nc_get_alignment(&thresholdp, &alignmentp) + _ensure_nc_success(ierr) + threshold = thresholdp + alignment = alignmentp + return (threshold,alignment) + + def set_alignment(threshold, alignment): + """ + **`set_alignment(threshold,alignment)`** + + Change the HDF5 file alignment. + See netcdf C library documentation for `nc_set_alignment` for + details. + + This function was added in netcdf 4.9.0.""" + cdef int ierr + cdef int thresholdp, alignmentp + thresholdp = threshold + alignmentp = alignment + + ierr = nc_set_alignment(thresholdp, alignmentp) + _ensure_nc_success(ierr) +ELSE: + def get_alignment(): + raise RuntimeError( + "This function requires netcdf4 4.9.0+ to be used at compile time" + ) + + def set_alignment(threshold, alignment): + raise RuntimeError( + "This function requires netcdf4 4.9.0+ to be used at compile time" + ) + __netcdf4libversion__ = getlibversion().split()[0] __hdf5libversion__ = _gethdf5libversion() __has_rename_grp__ = HAS_RENAME_GRP @@ -1339,6 +1385,7 @@ __has_zstandard_support__ = HAS_ZSTANDARD_SUPPORT __has_bzip2_support__ = HAS_BZIP2_SUPPORT __has_blosc_support__ = HAS_BLOSC_SUPPORT __has_szip_support__ = HAS_SZIP_SUPPORT +__has_set_alignment__ = HAS_SET_ALIGNMENT _needsworkaround_issue485 = __netcdf4libversion__ < "4.4.0" or \ (__netcdf4libversion__.startswith("4.4.0") and \ "-development" in __netcdf4libversion__) diff --git a/test/tst_alignment.py b/test/tst_alignment.py new file mode 100644 index 000000000..fa56b5d0a --- /dev/null +++ b/test/tst_alignment.py @@ -0,0 +1,147 @@ +import numpy as np +from netCDF4 import set_alignment, get_alignment, Dataset +import netCDF4 +import os +import subprocess +import tempfile +import unittest + +# During testing, sometimes development versions are used. 
# They may be written as 4.9.1-development
libversion_no_development = netCDF4.__netcdf4libversion__.split('-')[0]
libversion = tuple(int(v) for v in libversion_no_development.split('.'))
# nc_set_alignment / nc_get_alignment were added in netcdf-c 4.9.0.
has_alignment = (libversion[0] > 4) or (
    libversion[0] == 4 and (libversion[1] >= 9)
)

file_name = tempfile.NamedTemporaryFile(suffix='.nc', delete=False).name


class AlignmentTestCase(unittest.TestCase):
    """Exercise `set_alignment` / `get_alignment` and the resulting file layout.

    Each test runs against a freshly written file holding ten float64
    variables of 4096 elements each; the file is deleted again in `tearDown`.
    """

    def setUp(self):
        """Configure the alignment (when supported) and write the test file."""
        self.file = file_name

        # This is a global variable in netcdf4, it must be set before File
        # creation
        if has_alignment:
            set_alignment(1024, 4096)
            self.assertEqual(get_alignment(), (1024, 4096))

        # The context manager closes the file even if one of the writes fails.
        with Dataset(self.file, 'w') as f:
            f.createDimension('x', 4096)
            # Create many datasets so that we decrease the chance of
            # the dataset being randomly aligned
            for i in range(10):
                v = f.createVariable(f'data{i:02d}', np.float64, ('x',))
                v[...] = 0

    def test_version_settings(self):
        """Check that the functions work, or raise RuntimeError if unsupported."""
        if has_alignment:
            # One should always be able to set the alignment to 0, 0
            set_alignment(0, 0)
            self.assertEqual(get_alignment(), (0, 0))
        else:
            with self.assertRaises(RuntimeError):
                set_alignment(0, 0)
            with self.assertRaises(RuntimeError):
                get_alignment()

    # if we have no support for alignment, we have no guarantees on
    # how the data can be aligned
    @unittest.skipIf(
        not has_alignment,
        "No support for set_alignment in libnetcdf."
    )
    def test_setting_alignment(self):
        """Check that every data variable starts on a 4096-byte boundary.

        The addresses are read from the output of the external `h5ls` tool;
        the test is skipped when that executable cannot be found.
        """
        # TODO: ensure that the underlying alignment is set, but I'm not sure
        # how to do this with netcdf
        # https://github.com/h5py/h5py/pull/2040/files?diff=unified&w=0#diff-3166eca28ff7f5d816f07f37eaba428b4351077384d65a2630e7d85c1284698fR7

        # We choose to use h5ls instead of h5py since h5ls is very likely
        # to be installed alongside the rest of the tooling required to build
        # netcdf4-python
        # Output from h5ls is expected to look like:
        #
        # Opened "/tmp/tmpqexgozg1.nc" with sec2 driver.
        # data00                   Dataset {4096/4096}
        #     Attribute: DIMENSION_LIST {1}
        #         Type:      variable length of
        #                    object reference
        #     Attribute: _Netcdf4Coordinates {1}
        #         Type:      32-bit little-endian integer
        #     Location:  1:563
        #     Links:     1
        #     Storage:   32768 logical bytes, 32768 allocated bytes, 100.00% utilization
        #     Type:      IEEE 64-bit little-endian float
        #     Address:   8192
        # data01                   Dataset {4096/4096}
        #     Attribute: DIMENSION_LIST {1}
        #         Type:      variable length of
        #                    object reference
        #     Attribute: _Netcdf4Coordinates {1}
        #         Type:      32-bit little-endian integer
        #     Location:  1:1087
        #     Links:     1
        #     Storage:   32768 logical bytes, 32768 allocated bytes, 100.00% utilization
        #     Type:      IEEE 64-bit little-endian float
        #     Address:   40960
        # [...]
        # x                        Dataset {4096/4096}
        #     Attribute: CLASS scalar
        #         Type:      16-byte null-terminated ASCII string
        #     Attribute: NAME scalar
        #         Type:      64-byte null-terminated ASCII string
        #     Attribute: REFERENCE_LIST {10}
        #         Type:      struct {
        #                    "dataset"          +0    object reference
        #                    "dimension"        +8    32-bit little-endian unsigned integer
        #                } 16 bytes
        #     Attribute: _Netcdf4Dimid scalar
        #         Type:      32-bit little-endian integer
        #     Location:  1:239
        #     Links:     1
        #     Storage:   16384 logical bytes, 0 allocated bytes
        #     Type:      IEEE 32-bit big-endian float
        #     Address:   18446744073709551615
        try:
            h5ls_results = subprocess.check_output(
                ["h5ls", "--verbose", "--address", "--simple", self.file]
            ).decode()
        except FileNotFoundError:
            # A missing tool says nothing about alignment; skip, don't error.
            self.skipTest("h5ls executable not found.")

        # None marks a variable whose address has not been seen yet.
        addresses = dict.fromkeys(f'data{i:02d}' for i in range(10))

        data_variable = None
        for line in h5ls_results.split('\n'):
            # Unindented lines start a new object; its name is the first word.
            if not line.startswith(' '):
                data_variable = line.split(' ')[0]
            # only process the data variables we care to inspect
            if data_variable not in addresses:
                continue
            line = line.strip()
            if line.startswith('Address:'):
                addresses[data_variable] = int(line.split(':')[1].strip())

        for key, address in addresses.items():
            self.assertIsNotNone(
                address, f"h5ls reported no address for {key}"
            )
            self.assertEqual(
                address % 4096, 0,
                f"{key} is not aligned. Address = 0x{address:x}"
            )

        # Alternative implementation in h5py
        # import h5py
        # with h5py.File(self.file, 'r') as h5file:
        #     for i in range(10):
        #         v = h5file[f'data{i:02d}']
        #         assert (v.id.get_offset() % 4096) == 0

    def tearDown(self):
        """Delete the file written by `setUp`."""
        # Remove the temporary files
        os.remove(self.file)


if __name__ == '__main__':
    unittest.main()