Skip to content

Commit

Permalink
Merge pull request #1183 from hmaarrfk/add_set_alignment
Browse files Browse the repository at this point in the history
Add support for nc_set_alignment and nc_get_alignment
  • Loading branch information
jswhit authored Aug 31, 2022
2 parents 5d35046 + 3a81994 commit 065ba17
Show file tree
Hide file tree
Showing 5 changed files with 224 additions and 4 deletions.
2 changes: 2 additions & 0 deletions Changelog
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
* add Dataset methods has_<name>_filter (where <name>=zstd,blosc,bzip2,szip)
to check for availability of extra compression filters.
* release GIL for all C-lib calls (issue #1180).
* add module functions set_alignment and get_alignment (wrapping the C library's
nc_set_alignment and nc_get_alignment, available in netcdf-c >= 4.9.0) to
control alignment of data within HDF5 files.

version 1.6.0 (tag v1.6.0rel)
==============================
Expand Down
5 changes: 5 additions & 0 deletions include/netCDF4.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,11 @@ IF HAS_PARALLEL4_SUPPORT or HAS_PNETCDF_SUPPORT:
NC_MPIPOSIX
NC_PNETCDF

# The alignment API is only declared when HAS_SET_ALIGNMENT is true; setup.py
# writes that compile-time flag after scanning netcdf.h for nc_set_alignment.
IF HAS_SET_ALIGNMENT:
    cdef extern from "netcdf.h":
        # set the (threshold, alignment) pair; returns a netcdf status code
        int nc_set_alignment(int threshold, int alignment)
        # read the current pair back through the two output pointers;
        # returns a netcdf status code
        int nc_get_alignment(int *threshold, int *alignment)

# taken from numpy.pxi in numpy 1.0rc2.
cdef extern from "numpy/arrayobject.h":
ctypedef int npy_intp
Expand Down
18 changes: 14 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def check_api(inc_dirs,netcdf_lib_version):
has_zstandard = False
has_bzip2 = False
has_blosc = False
has_set_alignment = False

for d in inc_dirs:
try:
Expand All @@ -92,6 +93,8 @@ def check_api(inc_dirs,netcdf_lib_version):
has_cdf5_format = True
if line.startswith('nc_def_var_quantize'):
has_quantize = True
if line.startswith('nc_set_alignment'):
has_set_alignment = True

if has_nc_open_mem:
try:
Expand Down Expand Up @@ -141,7 +144,7 @@ def check_api(inc_dirs,netcdf_lib_version):
return has_rename_grp, has_nc_inq_path, has_nc_inq_format_extended, \
has_cdf5_format, has_nc_open_mem, has_nc_create_mem, \
has_parallel4_support, has_pnetcdf_support, has_szip_support, has_quantize, \
has_zstandard, has_bzip2, has_blosc
has_zstandard, has_bzip2, has_blosc, has_set_alignment


def getnetcdfvers(libdirs):
Expand Down Expand Up @@ -228,7 +231,7 @@ def extract_version(CYTHON_FNAME):

setup_cfg = 'setup.cfg'
# contents of setup.cfg will override env vars, unless
# USE_SETUPCFG evaluates to False.
# USE_SETUPCFG evaluates to False.
ncconfig = None
use_ncconfig = None
if USE_SETUPCFG and os.path.exists(setup_cfg):
Expand Down Expand Up @@ -338,7 +341,7 @@ def extract_version(CYTHON_FNAME):
elif USE_NCCONFIG is None:
# if nc-config exists, and USE_NCCONFIG not set, try to use it.
if HAS_NCCONFIG: USE_NCCONFIG=True
#elif USE_NCCONFIG is None:
#elif USE_NCCONFIG is None:
# USE_NCCONFIG = False # don't try to use nc-config if USE_NCCONFIG not set

try:
Expand Down Expand Up @@ -555,7 +558,7 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs):
has_rename_grp, has_nc_inq_path, has_nc_inq_format_extended, \
has_cdf5_format, has_nc_open_mem, has_nc_create_mem, \
has_parallel4_support, has_pnetcdf_support, has_szip_support, has_quantize, \
has_zstandard, has_bzip2, has_blosc = \
has_zstandard, has_bzip2, has_blosc, has_set_alignment = \
check_api(inc_dirs,netcdf_lib_version)
# for netcdf 4.4.x CDF5 format is always enabled.
if netcdf_lib_version is not None and\
Expand Down Expand Up @@ -662,6 +665,13 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs):
sys.stdout.write('netcdf lib does not have szip compression functions\n')
f.write('DEF HAS_SZIP_SUPPORT = 0\n')

if has_set_alignment:
sys.stdout.write('netcdf lib has nc_set_alignment function\n')
f.write('DEF HAS_SET_ALIGNMENT = 1\n')
else:
sys.stdout.write('netcdf lib does not have nc_set_alignment function\n')
f.write('DEF HAS_SET_ALIGNMENT = 0\n')

f.close()

if has_parallel4_support or has_pnetcdf_support:
Expand Down
47 changes: 47 additions & 0 deletions src/netCDF4/_netCDF4.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1324,6 +1324,52 @@ details."""
ierr = nc_set_chunk_cache(sizep,nelemsp, preemptionp)
_ensure_nc_success(ierr)

IF HAS_SET_ALIGNMENT:
    def get_alignment():
        """
        **`get_alignment()`**

        return the alignment settings currently used by the netCDF library
        for data within HDF5 files, as a tuple (threshold,alignment).
        The values can be changed with `set_alignment`. See the netcdf C
        library documentation for `nc_get_alignment` for details.

        This function was added in netcdf 4.9.0."""
        cdef int threshold_c, alignment_c
        cdef int status = nc_get_alignment(&threshold_c, &alignment_c)
        _ensure_nc_success(status)
        # the C ints are converted to python ints when the tuple is built
        return (threshold_c, alignment_c)

    def set_alignment(threshold, alignment):
        """
        **`set_alignment(threshold,alignment)`**

        change the alignment settings used by the netCDF library for data
        within HDF5 files. Both arguments must be convertible to C ints.
        The current values can be queried with `get_alignment`. See the
        netcdf C library documentation for `nc_set_alignment` for details.

        This function was added in netcdf 4.9.0."""
        # convert the python arguments to C ints before calling the C library
        cdef int threshold_c = threshold
        cdef int alignment_c = alignment
        cdef int status = nc_set_alignment(threshold_c, alignment_c)
        _ensure_nc_success(status)
ELSE:
    # Fallbacks used when the module was built against a netcdf C library
    # without nc_set_alignment: same names, but they always raise.
    def get_alignment():
        """
        **`get_alignment()`**

        not available in this build; always raises `RuntimeError`."""
        msg = "This function requires netcdf4 4.9.0+ to be used at compile time"
        raise RuntimeError(msg)

    def set_alignment(threshold, alignment):
        """
        **`set_alignment(threshold,alignment)`**

        not available in this build; always raises `RuntimeError`."""
        msg = "This function requires netcdf4 4.9.0+ to be used at compile time"
        raise RuntimeError(msg)

__netcdf4libversion__ = getlibversion().split()[0]
__hdf5libversion__ = _gethdf5libversion()
__has_rename_grp__ = HAS_RENAME_GRP
Expand All @@ -1339,6 +1385,7 @@ __has_zstandard_support__ = HAS_ZSTANDARD_SUPPORT
__has_bzip2_support__ = HAS_BZIP2_SUPPORT
__has_blosc_support__ = HAS_BLOSC_SUPPORT
__has_szip_support__ = HAS_SZIP_SUPPORT
__has_set_alignment__ = HAS_SET_ALIGNMENT
_needsworkaround_issue485 = __netcdf4libversion__ < "4.4.0" or \
(__netcdf4libversion__.startswith("4.4.0") and \
"-development" in __netcdf4libversion__)
Expand Down
156 changes: 156 additions & 0 deletions test/tst_alignment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import numpy as np
from netCDF4 import set_alignment, get_alignment, Dataset
import netCDF4
import os
import subprocess
import tempfile
import unittest

# Development builds of the C library report versions such as
# "4.9.1-development"; keep only the numeric part in front of the first dash.
libversion_no_development = netCDF4.__netcdf4libversion__.partition('-')[0]
libversion = tuple(map(int, libversion_no_development.split('.')))
# The alignment functions are expected from version 4.9 of the C library on.
has_alignment = libversion[0] > 4 or (
    libversion[0] == 4 and libversion[1] >= 9
)
# Probe for the h5ls command line tool, which the address-based alignment
# test relies on. has_h5ls is True only if `h5ls --version` exits with 0.
try:
    # Discard the output instead of piping it: check_call never reads from a
    # PIPE, so the child could block once the OS pipe buffer fills up.
    has_h5ls = subprocess.check_call(
        ['h5ls', '--version'],
        stdout=subprocess.DEVNULL,
    ) == 0
except (OSError, subprocess.SubprocessError):
    # OSError: h5ls is missing or not executable.
    # SubprocessError: h5ls ran but exited with a non-zero status.
    has_h5ls = False

# Reserve a unique '.nc' path for the tests. delete=False keeps the (empty)
# file on disk; the handle is closed explicitly here so that no open file
# object lingers until garbage collection before the path is reused.
with tempfile.NamedTemporaryFile(suffix='.nc', delete=False) as _tmp:
    file_name = _tmp.name


class AlignmentTestCase(unittest.TestCase):
    """Tests for the module-level `set_alignment` / `get_alignment` functions.

    `setUp` writes a file with ten float64 variables while the alignment is
    set to (1024, 4096); `test_setting_alignment` then uses `h5ls` to check
    that every variable starts at an address that is a multiple of 4096.
    """

    def setUp(self):
        """Create the test file and restore the alignment to (1, 1)."""
        self.file = file_name

        # This is a global variable in netcdf4, it must be set before File
        # creation
        if has_alignment:
            set_alignment(1024, 4096)

        try:
            if has_alignment:
                self.assertEqual(get_alignment(), (1024, 4096))

            # the context manager closes the file even if a write fails
            with Dataset(self.file, 'w') as f:
                f.createDimension('x', 4096)
                # Create many datasets so that we decrease the chance of
                # the dataset being randomly aligned
                for i in range(10):
                    v = f.createVariable(f'data{i:02d}', np.float64, ('x',))
                    v[...] = 0
        finally:
            if has_alignment:
                # ensure to reset the alignment to 1 (default values) so as
                # not to disrupt other tests, even when file creation failed
                set_alignment(1, 1)

        if has_alignment:
            self.assertEqual(get_alignment(), (1, 1))

    def test_version_settings(self):
        """Alignment can be set when supported, RuntimeError otherwise."""
        if has_alignment:
            # One should always be able to set the alignment to 1, 1
            set_alignment(1, 1)
            self.assertEqual(get_alignment(), (1, 1))
        else:
            with self.assertRaises(RuntimeError):
                set_alignment(1, 1)
            with self.assertRaises(RuntimeError):
                get_alignment()

    # if we have no support for alignment, we have no guarantees on
    # how the data can be aligned
    @unittest.skipIf(
        not has_h5ls,
        "h5ls not found."
    )
    @unittest.skipIf(
        not has_alignment,
        "No support for set_alignment in libnetcdf."
    )
    def test_setting_alignment(self):
        """Every data variable must start at a multiple of 4096 bytes."""
        # We choose to use h5ls instead of h5py since h5ls is very likely
        # to be installed alongside the rest of the tooling required to build
        # netcdf4-python
        # Output from h5ls is expected to look like:
        #
        # Opened "/tmp/tmpqexgozg1.nc" with sec2 driver.
        # data00                   Dataset {4096/4096}
        #     Attribute: DIMENSION_LIST {1}
        #         Type:      variable length of
        #                    object reference
        #     Attribute: _Netcdf4Coordinates {1}
        #         Type:      32-bit little-endian integer
        #     Location:  1:563
        #     Links:     1
        #     Storage:   32768 logical bytes, 32768 allocated bytes, 100.00% utilization
        #     Type:      IEEE 64-bit little-endian float
        #     Address:   8192
        # data01                   Dataset {4096/4096}
        #     Attribute: DIMENSION_LIST {1}
        #         Type:      variable length of
        #                    object reference
        #     Attribute: _Netcdf4Coordinates {1}
        #         Type:      32-bit little-endian integer
        #     Location:  1:1087
        #     Links:     1
        #     Storage:   32768 logical bytes, 32768 allocated bytes, 100.00% utilization
        #     Type:      IEEE 64-bit little-endian float
        #     Address:   40960
        # [...]
        # x                        Dataset {4096/4096}
        #     Attribute: CLASS scalar
        #         Type:      16-byte null-terminated ASCII string
        #     Attribute: NAME scalar
        #         Type:      64-byte null-terminated ASCII string
        #     Attribute: REFERENCE_LIST {10}
        #         Type:      struct {
        #                    "dataset"          +0    object reference
        #                    "dimension"        +8    32-bit little-endian unsigned integer
        #                } 16 bytes
        #     Attribute: _Netcdf4Dimid scalar
        #         Type:      32-bit little-endian integer
        #     Location:  1:239
        #     Links:     1
        #     Storage:   16384 logical bytes, 0 allocated bytes
        #     Type:      IEEE 32-bit big-endian float
        #     Address:   18446744073709551615
        h5ls_results = subprocess.check_output(
            ["h5ls", "--verbose", "--address", "--simple", self.file]
        ).decode()

        # -1 marks "no address found in the h5ls output yet"
        addresses = {f'data{i:02d}': -1 for i in range(10)}

        data_variable = None
        for line in h5ls_results.splitlines():
            # unindented lines start a new object; its name is the first word
            if not line.startswith(' '):
                data_variable = line.split(' ')[0]
            # only process the data variables we care to inspect
            if data_variable not in addresses:
                continue
            line = line.strip()
            if line.startswith('Address:'):
                addresses[data_variable] = int(line.split(':')[1].strip())

        for key, address in addresses.items():
            # report a missing address separately from a misaligned one
            self.assertGreaterEqual(
                address, 0, f"h5ls reported no address for {key}"
            )
            self.assertEqual(
                address % 4096, 0,
                f"{key} is not aligned. Address = 0x{address:x}",
            )

        # Alternative implementation in h5py
        # import h5py
        # with h5py.File(self.file, 'r') as h5file:
        #     for i in range(10):
        #         v = h5file[f'data{i:02d}']
        #         assert (v.id.get_offset() % 4096) == 0

    def tearDown(self):
        """Remove the temporary file written by `setUp`."""
        os.remove(self.file)


# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit 065ba17

Please sign in to comment.