Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add set alignment #1

Merged
merged 1 commit into from
Sep 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Changelog
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
* add Dataset methods has_<name>_filter (where <name>=zstd,blosc,bzip2,szip)
to check for availability of extra compression filters.
* release GIL for all C-lib calls (issue #1180).
* Add support for nc_set_alignment and nc_get_alignment to control alignment
of data within HDF5 files.

version 1.6.0 (tag v1.6.0rel)
==============================
Expand Down
5 changes: 5 additions & 0 deletions include/netCDF4.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,11 @@ IF HAS_PARALLEL4_SUPPORT or HAS_PNETCDF_SUPPORT:
NC_MPIPOSIX
NC_PNETCDF

# Alignment API of the netCDF C library. The declarations are compiled in only
# when HAS_SET_ALIGNMENT is true, i.e. when setup.py found a line starting
# with 'nc_set_alignment' in the netcdf header and wrote
# 'DEF HAS_SET_ALIGNMENT = 1' into the generated constants file.
# Both functions return an int status code (checked by callers via
# _ensure_nc_success); nc_get_alignment writes its results through the two
# int pointers.
IF HAS_SET_ALIGNMENT:
    cdef extern from "netcdf.h":
        int nc_set_alignment(int threshold, int alignment)
        int nc_get_alignment(int *threshold, int *alignment)

# taken from numpy.pxi in numpy 1.0rc2.
cdef extern from "numpy/arrayobject.h":
ctypedef int npy_intp
Expand Down
18 changes: 14 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def check_api(inc_dirs,netcdf_lib_version):
has_zstandard = False
has_bzip2 = False
has_blosc = False
has_set_alignment = False

for d in inc_dirs:
try:
Expand All @@ -92,6 +93,8 @@ def check_api(inc_dirs,netcdf_lib_version):
has_cdf5_format = True
if line.startswith('nc_def_var_quantize'):
has_quantize = True
if line.startswith('nc_set_alignment'):
has_set_alignment = True

if has_nc_open_mem:
try:
Expand Down Expand Up @@ -141,7 +144,7 @@ def check_api(inc_dirs,netcdf_lib_version):
return has_rename_grp, has_nc_inq_path, has_nc_inq_format_extended, \
has_cdf5_format, has_nc_open_mem, has_nc_create_mem, \
has_parallel4_support, has_pnetcdf_support, has_szip_support, has_quantize, \
has_zstandard, has_bzip2, has_blosc
has_zstandard, has_bzip2, has_blosc, has_set_alignment


def getnetcdfvers(libdirs):
Expand Down Expand Up @@ -228,7 +231,7 @@ def extract_version(CYTHON_FNAME):

setup_cfg = 'setup.cfg'
# contents of setup.cfg will override env vars, unless
# USE_SETUPCFG evaluates to False.
# USE_SETUPCFG evaluates to False.
ncconfig = None
use_ncconfig = None
if USE_SETUPCFG and os.path.exists(setup_cfg):
Expand Down Expand Up @@ -338,7 +341,7 @@ def extract_version(CYTHON_FNAME):
elif USE_NCCONFIG is None:
# if nc-config exists, and USE_NCCONFIG not set, try to use it.
if HAS_NCCONFIG: USE_NCCONFIG=True
#elif USE_NCCONFIG is None:
#elif USE_NCCONFIG is None:
# USE_NCCONFIG = False # don't try to use nc-config if USE_NCCONFIG not set

try:
Expand Down Expand Up @@ -555,7 +558,7 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs):
has_rename_grp, has_nc_inq_path, has_nc_inq_format_extended, \
has_cdf5_format, has_nc_open_mem, has_nc_create_mem, \
has_parallel4_support, has_pnetcdf_support, has_szip_support, has_quantize, \
has_zstandard, has_bzip2, has_blosc = \
has_zstandard, has_bzip2, has_blosc, has_set_alignment = \
check_api(inc_dirs,netcdf_lib_version)
# for netcdf 4.4.x CDF5 format is always enabled.
if netcdf_lib_version is not None and\
Expand Down Expand Up @@ -662,6 +665,13 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs):
sys.stdout.write('netcdf lib does not have szip compression functions\n')
f.write('DEF HAS_SZIP_SUPPORT = 0\n')

if has_set_alignment:
sys.stdout.write('netcdf lib has nc_set_alignment function\n')
f.write('DEF HAS_SET_ALIGNMENT = 1\n')
else:
sys.stdout.write('netcdf lib does not have nc_set_alignment function\n')
f.write('DEF HAS_SET_ALIGNMENT = 0\n')

f.close()

if has_parallel4_support or has_pnetcdf_support:
Expand Down
47 changes: 47 additions & 0 deletions src/netCDF4/_netCDF4.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1324,6 +1324,52 @@ details."""
ierr = nc_set_chunk_cache(sizep,nelemsp, preemptionp)
_ensure_nc_success(ierr)

IF HAS_SET_ALIGNMENT:
    def get_alignment():
        """
**`get_alignment()`**

Query the alignment settings the netCDF library currently applies to data
inside HDF5 files.

Returns a `(threshold, alignment)` tuple of ints, as reported by the C
function `nc_get_alignment` (see the netcdf C library documentation for the
meaning of the two values). Use `set_alignment` to change them.

This function was added in netcdf 4.9.0."""
        cdef int status
        cdef int c_threshold, c_alignment
        # The C call fills both values through the pointers.
        status = nc_get_alignment(&c_threshold, &c_alignment)
        _ensure_nc_success(status)
        # C ints are converted to Python ints when the tuple is built.
        return (c_threshold, c_alignment)

    def set_alignment(threshold, alignment):
        """
**`set_alignment(threshold,alignment)`**

Change the HDF5 file alignment.

Both arguments are converted to C ints and handed to the C function
`nc_set_alignment`; see the netcdf C library documentation for details.
A non-zero status from the library is reported through
`_ensure_nc_success`.

This function was added in netcdf 4.9.0."""
        cdef int status
        # Coerce the Python arguments to C ints, threshold first.
        cdef int c_threshold = threshold
        cdef int c_alignment = alignment

        status = nc_set_alignment(c_threshold, c_alignment)
        _ensure_nc_success(status)
ELSE:
    # Fallbacks compiled when HAS_SET_ALIGNMENT is false: the names still
    # exist so they can be imported, but calling them always fails.
    def get_alignment():
        """Always raise RuntimeError: built without nc_get_alignment support."""
        message = "This function requires netcdf4 4.9.0+ to be used at compile time"
        raise RuntimeError(message)

    def set_alignment(threshold, alignment):
        """Always raise RuntimeError: built without nc_set_alignment support."""
        message = "This function requires netcdf4 4.9.0+ to be used at compile time"
        raise RuntimeError(message)

__netcdf4libversion__ = getlibversion().split()[0]
__hdf5libversion__ = _gethdf5libversion()
__has_rename_grp__ = HAS_RENAME_GRP
Expand All @@ -1339,6 +1385,7 @@ __has_zstandard_support__ = HAS_ZSTANDARD_SUPPORT
__has_bzip2_support__ = HAS_BZIP2_SUPPORT
__has_blosc_support__ = HAS_BLOSC_SUPPORT
__has_szip_support__ = HAS_SZIP_SUPPORT
__has_set_alignment__ = HAS_SET_ALIGNMENT
_needsworkaround_issue485 = __netcdf4libversion__ < "4.4.0" or \
(__netcdf4libversion__.startswith("4.4.0") and \
"-development" in __netcdf4libversion__)
Expand Down
156 changes: 156 additions & 0 deletions test/tst_alignment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import numpy as np
from netCDF4 import set_alignment, get_alignment, Dataset
import netCDF4
import os
import subprocess
import tempfile
import unittest

# During testing, sometimes development versions are used.
# They may be written as 4.9.1-development, so anything after the first '-'
# is dropped before the dotted version is parsed into a tuple of ints.
libversion_no_development = netCDF4.__netcdf4libversion__.split('-')[0]
libversion = tuple(int(v) for v in libversion_no_development.split('.'))
# The alignment functions were added in netcdf 4.9.0. Comparing the
# (major, minor) prefix as a tuple also copes with a version string that has
# only one component.
has_alignment = libversion[:2] >= (4, 9)

# h5ls is optional; the test that inspects file addresses is skipped without
# it. The output is discarded with DEVNULL rather than PIPE: check_call never
# reads from a pipe, so a child writing enough output could block forever.
try:
    subprocess.check_call(['h5ls', '--version'], stdout=subprocess.DEVNULL)
except (OSError, subprocess.CalledProcessError):
    # OSError: h5ls is not installed / not executable.
    # CalledProcessError: h5ls exited with a non-zero status.
    has_h5ls = False
else:
    has_h5ls = True

# Reserve a unique path for the test file. The handle is closed right away
# (delete=False keeps the file on disk) so that it is not left open while the
# tests reopen and finally remove the path.
with tempfile.NamedTemporaryFile(suffix='.nc', delete=False) as _tmp:
    file_name = _tmp.name


class AlignmentTestCase(unittest.TestCase):
    """Tests for the module-level `set_alignment` / `get_alignment` functions.

    `setUp` writes a file containing ten float64 variables while the
    alignment is set to (1024, 4096); `test_setting_alignment` then uses
    `h5ls` to check that every variable's data starts at a multiple of 4096.
    """

    def setUp(self):
        """Create the test file with alignment (1024, 4096), then reset to (1, 1)."""
        self.file = file_name

        try:
            # This is a global variable in netcdf4, it must be set before File
            # creation
            if has_alignment:
                set_alignment(1024, 4096)
                self.assertEqual(get_alignment(), (1024, 4096))

            # The context manager closes the file even if a write fails.
            with Dataset(self.file, 'w') as f:
                f.createDimension('x', 4096)
                # Create many datasets so that we decrease the chance of
                # the dataset being randomly aligned
                for i in range(10):
                    v = f.createVariable(f'data{i:02d}', np.float64, ('x',))
                    v[...] = 0
        finally:
            if has_alignment:
                # ensure to reset the alignment to 1 (default values) so as
                # not to disrupt other tests -- also when file creation fails
                set_alignment(1, 1)

        if has_alignment:
            self.assertEqual(get_alignment(), (1, 1))

    def test_version_settings(self):
        """The functions work when supported and raise RuntimeError otherwise."""
        if has_alignment:
            # One should always be able to set the alignment to 1, 1
            set_alignment(1, 1)
            self.assertEqual(get_alignment(), (1, 1))
        else:
            with self.assertRaises(RuntimeError):
                set_alignment(1, 1)
            with self.assertRaises(RuntimeError):
                get_alignment()

    # if we have no support for alignment, we have no guarantees on
    # how the data can be aligned
    @unittest.skipIf(
        not has_h5ls,
        "h5ls not found."
    )
    @unittest.skipIf(
        not has_alignment,
        "No support for set_alignment in libnetcdf."
    )
    def test_setting_alignment(self):
        """Every data variable must start at an address divisible by 4096."""
        # We choose to use h5ls instead of h5py since h5ls is very likely
        # to be installed alongside the rest of the tooling required to build
        # netcdf4-python
        # Output from h5ls is expected to look like:
        """
        Opened "/tmp/tmpqexgozg1.nc" with sec2 driver.
        data00 Dataset {4096/4096}
            Attribute: DIMENSION_LIST {1}
                Type: variable length of
                    object reference
            Attribute: _Netcdf4Coordinates {1}
                Type: 32-bit little-endian integer
            Location: 1:563
            Links: 1
            Storage: 32768 logical bytes, 32768 allocated bytes, 100.00% utilization
            Type: IEEE 64-bit little-endian float
            Address: 8192
        data01 Dataset {4096/4096}
            Attribute: DIMENSION_LIST {1}
                Type: variable length of
                    object reference
            Attribute: _Netcdf4Coordinates {1}
                Type: 32-bit little-endian integer
            Location: 1:1087
            Links: 1
            Storage: 32768 logical bytes, 32768 allocated bytes, 100.00% utilization
            Type: IEEE 64-bit little-endian float
            Address: 40960
        [...]
        x Dataset {4096/4096}
            Attribute: CLASS scalar
                Type: 16-byte null-terminated ASCII string
            Attribute: NAME scalar
                Type: 64-byte null-terminated ASCII string
            Attribute: REFERENCE_LIST {10}
                Type: struct {
                    "dataset" +0 object reference
                    "dimension" +8 32-bit little-endian unsigned integer
                } 16 bytes
            Attribute: _Netcdf4Dimid scalar
                Type: 32-bit little-endian integer
            Location: 1:239
            Links: 1
            Storage: 16384 logical bytes, 0 allocated bytes
            Type: IEEE 32-bit big-endian float
            Address: 18446744073709551615
        """
        h5ls_results = subprocess.check_output(
            ["h5ls", "--verbose", "--address", "--simple", self.file]
        ).decode()

        # None marks "no Address line seen yet" for that variable.
        addresses = dict.fromkeys(f'data{i:02d}' for i in range(10))

        data_variable = None
        for line in h5ls_results.split('\n'):
            # A line that does not start with a space opens a new object
            # section; its first word is the object name.
            if not line.startswith(' '):
                data_variable = line.split(' ')[0]
            # only process the data variables we care to inspect
            if data_variable not in addresses:
                continue
            line = line.strip()
            if line.startswith('Address:'):
                address = int(line.split(':')[1].strip())
                addresses[data_variable] = address

        for key, address in addresses.items():
            # Fail with a clear message if parsing found nothing, instead of
            # running the modulo check on a placeholder value.
            self.assertIsNotNone(
                address, f"h5ls reported no address for {key}"
            )
            self.assertEqual(
                address % 4096, 0,
                f"{key} is not aligned. Address = 0x{address:x}"
            )

        # Alternative implementation in h5py
        # import h5py
        # with h5py.File(self.file, 'r') as h5file:
        #     for i in range(10):
        #         v = h5file[f'data{i:02d}']
        #         assert (v.id.get_offset() % 4096) == 0

    def tearDown(self):
        """Delete the file written by `setUp`."""
        # Remove the temporary files
        os.remove(self.file)


# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()