From 0a0c0e6cc7c48f4cec297fda612088ce3bd2cad5 Mon Sep 17 00:00:00 2001 From: Alan Greer Date: Thu, 7 Jul 2022 11:17:35 +0100 Subject: [PATCH] Added API to set chunking size for datasets in meta writer. --- tools/python/odin_data/meta_writer/hdf5dataset.py | 4 ++++ tools/python/odin_data/meta_writer/meta_writer.py | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/python/odin_data/meta_writer/hdf5dataset.py b/tools/python/odin_data/meta_writer/hdf5dataset.py index ea013eff..bb564fab 100644 --- a/tools/python/odin_data/meta_writer/hdf5dataset.py +++ b/tools/python/odin_data/meta_writer/hdf5dataset.py @@ -131,6 +131,7 @@ def __init__( rank=1, shape=None, maxshape=None, + chunks=None, cache=True, block_size=1000000, block_timeout=600, @@ -154,6 +155,7 @@ def __init__( self.dtype = dtype self.fillvalue = fillvalue self.shape = shape if shape is not None else (0,) * rank + self.chunks = chunks self.maxshape = maxshape if maxshape is not None else shape if shape is not None else (None,) * rank self._cache = None @@ -285,6 +287,7 @@ def __init__( fillvalue=-1, shape=None, maxshape=None, + chunks=None, cache=True, block_size=1000000, block_timeout=600, @@ -296,6 +299,7 @@ def __init__( fillvalue=fillvalue, shape=shape, maxshape=maxshape, + chunks=chunks, cache=cache, block_size=block_size, block_timeout=block_timeout, diff --git a/tools/python/odin_data/meta_writer/meta_writer.py b/tools/python/odin_data/meta_writer/meta_writer.py index 5f741546..9cfa159b 100644 --- a/tools/python/odin_data/meta_writer/meta_writer.py +++ b/tools/python/odin_data/meta_writer/meta_writer.py @@ -203,11 +203,15 @@ def _create_datasets(self, dataset_size): self._logger.debug("%s | Creating datasets", self._name) for dataset in self._datasets.values(): - chunks = dataset.maxshape + chunks = dataset.chunks + if chunks is None: + chunks = dataset.maxshape if isinstance(chunks, int): chunks = (chunks,) if None in chunks: chunks = None + self._logger.debug("Dataset {} chunking: {}".format(dataset.name, chunks)) + dataset_handle = self._hdf5_file.create_dataset( name=dataset.name, shape=dataset.shape,