diff --git a/CHANGELOG.md b/CHANGELOG.md
index c1c490089..b4f0fde80 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,7 @@
### Enhancements
- Added support for expandable datasets of references for untyped and compound data types. @stephprince [#1188](https://github.com/hdmf-dev/hdmf/pull/1188)
+- Improved HTML representation of data in `Containers`. @h-mayorquin [#1100](https://github.com/hdmf-dev/hdmf/pull/1100)
### Bug fixes
- Fixed inaccurate error message when validating reference data types. @stephprince [#1199](https://github.com/hdmf-dev/hdmf/pull/1199)
diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index 36aeb7c8f..e9156dc50 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -19,7 +19,8 @@
from ...container import Container
from ...data_utils import AbstractDataChunkIterator
from ...spec import RefSpec, DtypeSpec, NamespaceCatalog
-from ...utils import docval, getargs, popargs, get_data_shape, get_docval, StrDataset
+from ...utils import (docval, getargs, popargs, get_data_shape, get_docval, StrDataset,
+ get_basic_array_info, generate_array_html_repr)
from ..utils import NamespaceToBuilderHelper, WriteStatusTracker
ROOT_NAME = 'root'
@@ -1603,3 +1604,29 @@ def set_dataio(cls, **kwargs):
data = H5DataIO(data)
"""
return H5DataIO.__init__(**kwargs)
+
+    @staticmethod
+    def generate_dataset_html(dataset):
+        """Generates an html representation for a dataset for the HDF5IO class.
+
+        Objects that expose the h5py low-level handle (``dataset.id.get_storage_size``)
+        get the basic array info extended with chunking and compression details and
+        are labelled "HDF5 dataset". Any other array-like object receives the basic
+        representation only, instead of raising ``AttributeError``.
+
+        :param dataset: array-like object exposing ``dtype``, ``shape`` and either
+            ``nbytes`` or ``size``
+        :returns: html string describing the dataset
+        """
+        # get basic array info
+        array_info_dict = get_basic_array_info(dataset)
+
+        # Container._generate_array_html forwards every array-like field to the read IO,
+        # so in-memory arrays can arrive here; they have no HDF5 storage to inspect.
+        dataset_id = getattr(dataset, "id", None)
+        if not hasattr(dataset_id, "get_storage_size"):
+            return generate_array_html_repr(array_info_dict, dataset)
+
+        # get info from hdf5 dataset
+        compressed_size = dataset_id.get_storage_size()
+        if hasattr(dataset, "nbytes"):  # TODO: Remove this after h5py minimal version is larger than 3.0
+            uncompressed_size = dataset.nbytes
+        else:
+            uncompressed_size = dataset.size * dataset.dtype.itemsize
+        # a dataset with no allocated storage reports 0 bytes; avoid dividing by zero
+        compression_ratio = uncompressed_size / compressed_size if compressed_size != 0 else "undefined"
+
+        array_info_dict.update({"Chunk shape": dataset.chunks,
+                                "Compression": dataset.compression,
+                                "Compression opts": dataset.compression_opts,
+                                "Compression ratio": compression_ratio})
+
+        # generate html repr
+        return generate_array_html_repr(array_info_dict, dataset, "HDF5 dataset")
diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py
index 35023066f..86fd25b26 100644
--- a/src/hdmf/backends/io.py
+++ b/src/hdmf/backends/io.py
@@ -5,7 +5,7 @@
from ..build import BuildManager, GroupBuilder
from ..container import Container, HERDManager
from .errors import UnsupportedOperation
-from ..utils import docval, getargs, popargs
+from ..utils import docval, getargs, popargs, get_basic_array_info, generate_array_html_repr
from warnings import warn
@@ -188,6 +188,14 @@ def close(self):
''' Close this HDMFIO object to further reading/writing'''
pass
+    @staticmethod
+    def generate_dataset_html(dataset):
+        """Generates the backend-agnostic html representation for a dataset.
+
+        The basic array info is computed with ``get_basic_array_info`` and rendered
+        by ``generate_array_html_repr`` without a dataset-type label.
+        """
+        return generate_array_html_repr(get_basic_array_info(dataset), dataset)
+
def __enter__(self):
return self
diff --git a/src/hdmf/container.py b/src/hdmf/container.py
index 7c450770a..8f961936f 100644
--- a/src/hdmf/container.py
+++ b/src/hdmf/container.py
@@ -12,7 +12,8 @@
import pandas as pd
from .data_utils import DataIO, append_data, extend_data, AbstractDataChunkIterator
-from .utils import docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict
+from .utils import (docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict,
+ get_basic_array_info, generate_array_html_repr)
from .term_set import TermSet, TermSetWrapper
@@ -707,8 +708,6 @@ def _generate_html_repr(self, fields, level=0, access_code="", is_field=False):
for index, item in enumerate(fields):
access_code += f'[{index}]'
html_repr += self._generate_field_html(index, item, level, access_code)
- elif isinstance(fields, np.ndarray):
- html_repr += self._generate_array_html(fields, level)
else:
pass
@@ -724,18 +723,23 @@ def _generate_field_html(self, key, value, level, access_code):
return f'
{key}: {value}
'
- if hasattr(value, "generate_html_repr"):
- html_content = value.generate_html_repr(level + 1, access_code)
+ is_array_data = isinstance(value, (np.ndarray, h5py.Dataset, DataIO)) or \
+ (hasattr(value, "store") and hasattr(value, "shape")) # Duck typing for zarr array
+ if is_array_data:
+ html_content = self._generate_array_html(value, level + 1)
+ elif hasattr(value, "generate_html_repr"):
+ html_content = value.generate_html_repr(level + 1, access_code)
elif hasattr(value, '__repr_html__'):
html_content = value.__repr_html__()
-
- elif hasattr(value, "fields"):
+ elif hasattr(value, "fields"): # Note that h5py.Dataset has a fields attribute so there is an implicit order
html_content = self._generate_html_repr(value.fields, level + 1, access_code, is_field=True)
elif isinstance(value, (list, dict, np.ndarray)):
html_content = self._generate_html_repr(value, level + 1, access_code, is_field=False)
else:
html_content = f'{value}'
+
+
html_repr = (
f'{key}
'
@@ -745,10 +749,18 @@ def _generate_field_html(self, key, value, level, access_code):
return html_repr
+
def _generate_array_html(self, array, level):
- """Generates HTML for a NumPy array."""
- str_ = str(array).replace("\n", "")
- return f'{str_}
'
+ """Generates HTML for array data.
+
+ If ``self.get_read_io()`` returns an IO object, the representation is delegated to its
+ ``generate_dataset_html``; otherwise it is built with ``get_basic_array_info`` and
+ ``generate_array_html_repr`` using the label "NumPy array".
+
+ :param array: the array-like value to render
+ :param level: nesting level of the field (how it is used in the returned markup is not
+ visible in this rendering of the diff)
+ :returns: ``repr_html`` embedded in the wrapper f-string
+ """
+
+ read_io = self.get_read_io() # if the Container was read from file, get IO object
+ if read_io is not None:
+ # NOTE(review): every array-like value is forwarded to the backend here, not only
+ # datasets that belong to it -- confirm each generate_dataset_html accepts in-memory arrays
+ repr_html = read_io.generate_dataset_html(array)
+ else:
+ # NOTE(review): the "NumPy array" label is applied to whatever array-like value reaches
+ # this branch -- confirm that is intended for non-NumPy inputs
+ array_info_dict = get_basic_array_info(array)
+ repr_html = generate_array_html_repr(array_info_dict, array, "NumPy array")
+
+ return f'{repr_html}
'
@staticmethod
def __smart_str(v, num_indent):
diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py
index 50db79c40..ccd3f0b0b 100644
--- a/src/hdmf/utils.py
+++ b/src/hdmf/utils.py
@@ -967,6 +967,54 @@ def is_ragged(data):
return False
+def get_basic_array_info(array):
+    """Collect the dtype, shape and human-readable size of an array-like object.
+
+    :param array: object exposing ``dtype`` and ``shape`` plus either ``nbytes`` or ``size``
+    :returns: dict with the keys "Data type", "Shape" and "Array size"
+    """
+    def convert_bytes_to_str(bytes_size):
+        """Format a byte count with 1024-based units and two decimals."""
+        units = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']
+        last_unit = len(units) - 1
+        unit_index = 0
+        # stop at the largest unit even if the value is still >= 1024
+        while unit_index < last_unit and bytes_size >= 1024:
+            bytes_size = bytes_size / 1024.
+            unit_index += 1
+        return f"{bytes_size:.2f} {units[unit_index]}"
+
+    # TODO: Remove this after h5py minimal version is larger than 3.0
+    total_bytes = array.nbytes if hasattr(array, "nbytes") else array.size * array.dtype.itemsize
+
+    return {
+        "Data type": array.dtype,
+        "Shape": array.shape,
+        "Array size": convert_bytes_to_str(total_bytes),
+    }
+
+def generate_array_html_repr(backend_info_dict, array, dataset_type=None):
+ """Builds an html representation of an array from pre-computed info.
+
+ The entries of ``backend_info_dict`` are rendered by the nested ``html_table`` helper and
+ prefixed by ``dataset_type`` when it is not None. When the array occupies fewer than
+ ``1024 * 0.1`` bytes, ``str(np.asarray(array))`` is appended as well.
+
+ :param backend_info_dict: mapping of labels to the values shown in the table
+ :param array: array-like object exposing ``nbytes`` or both ``size`` and ``dtype.itemsize``
+ :param dataset_type: optional label placed before the table
+ :returns: the html string
+ """
+ def html_table(item_dicts) -> str:
+ """
+ Generates an html table from a dictionary
+ """
+ report = ''
+ report += ""
+ # NOTE(review): k and v are interpolated as-is; no HTML escaping is applied here
+ for k, v in item_dicts.items():
+ report += (
+ f""
+ f'{k} | '
+ f'{v} | '
+ f"
"
+ )
+ report += ""
+ report += "
"
+ return report
+
+ array_info_html = html_table(backend_info_dict)
+ repr_html = dataset_type + "
" + array_info_html if dataset_type is not None else array_info_html
+
+ if hasattr(array, "nbytes"): # TODO: Remove this after h5py minimal version is larger than 3.0
+ array_size = array.nbytes
+ else:
+ array_size = array.size * array.dtype.itemsize
+ array_is_small = array_size < 1024 * 0.1 # only arrays smaller than 10% of a KiB (about 102 bytes) have their values displayed
+ if array_is_small:
+ repr_html += "
" + str(np.asarray(array))
+
+ return repr_html
class LabelledDict(dict):
"""A dict wrapper that allows querying by an attribute of the values and running a callable on removed items.
diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py
index 9ac81ba13..35d8e480c 100644
--- a/tests/unit/test_container.py
+++ b/tests/unit/test_container.py
@@ -8,6 +8,7 @@
from hdmf.utils import docval
from hdmf.common import DynamicTable, VectorData, DynamicTableRegion
from hdmf.backends.hdf5.h5tools import HDF5IO
+from hdmf.backends.io import HDMFIO
class Subcontainer(Container):
@@ -423,6 +424,23 @@ def __init__(self, **kwargs):
self.data = kwargs['data']
self.str = kwargs['str']
+    class ContainerWithData(Container):
+        """Minimal Container fixture with an array-valued ``data`` field and a ``str`` field."""
+
+        __fields__ = ("data", "str")
+
+        @docval(
+            {'name': "data", "doc": 'data', 'type': 'array_data', "default": None},
+            {'name': "str", "doc": 'str', 'type': str, "default": None},
+        )
+        def __init__(self, **kwargs):
+            super().__init__('test name')
+            # assign in declaration order: data first, then str
+            for field_name in ("data", "str"):
+                setattr(self, field_name, kwargs[field_name])
+
def test_repr_html_(self):
child_obj1 = Container('test child 1')
obj1 = self.ContainerWithChildAndData(child=child_obj1, data=[1, 2, 3], str="hello")
@@ -455,6 +473,82 @@ def test_repr_html_(self):
'class="field-value">hello'
)
+ def test_repr_html_array(self):
+ """A four-element int64 NumPy array field is rendered with the "NumPy array" label, its info table and its values."""
+ # no read_io is set on obj in this test
+ obj = self.ContainerWithData(data=np.array([1, 2, 3, 4], dtype=np.int64), str="hello")
+ expected_html_table = (
+ 'class="container-fields">NumPy array
Data type | int64 |
---|
Shape'
+ ' | (4,) |
---|
Array size | 32.00 bytes |
---|
[1 2 3 4]'
+ )
+ self.assertIn(expected_html_table, obj._repr_html_())
+
+ def test_repr_html_array_large_arrays_not_displayed(self):
+ """A 200-element int64 array (1.56 KiB) is rendered with its info table; the expected fragment carries no array values."""
+ obj = self.ContainerWithData(data=np.arange(200, dtype=np.int64), str="hello")
+ # NOTE(review): assertIn only checks that this fragment is present; whether its tail rules out
+ # the array values depends on markup that is not visible in this rendering -- confirm
+ expected_html_table = (
+ 'class="container-fields">NumPy array
Data type | int64 |
---|
Shape'
+ ' | (200,) |
---|
Array size | 1.56 KiB |
---|
'
+ )
+ self.assertIn(expected_html_table, obj._repr_html_())
+
+ def test_repr_html_hdf5_dataset(self):
+ """An HDF5 dataset field on a container whose read_io is an HDF5IO is rendered with the "HDF5 dataset" label and the chunk/compression rows."""
+ with HDF5IO('array_data.h5', mode='w') as io:
+ dataset = io._file.create_dataset(name='my_dataset', data=np.array([1, 2, 3, 4], dtype=np.int64))
+ obj = self.ContainerWithData(data=dataset, str="hello")
+ # setting read_io makes the container use the HDF5IO representation
+ obj.read_io = io
+
+ expected_html_table = (
+ 'class="container-fields">HDF5 dataset
Data type | int64 |
---|
'
+ 'Shape | (4,) |
---|
Array size'
+ ' | 32.00 bytes |
---|
Chunk shape'
+ ' | None |
---|
Compression | None |
---|
Compression opts | None |
---|
Compression ratio | 1.0 |
---|
[1 2 3 4]'
+ )
+
+ self.assertIn(expected_html_table, obj._repr_html_())
+
+ # NOTE(review): this removal is only reached when the assertion above passes, so a failing
+ # run leaves array_data.h5 behind -- consider registering the cleanup before asserting
+ os.remove('array_data.h5')
+
+ def test_repr_html_hdmf_io(self):
+ """With a read_io that does not override generate_dataset_html, the expected fragment has the basic info table and values but no dataset-type label."""
+ with HDF5IO('array_data.h5', mode='w') as io:
+ dataset = io._file.create_dataset(name='my_dataset', data=np.array([1, 2, 3, 4], dtype=np.int64))
+ obj = self.ContainerWithData(data=dataset, str="hello")
+
+ # Stub HDMFIO subclass: every method defined here is a no-op and
+ # generate_dataset_html is not overridden
+ class OtherIO(HDMFIO):
+
+ @staticmethod
+ def can_read(path):
+ pass
+
+ def read_builder(self):
+ pass
+
+ def write_builder(self, **kwargs):
+ pass
+
+ def open(self):
+ pass
+
+ def close(self):
+ pass
+
+ obj.read_io = OtherIO()
+
+ expected_html_table = (
+ 'class="container-fields">Data type | int64 |
---|
'
+ 'Shape | (4,) |
---|
Array size'
+ ' | 32.00 bytes |
---|
[1 2 3 4]'
+ )
+
+ self.assertIn(expected_html_table, obj._repr_html_())
+
+ # NOTE(review): this removal is only reached when the assertion above passes, so a failing
+ # run leaves array_data.h5 behind -- consider registering the cleanup before asserting
+ os.remove('array_data.h5')
class TestData(TestCase):