Skip to content

Commit

Permalink
Validate builders against both top level data type specs and inner sp…
Browse files Browse the repository at this point in the history
…ecs (#609)

* Validate builders against both top level data type specs and inner specs

* Fix #585
* Ref #542
* Builder can now be validated against more than one spec in order to validate against additional
  fields added to inner data types
* Also make validation Errors comparable as a way to remove duplicates that can sometimes be generated

* Update changelog

* Ref #585

* Fix pynwb validation errors related to reference and compound data types

* Ref #585
* This is just a workaround for checking the data_type of
  BuilderH5ReferenceDataset and BuilderH5TableDataset objects
* Plan to add unit tests after some discussion to validate the approach

* Remove validator reference to H5-specific classes and add unit tests

* use ReferenceResolver instead of referencing BuilderH5ReferenceDataset or BuilderH5TableDataset
* Fix #585

* Update tests/unit/validator_tests/test_validate.py

* Update tests/unit/validator_tests/test_validate.py

Co-authored-by: Ryan Ly <rly@lbl.gov>
  • Loading branch information
dsleiter and rly authored Jun 25, 2021
1 parent 6e0c03d commit b93cde4
Show file tree
Hide file tree
Showing 5 changed files with 327 additions and 23 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
-

### Bug fixes
-
- Update the validator to allow extensions to data types which only define data_type_inc @dsleiter (#609)
- Fix error when validating lazy-loaded datasets containing references @dsleiter (#609)
-
-
-
Expand Down
43 changes: 37 additions & 6 deletions src/hdmf/validate/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ def __init__(self, **kwargs):
self.__name = getargs('name', kwargs)
self.__reason = getargs('reason', kwargs)
self.__location = getargs('location', kwargs)
if self.__location is not None:
self.__str = "%s (%s): %s" % (self.__name, self.__location, self.__reason)
else:
self.__str = "%s: %s" % (self.name, self.reason)

@property
def name(self):
Expand All @@ -45,14 +41,49 @@ def location(self):
@location.setter
def location(self, loc):
self.__location = loc
self.__str = "%s (%s): %s" % (self.__name, self.__location, self.__reason)

def __str__(self):
return self.__str
return self.__format_str(self.name, self.location, self.reason)

@staticmethod
def __format_str(name, location, reason):
if location is not None:
return "%s (%s): %s" % (name, location, reason)
else:
return "%s: %s" % (name, reason)

def __repr__(self):
return self.__str__()

def __hash__(self):
"""Returns the hash value of this Error
Note: if the location property is set after creation, the hash value will
change. Therefore, it is important to finalize the value of location
before getting the hash value.
"""
return hash(self.__equatable_str())

def __equatable_str(self):
"""A string representation of the error which can be used to check for equality
For a single error, name can end up being different depending on whether it is
generated from a base data type spec or from an inner type definition. These errors
should still be considered equal because they are caused by the same problem.
When a location is provided, we only consider the name of the field and drop the
rest of the spec name. However, when a location is not available, then we need to
use the fully-provided name.
"""
if self.location is not None:
equatable_name = self.name.split('/')[-1]
else:
equatable_name = self.name
return self.__format_str(equatable_name, self.location, self.reason)

def __eq__(self, other):
return hash(self) == hash(other)


class DtypeError(Error):

Expand Down
56 changes: 43 additions & 13 deletions src/hdmf/validate/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from abc import ABCMeta, abstractmethod
from copy import copy
from itertools import chain
from collections import defaultdict
from collections import defaultdict, OrderedDict

import numpy as np

Expand All @@ -14,6 +14,8 @@
from ..spec import SpecNamespace
from ..spec.spec import BaseStorageSpec, DtypeHelper
from ..utils import docval, getargs, call_docval_func, pystr, get_data_shape
from ..query import ReferenceResolver


__synonyms = DtypeHelper.primary_dtype_synonyms

Expand Down Expand Up @@ -107,6 +109,8 @@ def get_type(data):
return 'region'
elif isinstance(data, ReferenceBuilder):
return 'object'
elif isinstance(data, ReferenceResolver):
return data.dtype
elif isinstance(data, np.ndarray):
if data.size == 0:
raise EmptyArrayError()
Expand Down Expand Up @@ -417,7 +421,7 @@ def validate(self, **kwargs): # noqa: C901
builder = getargs('builder', kwargs)
errors = super().validate(builder)
errors.extend(self.__validate_children(builder))
return errors
return self._remove_duplicates(errors)

def __validate_children(self, parent_builder):
"""Validates the children of the group builder against the children in the spec.
Expand Down Expand Up @@ -490,8 +494,8 @@ def __validate_child_builder(self, child_spec, child_builder, parent_builder):
yield self.__construct_illegal_link_error(child_spec, parent_builder)
return # do not validate illegally linked objects
child_builder = child_builder.builder
child_validator = self.__get_child_validator(child_spec)
yield from child_validator.validate(child_builder)
for child_validator in self.__get_child_validators(child_spec):
yield from child_validator.validate(child_builder)

def __construct_illegal_link_error(self, child_spec, parent_builder):
name_of_erroneous = self.get_spec_loc(child_spec)
Expand All @@ -502,22 +506,48 @@ def __construct_illegal_link_error(self, child_spec, parent_builder):
def __cannot_be_link(spec):
return not isinstance(spec, LinkSpec) and not spec.linkable

def __get_child_validator(self, spec):
"""Returns the appropriate validator for a child spec
If a specific data type can be resolved, the validator is acquired from
the ValidatorMap, otherwise a new Validator is created.
def __get_child_validators(self, spec):
"""Returns the appropriate list of validators for a child spec
Due to the fact that child specs can both inherit a data type via data_type_inc
and also modify the type without defining a new data type via data_type_def,
we need to validate against both the spec for the base data type and the spec
at the current hierarchy of the data type in case there have been any
modifications.
If a specific data type can be resolved, a validator for that type is acquired
from the ValidatorMap and included in the returned validators. If the spec is
a GroupSpec or a DatasetSpec, then a new Validator is created and also
returned. If the spec is a LinkSpec, no additional Validator is returned
because the LinkSpec cannot add or modify fields and the target_type will be
validated by the Validator returned from the ValidatorMap.
"""
if _resolve_data_type(spec) is not None:
return self.vmap.get_validator(_resolve_data_type(spec))
elif isinstance(spec, GroupSpec):
return GroupValidator(spec, self.vmap)
yield self.vmap.get_validator(_resolve_data_type(spec))

if isinstance(spec, GroupSpec):
yield GroupValidator(spec, self.vmap)
elif isinstance(spec, DatasetSpec):
return DatasetValidator(spec, self.vmap)
yield DatasetValidator(spec, self.vmap)
elif isinstance(spec, LinkSpec):
return
else:
msg = "Unable to resolve a validator for spec %s" % spec
raise ValueError(msg)

@staticmethod
def _remove_duplicates(errors):
"""Return a list of validation errors where duplicates have been removed
In some cases a child of a group to be validated against two specs which can
redundantly define the same fields/children. If the builder doesn't match the
spec, it is possible for duplicate errors to be generated.
"""
ordered_errors = OrderedDict()
for error in errors:
ordered_errors[error] = error
return list(ordered_errors)


class SpecMatches:
"""A utility class to hold a spec and the builders matched to it"""
Expand Down
54 changes: 54 additions & 0 deletions tests/unit/validator_tests/test_errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from unittest import TestCase

from hdmf.validate.errors import Error


class TestErrorEquality(TestCase):
def test_self_equality(self):
"""Verify that one error equals itself"""
error = Error('foo', 'bad thing', 'a.b.c')
self.assertEqual(error, error)

def test_equality_with_same_field_values(self):
"""Verify that two errors with the same field values are equal"""
err1 = Error('foo', 'bad thing', 'a.b.c')
err2 = Error('foo', 'bad thing', 'a.b.c')
self.assertEqual(err1, err2)

def test_not_equal_with_different_reason(self):
"""Verify that two errors with a different reason are not equal"""
err1 = Error('foo', 'bad thing', 'a.b.c')
err2 = Error('foo', 'something else', 'a.b.c')
self.assertNotEqual(err1, err2)

def test_not_equal_with_different_name(self):
"""Verify that two errors with a different name are not equal"""
err1 = Error('foo', 'bad thing', 'a.b.c')
err2 = Error('bar', 'bad thing', 'a.b.c')
self.assertNotEqual(err1, err2)

def test_not_equal_with_different_location(self):
"""Verify that two errors with a different location are not equal"""
err1 = Error('foo', 'bad thing', 'a.b.c')
err2 = Error('foo', 'bad thing', 'd.e.f')
self.assertNotEqual(err1, err2)

def test_equal_with_no_location(self):
"""Verify that two errors with no location but the same name are equal"""
err1 = Error('foo', 'bad thing')
err2 = Error('foo', 'bad thing')
self.assertEqual(err1, err2)

def test_not_equal_with_overlapping_name_when_no_location(self):
"""Verify that two errors with an overlapping name but no location are
not equal
"""
err1 = Error('foo', 'bad thing')
err2 = Error('x/y/foo', 'bad thing')
self.assertNotEqual(err1, err2)

def test_equal_with_overlapping_name_when_location_present(self):
"""Verify that two errors with an overlapping name and a location are equal"""
err1 = Error('foo', 'bad thing', 'a.b.c')
err2 = Error('x/y/foo', 'bad thing', 'a.b.c')
self.assertEqual(err1, err2)
Loading

0 comments on commit b93cde4

Please sign in to comment.