hdmf-dev · rly · Jun 25, 2021 · May 13, 2021 · May 13, 2021 · Jun 2, 2021
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -30,7 +30,8 @@
 -
 
 ### Bug fixes
--
+- Update the validator to allow extensions to data types which only define data_type_inc @dsleiter (#609)
+- Fix error when validating lazy-loaded datasets containing references @dsleiter (#609)
 -
 -
 -

diff --git a/src/hdmf/validate/errors.py b/src/hdmf/validate/errors.py
@@ -25,10 +25,6 @@ def __init__(self, **kwargs):
         self.__name = getargs('name', kwargs)
         self.__reason = getargs('reason', kwargs)
         self.__location = getargs('location', kwargs)
-        if self.__location is not None:
-            self.__str = "%s (%s): %s" % (self.__name, self.__location, self.__reason)
-        else:
-            self.__str = "%s: %s" % (self.name, self.reason)
 
     @property
     def name(self):
@@ -45,14 +41,49 @@ def location(self):
     @location.setter
     def location(self, loc):
         self.__location = loc
-        self.__str = "%s (%s): %s" % (self.__name, self.__location, self.__reason)
 
     def __str__(self):
-        return self.__str
+        return self.__format_str(self.name, self.location, self.reason)
+
+    @staticmethod
+    def __format_str(name, location, reason):
+        if location is not None:
+            return "%s (%s): %s" % (name, location, reason)
+        else:
+            return "%s: %s" % (name, reason)
 
     def __repr__(self):
         return self.__str__()
 
+    def __hash__(self):
+        """Returns the hash value of this Error
+
+        Note: if the location property is set after creation, the hash value will
+        change. Therefore, it is important to finalize the value of location
+        before getting the hash value.
+        """
+        return hash(self.__equatable_str())
+
+    def __equatable_str(self):
+        """A string representation of the error which can be used to check for equality
+
+        For a single error, name can end up being different depending on whether it is
+        generated from a base data type spec or from an inner type definition. These errors
+        should still be considered equal because they are caused by the same problem.
+
+        When a location is provided, we only consider the name of the field and drop the
+        rest of the spec name. However, when a location is not available, then we need to
+        use the fully-provided name.
+        """
+        if self.location is not None:
+            equatable_name = self.name.split('/')[-1]
+        else:
+            equatable_name = self.name
+        return self.__format_str(equatable_name, self.location, self.reason)
+
+    def __eq__(self, other):
+        return hash(self) == hash(other)
+
 
 class DtypeError(Error):
 

diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py
@@ -2,7 +2,7 @@
 from abc import ABCMeta, abstractmethod
 from copy import copy
 from itertools import chain
-from collections import defaultdict
+from collections import defaultdict, OrderedDict
 
 import numpy as np
 
@@ -14,6 +14,8 @@
 from ..spec import SpecNamespace
 from ..spec.spec import BaseStorageSpec, DtypeHelper
 from ..utils import docval, getargs, call_docval_func, pystr, get_data_shape
+from ..query import ReferenceResolver
+
 
 __synonyms = DtypeHelper.primary_dtype_synonyms
 
@@ -107,6 +109,8 @@ def get_type(data):
         return 'region'
     elif isinstance(data, ReferenceBuilder):
         return 'object'
+    elif isinstance(data, ReferenceResolver):
+        return data.dtype
     elif isinstance(data, np.ndarray):
         if data.size == 0:
             raise EmptyArrayError()
@@ -417,7 +421,7 @@ def validate(self, **kwargs):  # noqa: C901
         builder = getargs('builder', kwargs)
         errors = super().validate(builder)
         errors.extend(self.__validate_children(builder))
-        return errors
+        return self._remove_duplicates(errors)
 
     def __validate_children(self, parent_builder):
         """Validates the children of the group builder against the children in the spec.
@@ -490,8 +494,8 @@ def __validate_child_builder(self, child_spec, child_builder, parent_builder):
                 yield self.__construct_illegal_link_error(child_spec, parent_builder)
                 return  # do not validate illegally linked objects
             child_builder = child_builder.builder
-        child_validator = self.__get_child_validator(child_spec)
-        yield from child_validator.validate(child_builder)
+        for child_validator in self.__get_child_validators(child_spec):
+            yield from child_validator.validate(child_builder)
 
     def __construct_illegal_link_error(self, child_spec, parent_builder):
         name_of_erroneous = self.get_spec_loc(child_spec)
@@ -502,22 +506,48 @@ def __construct_illegal_link_error(self, child_spec, parent_builder):
     def __cannot_be_link(spec):
         return not isinstance(spec, LinkSpec) and not spec.linkable
 
-    def __get_child_validator(self, spec):
-        """Returns the appropriate validator for a child spec
-
-        If a specific data type can be resolved, the validator is acquired from
-        the ValidatorMap, otherwise a new Validator is created.
+    def __get_child_validators(self, spec):
+        """Returns the appropriate list of validators for a child spec
+
+        Due to the fact that child specs can both inherit a data type via data_type_inc
+        and also modify the type without defining a new data type via data_type_def,
+        we need to validate against both the spec for the base data type and the spec
+        at the current hierarchy of the data type in case there have been any
+        modifications.
+
+        If a specific data type can be resolved, a validator for that type is acquired
+        from the ValidatorMap and included in the returned validators. If the spec is
+        a GroupSpec or a DatasetSpec, then a new Validator is created and also
+        returned. If the spec is a LinkSpec, no additional Validator is returned
+        because the LinkSpec cannot add or modify fields and the target_type will be
+        validated by the Validator returned from the ValidatorMap.
         """
         if _resolve_data_type(spec) is not None:
-            return self.vmap.get_validator(_resolve_data_type(spec))
-        elif isinstance(spec, GroupSpec):
-            return GroupValidator(spec, self.vmap)
+            yield self.vmap.get_validator(_resolve_data_type(spec))
+
+        if isinstance(spec, GroupSpec):
+            yield GroupValidator(spec, self.vmap)
         elif isinstance(spec, DatasetSpec):
-            return DatasetValidator(spec, self.vmap)
+            yield DatasetValidator(spec, self.vmap)
+        elif isinstance(spec, LinkSpec):
+            return
         else:
             msg = "Unable to resolve a validator for spec %s" % spec
             raise ValueError(msg)
 
+    @staticmethod
+    def _remove_duplicates(errors):
+        """Return a list of validation errors where duplicates have been removed
+
+        In some cases a child of a group to be validated against two specs which can
+        redundantly define the same fields/children. If the builder doesn't match the
+        spec, it is possible for duplicate errors to be generated.
+        """
+        ordered_errors = OrderedDict()
+        for error in errors:
+            ordered_errors[error] = error
+        return list(ordered_errors)
+
 
 class SpecMatches:
     """A utility class to hold a spec and the builders matched to it"""

diff --git a/tests/unit/validator_tests/test_errors.py b/tests/unit/validator_tests/test_errors.py
@@ -0,0 +1,54 @@
+from unittest import TestCase
+
+from hdmf.validate.errors import Error
+
+
+class TestErrorEquality(TestCase):
+    def test_self_equality(self):
+        """Verify that one error equals itself"""
+        error = Error('foo', 'bad thing', 'a.b.c')
+        self.assertEqual(error, error)
+
+    def test_equality_with_same_field_values(self):
+        """Verify that two errors with the same field values are equal"""
+        err1 = Error('foo', 'bad thing', 'a.b.c')
+        err2 = Error('foo', 'bad thing', 'a.b.c')
+        self.assertEqual(err1, err2)
+
+    def test_not_equal_with_different_reason(self):
+        """Verify that two errors with a different reason are not equal"""
+        err1 = Error('foo', 'bad thing', 'a.b.c')
+        err2 = Error('foo', 'something else', 'a.b.c')
+        self.assertNotEqual(err1, err2)
+
+    def test_not_equal_with_different_name(self):
+        """Verify that two errors with a different name are not equal"""
+        err1 = Error('foo', 'bad thing', 'a.b.c')
+        err2 = Error('bar', 'bad thing', 'a.b.c')
+        self.assertNotEqual(err1, err2)
+
+    def test_not_equal_with_different_location(self):
+        """Verify that two errors with a different location are not equal"""
+        err1 = Error('foo', 'bad thing', 'a.b.c')
+        err2 = Error('foo', 'bad thing', 'd.e.f')
+        self.assertNotEqual(err1, err2)
+
+    def test_equal_with_no_location(self):
+        """Verify that two errors with no location but the same name are equal"""
+        err1 = Error('foo', 'bad thing')
+        err2 = Error('foo', 'bad thing')
+        self.assertEqual(err1, err2)
+
+    def test_not_equal_with_overlapping_name_when_no_location(self):
+        """Verify that two errors with an overlapping name but no location are
+        not equal
+        """
+        err1 = Error('foo', 'bad thing')
+        err2 = Error('x/y/foo', 'bad thing')
+        self.assertNotEqual(err1, err2)
+
+    def test_equal_with_overlapping_name_when_location_present(self):
+        """Verify that two errors with an overlapping name and a location are equal"""
+        err1 = Error('foo', 'bad thing', 'a.b.c')
+        err2 = Error('x/y/foo', 'bad thing', 'a.b.c')
+        self.assertEqual(err1, err2)