From 67cffecad50fbc31f8406367f9442b6e3bd87a4a Mon Sep 17 00:00:00 2001 From: Darin Erat Sleiter Date: Wed, 21 Apr 2021 17:15:12 +1000 Subject: [PATCH] Validation finds unexpected elements (#542) --- CHANGELOG.md | 2 + src/hdmf/build/manager.py | 6 +- src/hdmf/spec/spec.py | 17 +- src/hdmf/testing/testcase.py | 25 +- src/hdmf/validate/errors.py | 22 +- src/hdmf/validate/validator.py | 328 +++++++++++++----- .../unit/spec_tests/test_base_storage_spec.py | 74 ++++ tests/unit/spec_tests/test_dataset_spec.py | 14 + tests/unit/spec_tests/test_group_spec.py | 14 + tests/unit/validator_tests/test_validate.py | 98 +++++- 10 files changed, 500 insertions(+), 100 deletions(-) create mode 100644 tests/unit/spec_tests/test_base_storage_spec.py diff --git a/CHANGELOG.md b/CHANGELOG.md index f1cc66f2b..0a50ba7cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,8 @@ @oruebel (#551) - Add tutoral for new `AlignedDynamicTable` type. @oruebel (#571) - Equality check for `DynamicTable` now also checks that the name and description of the table are the same. @rly (#566) +- Add `ExtraFieldWarning` validator warning, which is created when fields that are not part of the spec are + encountered. @dsleiter, @t-b, @rly (#542) ### Internal improvements - Update CI and copyright year. @rly (#523, #524) diff --git a/src/hdmf/build/manager.py b/src/hdmf/build/manager.py index b60d848b9..27bac8764 100644 --- a/src/hdmf/build/manager.py +++ b/src/hdmf/build/manager.py @@ -605,7 +605,7 @@ def get_builder_ns(self, **kwargs): builder = getargs('builder', kwargs) if isinstance(builder, LinkBuilder): builder = builder.builder - ret = builder.attributes.get('namespace') + ret = builder.attributes.get(self.namespace_catalog.group_spec_cls.namespace_key()) return ret @docval({'name': 'builder', 'type': Builder, @@ -713,7 +713,7 @@ def register_container_type(self, **kwargs): self.__data_types.setdefault(container_cls, (namespace, data_type)) if not isinstance(container_cls, TypeSource): setattr(container_cls, spec.type_key(), data_type) - setattr(container_cls, 'namespace', namespace) + setattr(container_cls, spec.namespace_key(), namespace) @docval({"name": "container_cls", "type": type, "doc": "the AbstractContainer class for which the given ObjectMapper class gets used for"}, @@ -751,7 +751,7 @@ def build(self, **kwargs): # add additional attributes (namespace, data_type, object_id) to builder namespace, data_type = self.get_container_ns_dt(container) - builder.set_attribute('namespace', namespace) + builder.set_attribute(obj_mapper.spec.namespace_key(), namespace) builder.set_attribute(self.__type_key(obj_mapper.spec), data_type) builder.set_attribute(obj_mapper.spec.id_key(), container.object_id) return builder diff --git a/src/hdmf/spec/spec.py b/src/hdmf/spec/spec.py index 3fafa0203..aee9117e6 100644 --- a/src/hdmf/spec/spec.py +++ b/src/hdmf/spec/spec.py @@ -314,6 +314,7 @@ class BaseStorageSpec(Spec): __inc_key = 'data_type_inc' __def_key = 'data_type_def' + __namespace_key = 'namespace' __type_key = 'data_type' __id_key = 'object_id' @@ -437,13 +438,27 @@ def get_data_type_spec(cls, data_type_def): @classmethod def get_namespace_spec(cls): - return AttributeSpec('namespace', 'the namespace for the data type of this object', 'text', required=False) + return AttributeSpec(cls.namespace_key(), 'the namespace for the data type of this object', + 'text', required=False) @property def attributes(self): ''' The attributes for this specification ''' return tuple(self.get('attributes', tuple())) + @classmethod + def 
reserved_attrs(cls): + ''' Get the names of the reserved attributes ''' + return (cls.namespace_key(), cls.type_key(), cls.id_key()) + + @classmethod + def namespace_key(cls): + ''' Get the key used to store namespace on an instance + + Override this method to use a different name for 'namespace' + ''' + return cls.__namespace_key + @property def linkable(self): ''' True if object can be a link, False otherwise ''' diff --git a/src/hdmf/testing/testcase.py b/src/hdmf/testing/testcase.py index 89a377d9e..95f1e7e2e 100644 --- a/src/hdmf/testing/testcase.py +++ b/src/hdmf/testing/testcase.py @@ -4,6 +4,7 @@ import re import unittest from abc import ABCMeta, abstractmethod +from warnings import warn from .utils import remove_test_file from ..backends.hdf5 import HDF5IO @@ -11,6 +12,7 @@ from ..common import validate as common_validate, get_manager from ..container import AbstractContainer, Container, Data from ..query import HDMFDataset +from ..validate.errors import WARNING_SEVERITY class TestCase(unittest.TestCase): @@ -225,14 +227,25 @@ def validate(self, experimental=False): """Validate the written and exported files, if they exist.""" if os.path.exists(self.filename): with HDF5IO(self.filename, manager=get_manager(), mode='r') as io: - errors = common_validate(io, experimental=experimental) + result = common_validate(io, experimental=experimental) + errors, warnings = self._split_validation_result(result) if errors: - for err in errors: - raise Exception(err) + raise Exception("Validation Errors %s" % errors) + if warnings: + warn("Validation Warnings: %s" % warnings) if os.path.exists(self.export_filename): with HDF5IO(self.filename, manager=get_manager(), mode='r') as io: - errors = common_validate(io, experimental=experimental) + result = common_validate(io, experimental=experimental) + errors, warnings = self._split_validation_result(result) if errors: - for err in errors: - raise Exception(err) + raise Exception("Validation Errors %s" % errors) + if warnings: + warn("Validation Warnings: %s" % warnings) + + @staticmethod + def _split_validation_result(result): + """Split validation result into errors and warnings""" + errors = [alert for alert in result if alert.severity > WARNING_SEVERITY] + warnings = [alert for alert in result if alert.severity == WARNING_SEVERITY] + return errors, warnings diff --git a/src/hdmf/validate/errors.py b/src/hdmf/validate/errors.py index 1a6aeb904..dcf23eb98 100644 --- a/src/hdmf/validate/errors.py +++ b/src/hdmf/validate/errors.py @@ -12,19 +12,25 @@ "MissingDataType", "IllegalLinkError", "IncorrectDataType", - "IncorrectQuantityError" + "IncorrectQuantityError", + "ExtraFieldWarning" ] +WARNING_SEVERITY = 0 +ERROR_SEVERITY = 10 + class Error: @docval({'name': 'name', 'type': str, 'doc': 'the name of the component that is erroneous'}, {'name': 'reason', 'type': str, 'doc': 'the reason for the error'}, + {'name': 'severity', 'type': int, 'doc': 'severity of the error', 'default': ERROR_SEVERITY}, {'name': 'location', 'type': str, 'doc': 'the location of the error', 'default': None}) def __init__(self, **kwargs): self.__name = getargs('name', kwargs) self.__reason = getargs('reason', kwargs) self.__location = getargs('location', kwargs) + self.__severity = getargs('severity', kwargs) if self.__location is not None: self.__str = "%s (%s): %s" % (self.__name, self.__location, self.__reason) else: @@ -42,6 +48,10 @@ def reason(self): def location(self): return self.__location + @property + def severity(self): + return self.__severity + @location.setter def 
location(self, loc): self.__location = loc @@ -176,3 +186,13 @@ def __init__(self, **kwargs): reason = "incorrect data_type - expected '%s', got '%s'" % (expected, received) loc = getargs('location', kwargs) super().__init__(name, reason, location=loc) + + +class ExtraFieldWarning(Error): + @docval({'name': 'name', 'type': str, 'doc': 'the name of the component with the extra field'}, + {'name': 'location', 'type': str, 'doc': 'the location of the warning', 'default': None}) + def __init__(self, **kwargs): + name = getargs('name', kwargs) + reason = "encountered extra field" + loc = getargs('location', kwargs) + super().__init__(name, reason, location=loc, severity=WARNING_SEVERITY) diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py index c2963e5ed..8931896a5 100644 --- a/src/hdmf/validate/validator.py +++ b/src/hdmf/validate/validator.py @@ -3,11 +3,12 @@ from copy import copy from itertools import chain from collections import defaultdict +from typing import NamedTuple, Any import numpy as np -from .errors import Error, DtypeError, MissingError, MissingDataType, ShapeError, IllegalLinkError, IncorrectDataType -from .errors import ExpectedArrayError, IncorrectQuantityError +from .errors import (Error, DtypeError, MissingError, MissingDataType, ShapeError, IllegalLinkError, + IncorrectDataType, ExpectedArrayError, IncorrectQuantityError, ExtraFieldWarning) from ..build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder, RegionBuilder from ..build.builders import BaseBuilder from ..spec import Spec, AttributeSpec, GroupSpec, DatasetSpec, RefSpec, LinkSpec @@ -152,6 +153,26 @@ def check_shape(expected, received): return ret +def _resolve_spec_links(spec, vmap): + """Resolves a spec, following a link if it exists""" + if isinstance(spec, LinkSpec): + validator = vmap.get_validator(spec.target_type) + spec = validator.spec + return spec + + +def _resolve_spec_data_type(spec): + """Resolves the `data_type` of a spec, following a link if it exists""" + if isinstance(spec, LinkSpec): + return spec.target_type + return spec.data_type + + +def _resolve_builder_data_type(builder, spec): + """Returns the `data_type` of a builder according to the `type_key` of the associated spec""" + return builder.attributes.get(spec.type_key()) + + class ValidatorMap: """A class for keeping track of Validator objects for all data types in a namespace""" @@ -332,6 +353,12 @@ def validate(self, **kwargs): return ret +class NamedAttribute(NamedTuple): + """An immutable utility container representing an attribute value paired with its name.""" + name: str + value: Any + + class BaseStorageValidator(Validator): '''A base class for validating against Spec objects that have attributes i.e. 
BaseStorageSpec'''
@@ -339,28 +366,71 @@
             {'name': 'validator_map', 'type': ValidatorMap, 'doc': 'the ValidatorMap to use during validation'})
     def __init__(self, **kwargs):
         call_docval_func(super().__init__, kwargs)
-        self.__attribute_validators = dict()
-        for attr in self.spec.attributes:
-            self.__attribute_validators[attr.name] = AttributeValidator(attr, self.vmap)
 
     @docval({"name": "builder", "type": BaseBuilder, "doc": "the builder to validate"},
             returns='a list of Errors', rtype=list)
     def validate(self, **kwargs):
         builder = getargs('builder', kwargs)
-        attributes = builder.attributes
-        ret = list()
-        for attr, validator in self.__attribute_validators.items():
-            attr_val = attributes.get(attr)
-            if attr_val is None:
-                if validator.spec.required:
-                    ret.append(MissingError(self.get_spec_loc(validator.spec),
-                                            location=self.get_builder_loc(builder)))
-            else:
-                errors = validator.validate(attr_val)
-                for err in errors:
-                    err.location = self.get_builder_loc(builder) + ".%s" % validator.spec.name
-                ret.extend(errors)
-        return ret
+        errors = list(self.__validate_attributes(builder))
+        return errors
+
+    def __validate_attributes(self, builder):
+        """Validates the attributes attached to `builder` against the builder's spec.
+
+        Validation works by first matching attributes on the builder against the attributes
+        attached to the spec using an AttributeSpecMatcher in a one-to-one relationship. Following
+        that, each attribute spec is validated against the matched or absent attribute value.
+
+        Warnings are generated for each attribute attached to `builder` which is not matched to
+        any attribute specs.
+        """
+        attribute_matcher = AttributeSpecMatcher(self.vmap, list(self.spec.attributes))
+        nonreserved_attributes = self.__get_nonreserved_attributes(builder)
+        attribute_matcher.assign_to_specs(nonreserved_attributes)
+
+        for attr_spec, attr_match in attribute_matcher.one_to_one_matches:
+            if attr_spec.required and attr_match is None:
+                yield self.__construct_missing_attribute_error(attr_spec, builder)
+            elif attr_match is not None:
+                yield from self.__validate_existing_attribute(attr_spec, attr_match, builder)
+
+        yield from self.__warn_on_extra_attributes(attribute_matcher.unmatched, builder)
+
+    def __get_nonreserved_attributes(self, builder):
+        """Returns the attributes of this builder which are not reserved according to the spec"""
+        def is_nonreserved(attribute):
+            return attribute.name not in self.spec.reserved_attrs()
+
+        attributes = [NamedAttribute(name, value) for name, value in builder.attributes.items()]
+        return list(filter(is_nonreserved, attributes))
+
+    def __construct_missing_attribute_error(self, attr_spec, parent_builder):
+        """Returns a MissingError for the missing required attribute"""
+        spec_loc = self.get_spec_loc(attr_spec)
+        builder_loc = self.get_builder_loc(parent_builder)
+        return MissingError(spec_loc, location=builder_loc)
+
+    def __validate_existing_attribute(self, attr_spec, attribute, parent_builder):
+        """Validates the value of an attribute against its spec"""
+        validator = AttributeValidator(attr_spec, self.vmap)
+        errors = validator.validate(attribute.value)
+
+        # since attributes are not (necessarily) builders, they don't contain the information required for
+        # AttributeValidator to determine the location of the error, so we need to set it separately here
+        for err in errors:
+            err.location = self.get_builder_loc(parent_builder) + ".%s" % validator.spec.name
+        yield from errors
+
+    def __warn_on_extra_attributes(self, 
extra_attributes, builder): + """Warn for each attribute on `builder` which is classified as an extra field + + Extra fields are any children attached to the parent builder + which are not part of the parent spec. + """ + for attr_name, attr_value in extra_attributes: + name_of_erroneous = self.get_spec_loc(self.spec) + attribute_loc = self.get_builder_loc(builder) + ".%s" % attr_name + yield ExtraFieldWarning(name_of_erroneous, location=attribute_loc) class DatasetValidator(BaseStorageValidator): @@ -397,12 +467,6 @@ def validate(self, **kwargs): return ret -def _resolve_data_type(spec): - if isinstance(spec, LinkSpec): - return spec.target_type - return spec.data_type - - class GroupValidator(BaseStorageValidator): '''A class for validating GroupBuilders against GroupSpecs''' @@ -431,20 +495,25 @@ def __validate_children(self, parent_builder): separate class). Once the matching is complete, it is a straightforward procedure for validating the set of matching builders against each child spec. + + Warnings are generated for each child attached to `builder` which is + not matched to any specs. """ - spec_children = chain(self.spec.datasets, self.spec.groups, self.spec.links) - matcher = SpecMatcher(self.vmap, spec_children) + spec_children = list(chain(self.spec.datasets, self.spec.groups, self.spec.links)) + matcher = BuilderSpecMatcher(self.vmap, spec_children) - builder_children = chain(parent_builder.datasets.values(), - parent_builder.groups.values(), - parent_builder.links.values()) + builder_children = list(chain(parent_builder.datasets.values(), + parent_builder.groups.values(), + parent_builder.links.values())) matcher.assign_to_specs(builder_children) - for child_spec, matched_builders in matcher.spec_matches: + for child_spec, matched_builders in matcher.one_to_many_matches: yield from self.__validate_presence_and_quantity(child_spec, len(matched_builders), parent_builder) for child_builder in matched_builders: yield from self.__validate_child_builder(child_spec, child_builder, parent_builder) + yield from self.__warn_on_extra_fields(matcher.unmatched) + def __validate_presence_and_quantity(self, child_spec, n_builders, parent_builder): """Validate that at least one matching builder exists if the spec is required and that the number of builders agrees with the spec quantity @@ -458,7 +527,7 @@ def __construct_missing_child_error(self, child_spec, parent_builder): """Returns either a MissingDataType or a MissingError depending on whether or not a specific data type can be resolved from the spec """ - data_type = _resolve_data_type(child_spec) + data_type = _resolve_spec_data_type(child_spec) builder_loc = self.get_builder_loc(parent_builder) if data_type is not None: name_of_erroneous = self.get_spec_loc(self.spec) @@ -479,7 +548,7 @@ def __incorrect_quantity(n_found, spec): def __construct_incorrect_quantity_error(self, child_spec, parent_builder, n_builders): name_of_erroneous = self.get_spec_loc(self.spec) - data_type = _resolve_data_type(child_spec) + data_type = _resolve_spec_data_type(child_spec) builder_loc = self.get_builder_loc(parent_builder) return IncorrectQuantityError(name_of_erroneous, data_type, expected=child_spec.quantity, received=n_builders, location=builder_loc) @@ -491,7 +560,7 @@ def __validate_child_builder(self, child_spec, child_builder, parent_builder): yield self.__construct_illegal_link_error(child_spec, parent_builder) return # do not validate illegally linked objects child_builder = child_builder.builder - child_validator = 
self.__get_child_validator(child_spec)
+        child_validator = self.__get_child_validator(child_builder, child_spec)
         yield from child_validator.validate(child_builder)
 
     def __construct_illegal_link_error(self, child_spec, parent_builder):
@@ -503,75 +572,166 @@
     def __cannot_be_link(spec):
         return not isinstance(spec, LinkSpec) and not spec.linkable
 
-    def __get_child_validator(self, spec):
-        """Returns the appropriate validator for a child spec
+    def __get_child_validator(self, builder, spec):
+        """Returns the appropriate validator for a child builder
 
-        If a specific data type can be resolved, the validator is acquired from
-        the ValidatorMap, otherwise a new Validator is created.
+        If a specific data type of the builder can be resolved, the validator
+        is acquired from the ValidatorMap, otherwise a new Validator is created
+        from the spec.
         """
-        if _resolve_data_type(spec) is not None:
-            return self.vmap.get_validator(_resolve_data_type(spec))
+        base_spec = _resolve_spec_links(spec, self.vmap)
+        dt = _resolve_builder_data_type(builder, base_spec)
+        if dt is not None:
+            return self.vmap.get_validator(dt)
         elif isinstance(spec, GroupSpec):
             return GroupValidator(spec, self.vmap)
         elif isinstance(spec, DatasetSpec):
             return DatasetValidator(spec, self.vmap)
         else:
-            msg = "Unable to resolve a validator for spec %s" % spec
+            msg = "Unable to resolve a validator for builder %s" % builder
             raise ValueError(msg)
 
+    def __warn_on_extra_fields(self, extra_builders):
+        """Warn for each child builder which is not part of the spec
+
+        Extra fields are any children attached to the parent builder
+        which are not part of the parent spec.
+        """
+        for builder in extra_builders:
+            name_of_erroneous = self.get_spec_loc(self.spec)
+            builder_loc = self.get_builder_loc(builder)
+            yield ExtraFieldWarning(name_of_erroneous, location=builder_loc)
+
+
+class SpecMatch:
+    """A container to hold a spec paired with a list of matching fields"""
+
+    @docval({'name': 'spec', 'type': Spec, 'doc': 'the spec for this container to hold'})
+    def __init__(self, **kwargs):
+        self.spec = getargs('spec', kwargs)
+        self.matches = list()
 
-class SpecMatches:
-    """A utility class to hold a spec and the builders matched to it"""
+    @docval({'name': 'match', 'type': None, 'doc': 'a field that matches the spec'})
+    def add_match(self, **kwargs):
+        match = getargs('match', kwargs)
+        self.matches.append(match)
 
-    def __init__(self, spec):
-        self.spec = spec
-        self.builders = list()
+    @property
+    @docval(returns='The single field matching the spec, or None if there is no match')
+    def single_match(self):
+        """Returns a single match or `None` if there are no matches.
 
-    def add(self, builder):
-        self.builders.append(builder)
+        Raises a `ValueError` if there is more than one match. This function should never be
+        called unless you can be certain there is at most one match.
+        """
+        if len(self.matches) == 0:
+            return None
+        elif len(self.matches) == 1:
+            return self.matches[0]
+        else:  # pragma: no cover
+            msg = 'Should never have more than 1 attribute match, found %s.' % len(self.matches)
+            raise ValueError(msg)
 
-class SpecMatcher:
-    """Matches a set of builders against a set of specs
+class SpecMatcher(metaclass=ABCMeta):
+    """Matches a set of fields against a set of specs
 
-    This class is intended to isolate the task of choosing which spec a
-    builder should be validated against from the task of performing that
-    validation.
+ This class is intended to isolate the task of choosing which spec an attribute, + dataset, group, or link should be validated against from the task of performing + that validation. + + Matching rules should be defined in subclasses of this class. """ - def __init__(self, vmap, specs): + @docval({'name': 'vmap', 'type': ValidatorMap, 'doc': 'the ValidatorMap used for validation'}, + {'name': 'specs', 'type': list, 'doc': 'the list of specs to which fields will be matched'}) + def __init__(self, **kwargs): + vmap, specs = getargs('vmap', 'specs', kwargs) self.vmap = vmap - self._spec_matches = [SpecMatches(spec) for spec in specs] - self._unmatched_builders = SpecMatches(None) + self._spec_matches = [SpecMatch(spec) for spec in specs] + self._unmatched = list() @property - def unmatched_builders(self): - """Returns the builders for which no matching spec was found - - These builders can be considered superfluous, and will generate a - warning in the future. - """ - return self._unmatched_builders.builders + @docval(returns='The list of fields which did not match any specs', rtype=list) + def unmatched(self): + return self._unmatched @property - def spec_matches(self): - """Returns a list of tuples of: (spec, assigned builders)""" - return [(sm.spec, sm.builders) for sm in self._spec_matches] + @docval(returns='A list of `SpecMatch`s containing each spec and their matching fields', rtype=list) + def matches(self): + return self._spec_matches - def assign_to_specs(self, builders): - """Assigns a set of builders against a set of specs (many-to-one) + @docval({'name': 'fields', 'type': list, 'doc': 'a list of the fields to match against the specs'}) + def assign_to_specs(self, **kwargs): + """Assigns a set of fields against the specs according to matching rules defined in a subclass - In the case that no matching spec is found, a builder will be - added to a list of unmatched builders. + In the case that no matching spec is found for a field, that field will be added to + the list of unmatched fields. """ - for builder in builders: - spec_match = self._best_matching_spec(builder) - if spec_match is None: - self._unmatched_builders.add(builder) + fields = getargs('fields', kwargs) + for field in fields: + spec_match = self.choose_spec_match(field) + if spec_match is not None: + spec_match.add_match(field) else: - spec_match.add(builder) + self._unmatched.append(field) - def _best_matching_spec(self, builder): + @abstractmethod + @docval({'name': 'field', 'type': None, 'doc': 'the field to be matched against the specs'}, + returns='The SpecMatch for the best matching spec, or None if no match is found', rtype=SpecMatch) + def choose_spec_match(self, **kwargs): + """Determines which spec is the best match for a field and returns the appropriate SpecMatch + + This method should be implemented in subclasses to define the matching rules. 
+        """
+        pass
+
+
+class AttributeSpecMatcher(SpecMatcher):
+    """Matches attributes against AttributeSpecs with a 1-to-1 mapping"""
+
+    @docval({'name': 'field', 'type': None,
+             'doc': 'the name and value of the attribute to be matched against the specs'},
+            returns='The SpecMatch for the best matching spec, or `None` if no match is found', rtype=SpecMatch)
+    def choose_spec_match(self, **kwargs):
+        field = getargs('field', kwargs)
+        attr_name, attr_value = field
+        for spec_match in self._spec_matches:
+            if spec_match.spec.name == attr_name:
+                return spec_match
+        return None
+
+    @property
+    @docval(returns='A list of tuples containing each spec and their single matching `NamedAttribute`', rtype=list)
+    def one_to_one_matches(self):
+        """Returns attribute specs matched to attributes in a one-to-one mapping.
+
+        Since attribute specs always have a name, there can be at most one attribute matched to an
+        `AttributeSpec`. This function unwraps the one-to-many mapping returned by the `matches`
+        property to return a one-to-one mapping.
+
+        For each `SpecMatch`, a tuple containing the spec is returned along with a value
+        according to the following rules:
+        * If the `SpecMatch` has exactly one match, that attribute is returned.
+        * If the `SpecMatch` has no matches, `None` is returned.
+        * A `ValueError` is raised if there is more than one match, as this situation
+          should never occur when matching attributes.
+        """
+        return [(sm.spec, sm.single_match) for sm in self.matches]
+
+
+class BuilderSpecMatcher(SpecMatcher):
+    """Matches a set of builders against a set of specs with an N-to-1 mapping"""
+
+    @property
+    @docval(returns='A list of tuples containing each spec and their possibly multiple matches', rtype=list)
+    def one_to_many_matches(self):
+        """Returns specs and their matches as a sequence of tuples of the form (spec, matches)."""
+        return [(sm.spec, sm.matches) for sm in self.matches]
+
+    @docval({'name': 'field', 'type': None, 'doc': 'the field to be matched against the specs'},
+            returns='The SpecMatch for the best matching spec, or None if no match is found', rtype=SpecMatch)
+    def choose_spec_match(self, **kwargs):
         """Finds the best matching spec for builder
 
         The current algorithm is:
@@ -588,6 +748,7 @@
         inheritance hierarchy. Future improvements to this matching algorithm
         should resolve these discrepancies.
         """
+        builder = getargs('field', kwargs)
         candidates = self._filter_by_name(self._spec_matches, builder)
         candidates = self._filter_by_type(candidates, builder)
         if len(candidates) == 0:
@@ -615,19 +776,16 @@
     def _filter_by_type(self, candidates, builder):
         """Returns the candidate specs which have a data type consistent with
         the builder's data type.
""" + if isinstance(builder, LinkBuilder): + builder = builder.builder + def compatible_type(spec_matches): - spec = spec_matches.spec - if isinstance(spec, LinkSpec): - validator = self.vmap.get_validator(spec.target_type) - spec = validator.spec + spec = _resolve_spec_links(spec_matches.spec, self.vmap) if spec.data_type is None: return True valid_validators = self.vmap.valid_types(spec.data_type) valid_types = [v.spec.data_type for v in valid_validators] - if isinstance(builder, LinkBuilder): - dt = builder.builder.attributes.get(spec.type_key()) - else: - dt = builder.attributes.get(spec.type_key()) + dt = _resolve_builder_data_type(builder, spec) return dt in valid_types return list(filter(compatible_type, candidates)) @@ -638,7 +796,7 @@ def _filter_by_unsatisfied(self, candidates): """ def is_unsatisfied(spec_matches): spec = spec_matches.spec - n_match = len(spec_matches.builders) + n_match = len(spec_matches.matches) if spec.required and n_match == 0: return True if isinstance(spec.quantity, int) and n_match < spec.quantity: diff --git a/tests/unit/spec_tests/test_base_storage_spec.py b/tests/unit/spec_tests/test_base_storage_spec.py new file mode 100644 index 000000000..2c170e57c --- /dev/null +++ b/tests/unit/spec_tests/test_base_storage_spec.py @@ -0,0 +1,74 @@ +from hdmf.spec import AttributeSpec +from hdmf.spec.spec import BaseStorageSpec +from hdmf.testing import TestCase + + +class FakeStorageSpec(BaseStorageSpec): + """A fake class inheriting from BaseStorageSpec to be used for testing""" + + __type_key = 'foodata_type' + __namespace_key = 'foo_namespace' + __id_key = 'foo_id' + __inc_key = 'foodata_type_inc' + __def_key = 'foodata_type_def' + + @classmethod + def type_key(cls): + """Get the key used to store data type on an instance""" + return cls.__type_key + + @classmethod + def namespace_key(cls): + """Get the key used to store namespace on an instance""" + return cls.__namespace_key + + @classmethod + def id_key(cls): + """Get the key used to store data ID on an instance""" + return cls.__id_key + + @classmethod + def inc_key(cls): + """Get the key used to define a data_type include.""" + return cls.__inc_key + + @classmethod + def def_key(cls): + """ Get the key used to define a data_type definition.""" + return cls.__def_key + + +class BaseStorageSpecInheritanceTests(TestCase): + """Tests which verify that classes inheriting from BaseStorageSpec can override key values""" + + def test_reserved_attrs(self): + """Test that reserved_attrs returns values defined by the child class""" + self.assertEqual(FakeStorageSpec.reserved_attrs(), ('foo_namespace', 'foodata_type', 'foo_id')) + + def test_get_namespace_spec(self): + """Test that get_namespace_spec return an AttributeSpec with a name + matching the namespace_key defined by the child class + """ + spec = FakeStorageSpec.get_namespace_spec() + self.assertIsInstance(spec, AttributeSpec) + self.assertEqual(spec.name, 'foo_namespace') + + def test_get_data_type_spec(self): + """Test that get_data_type_spec returns an AttributeSpec with a name + matching the data_type_key defined by the child class + """ + spec = FakeStorageSpec.get_data_type_spec('Foo') + self.assertIsInstance(spec, AttributeSpec) + self.assertEqual(spec.name, 'foodata_type') + + def test_data_type_inc(self): + """Test that data_type_inc looks up the attribute using the inc_key defined by the child class""" + spec = FakeStorageSpec('A fake spec', 'foo') + spec['foodata_type_inc'] = 'Foo' + self.assertEqual(spec.data_type_inc, 'Foo') + + def 
test_data_type_def(self): + """Test that data_type_def looks up the attribute using the def_key defined by the child class""" + spec = FakeStorageSpec('A fake spec', 'bar') + spec['foodata_type_def'] = 'Bar' + self.assertEqual(spec.data_type_def, 'Bar') diff --git a/tests/unit/spec_tests/test_dataset_spec.py b/tests/unit/spec_tests/test_dataset_spec.py index 0309aced4..308707190 100644 --- a/tests/unit/spec_tests/test_dataset_spec.py +++ b/tests/unit/spec_tests/test_dataset_spec.py @@ -245,3 +245,17 @@ def test_data_type_property_value(self): group = GroupSpec('A group', name='group', data_type_inc=data_type_inc, data_type_def=data_type_def) self.assertEqual(group.data_type, data_type) + + def test_namespace_key(self): + """The namespace_key function should have the value 'namespace'""" + self.assertEqual(DatasetSpec.namespace_key(), 'namespace') + + def test_reserved_attrs(self): + """The reserved_attrs function should return the expected list""" + self.assertEqual(DatasetSpec.reserved_attrs(), ('namespace', 'data_type', 'object_id')) + + def test_get_namespace_spec(self): + """get_namespace_spec should return an AttributeSpec with the name of 'namespace'""" + spec = DatasetSpec.get_namespace_spec() + self.assertIsInstance(spec, AttributeSpec) + self.assertEqual(spec.name, 'namespace') diff --git a/tests/unit/spec_tests/test_group_spec.py b/tests/unit/spec_tests/test_group_spec.py index 1ac4b5aea..267538e5b 100644 --- a/tests/unit/spec_tests/test_group_spec.py +++ b/tests/unit/spec_tests/test_group_spec.py @@ -267,3 +267,17 @@ def test_data_type_property_value(self): dataset = DatasetSpec('A dataset', 'int', name='dataset', data_type_inc=data_type_inc, data_type_def=data_type_def) self.assertEqual(dataset.data_type, data_type) + + def test_namespace_key(self): + """The namespace_key function should have the value 'namespace'""" + self.assertEqual(GroupSpec.namespace_key(), 'namespace') + + def test_reserved_attrs(self): + """The reserved_attrs function should return the expected list""" + self.assertEqual(GroupSpec.reserved_attrs(), ('namespace', 'data_type', 'object_id')) + + def test_get_namespace_spec(self): + """get_namespace_spec should return an AttributeSpec with the name of 'namespace'""" + spec = GroupSpec.get_namespace_spec() + self.assertIsInstance(spec, AttributeSpec) + self.assertEqual(spec.name, 'namespace') diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py index b93dcdd55..d6231d394 100644 --- a/tests/unit/validator_tests/test_validate.py +++ b/tests/unit/validator_tests/test_validate.py @@ -10,7 +10,7 @@ from hdmf.testing import TestCase from hdmf.validate import ValidatorMap from hdmf.validate.errors import (DtypeError, MissingError, ExpectedArrayError, MissingDataType, - IncorrectQuantityError, IllegalLinkError) + IncorrectQuantityError, IllegalLinkError, ExtraFieldWarning, ERROR_SEVERITY) CORE_NAMESPACE = 'test_core' @@ -188,9 +188,10 @@ def test_invalid_wrong_name_req_type(self): groups=[bar_builder]) results = self.vmap.validate(foo_builder) - self.assertEqual(len(results), 1) - self.assertValidationError(results[0], MissingDataType, name='Foo') - self.assertEqual(results[0].data_type, 'Bar') + self.assertEqual(len(results), 2) + error = next(err for err in results if err.severity == ERROR_SEVERITY) + self.assertValidationError(error, MissingDataType, name='Foo') + self.assertEqual(error.data_type, 'Bar') def test_invalid_missing_unnamed_req_group(self): """Test that a MissingDataType is returned when a required 
unnamed nested data type is missing.""" @@ -821,3 +822,92 @@ def test_both_levels_of_hierarchy_validated_inverted_order(self): builder = GroupBuilder('my_baz', attributes={'data_type': 'Baz'}, datasets=datasets) result = self.vmap.validate(builder) self.assertEqual(len(result), 0) + + +class TestExtraFields(TestCase): + """Test the ExtraFieldWarnings are correctly generated whenever an object includes fields that + are not part of the spec. Extra fields include Groups, Datasets, Links, and Attributes + """ + + def set_up_spec(self): + spec_catalog = SpecCatalog() + parent_group_spec = GroupSpec('A test group specification', data_type_def='Foo') + child_group_spec = GroupSpec('A child group', data_type_def='Bar') + child_dataset_spec = GroupSpec('A child dataset', data_type_def='Baz') + for spec in [parent_group_spec, child_group_spec, child_dataset_spec]: + spec_catalog.register_spec(spec, 'test.yaml') + self.namespace = SpecNamespace( + 'a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], version='0.1.0', catalog=spec_catalog) + self.vmap = ValidatorMap(self.namespace) + + def validate_extra_fields_test_case(self, builder): + """Validate that builder generates exactly one ExtraFieldWarning and no Errors""" + self.set_up_spec() + result = self.vmap.validate(builder) + self.assertEqual(len(result), 1) + self.assertIsInstance(result[0], ExtraFieldWarning) + + def test_extra_typed_group(self): + """Test that an ExtraFieldWarning is returned when there is an extra field which is a typed group""" + groups = [GroupBuilder('bar', attributes={'data_type': 'Bar'})] + builder = GroupBuilder('foo', attributes={'data_type': 'Foo'}, groups=groups) + self.validate_extra_fields_test_case(builder) + + def test_extra_untyped_group(self): + """Test that an ExtraFieldWarning is returned when there is an extra field which is an untyped group""" + groups = [GroupBuilder('bar')] + builder = GroupBuilder('foo', attributes={'data_type': 'Foo'}, groups=groups) + self.validate_extra_fields_test_case(builder) + + def test_extra_typed_dataset(self): + """Test that an ExtraFieldWarning is returned when there is an extra field which is a typed dataset""" + datasets = [DatasetBuilder('baz', attributes={'data_type': 'Baz'})] + builder = GroupBuilder('foo', attributes={'data_type': 'Foo'}, datasets=datasets) + self.validate_extra_fields_test_case(builder) + + def test_extra_untyped_dataset(self): + """Test that an ExtraFieldWarning is returned when there is an extra field which is an untyped dataset""" + datasets = [DatasetBuilder('baz')] + builder = GroupBuilder('foo', attributes={'data_type': 'Foo'}, datasets=datasets) + self.validate_extra_fields_test_case(builder) + + def test_extra_link(self): + """Test that an ExtraFieldWarning is returned when there is an extra field which is a link""" + links = [LinkBuilder(GroupBuilder('bar', attributes={'data_type': 'Bar'}), 'bar_link')] + builder = GroupBuilder('foo', attributes={'data_type': 'Foo'}, links=links) + self.validate_extra_fields_test_case(builder) + + def test_extra_attribute(self): + """Test that an ExtraFieldWarning is returned when there is an extra field which is an attribute""" + extra_attribute = {'x': 42} + builder = GroupBuilder('foo', attributes={'data_type': 'Foo', **extra_attribute}) + self.validate_extra_fields_test_case(builder) + + def test_reserved_attributes_are_ok(self): + """Test that ExtraFieldWarnings are not generated for the presence of reserved attributes""" + reserved_attributes = {'data_type': 'Foo', 'namespace': CORE_NAMESPACE, 
'object_id': 1324} + builder = GroupBuilder('foo', attributes=reserved_attributes) + self.set_up_spec() + result = self.vmap.validate(builder) + self.assertEqual(len(result), 0) + + def test_fields_from_inheriting_types_do_not_raise_warning(self): + """Test that no warnings are generated if a child builder inherits from the type in the spec""" + self.set_up_inheriting_specs_special_case() + groups = [GroupBuilder('qux', attributes={'data_type': 'Qux', 'quux': 42})] + builder = GroupBuilder('foo', attributes={'data_type': 'Foo'}, groups=groups) + result = self.vmap.validate(builder) + self.assertEqual(len(result), 0) + + def set_up_inheriting_specs_special_case(self): + spec_catalog = SpecCatalog() + group_spec = GroupSpec('A child group', data_type_def='Bar') + group_spec_inh = GroupSpec('An inheriting child group', data_type_def='Qux', data_type_inc='Bar', + attributes=[AttributeSpec('quux', 'A new attribute', 'int')]) + parent_group_spec = GroupSpec('A test group specification', data_type_def='Foo', + groups=[group_spec]) + for spec in [parent_group_spec, group_spec, group_spec_inh]: + spec_catalog.register_spec(spec, 'test.yaml') + self.namespace = SpecNamespace( + 'a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], version='0.1.0', catalog=spec_catalog) + self.vmap = ValidatorMap(self.namespace)
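
Usage sketch (illustrative only, not part of the patch): how a caller might separate the new severity-0 warnings (e.g. ExtraFieldWarning) from errors after validating a file, mirroring the _split_validation_result helper added to src/hdmf/testing/testcase.py. The file path "example.h5" is a placeholder; HDF5IO, get_manager, and validate are the same hdmf entry points the updated test case imports.

    # Illustrative sketch -- assumes an existing HDMF HDF5 file at the placeholder path "example.h5".
    from hdmf.backends.hdf5 import HDF5IO
    from hdmf.common import get_manager, validate
    from hdmf.validate.errors import WARNING_SEVERITY

    with HDF5IO("example.h5", manager=get_manager(), mode='r') as io:
        results = validate(io)

    # Split the combined result list by severity, as the updated test harness does:
    # WARNING_SEVERITY (0) marks warnings such as ExtraFieldWarning; anything above it is an error.
    errors = [issue for issue in results if issue.severity > WARNING_SEVERITY]
    warnings = [issue for issue in results if issue.severity == WARNING_SEVERITY]

    for warning in warnings:
        print("warning:", warning)  # e.g. an ExtraFieldWarning for a field that is not in the spec
    for error in errors:
        print("error:", error)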