Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow search of ids in ElementIdentifiers #187

Merged
merged 5 commits into from
Nov 3, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions src/hdmf/common/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,27 @@ class ElementIdentifiers(Data):
def __init__(self, **kwargs):
call_docval_func(super(ElementIdentifiers, self).__init__, kwargs)

@docval({'name': 'other', 'type': (Data, np.ndarray, list, tuple, int),
'doc': 'List of ids to search for in this ElementIdentifer object'},
rtype=np.ndarray,
returns='Array with the list of indices where the elements in the list where found.'
'Note, the elements in the returned list are ordered in increasing index'
'of the found elements, rather than in the order in which the elements'
'where given for the search. Also the length of the result may be different from the length'
'of the input array. E.g., if our ids are [1,2,3] and we are search for [3,1,5] the '
'result would be [0,2] and NOT [2,0,None]')
Copy link
Contributor

@rly rly Oct 25, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think for docval to do its intended behavior, the function has to be defined as def __eq__(self, **kwargs). Given that this function must be def __eq__(self, other) in order to override the == functionality, the docval decorator should be replaced with a docstring. Then it is clear that validation and all the other docval functionality does not run.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rly @ajtritt I added in fe6f124 a test to check if docval correctly performs the type checking and it seems to work fine.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I.e., the test in ed95b58 actually tests this and seems fine too

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting. So it does work then. Out of curiousity, does the name need to match the name of the variable? Or is it just doing positional arg matching?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When calling ElementIdentifiers == 1 Python will pass as a positional argument, i.e., the error checking in docval happens for this here

hdmf/src/hdmf/utils.py

Lines 423 to 436 in 8ddae22

parsed = __parse_args(
_copy.deepcopy(loc_val),
args[1:],
kwargs,
enforce_type=enforce_type,
enforce_shape=enforce_shape,
allow_extra=allow_extra)
for error_type, ExceptionType in (('type_errors', TypeError),
('value_errors', ValueError)):
parse_err = parsed.get(error_type)
if parse_err:
msg = ', '.join(parse_err)
raise_from(ExceptionType(msg), None)
The name of the docval argument and the argument of the __eq__ function need to match, e.g, if I change the docval argname it something you would get TypeError: __eq__() got an unexpected keyword argument 'something'

def __eq__(self, other):
"""
Given a list of ids return the indices in the ElementIdentifiers array where the
indices are found.
"""
# Determine the ids we want to find
search_ids = other if not isinstance(other, Data) else other.data
if isinstance(search_ids, int):
search_ids = [search_ids]
# Find all matching locations
return np.in1d(self.data, search_ids).nonzero()[0]


@register_class('DynamicTable')
class DynamicTable(Container):
Expand Down
68 changes: 68 additions & 0 deletions tests/unit/common/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,13 @@ def test_not_enforce_unique_id_error(self):
except ValueError as e:
self.fail("add row with non unique id raised error %s" % str(e))

def test_bad_id_type_error(self):
table = self.with_spec()
with self.assertRaises(TypeError):
table.add_row(id=10.1, data={'foo': 1, 'bar': 10.0, 'baz': 'cat'}, enforce_unique_id=True)
with self.assertRaises(TypeError):
table.add_row(id='str', data={'foo': 1, 'bar': 10.0, 'baz': 'cat'}, enforce_unique_id=True)

def test_extra_columns(self):
table = self.with_spec()

Expand Down Expand Up @@ -226,6 +233,20 @@ def test_nd_array_to_df(self):
index=pd.Index(name='id', data=[0, 1, 2]))
pd.testing.assert_frame_equal(df, df2)

def test_id_search(self):
table = self.with_spec()
data = [{'foo': 1, 'bar': 10.0, 'baz': 'cat'},
{'foo': 2, 'bar': 20.0, 'baz': 'dog'},
{'foo': 3, 'bar': 30.0, 'baz': 'bird'},
{'foo': 4, 'bar': 40.0, 'baz': 'fish'},
{'foo': 5, 'bar': 50.0, 'baz': 'lizard'}]
for i in data:
table.add_row(i)
res = table[table.id == [2, 4]]
self.assertEqual(len(res), 2)
self.assertTupleEqual(res[0], (2, 3, 30.0, 'bird'))
self.assertTupleEqual(res[1], (4, 5, 50.0, 'lizard'))


class TestDynamicTableRoundTrip(base.TestMapRoundTrip):

Expand Down Expand Up @@ -258,3 +279,50 @@ def test_from_dataframe(self):
'b': ['4', '5', '6']
}), 'test_table', table_description='the expected table', column_descriptions=coldesc)
self.assertContainerEqual(expected, received)


class TestElementIdentifiers(unittest.TestCase):

def test_identifier_search_single_list(self):
e = ElementIdentifiers('ids', [0, 1, 2, 3, 4])
a = (e == [1])
np.testing.assert_array_equal(a, [1])

def test_identifier_search_single_int(self):
e = ElementIdentifiers('ids', [0, 1, 2, 3, 4])
a = (e == 2)
np.testing.assert_array_equal(a, [2])

def test_identifier_search_single_list_not_found(self):
e = ElementIdentifiers('ids', [0, 1, 2, 3, 4])
a = (e == [10])
np.testing.assert_array_equal(a, [])

def test_identifier_search_single_int_not_found(self):
e = ElementIdentifiers('ids', [0, 1, 2, 3, 4])
a = (e == 10)
np.testing.assert_array_equal(a, [])

def test_identifier_search_single_list_all_match(self):
e = ElementIdentifiers('ids', [0, 1, 2, 3, 4])
a = (e == [1, 2, 3])
np.testing.assert_array_equal(a, [1, 2, 3])

def test_identifier_search_single_list_partial_match(self):
e = ElementIdentifiers('ids', [0, 1, 2, 3, 4])
a = (e == [1, 2, 10])
np.testing.assert_array_equal(a, [1, 2])
a = (e == [-1, 2, 10])
np.testing.assert_array_equal(a, [2, ])

def test_identifier_search_with_element_identifier(self):
e = ElementIdentifiers('ids', [0, 1, 2, 3, 4])
a = (e == ElementIdentifiers('ids', [1, 2, 10]))
np.testing.assert_array_equal(a, [1, 2])

def test_identifier_search_with_bad_ids(self):
e = ElementIdentifiers('ids', [0, 1, 2, 3, 4])
with self.assertRaises(TypeError):
_ = (e == 0.1)
with self.assertRaises(TypeError):
_ = (e == 'test')