Skip to content

Commit

Permalink
Fix to_checksums with None values in dicts and recursion
Browse files Browse the repository at this point in the history
Having a `'src': None` entry in a dict for checksums is as valid as
having a `None` entry directly in the list. However the current function
didn't handle it and crashed.
Fix that as well as a few corner cases especially in the recursive case
by introducing a new function for handling checksum entries in the
checksum list and limiting the recursiveness.

Fixes #4142
  • Loading branch information
Flamefire committed Jul 17, 2024
1 parent 4e72116 commit fb20fd2
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 38 deletions.
68 changes: 43 additions & 25 deletions easybuild/framework/easyconfig/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,33 +510,51 @@ def to_dependencies(dep_list):
return [to_dependency(dep) for dep in dep_list]


def to_checksums(checksums):
"""Ensure correct element types for list of checksums: convert list elements to tuples."""
res = []
for checksum in checksums:
# each list entry can be:
# * None (indicates no checksum)
# * a string (SHA256 checksum)
# * a tuple with 2 elements: checksum type + checksum value
# * a list of checksums (i.e. multiple checksums for a single file)
# * a dict (filename to checksum mapping)
if isinstance(checksum, str):
res.append(checksum)
elif isinstance(checksum, (list, tuple)):
# 2 elements + only string/int values => a checksum tuple
if len(checksum) == 2 and all(isinstance(x, (str, int)) for x in checksum):
res.append(tuple(checksum))
def _to_checksum(checksum, list_level=0, allow_dict=True):
"""Ensure the correct element type for each checksum in the checksum list"""
# each entry can be:
# * None (indicates no checksum)
# * a string (SHA256 checksum)
# * a list or tuple with 2 elements: checksum type + checksum value
# * a list or tuple of checksums (i.e. multiple checksums for a single file)
# * a dict (filename to checksum mapping)
if checksum is None or isinstance(checksum, str):
return checksum
elif isinstance(checksum, (list, tuple)):
if len(checksum) == 2 and isinstance(checksum[0], str) and isinstance(checksum[1], (str, int)):
# 2 elements so either:
# - a checksum tuple (2nd element string or int)
# - 2 alternative checksums (tuple)
# - 2 checksums that must each match (list)
# --> Convert to tuple only if we can exclude the 3rd case
if not isinstance(checksum[1], str) or list_level > 0:
return tuple(checksum)
else:
res.append(to_checksums(checksum))
elif isinstance(checksum, dict):
validated_dict = {}
for key, value in checksum.items():
validated_dict[key] = to_checksums(value)
res.append(validated_dict)
else:
res.append(checksum)
return checksum
elif list_level < 2:
# Alternative checksums or multiple checksums for a single file
# Allowed to nest (at most) 2 times, e.g. [[[type, value]]] == [[(type, value)]]
# None is not allowed here
if any(x is None for x in checksum):
raise ValueError('Unexpected None in ' + str(checksum))
if isinstance(checksum, tuple) or list_level > 0:
# When we already are in a tuple no further recursion is allowed -> set list_level very high
return tuple(_to_checksum(x, list_level=99, allow_dict=allow_dict) for x in checksum)
else:
return list(_to_checksum(x, list_level=list_level+1, allow_dict=allow_dict) for x in checksum)
elif isinstance(checksum, dict) and allow_dict:
return {key: _to_checksum(value, allow_dict=False) for key, value in checksum.items()}

return res
# Not returned -> Wrong type/format
raise ValueError('Unexpected type of "%s": %s' % (type(checksum), str(checksum)))


def to_checksums(checksums):
"""Ensure correct element types for list of checksums: convert list elements to tuples."""
try:
return [_to_checksum(checksum) for checksum in checksums]
except ValueError as e:
raise EasyBuildError('Invalid checksums: %s\n\tError: %s', checksums, e)


def ensure_iterable_license_specs(specs):
Expand Down
105 changes: 92 additions & 13 deletions test/framework/type_checking.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,18 +171,18 @@ def test_check_type_of_param_value_sanity_check_paths(self):
out = {'files': ['bin/foo', ('bin/bar', 'bin/baz')], 'dirs': [('lib', 'lib64', 'lib32')]}
self.assertEqual(check_type_of_param_value('sanity_check_paths', inp, auto_convert=True), (True, out))

def test_check_type_of_param_value_checksums(self):
"""Test check_type_of_param_value function for checksums."""
@staticmethod
def get_valid_checksums_values():
"""Return list of values valid for the 'checksums' EC parameter"""

# Using (actually invalid) prefix to better detect those in case of errors
md5_checksum = 'md518be8435447a017fd1bf2c7ae9224'
sha256_checksum1 = 'sha18be8435447a017fd1bf2c7ae922d0428056cfc7449f7a8641edf76b48265'
sha256_checksum2 = 'sha2cb06105c1d2d30719db5ffb3ea67da60919fb68deaefa583deccd8813551'
sha256_checksum3 = 'sha3e54514a03e255df75c5aee8f9e672f663f93abb723444caec8fe43437bde'
filesize = 45617379

# valid values for 'checksums' easyconfig parameters
inputs = [
return [
[],
# single checksum (one file)
[md5_checksum],
Expand Down Expand Up @@ -239,7 +239,11 @@ def test_check_type_of_param_value_checksums(self):
{'foo.txt': sha256_checksum1, 'bar.txt': None},
],
]
for inp in inputs:

def test_check_type_of_param_value_checksums(self):
"""Test check_type_of_param_value function for checksums."""

for inp in TypeCheckingTest.get_valid_checksums_values():
type_ok, newval = check_type_of_param_value('checksums', inp)
self.assertIs(type_ok, True, 'Failed for ' + str(inp))
self.assertEqual(newval, inp)
Expand Down Expand Up @@ -724,19 +728,94 @@ def test_to_sanity_check_paths_dict(self):

def test_to_checksums(self):
"""Test to_checksums function."""
# Some hand-crafted examples. Only the types are important, values are for easier verification
test_inputs = [
['be662daa971a640e40be5c804d9d7d10'],
['be662daa971a640e40be5c804d9d7d10', ('md5', 'be662daa971a640e40be5c804d9d7d10')],
[['be662daa971a640e40be5c804d9d7d10', ('md5', 'be662daa971a640e40be5c804d9d7d10')]],
[('md5', 'be662daa971a640e40be5c804d9d7d10')],
['be662daa971a640e40be5c804d9d7d10', ('adler32', '0x998410035'), ('crc32', '0x1553842328'),
('md5', 'be662daa971a640e40be5c804d9d7d10'), ('sha1', 'f618096c52244539d0e89867405f573fdb0b55b0'),
('size', 273)],
['checksumvalue'],
[('md5', 'md5checksumvalue')],
['file_1_checksum', ('md5', 'file_2_md5_checksum')],
# One checksum per file, some with checksum type
[
'be662daa971a640e40be5c804d9d7d10',
('adler32', '0x998410035'),
('crc32', '0x1553842328'),
('md5', 'be662daa971a640e40be5c804d9d7d10'),
('sha1', 'f618096c52244539d0e89867405f573fdb0b55b0'),
# int type as the 2nd value
('size', 273),
],
# None values should not be filtered out, but left in place
[None, 'fa618be8435447a017fd1bf2c7ae922d0428056cfc7449f7a8641edf76b48265', None],
[None, 'checksum', None],
# Alternative checksums, not to be confused with multiple checksums for a file
[('main_checksum', 'alternative_checksum')],
[('1st_of_3', '2nd_of_3', '3rd_of_3')],
# Lists must be kept: This means all must match
[['checksum_1_in_list']],
[['checksum_must_match', 'this_must_also_match']],
[['1st_of_3_list', '2nd_of_3_list', '3rd_of_3_list']],
# Alternative checksums with types
[
(('adler32', '1st_adler'), ('crc32', '1st_crc')),
(('adler32', '2nd_adler'), ('crc32', '2nd_crc'), ('sha1', '2nd_sha')),
],
# Entries can be dicts even containing `None`
[
{
'src-arm.tgz': 'arm_checksum',
'src-x86.tgz': ('mainchecksum', 'altchecksum'),
'src-ppc.tgz': ('mainchecksum', ('md5', 'altchecksum')),
'git-clone.tgz': None,
},
{
'src': ['checksum_must_match', 'this_must_also_match']
},
# 2nd required checksum a dict
['first_checksum', {'src-arm': 'arm_checksum'}]
],
]
for checksums in test_inputs:
self.assertEqual(to_checksums(checksums), checksums)
# Also reuse the checksums we use in test_check_type_of_param_value_checksums
# When a checksum is valid it must not be modified
for checksums in TypeCheckingTest.get_valid_checksums_values():
self.assertEqual(to_checksums(checksums), checksums)

# List in list converted to tuple -> alternatives or checksum with type
checksums = [['1stchecksum', ['md5', 'md5sum']]]
checksums_expected = [['1stchecksum', ('md5', 'md5sum')]]
self.assertEqual(to_checksums(checksums), checksums_expected)

# Error detection
wrong_nesting = [('1stchecksum', ('md5', ('md5sum', 'altmd5sum')))]
self.assertErrorRegex(EasyBuildError, 'Unexpected type.*md5', to_checksums, wrong_nesting)
correct_nesting = [('1stchecksum', ('md5', 'md5sum'), ('md5', 'altmd5sum'))]
self.assertEqual(to_checksums(correct_nesting), correct_nesting)
# YEB (YAML EC) doesn't has tuples so it uses lists instead which need to get converted
correct_nesting_yeb = [[['1stchecksum', ['md5', 'md5sum'], ['md5', 'altmd5sum']]]]
correct_nesting_yeb_conv = [[('1stchecksum', ('md5', 'md5sum'), ('md5', 'altmd5sum'))]]
self.assertEqual(to_checksums(correct_nesting_yeb), correct_nesting_yeb_conv)
self.assertEqual(to_checksums(correct_nesting_yeb_conv), correct_nesting_yeb_conv)

unexpected_set = [('1stchecksum', {'md5', 'md5sum'})]
self.assertErrorRegex(EasyBuildError, 'Unexpected type.*md5', to_checksums, unexpected_set)
unexpected_dict = [{'src': ('md5sum', {'src': 'shasum'})}]
self.assertErrorRegex(EasyBuildError, 'Unexpected type.*shasum', to_checksums, unexpected_dict)
correct_dict = [{'src': ('md5sum', 'shasum')}]
self.assertEqual(to_checksums(correct_dict), correct_dict)
correct_dict_1 = [{'src': [['md5', 'md5sum'], ['sha', 'shasum']]}]
correct_dict_2 = [{'src': [('md5', 'md5sum'), ('sha', 'shasum')]}]
self.assertEqual(to_checksums(correct_dict_2), correct_dict_2)
self.assertEqual(to_checksums(correct_dict_1), correct_dict_2) # inner lists to tuples

unexpected_Nones = [
[('1stchecksum', None)],
[['1stchecksum', None]],
[{'src': ('md5sum', None)}],
[{'src': ['md5sum', None]}],
]
self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[0])
self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[1])
self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[2])
self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[3])

def test_ensure_iterable_license_specs(self):
"""Test ensure_iterable_license_specs function."""
Expand Down

0 comments on commit fb20fd2

Please sign in to comment.