From fb20fd22507f9195d9ceb0eeed416d4ad4ff960a Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 4 Jan 2023 15:41:41 +0100 Subject: [PATCH] Fix `to_checksums` with `None` values in dicts and recursion Having a `'src': None` entry in a dict for checksums is as valid as having a `None` entry directly in the list. However the current function didn't handle it and crashed. Fix that as well as a few corner cases especially in the recursive case by introducing a new function for handling checksum entries in the checksum list and limiting the recursiveness. Fixes #4142 --- easybuild/framework/easyconfig/types.py | 68 +++++++++------ test/framework/type_checking.py | 105 +++++++++++++++++++++--- 2 files changed, 135 insertions(+), 38 deletions(-) diff --git a/easybuild/framework/easyconfig/types.py b/easybuild/framework/easyconfig/types.py index 344c8ee136..51d58db36b 100644 --- a/easybuild/framework/easyconfig/types.py +++ b/easybuild/framework/easyconfig/types.py @@ -510,33 +510,51 @@ def to_dependencies(dep_list): return [to_dependency(dep) for dep in dep_list] -def to_checksums(checksums): - """Ensure correct element types for list of checksums: convert list elements to tuples.""" - res = [] - for checksum in checksums: - # each list entry can be: - # * None (indicates no checksum) - # * a string (SHA256 checksum) - # * a tuple with 2 elements: checksum type + checksum value - # * a list of checksums (i.e. multiple checksums for a single file) - # * a dict (filename to checksum mapping) - if isinstance(checksum, str): - res.append(checksum) - elif isinstance(checksum, (list, tuple)): - # 2 elements + only string/int values => a checksum tuple - if len(checksum) == 2 and all(isinstance(x, (str, int)) for x in checksum): - res.append(tuple(checksum)) +def _to_checksum(checksum, list_level=0, allow_dict=True): + """Ensure the correct element type for each checksum in the checksum list""" + # each entry can be: + # * None (indicates no checksum) + # * a string (SHA256 checksum) + # * a list or tuple with 2 elements: checksum type + checksum value + # * a list or tuple of checksums (i.e. multiple checksums for a single file) + # * a dict (filename to checksum mapping) + if checksum is None or isinstance(checksum, str): + return checksum + elif isinstance(checksum, (list, tuple)): + if len(checksum) == 2 and isinstance(checksum[0], str) and isinstance(checksum[1], (str, int)): + # 2 elements so either: + # - a checksum tuple (2nd element string or int) + # - 2 alternative checksums (tuple) + # - 2 checksums that must each match (list) + # --> Convert to tuple only if we can exclude the 3rd case + if not isinstance(checksum[1], str) or list_level > 0: + return tuple(checksum) else: - res.append(to_checksums(checksum)) - elif isinstance(checksum, dict): - validated_dict = {} - for key, value in checksum.items(): - validated_dict[key] = to_checksums(value) - res.append(validated_dict) - else: - res.append(checksum) + return checksum + elif list_level < 2: + # Alternative checksums or multiple checksums for a single file + # Allowed to nest (at most) 2 times, e.g. [[[type, value]]] == [[(type, value)]] + # None is not allowed here + if any(x is None for x in checksum): + raise ValueError('Unexpected None in ' + str(checksum)) + if isinstance(checksum, tuple) or list_level > 0: + # When we already are in a tuple no further recursion is allowed -> set list_level very high + return tuple(_to_checksum(x, list_level=99, allow_dict=allow_dict) for x in checksum) + else: + return list(_to_checksum(x, list_level=list_level+1, allow_dict=allow_dict) for x in checksum) + elif isinstance(checksum, dict) and allow_dict: + return {key: _to_checksum(value, allow_dict=False) for key, value in checksum.items()} - return res + # Not returned -> Wrong type/format + raise ValueError('Unexpected type of "%s": %s' % (type(checksum), str(checksum))) + + +def to_checksums(checksums): + """Ensure correct element types for list of checksums: convert list elements to tuples.""" + try: + return [_to_checksum(checksum) for checksum in checksums] + except ValueError as e: + raise EasyBuildError('Invalid checksums: %s\n\tError: %s', checksums, e) def ensure_iterable_license_specs(specs): diff --git a/test/framework/type_checking.py b/test/framework/type_checking.py index 77c0716bd9..d6e26964b2 100644 --- a/test/framework/type_checking.py +++ b/test/framework/type_checking.py @@ -171,8 +171,9 @@ def test_check_type_of_param_value_sanity_check_paths(self): out = {'files': ['bin/foo', ('bin/bar', 'bin/baz')], 'dirs': [('lib', 'lib64', 'lib32')]} self.assertEqual(check_type_of_param_value('sanity_check_paths', inp, auto_convert=True), (True, out)) - def test_check_type_of_param_value_checksums(self): - """Test check_type_of_param_value function for checksums.""" + @staticmethod + def get_valid_checksums_values(): + """Return list of values valid for the 'checksums' EC parameter""" # Using (actually invalid) prefix to better detect those in case of errors md5_checksum = 'md518be8435447a017fd1bf2c7ae9224' @@ -180,9 +181,8 @@ def test_check_type_of_param_value_checksums(self): sha256_checksum2 = 'sha2cb06105c1d2d30719db5ffb3ea67da60919fb68deaefa583deccd8813551' sha256_checksum3 = 'sha3e54514a03e255df75c5aee8f9e672f663f93abb723444caec8fe43437bde' filesize = 45617379 - # valid values for 'checksums' easyconfig parameters - inputs = [ + return [ [], # single checksum (one file) [md5_checksum], @@ -239,7 +239,11 @@ def test_check_type_of_param_value_checksums(self): {'foo.txt': sha256_checksum1, 'bar.txt': None}, ], ] - for inp in inputs: + + def test_check_type_of_param_value_checksums(self): + """Test check_type_of_param_value function for checksums.""" + + for inp in TypeCheckingTest.get_valid_checksums_values(): type_ok, newval = check_type_of_param_value('checksums', inp) self.assertIs(type_ok, True, 'Failed for ' + str(inp)) self.assertEqual(newval, inp) @@ -724,19 +728,94 @@ def test_to_sanity_check_paths_dict(self): def test_to_checksums(self): """Test to_checksums function.""" + # Some hand-crafted examples. Only the types are important, values are for easier verification test_inputs = [ - ['be662daa971a640e40be5c804d9d7d10'], - ['be662daa971a640e40be5c804d9d7d10', ('md5', 'be662daa971a640e40be5c804d9d7d10')], - [['be662daa971a640e40be5c804d9d7d10', ('md5', 'be662daa971a640e40be5c804d9d7d10')]], - [('md5', 'be662daa971a640e40be5c804d9d7d10')], - ['be662daa971a640e40be5c804d9d7d10', ('adler32', '0x998410035'), ('crc32', '0x1553842328'), - ('md5', 'be662daa971a640e40be5c804d9d7d10'), ('sha1', 'f618096c52244539d0e89867405f573fdb0b55b0'), - ('size', 273)], + ['checksumvalue'], + [('md5', 'md5checksumvalue')], + ['file_1_checksum', ('md5', 'file_2_md5_checksum')], + # One checksum per file, some with checksum type + [ + 'be662daa971a640e40be5c804d9d7d10', + ('adler32', '0x998410035'), + ('crc32', '0x1553842328'), + ('md5', 'be662daa971a640e40be5c804d9d7d10'), + ('sha1', 'f618096c52244539d0e89867405f573fdb0b55b0'), + # int type as the 2nd value + ('size', 273), + ], # None values should not be filtered out, but left in place - [None, 'fa618be8435447a017fd1bf2c7ae922d0428056cfc7449f7a8641edf76b48265', None], + [None, 'checksum', None], + # Alternative checksums, not to be confused with multiple checksums for a file + [('main_checksum', 'alternative_checksum')], + [('1st_of_3', '2nd_of_3', '3rd_of_3')], + # Lists must be kept: This means all must match + [['checksum_1_in_list']], + [['checksum_must_match', 'this_must_also_match']], + [['1st_of_3_list', '2nd_of_3_list', '3rd_of_3_list']], + # Alternative checksums with types + [ + (('adler32', '1st_adler'), ('crc32', '1st_crc')), + (('adler32', '2nd_adler'), ('crc32', '2nd_crc'), ('sha1', '2nd_sha')), + ], + # Entries can be dicts even containing `None` + [ + { + 'src-arm.tgz': 'arm_checksum', + 'src-x86.tgz': ('mainchecksum', 'altchecksum'), + 'src-ppc.tgz': ('mainchecksum', ('md5', 'altchecksum')), + 'git-clone.tgz': None, + }, + { + 'src': ['checksum_must_match', 'this_must_also_match'] + }, + # 2nd required checksum a dict + ['first_checksum', {'src-arm': 'arm_checksum'}] + ], ] for checksums in test_inputs: self.assertEqual(to_checksums(checksums), checksums) + # Also reuse the checksums we use in test_check_type_of_param_value_checksums + # When a checksum is valid it must not be modified + for checksums in TypeCheckingTest.get_valid_checksums_values(): + self.assertEqual(to_checksums(checksums), checksums) + + # List in list converted to tuple -> alternatives or checksum with type + checksums = [['1stchecksum', ['md5', 'md5sum']]] + checksums_expected = [['1stchecksum', ('md5', 'md5sum')]] + self.assertEqual(to_checksums(checksums), checksums_expected) + + # Error detection + wrong_nesting = [('1stchecksum', ('md5', ('md5sum', 'altmd5sum')))] + self.assertErrorRegex(EasyBuildError, 'Unexpected type.*md5', to_checksums, wrong_nesting) + correct_nesting = [('1stchecksum', ('md5', 'md5sum'), ('md5', 'altmd5sum'))] + self.assertEqual(to_checksums(correct_nesting), correct_nesting) + # YEB (YAML EC) doesn't has tuples so it uses lists instead which need to get converted + correct_nesting_yeb = [[['1stchecksum', ['md5', 'md5sum'], ['md5', 'altmd5sum']]]] + correct_nesting_yeb_conv = [[('1stchecksum', ('md5', 'md5sum'), ('md5', 'altmd5sum'))]] + self.assertEqual(to_checksums(correct_nesting_yeb), correct_nesting_yeb_conv) + self.assertEqual(to_checksums(correct_nesting_yeb_conv), correct_nesting_yeb_conv) + + unexpected_set = [('1stchecksum', {'md5', 'md5sum'})] + self.assertErrorRegex(EasyBuildError, 'Unexpected type.*md5', to_checksums, unexpected_set) + unexpected_dict = [{'src': ('md5sum', {'src': 'shasum'})}] + self.assertErrorRegex(EasyBuildError, 'Unexpected type.*shasum', to_checksums, unexpected_dict) + correct_dict = [{'src': ('md5sum', 'shasum')}] + self.assertEqual(to_checksums(correct_dict), correct_dict) + correct_dict_1 = [{'src': [['md5', 'md5sum'], ['sha', 'shasum']]}] + correct_dict_2 = [{'src': [('md5', 'md5sum'), ('sha', 'shasum')]}] + self.assertEqual(to_checksums(correct_dict_2), correct_dict_2) + self.assertEqual(to_checksums(correct_dict_1), correct_dict_2) # inner lists to tuples + + unexpected_Nones = [ + [('1stchecksum', None)], + [['1stchecksum', None]], + [{'src': ('md5sum', None)}], + [{'src': ['md5sum', None]}], + ] + self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[0]) + self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[1]) + self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[2]) + self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[3]) def test_ensure_iterable_license_specs(self): """Test ensure_iterable_license_specs function."""