From 399ed03cdc5fd9b01e57115f67bd059a5ee2346b Mon Sep 17 00:00:00 2001 From: pgmpablo157321 Date: Wed, 5 Oct 2022 16:26:56 -0500 Subject: [PATCH] Add folder checks to submission checker --- tools/submission/submission-checker.py | 101 +++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 5 deletions(-) diff --git a/tools/submission/submission-checker.py b/tools/submission/submission-checker.py index 4f7f12cb4..23a25ace7 100755 --- a/tools/submission/submission-checker.py +++ b/tools/submission/submission-checker.py @@ -986,6 +986,8 @@ REQUIRED_MEASURE_FILES = ["mlperf.conf", "user.conf", "README.md"] MS_TO_NS = 1000 * 1000 S_TO_MS = 1000 +FILE_SIZE_LIMIT_MB = 50 +MB_TO_BYTES = 1024*1024 MAX_ACCURACY_LOG_SIZE = 10 * 1024 OFFLINE_MIN_SPQ = 24576 TEST_DURATION_MS_PRE_1_0 = 60000 @@ -1293,6 +1295,21 @@ def list_files(*path): return [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))] +def list_empty_dirs_recursively(*path): + path = os.path.join(*path) + return [dirpath for dirpath, dirs, files in os.walk(path) if not dirs and not files] + + +def list_dirs_recursively(*path): + path = os.path.join(*path) + return [dirpath for dirpath, dirs, files in os.walk(path)] + + +def list_files_recursively(*path): + path = os.path.join(*path) + return [os.path.join(dirpath, file) for dirpath, dirs, files in os.walk(path) for file in files] + + def split_path(m): return m.replace("\\", "/").split("/") @@ -1711,7 +1728,6 @@ def is_system_over_network(division, system_json, path): """ Verify whether the submitted system is over network and whether it is valid for the division - for 'network' division, it is mandatory that the system is over-network for 'closed' division, the system must not be over-network for 'open' division, the system may be either local or over-network @@ -1738,17 +1754,14 @@ def check_results_dir(config, debug=False): """ Walk the results directory and do the checking. - We are called with the cdw at the root of the submission directory. level1 division - closed|open|network level2 submitter - for example mlperf_org level3 - results, systems, measurements, code - For results the structure from here is: results/$system_desc/$benchmark_model/$scenario/performance/run_n and results/$system_desc/$benchmark_model/$scenario/accuracy - We first walk into results/$system_desc make sure there is a system_desc.json and its good Next we walk into the model @@ -1861,6 +1874,84 @@ def log_result(submitter, if not os.path.exists(results_path): continue + ## Apply folder checks + dirs = list_dirs_recursively(division, submitter) + files = list_files_recursively(division, submitter) + + # Check symbolic links + symbolic_links = [f for f in files if os.path.islink(f)] + if len(symbolic_links) > 0: + log.error( + "%s/%s contains symbolic links: %s", + division, + submitter, + symbolic_links, + ) + results[f"{division}/{submitter}"] = None + continue + + # Check for files over 50 MB + files_over_size_limit = [f for f in files if os.path.getsize(f) > FILE_SIZE_LIMIT_MB * MB_TO_BYTES] + if len(files_over_size_limit) > 0: + log.error( + "%s/%s contains files with size greater than 50 MB: %s", + division, + submitter, + files_over_size_limit, + ) + results[f"{division}/{submitter}"] = None + continue + + # Check files and folders with git unfriendly names + dir_names = [(dir_, dir_.split("/")[-1]) for dir_ in dirs] + file_names = [(file_, file_.split("/")[-1]) for file_ in files] + git_error_names = [name[0] for name in dir_names if name[1].startswith(".")] + [ + name[0] for name in file_names if name[1].startswith(".") + ] + if len(git_error_names) > 0: + log.error( + "%s/%s contains files with git unfriendly name: %s", + division, + submitter, + git_error_names, + ) + results[f"{division}/{submitter}"] = None + continue + + # Check files and folders with spaces names + space_error_names = [name[0] for name in dir_names if " " in name[1]] + [ + name[0] for name in file_names if " " in name[1] + ] + if len(space_error_names) > 0: + log.error( + "%s/%s contains files with spaces in their names: %s", + division, + submitter, + space_error_names, + ) + results[f"{division}/{submitter}"] = None + continue + + # Check for pycache folders + pycache_dirs = [dir for dir in dirs if dir.endswith("__pycache__")] + if len(pycache_dirs) > 0: + log.error( + "%s has the following __pycache__ directories: %s", + name, + pycache_dirs, + ) + results[f"{division}/{submitter}"] = None + continue + + # Check for empty folders + empty_dirs = list_empty_dirs_recursively(division, submitter) + if len(empty_dirs) > 0: + log.error( + "%s has the following empty directories: %s", name, empty_dirs + ) + results[f"{division}/{submitter}"] = None + continue + for system_desc in list_dir(results_path): # we are looking at ./$division/$submitter/results/$system_desc, ie ./closed/mlperf_org/results/t4-ort @@ -2375,4 +2466,4 @@ def main(): if __name__ == "__main__": - sys.exit(main()) + sys.exit(main()) \ No newline at end of file