From d4eaf9059cbfa041cefd18e2a82e02273b2809fe Mon Sep 17 00:00:00 2001 From: Ross Bayer Date: Fri, 7 Feb 2020 23:03:32 -0800 Subject: [PATCH 1/2] [Python: flake8] Update the .flake8 config file to remove non-existent files. --- .flake8 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.flake8 b/.flake8 index 39188cae5c115..549c9aa559695 100644 --- a/.flake8 +++ b/.flake8 @@ -6,11 +6,12 @@ filename = ./benchmark/scripts/Benchmark_Driver, ./benchmark/scripts/Benchmark_DTrace.in, ./benchmark/scripts/Benchmark_GuardMalloc.in, + ./benchmark/scripts/Benchmark_QuickCheck.in, ./benchmark/scripts/Benchmark_RuntimeLeaksRunner.in, + ./benchmark/scripts/run_smoke_bench, ./docs/scripts/ns-html2rst, - ./test/Driver/Inputs/fake-toolchain/clang++, ./test/Driver/Inputs/fake-toolchain/ld, ./utils/80+-check, @@ -30,7 +31,6 @@ filename = ./utils/recursive-lipo, ./utils/round-trip-syntax-test, ./utils/rth, - ./utils/run-remote, ./utils/run-test, ./utils/scale-test, ./utils/submit-benchmark-results, From b1961745e0a5ab346923a2083ac838d8220e6480 Mon Sep 17 00:00:00 2001 From: Ross Bayer Date: Fri, 7 Feb 2020 23:23:17 -0800 Subject: [PATCH 2/2] [Python: black] Reformatted the benchmark Python sources using utils/python_format.py. --- benchmark/scripts/Benchmark_DTrace.in | 73 +- benchmark/scripts/Benchmark_Driver | 719 ++++++++----- benchmark/scripts/Benchmark_GuardMalloc.in | 23 +- benchmark/scripts/Benchmark_QuickCheck.in | 68 +- .../scripts/Benchmark_RuntimeLeaksRunner.in | 100 +- benchmark/scripts/build_linux.py | 50 +- benchmark/scripts/build_script_helper.py | 50 +- benchmark/scripts/compare_perf_tests.py | 506 +++++---- benchmark/scripts/create_benchmark.py | 74 +- .../generate_harness/generate_harness.py | 19 +- .../perf_test_driver/perf_test_driver.py | 78 +- benchmark/scripts/run_smoke_bench | 249 +++-- benchmark/scripts/test_Benchmark_Driver.py | 980 +++++++++++------- benchmark/scripts/test_compare_perf_tests.py | 842 ++++++++------- benchmark/scripts/test_utils.py | 17 +- benchmark/utils/convertToJSON.py | 14 +- 16 files changed, 2277 insertions(+), 1585 deletions(-) diff --git a/benchmark/scripts/Benchmark_DTrace.in b/benchmark/scripts/Benchmark_DTrace.in index 273c538cd650f..300291813b96d 100644 --- a/benchmark/scripts/Benchmark_DTrace.in +++ b/benchmark/scripts/Benchmark_DTrace.in @@ -19,20 +19,17 @@ import sys DRIVER_LIBRARY_PATH = "@PATH_TO_DRIVER_LIBRARY@" sys.path.append(DRIVER_LIBRARY_PATH) -DTRACE_PATH = os.path.join(DRIVER_LIBRARY_PATH, 'swift_stats.d') +DTRACE_PATH = os.path.join(DRIVER_LIBRARY_PATH, "swift_stats.d") import perf_test_driver # noqa (E402 module level import not at top of file) # Regexes for the XFAIL_LIST. 
Matches against '([Onone|O|Osize],TestName)' -XFAIL_LIST = [ -] +XFAIL_LIST = [] class DTraceResult(perf_test_driver.Result): - def __init__(self, name, status, output, csv_output): - perf_test_driver.Result.__init__( - self, name, status, output, XFAIL_LIST) + perf_test_driver.Result.__init__(self, name, status, output, XFAIL_LIST) self.csv_output = csv_output def is_failure(self): @@ -40,40 +37,38 @@ class DTraceResult(perf_test_driver.Result): @classmethod def data_headers(cls): - return [ - 'Name', 'Result', 'Total RR Opts', 'Total RR Opts/Iter'] + return ["Name", "Result", "Total RR Opts", "Total RR Opts/Iter"] @classmethod def data_format(cls, max_test_len): non_name_headers = DTraceResult.data_headers()[1:] - fmt = ('{:<%d}' % (max_test_len + 5)) + \ - ''.join(['{:<%d}' % (len(h) + 2) for h in non_name_headers]) + fmt = ("{:<%d}" % (max_test_len + 5)) + "".join( + ["{:<%d}" % (len(h) + 2) for h in non_name_headers] + ) return fmt @classmethod def print_data_header(cls, max_test_len, csv_output): headers = cls.data_headers() if csv_output: - print(','.join(headers)) + print(",".join(headers)) return print(cls.data_format(max_test_len).format(*headers)) def print_data(self, max_test_len): result = [self.get_name(), self.get_result()] + map(str, self.output) if self.csv_output: - print(','.join(result)) + print(",".join(result)) return print(DTraceResult.data_format(max_test_len).format(*result)) class DTraceBenchmarkDriver(perf_test_driver.BenchmarkDriver): - def __init__(self, binary, xfail_list, csv_output): perf_test_driver.BenchmarkDriver.__init__( - self, binary, xfail_list, - enable_parallel=True, - opt_levels=['O']) + self, binary, xfail_list, enable_parallel=True, opt_levels=["O"] + ) self.csv_output = csv_output def print_data_header(self, max_test_len): @@ -83,23 +78,37 @@ class DTraceBenchmarkDriver(perf_test_driver.BenchmarkDriver): return {} def process_input(self, data): - test_name = '({}_{})'.format(data['opt'], data['test_name']) + test_name = "({}_{})".format(data["opt"], data["test_name"]) print("Running {}...".format(test_name)) sys.stdout.flush() def get_results_with_iters(iters): e = os.environ - e['SWIFT_DETERMINISTIC_HASHING'] = '1' - p = subprocess.Popen([ - 'sudo', 'dtrace', '-s', DTRACE_PATH, - '-c', '%s %s %s %s' % (data['path'], data['test_name'], - '--num-iters=%d' % iters, - '--num-samples=2') - ], stdout=subprocess.PIPE, stderr=open('/dev/null', 'w'), env=e) + e["SWIFT_DETERMINISTIC_HASHING"] = "1" + p = subprocess.Popen( + [ + "sudo", + "dtrace", + "-s", + DTRACE_PATH, + "-c", + "%s %s %s %s" + % ( + data["path"], + data["test_name"], + "--num-iters=%d" % iters, + "--num-samples=2", + ), + ], + stdout=subprocess.PIPE, + stderr=open("/dev/null", "w"), + env=e, + ) results = [x for x in p.communicate()[0].split("\n") if len(x) > 0] return [ - x.split(',')[1] for x in - results[results.index('DTRACE RESULTS') + 1:]] + x.split(",")[1] for x in results[results.index("DTRACE RESULTS") + 1 :] + ] + iter_2_results = get_results_with_iters(2) iter_3_results = get_results_with_iters(3) iter_5_results = get_results_with_iters(5) @@ -136,16 +145,18 @@ SWIFT_BIN_DIR = os.path.dirname(os.path.abspath(__file__)) def parse_args(): parser = argparse.ArgumentParser() parser.add_argument( - '-filter', + "-filter", type=str, default=None, - help='Filter out any test that does not match the given regex') + help="Filter out any test that does not match the given regex", + ) parser.add_argument( - '--emit-csv', + "--emit-csv", default=False, - action='store_true', + 
action="store_true", help="Emit csv output", - dest='csv_output') + dest="csv_output", + ) return parser.parse_args() diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver index 31808852bcf22..1e84738562bfe 100755 --- a/benchmark/scripts/Benchmark_Driver +++ b/benchmark/scripts/Benchmark_Driver @@ -61,23 +61,22 @@ class BenchmarkDriver(object): self.results = {} # Set a constant hash seed. Some tests are currently sensitive to # fluctuations in the number of hash collisions. - os.environ['SWIFT_DETERMINISTIC_HASHING'] = '1' + os.environ["SWIFT_DETERMINISTIC_HASHING"] = "1" def _invoke(self, cmd): - return self._subprocess.check_output( - cmd, stderr=self._subprocess.STDOUT) + return self._subprocess.check_output(cmd, stderr=self._subprocess.STDOUT) @property def test_harness(self): """Full path to test harness binary.""" - suffix = (self.args.optimization if hasattr(self.args, 'optimization') - else 'O') + suffix = self.args.optimization if hasattr(self.args, "optimization") else "O" return os.path.join(self.args.tests, "Benchmark_" + suffix) def _git(self, cmd): """Execute the Git command in the `swift-repo`.""" return self._invoke( - ('git -C {0} '.format(self.args.swift_repo) + cmd).split()).strip() + ("git -C {0} ".format(self.args.swift_repo) + cmd).split() + ).strip() @property def log_file(self): @@ -89,27 +88,28 @@ class BenchmarkDriver(object): return None log_dir = self.args.output_dir harness_name = os.path.basename(self.test_harness) - suffix = '-' + time.strftime('%Y%m%d%H%M%S', time.localtime()) + suffix = "-" + time.strftime("%Y%m%d%H%M%S", time.localtime()) if self.args.swift_repo: log_dir = os.path.join( - log_dir, self._git('rev-parse --abbrev-ref HEAD')) # branch - suffix += '-' + self._git('rev-parse --short HEAD') # revision - return os.path.join(log_dir, harness_name + suffix + '.log') + log_dir, self._git("rev-parse --abbrev-ref HEAD") + ) # branch + suffix += "-" + self._git("rev-parse --short HEAD") # revision + return os.path.join(log_dir, harness_name + suffix + ".log") @property def _cmd_list_benchmarks(self): # Use tab delimiter for easier parsing to override the default comma. # (The third 'column' is always comma-separated list of tags in square # brackets -- currently unused here.) 
- return [self.test_harness, '--list', '--delim=\t'] + ( - ['--skip-tags='] if (self.args.benchmarks or - self.args.filters) else []) + return [self.test_harness, "--list", "--delim=\t"] + ( + ["--skip-tags="] if (self.args.benchmarks or self.args.filters) else [] + ) def _get_tests(self): """Return a list of performance tests to run.""" number_name_pairs = [ - line.split('\t')[:2] for line in - self._invoke(self._cmd_list_benchmarks).split('\n')[1:-1] + line.split("\t")[:2] + for line in self._invoke(self._cmd_list_benchmarks).split("\n")[1:-1] ] # unzip list of pairs into 2 lists test_numbers, self.all_tests = map(list, zip(*number_name_pairs)) @@ -122,55 +122,79 @@ class BenchmarkDriver(object): def _tests_matching_patterns(self): regexes = [re.compile(pattern) for pattern in self.args.filters] - return sorted(list(set([name for pattern in regexes - for name in self.all_tests - if pattern.match(name)]))) + return sorted( + list( + set( + [ + name + for pattern in regexes + for name in self.all_tests + if pattern.match(name) + ] + ) + ) + ) def _tests_by_name_or_number(self, test_numbers): benchmarks = set(self.args.benchmarks) number_to_name = dict(zip(test_numbers, self.all_tests)) - tests_by_number = [number_to_name[i] - for i in benchmarks.intersection(set(test_numbers))] - return sorted(list(benchmarks - .intersection(set(self.all_tests)) - .union(tests_by_number))) - - def run(self, test=None, num_samples=None, num_iters=None, - sample_time=None, verbose=None, measure_memory=False, - quantile=None): + tests_by_number = [ + number_to_name[i] for i in benchmarks.intersection(set(test_numbers)) + ] + return sorted( + list(benchmarks.intersection(set(self.all_tests)).union(tests_by_number)) + ) + + def run( + self, + test=None, + num_samples=None, + num_iters=None, + sample_time=None, + verbose=None, + measure_memory=False, + quantile=None, + ): """Execute benchmark and gather results.""" num_samples = num_samples or 0 num_iters = num_iters or 0 # automatically determine N to run for 1s sample_time = sample_time or 0 # default is 1s cmd = self._cmd_run( - test, num_samples, num_iters, sample_time, - verbose, measure_memory, quantile) + test, num_samples, num_iters, sample_time, verbose, measure_memory, quantile + ) output = self._invoke(cmd) results = self.parser.results_from_string(output) return results.items()[0][1] if test else results - def _cmd_run(self, test, num_samples, num_iters, sample_time, - verbose, measure_memory, quantile): + def _cmd_run( + self, + test, + num_samples, + num_iters, + sample_time, + verbose, + measure_memory, + quantile, + ): cmd = [self.test_harness] if test: cmd.append(test) else: - cmd.extend([self.test_number.get(name, name) - for name in self.tests]) + cmd.extend([self.test_number.get(name, name) for name in self.tests]) if num_samples > 0: - cmd.append('--num-samples={0}'.format(num_samples)) + cmd.append("--num-samples={0}".format(num_samples)) if num_iters > 0: - cmd.append('--num-iters={0}'.format(num_iters)) + cmd.append("--num-iters={0}".format(num_iters)) if sample_time > 0: - cmd.append('--sample-time={0}'.format(sample_time)) + cmd.append("--sample-time={0}".format(sample_time)) if verbose: - cmd.append('--verbose') + cmd.append("--verbose") if measure_memory: - cmd.append('--memory') + cmd.append("--memory") if quantile: - cmd.append('--quantile={0}'.format(quantile)) - cmd.append('--delta') + cmd.append("--quantile={0}".format(quantile)) + cmd.append("--delta") return cmd def run_independent_samples(self, test): @@ -178,14 +202,18 @@ 
class BenchmarkDriver(object): Returns the aggregated result of independent benchmark invocations. """ + def merge_results(a, b): a.merge(b) return a - return reduce(merge_results, - [self.run(test, measure_memory=True, - num_iters=1, quantile=20) - for _ in range(self.args.independent_samples)]) + return reduce( + merge_results, + [ + self.run(test, measure_memory=True, num_iters=1, quantile=20) + for _ in range(self.args.independent_samples) + ], + ) def log_results(self, output, log_file=None): """Log output to `log_file`. @@ -196,11 +224,11 @@ class BenchmarkDriver(object): dir = os.path.dirname(log_file) if not os.path.exists(dir): os.makedirs(dir) - print('Logging results to: %s' % log_file) - with open(log_file, 'w') as f: + print("Logging results to: %s" % log_file) + with open(log_file, "w") as f: f.write(output) - RESULT = '{:>3} {:<40} {:>7} {:>7} {:>6} {:>10} {:>6} {:>7} {:>10}' + RESULT = "{:>3} {:<40} {:>7} {:>7} {:>6} {:>10} {:>6} {:>7} {:>10}" def run_and_log(self, csv_console=True): """Run benchmarks and continuously log results to the console. @@ -212,19 +240,41 @@ class BenchmarkDriver(object): format is justified columns. """ format = ( - (lambda values: ','.join(values)) if csv_console else - (lambda values: self.RESULT.format(*values))) # justified columns + (lambda values: ",".join(values)) + if csv_console + else (lambda values: self.RESULT.format(*values)) + ) # justified columns def console_log(values): print(format(values)) def result_values(r): - return map(str, [r.test_num, r.name, r.num_samples, r.min, - r.samples.q1, r.median, r.samples.q3, r.max, - r.max_rss]) - - header = ['#', 'TEST', 'SAMPLES', 'MIN(μs)', 'Q1(μs)', 'MEDIAN(μs)', - 'Q3(μs)', 'MAX(μs)', 'MAX_RSS(B)'] + return map( + str, + [ + r.test_num, + r.name, + r.num_samples, + r.min, + r.samples.q1, + r.median, + r.samples.q3, + r.max, + r.max_rss, + ], + ) + + header = [ + "#", + "TEST", + "SAMPLES", + "MIN(μs)", + "Q1(μs)", + "MEDIAN(μs)", + "Q3(μs)", + "MAX(μs)", + "MAX_RSS(B)", + ] console_log(header) results = [header] for test in self.tests: @@ -232,10 +282,10 @@ class BenchmarkDriver(object): console_log(result) results.append(result) - print( - '\nTotal performance tests executed: {0}'.format(len(self.tests))) - return (None if csv_console else - ('\n'.join([','.join(r) for r in results]) + '\n')) # csv_log + print("\nTotal performance tests executed: {0}".format(len(self.tests))) + return ( + None if csv_console else ("\n".join([",".join(r) for r in results]) + "\n") + ) # csv_log @staticmethod def run_benchmarks(args): @@ -255,22 +305,31 @@ class LoggingReportFormatter(logging.Formatter): """ import logging as log - colors = {log.DEBUG: '9', log.INFO: '2', log.WARNING: '3', log.ERROR: '1', - log.CRITICAL: '5'} + + colors = { + log.DEBUG: "9", + log.INFO: "2", + log.WARNING: "3", + log.ERROR: "1", + log.CRITICAL: "5", + } def __init__(self, use_color=False): """Specify if report should use colors; defaults to False.""" - super(LoggingReportFormatter, self).__init__('%(message)s') + super(LoggingReportFormatter, self).__init__("%(message)s") self.use_color = use_color def format(self, record): """Format the log record with level and category.""" msg = super(LoggingReportFormatter, self).format(record) - category = ((record.name.split('.')[-1] + ': ') if '.' 
in record.name - else '') - return ('\033[1;3{0}m{1}{2}\033[1;0m'.format( - self.colors[record.levelno], category, msg) if self.use_color else - '{0} {1}{2}'.format(record.levelname, category, msg)) + category = (record.name.split(".")[-1] + ": ") if "." in record.name else "" + return ( + "\033[1;3{0}m{1}{2}\033[1;0m".format( + self.colors[record.levelno], category, msg + ) + if self.use_color + else "{0} {1}{2}".format(record.levelname, category, msg) + ) class MarkdownReportHandler(logging.StreamHandler): @@ -284,27 +343,34 @@ class MarkdownReportHandler(logging.StreamHandler): """Initialize the handler and write a Markdown table header.""" super(MarkdownReportHandler, self).__init__(stream) self.setLevel(logging.INFO) - self.stream.write('\n✅ | Benchmark Check Report\n---|---') + self.stream.write("\n✅ | Benchmark Check Report\n---|---") self.stream.flush() - levels = {logging.WARNING: '\n⚠️', logging.ERROR: '\n⛔️', - logging.INFO: '
'} - categories = {'naming': '🔤', 'runtime': '⏱', 'memory': 'Ⓜ️'} + levels = { + logging.WARNING: "\n⚠️", + logging.ERROR: "\n⛔️", + logging.INFO: "
", + } + categories = {"naming": "🔤", "runtime": "⏱", "memory": "Ⓜ️"} quotes_re = re.compile("'") def format(self, record): msg = super(MarkdownReportHandler, self).format(record) - return (self.levels.get(record.levelno, '') + - ('' if record.levelno == logging.INFO else - self.categories.get(record.name.split('.')[-1], '') + ' | ') + - self.quotes_re.sub('`', msg)) + return ( + self.levels.get(record.levelno, "") + + ( + "" + if record.levelno == logging.INFO + else self.categories.get(record.name.split(".")[-1], "") + " | " + ) + + self.quotes_re.sub("`", msg) + ) def emit(self, record): msg = self.format(record) stream = self.stream try: - if (isinstance(msg, unicode) and - getattr(stream, 'encoding', None)): + if isinstance(msg, unicode) and getattr(stream, "encoding", None): stream.write(msg.encode(stream.encoding)) else: stream.write(msg) @@ -313,7 +379,7 @@ class MarkdownReportHandler(logging.StreamHandler): self.flush() def close(self): - self.stream.write('\n\n') + self.stream.write("\n\n") self.stream.flush() super(MarkdownReportHandler, self).close() @@ -328,10 +394,10 @@ class BenchmarkDoctor(object): consumption). """ - log = logging.getLogger('BenchmarkDoctor') - log_naming = log.getChild('naming') - log_runtime = log.getChild('runtime') - log_memory = log.getChild('memory') + log = logging.getLogger("BenchmarkDoctor") + log_naming = log.getChild("naming") + log_runtime = log.getChild("runtime") + log_memory = log.getChild("memory") log.setLevel(logging.DEBUG) def __init__(self, args, driver=None): @@ -343,23 +409,25 @@ class BenchmarkDoctor(object): self.driver = driver or BenchmarkDriver(args) self.results = {} - if hasattr(args, 'markdown') and args.markdown: + if hasattr(args, "markdown") and args.markdown: self.console_handler = MarkdownReportHandler(sys.stdout) else: self.console_handler = logging.StreamHandler(sys.stdout) self.console_handler.setFormatter( - LoggingReportFormatter(use_color=sys.stdout.isatty())) - self.console_handler.setLevel(logging.DEBUG if args.verbose else - logging.INFO) + LoggingReportFormatter(use_color=sys.stdout.isatty()) + ) + self.console_handler.setLevel( + logging.DEBUG if args.verbose else logging.INFO + ) self.log.addHandler(self.console_handler) - self.log.debug('Checking tests: %s', ', '.join(self.driver.tests)) + self.log.debug("Checking tests: %s", ", ".join(self.driver.tests)) self.requirements = [ self._name_matches_benchmark_naming_convention, self._name_is_at_most_40_chars_long, self._no_setup_overhead, self._reasonable_setup_time, self._optimized_runtime_in_range, - self._constant_memory_use + self._constant_memory_use, ] def __del__(self): @@ -368,95 +436,122 @@ class BenchmarkDoctor(object): handler.close() self.log.removeHandler(self.console_handler) - benchmark_naming_convention_re = re.compile(r'[A-Z][a-zA-Z0-9\-.!?]+') - camel_humps_re = re.compile(r'[a-z][A-Z]') + benchmark_naming_convention_re = re.compile(r"[A-Z][a-zA-Z0-9\-.!?]+") + camel_humps_re = re.compile(r"[a-z][A-Z]") @staticmethod def _name_matches_benchmark_naming_convention(measurements): - name = measurements['name'] + name = measurements["name"] match = BenchmarkDoctor.benchmark_naming_convention_re.match(name) - matched = match.group(0) if match else '' + matched = match.group(0) if match else "" composite_words = len(BenchmarkDoctor.camel_humps_re.findall(name)) + 1 if name != matched: BenchmarkDoctor.log_naming.error( - "'%s' name doesn't conform to benchmark naming convention.", - name) - BenchmarkDoctor.log_naming.info( - 'See 
http://bit.ly/BenchmarkNaming') + "'%s' name doesn't conform to benchmark naming convention.", name + ) + BenchmarkDoctor.log_naming.info("See http://bit.ly/BenchmarkNaming") if composite_words > 4: BenchmarkDoctor.log_naming.warning( - "'%s' name is composed of %d words.", name, composite_words) + "'%s' name is composed of %d words.", name, composite_words + ) BenchmarkDoctor.log_naming.info( "Split '%s' name into dot-separated groups and variants. " - "See http://bit.ly/BenchmarkNaming", name) + "See http://bit.ly/BenchmarkNaming", + name, + ) @staticmethod def _name_is_at_most_40_chars_long(measurements): - name = measurements['name'] + name = measurements["name"] if len(name) > 40: BenchmarkDoctor.log_naming.error( - "'%s' name is %d characters long.", name, len(name)) + "'%s' name is %d characters long.", name, len(name) + ) BenchmarkDoctor.log_naming.info( - 'Benchmark name should not be longer than 40 characters.') + "Benchmark name should not be longer than 40 characters." + ) @staticmethod - def _select(measurements, num_iters=None, opt_level='O'): - prefix = measurements['name'] + ' ' + opt_level - prefix += '' if num_iters is None else (' i' + str(num_iters)) - return [series for name, series in measurements.items() - if name.startswith(prefix)] + def _select(measurements, num_iters=None, opt_level="O"): + prefix = measurements["name"] + " " + opt_level + prefix += "" if num_iters is None else (" i" + str(num_iters)) + return [ + series for name, series in measurements.items() if name.startswith(prefix) + ] @staticmethod def _optimized_runtime_in_range(measurements): - name = measurements['name'] + name = measurements["name"] setup, ratio = BenchmarkDoctor._setup_overhead(measurements) setup = 0 if ratio < 0.05 else setup runtime = min( - [(result.samples.min - correction) for i_series in - [BenchmarkDoctor._select(measurements, num_iters=i) - for correction in [(setup / i) for i in [1, 2]] - ] for result in i_series]) + [ + (result.samples.min - correction) + for i_series in [ + BenchmarkDoctor._select(measurements, num_iters=i) + for correction in [(setup / i) for i in [1, 2]] + ] + for result in i_series + ] + ) threshold = 1000 if threshold < runtime: - log = (BenchmarkDoctor.log_runtime.warning if runtime < 10000 else - BenchmarkDoctor.log_runtime.error) - caveat = '' if setup == 0 else ' (excluding the setup overhead)' + log = ( + BenchmarkDoctor.log_runtime.warning + if runtime < 10000 + else BenchmarkDoctor.log_runtime.error + ) + caveat = "" if setup == 0 else " (excluding the setup overhead)" log("'%s' execution took at least %d μs%s.", name, runtime, caveat) def factor(base): # suitable divisior that's integer power of base - return int(pow(base, math.ceil( - math.log(runtime / float(threshold), base)))) + return int( + pow(base, math.ceil(math.log(runtime / float(threshold), base))) + ) BenchmarkDoctor.log_runtime.info( "Decrease the workload of '%s' by a factor of %d (%d), to be " - "less than %d μs.", name, factor(2), factor(10), threshold) + "less than %d μs.", + name, + factor(2), + factor(10), + threshold, + ) threshold = 20 if runtime < threshold: - log = (BenchmarkDoctor.log_runtime.error if runtime == 0 else - BenchmarkDoctor.log_runtime.warning) + log = ( + BenchmarkDoctor.log_runtime.error + if runtime == 0 + else BenchmarkDoctor.log_runtime.warning + ) log("'%s' execution took %d μs.", name, runtime) BenchmarkDoctor.log_runtime.info( "Ensure the workload of '%s' has a properly measurable size" " (runtime > %d μs) and is not eliminated by the compiler (use" 
- " `blackHole` function if necessary)." if runtime == 0 else - "Increase the workload of '%s' to be more than %d μs.", - name, threshold) + " `blackHole` function if necessary)." + if runtime == 0 + else "Increase the workload of '%s' to be more than %d μs.", + name, + threshold, + ) @staticmethod def _setup_overhead(measurements): select = BenchmarkDoctor._select - ti1, ti2 = [float(min(mins)) for mins in - [[result.samples.min for result in i_series] - for i_series in - [select(measurements, num_iters=i) for i in [1, 2]]]] - setup = (int(round(2.0 * (ti1 - ti2))) if ti2 > 20 # limit of accuracy - else 0) + ti1, ti2 = [ + float(min(mins)) + for mins in [ + [result.samples.min for result in i_series] + for i_series in [select(measurements, num_iters=i) for i in [1, 2]] + ] + ] + setup = int(round(2.0 * (ti1 - ti2))) if ti2 > 20 else 0 # limit of accuracy ratio = (setup / ti1) if ti1 > 0 else 0 return (setup, ratio) @@ -466,52 +561,63 @@ class BenchmarkDoctor(object): if ratio > 0.05: BenchmarkDoctor.log_runtime.error( "'%s' has setup overhead of %d μs (%.1f%%).", - measurements['name'], setup, round((100 * ratio), 1)) + measurements["name"], + setup, + round((100 * ratio), 1), + ) BenchmarkDoctor.log_runtime.info( - 'Move initialization of benchmark data to the `setUpFunction` ' - 'registered in `BenchmarkInfo`.') + "Move initialization of benchmark data to the `setUpFunction` " + "registered in `BenchmarkInfo`." + ) @staticmethod def _reasonable_setup_time(measurements): - setup = min([result.setup - for result in BenchmarkDoctor._select(measurements)]) + setup = min([result.setup for result in BenchmarkDoctor._select(measurements)]) if 200000 < setup: # 200 ms BenchmarkDoctor.log_runtime.error( - "'%s' setup took at least %d μs.", - measurements['name'], setup) + "'%s' setup took at least %d μs.", measurements["name"], setup + ) BenchmarkDoctor.log_runtime.info( - 'The `setUpFunction` should take no more than 200 ms.') + "The `setUpFunction` should take no more than 200 ms." 
+ ) @staticmethod def _constant_memory_use(measurements): select = BenchmarkDoctor._select (min_i1, max_i1), (min_i2, max_i2) = [ - (min(memory_use), max(memory_use)) for memory_use in - [[r.mem_pages for r in i_series] for i_series in - [select(measurements, num_iters=i) for i in - [1, 2]]]] + (min(memory_use), max(memory_use)) + for memory_use in [ + [r.mem_pages for r in i_series] + for i_series in [select(measurements, num_iters=i) for i in [1, 2]] + ] + ] range_i1, range_i2 = max_i1 - min_i1, max_i2 - min_i2 normal_range = 15 # pages - name = measurements['name'] + name = measurements["name"] more_info = False if abs(min_i1 - min_i2) > max(range_i1, range_i2, normal_range): more_info = True BenchmarkDoctor.log_memory.error( "'%s' varies the memory footprint of the base " - "workload depending on the `num-iters`.", name) + "workload depending on the `num-iters`.", + name, + ) if max(range_i1, range_i2) > normal_range: more_info = True BenchmarkDoctor.log_memory.warning( "'%s' has very wide range of memory used between " - "independent, repeated measurements.", name) + "independent, repeated measurements.", + name, + ) if more_info: BenchmarkDoctor.log_memory.info( "'%s' mem_pages [i1, i2]: min=[%d, %d] 𝚫=%d R=[%d, %d]", name, - *[min_i1, min_i2, abs(min_i1 - min_i2), range_i1, range_i2]) + *[min_i1, min_i2, abs(min_i1 - min_i2), range_i1, range_i2] + ) @staticmethod def _adjusted_1s_samples(runtime): @@ -530,38 +636,52 @@ class BenchmarkDoctor(object): Returns a dictionary with benchmark name and `PerformanceTestResult`s. """ - self.log.debug('Calibrating num-samples for {0}:'.format(benchmark)) - r = self.driver.run(benchmark, num_samples=3, num_iters=1, - verbose=True) # calibrate + self.log.debug("Calibrating num-samples for {0}:".format(benchmark)) + r = self.driver.run( + benchmark, num_samples=3, num_iters=1, verbose=True + ) # calibrate num_samples = self._adjusted_1s_samples(r.samples.min) def capped(s): return min(s, 200) + run_args = [(capped(num_samples), 1), (capped(num_samples / 2), 2)] opts = self.driver.args.optimization opts = opts if isinstance(opts, list) else [opts] self.log.debug( - 'Runtime {0} μs yields {1} adjusted samples per second.'.format( - r.samples.min, num_samples)) + "Runtime {0} μs yields {1} adjusted samples per second.".format( + r.samples.min, num_samples + ) + ) self.log.debug( - 'Measuring {0}, 5 x i1 ({1} samples), 5 x i2 ({2} samples)'.format( - benchmark, run_args[0][0], run_args[1][0])) + "Measuring {0}, 5 x i1 ({1} samples), 5 x i2 ({2} samples)".format( + benchmark, run_args[0][0], run_args[1][0] + ) + ) measurements = dict( - [('{0} {1} i{2}{3}'.format(benchmark, o, i, suffix), - self.driver.run(benchmark, num_samples=s, num_iters=i, - verbose=True, measure_memory=True)) - for o in opts - for s, i in run_args - for suffix in list('abcde') - ] + [ + ( + "{0} {1} i{2}{3}".format(benchmark, o, i, suffix), + self.driver.run( + benchmark, + num_samples=s, + num_iters=i, + verbose=True, + measure_memory=True, + ), + ) + for o in opts + for s, i in run_args + for suffix in list("abcde") + ] ) - measurements['name'] = benchmark + measurements["name"] = benchmark return measurements def analyze(self, benchmark_measurements): """Analyze whether benchmark fullfills all requirtements.""" - self.log.debug('Analyzing %s', benchmark_measurements['name']) + self.log.debug("Analyzing %s", benchmark_measurements["name"]) for rule in self.requirements: rule(benchmark_measurements) @@ -582,93 +702,137 @@ class BenchmarkDoctor(object): def format_name(log_path): 
"""Return the filename and directory for a log file.""" - return '/'.join(log_path.split('/')[-2:]) + return "/".join(log_path.split("/")[-2:]) def compare_logs(compare_script, new_log, old_log, log_dir, opt): """Return diff of log files at paths `new_log` and `old_log`.""" - print('Comparing %s %s ...' % (format_name(old_log), format_name(new_log))) - subprocess.call([compare_script, '--old-file', old_log, - '--new-file', new_log, '--format', 'markdown', - '--output', os.path.join(log_dir, 'latest_compare_{0}.md' - .format(opt))]) + print("Comparing %s %s ..." % (format_name(old_log), format_name(new_log))) + subprocess.call( + [ + compare_script, + "--old-file", + old_log, + "--new-file", + new_log, + "--format", + "markdown", + "--output", + os.path.join(log_dir, "latest_compare_{0}.md".format(opt)), + ] + ) def compare(args): log_dir = args.log_dir compare_script = args.compare_script baseline_branch = args.baseline_branch - current_branch = \ - BenchmarkDriver(args, tests=[''])._git('rev-parse --abbrev-ref HEAD') + current_branch = BenchmarkDriver(args, tests=[""])._git( + "rev-parse --abbrev-ref HEAD" + ) current_branch_dir = os.path.join(log_dir, current_branch) baseline_branch_dir = os.path.join(log_dir, baseline_branch) - if current_branch != baseline_branch and \ - not os.path.isdir(baseline_branch_dir): - print(('Unable to find benchmark logs for {baseline_branch} branch. ' + - 'Set a baseline benchmark log by passing --benchmark to ' + - 'build-script while on {baseline_branch} branch.') - .format(baseline_branch=baseline_branch)) + if current_branch != baseline_branch and not os.path.isdir(baseline_branch_dir): + print( + ( + "Unable to find benchmark logs for {baseline_branch} branch. " + + "Set a baseline benchmark log by passing --benchmark to " + + "build-script while on {baseline_branch} branch." 
+ ).format(baseline_branch=baseline_branch) + ) return 1 recent_logs = {} for branch_dir in [current_branch_dir, baseline_branch_dir]: - for opt in ['O', 'Onone']: - recent_logs[os.path.basename(branch_dir) + '_' + opt] = sorted( - glob.glob(os.path.join( - branch_dir, 'Benchmark_' + opt + '-*.log')), - key=os.path.getctime, reverse=True) + for opt in ["O", "Onone"]: + recent_logs[os.path.basename(branch_dir) + "_" + opt] = sorted( + glob.glob(os.path.join(branch_dir, "Benchmark_" + opt + "-*.log")), + key=os.path.getctime, + reverse=True, + ) if current_branch == baseline_branch: - if len(recent_logs[baseline_branch + '_O']) > 1 and \ - len(recent_logs[baseline_branch + '_Onone']) > 1: - compare_logs(compare_script, - recent_logs[baseline_branch + '_O'][0], - recent_logs[baseline_branch + '_O'][1], - log_dir, 'O') - compare_logs(compare_script, - recent_logs[baseline_branch + '_Onone'][0], - recent_logs[baseline_branch + '_Onone'][1], - log_dir, 'Onone') + if ( + len(recent_logs[baseline_branch + "_O"]) > 1 + and len(recent_logs[baseline_branch + "_Onone"]) > 1 + ): + compare_logs( + compare_script, + recent_logs[baseline_branch + "_O"][0], + recent_logs[baseline_branch + "_O"][1], + log_dir, + "O", + ) + compare_logs( + compare_script, + recent_logs[baseline_branch + "_Onone"][0], + recent_logs[baseline_branch + "_Onone"][1], + log_dir, + "Onone", + ) else: - print(('{baseline_branch}/{baseline_branch} comparison ' + - 'skipped: no previous {baseline_branch} logs') - .format(baseline_branch=baseline_branch)) + print( + ( + "{baseline_branch}/{baseline_branch} comparison " + + "skipped: no previous {baseline_branch} logs" + ).format(baseline_branch=baseline_branch) + ) else: # TODO: Check for outdated baseline branch log - if len(recent_logs[current_branch + '_O']) == 0 or \ - len(recent_logs[current_branch + '_Onone']) == 0: - print('branch sanity failure: missing branch logs') + if ( + len(recent_logs[current_branch + "_O"]) == 0 + or len(recent_logs[current_branch + "_Onone"]) == 0 + ): + print("branch sanity failure: missing branch logs") return 1 - if len(recent_logs[current_branch + '_O']) == 1 or \ - len(recent_logs[current_branch + '_Onone']) == 1: - print('branch/branch comparison skipped: no previous branch logs') + if ( + len(recent_logs[current_branch + "_O"]) == 1 + or len(recent_logs[current_branch + "_Onone"]) == 1 + ): + print("branch/branch comparison skipped: no previous branch logs") else: - compare_logs(compare_script, - recent_logs[current_branch + '_O'][0], - recent_logs[current_branch + '_O'][1], - log_dir, 'O') - compare_logs(compare_script, - recent_logs[current_branch + '_Onone'][0], - recent_logs[current_branch + '_Onone'][1], - log_dir, 'Onone') - - if len(recent_logs[baseline_branch + '_O']) == 0 or \ - len(recent_logs[baseline_branch + '_Onone']) == 0: - print(('branch/{baseline_branch} failure: no {baseline_branch} ' + - 'logs') - .format(baseline_branch=baseline_branch)) + compare_logs( + compare_script, + recent_logs[current_branch + "_O"][0], + recent_logs[current_branch + "_O"][1], + log_dir, + "O", + ) + compare_logs( + compare_script, + recent_logs[current_branch + "_Onone"][0], + recent_logs[current_branch + "_Onone"][1], + log_dir, + "Onone", + ) + + if ( + len(recent_logs[baseline_branch + "_O"]) == 0 + or len(recent_logs[baseline_branch + "_Onone"]) == 0 + ): + print( + ( + "branch/{baseline_branch} failure: no {baseline_branch} " + "logs" + ).format(baseline_branch=baseline_branch) + ) return 1 else: - compare_logs(compare_script, - 
recent_logs[current_branch + '_O'][0], - recent_logs[baseline_branch + '_O'][0], - log_dir, 'O') - compare_logs(compare_script, - recent_logs[current_branch + '_Onone'][0], - recent_logs[baseline_branch + '_Onone'][0], - log_dir, 'Onone') + compare_logs( + compare_script, + recent_logs[current_branch + "_O"][0], + recent_logs[baseline_branch + "_O"][0], + log_dir, + "O", + ) + compare_logs( + compare_script, + recent_logs[current_branch + "_Onone"][0], + recent_logs[baseline_branch + "_Onone"][0], + log_dir, + "Onone", + ) # TODO: Fail on large regressions @@ -686,79 +850,100 @@ def positive_int(value): def parse_args(args): """Parse command line arguments and set default values.""" parser = argparse.ArgumentParser( - epilog='Example: ./Benchmark_Driver run -i 5 -f Prefix -f .*Suffix.*' + epilog="Example: ./Benchmark_Driver run -i 5 -f Prefix -f .*Suffix.*" ) subparsers = parser.add_subparsers( - title='Swift benchmark driver commands', - help='See COMMAND -h for additional arguments', metavar='COMMAND') + title="Swift benchmark driver commands", + help="See COMMAND -h for additional arguments", + metavar="COMMAND", + ) shared_benchmarks_parser = argparse.ArgumentParser(add_help=False) benchmarks_group = shared_benchmarks_parser.add_mutually_exclusive_group() benchmarks_group.add_argument( - 'benchmarks', + "benchmarks", default=[], - help='benchmark to run (default: all)', nargs='*', metavar="BENCHMARK") + help="benchmark to run (default: all)", + nargs="*", + metavar="BENCHMARK", + ) benchmarks_group.add_argument( - '-f', '--filter', dest='filters', action='append', - help='run all tests whose name match regular expression PATTERN, ' + - 'multiple filters are supported', metavar="PATTERN") + "-f", + "--filter", + dest="filters", + action="append", + help="run all tests whose name match regular expression PATTERN, " + + "multiple filters are supported", + metavar="PATTERN", + ) shared_benchmarks_parser.add_argument( - '-t', '--tests', - help='directory containing Benchmark_O{,none,size} ' + - '(default: DRIVER_DIR)', - default=DRIVER_DIR) + "-t", + "--tests", + help="directory containing Benchmark_O{,none,size} " + "(default: DRIVER_DIR)", + default=DRIVER_DIR, + ) shared_benchmarks_parser.add_argument( - '-o', '--optimization', - metavar='OPT', - choices=['O', 'Onone', 'Osize'], - help='optimization level to use: {O,Onone,Osize}, (default: O)', - default='O') + "-o", + "--optimization", + metavar="OPT", + choices=["O", "Onone", "Osize"], + help="optimization level to use: {O,Onone,Osize}, (default: O)", + default="O", + ) run_parser = subparsers.add_parser( - 'run', - help='Run benchmarks and output results to stdout', - parents=[shared_benchmarks_parser]) + "run", + help="Run benchmarks and output results to stdout", + parents=[shared_benchmarks_parser], + ) run_parser.add_argument( - '-i', '--independent-samples', - help='number of times to run each test (default: 1)', - type=positive_int, default=1) + "-i", + "--independent-samples", + help="number of times to run each test (default: 1)", + type=positive_int, + default=1, + ) run_parser.add_argument( - '--output-dir', - help='log results to directory (default: no logging)') + "--output-dir", help="log results to directory (default: no logging)" + ) run_parser.add_argument( - '--swift-repo', - help='absolute path to the Swift source repository') + "--swift-repo", help="absolute path to the Swift source repository" + ) run_parser.set_defaults(func=BenchmarkDriver.run_benchmarks) check_parser = subparsers.add_parser( - 'check', - 
help='', - parents=[shared_benchmarks_parser]) + "check", help="", parents=[shared_benchmarks_parser] + ) check_group = check_parser.add_mutually_exclusive_group() check_group.add_argument( - '-v', '--verbose', action='store_true', - help='show more details during benchmark analysis') + "-v", + "--verbose", + action="store_true", + help="show more details during benchmark analysis", + ) check_group.add_argument( - '-md', '--markdown', action='store_true', - help='format report as Markdown table') + "-md", "--markdown", action="store_true", help="format report as Markdown table" + ) check_parser.set_defaults(func=BenchmarkDoctor.run_check) - compare_parser = subparsers.add_parser( - 'compare', - help='Compare benchmark results') + compare_parser = subparsers.add_parser("compare", help="Compare benchmark results") compare_parser.add_argument( - '--log-dir', required=True, - help='directory containing benchmark logs') + "--log-dir", required=True, help="directory containing benchmark logs" + ) compare_parser.add_argument( - '--swift-repo', required=True, - help='absolute path to the Swift source repository') + "--swift-repo", + required=True, + help="absolute path to the Swift source repository", + ) compare_parser.add_argument( - '--compare-script', required=True, - help='absolute path to compare script') + "--compare-script", required=True, help="absolute path to compare script" + ) compare_parser.add_argument( - '--baseline-branch', default='master', - help='attempt to compare results to baseline results for specified ' - 'branch (default: master)') + "--baseline-branch", + default="master", + help="attempt to compare results to baseline results for specified " + "branch (default: master)", + ) compare_parser.set_defaults(func=compare) return parser.parse_args(args) @@ -770,5 +955,5 @@ def main(): return args.func(args) -if __name__ == '__main__': +if __name__ == "__main__": exit(main()) diff --git a/benchmark/scripts/Benchmark_GuardMalloc.in b/benchmark/scripts/Benchmark_GuardMalloc.in index e7d001d4bfa1d..872179e1d28de 100644 --- a/benchmark/scripts/Benchmark_GuardMalloc.in +++ b/benchmark/scripts/Benchmark_GuardMalloc.in @@ -21,37 +21,36 @@ sys.path.append("@PATH_TO_DRIVER_LIBRARY@") import perf_test_driver # noqa (E402 module level import not at top of file) # Regexes for the XFAIL_LIST. 
Matches against '([Onone|O|Osize],TestName)' -XFAIL_LIST = [ -] +XFAIL_LIST = [] class GuardMallocResult(perf_test_driver.Result): - def __init__(self, name, status): perf_test_driver.Result.__init__(self, name, status, "", XFAIL_LIST) class GuardMallocBenchmarkDriver(perf_test_driver.BenchmarkDriver): - def __init__(self, binary, xfail_list): perf_test_driver.BenchmarkDriver.__init__( - self, binary, xfail_list, - enable_parallel=True) + self, binary, xfail_list, enable_parallel=True + ) self.new_env = os.environ.copy() - self.new_env['DYLD_INSERT_LIBRARIES'] = '/usr/lib/libgmalloc.dylib' + self.new_env["DYLD_INSERT_LIBRARIES"] = "/usr/lib/libgmalloc.dylib" def prepare_input(self, name): - return {'env': self.new_env} + return {"env": self.new_env} def process_input(self, data): - test_name = '({},{})'.format(data['opt'], data['test_name']) + test_name = "({},{})".format(data["opt"], data["test_name"]) print("Running {}...".format(test_name)) sys.stdout.flush() p = subprocess.Popen( - [data['path'], data['test_name'], '--num-iters=2'], - env=data['env'], stderr=open('/dev/null', 'w'), - stdout=open('/dev/null', 'w')) + [data["path"], data["test_name"], "--num-iters=2"], + env=data["env"], + stderr=open("/dev/null", "w"), + stdout=open("/dev/null", "w"), + ) status = p.wait() return GuardMallocResult(test_name, status) diff --git a/benchmark/scripts/Benchmark_QuickCheck.in b/benchmark/scripts/Benchmark_QuickCheck.in index 0599d9eb2c8d5..a2cc257476240 100644 --- a/benchmark/scripts/Benchmark_QuickCheck.in +++ b/benchmark/scripts/Benchmark_QuickCheck.in @@ -12,7 +12,6 @@ # # ===---------------------------------------------------------------------===// -import json import os import subprocess import sys @@ -23,47 +22,48 @@ import perf_test_driver # noqa (E402 module level import not at top of file) # This is a hacked up XFAIL list. It should really be a json file, but it will # work for now. Add in the exact name of the pass to XFAIL. -XFAIL_LIST = [ -] +XFAIL_LIST = [] class QuickCheckResult(perf_test_driver.Result): - def __init__(self, name, success): - assert(isinstance(success, bool)) + assert isinstance(success, bool) did_fail = not success perf_test_driver.Result.__init__(self, name, did_fail, "", XFAIL_LIST) def print_data(self, max_test_len): - fmt = '{:<%d}{:<10}' % (max_test_len + 5) + fmt = "{:<%d}{:<10}" % (max_test_len + 5) print(fmt.format(self.get_name(), self.get_result())) class QuickCheckBenchmarkDriver(perf_test_driver.BenchmarkDriver): - def __init__(self, binary, xfail_list, num_iters, opt_levels): perf_test_driver.BenchmarkDriver.__init__( - self, binary, xfail_list, - enable_parallel=True, - opt_levels=opt_levels) + self, binary, xfail_list, enable_parallel=True, opt_levels=opt_levels + ) self.num_iters = num_iters def print_data_header(self, max_test_len): - fmt = '{:<%d}{:<10}' % (max_test_len + 5) - print(fmt.format('Name', 'Result')) + fmt = "{:<%d}{:<10}" % (max_test_len + 5) + print(fmt.format("Name", "Result")) # Propagate any data from this class that is needed for individual # tests. The reason this is needed is to avoid issues with attempting to # access a value in a different process. 
def prepare_input(self, name): - return {'num_samples': 1, 'num_iters': self.num_iters} + return {"num_samples": 1, "num_iters": self.num_iters} def run_test_inner(self, data, num_iters): - p = subprocess.Popen([ - data['path'], - "--num-samples={}".format(data['num_samples']), - "--num-iters={}".format(num_iters), data['test_name']], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) + p = subprocess.Popen( + [ + data["path"], + "--num-samples={}".format(data["num_samples"]), + "--num-iters={}".format(num_iters), + data["test_name"], + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) error_out = p.communicate()[1].split("\n") result = p.returncode if result is None: @@ -75,20 +75,21 @@ class QuickCheckBenchmarkDriver(perf_test_driver.BenchmarkDriver): def run_test(self, data, num_iters): try: args = [data, num_iters] - result = perf_test_driver.run_with_timeout(self.run_test_inner, - args) + perf_test_driver.run_with_timeout(self.run_test_inner, args) except Exception, e: - sys.stderr.write("Child Process Failed! (%s,%s). Error: %s\n" % ( - data['path'], data['test_name'], e)) + sys.stderr.write( + "Child Process Failed! (%s,%s). Error: %s\n" + % (data["path"], data["test_name"], e) + ) sys.stderr.flush() return None return True def process_input(self, data): - test_name = '({},{})'.format(data['opt'], data['test_name']) + test_name = "({},{})".format(data["opt"], data["test_name"]) print("Running {}...".format(test_name)) sys.stdout.flush() - if self.run_test(data, data['num_iters']) is None: + if self.run_test(data, data["num_iters"]) is None: return QuickCheckResult(test_name, success=False) return QuickCheckResult(test_name, success=True) @@ -98,13 +99,17 @@ SWIFT_BIN_DIR = os.path.dirname(os.path.abspath(__file__)) def parse_args(): import argparse + parser = argparse.ArgumentParser() parser.add_argument( - '--filter', type=str, default=None, - help='Filter out any test that does not match the given regex') - parser.add_argument('--num-iters', type=int, default=2) + "--filter", + type=str, + default=None, + help="Filter out any test that does not match the given regex", + ) + parser.add_argument("--num-iters", type=int, default=2) default_opt_levels = perf_test_driver.BenchmarkDriver_OptLevels - parser.add_argument('--opt-level', choices=default_opt_levels) + parser.add_argument("--opt-level", choices=default_opt_levels) return parser.parse_args() @@ -113,9 +118,10 @@ if __name__ == "__main__": opt_levels = perf_test_driver.BenchmarkDriver_OptLevels if args.opt_level is not None: opt_levels = [args.opt_level] - l = QuickCheckBenchmarkDriver(SWIFT_BIN_DIR, XFAIL_LIST, args.num_iters, - opt_levels) - if l.run(args.filter): + driver = QuickCheckBenchmarkDriver( + SWIFT_BIN_DIR, XFAIL_LIST, args.num_iters, opt_levels + ) + if driver.run(args.filter): sys.exit(0) else: sys.exit(-1) diff --git a/benchmark/scripts/Benchmark_RuntimeLeaksRunner.in b/benchmark/scripts/Benchmark_RuntimeLeaksRunner.in index 2a7dd0d81d986..756af2348c6b5 100644 --- a/benchmark/scripts/Benchmark_RuntimeLeaksRunner.in +++ b/benchmark/scripts/Benchmark_RuntimeLeaksRunner.in @@ -23,26 +23,26 @@ import perf_test_driver # noqa (E402 module level import not at top of file) # This is a hacked up XFAIL list. It should really be a json file, but it will # work for now. Add in the exact name of the pass to XFAIL. -XFAIL_LIST = [ -] +XFAIL_LIST = [] # Global Objective-C classes created by various frameworks. We do not care # about these. 
-IGNORABLE_GLOBAL_OBJC_CLASSES = set([ - '__NSPlaceholderDate', - 'NSCache', - '__NSPlaceholderTimeZone', - 'NSPlaceholderNumber', - 'NSPlaceholderString', - '__NSPlaceholderArray', - '__NSPlaceholderDictionary', - '_NSPlaceholderData', - '_NSJSONReader' -]) +IGNORABLE_GLOBAL_OBJC_CLASSES = set( + [ + "__NSPlaceholderDate", + "NSCache", + "__NSPlaceholderTimeZone", + "NSPlaceholderNumber", + "NSPlaceholderString", + "__NSPlaceholderArray", + "__NSPlaceholderDictionary", + "_NSPlaceholderData", + "_NSJSONReader", + ] +) class LeaksRunnerResult(perf_test_driver.Result): - def __init__(self, name, count=None): # True = 1, False = 0. # @@ -57,36 +57,39 @@ class LeaksRunnerResult(perf_test_driver.Result): return "N/A" def print_data(self, max_test_len): - fmt = '{:<%d}{:<10}{:}' % (max_test_len + 5) - print(fmt.format(self.get_name(), self.get_result(), - self.get_count())) + fmt = "{:<%d}{:<10}{:}" % (max_test_len + 5) + print(fmt.format(self.get_name(), self.get_result(), self.get_count())) class LeaksRunnerBenchmarkDriver(perf_test_driver.BenchmarkDriver): - def __init__(self, binary, xfail_list, num_samples, num_iters): perf_test_driver.BenchmarkDriver.__init__( - self, binary, xfail_list, - enable_parallel=True) + self, binary, xfail_list, enable_parallel=True + ) self.num_samples = num_samples self.num_iters = num_iters def print_data_header(self, max_test_len): - fmt = '{:<%d}{:<10}{:}' % (max_test_len + 5) - print(fmt.format('Name', 'Result', 'RC Delta')) + fmt = "{:<%d}{:<10}{:}" % (max_test_len + 5) + print(fmt.format("Name", "Result", "RC Delta")) # Propagate any data from this class that is needed for individual # tests. The reason this is needed is to avoid issues with attempting to # access a value in a different process. def prepare_input(self, name): - return {'num_samples': self.num_samples, 'num_iters': self.num_iters} + return {"num_samples": self.num_samples, "num_iters": self.num_iters} def run_test_inner(self, data, num_iters): - p = subprocess.Popen([ - data['path'], - "--num-samples={}".format(data['num_samples']), - "--num-iters={}".format(num_iters), data['test_name']], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) + p = subprocess.Popen( + [ + data["path"], + "--num-samples={}".format(data["num_samples"]), + "--num-iters={}".format(num_iters), + data["test_name"], + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) error_out = p.communicate()[1].split("\n") result = p.returncode if result is None: @@ -98,11 +101,12 @@ class LeaksRunnerBenchmarkDriver(perf_test_driver.BenchmarkDriver): def run_test(self, data, num_iters): try: args = [data, num_iters] - result = perf_test_driver.run_with_timeout(self.run_test_inner, - args) + result = perf_test_driver.run_with_timeout(self.run_test_inner, args) except Exception, e: - sys.stderr.write("Child Process Failed! (%s,%s). Error: %s\n" % ( - data['path'], data['test_name'], e)) + sys.stderr.write( + "Child Process Failed! (%s,%s). Error: %s\n" + % (data["path"], data["test_name"], e) + ) sys.stderr.flush() return None @@ -110,26 +114,27 @@ class LeaksRunnerBenchmarkDriver(perf_test_driver.BenchmarkDriver): # We grab the second line since swift globals get lazily created in # the first iteration. 
d = json.loads(result[1]) - d['objc_objects'] = [x for x in d['objc_objects'] - if x not in IGNORABLE_GLOBAL_OBJC_CLASSES] - d['objc_count'] = len(d['objc_objects']) + d["objc_objects"] = [ + x for x in d["objc_objects"] if x not in IGNORABLE_GLOBAL_OBJC_CLASSES + ] + d["objc_count"] = len(d["objc_objects"]) - total_count = d['objc_count'] + d['swift_count'] + total_count = d["objc_count"] + d["swift_count"] return total_count except Exception: - tmp = (data['path'], data['test_name']) + tmp = (data["path"], data["test_name"]) sys.stderr.write("Failed parse output! (%s,%s)\n" % tmp) sys.stderr.flush() return None def process_input(self, data): - test_name = '({},{})'.format(data['opt'], data['test_name']) + test_name = "({},{})".format(data["opt"], data["test_name"]) print("Running {}...".format(test_name)) sys.stdout.flush() - total_count1 = self.run_test(data, data['num_iters']) + total_count1 = self.run_test(data, data["num_iters"]) if total_count1 is None: return LeaksRunnerResult(test_name) - total_count2 = self.run_test(data, data['num_iters'] + 1) + total_count2 = self.run_test(data, data["num_iters"] + 1) if total_count2 is None: return LeaksRunnerResult(test_name) return LeaksRunnerResult(test_name, total_count2 - total_count1) @@ -140,19 +145,24 @@ SWIFT_BIN_DIR = os.path.dirname(os.path.abspath(__file__)) def parse_args(): import argparse + parser = argparse.ArgumentParser() parser.add_argument( - '-filter', type=str, default=None, - help='Filter out any test that does not match the given regex') - parser.add_argument('-num-samples', type=int, default=2) - parser.add_argument('-num-iters', type=int, default=2) + "-filter", + type=str, + default=None, + help="Filter out any test that does not match the given regex", + ) + parser.add_argument("-num-samples", type=int, default=2) + parser.add_argument("-num-iters", type=int, default=2) return parser.parse_args() if __name__ == "__main__": args = parse_args() driver = LeaksRunnerBenchmarkDriver( - SWIFT_BIN_DIR, XFAIL_LIST, args.num_samples, args.num_iters) + SWIFT_BIN_DIR, XFAIL_LIST, args.num_samples, args.num_iters + ) if driver.run(args.filter): sys.exit(0) else: diff --git a/benchmark/scripts/build_linux.py b/benchmark/scripts/build_linux.py index 64bee4692bbb1..4404815931182 100755 --- a/benchmark/scripts/build_linux.py +++ b/benchmark/scripts/build_linux.py @@ -7,39 +7,45 @@ def main(): p = argparse.ArgumentParser() - p.add_argument('cmake_path', help='The cmake binary to use') - p.add_argument('swift_src_dir', help='The swift source directory') - p.add_argument('clang', help='The path to the clang binary to use') - p.add_argument('swift_root_dir', - help='A path to a swift root produced by installing ' - 'Swift and Foundation together. We infer swiftc ' - 'from here') - p.add_argument('destdir', help='The directory to perform the actual ' - 'build in') - p.add_argument('--clean', action='store_true', - help='Delete destdir before performing a build.') + p.add_argument("cmake_path", help="The cmake binary to use") + p.add_argument("swift_src_dir", help="The swift source directory") + p.add_argument("clang", help="The path to the clang binary to use") + p.add_argument( + "swift_root_dir", + help="A path to a swift root produced by installing " + "Swift and Foundation together. We infer swiftc " + "from here", + ) + p.add_argument("destdir", help="The directory to perform the actual " "build in") + p.add_argument( + "--clean", action="store_true", help="Delete destdir before performing a build." 
+ ) args = p.parse_args() if args.clean: print("Asked to clean... Cleaning!") - subprocess.check_output(['/bin/rm', '-rfv', args.destdir]) - subprocess.check_call(['/bin/mkdir', '-p', args.destdir]) + subprocess.check_output(["/bin/rm", "-rfv", args.destdir]) + subprocess.check_call(["/bin/mkdir", "-p", args.destdir]) os.chdir(args.destdir) configureInvocation = [ - args.cmake_path, '-GNinja', - '-DSWIFT_EXEC={}/bin/swiftc'.format(args.swift_root_dir), - '-DCLANG_EXEC={}'.format(args.clang), - '-DSWIFT_LIBRARY_PATH={}/lib/swift'.format(args.swift_root_dir), - '{}/benchmark'.format(args.swift_src_dir) + args.cmake_path, + "-GNinja", + "-DSWIFT_EXEC={}/bin/swiftc".format(args.swift_root_dir), + "-DCLANG_EXEC={}".format(args.clang), + "-DSWIFT_LIBRARY_PATH={}/lib/swift".format(args.swift_root_dir), + "{}/benchmark".format(args.swift_src_dir), ] - print('COMMAND: {}'.format(' '.join(configureInvocation))) + print("COMMAND: {}".format(" ".join(configureInvocation))) subprocess.check_call(configureInvocation) buildInvocation = [ - args.cmake_path, '--build', args.destdir, '--', - 'swift-benchmark-linux-x86_64' + args.cmake_path, + "--build", + args.destdir, + "--", + "swift-benchmark-linux-x86_64", ] - print('COMMAND: {}'.format(' '.join(buildInvocation))) + print("COMMAND: {}".format(" ".join(buildInvocation))) subprocess.check_call(buildInvocation) diff --git a/benchmark/scripts/build_script_helper.py b/benchmark/scripts/build_script_helper.py index a3f999042289f..53bf7b19f6862 100755 --- a/benchmark/scripts/build_script_helper.py +++ b/benchmark/scripts/build_script_helper.py @@ -9,50 +9,54 @@ def perform_build(args, swiftbuild_path, config, binary_name, opt_flag): - assert(config in ['debug', 'release']) - assert(binary_name in ['Benchmark_O', 'Benchmark_Osize', - 'Benchmark_Onone']) - assert(opt_flag in ['-O', '-Osize', '-Onone']) + assert config in ["debug", "release"] + assert binary_name in ["Benchmark_O", "Benchmark_Osize", "Benchmark_Onone"] + assert opt_flag in ["-O", "-Osize", "-Onone"] inner_build_dir = os.path.join(args.build_path, binary_name) swiftbuild_args = [ swiftbuild_path, - '--package-path', args.package_path, - '--build-path', inner_build_dir, - '--configuration', config, - '-Xswiftc', '-Xllvm', - '-Xswiftc', '-align-module-to-page-size', - '-Xswiftc', opt_flag, + "--package-path", + args.package_path, + "--build-path", + inner_build_dir, + "--configuration", + config, + "-Xswiftc", + "-Xllvm", + "-Xswiftc", + "-align-module-to-page-size", + "-Xswiftc", + opt_flag, ] if args.verbose: - swiftbuild_args.append('--verbose') + swiftbuild_args.append("--verbose") subprocess.call(swiftbuild_args) # Copy the benchmark file into the final ./bin directory. 
- binpath = os.path.join(inner_build_dir, config, 'SwiftBench') - finalpath = os.path.join(args.build_path, 'bin', binary_name) + binpath = os.path.join(inner_build_dir, config, "SwiftBench") + finalpath = os.path.join(args.build_path, "bin", binary_name) shutil.copy(binpath, finalpath) def main(): parser = argparse.ArgumentParser() - parser.add_argument('--verbose', '-v', action='store_true') - parser.add_argument('--package-path', type=str, required=True) - parser.add_argument('--build-path', type=str, required=True) - parser.add_argument('--toolchain', type=str, required=True) + parser.add_argument("--verbose", "-v", action="store_true") + parser.add_argument("--package-path", type=str, required=True) + parser.add_argument("--build-path", type=str, required=True) + parser.add_argument("--toolchain", type=str, required=True) args = parser.parse_args() # Create our bin directory so we can copy in the binaries. - bin_dir = os.path.join(args.build_path, 'bin') + bin_dir = os.path.join(args.build_path, "bin") if not os.path.isdir(bin_dir): os.makedirs(bin_dir) - swiftbuild_path = os.path.join(args.toolchain, 'usr', 'bin', 'swift-build') - perform_build(args, swiftbuild_path, 'debug', 'Benchmark_Onone', '-Onone') - perform_build(args, swiftbuild_path, 'release', 'Benchmark_Osize', - '-Osize') - perform_build(args, swiftbuild_path, 'release', 'Benchmark_O', '-O') + swiftbuild_path = os.path.join(args.toolchain, "usr", "bin", "swift-build") + perform_build(args, swiftbuild_path, "debug", "Benchmark_Onone", "-Onone") + perform_build(args, swiftbuild_path, "release", "Benchmark_Osize", "-Osize") + perform_build(args, swiftbuild_path, "release", "Benchmark_O", "-O") if __name__ == "__main__": diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index 017ba24c10229..69450cb4b97b5 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -37,7 +37,7 @@ class `ReportFormatter` creates the test comparison report in specified format. from math import ceil, sqrt -class Sample(namedtuple('Sample', 'i num_iters runtime')): +class Sample(namedtuple("Sample", "i num_iters runtime")): u"""Single benchmark measurement. Initialized with: @@ -48,10 +48,10 @@ class Sample(namedtuple('Sample', 'i num_iters runtime')): def __repr__(self): """Shorter Sample formating for debugging purposes.""" - return 's({0.i!r}, {0.num_iters!r}, {0.runtime!r})'.format(self) + return "s({0.i!r}, {0.num_iters!r}, {0.runtime!r})".format(self) -class Yield(namedtuple('Yield', 'before_sample after')): +class Yield(namedtuple("Yield", "before_sample after")): u"""Meta-measurement of when the Benchmark_X voluntarily yielded process. 
`before_sample`: index of measurement taken just after returning from yield @@ -79,13 +79,14 @@ def __init__(self, name, samples=None): def __str__(self): """Text summary of benchmark statistics.""" return ( - '{0.name!s} n={0.count!r} ' - 'Min={0.min!r} Q1={0.q1!r} M={0.median!r} Q3={0.q3!r} ' - 'Max={0.max!r} ' - 'R={0.range!r} {0.spread:.2%} IQR={0.iqr!r} ' - 'Mean={0.mean:.0f} SD={0.sd:.0f} CV={0.cv:.2%}' - .format(self) if self.samples else - '{0.name!s} n=0'.format(self)) + "{0.name!s} n={0.count!r} " + "Min={0.min!r} Q1={0.q1!r} M={0.median!r} Q3={0.q3!r} " + "Max={0.max!r} " + "R={0.range!r} {0.spread:.2%} IQR={0.iqr!r} " + "Mean={0.mean:.0f} SD={0.sd:.0f} CV={0.cv:.2%}".format(self) + if self.samples + else "{0.name!s} n=0".format(self) + ) def add(self, sample): """Add sample to collection and recompute statistics.""" @@ -97,8 +98,9 @@ def add(self, sample): def _update_stats(self, sample): old_stats = (self.count, self.mean, self.S_runtime) - _, self.mean, self.S_runtime = ( - self.running_mean_variance(old_stats, sample.runtime)) + _, self.mean, self.S_runtime = self.running_mean_variance( + old_stats, sample.runtime + ) def exclude_outliers(self, top_only=False): """Exclude outliers by applying Interquartile Range Rule. @@ -112,8 +114,11 @@ def exclude_outliers(self, top_only=False): benchmark runtimes in the microbenchmark range to filter out the environment noise caused by preemtive multitasking. """ - lo = (0 if top_only else - bisect_left(self._runtimes, int(self.q1 - 1.5 * self.iqr))) + lo = ( + 0 + if top_only + else bisect_left(self._runtimes, int(self.q1 - 1.5 * self.iqr)) + ) hi = bisect_right(self._runtimes, int(self.q3 + 1.5 * self.iqr)) outliers = self.samples[:lo] + self.samples[hi:] @@ -181,8 +186,7 @@ def iqr(self): @property def sd(self): u"""Standard Deviation (μs).""" - return (0 if self.count < 2 else - sqrt(self.S_runtime / (self.count - 1))) + return 0 if self.count < 2 else sqrt(self.S_runtime / (self.count - 1)) @staticmethod def running_mean_variance((k, M_, S_), x): @@ -229,14 +233,13 @@ class PerformanceTestResult(object): `--quantile`parameter. In both cases, the last column, MAX_RSS is optional. """ - def __init__(self, csv_row, quantiles=False, memory=False, delta=False, - meta=False): + def __init__(self, csv_row, quantiles=False, memory=False, delta=False, meta=False): """Initialize from a row of multiple columns with benchmark summary. The row is an iterable, such as a row provided by the CSV parser. 
""" - self.test_num = csv_row[0] # Ordinal number of the test - self.name = csv_row[1] # Name of the performance test + self.test_num = csv_row[0] # Ordinal number of the test + self.name = csv_row[1] # Name of the performance test self.num_samples = int(csv_row[2]) # Number of measurements taken if quantiles: # Variable number of columns representing quantiles @@ -244,50 +247,63 @@ def __init__(self, csv_row, quantiles=False, memory=False, delta=False, runtimes = csv_row[3:mem_index] if memory or meta else csv_row[3:] if delta: runtimes = [int(x) if x else 0 for x in runtimes] - runtimes = reduce(lambda l, x: l.append(l[-1] + x) or # runnin - l if l else [x], runtimes, None) # total + runtimes = reduce( + lambda l, x: l.append(l[-1] + x) or l if l else [x], # runnin + runtimes, + None, + ) # total num_values = len(runtimes) if self.num_samples < num_values: # remove repeated samples quantile = num_values - 1 qs = [float(i) / float(quantile) for i in range(0, num_values)] - indices = [max(0, int(ceil(self.num_samples * float(q))) - 1) - for q in qs] - runtimes = [runtimes[indices.index(i)] - for i in range(0, self.num_samples)] + indices = [ + max(0, int(ceil(self.num_samples * float(q))) - 1) for q in qs + ] + runtimes = [ + runtimes[indices.index(i)] for i in range(0, self.num_samples) + ] self.samples = PerformanceTestSamples( - self.name, - [Sample(None, None, int(runtime)) for runtime in runtimes]) + self.name, [Sample(None, None, int(runtime)) for runtime in runtimes] + ) self.samples.exclude_outliers(top_only=True) sams = self.samples - self.min, self.max, self.median, self.mean, self.sd = \ - sams.min, sams.max, sams.median, sams.mean, sams.sd - self.max_rss = ( # Maximum Resident Set Size (B) - int(csv_row[mem_index]) if memory else None) + self.min, self.max, self.median, self.mean, self.sd = ( + sams.min, + sams.max, + sams.median, + sams.mean, + sams.sd, + ) + self.max_rss = ( # Maximum Resident Set Size (B) + int(csv_row[mem_index]) if memory else None + ) else: # Legacy format with statistics for normal distribution. - self.min = int(csv_row[3]) # Minimum runtime (μs) - self.max = int(csv_row[4]) # Maximum runtime (μs) - self.mean = float(csv_row[5]) # Mean (average) runtime (μs) - self.sd = float(csv_row[6]) # Standard Deviation (μs) - self.median = int(csv_row[7]) # Median runtime (μs) - self.max_rss = ( # Maximum Resident Set Size (B) - int(csv_row[8]) if len(csv_row) > 8 else None) + self.min = int(csv_row[3]) # Minimum runtime (μs) + self.max = int(csv_row[4]) # Maximum runtime (μs) + self.mean = float(csv_row[5]) # Mean (average) runtime (μs) + self.sd = float(csv_row[6]) # Standard Deviation (μs) + self.median = int(csv_row[7]) # Median runtime (μs) + self.max_rss = ( # Maximum Resident Set Size (B) + int(csv_row[8]) if len(csv_row) > 8 else None + ) self.samples = None # Optional measurement metadata. The number of: # memory pages used, involuntary context switches and voluntary yields - self.mem_pages, self.involuntary_cs, self.yield_count = \ + self.mem_pages, self.involuntary_cs, self.yield_count = ( [int(x) for x in csv_row[-3:]] if meta else (None, None, None) + ) self.yields = None self.setup = None def __repr__(self): """Short summary for debugging purposes.""" return ( - '' - .format(self)) + "".format(self) + ) def merge(self, r): """Merge two results. 
@@ -302,8 +318,13 @@ def merge(self, r): map(self.samples.add, r.samples.samples) sams = self.samples self.num_samples = sams.num_samples - self.min, self.max, self.median, self.mean, self.sd = \ - sams.min, sams.max, sams.median, sams.mean, sams.sd + self.min, self.max, self.median, self.mean, self.sd = ( + sams.min, + sams.max, + sams.median, + sams.mean, + sams.sd, + ) else: self.min = min(self.min, r.min) self.max = max(self.max, r.max) @@ -315,8 +336,8 @@ def merge(self, r): # Metadata def minimum(a, b): # work around None being less than everything - return (min(filter(lambda x: x is not None, [a, b])) if any([a, b]) - else None) + return min(filter(lambda x: x is not None, [a, b])) if any([a, b]) else None + self.max_rss = minimum(self.max_rss, r.max_rss) self.setup = minimum(self.setup, r.setup) @@ -339,12 +360,13 @@ def __init__(self, old, new): # Test runtime improvement in % ratio = (new.min + 0.001) / (old.min + 0.001) - self.delta = ((ratio - 1) * 100) + self.delta = (ratio - 1) * 100 # Indication of dubious changes: when result's MIN falls inside the # (MIN, MAX) interval of result they are being compared with. - self.is_dubious = ((old.min < new.min and new.min < old.max) or - (new.min < old.min and old.min < new.max)) + self.is_dubious = (old.min < new.min and new.min < old.max) or ( + new.min < old.min and old.min < new.max + ) class LogParser(object): @@ -371,15 +393,20 @@ def _reset(self): # Parse lines like this # #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs) results_re = re.compile( - r'( *\d+[, \t]+[\w.\-\?!]+[, \t]+' + # #,TEST - r'[, \t]+'.join([r'\d+'] * 2) + # at least 2... - r'(?:[, \t]+\d*)*)') # ...or more numeric columns + r"( *\d+[, \t]+[\w.\-\?!]+[, \t]+" + + r"[, \t]+".join([r"\d+"] * 2) # #,TEST + + r"(?:[, \t]+\d*)*)" # at least 2... 
+ ) # ...or more numeric columns def _append_result(self, result): - columns = result.split(',') if ',' in result else result.split() + columns = result.split(",") if "," in result else result.split() r = PerformanceTestResult( - columns, quantiles=self.quantiles, memory=self.memory, - delta=self.delta, meta=self.meta) + columns, + quantiles=self.quantiles, + memory=self.memory, + delta=self.delta, + meta=self.meta, + ) r.setup = self.setup r.max_rss = r.max_rss or self.max_rss r.mem_pages = r.mem_pages or self.mem_pages @@ -397,45 +424,43 @@ def _store_memory_stats(self, max_rss, mem_pages): self.mem_pages = int(mem_pages) def _configure_format(self, header): - self.quantiles = 'MEAN' not in header - self.memory = 'MAX_RSS' in header - self.meta = 'PAGES' in header - self.delta = '𝚫' in header + self.quantiles = "MEAN" not in header + self.memory = "MAX_RSS" in header + self.meta = "PAGES" in header + self.delta = "𝚫" in header # Regular expression and action to take when it matches the parsed line state_actions = { results_re: _append_result, - # Verbose mode adds new productions: # Adaptively determined N; test loop multiple adjusting runtime to ~1s - re.compile(r'\s+Measuring with scale (\d+).'): - (lambda self, num_iters: setattr(self, 'num_iters', num_iters)), - - re.compile(r'\s+Sample (\d+),(\d+)'): - (lambda self, i, runtime: - self.samples.append( - Sample(int(i), int(self.num_iters), int(runtime)))), - - re.compile(r'\s+SetUp (\d+)'): - (lambda self, setup: setattr(self, 'setup', int(setup))), - - re.compile(r'\s+Yielding after ~(\d+) μs'): - (lambda self, since_last_yield: - self.yields.append( - Yield(len(self.samples), int(since_last_yield)))), - - re.compile(r'( *#[, \t]+TEST[, \t]+SAMPLES[, \t]+MIN.*)'): - _configure_format, - + re.compile(r"\s+Measuring with scale (\d+)."): ( + lambda self, num_iters: setattr(self, "num_iters", num_iters) + ), + re.compile(r"\s+Sample (\d+),(\d+)"): ( + lambda self, i, runtime: self.samples.append( + Sample(int(i), int(self.num_iters), int(runtime)) + ) + ), + re.compile(r"\s+SetUp (\d+)"): ( + lambda self, setup: setattr(self, "setup", int(setup)) + ), + re.compile(r"\s+Yielding after ~(\d+) μs"): ( + lambda self, since_last_yield: self.yields.append( + Yield(len(self.samples), int(since_last_yield)) + ) + ), + re.compile(r"( *#[, \t]+TEST[, \t]+SAMPLES[, \t]+MIN.*)"): _configure_format, # Environmental statistics: memory usage and context switches - re.compile(r'\s+MAX_RSS \d+ - \d+ = (\d+) \((\d+) pages\)'): - _store_memory_stats, - - re.compile(r'\s+VCS \d+ - \d+ = (\d+)'): - (lambda self, vcs: setattr(self, 'voluntary_cs', int(vcs))), - - re.compile(r'\s+ICS \d+ - \d+ = (\d+)'): - (lambda self, ics: setattr(self, 'involuntary_cs', int(ics))), + re.compile( + r"\s+MAX_RSS \d+ - \d+ = (\d+) \((\d+) pages\)" + ): _store_memory_stats, + re.compile(r"\s+VCS \d+ - \d+ = (\d+)"): ( + lambda self, vcs: setattr(self, "voluntary_cs", int(vcs)) + ), + re.compile(r"\s+ICS \d+ - \d+ = (\d+)"): ( + lambda self, ics: setattr(self, "involuntary_cs", int(ics)) + ), } def parse_results(self, lines): @@ -511,10 +536,10 @@ def __init__(self, old_results, new_results, delta_threshold): added_tests = new_tests.difference(old_tests) removed_tests = old_tests.difference(new_tests) - self.added = sorted([new_results[t] for t in added_tests], - key=lambda r: r.name) - self.removed = sorted([old_results[t] for t in removed_tests], - key=lambda r: r.name) + self.added = sorted([new_results[t] for t in added_tests], key=lambda r: r.name) + self.removed = sorted( 
+ [old_results[t] for t in removed_tests], key=lambda r: r.name + ) def compare(name): return ResultComparison(old_results[name], new_results[name]) @@ -525,19 +550,24 @@ def partition(l, p): return reduce(lambda x, y: x[not p(y)].append(y) or x, l, ([], [])) decreased, not_decreased = partition( - comparisons, lambda c: c.ratio < (1 - delta_threshold)) + comparisons, lambda c: c.ratio < (1 - delta_threshold) + ) increased, unchanged = partition( - not_decreased, lambda c: c.ratio > (1 + delta_threshold)) + not_decreased, lambda c: c.ratio > (1 + delta_threshold) + ) # sorted partitions names = [c.name for c in comparisons] comparisons = dict(zip(names, comparisons)) - self.decreased = [comparisons[c.name] - for c in sorted(decreased, key=lambda c: -c.delta)] - self.increased = [comparisons[c.name] - for c in sorted(increased, key=lambda c: c.delta)] - self.unchanged = [comparisons[c.name] - for c in sorted(unchanged, key=lambda c: c.name)] + self.decreased = [ + comparisons[c.name] for c in sorted(decreased, key=lambda c: -c.delta) + ] + self.increased = [ + comparisons[c.name] for c in sorted(increased, key=lambda c: c.delta) + ] + self.unchanged = [ + comparisons[c.name] for c in sorted(unchanged, key=lambda c: c.name) + ] class ReportFormatter(object): @@ -549,23 +579,25 @@ class ReportFormatter(object): GitHub), `git` and `html`. """ - def __init__(self, comparator, changes_only, - single_table=False): + def __init__(self, comparator, changes_only, single_table=False): """Initialize with `TestComparator` and names of branches.""" self.comparator = comparator self.changes_only = changes_only self.single_table = single_table - PERFORMANCE_TEST_RESULT_HEADER = ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS') - RESULT_COMPARISON_HEADER = ('TEST', 'OLD', 'NEW', 'DELTA', 'RATIO') + PERFORMANCE_TEST_RESULT_HEADER = ("TEST", "MIN", "MAX", "MEAN", "MAX_RSS") + RESULT_COMPARISON_HEADER = ("TEST", "OLD", "NEW", "DELTA", "RATIO") @staticmethod def header_for(result): """Column labels for header row in results table.""" - return (ReportFormatter.PERFORMANCE_TEST_RESULT_HEADER - if isinstance(result, PerformanceTestResult) else - # isinstance(result, ResultComparison) - ReportFormatter.RESULT_COMPARISON_HEADER) + return ( + ReportFormatter.PERFORMANCE_TEST_RESULT_HEADER + if isinstance(result, PerformanceTestResult) + else + # isinstance(result, ResultComparison) + ReportFormatter.RESULT_COMPARISON_HEADER + ) @staticmethod def values(result): @@ -574,53 +606,63 @@ def values(result): Returns tuple of strings to display in the results table. 
""" return ( - (result.name, - str(result.min), str(result.max), str(int(result.mean)), - str(result.max_rss) if result.max_rss else '—') - if isinstance(result, PerformanceTestResult) else + ( + result.name, + str(result.min), + str(result.max), + str(int(result.mean)), + str(result.max_rss) if result.max_rss else "—", + ) + if isinstance(result, PerformanceTestResult) + else # isinstance(result, ResultComparison) - (result.name, - str(result.old.min), str(result.new.min), - '{0:+.1f}%'.format(result.delta), - '{0:.2f}x{1}'.format(result.ratio, - ' (?)' if result.is_dubious else '')) + ( + result.name, + str(result.old.min), + str(result.new.min), + "{0:+.1f}%".format(result.delta), + "{0:.2f}x{1}".format(result.ratio, " (?)" if result.is_dubious else ""), + ) ) def markdown(self): """Report results of benchmark comparisons in Markdown format.""" return self._formatted_text( - label_formatter=lambda s: ('**' + s + '**'), - COLUMN_SEPARATOR=' | ', - DELIMITER_ROW=([':---'] + ['---:'] * 4), - SEPARATOR='  | | | | \n', + label_formatter=lambda s: ("**" + s + "**"), + COLUMN_SEPARATOR=" | ", + DELIMITER_ROW=([":---"] + ["---:"] * 4), + SEPARATOR="  | | | | \n", SECTION="""
 <details {3}>
   <summary>{0} ({1})</summary>
   {2}
 </details>
-""") +""", + ) def git(self): """Report results of benchmark comparisons in 'git' format.""" return self._formatted_text( label_formatter=lambda s: s.upper(), - COLUMN_SEPARATOR=' ', + COLUMN_SEPARATOR=" ", DELIMITER_ROW=None, - SEPARATOR='\n', + SEPARATOR="\n", SECTION=""" -{0} ({1}): \n{2}""") +{0} ({1}): \n{2}""", + ) def _column_widths(self): changed = self.comparator.decreased + self.comparator.increased - results = (changed if self.changes_only else - changed + self.comparator.unchanged) + results = changed if self.changes_only else changed + self.comparator.unchanged results += self.comparator.added + self.comparator.removed widths = [ - map(len, columns) for columns in - [ReportFormatter.PERFORMANCE_TEST_RESULT_HEADER, - ReportFormatter.RESULT_COMPARISON_HEADER] + - [ReportFormatter.values(r) for r in results] + map(len, columns) + for columns in [ + ReportFormatter.PERFORMANCE_TEST_RESULT_HEADER, + ReportFormatter.RESULT_COMPARISON_HEADER, + ] + + [ReportFormatter.values(r) for r in results] ] def max_widths(maximum, widths): @@ -628,8 +670,9 @@ def max_widths(maximum, widths): return reduce(max_widths, widths, [0] * 5) - def _formatted_text(self, label_formatter, COLUMN_SEPARATOR, - DELIMITER_ROW, SEPARATOR, SECTION): + def _formatted_text( + self, label_formatter, COLUMN_SEPARATOR, DELIMITER_ROW, SEPARATOR, SECTION + ): widths = self._column_widths() self.header_printed = False @@ -637,43 +680,62 @@ def justify_columns(contents): return [c.ljust(w) for w, c in zip(widths, contents)] def row(contents): - return ('' if not contents else - COLUMN_SEPARATOR.join(justify_columns(contents)) + '\n') + return ( + "" + if not contents + else COLUMN_SEPARATOR.join(justify_columns(contents)) + "\n" + ) def header(title, column_labels): - labels = (column_labels if not self.single_table else - map(label_formatter, (title, ) + column_labels[1:])) - h = (('' if not self.header_printed else SEPARATOR) + - row(labels) + - (row(DELIMITER_ROW) if not self.header_printed else '')) + labels = ( + column_labels + if not self.single_table + else map(label_formatter, (title,) + column_labels[1:]) + ) + h = ( + ("" if not self.header_printed else SEPARATOR) + + row(labels) + + (row(DELIMITER_ROW) if not self.header_printed else "") + ) if self.single_table and not self.header_printed: self.header_printed = True return h def format_columns(r, is_strong): - return (r if not is_strong else - r[:-1] + ('**' + r[-1] + '**', )) + return r if not is_strong else r[:-1] + ("**" + r[-1] + "**",) def table(title, results, is_strong=False, is_open=False): if not results: - return '' - rows = [row(format_columns(ReportFormatter.values(r), is_strong)) - for r in results] - table = (header(title if self.single_table else '', - ReportFormatter.header_for(results[0])) + - ''.join(rows)) - return (table if self.single_table else - SECTION.format( - title, len(results), table, 'open' if is_open else '')) - - return '\n' + ''.join([ - table('Regression', self.comparator.decreased, True, True), - table('Improvement', self.comparator.increased, True), - ('' if self.changes_only else - table('No Changes', self.comparator.unchanged)), - table('Added', self.comparator.added, is_open=True), - table('Removed', self.comparator.removed, is_open=True) - ]) + return "" + rows = [ + row(format_columns(ReportFormatter.values(r), is_strong)) + for r in results + ] + table = header( + title if self.single_table else "", + ReportFormatter.header_for(results[0]), + ) + "".join(rows) + return ( + table + if self.single_table + else 
SECTION.format( + title, len(results), table, "open" if is_open else "" + ) + ) + + return "\n" + "".join( + [ + table("Regression", self.comparator.decreased, True, True), + table("Improvement", self.comparator.increased, True), + ( + "" + if self.changes_only + else table("No Changes", self.comparator.unchanged) + ), + table("Added", self.comparator.added, is_open=True), + table("Removed", self.comparator.removed, is_open=True), + ] + ) HTML = """ @@ -722,68 +784,90 @@ def table(title, results, is_strong=False, is_open=False): def html(self): """Report results of benchmark comparisons in HTML format.""" + def row(name, old, new, delta, speedup, speedup_color): - return self.HTML_ROW.format( - name, old, new, delta, speedup_color, speedup) + return self.HTML_ROW.format(name, old, new, delta, speedup_color, speedup) def header(contents): - return self.HTML_HEADER_ROW.format(* contents) + return self.HTML_HEADER_ROW.format(*contents) def table(title, results, speedup_color): rows = [ - row(*(ReportFormatter.values(r) + (speedup_color,))) - for r in results + row(*(ReportFormatter.values(r) + (speedup_color,))) for r in results ] - return ('' if not rows else - header((title, len(results)) + - ReportFormatter.header_for(results[0])[1:]) + - ''.join(rows)) + return ( + "" + if not rows + else header( + (title, len(results)) + ReportFormatter.header_for(results[0])[1:] + ) + + "".join(rows) + ) return self.HTML.format( - ''.join([ - table('Regression', self.comparator.decreased, 'red'), - table('Improvement', self.comparator.increased, 'green'), - ('' if self.changes_only else - table('No Changes', self.comparator.unchanged, 'black')), - table('Added', self.comparator.added, ''), - table('Removed', self.comparator.removed, '') - ])) + "".join( + [ + table("Regression", self.comparator.decreased, "red"), + table("Improvement", self.comparator.increased, "green"), + ( + "" + if self.changes_only + else table("No Changes", self.comparator.unchanged, "black") + ), + table("Added", self.comparator.added, ""), + table("Removed", self.comparator.removed, ""), + ] + ) + ) def parse_args(args): """Parse command line arguments and set default values.""" - parser = argparse.ArgumentParser(description='Compare Performance tests.') - parser.add_argument('--old-file', - help='Baseline performance test suite (csv file)', - required=True) - parser.add_argument('--new-file', - help='New performance test suite (csv file)', - required=True) - parser.add_argument('--format', - choices=['markdown', 'git', 'html'], - help='Output format. Default is markdown.', - default="markdown") - parser.add_argument('--output', help='Output file name') - parser.add_argument('--changes-only', - help='Output only affected tests', action='store_true') + parser = argparse.ArgumentParser(description="Compare Performance tests.") + parser.add_argument( + "--old-file", help="Baseline performance test suite (csv file)", required=True + ) + parser.add_argument( + "--new-file", help="New performance test suite (csv file)", required=True + ) + parser.add_argument( + "--format", + choices=["markdown", "git", "html"], + help="Output format. 
Default is markdown.", + default="markdown", + ) + parser.add_argument("--output", help="Output file name") + parser.add_argument( + "--changes-only", help="Output only affected tests", action="store_true" + ) + parser.add_argument( + "--single-table", + help="Combine data in a single table in git and markdown formats", + action="store_true", + ) parser.add_argument( - '--single-table', - help='Combine data in a single table in git and markdown formats', - action='store_true') - parser.add_argument('--delta-threshold', - help='Delta threshold. Default 0.05.', - type=float, default=0.05) + "--delta-threshold", + help="Delta threshold. Default 0.05.", + type=float, + default=0.05, + ) return parser.parse_args(args) -def create_report(old_results, new_results, delta_threshold, format, - changes_only=True, single_table=True): +def create_report( + old_results, + new_results, + delta_threshold, + format, + changes_only=True, + single_table=True, +): comparator = TestComparator(old_results, new_results, delta_threshold) formatter = ReportFormatter(comparator, changes_only, single_table) formats = { - 'markdown': formatter.markdown, - 'git': formatter.git, - 'html': formatter.html + "markdown": formatter.markdown, + "git": formatter.git, + "html": formatter.html, } report = formats[format]() @@ -793,16 +877,20 @@ def create_report(old_results, new_results, delta_threshold, format, def main(): """Compare benchmarks for changes in a formatted report.""" args = parse_args(sys.argv[1:]) - report = create_report(LogParser.results_from_file(args.old_file), - LogParser.results_from_file(args.new_file), - args.delta_threshold, args.format, - args.changes_only, args.single_table) + report = create_report( + LogParser.results_from_file(args.old_file), + LogParser.results_from_file(args.new_file), + args.delta_threshold, + args.format, + args.changes_only, + args.single_table, + ) print(report) if args.output: - with open(args.output, 'w') as f: + with open(args.output, "w") as f: f.write(report) -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main()) diff --git a/benchmark/scripts/create_benchmark.py b/benchmark/scripts/create_benchmark.py index 2e2a4786752ae..cccaae23c76bd 100755 --- a/benchmark/scripts/create_benchmark.py +++ b/benchmark/scripts/create_benchmark.py @@ -7,7 +7,7 @@ def main(): p = argparse.ArgumentParser() - p.add_argument('name', help='The name of the new benchmark to be created') + p.add_argument("name", help="The name of the new benchmark to be created") args = p.parse_args() # adds benchmark to `CMakeLists.txt` @@ -24,19 +24,19 @@ def update_cmakelists(name): """Adds a new entry to the `CMakeLists.txt` file with the given benchmark name. """ - relative_path = create_relative_path('../CMakeLists.txt') + relative_path = create_relative_path("../CMakeLists.txt") file_contents = [] - with open(relative_path, 'r') as f: + with open(relative_path, "r") as f: file_contents = f.readlines() file_new_contents = insert_line_alphabetically( name, - ' single-source/' + name + '\n', + " single-source/" + name + "\n", file_contents, - r" single-source\/([a-zA-Z]+)" + r" single-source\/([a-zA-Z]+)", ) - with open(relative_path, 'w') as f: + with open(relative_path, "w") as f: for line in file_new_contents: f.write(line) @@ -46,17 +46,17 @@ def create_benchmark_file(name): and places it in the `single-source` directory. 
""" - template_path = create_relative_path('Template.swift') - benchmark_template = '' - with open(template_path, 'r') as f: - benchmark_template = ''.join(f.readlines()) + template_path = create_relative_path("Template.swift") + benchmark_template = "" + with open(template_path, "r") as f: + benchmark_template = "".join(f.readlines()) # fill in template with benchmark name. formatted_template = benchmark_template.format(name=name) - relative_path = create_relative_path('../single-source/') - source_file_path = os.path.join(relative_path, name + '.swift') - with open(source_file_path, 'w') as f: + relative_path = create_relative_path("../single-source/") + source_file_path = os.path.join(relative_path, name + ".swift") + with open(source_file_path, "w") as f: f.write(formatted_template) @@ -64,14 +64,14 @@ def add_import_benchmark(name): """Adds an `import` statement to the `main.swift` file for the new benchmark. """ - relative_path = create_relative_path('../utils/main.swift') + relative_path = create_relative_path("../utils/main.swift") # read current contents into an array file_contents = [] - with open(relative_path, 'r') as f: + with open(relative_path, "r") as f: file_contents = f.readlines() - # the test dependencies are placed before all benchmarks, so we have to + # the test dependencies are placed before all benchmarks, so we have to # insert the benchmark in the right alphabetical order after we have seen # all test dependencies. read_test_dependencies = False @@ -82,23 +82,27 @@ def add_import_benchmark(name): match = re.search(r"import ([a-zA-Z]+)", line) if match and match.group(1): benchmark_name = match.group(1) - # find where to insert the new benchmark in the right alphabetical + # find where to insert the new benchmark in the right alphabetical # order. - if (name < benchmark_name and previous_benchmark_name is None or - name < benchmark_name and name > previous_benchmark_name): + if ( + name < benchmark_name + and previous_benchmark_name is None + or name < benchmark_name + and name > previous_benchmark_name + ): if read_test_dependencies: - file_new_contents.append('import ' + name + '\n' + line) + file_new_contents.append("import " + name + "\n" + line) else: - # all test dependencies are first specified, so from now + # all test dependencies are first specified, so from now # on we can look where to insert the new benchmark. read_test_dependencies = True file_new_contents.append(line) else: - file_new_contents.append(line) + file_new_contents.append(line) previous_benchmark_name = benchmark_name else: file_new_contents.append(line) - with open(relative_path, 'w') as f: + with open(relative_path, "w") as f: for line in file_new_contents: f.write(line) @@ -107,19 +111,19 @@ def add_register_benchmark(name): """Adds an `import` statement to the `main.swift` file for the new benchmark. 
""" - relative_path = create_relative_path('../utils/main.swift') + relative_path = create_relative_path("../utils/main.swift") file_contents = [] - with open(relative_path, 'r') as f: + with open(relative_path, "r") as f: file_contents = f.readlines() file_new_contents = insert_line_alphabetically( name, - 'registerBenchmark(' + name + ')\n', - file_contents, - r"registerBenchmark\(([a-zA-Z]+)\)" + "registerBenchmark(" + name + ")\n", + file_contents, + r"registerBenchmark\(([a-zA-Z]+)\)", ) - with open(relative_path, 'w') as f: + with open(relative_path, "w") as f: for line in file_new_contents: f.write(line) @@ -129,7 +133,7 @@ def insert_line_alphabetically(name, new_line, lines, regex): find where the new benchmark should be inserted with the given `new_line`. """ # the name of the previous seen benchmark in order to insert the new - # one at the correct position + # one at the correct position previous_benchmark_name = None # the new contents of the file updated_lines = [] @@ -140,11 +144,15 @@ def insert_line_alphabetically(name, new_line, lines, regex): benchmark_name = match.group(1) # check if we're at the line where we have to insert the new # benchmark in the correct alphabetical order - if (name < benchmark_name and previous_benchmark_name is None or - name < benchmark_name and name > previous_benchmark_name): + if ( + name < benchmark_name + and previous_benchmark_name is None + or name < benchmark_name + and name > previous_benchmark_name + ): updated_lines.append(new_line + line) else: - updated_lines.append(line) + updated_lines.append(line) previous_benchmark_name = benchmark_name else: updated_lines.append(line) diff --git a/benchmark/scripts/generate_harness/generate_harness.py b/benchmark/scripts/generate_harness/generate_harness.py index 6e4bc0f815c5e..c5c6f87242133 100755 --- a/benchmark/scripts/generate_harness/generate_harness.py +++ b/benchmark/scripts/generate_harness/generate_harness.py @@ -21,12 +21,12 @@ import subprocess script_dir = os.path.dirname(os.path.realpath(__file__)) -perf_dir = os.path.realpath(os.path.join(script_dir, '../..')) -gyb = os.path.realpath(os.path.join(perf_dir, '../utils/gyb')) +perf_dir = os.path.realpath(os.path.join(script_dir, "../..")) +gyb = os.path.realpath(os.path.join(perf_dir, "../utils/gyb")) parser = argparse.ArgumentParser() -parser.add_argument("--output-dir", - help="Output directory (for validation test)", - default=perf_dir) +parser.add_argument( + "--output-dir", help="Output directory (for validation test)", default=perf_dir +) args = parser.parse_args() output_dir = args.output_dir @@ -35,7 +35,8 @@ def all_files(directory, extension): # matching: [directory]/**/*[extension] return [ os.path.join(root, f) for root, _, files in os.walk(directory) - for f in files if f.endswith(extension) + for f in files + if f.endswith(extension) ] @@ -46,13 +47,13 @@ def will_write(filename): # ensure path to file exists before writing os.makedirs(output_path) -if __name__ == '__main__': +if __name__ == "__main__": # Generate Your Boilerplate # Make sure longer paths are done first as CMakeLists.txt and main.swift # depend on the other gybs being generated first. 
- gyb_files = sorted(all_files(perf_dir, '.gyb'), key=len, reverse=True) + gyb_files = sorted(all_files(perf_dir, ".gyb"), key=len, reverse=True) for f in gyb_files: relative_path = os.path.relpath(f[:-4], perf_dir) out_file = os.path.join(output_dir, relative_path) will_write(out_file) - subprocess.call([gyb, '--line-directive', '', '-o', out_file, f]) + subprocess.call([gyb, "--line-directive", "", "-o", out_file, f]) diff --git a/benchmark/scripts/perf_test_driver/perf_test_driver.py b/benchmark/scripts/perf_test_driver/perf_test_driver.py index 449059b031d74..7f8929f771764 100644 --- a/benchmark/scripts/perf_test_driver/perf_test_driver.py +++ b/benchmark/scripts/perf_test_driver/perf_test_driver.py @@ -21,29 +21,27 @@ import subprocess -BENCHMARK_OUTPUT_RE = re.compile(r'\d+,([^,]+)') +BENCHMARK_OUTPUT_RE = re.compile(r"\d+,([^,]+)") class Result(object): - def __init__(self, name, status, output, xfail_list): self.name = name self.status = status self.output = output - self.is_xfailed = any( - (re.match(x, self.name) is not None for x in xfail_list)) + self.is_xfailed = any((re.match(x, self.name) is not None for x in xfail_list)) def is_failure(self): - return self.get_result() in ['FAIL', 'XPASS'] + return self.get_result() in ["FAIL", "XPASS"] def get_result(self): if self.is_xfailed: if self.status: - return 'XFAIL' - return 'XPASS' + return "XFAIL" + return "XPASS" if self.status: - return 'FAIL' - return 'PASS' + return "FAIL" + return "PASS" def get_name(self): return self.name @@ -53,7 +51,7 @@ def merge_in_extra_data(self, d): return d def print_data(self, max_test_len): - fmt = '{:<%d}{:}' % (max_test_len + 5) + fmt = "{:<%d}{:}" % (max_test_len + 5) print(fmt.format(self.get_name(), self.get_result())) @@ -65,36 +63,44 @@ def run_with_timeout(func, args): # we update to use python >= 3.3, use the timeout API on communicate # instead. import multiprocessing.dummy + fakeThreadPool = multiprocessing.dummy.Pool(1) try: result = fakeThreadPool.apply_async(func, args=args) return result.get(timeout_seconds) except multiprocessing.TimeoutError: fakeThreadPool.terminate() - raise RuntimeError("Child process aborted due to timeout. " - "Timeout: %s seconds" % timeout_seconds) + raise RuntimeError( + "Child process aborted due to timeout. 
" + "Timeout: %s seconds" % timeout_seconds + ) def _unwrap_self(args): return type(args[0]).process_input(*args) -BenchmarkDriver_OptLevels = ['Onone', 'O', 'Osize'] +BenchmarkDriver_OptLevels = ["Onone", "O", "Osize"] class BenchmarkDriver(object): - - def __init__(self, binary_dir, xfail_list, enable_parallel=False, - opt_levels=BenchmarkDriver_OptLevels): - self.targets = [(os.path.join(binary_dir, 'Benchmark_%s' % o), o) - for o in opt_levels] + def __init__( + self, + binary_dir, + xfail_list, + enable_parallel=False, + opt_levels=BenchmarkDriver_OptLevels, + ): + self.targets = [ + (os.path.join(binary_dir, "Benchmark_%s" % o), o) for o in opt_levels + ] self.xfail_list = xfail_list self.enable_parallel = enable_parallel self.data = None def print_data_header(self, max_test_len): - fmt = '{:<%d}{:}' % (max_test_len + 5) - print(fmt.format('Name', 'Result')) + fmt = "{:<%d}{:}" % (max_test_len + 5) + print(fmt.format("Name", "Result")) def prepare_input(self, name, opt_level): raise RuntimeError("Abstract method") @@ -115,7 +121,7 @@ def run_for_opt_level(self, binary, opt_level, test_filter): names = [n for n in names if regex.match(n)] def prepare_input_wrapper(name): - x = {'opt': opt_level, 'path': binary, 'test_name': name} + x = {"opt": opt_level, "path": binary, "test_name": name} x.update(self.prepare_input(name)) return x @@ -129,33 +135,31 @@ def prepare_input_wrapper(name): results = map(self.process_input, prepared_input) def reduce_results(acc, r): - acc['result'].append(r) - acc['has_failure'] = acc['has_failure'] or r.is_failure() - acc['max_test_len'] = max(acc['max_test_len'], len(r.get_name())) - acc['extra_data'] = r.merge_in_extra_data(acc['extra_data']) + acc["result"].append(r) + acc["has_failure"] = acc["has_failure"] or r.is_failure() + acc["max_test_len"] = max(acc["max_test_len"], len(r.get_name())) + acc["extra_data"] = r.merge_in_extra_data(acc["extra_data"]) return acc - return functools.reduce(reduce_results, results, { - 'result': [], - 'has_failure': False, - 'max_test_len': 0, - 'extra_data': {} - }) + return functools.reduce( + reduce_results, + results, + {"result": [], "has_failure": False, "max_test_len": 0, "extra_data": {}}, + ) def print_data(self, data, max_test_len): print("Results:") self.print_data_header(max_test_len) for d in data: - for r in d['result']: + for r in d["result"]: r.print_data(max_test_len) def run(self, test_filter=None): self.data = [ self.run_for_opt_level(binary, opt_level, test_filter) - for binary, opt_level in self.targets] - max_test_len = functools.reduce(max, - [d['max_test_len'] for d in self.data]) - has_failure = functools.reduce(max, - [d['has_failure'] for d in self.data]) + for binary, opt_level in self.targets + ] + max_test_len = functools.reduce(max, [d["max_test_len"] for d in self.data]) + has_failure = functools.reduce(max, [d["has_failure"] for d in self.data]) self.print_data(self.data, max_test_len) return not has_failure diff --git a/benchmark/scripts/run_smoke_bench b/benchmark/scripts/run_smoke_bench index f478c7e95a869..0facbe7b344f5 100755 --- a/benchmark/scripts/run_smoke_bench +++ b/benchmark/scripts/run_smoke_bench @@ -28,14 +28,15 @@ import glob import os import subprocess import sys +from imp import load_source from compare_perf_tests import LogParser, TestComparator, create_report -from imp import load_source # import Benchmark_Driver # doesn't work because it misses '.py' extension Benchmark_Driver = load_source( - 'Benchmark_Driver', os.path.join(os.path.dirname( - 
os.path.abspath(__file__)), 'Benchmark_Driver')) + "Benchmark_Driver", + os.path.join(os.path.dirname(os.path.abspath(__file__)), "Benchmark_Driver"), +) # from Benchmark_Driver import BenchmarkDriver, BenchmarkDoctor, ... BenchmarkDriver = Benchmark_Driver.BenchmarkDriver BenchmarkDoctor = Benchmark_Driver.BenchmarkDoctor @@ -46,11 +47,12 @@ VERBOSE = False class DriverArgs(object): """Arguments for BenchmarkDriver.""" - def __init__(self, tests, optimization='O'): + + def __init__(self, tests, optimization="O"): """Initialize with path to the build-dir and optimization level.""" self.benchmarks = None self.filters = None - self.tests = os.path.join(tests, 'bin') + self.tests = os.path.join(tests, "bin") self.optimization = optimization @@ -63,49 +65,76 @@ def main(): global VERBOSE argparser = argparse.ArgumentParser() argparser.add_argument( - '-verbose', action='store_true', - help='print verbose messages') + "-verbose", action="store_true", help="print verbose messages" + ) argparser.add_argument( - '-O', action='append_const', const='O', dest='opt_levels', - help='test -O benchmarks') + "-O", + action="append_const", + const="O", + dest="opt_levels", + help="test -O benchmarks", + ) argparser.add_argument( - '-Osize', action='append_const', const='Osize', dest='opt_levels', - help='test -Osize benchmarks') + "-Osize", + action="append_const", + const="Osize", + dest="opt_levels", + help="test -Osize benchmarks", + ) argparser.add_argument( - '-Onone', action='append_const', const='Onone', dest='opt_levels', - help='test -Onone benchmarks (except code size)') + "-Onone", + action="append_const", + const="Onone", + dest="opt_levels", + help="test -Onone benchmarks (except code size)", + ) argparser.add_argument( - '-skip-code-size', action='store_true', - help="Don't report code size differences") + "-skip-code-size", + action="store_true", + help="Don't report code size differences", + ) argparser.add_argument( - '-skip-performance', action='store_true', - help="Don't report performance differences") + "-skip-performance", + action="store_true", + help="Don't report performance differences", + ) argparser.add_argument( - '-skip-check-added', action='store_true', - help="Don't validate newly added benchmarks") + "-skip-check-added", + action="store_true", + help="Don't validate newly added benchmarks", + ) argparser.add_argument( - '-o', type=str, - help='In addition to stdout, write the results into a markdown file') + "-o", + type=str, + help="In addition to stdout, write the results into a markdown file", + ) argparser.add_argument( - '-threshold', type=float, - help='The performance threshold in %% which triggers a re-run', - default=5) + "-threshold", + type=float, + help="The performance threshold in %% which triggers a re-run", + default=5, + ) argparser.add_argument( - '-num-samples', type=int, - help='The (minimum) number of samples to run', default=3) + "-num-samples", + type=int, + help="The (minimum) number of samples to run", + default=3, + ) argparser.add_argument( - '-num-reruns', type=int, + "-num-reruns", + type=int, help="The number of re-runs until it's assumed to be a real change", - default=8) + default=8, + ) argparser.add_argument( - '-platform', type=str, - help='The benchmark build platform', default='macosx') + "-platform", type=str, help="The benchmark build platform", default="macosx" + ) argparser.add_argument( - 'oldbuilddir', nargs=1, type=str, - help='old benchmark build directory') + "oldbuilddir", nargs=1, type=str, help="old benchmark build directory" 
+ ) argparser.add_argument( - 'newbuilddir', nargs=1, type=str, - help='new benchmark build directory') + "newbuilddir", nargs=1, type=str, help="new benchmark build directory" + ) args = argparser.parse_args() VERBOSE = args.verbose @@ -115,29 +144,42 @@ def main(): def test_opt_levels(args): output_file = None if args.o: - output_file = open(args.o, 'w') + output_file = open(args.o, "w") changes = False - for opt_level in args.opt_levels or ['O', 'Osize', 'Onone']: - log('####### Testing optimization level -' + opt_level + ' #######') + for opt_level in args.opt_levels or ["O", "Osize", "Onone"]: + log("####### Testing optimization level -" + opt_level + " #######") if not args.skip_performance: - if test_performance(opt_level, args.oldbuilddir[0], - args.newbuilddir[0], - float(args.threshold) / 100, args.num_samples, - args.num_reruns, output_file): + if test_performance( + opt_level, + args.oldbuilddir[0], + args.newbuilddir[0], + float(args.threshold) / 100, + args.num_samples, + args.num_reruns, + output_file, + ): changes = True # There is no point in reporting code size for Onone. - if not args.skip_code_size and opt_level != 'Onone': - if report_code_size(opt_level, args.oldbuilddir[0], - args.newbuilddir[0], - args.platform, output_file): + if not args.skip_code_size and opt_level != "Onone": + if report_code_size( + opt_level, + args.oldbuilddir[0], + args.newbuilddir[0], + args.platform, + output_file, + ): changes = True if not args.skip_code_size: - if report_code_size('swiftlibs', args.oldbuilddir[0], - args.newbuilddir[0], - args.platform, output_file): + if report_code_size( + "swiftlibs", + args.oldbuilddir[0], + args.newbuilddir[0], + args.platform, + output_file, + ): changes = True if not args.skip_check_added: @@ -158,10 +200,14 @@ def measure(driver, tests, i): Collect increasing number of samples, depending on the iteration. """ num_samples = min(i + 3, 10) - msg = ' Iteration {0} for {1}: num samples = {2}, '.format( - i, driver.args.tests, num_samples) - msg += ('running all tests' if driver.all_tests == tests else - 're-testing {0} tests'.format(len(tests))) + msg = " Iteration {0} for {1}: num samples = {2}, ".format( + i, driver.args.tests, num_samples + ) + msg += ( + "running all tests" + if driver.all_tests == tests + else "re-testing {0} tests".format(len(tests)) + ) log(msg) driver.tests = tests return driver.run(num_samples=num_samples, sample_time=0.0025) @@ -174,8 +220,9 @@ def merge(results, other_results): return results -def test_performance(opt_level, old_dir, new_dir, threshold, num_samples, - num_reruns, output_file): +def test_performance( + opt_level, old_dir, new_dir, threshold, num_samples, num_reruns, output_file +): """Detect performance changes in benchmarks. 
Start fast with few samples per benchmark and gradually spend more time @@ -183,8 +230,10 @@ def test_performance(opt_level, old_dir, new_dir, threshold, num_samples, """ i, unchanged_length_count = 0, 0 - old, new = [BenchmarkDriver(DriverArgs(dir, optimization=opt_level)) - for dir in [old_dir, new_dir]] + old, new = [ + BenchmarkDriver(DriverArgs(dir, optimization=opt_level)) + for dir in [old_dir, new_dir] + ] results = [measure(driver, driver.tests, i) for driver in [old, new]] tests = TestComparator(results[0], results[1], threshold) changed = tests.decreased + tests.increased @@ -192,10 +241,11 @@ def test_performance(opt_level, old_dir, new_dir, threshold, num_samples, while len(changed) > 0 and unchanged_length_count < num_reruns: i += 1 if VERBOSE: - log(' test again: ' + str([test.name for test in changed])) - results = [merge(the_results, - measure(driver, [test.name for test in changed], i)) - for the_results, driver in zip(results, [old, new])] + log(" test again: " + str([test.name for test in changed])) + results = [ + merge(the_results, measure(driver, [test.name for test in changed], i)) + for the_results, driver in zip(results, [old, new]) + ] tests = TestComparator(results[0], results[1], threshold) changed = tests.decreased + tests.increased @@ -204,19 +254,19 @@ def test_performance(opt_level, old_dir, new_dir, threshold, num_samples, else: unchanged_length_count = 0 - log('') - return report_results("Performance: -" + opt_level, None, None, - threshold * 1.4, output_file, *results) + log("") + return report_results( + "Performance: -" + opt_level, None, None, threshold * 1.4, output_file, *results + ) def report_code_size(opt_level, old_dir, new_dir, platform, output_file): - if opt_level == 'swiftlibs': - files = glob.glob(os.path.join(old_dir, 'lib', 'swift', platform, - '*.dylib')) + if opt_level == "swiftlibs": + files = glob.glob(os.path.join(old_dir, "lib", "swift", platform, "*.dylib")) else: - files = glob.glob(os.path.join(old_dir, - opt_level + '-*' + platform + '*', - '*.o')) + files = glob.glob( + os.path.join(old_dir, opt_level + "-*" + platform + "*", "*.o") + ) idx = 1 old_lines = "" @@ -229,37 +279,44 @@ def report_code_size(opt_level, old_dir, new_dir, platform, output_file): bname = os.path.basename(oldfile) def result_line(value): - v = ',' + str(value) - return (str(idx) + ',' + bname + ',1' + (v * 3) + - ',0' + v + '\n') + v = "," + str(value) + return str(idx) + "," + bname + ",1" + (v * 3) + ",0" + v + "\n" old_lines += result_line(oldsize) new_lines += result_line(newsize) idx += 1 - return report_results("Code size: -" + opt_level, - old_lines, new_lines, 0.01, output_file) + return report_results( + "Code size: -" + opt_level, old_lines, new_lines, 0.01, output_file + ) def get_codesize(filename): - output = subprocess.check_output(['size', filename]).splitlines() + output = subprocess.check_output(["size", filename]).splitlines() header_line = output[0] data_line = output[1] - if header_line.find('__TEXT') != 0: - sys.exit('unexpected output from size command:\n' + output) - return int(data_line.split('\t')[0]) - - -def report_results(title, old_lines, new_lines, threshold, output_file, - old_results=None, new_results=None): + if header_line.find("__TEXT") != 0: + sys.exit("unexpected output from size command:\n" + output) + return int(data_line.split("\t")[0]) + + +def report_results( + title, + old_lines, + new_lines, + threshold, + output_file, + old_results=None, + new_results=None, +): old_results = old_results or 
LogParser.results_from_string(old_lines) new_results = new_results or LogParser.results_from_string(new_lines) print("------- " + title + " -------") - print(create_report(old_results, new_results, threshold, 'git')) + print(create_report(old_results, new_results, threshold, "git")) if output_file: - report = create_report(old_results, new_results, threshold, 'markdown') + report = create_report(old_results, new_results, threshold, "markdown") if report != "": output_file.write("### " + title + "\n") output_file.write(report) @@ -289,17 +346,19 @@ performance team (@eeckstein). Hardware Overview """ - po = subprocess.check_output(['system_profiler', 'SPHardwareDataType']) + po = subprocess.check_output(["system_profiler", "SPHardwareDataType"]) for line in po.splitlines(): - selection = ['Model Name', - 'Model Identifier', - 'Processor Name', - 'Processor Speed', - 'Number of Processors', - 'Total Number of Cores', - 'L2 Cache', - 'L3 Cache', - 'Memory:'] + selection = [ + "Model Name", + "Model Identifier", + "Processor Name", + "Processor Speed", + "Number of Processors", + "Total Number of Cores", + "L2 Cache", + "L3 Cache", + "Memory:", + ] if any(s in line for s in selection): text += line + "\n" @@ -320,5 +379,5 @@ def check_added(args, output_file=None): doctor.check() -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main()) diff --git a/benchmark/scripts/test_Benchmark_Driver.py b/benchmark/scripts/test_Benchmark_Driver.py index 32b1a9e527635..570fee82f2f8b 100644 --- a/benchmark/scripts/test_Benchmark_Driver.py +++ b/benchmark/scripts/test_Benchmark_Driver.py @@ -26,8 +26,9 @@ # import Benchmark_Driver # doesn't work because it misses '.py' extension Benchmark_Driver = load_source( - 'Benchmark_Driver', os.path.join(os.path.dirname( - os.path.abspath(__file__)), 'Benchmark_Driver')) + "Benchmark_Driver", + os.path.join(os.path.dirname(os.path.abspath(__file__)), "Benchmark_Driver"), +) # from Benchmark_Driver import parse_args parse_args = Benchmark_Driver.parse_args BenchmarkDriver = Benchmark_Driver.BenchmarkDriver @@ -45,121 +46,122 @@ def assert_contains(self, texts, output): def test_requires_command_argument(self): with captured_output() as (_, err): self.assertRaises(SystemExit, parse_args, []) - self.assert_contains(['usage:', 'COMMAND', 'too few arguments'], - err.getvalue()) + self.assert_contains(["usage:", "COMMAND", "too few arguments"], err.getvalue()) def test_command_help_lists_commands(self): with captured_output() as (out, _): - self.assertRaises(SystemExit, parse_args, ['-h']) - self.assert_contains(['COMMAND', 'run', 'compare', 'check'], - out.getvalue()) + self.assertRaises(SystemExit, parse_args, ["-h"]) + self.assert_contains(["COMMAND", "run", "compare", "check"], out.getvalue()) def test_run_benchmarks_by_name_or_ordinal(self): - benchmarks = ['AngryPhonebook', '42'] - self.assertEqual( - parse_args(['run'] + benchmarks).benchmarks, benchmarks) + benchmarks = ["AngryPhonebook", "42"] + self.assertEqual(parse_args(["run"] + benchmarks).benchmarks, benchmarks) def test_run_benchmarks_matching_pattern(self): - regexes = ['Prefix', '.*Suffix.*'] - filters = ['-f', regexes[0], '-f', regexes[1]] - self.assertEqual(parse_args(['run'] + filters).filters, regexes) + regexes = ["Prefix", ".*Suffix.*"] + filters = ["-f", regexes[0], "-f", regexes[1]] + self.assertEqual(parse_args(["run"] + filters).filters, regexes) def test_run_benchmarks_and_filters_are_exclusive(self): with captured_output() as (_, err): - self.assertRaises(SystemExit, - 
parse_args, 'run -f Filter1 Benchmark1'.split()) + self.assertRaises( + SystemExit, parse_args, "run -f Filter1 Benchmark1".split() + ) self.assert_contains( - ['error', - 'argument BENCHMARK: not allowed with argument -f/--filter'], - err.getvalue()) + ["error", "argument BENCHMARK: not allowed with argument -f/--filter"], + err.getvalue(), + ) def test_tests_location(self): here = os.path.dirname(os.path.abspath(__file__)) - self.assertEqual(parse_args(['run']).tests, here) - tests = '/benchmarks/are/here' - self.assertEqual(parse_args(['run', '-t', tests]).tests, tests) + self.assertEqual(parse_args(["run"]).tests, here) + tests = "/benchmarks/are/here" + self.assertEqual(parse_args(["run", "-t", tests]).tests, tests) def test_optimization_argument(self): - self.assertEqual(parse_args(['run']).optimization, 'O') - self.assertEqual( - parse_args(['run', '-o', 'O']).optimization, 'O') - self.assertEqual( - parse_args(['run', '-o', 'Onone']).optimization, 'Onone') - self.assertEqual( - parse_args(['run', '-o', 'Osize']).optimization, 'Osize') + self.assertEqual(parse_args(["run"]).optimization, "O") + self.assertEqual(parse_args(["run", "-o", "O"]).optimization, "O") + self.assertEqual(parse_args(["run", "-o", "Onone"]).optimization, "Onone") + self.assertEqual(parse_args(["run", "-o", "Osize"]).optimization, "Osize") with captured_output() as (_, err): - self.assertRaises(SystemExit, - parse_args, ['run', '-o', 'bogus']) + self.assertRaises(SystemExit, parse_args, ["run", "-o", "bogus"]) self.assert_contains( - ['error:', - "argument -o/--optimization: invalid choice: 'bogus'", - "(choose from 'O', 'Onone', 'Osize')"], - err.getvalue()) + [ + "error:", + "argument -o/--optimization: invalid choice: 'bogus'", + "(choose from 'O', 'Onone', 'Osize')", + ], + err.getvalue(), + ) def test_independent_samples(self): - self.assertEqual(parse_args(['run']).independent_samples, 1) - self.assertEqual(parse_args(['run', '-i', '3']).independent_samples, - 3) + self.assertEqual(parse_args(["run"]).independent_samples, 1) + self.assertEqual(parse_args(["run", "-i", "3"]).independent_samples, 3) with captured_output() as (out, err): - self.assertRaises(SystemExit, - parse_args, ['run', '-i', '-3']) + self.assertRaises(SystemExit, parse_args, ["run", "-i", "-3"]) self.assert_contains( - ['error:', "argument -i/--independent-samples: " + - "invalid positive_int value: '-3'"], - err.getvalue()) + [ + "error:", + "argument -i/--independent-samples: " + + "invalid positive_int value: '-3'", + ], + err.getvalue(), + ) def test_output_dir(self): - self.assertIsNone(parse_args(['run']).output_dir) - self.assertEqual( - parse_args(['run', '--output-dir', '/log']).output_dir, '/log') + self.assertIsNone(parse_args(["run"]).output_dir) + self.assertEqual(parse_args(["run", "--output-dir", "/log"]).output_dir, "/log") def test_check_supports_vebose_output(self): - self.assertFalse(parse_args(['check']).verbose) - self.assertTrue(parse_args(['check', '-v']).verbose) - self.assertTrue(parse_args(['check', '--verbose']).verbose) + self.assertFalse(parse_args(["check"]).verbose) + self.assertTrue(parse_args(["check", "-v"]).verbose) + self.assertTrue(parse_args(["check", "--verbose"]).verbose) def test_check_supports_mardown_output(self): - self.assertFalse(parse_args(['check']).markdown) - self.assertTrue(parse_args(['check', '-md']).markdown) - self.assertTrue(parse_args(['check', '--markdown']).markdown) + self.assertFalse(parse_args(["check"]).markdown) + self.assertTrue(parse_args(["check", "-md"]).markdown) + 
self.assertTrue(parse_args(["check", "--markdown"]).markdown) def test_check_flags_are_mutually_exclusive(self): with captured_output() as (out, err): - self.assertRaises(SystemExit, - parse_args, ['check', '-md', '-v']) + self.assertRaises(SystemExit, parse_args, ["check", "-md", "-v"]) self.assert_contains( - ['error:', 'argument -v/--verbose: ' + - 'not allowed with argument -md/--markdown'], - err.getvalue()) + [ + "error:", + "argument -v/--verbose: " + "not allowed with argument -md/--markdown", + ], + err.getvalue(), + ) class ArgsStub(object): def __init__(self): self.benchmarks = None self.filters = None - self.tests = '/benchmarks/' - self.optimization = 'O' + self.tests = "/benchmarks/" + self.optimization = "O" class SubprocessMock(Mock): """Mock for subprocess module's `check_output` method.""" + STDOUT = object() def __init__(self, responses=None): super(SubprocessMock, self).__init__(responses) - def _check_output(args, stdin=None, stdout=None, stderr=None, - shell=False): + def _check_output(args, stdin=None, stdout=None, stderr=None, shell=False): return self.record_and_respond(args, stdin, stdout, stderr, shell) + self.check_output = _check_output def record_and_respond(self, args, stdin, stdout, stderr, shell): # _ = stdin, stdout, shell # ignored in mock - assert stderr == self.STDOUT, 'Errors are NOT redirected to STDOUT' + assert stderr == self.STDOUT, "Errors are NOT redirected to STDOUT" args = tuple(args) self.calls.append(args) - return self.respond.get(args, '') + return self.respond.get(args, "") class TestBenchmarkDriverInitialization(unittest.TestCase): @@ -169,81 +171,95 @@ def setUp(self): def test_test_harness(self): self.assertEqual( - BenchmarkDriver(self.args, tests=['ignored']).test_harness, - '/benchmarks/Benchmark_O') - self.args.tests = '/path' - self.args.optimization = 'Suffix' + BenchmarkDriver(self.args, tests=["ignored"]).test_harness, + "/benchmarks/Benchmark_O", + ) + self.args.tests = "/path" + self.args.optimization = "Suffix" self.assertEqual( - BenchmarkDriver(self.args, tests=['ignored']).test_harness, - '/path/Benchmark_Suffix') + BenchmarkDriver(self.args, tests=["ignored"]).test_harness, + "/path/Benchmark_Suffix", + ) def test_gets_list_of_precommit_benchmarks(self): self.subprocess_mock.expect( - '/benchmarks/Benchmark_O --list --delim=\t'.split(' '), - '#\tTest\t[Tags]\n1\tBenchmark1\t[t1, t2]\n2\tBenchmark2\t[t3]\n') - driver = BenchmarkDriver( - self.args, _subprocess=self.subprocess_mock) + "/benchmarks/Benchmark_O --list --delim=\t".split(" "), + "#\tTest\t[Tags]\n1\tBenchmark1\t[t1, t2]\n2\tBenchmark2\t[t3]\n", + ) + driver = BenchmarkDriver(self.args, _subprocess=self.subprocess_mock) self.subprocess_mock.assert_called_all_expected() - self.assertEqual(driver.tests, - ['Benchmark1', 'Benchmark2']) - self.assertEqual(driver.all_tests, - ['Benchmark1', 'Benchmark2']) - self.assertEquals(driver.test_number['Benchmark1'], "1") - self.assertEquals(driver.test_number['Benchmark2'], "2") + self.assertEqual(driver.tests, ["Benchmark1", "Benchmark2"]) + self.assertEqual(driver.all_tests, ["Benchmark1", "Benchmark2"]) + self.assertEquals(driver.test_number["Benchmark1"], "1") + self.assertEquals(driver.test_number["Benchmark2"], "2") list_all_tests = ( - '/benchmarks/Benchmark_O --list --delim=\t --skip-tags='.split(' '), + "/benchmarks/Benchmark_O --list --delim=\t --skip-tags=".split(" "), """# Test [Tags] 1 Benchmark1 [t1, t2] 2 Benchmark2 [t3] 3 Benchmark3 [t3, t4] -""") +""", + ) def 
test_gets_list_of_all_benchmarks_when_benchmarks_args_exist(self): """Filters tests by name or test number, ignoring unknown.""" - self.args.benchmarks = '1 Benchmark3 1 bogus'.split() + self.args.benchmarks = "1 Benchmark3 1 bogus".split() self.subprocess_mock.expect(*self.list_all_tests) - driver = BenchmarkDriver( - self.args, _subprocess=self.subprocess_mock) + driver = BenchmarkDriver(self.args, _subprocess=self.subprocess_mock) self.subprocess_mock.assert_called_all_expected() - self.assertEqual(driver.tests, ['Benchmark1', 'Benchmark3']) - self.assertEqual(driver.all_tests, - ['Benchmark1', 'Benchmark2', 'Benchmark3']) + self.assertEqual(driver.tests, ["Benchmark1", "Benchmark3"]) + self.assertEqual(driver.all_tests, ["Benchmark1", "Benchmark2", "Benchmark3"]) def test_filters_benchmarks_by_pattern(self): - self.args.filters = '-f .+3'.split() + self.args.filters = "-f .+3".split() self.subprocess_mock.expect(*self.list_all_tests) - driver = BenchmarkDriver( - self.args, _subprocess=self.subprocess_mock) + driver = BenchmarkDriver(self.args, _subprocess=self.subprocess_mock) self.subprocess_mock.assert_called_all_expected() - self.assertEqual(driver.tests, ['Benchmark3']) - self.assertEqual(driver.all_tests, - ['Benchmark1', 'Benchmark2', 'Benchmark3']) + self.assertEqual(driver.tests, ["Benchmark3"]) + self.assertEqual(driver.all_tests, ["Benchmark1", "Benchmark2", "Benchmark3"]) def test_log_file(self): """When swift-repo is set, log is tied to Git branch and revision.""" - self.assertIsNone(BenchmarkDriver( - Stub(output_dir=None, tests='/bin/'), tests=['ignored']).log_file) + self.assertIsNone( + BenchmarkDriver( + Stub(output_dir=None, tests="/bin/"), tests=["ignored"] + ).log_file + ) - now = time.strftime('%Y%m%d%H%M%S', time.localtime()) + now = time.strftime("%Y%m%d%H%M%S", time.localtime()) driver = BenchmarkDriver( - Stub(output_dir='/path', tests='/bin/', optimization='Suffix', - swift_repo=None,), tests=['ignored']) - self.assertEqual(driver.log_file, - '/path/Benchmark_Suffix-' + now + '.log') - - r = '/repo/' - subprocess_mock = SubprocessMock(responses=[ - ('git -C {0} rev-parse --abbrev-ref HEAD'.format(r).split(' '), - 'branch\n'), - ('git -C {0} rev-parse --short HEAD'.format(r).split(' '), - 'short_hash\n'), - ]) + Stub( + output_dir="/path", + tests="/bin/", + optimization="Suffix", + swift_repo=None, + ), + tests=["ignored"], + ) + self.assertEqual(driver.log_file, "/path/Benchmark_Suffix-" + now + ".log") + + r = "/repo/" + subprocess_mock = SubprocessMock( + responses=[ + ( + "git -C {0} rev-parse --abbrev-ref HEAD".format(r).split(" "), + "branch\n", + ), + ( + "git -C {0} rev-parse --short HEAD".format(r).split(" "), + "short_hash\n", + ), + ] + ) driver = BenchmarkDriver( - Stub(output_dir='/log/', tests='', optimization='S', swift_repo=r), - tests=['ignored'], _subprocess=subprocess_mock) - self.assertEqual(driver.log_file, - '/log/branch/Benchmark_S-' + now + '-short_hash.log') + Stub(output_dir="/log/", tests="", optimization="S", swift_repo=r), + tests=["ignored"], + _subprocess=subprocess_mock, + ) + self.assertEqual( + driver.log_file, "/log/branch/Benchmark_S-" + now + "-short_hash.log" + ) subprocess_mock.assert_called_all_expected() @@ -253,8 +269,8 @@ class LogParserStub(object): @staticmethod def results_from_string(log_contents): LogParserStub.results_from_string_called = True - r = PerformanceTestResult('3,b1,1,123,123,123,0,123'.split(',')) - return {'b1': r} + r = PerformanceTestResult("3,b1,1,123,123,123,0,123".split(",")) + return {"b1": 
r} class TestBenchmarkDriverRunningTests(unittest.TestCase): @@ -263,34 +279,38 @@ def setUp(self): self.parser_stub = LogParserStub() self.subprocess_mock = SubprocessMock() self.subprocess_mock.expect( - '/benchmarks/Benchmark_O --list --delim=\t'.split(' '), - '#\tTest\t[Tags]\n1\tb1\t[tag]\n') + "/benchmarks/Benchmark_O --list --delim=\t".split(" "), + "#\tTest\t[Tags]\n1\tb1\t[tag]\n", + ) self.driver = BenchmarkDriver( - self.args, _subprocess=self.subprocess_mock, - parser=self.parser_stub) + self.args, _subprocess=self.subprocess_mock, parser=self.parser_stub + ) def test_run_benchmark_with_multiple_samples(self): - self.driver.run('b1') - self.subprocess_mock.assert_called_with( - ('/benchmarks/Benchmark_O', 'b1')) - self.driver.run('b2', num_samples=5) + self.driver.run("b1") + self.subprocess_mock.assert_called_with(("/benchmarks/Benchmark_O", "b1")) + self.driver.run("b2", num_samples=5) self.subprocess_mock.assert_called_with( - ('/benchmarks/Benchmark_O', 'b2', '--num-samples=5')) + ("/benchmarks/Benchmark_O", "b2", "--num-samples=5") + ) def test_run_benchmark_with_specified_number_of_iterations(self): - self.driver.run('b', num_iters=1) + self.driver.run("b", num_iters=1) self.subprocess_mock.assert_called_with( - ('/benchmarks/Benchmark_O', 'b', '--num-iters=1')) + ("/benchmarks/Benchmark_O", "b", "--num-iters=1") + ) def test_run_benchmark_for_specified_time(self): - self.driver.run('b', sample_time=0.5) + self.driver.run("b", sample_time=0.5) self.subprocess_mock.assert_called_with( - ('/benchmarks/Benchmark_O', 'b', '--sample-time=0.5')) + ("/benchmarks/Benchmark_O", "b", "--sample-time=0.5") + ) def test_run_benchmark_in_verbose_mode(self): - self.driver.run('b', verbose=True) + self.driver.run("b", verbose=True) self.subprocess_mock.assert_called_with( - ('/benchmarks/Benchmark_O', 'b', '--verbose')) + ("/benchmarks/Benchmark_O", "b", "--verbose") + ) def test_run_batch(self): """Run all active tests in a single execution of the Benchmark_X. @@ -298,10 +318,9 @@ def test_run_batch(self): Known test names are passed to the harness in a compressed form as test numbers. """ - self.driver.tests = ['b1', 'bx'] + self.driver.tests = ["b1", "bx"] self.driver.run() - self.subprocess_mock.assert_called_with( - ('/benchmarks/Benchmark_O', '1', 'bx')) + self.subprocess_mock.assert_called_with(("/benchmarks/Benchmark_O", "1", "bx")) def test_parse_results_from_running_benchmarks(self): """Parse measurements results using LogParser. @@ -309,55 +328,70 @@ def test_parse_results_from_running_benchmarks(self): Individual test run returns the first PerformanceTestResult directly. Batch run returns the dictionary of PerformanceTestResults. 
""" - r = self.driver.run('b') + r = self.driver.run("b") self.assertTrue(self.parser_stub.results_from_string_called) - self.assertEquals(r.name, 'b1') # non-matching name, just 1st result + self.assertEquals(r.name, "b1") # non-matching name, just 1st result r = self.driver.run() self.assertTrue(isinstance(r, dict)) - self.assertEquals(r['b1'].name, 'b1') + self.assertEquals(r["b1"].name, "b1") def test_measure_memory(self): - self.driver.run('b', measure_memory=True) + self.driver.run("b", measure_memory=True) self.subprocess_mock.assert_called_with( - ('/benchmarks/Benchmark_O', 'b', '--memory')) + ("/benchmarks/Benchmark_O", "b", "--memory") + ) def test_report_quantiles(self): """Use delta compression for quantile reports.""" - self.driver.run('b', quantile=4) + self.driver.run("b", quantile=4) self.subprocess_mock.assert_called_with( - ('/benchmarks/Benchmark_O', 'b', '--quantile=4', '--delta')) + ("/benchmarks/Benchmark_O", "b", "--quantile=4", "--delta") + ) def test_run_benchmark_independent_samples(self): """Extract up to 20 measurements from an independent run.""" self.driver.args.independent_samples = 3 - r = self.driver.run_independent_samples('b1') - self.assertEqual(self.subprocess_mock.calls.count( - ('/benchmarks/Benchmark_O', 'b1', '--num-iters=1', '--memory', - '--quantile=20', '--delta')), 3) + r = self.driver.run_independent_samples("b1") + self.assertEqual( + self.subprocess_mock.calls.count( + ( + "/benchmarks/Benchmark_O", + "b1", + "--num-iters=1", + "--memory", + "--quantile=20", + "--delta", + ) + ), + 3, + ) self.assertEqual(r.num_samples, 3) # results are merged def test_run_and_log(self): def mock_run(test): - self.assertEqual(test, 'b1') + self.assertEqual(test, "b1") return PerformanceTestResult( - '3,b1,5,101,1,1,1,1,888'.split(','), - quantiles=True, delta=True, memory=True) - driver = BenchmarkDriver(tests=['b1'], args=Stub(output_dir=None)) + "3,b1,5,101,1,1,1,1,888".split(","), + quantiles=True, + delta=True, + memory=True, + ) + + driver = BenchmarkDriver(tests=["b1"], args=Stub(output_dir=None)) driver.run_independent_samples = mock_run # patching with captured_output() as (out, _): log = driver.run_and_log() - header = '#,TEST,SAMPLES,MIN(μs),Q1(μs),MEDIAN(μs),Q3(μs),MAX(μs),' +\ - 'MAX_RSS(B)\n' - csv_log = '3,b1,5,101,102,103,104,105,888\n' + header = ( + "#,TEST,SAMPLES,MIN(μs),Q1(μs),MEDIAN(μs),Q3(μs),MAX(μs)," + "MAX_RSS(B)\n" + ) + csv_log = "3,b1,5,101,102,103,104,105,888\n" self.assertEqual(log, None) self.assertEqual( out.getvalue(), - header + - csv_log + - '\n' + - 'Total performance tests executed: 1\n') + header + csv_log + "\n" + "Total performance tests executed: 1\n", + ) with captured_output() as (out, _): log = driver.run_and_log(csv_console=False) @@ -365,66 +399,72 @@ def mock_run(test): self.assertEqual(log, header + csv_log) self.assertEqual( out.getvalue(), - ' # TEST SAMPLES MIN(μs)' + - ' Q1(μs) MEDIAN(μs) Q3(μs) MAX(μs) MAX_RSS(B)\n' + - ' 3 b1 5 101' + - ' 102 103 104 105 888\n' + - '\n' + - 'Total performance tests executed: 1\n') + " # TEST SAMPLES MIN(μs)" + + " Q1(μs) MEDIAN(μs) Q3(μs) MAX(μs) MAX_RSS(B)\n" + + " 3 b1 5 101" + + " 102 103 104 105 888\n" + + "\n" + + "Total performance tests executed: 1\n", + ) def test_log_results(self): """Create log directory if it doesn't exist and write the log file.""" + def assert_log_written(out, log_file, content): - self.assertEqual(out.getvalue(), - 'Logging results to: ' + log_file + '\n') - with open(log_file, 'rU') as f: + self.assertEqual(out.getvalue(), "Logging results 
to: " + log_file + "\n") + with open(log_file, "rU") as f: text = f.read() self.assertEqual(text, "formatted output") try: import tempfile # setUp + temp_dir = tempfile.mkdtemp() - log_dir = os.path.join(temp_dir, 'sub-dir/') - driver = BenchmarkDriver(Stub(), tests=['']) + log_dir = os.path.join(temp_dir, "sub-dir/") + driver = BenchmarkDriver(Stub(), tests=[""]) self.assertFalse(os.path.exists(log_dir)) content = "formatted output" - log_file = os.path.join(log_dir, '1.log') + log_file = os.path.join(log_dir, "1.log") with captured_output() as (out, _): driver.log_results(content, log_file=log_file) assert_log_written(out, log_file, content) self.assertTrue(os.path.exists(log_dir)) - log_file = os.path.join(log_dir, '2.log') + log_file = os.path.join(log_dir, "2.log") with captured_output() as (out, _): driver.log_results(content, log_file=log_file) assert_log_written(out, log_file, content) finally: import shutil # tearDown + shutil.rmtree(temp_dir) def test_deterministing_hashing(self): - cmd = ['printenv', 'SWIFT_DETERMINISTIC_HASHING'] - driver = BenchmarkDriver(['no args'], tests=['ignored']) - self.assertEqual(driver._invoke(cmd).strip(), '1') + cmd = ["printenv", "SWIFT_DETERMINISTIC_HASHING"] + driver = BenchmarkDriver(["no args"], tests=["ignored"]) + self.assertEqual(driver._invoke(cmd).strip(), "1") class BenchmarkDriverMock(Mock): """Mock for BenchmarkDriver's `run` method""" + def __init__(self, tests, responses=None): super(BenchmarkDriverMock, self).__init__(responses) self.tests = tests self.args = ArgsStub() - def _run(test, num_samples=None, num_iters=None, - verbose=None, measure_memory=False): - return self.record_and_respond(test, num_samples, num_iters, - verbose, measure_memory) + def _run( + test, num_samples=None, num_iters=None, verbose=None, measure_memory=False + ): + return self.record_and_respond( + test, num_samples, num_iters, verbose, measure_memory + ) + self.run = _run - def record_and_respond(self, test, num_samples, num_iters, - verbose, measure_memory): + def record_and_respond(self, test, num_samples, num_iters, verbose, measure_memory): args = (test, num_samples, num_iters, verbose, measure_memory) self.calls.append(args) return self.respond.get(args, _PTR(min=700)) @@ -432,35 +472,53 @@ def record_and_respond(self, test, num_samples, num_iters, class TestLoggingReportFormatter(unittest.TestCase): def test_plain_log_format(self): - lr = logging.makeLogRecord({ - 'name': 'Base.category', 'level': logging.DEBUG, - 'levelname': 'DEBUG', 'msg': 'Hi!'}) + lr = logging.makeLogRecord( + { + "name": "Base.category", + "level": logging.DEBUG, + "levelname": "DEBUG", + "msg": "Hi!", + } + ) f = LoggingReportFormatter() - self.assertEqual(f.format(lr), 'DEBUG category: Hi!') + self.assertEqual(f.format(lr), "DEBUG category: Hi!") def test_colored_log_format(self): def record(level, level_name): - return logging.makeLogRecord({ - 'name': 'Base.category', 'levelno': level, - 'levelname': level_name, 'msg': 'Hi!'}) + return logging.makeLogRecord( + { + "name": "Base.category", + "levelno": level, + "levelname": level_name, + "msg": "Hi!", + } + ) + f = LoggingReportFormatter(use_color=True) - self.assertEqual(f.format(record(logging.DEBUG, 'DEBUG')), - '\x1b[1;39mcategory: Hi!\x1b[1;0m') - self.assertEqual(f.format(record(logging.INFO, 'INFO')), - '\x1b[1;32mcategory: Hi!\x1b[1;0m') - self.assertEqual(f.format(record(logging.WARNING, 'WARNING')), - '\x1b[1;33mcategory: Hi!\x1b[1;0m') - self.assertEqual(f.format(record(logging.ERROR, 'ERROR')), - 
'\x1b[1;31mcategory: Hi!\x1b[1;0m') - self.assertEqual(f.format(record(logging.CRITICAL, 'CRITICAL')), - '\x1b[1;35mcategory: Hi!\x1b[1;0m') + self.assertEqual( + f.format(record(logging.DEBUG, "DEBUG")), "\x1b[1;39mcategory: Hi!\x1b[1;0m" + ) + self.assertEqual( + f.format(record(logging.INFO, "INFO")), "\x1b[1;32mcategory: Hi!\x1b[1;0m" + ) + self.assertEqual( + f.format(record(logging.WARNING, "WARNING")), + "\x1b[1;33mcategory: Hi!\x1b[1;0m", + ) + self.assertEqual( + f.format(record(logging.ERROR, "ERROR")), "\x1b[1;31mcategory: Hi!\x1b[1;0m" + ) + self.assertEqual( + f.format(record(logging.CRITICAL, "CRITICAL")), + "\x1b[1;35mcategory: Hi!\x1b[1;0m", + ) def test_no_prefix_for_base_logging(self): - lr = logging.makeLogRecord({ - 'name': 'Base', 'level': logging.INFO, - 'levelname': 'INFO', 'msg': 'Hi!'}) + lr = logging.makeLogRecord( + {"name": "Base", "level": logging.INFO, "levelname": "INFO", "msg": "Hi!"} + ) f = LoggingReportFormatter() - self.assertEqual(f.format(lr), 'INFO Hi!') + self.assertEqual(f.format(lr), "INFO Hi!") class TestMarkdownReportHandler(unittest.TestCase): @@ -475,44 +533,41 @@ def assert_contains(self, texts): self.assertIn(text, self.stream.getvalue()) def record(self, level, category, msg): - return logging.makeLogRecord({ - 'name': 'BenchmarkDoctor.' + category, - 'levelno': level, 'msg': msg}) + return logging.makeLogRecord( + {"name": "BenchmarkDoctor." + category, "levelno": level, "msg": msg} + ) def test_init_writes_table_header(self): self.assertEqual(self.handler.level, logging.INFO) - self.assert_contains(['Benchmark Check Report\n', '---|---']) + self.assert_contains(["Benchmark Check Report\n", "---|---"]) def test_close_writes_final_newlines(self): self.handler.close() - self.assert_contains(['---|---\n\n']) + self.assert_contains(["---|---\n\n"]) def test_errors_and_warnings_start_new_rows_with_icons(self): - self.handler.emit(self.record(logging.ERROR, '', 'Blunder')) - self.handler.emit(self.record(logging.WARNING, '', 'Boo-boo')) - self.assert_contains(['\n⛔️ | Blunder', - '\n⚠️ | Boo-boo']) + self.handler.emit(self.record(logging.ERROR, "", "Blunder")) + self.handler.emit(self.record(logging.WARNING, "", "Boo-boo")) + self.assert_contains(["\n⛔️ | Blunder", "\n⚠️ | Boo-boo"]) def test_category_icons(self): - self.handler.emit(self.record(logging.WARNING, 'naming', 'naming')) - self.handler.emit(self.record(logging.WARNING, 'runtime', 'runtime')) - self.handler.emit(self.record(logging.WARNING, 'memory', 'memory')) - self.assert_contains(['🔤 | naming', - '⏱ | runtime', - 'Ⓜ️ | memory']) + self.handler.emit(self.record(logging.WARNING, "naming", "naming")) + self.handler.emit(self.record(logging.WARNING, "runtime", "runtime")) + self.handler.emit(self.record(logging.WARNING, "memory", "memory")) + self.assert_contains(["🔤 | naming", "⏱ | runtime", "Ⓜ️ | memory"]) def test_info_stays_in_table_cell_breaking_line_row_to_subscript(self): """Assuming Infos only follow after Errors and Warnings. Infos don't emit category icons. """ - self.handler.emit(self.record(logging.ERROR, 'naming', 'Blunder')) - self.handler.emit(self.record(logging.INFO, 'naming', 'Fixit')) - self.assert_contains(['Blunder
Fixit']) + self.handler.emit(self.record(logging.ERROR, "naming", "Blunder")) + self.handler.emit(self.record(logging.INFO, "naming", "Fixit")) + self.assert_contains(["Blunder
Fixit"]) def test_names_in_code_format(self): - self.handler.emit(self.record(logging.WARNING, '', "'QuotedName'")) - self.assert_contains(['| `QuotedName`']) + self.handler.emit(self.record(logging.WARNING, "", "'QuotedName'")) + self.assert_contains(["| `QuotedName`"]) def _PTR(min=700, mem_pages=1000, setup=None): @@ -520,19 +575,17 @@ def _PTR(min=700, mem_pages=1000, setup=None): return Stub(samples=Stub(min=min), mem_pages=mem_pages, setup=setup) -def _run(test, num_samples=None, num_iters=None, verbose=None, - measure_memory=False): +def _run(test, num_samples=None, num_iters=None, verbose=None, measure_memory=False): """Helper function that constructs tuple with arguments for run method.""" - return ( - test, num_samples, num_iters, verbose, measure_memory) + return (test, num_samples, num_iters, verbose, measure_memory) class TestBenchmarkDoctor(unittest.TestCase): @classmethod def setUpClass(cls): super(TestBenchmarkDoctor, cls).setUpClass() - doctor_log = logging.getLogger('BenchmarkDoctor') - cls._doctor_log_handler = MockLoggingHandler(level='DEBUG') + doctor_log = logging.getLogger("BenchmarkDoctor") + cls._doctor_log_handler = MockLoggingHandler(level="DEBUG") doctor_log.addHandler(cls._doctor_log_handler) def setUp(self): @@ -547,34 +600,33 @@ def assert_contains(self, texts, output): self.assertIn(text, output) def test_uses_logging(self): - driver = BenchmarkDriverMock(tests=['B1', 'B2']) + driver = BenchmarkDriverMock(tests=["B1", "B2"]) with captured_output() as (out, _): BenchmarkDoctor(self.args, driver) - self.assert_contains(['Checking tests: B1, B2'], self.logs['debug']) - self.assertEqual(out.getvalue(), '') + self.assert_contains(["Checking tests: B1, B2"], self.logs["debug"]) + self.assertEqual(out.getvalue(), "") def test_supports_verbose_output(self): - driver = BenchmarkDriverMock(tests=['B1', 'B2']) + driver = BenchmarkDriverMock(tests=["B1", "B2"]) driver.verbose = True self.args.verbose = True with captured_output() as (out, _): BenchmarkDoctor(self.args, driver) - self.assert_contains(['Checking tests: B1, B2'], out.getvalue()) + self.assert_contains(["Checking tests: B1, B2"], out.getvalue()) def test_uses_report_formatter(self): - doctor = BenchmarkDoctor(self.args, BenchmarkDriverMock(tests=['B1'])) - console_handler = logging.getLogger('BenchmarkDoctor').handlers[1] + doctor = BenchmarkDoctor(self.args, BenchmarkDriverMock(tests=["B1"])) + console_handler = logging.getLogger("BenchmarkDoctor").handlers[1] self.assertTrue(doctor) self.assertTrue(isinstance(console_handler, logging.StreamHandler)) - self.assertTrue(isinstance(console_handler.formatter, - LoggingReportFormatter)) + self.assertTrue(isinstance(console_handler.formatter, LoggingReportFormatter)) def test_uses_optional_markdown_report_formatter(self): self.args.markdown = True with captured_output() as (_, _): - doc = BenchmarkDoctor(self.args, BenchmarkDriverMock(tests=['B1'])) + doc = BenchmarkDoctor(self.args, BenchmarkDriverMock(tests=["B1"])) self.assertTrue(doc) - console_handler = logging.getLogger('BenchmarkDoctor').handlers[1] + console_handler = logging.getLogger("BenchmarkDoctor").handlers[1] self.assertTrue(isinstance(console_handler, MarkdownReportHandler)) def test_measure_10_independent_1s_benchmark_series(self): @@ -584,86 +636,155 @@ def test_measure_10_independent_1s_benchmark_series(self): take measurements for approximately 1s based on short initial runtime sampling. Capped at 200 samples. 
""" - driver = BenchmarkDriverMock(tests=['B1'], responses=([ - # calibration run, returns a stand-in for PerformanceTestResult - (_run('B1', num_samples=3, num_iters=1, - verbose=True), _PTR(min=300))] + - # 5x i1 series, with 300 μs runtime its possible to take 4098 - # samples/s, but it should be capped at 2k - ([(_run('B1', num_samples=200, num_iters=1, - verbose=True, measure_memory=True), _PTR(min=300))] * 5) + - # 5x i2 series - ([(_run('B1', num_samples=200, num_iters=2, - verbose=True, measure_memory=True), _PTR(min=300))] * 5) - )) + driver = BenchmarkDriverMock( + tests=["B1"], + responses=( + [ + # calibration run, returns a stand-in for PerformanceTestResult + ( + _run("B1", num_samples=3, num_iters=1, verbose=True), + _PTR(min=300), + ) + ] + + + # 5x i1 series, with 300 μs runtime its possible to take 4098 + # samples/s, but it should be capped at 2k + ( + [ + ( + _run( + "B1", + num_samples=200, + num_iters=1, + verbose=True, + measure_memory=True, + ), + _PTR(min=300), + ) + ] + * 5 + ) + + + # 5x i2 series + ( + [ + ( + _run( + "B1", + num_samples=200, + num_iters=2, + verbose=True, + measure_memory=True, + ), + _PTR(min=300), + ) + ] + * 5 + ) + ), + ) doctor = BenchmarkDoctor(self.args, driver) with captured_output() as (out, _): - measurements = doctor.measure('B1') + measurements = doctor.measure("B1") driver.assert_called_all_expected() self.assert_contains( - ['name', - 'B1 O i1a', 'B1 O i1b', 'B1 O i1c', 'B1 O i1d', 'B1 O i1e', - 'B1 O i2a', 'B1 O i2b', 'B1 O i2c', 'B1 O i2d', 'B1 O i2e'], - measurements.keys()) - self.assertEqual(measurements['name'], 'B1') + [ + "name", + "B1 O i1a", + "B1 O i1b", + "B1 O i1c", + "B1 O i1d", + "B1 O i1e", + "B1 O i2a", + "B1 O i2b", + "B1 O i2c", + "B1 O i2d", + "B1 O i2e", + ], + measurements.keys(), + ) + self.assertEqual(measurements["name"], "B1") self.assert_contains( - ['Calibrating num-samples for B1:', - 'Runtime 300 μs yields 4096 adjusted samples per second.', - 'Measuring B1, 5 x i1 (200 samples), 5 x i2 (200 samples)'], - self.logs['debug']) + [ + "Calibrating num-samples for B1:", + "Runtime 300 μs yields 4096 adjusted samples per second.", + "Measuring B1, 5 x i1 (200 samples), 5 x i2 (200 samples)", + ], + self.logs["debug"], + ) def test_benchmark_name_matches_naming_conventions(self): - driver = BenchmarkDriverMock(tests=[ - 'BenchmarkName', 'CapitalWordsConvention', 'ABBRName', - 'TooManyCamelCaseHumps', - 'Existential.Array.method.1x.Val4', - 'Flatten.Array.Array.Str.for-in.reserved', - 'Flatten.Array.String?.as!.NSArray', - 'wrongCase', 'Wrong_convention', 'Illegal._$%[]<>{}@^()']) + driver = BenchmarkDriverMock( + tests=[ + "BenchmarkName", + "CapitalWordsConvention", + "ABBRName", + "TooManyCamelCaseHumps", + "Existential.Array.method.1x.Val4", + "Flatten.Array.Array.Str.for-in.reserved", + "Flatten.Array.String?.as!.NSArray", + "wrongCase", + "Wrong_convention", + "Illegal._$%[]<>{}@^()", + ] + ) with captured_output() as (out, _): doctor = BenchmarkDoctor(self.args, driver) doctor.check() output = out.getvalue() - self.assertIn('naming: ', output) - self.assertNotIn('BenchmarkName', output) - self.assertNotIn('CapitalWordsConvention', output) - self.assertNotIn('ABBRName', output) - self.assertNotIn('Existential.Array.method.1x.Val4', output) - self.assertNotIn('Flatten.Array.Array.Str.for-in.reserved', output) - self.assertNotIn('Flatten.Array.String?.as!.NSArray', output) + self.assertIn("naming: ", output) + self.assertNotIn("BenchmarkName", output) + self.assertNotIn("CapitalWordsConvention", output) + 
self.assertNotIn("ABBRName", output) + self.assertNotIn("Existential.Array.method.1x.Val4", output) + self.assertNotIn("Flatten.Array.Array.Str.for-in.reserved", output) + self.assertNotIn("Flatten.Array.String?.as!.NSArray", output) err_msg = " name doesn't conform to benchmark naming convention." self.assert_contains( - ["'wrongCase'" + err_msg, "'Wrong_convention'" + err_msg, - "'Illegal._$%[]<>{}@^()'" + err_msg], self.logs['error']) + [ + "'wrongCase'" + err_msg, + "'Wrong_convention'" + err_msg, + "'Illegal._$%[]<>{}@^()'" + err_msg, + ], + self.logs["error"], + ) self.assert_contains( ["'TooManyCamelCaseHumps' name is composed of 5 words."], - self.logs['warning']) + self.logs["warning"], + ) + self.assert_contains(["See http://bit.ly/BenchmarkNaming"], self.logs["info"]) self.assert_contains( - ['See http://bit.ly/BenchmarkNaming'], self.logs['info']) - self.assert_contains( - ["Split 'TooManyCamelCaseHumps' name into dot-separated groups " - "and variants. See http://bit.ly/BenchmarkNaming"], - self.logs['info']) + [ + "Split 'TooManyCamelCaseHumps' name into dot-separated groups " + "and variants. See http://bit.ly/BenchmarkNaming" + ], + self.logs["info"], + ) def test_benchmark_name_is_at_most_40_chars_long(self): - driver = BenchmarkDriverMock(tests=[ - 'BenchmarkName', - 'ThisTestNameIsTooLongAndCausesOverflowsInReports']) + driver = BenchmarkDriverMock( + tests=["BenchmarkName", "ThisTestNameIsTooLongAndCausesOverflowsInReports"] + ) with captured_output() as (out, _): doctor = BenchmarkDoctor(self.args, driver) doctor.check() output = out.getvalue() - self.assertIn('naming: ', output) - self.assertNotIn('BenchmarkName', output) + self.assertIn("naming: ", output) + self.assertNotIn("BenchmarkName", output) self.assert_contains( - ["'ThisTestNameIsTooLongAndCausesOverflowsInReports' name is " - "48 characters long."], self.logs['error']) + [ + "'ThisTestNameIsTooLongAndCausesOverflowsInReports' name is " + "48 characters long." + ], + self.logs["error"], + ) self.assert_contains( ["Benchmark name should not be longer than 40 characters."], - self.logs['info']) + self.logs["info"], + ) def test_benchmark_runtime_range(self): """Optimized benchmark should have runtime between 20 μs and 1000 μs. @@ -679,159 +800,226 @@ def test_benchmark_runtime_range(self): Warn about longer runtime. Runtimes over 10ms are an error. 
""" + def measurements(name, runtime): - return {'name': name, - name + ' O i1a': _PTR(min=runtime + 2), - name + ' O i2a': _PTR(min=runtime)} + return { + "name": name, + name + " O i1a": _PTR(min=runtime + 2), + name + " O i2a": _PTR(min=runtime), + } with captured_output() as (out, _): doctor = BenchmarkDoctor(self.args, BenchmarkDriverMock([])) - doctor.analyze(measurements('Sylph', 0)) - doctor.analyze(measurements('Unicorn', 3)) - doctor.analyze(measurements('Cheetah', 200)) - doctor.analyze(measurements('Hare', 1001)) - doctor.analyze(measurements('Tortoise', 500000)) - doctor.analyze({'name': 'OverheadTurtle', - 'OverheadTurtle O i1a': _PTR(min=800000), - 'OverheadTurtle O i2a': _PTR(min=700000)}) + doctor.analyze(measurements("Sylph", 0)) + doctor.analyze(measurements("Unicorn", 3)) + doctor.analyze(measurements("Cheetah", 200)) + doctor.analyze(measurements("Hare", 1001)) + doctor.analyze(measurements("Tortoise", 500000)) + doctor.analyze( + { + "name": "OverheadTurtle", + "OverheadTurtle O i1a": _PTR(min=800000), + "OverheadTurtle O i2a": _PTR(min=700000), + } + ) output = out.getvalue() - self.assertIn('runtime: ', output) - self.assertNotIn('Cheetah', output) - self.assert_contains(["'Sylph' execution took 0 μs."], - self.logs['error']) + self.assertIn("runtime: ", output) + self.assertNotIn("Cheetah", output) + self.assert_contains(["'Sylph' execution took 0 μs."], self.logs["error"]) self.assert_contains( - ["Ensure the workload of 'Sylph' has a properly measurable size" - " (runtime > 20 μs) and is not eliminated by the compiler (use " - "`blackHole` function if necessary)."], - self.logs['info']) - self.assert_contains(["'Unicorn' execution took 3 μs."], - self.logs['warning']) + [ + "Ensure the workload of 'Sylph' has a properly measurable size" + " (runtime > 20 μs) and is not eliminated by the compiler (use " + "`blackHole` function if necessary)." + ], + self.logs["info"], + ) + self.assert_contains(["'Unicorn' execution took 3 μs."], self.logs["warning"]) self.assert_contains( ["Increase the workload of 'Unicorn' to be more than 20 μs."], - self.logs['info']) - self.assert_contains(["'Hare' execution took at least 1001 μs."], - self.logs['warning']) + self.logs["info"], + ) + self.assert_contains( + ["'Hare' execution took at least 1001 μs."], self.logs["warning"] + ) self.assert_contains( - ["Decrease the workload of 'Hare' by a factor of 2 (10), " - "to be less than 1000 μs."], self.logs['info']) + [ + "Decrease the workload of 'Hare' by a factor of 2 (10), " + "to be less than 1000 μs." + ], + self.logs["info"], + ) self.assert_contains( - ["'Tortoise' execution took at least 500000 μs."], - self.logs['error']) + ["'Tortoise' execution took at least 500000 μs."], self.logs["error"] + ) self.assert_contains( - ["Decrease the workload of 'Tortoise' by a factor of 512 (1000), " - "to be less than 1000 μs."], self.logs['info']) + [ + "Decrease the workload of 'Tortoise' by a factor of 512 (1000), " + "to be less than 1000 μs." + ], + self.logs["info"], + ) self.assert_contains( - ["'OverheadTurtle' execution took at least 600000 μs" - " (excluding the setup overhead)."], - self.logs['error']) + [ + "'OverheadTurtle' execution took at least 600000 μs" + " (excluding the setup overhead)." 
+ ], + self.logs["error"], + ) def test_benchmark_has_no_significant_setup_overhead(self): with captured_output() as (out, _): doctor = BenchmarkDoctor(self.args, BenchmarkDriverMock([])) - doctor.analyze({ - 'name': 'NoOverhead', # not 'significant' enough - # Based on DropFirstArray a10/e10: overhead 3.7% (6 μs) - 'NoOverhead O i1a': _PTR(min=162), - 'NoOverhead O i2a': _PTR(min=159)}) - doctor.analyze({ - 'name': 'SO', # Setup Overhead - # Based on SuffixArrayLazy a10/e10: overhead 5.8% (4 μs) - 'SO O i1a': _PTR(min=69), 'SO O i1b': _PTR(min=70), - 'SO O i2a': _PTR(min=67), 'SO O i2b': _PTR(min=68)}) - doctor.analyze({'name': 'Zero', 'Zero O i1a': _PTR(min=0), - 'Zero O i2a': _PTR(min=0)}) - doctor.analyze({ - 'name': 'LOA', # Limit of Accuracy - # Impossible to detect overhead: - # Even 1μs change in 20μs runtime is 5%. - 'LOA O i1a': _PTR(min=21), - 'LOA O i2a': _PTR(min=20)}) + doctor.analyze( + { + "name": "NoOverhead", # not 'significant' enough + # Based on DropFirstArray a10/e10: overhead 3.7% (6 μs) + "NoOverhead O i1a": _PTR(min=162), + "NoOverhead O i2a": _PTR(min=159), + } + ) + doctor.analyze( + { + "name": "SO", # Setup Overhead + # Based on SuffixArrayLazy a10/e10: overhead 5.8% (4 μs) + "SO O i1a": _PTR(min=69), + "SO O i1b": _PTR(min=70), + "SO O i2a": _PTR(min=67), + "SO O i2b": _PTR(min=68), + } + ) + doctor.analyze( + {"name": "Zero", "Zero O i1a": _PTR(min=0), "Zero O i2a": _PTR(min=0)} + ) + doctor.analyze( + { + "name": "LOA", # Limit of Accuracy + # Impossible to detect overhead: + # Even 1μs change in 20μs runtime is 5%. + "LOA O i1a": _PTR(min=21), + "LOA O i2a": _PTR(min=20), + } + ) output = out.getvalue() - self.assertIn('runtime: ', output) - self.assertNotIn('NoOverhead', output) - self.assertNotIn('ZeroRuntime', output) - self.assertNotIn('LOA', output) + self.assertIn("runtime: ", output) + self.assertNotIn("NoOverhead", output) + self.assertNotIn("ZeroRuntime", output) + self.assertNotIn("LOA", output) self.assert_contains( - ["'SO' has setup overhead of 4 μs (5.8%)."], - self.logs['error']) + ["'SO' has setup overhead of 4 μs (5.8%)."], self.logs["error"] + ) self.assert_contains( - ["Move initialization of benchmark data to the `setUpFunction` " - "registered in `BenchmarkInfo`."], self.logs['info']) + [ + "Move initialization of benchmark data to the `setUpFunction` " + "registered in `BenchmarkInfo`." 
+ ], + self.logs["info"], + ) def test_benchmark_setup_takes_reasonable_time(self): """Setup < 200 ms (20% extra on top of the typical 1 s measurement)""" with captured_output() as (out, _): doctor = BenchmarkDoctor(self.args, BenchmarkDriverMock([])) - doctor.analyze({ - 'name': 'NormalSetup', - 'NormalSetup O i1a': _PTR(setup=199999), - 'NormalSetup O i2a': _PTR(setup=200001)}) - doctor.analyze({ - 'name': 'LongSetup', - 'LongSetup O i1a': _PTR(setup=200001), - 'LongSetup O i2a': _PTR(setup=200002)}) + doctor.analyze( + { + "name": "NormalSetup", + "NormalSetup O i1a": _PTR(setup=199999), + "NormalSetup O i2a": _PTR(setup=200001), + } + ) + doctor.analyze( + { + "name": "LongSetup", + "LongSetup O i1a": _PTR(setup=200001), + "LongSetup O i2a": _PTR(setup=200002), + } + ) output = out.getvalue() - self.assertIn('runtime: ', output) - self.assertNotIn('NormalSetup', output) + self.assertIn("runtime: ", output) + self.assertNotIn("NormalSetup", output) self.assert_contains( - ["'LongSetup' setup took at least 200001 μs."], - self.logs['error']) + ["'LongSetup' setup took at least 200001 μs."], self.logs["error"] + ) self.assert_contains( - ["The `setUpFunction` should take no more than 200 ms."], - self.logs['info']) + ["The `setUpFunction` should take no more than 200 ms."], self.logs["info"] + ) def test_benchmark_has_constant_memory_use(self): """Benchmark's memory footprint must not vary with num-iters.""" with captured_output() as (out, _): doctor = BenchmarkDoctor(self.args, BenchmarkDriverMock([])) - doctor.analyze({ - # The threshold of 15 pages was estimated from previous - # measurements. The normal range should be probably aproximated - # by a function instead of a simple constant. - # TODO: re-evaluate normal range from whole SBS - 'name': 'ConstantMemory', - 'ConstantMemory O i1a': _PTR(mem_pages=1460), - 'ConstantMemory O i2a': _PTR(mem_pages=(1460 + 15))}) - doctor.analyze({ - 'name': 'VariableMemory', # ObserverForwardStruct - 'VariableMemory O i1a': _PTR(mem_pages=1460), - 'VariableMemory O i1b': _PTR(mem_pages=1472), - # i2 series start at 290 pages higher - 'VariableMemory O i2a': _PTR(mem_pages=1750), - 'VariableMemory O i2b': _PTR(mem_pages=1752)}) - measurements = dict([ - ('HighVariance O i{0}{1}'.format(num_iters, suffix), - _PTR(mem_pages=num_pages)) - for num_iters, pages in [ - (1, [6200, 5943, 4818, 5612, 5469]), - (2, [6244, 5832, 4674, 5176, 5490])] - for num_pages, suffix in zip(pages, list('abcde'))]) - measurements['name'] = 'HighVariance' # Array2D + doctor.analyze( + { + # The threshold of 15 pages was estimated from previous + # measurements. The normal range should be probably aproximated + # by a function instead of a simple constant. 
+ # TODO: re-evaluate normal range from whole SBS + "name": "ConstantMemory", + "ConstantMemory O i1a": _PTR(mem_pages=1460), + "ConstantMemory O i2a": _PTR(mem_pages=(1460 + 15)), + } + ) + doctor.analyze( + { + "name": "VariableMemory", # ObserverForwardStruct + "VariableMemory O i1a": _PTR(mem_pages=1460), + "VariableMemory O i1b": _PTR(mem_pages=1472), + # i2 series start at 290 pages higher + "VariableMemory O i2a": _PTR(mem_pages=1750), + "VariableMemory O i2b": _PTR(mem_pages=1752), + } + ) + measurements = dict( + [ + ( + "HighVariance O i{0}{1}".format(num_iters, suffix), + _PTR(mem_pages=num_pages), + ) + for num_iters, pages in [ + (1, [6200, 5943, 4818, 5612, 5469]), + (2, [6244, 5832, 4674, 5176, 5490]), + ] + for num_pages, suffix in zip(pages, list("abcde")) + ] + ) + measurements["name"] = "HighVariance" # Array2D doctor.analyze(measurements) output = out.getvalue() - self.assertIn('memory: ', output) - self.assertNotIn('ConstantMemory', output) + self.assertIn("memory: ", output) + self.assertNotIn("ConstantMemory", output) self.assert_contains( - ["'VariableMemory' varies the memory footprint of the base " - "workload depending on the `num-iters`."], - self.logs['error']) + [ + "'VariableMemory' varies the memory footprint of the base " + "workload depending on the `num-iters`." + ], + self.logs["error"], + ) self.assert_contains( - ["'VariableMemory' " - "mem_pages [i1, i2]: min=[1460, 1750] 𝚫=290 R=[12, 2]"], - self.logs['info']) + [ + "'VariableMemory' " + "mem_pages [i1, i2]: min=[1460, 1750] 𝚫=290 R=[12, 2]" + ], + self.logs["info"], + ) self.assert_contains( - ["'HighVariance' has very wide range of memory used between " - "independent, repeated measurements."], - self.logs['warning']) + [ + "'HighVariance' has very wide range of memory used between " + "independent, repeated measurements." 
+ ], + self.logs["warning"], + ) self.assert_contains( - ["'HighVariance' " - "mem_pages [i1, i2]: min=[4818, 4674] 𝚫=144 R=[1382, 1570]"], - self.logs['info']) + [ + "'HighVariance' " + "mem_pages [i1, i2]: min=[4818, 4674] 𝚫=144 R=[1382, 1570]" + ], + self.logs["info"], + ) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py index 4c1c6effffcd5..2053e93c0b42b 100644 --- a/benchmark/scripts/test_compare_perf_tests.py +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -48,11 +48,11 @@ def test_is_iterable(self): class TestPerformanceTestSamples(unittest.TestCase): def setUp(self): - self.samples = PerformanceTestSamples('B1') + self.samples = PerformanceTestSamples("B1") self.samples.add(Sample(7, 42, 1000)) def test_has_name(self): - self.assertEqual(self.samples.name, 'B1') + self.assertEqual(self.samples.name, "B1") def test_stores_samples(self): self.assertEqual(self.samples.count, 1) @@ -70,7 +70,7 @@ def test_quantile(self): self.assertEqual(self.samples.quantile(1), 1100) self.samples.add(Sample(3, 1, 1050)) self.assertEqual(self.samples.quantile(0), 1000) - self.assertEqual(self.samples.quantile(.5), 1050) + self.assertEqual(self.samples.quantile(0.5), 1050) self.assertEqual(self.samples.quantile(1), 1100) def assertEqualFiveNumberSummary(self, ss, expected_fns): @@ -82,20 +82,15 @@ def assertEqualFiveNumberSummary(self, ss, expected_fns): self.assertEqual(ss.max, e_max) def test_computes_five_number_summary(self): - self.assertEqualFiveNumberSummary( - self.samples, (1000, 1000, 1000, 1000, 1000)) + self.assertEqualFiveNumberSummary(self.samples, (1000, 1000, 1000, 1000, 1000)) self.samples.add(Sample(2, 1, 1100)) - self.assertEqualFiveNumberSummary( - self.samples, (1000, 1000, 1000, 1100, 1100)) + self.assertEqualFiveNumberSummary(self.samples, (1000, 1000, 1000, 1100, 1100)) self.samples.add(Sample(3, 1, 1050)) - self.assertEqualFiveNumberSummary( - self.samples, (1000, 1000, 1050, 1100, 1100)) + self.assertEqualFiveNumberSummary(self.samples, (1000, 1000, 1050, 1100, 1100)) self.samples.add(Sample(4, 1, 1025)) - self.assertEqualFiveNumberSummary( - self.samples, (1000, 1000, 1025, 1050, 1100)) + self.assertEqualFiveNumberSummary(self.samples, (1000, 1000, 1025, 1050, 1100)) self.samples.add(Sample(5, 1, 1075)) - self.assertEqualFiveNumberSummary( - self.samples, (1000, 1025, 1050, 1075, 1100)) + self.assertEqualFiveNumberSummary(self.samples, (1000, 1025, 1050, 1075, 1100)) def test_computes_inter_quartile_range(self): self.assertEqual(self.samples.iqr, 0) @@ -111,59 +106,66 @@ def assertEqualtats(self, stats, expected_stats): def test_computes_mean_sd_cv(self): ss = self.samples - self.assertEqualtats( - (ss.mean, ss.sd, ss.cv), (1000.0, 0.0, 0.0)) + self.assertEqualtats((ss.mean, ss.sd, ss.cv), (1000.0, 0.0, 0.0)) self.samples.add(Sample(2, 1, 1100)) - self.assertEqualtats( - (ss.mean, ss.sd, ss.cv), (1050.0, 70.71, 6.7 / 100)) + self.assertEqualtats((ss.mean, ss.sd, ss.cv), (1050.0, 70.71, 6.7 / 100)) def test_computes_range_spread(self): ss = self.samples - self.assertEqualtats( - (ss.range, ss.spread), (0, 0)) + self.assertEqualtats((ss.range, ss.spread), (0, 0)) self.samples.add(Sample(2, 1, 1100)) - self.assertEqualtats( - (ss.range, ss.spread), (100, 10.0 / 100)) + self.assertEqualtats((ss.range, ss.spread), (100, 10.0 / 100)) def test_init_with_samples(self): self.samples = PerformanceTestSamples( - 'B2', [Sample(0, 1, 1000), Sample(1, 1, 
1100)]) + "B2", [Sample(0, 1, 1000), Sample(1, 1, 1100)] + ) self.assertEqual(self.samples.count, 2) self.assertEqualtats( - (self.samples.mean, self.samples.sd, - self.samples.range, self.samples.spread), - (1050.0, 70.71, 100, 9.52 / 100)) + ( + self.samples.mean, + self.samples.sd, + self.samples.range, + self.samples.spread, + ), + (1050.0, 70.71, 100, 9.52 / 100), + ) def test_can_handle_zero_runtime(self): # guard against dividing by 0 - self.samples = PerformanceTestSamples('Zero') + self.samples = PerformanceTestSamples("Zero") self.samples.add(Sample(0, 1, 0)) self.assertEqualtats( - (self.samples.mean, self.samples.sd, self.samples.cv, - self.samples.range, self.samples.spread), - (0, 0, 0.0, 0, 0.0)) + ( + self.samples.mean, + self.samples.sd, + self.samples.cv, + self.samples.range, + self.samples.spread, + ), + (0, 0, 0.0, 0, 0.0), + ) def test_excludes_outliers(self): - ss = [Sample(*map(int, s.split())) for s in - '0 1 1000, 1 1 1025, 2 1 1050, 3 1 1075, 4 1 1100, ' - '5 1 1000, 6 1 1025, 7 1 1050, 8 1 1075, 9 1 1100, ' - '10 1 1050, 11 1 949, 12 1 1151'.split(',')] - self.samples = PerformanceTestSamples('Outliers', ss) + ss = [ + Sample(*map(int, s.split())) + for s in "0 1 1000, 1 1 1025, 2 1 1050, 3 1 1075, 4 1 1100, " + "5 1 1000, 6 1 1025, 7 1 1050, 8 1 1075, 9 1 1100, " + "10 1 1050, 11 1 949, 12 1 1151".split(",") + ] + self.samples = PerformanceTestSamples("Outliers", ss) self.assertEqual(self.samples.count, 13) - self.assertEqualtats( - (self.samples.mean, self.samples.sd), (1050, 52.36)) + self.assertEqualtats((self.samples.mean, self.samples.sd), (1050, 52.36)) self.samples.exclude_outliers() self.assertEqual(self.samples.count, 11) self.assertEqual(self.samples.outliers, ss[11:]) - self.assertEqualFiveNumberSummary( - self.samples, (1000, 1025, 1050, 1075, 1100)) - self.assertEqualtats( - (self.samples.mean, self.samples.sd), (1050, 35.36)) + self.assertEqualFiveNumberSummary(self.samples, (1000, 1025, 1050, 1075, 1100)) + self.assertEqualtats((self.samples.mean, self.samples.sd), (1050, 35.36)) def test_excludes_outliers_zero_IQR(self): - self.samples = PerformanceTestSamples('Tight') + self.samples = PerformanceTestSamples("Tight") self.samples.add(Sample(0, 2, 23)) self.samples.add(Sample(1, 2, 18)) self.samples.add(Sample(2, 2, 18)) @@ -173,13 +175,14 @@ def test_excludes_outliers_zero_IQR(self): self.samples.exclude_outliers() self.assertEqual(self.samples.count, 3) - self.assertEqualtats( - (self.samples.min, self.samples.max), (18, 18)) + self.assertEqualtats((self.samples.min, self.samples.max), (18, 18)) def test_excludes_outliers_top_only(self): - ss = [Sample(*map(int, s.split())) for s in - '0 1 1, 1 1 2, 2 1 2, 3 1 2, 4 1 3'.split(',')] - self.samples = PerformanceTestSamples('Top', ss) + ss = [ + Sample(*map(int, s.split())) + for s in "0 1 1, 1 1 2, 2 1 2, 3 1 2, 4 1 3".split(",") + ] + self.samples = PerformanceTestSamples("Top", ss) self.assertEqualFiveNumberSummary(self.samples, (1, 2, 2, 2, 3)) self.assertEqual(self.samples.iqr, 0) @@ -191,48 +194,52 @@ def test_excludes_outliers_top_only(self): class TestPerformanceTestResult(unittest.TestCase): def test_init(self): - log_line = '1,AngryPhonebook,20,10664,12933,11035,576,10884' - r = PerformanceTestResult(log_line.split(',')) - self.assertEqual(r.test_num, '1') - self.assertEqual(r.name, 'AngryPhonebook') + log_line = "1,AngryPhonebook,20,10664,12933,11035,576,10884" + r = PerformanceTestResult(log_line.split(",")) + self.assertEqual(r.test_num, "1") + self.assertEqual(r.name, 
"AngryPhonebook") self.assertEqual( (r.num_samples, r.min, r.max, r.mean, r.sd, r.median), - (20, 10664, 12933, 11035, 576, 10884)) + (20, 10664, 12933, 11035, 576, 10884), + ) self.assertEqual(r.samples, None) - log_line = '1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336' - r = PerformanceTestResult(log_line.split(',')) + log_line = "1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336" + r = PerformanceTestResult(log_line.split(",")) self.assertEqual(r.max_rss, 10510336) def test_init_quantiles(self): # #,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs) - log = '1,Ackermann,3,54383,54512,54601' - r = PerformanceTestResult(log.split(','), quantiles=True) - self.assertEqual(r.test_num, '1') - self.assertEqual(r.name, 'Ackermann') - self.assertEqual((r.num_samples, r.min, r.median, r.max), - (3, 54383, 54512, 54601)) + log = "1,Ackermann,3,54383,54512,54601" + r = PerformanceTestResult(log.split(","), quantiles=True) + self.assertEqual(r.test_num, "1") + self.assertEqual(r.name, "Ackermann") + self.assertEqual( + (r.num_samples, r.min, r.median, r.max), (3, 54383, 54512, 54601) + ) self.assertAlmostEquals(r.mean, 54498.67, places=2) self.assertAlmostEquals(r.sd, 109.61, places=2) self.assertEqual(r.samples.count, 3) self.assertEqual(r.samples.num_samples, 3) - self.assertEqual([s.runtime for s in r.samples.all_samples], - [54383, 54512, 54601]) + self.assertEqual( + [s.runtime for s in r.samples.all_samples], [54383, 54512, 54601] + ) # #,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B) - log = '1,Ackermann,3,54529,54760,55807,266240' - r = PerformanceTestResult(log.split(','), quantiles=True, memory=True) + log = "1,Ackermann,3,54529,54760,55807,266240" + r = PerformanceTestResult(log.split(","), quantiles=True, memory=True) self.assertEqual((r.samples.count, r.max_rss), (3, 266240)) # #,TEST,SAMPLES,MIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs) - log = '1,Ackermann,5,54570,54593,54644,57212,58304' - r = PerformanceTestResult(log.split(','), quantiles=True, memory=False) - self.assertEqual((r.num_samples, r.min, r.median, r.max), - (5, 54570, 54644, 58304)) + log = "1,Ackermann,5,54570,54593,54644,57212,58304" + r = PerformanceTestResult(log.split(","), quantiles=True, memory=False) + self.assertEqual( + (r.num_samples, r.min, r.median, r.max), (5, 54570, 54644, 58304) + ) self.assertEqual((r.samples.q1, r.samples.q3), (54593, 57212)) self.assertEqual(r.samples.count, 5) # #,TEST,SAMPLES,MIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs),MAX_RSS(B) - log = '1,Ackermann,5,54686,54731,54774,55030,63466,270336' - r = PerformanceTestResult(log.split(','), quantiles=True, memory=True) + log = "1,Ackermann,5,54686,54731,54774,55030,63466,270336" + r = PerformanceTestResult(log.split(","), quantiles=True, memory=True) self.assertEqual(r.samples.num_samples, 5) self.assertEqual(r.samples.count, 4) # outlier was excluded self.assertEqual(r.max_rss, 270336) @@ -241,10 +248,9 @@ def test_init_delta_quantiles(self): # #,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX # 2-quantile from 2 samples in repeated min, when delta encoded, # the difference is 0, which is ommited -- only separator remains - log = '202,DropWhileArray,2,265,,22' - r = PerformanceTestResult(log.split(','), quantiles=True, delta=True) - self.assertEqual((r.num_samples, r.min, r.median, r.max), - (2, 265, 265, 287)) + log = "202,DropWhileArray,2,265,,22" + r = PerformanceTestResult(log.split(","), quantiles=True, delta=True) + self.assertEqual((r.num_samples, r.min, r.median, r.max), (2, 265, 265, 287)) self.assertEqual(r.samples.count, 2) 
self.assertEqual(r.samples.num_samples, 2) @@ -260,14 +266,17 @@ def test_init_oversampled_quantiles(self): qs <- subsample(x, s); c(qs[1], diff(qs)) })) sapply(c(3, 5, 11, 21), tbl) """ + def validatePTR(deq): # construct from delta encoded quantiles string - deq = deq.split(',') - num_samples = deq.count('1') - r = PerformanceTestResult(['0', 'B', str(num_samples)] + deq, - quantiles=True, delta=True) + deq = deq.split(",") + num_samples = deq.count("1") + r = PerformanceTestResult( + ["0", "B", str(num_samples)] + deq, quantiles=True, delta=True + ) self.assertEqual(r.samples.num_samples, num_samples) - self.assertEqual([s.runtime for s in r.samples.all_samples], - range(1, num_samples + 1)) + self.assertEqual( + [s.runtime for s in r.samples.all_samples], range(1, num_samples + 1) + ) delta_encoded_quantiles = """ 1,, @@ -306,55 +315,58 @@ def validatePTR(deq): # construct from delta encoded quantiles string 1,,1,1,1,1,1,1,1,1,,1,1,1,1,1,1,1,1,1, 1,,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1""" - map(validatePTR, delta_encoded_quantiles.split('\n')[1:]) + map(validatePTR, delta_encoded_quantiles.split("\n")[1:]) def test_init_meta(self): # #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),… # …PAGES,ICS,YIELD - log = '1,Ackermann,200,715,1281,726,47,715,7,29,15' - r = PerformanceTestResult(log.split(','), meta=True) - self.assertEqual((r.test_num, r.name), ('1', 'Ackermann')) + log = "1,Ackermann,200,715,1281,726,47,715,7,29,15" + r = PerformanceTestResult(log.split(","), meta=True) + self.assertEqual((r.test_num, r.name), ("1", "Ackermann")) self.assertEqual( (r.num_samples, r.min, r.max, r.mean, r.sd, r.median), - (200, 715, 1281, 726, 47, 715)) - self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), - (7, 29, 15)) + (200, 715, 1281, 726, 47, 715), + ) + self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), (7, 29, 15)) # #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B),… # …PAGES,ICS,YIELD - log = '1,Ackermann,200,715,1951,734,97,715,36864,9,50,15' - r = PerformanceTestResult(log.split(','), memory=True, meta=True) + log = "1,Ackermann,200,715,1951,734,97,715,36864,9,50,15" + r = PerformanceTestResult(log.split(","), memory=True, meta=True) self.assertEqual( (r.num_samples, r.min, r.max, r.mean, r.sd, r.median), - (200, 715, 1951, 734, 97, 715)) + (200, 715, 1951, 734, 97, 715), + ) self.assertEqual( (r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss), - (9, 50, 15, 36864)) + (9, 50, 15, 36864), + ) # #,TEST,SAMPLES,MIN(μs),MAX(μs),PAGES,ICS,YIELD - log = '1,Ackermann,200,715,3548,8,31,15' - r = PerformanceTestResult(log.split(','), quantiles=True, meta=True) + log = "1,Ackermann,200,715,3548,8,31,15" + r = PerformanceTestResult(log.split(","), quantiles=True, meta=True) self.assertEqual((r.num_samples, r.min, r.max), (200, 715, 3548)) - self.assertEqual((r.samples.count, r.samples.min, r.samples.max), - (2, 715, 3548)) - self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), - (8, 31, 15)) + self.assertEqual( + (r.samples.count, r.samples.min, r.samples.max), (2, 715, 3548) + ) + self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), (8, 31, 15)) # #,TEST,SAMPLES,MIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD - log = '1,Ackermann,200,715,1259,32768,8,28,15' + log = "1,Ackermann,200,715,1259,32768,8,28,15" r = PerformanceTestResult( - log.split(','), quantiles=True, memory=True, meta=True) + log.split(","), quantiles=True, memory=True, meta=True + ) 
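# A rough illustration of the column layout asserted just below, assuming (as
# the header comments in these tests indicate) that with `meta=True` the last
# three fields are always PAGES, ICS and YIELD, and that `memory=True` adds a
# MAX_RSS(B) field immediately before them. `split_meta_tail` is a hypothetical
# helper, not the parser used by PerformanceTestResult.
def split_meta_tail(fields, memory):
    """Split the trailing meta columns off a benchmark log row."""
    pages, ics, yields = (int(f) for f in fields[-3:])
    rest = fields[:-3]
    max_rss = int(rest.pop()) if memory else None
    return rest, max_rss, pages, ics, yields


head, max_rss, pages, ics, yields = split_meta_tail(
    "1,Ackermann,200,715,1259,32768,8,28,15".split(","), memory=True
)
assert (head, max_rss, pages, ics, yields) == (
    ["1", "Ackermann", "200", "715", "1259"],
    32768,
    8,
    28,
    15,
)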
self.assertEqual((r.num_samples, r.min, r.max), (200, 715, 1259)) - self.assertEqual((r.samples.count, r.samples.min, r.samples.max), - (2, 715, 1259)) + self.assertEqual( + (r.samples.count, r.samples.min, r.samples.max), (2, 715, 1259) + ) self.assertEquals(r.max_rss, 32768) - self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), - (8, 28, 15)) + self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), (8, 28, 15)) def test_repr(self): - log_line = '1,AngryPhonebook,20,10664,12933,11035,576,10884' - r = PerformanceTestResult(log_line.split(',')) + log_line = "1,AngryPhonebook,20,10664,12933,11035,576,10884" + r = PerformanceTestResult(log_line.split(",")) self.assertEqual( str(r), - '' + "", ) def test_merge(self): @@ -362,51 +374,70 @@ def test_merge(self): 1,AngryPhonebook,1,12045,12045,12045,0,12045 1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336 1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144 -1,AngryPhonebook,1,12270,12270,12270,0,12270,10498048""".split('\n')[1:] - results = map(PerformanceTestResult, - [line.split(',') for line in tests]) +1,AngryPhonebook,1,12270,12270,12270,0,12270,10498048""".split( + "\n" + )[ + 1: + ] + results = map(PerformanceTestResult, [line.split(",") for line in tests]) results[2].setup = 9 results[3].setup = 7 def as_tuple(r): - return (r.num_samples, r.min, r.max, round(r.mean, 2), - r.sd, r.median, r.max_rss, r.setup) + return ( + r.num_samples, + r.min, + r.max, + round(r.mean, 2), + r.sd, + r.median, + r.max_rss, + r.setup, + ) r = results[0] - self.assertEqual(as_tuple(r), - (1, 12045, 12045, 12045, 0, 12045, None, None)) + self.assertEqual(as_tuple(r), (1, 12045, 12045, 12045, 0, 12045, None, None)) r.merge(results[1]) - self.assertEqual(as_tuple(r), # drops SD and median, +max_rss - (2, 12045, 12325, 12185, None, None, 10510336, None)) + self.assertEqual( + as_tuple(r), # drops SD and median, +max_rss + (2, 12045, 12325, 12185, None, None, 10510336, None), + ) r.merge(results[2]) - self.assertEqual(as_tuple(r), # picks smaller of the MAX_RSS, +setup - (3, 11616, 12325, 11995.33, None, None, 10502144, 9)) + self.assertEqual( + as_tuple(r), # picks smaller of the MAX_RSS, +setup + (3, 11616, 12325, 11995.33, None, None, 10502144, 9), + ) r.merge(results[3]) - self.assertEqual(as_tuple(r), # picks smaller of the setup values - (4, 11616, 12325, 12064, None, None, 10498048, 7)) + self.assertEqual( + as_tuple(r), # picks smaller of the setup values + (4, 11616, 12325, 12064, None, None, 10498048, 7), + ) class TestResultComparison(unittest.TestCase): def setUp(self): self.r0 = PerformanceTestResult( - '101,GlobalClass,20,0,0,0,0,0,10185728'.split(',')) + "101,GlobalClass,20,0,0,0,0,0,10185728".split(",") + ) self.r01 = PerformanceTestResult( - '101,GlobalClass,20,20,20,20,0,0,10185728'.split(',')) + "101,GlobalClass,20,20,20,20,0,0,10185728".split(",") + ) self.r1 = PerformanceTestResult( - '1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336'.split(',')) + "1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336".split(",") + ) self.r2 = PerformanceTestResult( - '1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144'.split(',')) + "1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144".split(",") + ) def test_init(self): rc = ResultComparison(self.r1, self.r2) - self.assertEqual(rc.name, 'AngryPhonebook') + self.assertEqual(rc.name, "AngryPhonebook") self.assertAlmostEquals(rc.ratio, 12325.0 / 11616.0) - self.assertAlmostEquals(rc.delta, (((11616.0 / 12325.0) - 1) * 100), - places=3) + self.assertAlmostEquals(rc.delta, 
(((11616.0 / 12325.0) - 1) * 100), places=3) # handle test results that sometimes change to zero, when compiler # optimizes out the body of the incorrectly written test rc = ResultComparison(self.r0, self.r0) - self.assertEqual(rc.name, 'GlobalClass') + self.assertEqual(rc.name, "GlobalClass") self.assertAlmostEquals(rc.ratio, 1) self.assertAlmostEquals(rc.delta, 0, places=3) rc = ResultComparison(self.r0, self.r01) @@ -416,10 +447,7 @@ def test_init(self): self.assertAlmostEquals(rc.ratio, 20001) self.assertAlmostEquals(rc.delta, -99.995, places=3) # disallow comparison of different test results - self.assertRaises( - AssertionError, - ResultComparison, self.r0, self.r1 - ) + self.assertRaises(AssertionError, ResultComparison, self.r0, self.r1) def test_values_is_dubious(self): self.assertFalse(ResultComparison(self.r1, self.r2).is_dubious) @@ -441,7 +469,7 @@ def tearDown(self): def write_temp_file(self, file_name, data): temp_file_name = os.path.join(self.test_dir, file_name) - with open(temp_file_name, 'w') as f: + with open(temp_file_name, "w") as f: f.write(data) return temp_file_name @@ -461,19 +489,25 @@ class OldAndNewLog(unittest.TestCase): 3,Array2D,20,335831,400221,346622,0,346622 1,AngryPhonebook,20,10458,12714,11000,0,11000""" - old_results = dict([(r.name, r) - for r in - map(PerformanceTestResult, - [line.split(',') - for line in - old_log_content.splitlines()])]) - - new_results = dict([(r.name, r) - for r in - map(PerformanceTestResult, - [line.split(',') - for line in - new_log_content.splitlines()])]) + old_results = dict( + [ + (r.name, r) + for r in map( + PerformanceTestResult, + [line.split(",") for line in old_log_content.splitlines()], + ) + ] + ) + + new_results = dict( + [ + (r.name, r) + for r in map( + PerformanceTestResult, + [line.split(",") for line in new_log_content.splitlines()], + ) + ] + ) def assert_report_contains(self, texts, report): assert not isinstance(texts, str) @@ -494,95 +528,108 @@ def test_parse_results_csv(self): parser = LogParser() results = parser.parse_results(log.splitlines()) self.assertTrue(isinstance(results[0], PerformanceTestResult)) - self.assertEquals(results[0].name, 'Array.append.Array.Int?') - self.assertEquals(results[1].name, - 'Bridging.NSArray.as!.Array.NSString') - self.assertEquals(results[2].name, - 'Flatten.Array.Tuple4.lazy.for-in.Reserve') + self.assertEquals(results[0].name, "Array.append.Array.Int?") + self.assertEquals(results[1].name, "Bridging.NSArray.as!.Array.NSString") + self.assertEquals(results[2].name, "Flatten.Array.Tuple4.lazy.for-in.Reserve") def test_parse_results_tab_delimited(self): - log = '34\tBitCount\t20\t3\t4\t4\t0\t4' + log = "34\tBitCount\t20\t3\t4\t4\t0\t4" parser = LogParser() results = parser.parse_results(log.splitlines()) self.assertTrue(isinstance(results[0], PerformanceTestResult)) - self.assertEqual(results[0].name, 'BitCount') + self.assertEqual(results[0].name, "BitCount") def test_parse_results_formatted_text(self): """Parse format that Benchmark_Driver prints to console""" - log = (""" + log = """ # TEST SAMPLES MIN(μs) MAX(μs) MEAN(μs) SD(μs) MEDIAN(μs) MAX_RSS(B) 3 Array2D 20 2060 2188 2099 0 2099 20915200 Total performance tests executed: 1 -""") +""" parser = LogParser() results = parser.parse_results(log.splitlines()[1:]) # without 1st \n self.assertTrue(isinstance(results[0], PerformanceTestResult)) r = results[0] - self.assertEqual(r.name, 'Array2D') + self.assertEqual(r.name, "Array2D") self.assertEqual(r.max_rss, 20915200) def test_parse_quantiles(self): """Gathers 
samples from reported quantiles. Handles optional memory.""" r = LogParser.results_from_string( """#,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs) -1,Ackermann,3,54383,54512,54601""")['Ackermann'] - self.assertEqual([s.runtime for s in r.samples.all_samples], - [54383, 54512, 54601]) +1,Ackermann,3,54383,54512,54601""" + )["Ackermann"] + self.assertEqual( + [s.runtime for s in r.samples.all_samples], [54383, 54512, 54601] + ) r = LogParser.results_from_string( """#,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B) -1,Ackermann,3,54529,54760,55807,266240""")['Ackermann'] - self.assertEqual([s.runtime for s in r.samples.all_samples], - [54529, 54760, 55807]) +1,Ackermann,3,54529,54760,55807,266240""" + )["Ackermann"] + self.assertEqual( + [s.runtime for s in r.samples.all_samples], [54529, 54760, 55807] + ) self.assertEqual(r.max_rss, 266240) def test_parse_delta_quantiles(self): r = LogParser.results_from_string( # 2-quantile aka. median - '#,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,1,101,,')['B'] + "#,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,1,101,," + )["B"] self.assertEqual( (r.num_samples, r.min, r.median, r.max, r.samples.count), - (1, 101, 101, 101, 1)) + (1, 101, 101, 101, 1), + ) r = LogParser.results_from_string( - '#,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,2,101,,1')['B'] + "#,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,2,101,,1" + )["B"] self.assertEqual( (r.num_samples, r.min, r.median, r.max, r.samples.count), - (2, 101, 101, 102, 2)) + (2, 101, 101, 102, 2), + ) r = LogParser.results_from_string( # 20-quantiles aka. ventiles - '#,TEST,SAMPLES,MIN(μs),𝚫V1,𝚫V2,𝚫V3,𝚫V4,𝚫V5,𝚫V6,𝚫V7,𝚫V8,' + - '𝚫V9,𝚫VA,𝚫VB,𝚫VC,𝚫VD,𝚫VE,𝚫VF,𝚫VG,𝚫VH,𝚫VI,𝚫VJ,𝚫MAX\n' + - '202,DropWhileArray,200,214,,,,,,,,,,,,1,,,,,,2,16,464' - )['DropWhileArray'] + "#,TEST,SAMPLES,MIN(μs),𝚫V1,𝚫V2,𝚫V3,𝚫V4,𝚫V5,𝚫V6,𝚫V7,𝚫V8," + + "𝚫V9,𝚫VA,𝚫VB,𝚫VC,𝚫VD,𝚫VE,𝚫VF,𝚫VG,𝚫VH,𝚫VI,𝚫VJ,𝚫MAX\n" + + "202,DropWhileArray,200,214,,,,,,,,,,,,1,,,,,,2,16,464" + )["DropWhileArray"] self.assertEqual( (r.num_samples, r.min, r.max, r.samples.count), # last 3 ventiles were outliers and were excluded from the sample - (200, 214, 215, 18)) + (200, 214, 215, 18), + ) def test_parse_meta(self): r = LogParser.results_from_string( - '#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),' + - 'PAGES,ICS,YIELD\n' + - '0,B,1,2,2,2,0,2,7,29,15')['B'] + "#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs)," + + "PAGES,ICS,YIELD\n" + + "0,B,1,2,2,2,0,2,7,29,15" + )["B"] self.assertEqual( - (r.min, r.mem_pages, r.involuntary_cs, r.yield_count), - (2, 7, 29, 15)) + (r.min, r.mem_pages, r.involuntary_cs, r.yield_count), (2, 7, 29, 15) + ) r = LogParser.results_from_string( - '#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),' + - 'MAX_RSS(B),PAGES,ICS,YIELD\n' + - '0,B,1,3,3,3,0,3,36864,9,50,15')['B'] + "#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs)," + + "MAX_RSS(B),PAGES,ICS,YIELD\n" + + "0,B,1,3,3,3,0,3,36864,9,50,15" + )["B"] self.assertEqual( (r.min, r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss), - (3, 9, 50, 15, 36864)) + (3, 9, 50, 15, 36864), + ) r = LogParser.results_from_string( - '#,TEST,SAMPLES,MIN(μs),MAX(μs),PAGES,ICS,YIELD\n' + - '0,B,1,4,4,8,31,15')['B'] - self.assertEqual((r.min, r.mem_pages, r.involuntary_cs, r.yield_count), - (4, 8, 31, 15)) + "#,TEST,SAMPLES,MIN(μs),MAX(μs),PAGES,ICS,YIELD\n" + "0,B,1,4,4,8,31,15" + )["B"] + self.assertEqual( + (r.min, r.mem_pages, r.involuntary_cs, r.yield_count), (4, 8, 31, 15) + ) r = LogParser.results_from_string( - '#,TEST,SAMPLES,MIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD\n' 
+ - '0,B,1,5,5,32768,8,28,15')['B'] + "#,TEST,SAMPLES,MIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD\n" + + "0,B,1,5,5,32768,8,28,15" + )["B"] self.assertEqual( (r.min, r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss), - (5, 8, 28, 15, 32768)) + (5, 8, 28, 15, 32768), + ) def test_parse_results_verbose(self): """Parse multiple performance test results with 2 sample formats: @@ -608,27 +655,31 @@ def test_parse_results_verbose(self): Totals,2""" parser = LogParser() - results = parser.parse_results(verbose_log.split('\n')) + results = parser.parse_results(verbose_log.split("\n")) r = results[0] self.assertEqual( (r.name, r.min, r.max, int(r.mean), int(r.sd), r.median), - ('AngryPhonebook', 11467, 13898, 12392, 1315, 11812) + ("AngryPhonebook", 11467, 13898, 12392, 1315, 11812), ) self.assertEqual(r.num_samples, r.samples.num_samples) - self.assertEqual(results[0].samples.all_samples, - [(0, 78, 11812), (1, 90, 13898), (2, 90, 11467)]) + self.assertEqual( + results[0].samples.all_samples, + [(0, 78, 11812), (1, 90, 13898), (2, 90, 11467)], + ) self.assertEqual(r.yields, None) r = results[1] self.assertEqual( (r.name, r.min, r.max, int(r.mean), int(r.sd), r.median), - ('Array2D', 369900, 381039, 373994, 6127, 371043) + ("Array2D", 369900, 381039, 373994, 6127, 371043), ) self.assertEqual(r.setup, 14444) self.assertEqual(r.num_samples, r.samples.num_samples) - self.assertEqual(results[1].samples.all_samples, - [(0, 1, 369900), (1, 1, 381039), (2, 1, 371043)]) + self.assertEqual( + results[1].samples.all_samples, + [(0, 1, 369900), (1, 1, 381039), (2, 1, 371043)], + ) yielded = r.yields[0] self.assertEqual(yielded.before_sample, 1) self.assertEqual(yielded.after, 369918) @@ -642,7 +693,7 @@ def test_parse_environment_verbose(self): 2,AngryPhonebook,3,11269,11884,11657,338,11820 """ parser = LogParser() - results = parser.parse_results(verbose_log.split('\n')) + results = parser.parse_results(verbose_log.split("\n")) r = results[0] self.assertEqual(r.max_rss, 32768) @@ -655,8 +706,8 @@ def test_results_from_merge(self): concatenated_logs = """4,ArrayAppend,20,23641,29000,24990,0,24990 4,ArrayAppend,1,20000,20000,20000,0,20000""" results = LogParser.results_from_string(concatenated_logs) - self.assertEqual(results.keys(), ['ArrayAppend']) - result = results['ArrayAppend'] + self.assertEqual(results.keys(), ["ArrayAppend"]) + result = results["ArrayAppend"] self.assertTrue(isinstance(result, PerformanceTestResult)) self.assertEqual(result.min, 20000) self.assertEqual(result.max, 29000) @@ -677,8 +728,8 @@ def test_results_from_merge_verbose(self): Sample 3,364245 3,Array2D,4,363094,376131,368159,5931,369169""" results = LogParser.results_from_string(concatenated_logs) - self.assertEqual(results.keys(), ['Array2D']) - result = results['Array2D'] + self.assertEqual(results.keys(), ["Array2D"]) + result = results["Array2D"] self.assertTrue(isinstance(result, PerformanceTestResult)) self.assertEqual(result.min, 350815) self.assertEqual(result.max, 376131) @@ -715,7 +766,7 @@ def test_excludes_outliers_from_samples(self): 65,DropFirstAnySeqCntRangeLazy,10,184,455,228,79,206 """ parser = LogParser() - result = parser.parse_results(verbose_log.split('\n'))[0] + result = parser.parse_results(verbose_log.split("\n"))[0] self.assertEqual(result.num_samples, 10) self.assertEqual(result.samples.count, 8) self.assertEqual(len(result.samples.outliers), 2) @@ -727,26 +778,26 @@ def names(tests): return [t.name for t in tests] tc = TestComparator(self.old_results, self.new_results, 0.05) - 
self.assertEqual(names(tc.unchanged), ['AngryPhonebook', 'Array2D']) - self.assertEqual(names(tc.increased), ['ByteSwap', 'ArrayAppend']) - self.assertEqual(names(tc.decreased), ['BitCount']) - self.assertEqual(names(tc.added), ['TwoSum']) - self.assertEqual(names(tc.removed), ['AnyHashableWithAClass']) + self.assertEqual(names(tc.unchanged), ["AngryPhonebook", "Array2D"]) + self.assertEqual(names(tc.increased), ["ByteSwap", "ArrayAppend"]) + self.assertEqual(names(tc.decreased), ["BitCount"]) + self.assertEqual(names(tc.added), ["TwoSum"]) + self.assertEqual(names(tc.removed), ["AnyHashableWithAClass"]) # other way around tc = TestComparator(self.new_results, self.old_results, 0.05) - self.assertEqual(names(tc.unchanged), ['AngryPhonebook', 'Array2D']) - self.assertEqual(names(tc.increased), ['BitCount']) - self.assertEqual(names(tc.decreased), ['ByteSwap', 'ArrayAppend']) - self.assertEqual(names(tc.added), ['AnyHashableWithAClass']) - self.assertEqual(names(tc.removed), ['TwoSum']) + self.assertEqual(names(tc.unchanged), ["AngryPhonebook", "Array2D"]) + self.assertEqual(names(tc.increased), ["BitCount"]) + self.assertEqual(names(tc.decreased), ["ByteSwap", "ArrayAppend"]) + self.assertEqual(names(tc.added), ["AnyHashableWithAClass"]) + self.assertEqual(names(tc.removed), ["TwoSum"]) # delta_threshold determines the sorting into change groups; # report only change above 100% (ByteSwap's runtime went to 0): tc = TestComparator(self.old_results, self.new_results, 1) self.assertEqual( names(tc.unchanged), - ['AngryPhonebook', 'Array2D', 'ArrayAppend', 'BitCount'] + ["AngryPhonebook", "Array2D", "ArrayAppend", "BitCount"], ) - self.assertEqual(names(tc.increased), ['ByteSwap']) + self.assertEqual(names(tc.increased), ["ByteSwap"]) self.assertEqual(tc.decreased, []) @@ -770,45 +821,58 @@ def assert_html_contains(self, texts): def test_values(self): self.assertEqual( - ReportFormatter.values(PerformanceTestResult( - '1,AngryPhonebook,20,10664,12933,11035,576,10884'.split(','))), - ('AngryPhonebook', '10664', '12933', '11035', '—') + ReportFormatter.values( + PerformanceTestResult( + "1,AngryPhonebook,20,10664,12933,11035,576,10884".split(",") + ) + ), + ("AngryPhonebook", "10664", "12933", "11035", "—"), ) self.assertEqual( - ReportFormatter.values(PerformanceTestResult( - '1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336' - .split(','))), - ('AngryPhonebook', '12045', '12045', '12045', '10510336') + ReportFormatter.values( + PerformanceTestResult( + "1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336".split(",") + ) + ), + ("AngryPhonebook", "12045", "12045", "12045", "10510336"), ) r1 = PerformanceTestResult( - '1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336'.split(',')) + "1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336".split(",") + ) r2 = PerformanceTestResult( - '1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144'.split(',')) + "1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144".split(",") + ) self.assertEqual( ReportFormatter.values(ResultComparison(r1, r2)), - ('AngryPhonebook', '12325', '11616', '-5.8%', '1.06x') + ("AngryPhonebook", "12325", "11616", "-5.8%", "1.06x"), ) self.assertEqual( ReportFormatter.values(ResultComparison(r2, r1)), - ('AngryPhonebook', '11616', '12325', '+6.1%', '0.94x') + ("AngryPhonebook", "11616", "12325", "+6.1%", "0.94x"), ) r2.max = r1.min + 1 self.assertEqual( ReportFormatter.values(ResultComparison(r1, r2))[4], - '1.06x (?)' # is_dubious + "1.06x (?)", # is_dubious ) def test_justified_columns(self): """Table columns are all 
formated with same width, defined by the longest value. """ - self.assert_markdown_contains([ - 'AnyHashableWithAClass | 247027 | 319065 | 259056 | 10250445', - 'Array2D | 335831 | 335831 | +0.0% | 1.00x']) - self.assert_git_contains([ - 'AnyHashableWithAClass 247027 319065 259056 10250445', - 'Array2D 335831 335831 +0.0% 1.00x']) + self.assert_markdown_contains( + [ + "AnyHashableWithAClass | 247027 | 319065 | 259056 | 10250445", + "Array2D | 335831 | 335831 | +0.0% | 1.00x", + ] + ) + self.assert_git_contains( + [ + "AnyHashableWithAClass 247027 319065 259056 10250445", + "Array2D 335831 335831 +0.0% 1.00x", + ] + ) def test_column_headers(self): """Report contains table headers for ResultComparisons and changed @@ -817,49 +881,63 @@ def test_column_headers(self): performance_test_result = self.tc.added[0] self.assertEqual( ReportFormatter.header_for(performance_test_result), - ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS') + ("TEST", "MIN", "MAX", "MEAN", "MAX_RSS"), ) comparison_result = self.tc.increased[0] self.assertEqual( ReportFormatter.header_for(comparison_result), - ('TEST', 'OLD', 'NEW', 'DELTA', 'RATIO') - ) - self.assert_markdown_contains([ - 'TEST | OLD | NEW | DELTA | RATIO', - ':--- | ---: | ---: | ---: | ---: ', - 'TEST | MIN | MAX | MEAN | MAX_RSS']) - self.assert_git_contains([ - 'TEST OLD NEW DELTA RATIO', - 'TEST MIN MAX MEAN MAX_RSS']) - self.assert_html_contains([ - """ + ("TEST", "OLD", "NEW", "DELTA", "RATIO"), + ) + self.assert_markdown_contains( + [ + "TEST | OLD | NEW | DELTA | RATIO", + ":--- | ---: | ---: | ---: | ---: ", + "TEST | MIN | MAX | MEAN | MAX_RSS", + ] + ) + self.assert_git_contains( + [ + "TEST OLD NEW DELTA RATIO", + "TEST MIN MAX MEAN MAX_RSS", + ] + ) + self.assert_html_contains( + [ + """ OLD NEW DELTA RATIO""", - """ + """ MIN MAX MEAN - MAX_RSS"""]) + MAX_RSS""", + ] + ) def test_emphasize_speedup(self): """Emphasize speedup values for regressions and improvements""" # tests in No Changes don't have emphasized speedup - self.assert_markdown_contains([ - 'BitCount | 3 | 9 | +199.9% | **0.33x**', - 'ByteSwap | 4 | 0 | -100.0% | **4001.00x**', - 'AngryPhonebook | 10458 | 10458 | +0.0% | 1.00x ', - 'ArrayAppend | 23641 | 20000 | -15.4% | **1.18x (?)**' - ]) - self.assert_git_contains([ - 'BitCount 3 9 +199.9% **0.33x**', - 'ByteSwap 4 0 -100.0% **4001.00x**', - 'AngryPhonebook 10458 10458 +0.0% 1.00x', - 'ArrayAppend 23641 20000 -15.4% **1.18x (?)**' - ]) - self.assert_html_contains([ - """ + self.assert_markdown_contains( + [ + "BitCount | 3 | 9 | +199.9% | **0.33x**", + "ByteSwap | 4 | 0 | -100.0% | **4001.00x**", + "AngryPhonebook | 10458 | 10458 | +0.0% | 1.00x ", + "ArrayAppend | 23641 | 20000 | -15.4% | **1.18x (?)**", + ] + ) + self.assert_git_contains( + [ + "BitCount 3 9 +199.9% **0.33x**", + "ByteSwap 4 0 -100.0% **4001.00x**", + "AngryPhonebook 10458 10458 +0.0% 1.00x", + "ArrayAppend 23641 20000 -15.4% **1.18x (?)**", + ] + ) + self.assert_html_contains( + [ + """ BitCount 3 @@ -867,7 +945,7 @@ def test_emphasize_speedup(self): +199.9% 0.33x """, - """ + """ ByteSwap 4 @@ -875,182 +953,221 @@ def test_emphasize_speedup(self): -100.0% 4001.00x """, - """ + """ AngryPhonebook 10458 10458 +0.0% 1.00x - """ - ]) + """, + ] + ) def test_sections(self): """Report is divided into sections with summaries.""" - self.assert_markdown_contains([ - """
+ self.assert_markdown_contains( + [ + """
Regression (1)""", - """
+ """
Improvement (2)""", - """
+ """
No Changes (2)""", - """
+ """
Added (1)""", - """
- Removed (1)"""]) - self.assert_git_contains([ - 'Regression (1): \n', - 'Improvement (2): \n', - 'No Changes (2): \n', - 'Added (1): \n', - 'Removed (1): \n']) - self.assert_html_contains([ - "Regression (1)", - "Improvement (2)", - "No Changes (2)", - "Added (1)", - "Removed (1)"]) + """
+ Removed (1)""", + ] + ) + self.assert_git_contains( + [ + "Regression (1): \n", + "Improvement (2): \n", + "No Changes (2): \n", + "Added (1): \n", + "Removed (1): \n", + ] + ) + self.assert_html_contains( + [ + "Regression (1)", + "Improvement (2)", + "No Changes (2)", + "Added (1)", + "Removed (1)", + ] + ) def test_report_only_changes(self): """Leave out tests without significant change.""" rf = ReportFormatter(self.tc, changes_only=True) markdown, git, html = rf.markdown(), rf.git(), rf.html() - self.assertNotIn('No Changes', markdown) - self.assertNotIn('AngryPhonebook', markdown) - self.assertNotIn('No Changes', git) - self.assertNotIn('AngryPhonebook', git) - self.assertNotIn('No Changes', html) - self.assertNotIn('AngryPhonebook', html) + self.assertNotIn("No Changes", markdown) + self.assertNotIn("AngryPhonebook", markdown) + self.assertNotIn("No Changes", git) + self.assertNotIn("AngryPhonebook", git) + self.assertNotIn("No Changes", html) + self.assertNotIn("AngryPhonebook", html) def test_single_table_report(self): """Single table report has inline headers and no elaborate sections.""" self.tc.removed = [] # test handling empty section rf = ReportFormatter(self.tc, changes_only=True, single_table=True) markdown = rf.markdown() - self.assertNotIn('Regression (1)', - 'TEST | OLD | NEW | DELTA | RATIO', - 'BitCount | 3 | 9 | +199.9% | **0.33x**', + "Regression (1)", + "TEST | OLD | NEW | DELTA | RATIO", + "BitCount | 3 | 9 | +199.9% | **0.33x**", ] git = [ - 'Regression (1):', - 'TEST OLD NEW DELTA RATIO', - 'BitCount 3 9 +199.9% **0.33x**', + "Regression (1):", + "TEST OLD NEW DELTA RATIO", + "BitCount 3 9 +199.9% **0.33x**", ] - html = ['', "BitCount"] + html = ["", "BitCount"] def setUp(self): super(Test_compare_perf_tests_main, self).setUp() - self.old_log = self.write_temp_file('old.log', self.old_log_content) - self.new_log = self.write_temp_file('new.log', self.new_log_content) + self.old_log = self.write_temp_file("old.log", self.old_log_content) + self.new_log = self.write_temp_file("new.log", self.new_log_content) def execute_main_with_format(self, report_format, test_output=False): - report_file = self.test_dir + 'report.log' - args = ['compare_perf_tests.py', - '--old-file', self.old_log, - '--new-file', self.new_log, - '--format', report_format] - - sys.argv = (args if not test_output else - args + ['--output', report_file]) + report_file = self.test_dir + "report.log" + args = [ + "compare_perf_tests.py", + "--old-file", + self.old_log, + "--new-file", + self.new_log, + "--format", + report_format, + ] + + sys.argv = args if not test_output else args + ["--output", report_file] with captured_output() as (out, _): main() report_out = out.getvalue() if test_output: - with open(report_file, 'r') as f: + with open(report_file, "r") as f: report = f.read() # because print adds newline, add one here, too: - report_file = str(report + '\n') + report_file = str(report + "\n") else: report_file = None @@ -1058,40 +1175,41 @@ def execute_main_with_format(self, report_format, test_output=False): def test_markdown(self): """Writes Markdown formatted report to stdout""" - report_out, _ = self.execute_main_with_format('markdown') + report_out, _ = self.execute_main_with_format("markdown") self.assert_report_contains(self.markdown, report_out) def test_markdown_output(self): """Writes Markdown formatted report to stdout and `--output` file.""" - report_out, report_file = ( - self.execute_main_with_format('markdown', test_output=True)) + report_out, report_file = 
self.execute_main_with_format( + "markdown", test_output=True + ) self.assertEqual(report_out, report_file) self.assert_report_contains(self.markdown, report_file) def test_git(self): """Writes Git formatted report to stdout.""" - report_out, _ = self.execute_main_with_format('git') + report_out, _ = self.execute_main_with_format("git") self.assert_report_contains(self.git, report_out) def test_git_output(self): """Writes Git formatted report to stdout and `--output` file.""" - report_out, report_file = ( - self.execute_main_with_format('git', test_output=True)) + report_out, report_file = self.execute_main_with_format("git", test_output=True) self.assertEqual(report_out, report_file) self.assert_report_contains(self.git, report_file) def test_html(self): """Writes HTML formatted report to stdout.""" - report_out, _ = self.execute_main_with_format('html') + report_out, _ = self.execute_main_with_format("html") self.assert_report_contains(self.html, report_out) def test_html_output(self): """Writes HTML formatted report to stdout and `--output` file.""" - report_out, report_file = ( - self.execute_main_with_format('html', test_output=True)) + report_out, report_file = self.execute_main_with_format( + "html", test_output=True + ) self.assertEqual(report_out, report_file) self.assert_report_contains(self.html, report_file) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/benchmark/scripts/test_utils.py b/benchmark/scripts/test_utils.py index 6a2bf8856a99f..4b675d9d82582 100644 --- a/benchmark/scripts/test_utils.py +++ b/benchmark/scripts/test_utils.py @@ -78,14 +78,15 @@ def expect(self, call_args, response): def assert_called_with(self, expected_args): """Verify that the tested method was called with provided arguments.""" expected_args = tuple(expected_args) - assert expected_args in self.calls, ( - 'Expected: {0} in Called: {1}'.format(expected_args, self.calls)) + assert expected_args in self.calls, "Expected: {0} in Called: {1}".format( + expected_args, self.calls + ) def assert_called_all_expected(self): """Verify that all expeced invocations of tested method were called.""" - assert self.calls == self.expected, ( - '\nExpected: {0}, \n Called: {1}'.format( - self.expected, self.calls)) + assert self.calls == self.expected, "\nExpected: {0}, \n Called: {1}".format( + self.expected, self.calls + ) class MockLoggingHandler(logging.Handler): @@ -103,5 +104,9 @@ def emit(self, record): def reset(self): """Clear all log messages.""" self.messages = { - 'debug': [], 'info': [], 'warning': [], 'error': [], 'critical': [] + "debug": [], + "info": [], + "warning": [], + "error": [], + "critical": [], } diff --git a/benchmark/utils/convertToJSON.py b/benchmark/utils/convertToJSON.py index 54aedc7270068..b7a547116d7ba 100644 --- a/benchmark/utils/convertToJSON.py +++ b/benchmark/utils/convertToJSON.py @@ -74,9 +74,9 @@ if __name__ == "__main__": data = {} - data['Tests'] = [] - data['Machine'] = {} - data['Run'] = {} + data["Tests"] = [] + data["Machine"] = {} + data["Run"] = {} for line in sys.stdin: m = SCORERE.match(line) if not m: @@ -84,8 +84,8 @@ if not m: continue test = {} - test['Data'] = [int(m.group(VALGROUP))] - test['Info'] = {} - test['Name'] = [m.group(KEYGROUP)] - data['Tests'].append(test) + test["Data"] = [int(m.group(VALGROUP))] + test["Info"] = {} + test["Name"] = [m.group(KEYGROUP)] + data["Tests"].append(test) print(json.dumps(data, sort_keys=True, indent=4))
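
Aside (illustrative only, not part of the patch above): the tests in this series repeatedly exercise the comma-separated benchmark log format, e.g. "1,AngryPhonebook,20,10664,12933,11035,576,10884", whose columns are #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs) with an optional trailing MAX_RSS(B) field. The minimal sketch below only illustrates that eight-column layout; Row and parse_row are hypothetical names chosen for this example and are not the PerformanceTestResult/LogParser API from compare_perf_tests.py.

    from collections import namedtuple

    # Columns: #,TEST,SAMPLES,MIN(us),MAX(us),MEAN(us),SD(us),MEDIAN(us)
    # (a ninth MAX_RSS(B) column may follow; it is ignored here)
    Row = namedtuple("Row", "ordinal name num_samples min max mean sd median")

    def parse_row(line):
        # Split one log line and convert the six numeric columns to int.
        fields = line.split(",")
        return Row(fields[0], fields[1], *[int(f) for f in fields[2:8]])

    print(parse_row("1,AngryPhonebook,20,10664,12933,11035,576,10884"))
    # Row(ordinal='1', name='AngryPhonebook', num_samples=20, min=10664,
    #     max=12933, mean=11035, sd=576, median=10884)

The real scripts carry considerably more logic (merging of repeated runs, quantile and verbose formats, outlier exclusion), so this sketch is only a reading aid for the log lines quoted in the tests.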