From 4dfb3c43968eddc5612b895700eca7bba326d0a4 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Sun, 1 Mar 2020 16:10:58 +0100 Subject: [PATCH 01/21] Revert "[NFC][Python: black] Reformatted the benchmark Python sources using utils/python_format.py." --- .flake8 | 4 +- benchmark/scripts/Benchmark_DTrace.in | 73 +- benchmark/scripts/Benchmark_Driver | 719 +++++-------- benchmark/scripts/Benchmark_GuardMalloc.in | 23 +- benchmark/scripts/Benchmark_QuickCheck.in | 68 +- .../scripts/Benchmark_RuntimeLeaksRunner.in | 100 +- benchmark/scripts/build_linux.py | 50 +- benchmark/scripts/build_script_helper.py | 50 +- benchmark/scripts/compare_perf_tests.py | 506 ++++----- benchmark/scripts/create_benchmark.py | 74 +- .../generate_harness/generate_harness.py | 19 +- .../perf_test_driver/perf_test_driver.py | 78 +- benchmark/scripts/run_smoke_bench | 249 ++--- benchmark/scripts/test_Benchmark_Driver.py | 980 +++++++----------- benchmark/scripts/test_compare_perf_tests.py | 842 +++++++-------- benchmark/scripts/test_utils.py | 17 +- benchmark/utils/convertToJSON.py | 14 +- 17 files changed, 1587 insertions(+), 2279 deletions(-) diff --git a/.flake8 b/.flake8 index 549c9aa559695..39188cae5c115 100644 --- a/.flake8 +++ b/.flake8 @@ -6,12 +6,11 @@ filename = ./benchmark/scripts/Benchmark_Driver, ./benchmark/scripts/Benchmark_DTrace.in, ./benchmark/scripts/Benchmark_GuardMalloc.in, - ./benchmark/scripts/Benchmark_QuickCheck.in, ./benchmark/scripts/Benchmark_RuntimeLeaksRunner.in, - ./benchmark/scripts/run_smoke_bench, ./docs/scripts/ns-html2rst, + ./test/Driver/Inputs/fake-toolchain/clang++, ./test/Driver/Inputs/fake-toolchain/ld, ./utils/80+-check, @@ -31,6 +30,7 @@ filename = ./utils/recursive-lipo, ./utils/round-trip-syntax-test, ./utils/rth, + ./utils/run-remote, ./utils/run-test, ./utils/scale-test, ./utils/submit-benchmark-results, diff --git a/benchmark/scripts/Benchmark_DTrace.in b/benchmark/scripts/Benchmark_DTrace.in index 300291813b96d..273c538cd650f 100644 --- a/benchmark/scripts/Benchmark_DTrace.in +++ b/benchmark/scripts/Benchmark_DTrace.in @@ -19,17 +19,20 @@ import sys DRIVER_LIBRARY_PATH = "@PATH_TO_DRIVER_LIBRARY@" sys.path.append(DRIVER_LIBRARY_PATH) -DTRACE_PATH = os.path.join(DRIVER_LIBRARY_PATH, "swift_stats.d") +DTRACE_PATH = os.path.join(DRIVER_LIBRARY_PATH, 'swift_stats.d') import perf_test_driver # noqa (E402 module level import not at top of file) # Regexes for the XFAIL_LIST. 
Matches against '([Onone|O|Osize],TestName)' -XFAIL_LIST = [] +XFAIL_LIST = [ +] class DTraceResult(perf_test_driver.Result): + def __init__(self, name, status, output, csv_output): - perf_test_driver.Result.__init__(self, name, status, output, XFAIL_LIST) + perf_test_driver.Result.__init__( + self, name, status, output, XFAIL_LIST) self.csv_output = csv_output def is_failure(self): @@ -37,38 +40,40 @@ class DTraceResult(perf_test_driver.Result): @classmethod def data_headers(cls): - return ["Name", "Result", "Total RR Opts", "Total RR Opts/Iter"] + return [ + 'Name', 'Result', 'Total RR Opts', 'Total RR Opts/Iter'] @classmethod def data_format(cls, max_test_len): non_name_headers = DTraceResult.data_headers()[1:] - fmt = ("{:<%d}" % (max_test_len + 5)) + "".join( - ["{:<%d}" % (len(h) + 2) for h in non_name_headers] - ) + fmt = ('{:<%d}' % (max_test_len + 5)) + \ + ''.join(['{:<%d}' % (len(h) + 2) for h in non_name_headers]) return fmt @classmethod def print_data_header(cls, max_test_len, csv_output): headers = cls.data_headers() if csv_output: - print(",".join(headers)) + print(','.join(headers)) return print(cls.data_format(max_test_len).format(*headers)) def print_data(self, max_test_len): result = [self.get_name(), self.get_result()] + map(str, self.output) if self.csv_output: - print(",".join(result)) + print(','.join(result)) return print(DTraceResult.data_format(max_test_len).format(*result)) class DTraceBenchmarkDriver(perf_test_driver.BenchmarkDriver): + def __init__(self, binary, xfail_list, csv_output): perf_test_driver.BenchmarkDriver.__init__( - self, binary, xfail_list, enable_parallel=True, opt_levels=["O"] - ) + self, binary, xfail_list, + enable_parallel=True, + opt_levels=['O']) self.csv_output = csv_output def print_data_header(self, max_test_len): @@ -78,37 +83,23 @@ class DTraceBenchmarkDriver(perf_test_driver.BenchmarkDriver): return {} def process_input(self, data): - test_name = "({}_{})".format(data["opt"], data["test_name"]) + test_name = '({}_{})'.format(data['opt'], data['test_name']) print("Running {}...".format(test_name)) sys.stdout.flush() def get_results_with_iters(iters): e = os.environ - e["SWIFT_DETERMINISTIC_HASHING"] = "1" - p = subprocess.Popen( - [ - "sudo", - "dtrace", - "-s", - DTRACE_PATH, - "-c", - "%s %s %s %s" - % ( - data["path"], - data["test_name"], - "--num-iters=%d" % iters, - "--num-samples=2", - ), - ], - stdout=subprocess.PIPE, - stderr=open("/dev/null", "w"), - env=e, - ) + e['SWIFT_DETERMINISTIC_HASHING'] = '1' + p = subprocess.Popen([ + 'sudo', 'dtrace', '-s', DTRACE_PATH, + '-c', '%s %s %s %s' % (data['path'], data['test_name'], + '--num-iters=%d' % iters, + '--num-samples=2') + ], stdout=subprocess.PIPE, stderr=open('/dev/null', 'w'), env=e) results = [x for x in p.communicate()[0].split("\n") if len(x) > 0] return [ - x.split(",")[1] for x in results[results.index("DTRACE RESULTS") + 1 :] - ] - + x.split(',')[1] for x in + results[results.index('DTRACE RESULTS') + 1:]] iter_2_results = get_results_with_iters(2) iter_3_results = get_results_with_iters(3) iter_5_results = get_results_with_iters(5) @@ -145,18 +136,16 @@ SWIFT_BIN_DIR = os.path.dirname(os.path.abspath(__file__)) def parse_args(): parser = argparse.ArgumentParser() parser.add_argument( - "-filter", + '-filter', type=str, default=None, - help="Filter out any test that does not match the given regex", - ) + help='Filter out any test that does not match the given regex') parser.add_argument( - "--emit-csv", + '--emit-csv', default=False, - action="store_true", + 
action='store_true', help="Emit csv output", - dest="csv_output", - ) + dest='csv_output') return parser.parse_args() diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver index 1e84738562bfe..31808852bcf22 100755 --- a/benchmark/scripts/Benchmark_Driver +++ b/benchmark/scripts/Benchmark_Driver @@ -61,22 +61,23 @@ class BenchmarkDriver(object): self.results = {} # Set a constant hash seed. Some tests are currently sensitive to # fluctuations in the number of hash collisions. - os.environ["SWIFT_DETERMINISTIC_HASHING"] = "1" + os.environ['SWIFT_DETERMINISTIC_HASHING'] = '1' def _invoke(self, cmd): - return self._subprocess.check_output(cmd, stderr=self._subprocess.STDOUT) + return self._subprocess.check_output( + cmd, stderr=self._subprocess.STDOUT) @property def test_harness(self): """Full path to test harness binary.""" - suffix = self.args.optimization if hasattr(self.args, "optimization") else "O" + suffix = (self.args.optimization if hasattr(self.args, 'optimization') + else 'O') return os.path.join(self.args.tests, "Benchmark_" + suffix) def _git(self, cmd): """Execute the Git command in the `swift-repo`.""" return self._invoke( - ("git -C {0} ".format(self.args.swift_repo) + cmd).split() - ).strip() + ('git -C {0} '.format(self.args.swift_repo) + cmd).split()).strip() @property def log_file(self): @@ -88,28 +89,27 @@ class BenchmarkDriver(object): return None log_dir = self.args.output_dir harness_name = os.path.basename(self.test_harness) - suffix = "-" + time.strftime("%Y%m%d%H%M%S", time.localtime()) + suffix = '-' + time.strftime('%Y%m%d%H%M%S', time.localtime()) if self.args.swift_repo: log_dir = os.path.join( - log_dir, self._git("rev-parse --abbrev-ref HEAD") - ) # branch - suffix += "-" + self._git("rev-parse --short HEAD") # revision - return os.path.join(log_dir, harness_name + suffix + ".log") + log_dir, self._git('rev-parse --abbrev-ref HEAD')) # branch + suffix += '-' + self._git('rev-parse --short HEAD') # revision + return os.path.join(log_dir, harness_name + suffix + '.log') @property def _cmd_list_benchmarks(self): # Use tab delimiter for easier parsing to override the default comma. # (The third 'column' is always comma-separated list of tags in square # brackets -- currently unused here.) 
- return [self.test_harness, "--list", "--delim=\t"] + ( - ["--skip-tags="] if (self.args.benchmarks or self.args.filters) else [] - ) + return [self.test_harness, '--list', '--delim=\t'] + ( + ['--skip-tags='] if (self.args.benchmarks or + self.args.filters) else []) def _get_tests(self): """Return a list of performance tests to run.""" number_name_pairs = [ - line.split("\t")[:2] - for line in self._invoke(self._cmd_list_benchmarks).split("\n")[1:-1] + line.split('\t')[:2] for line in + self._invoke(self._cmd_list_benchmarks).split('\n')[1:-1] ] # unzip list of pairs into 2 lists test_numbers, self.all_tests = map(list, zip(*number_name_pairs)) @@ -122,79 +122,55 @@ class BenchmarkDriver(object): def _tests_matching_patterns(self): regexes = [re.compile(pattern) for pattern in self.args.filters] - return sorted( - list( - set( - [ - name - for pattern in regexes - for name in self.all_tests - if pattern.match(name) - ] - ) - ) - ) + return sorted(list(set([name for pattern in regexes + for name in self.all_tests + if pattern.match(name)]))) def _tests_by_name_or_number(self, test_numbers): benchmarks = set(self.args.benchmarks) number_to_name = dict(zip(test_numbers, self.all_tests)) - tests_by_number = [ - number_to_name[i] for i in benchmarks.intersection(set(test_numbers)) - ] - return sorted( - list(benchmarks.intersection(set(self.all_tests)).union(tests_by_number)) - ) - - def run( - self, - test=None, - num_samples=None, - num_iters=None, - sample_time=None, - verbose=None, - measure_memory=False, - quantile=None, - ): + tests_by_number = [number_to_name[i] + for i in benchmarks.intersection(set(test_numbers))] + return sorted(list(benchmarks + .intersection(set(self.all_tests)) + .union(tests_by_number))) + + def run(self, test=None, num_samples=None, num_iters=None, + sample_time=None, verbose=None, measure_memory=False, + quantile=None): """Execute benchmark and gather results.""" num_samples = num_samples or 0 num_iters = num_iters or 0 # automatically determine N to run for 1s sample_time = sample_time or 0 # default is 1s cmd = self._cmd_run( - test, num_samples, num_iters, sample_time, verbose, measure_memory, quantile - ) + test, num_samples, num_iters, sample_time, + verbose, measure_memory, quantile) output = self._invoke(cmd) results = self.parser.results_from_string(output) return results.items()[0][1] if test else results - def _cmd_run( - self, - test, - num_samples, - num_iters, - sample_time, - verbose, - measure_memory, - quantile, - ): + def _cmd_run(self, test, num_samples, num_iters, sample_time, + verbose, measure_memory, quantile): cmd = [self.test_harness] if test: cmd.append(test) else: - cmd.extend([self.test_number.get(name, name) for name in self.tests]) + cmd.extend([self.test_number.get(name, name) + for name in self.tests]) if num_samples > 0: - cmd.append("--num-samples={0}".format(num_samples)) + cmd.append('--num-samples={0}'.format(num_samples)) if num_iters > 0: - cmd.append("--num-iters={0}".format(num_iters)) + cmd.append('--num-iters={0}'.format(num_iters)) if sample_time > 0: - cmd.append("--sample-time={0}".format(sample_time)) + cmd.append('--sample-time={0}'.format(sample_time)) if verbose: - cmd.append("--verbose") + cmd.append('--verbose') if measure_memory: - cmd.append("--memory") + cmd.append('--memory') if quantile: - cmd.append("--quantile={0}".format(quantile)) - cmd.append("--delta") + cmd.append('--quantile={0}'.format(quantile)) + cmd.append('--delta') return cmd def run_independent_samples(self, test): @@ -202,18 +178,14 @@ 
class BenchmarkDriver(object): Returns the aggregated result of independent benchmark invocations. """ - def merge_results(a, b): a.merge(b) return a - return reduce( - merge_results, - [ - self.run(test, measure_memory=True, num_iters=1, quantile=20) - for _ in range(self.args.independent_samples) - ], - ) + return reduce(merge_results, + [self.run(test, measure_memory=True, + num_iters=1, quantile=20) + for _ in range(self.args.independent_samples)]) def log_results(self, output, log_file=None): """Log output to `log_file`. @@ -224,11 +196,11 @@ class BenchmarkDriver(object): dir = os.path.dirname(log_file) if not os.path.exists(dir): os.makedirs(dir) - print("Logging results to: %s" % log_file) - with open(log_file, "w") as f: + print('Logging results to: %s' % log_file) + with open(log_file, 'w') as f: f.write(output) - RESULT = "{:>3} {:<40} {:>7} {:>7} {:>6} {:>10} {:>6} {:>7} {:>10}" + RESULT = '{:>3} {:<40} {:>7} {:>7} {:>6} {:>10} {:>6} {:>7} {:>10}' def run_and_log(self, csv_console=True): """Run benchmarks and continuously log results to the console. @@ -240,41 +212,19 @@ class BenchmarkDriver(object): format is justified columns. """ format = ( - (lambda values: ",".join(values)) - if csv_console - else (lambda values: self.RESULT.format(*values)) - ) # justified columns + (lambda values: ','.join(values)) if csv_console else + (lambda values: self.RESULT.format(*values))) # justified columns def console_log(values): print(format(values)) def result_values(r): - return map( - str, - [ - r.test_num, - r.name, - r.num_samples, - r.min, - r.samples.q1, - r.median, - r.samples.q3, - r.max, - r.max_rss, - ], - ) - - header = [ - "#", - "TEST", - "SAMPLES", - "MIN(μs)", - "Q1(μs)", - "MEDIAN(μs)", - "Q3(μs)", - "MAX(μs)", - "MAX_RSS(B)", - ] + return map(str, [r.test_num, r.name, r.num_samples, r.min, + r.samples.q1, r.median, r.samples.q3, r.max, + r.max_rss]) + + header = ['#', 'TEST', 'SAMPLES', 'MIN(μs)', 'Q1(μs)', 'MEDIAN(μs)', + 'Q3(μs)', 'MAX(μs)', 'MAX_RSS(B)'] console_log(header) results = [header] for test in self.tests: @@ -282,10 +232,10 @@ class BenchmarkDriver(object): console_log(result) results.append(result) - print("\nTotal performance tests executed: {0}".format(len(self.tests))) - return ( - None if csv_console else ("\n".join([",".join(r) for r in results]) + "\n") - ) # csv_log + print( + '\nTotal performance tests executed: {0}'.format(len(self.tests))) + return (None if csv_console else + ('\n'.join([','.join(r) for r in results]) + '\n')) # csv_log @staticmethod def run_benchmarks(args): @@ -305,31 +255,22 @@ class LoggingReportFormatter(logging.Formatter): """ import logging as log - - colors = { - log.DEBUG: "9", - log.INFO: "2", - log.WARNING: "3", - log.ERROR: "1", - log.CRITICAL: "5", - } + colors = {log.DEBUG: '9', log.INFO: '2', log.WARNING: '3', log.ERROR: '1', + log.CRITICAL: '5'} def __init__(self, use_color=False): """Specify if report should use colors; defaults to False.""" - super(LoggingReportFormatter, self).__init__("%(message)s") + super(LoggingReportFormatter, self).__init__('%(message)s') self.use_color = use_color def format(self, record): """Format the log record with level and category.""" msg = super(LoggingReportFormatter, self).format(record) - category = (record.name.split(".")[-1] + ": ") if "." 
in record.name else "" - return ( - "\033[1;3{0}m{1}{2}\033[1;0m".format( - self.colors[record.levelno], category, msg - ) - if self.use_color - else "{0} {1}{2}".format(record.levelname, category, msg) - ) + category = ((record.name.split('.')[-1] + ': ') if '.' in record.name + else '') + return ('\033[1;3{0}m{1}{2}\033[1;0m'.format( + self.colors[record.levelno], category, msg) if self.use_color else + '{0} {1}{2}'.format(record.levelname, category, msg)) class MarkdownReportHandler(logging.StreamHandler): @@ -343,34 +284,27 @@ class MarkdownReportHandler(logging.StreamHandler): """Initialize the handler and write a Markdown table header.""" super(MarkdownReportHandler, self).__init__(stream) self.setLevel(logging.INFO) - self.stream.write("\n✅ | Benchmark Check Report\n---|---") + self.stream.write('\n✅ | Benchmark Check Report\n---|---') self.stream.flush() - levels = { - logging.WARNING: "\n⚠️", - logging.ERROR: "\n⛔️", - logging.INFO: "
", - } - categories = {"naming": "🔤", "runtime": "⏱", "memory": "Ⓜ️"} + levels = {logging.WARNING: '\n⚠️', logging.ERROR: '\n⛔️', + logging.INFO: '
'} + categories = {'naming': '🔤', 'runtime': '⏱', 'memory': 'Ⓜ️'} quotes_re = re.compile("'") def format(self, record): msg = super(MarkdownReportHandler, self).format(record) - return ( - self.levels.get(record.levelno, "") - + ( - "" - if record.levelno == logging.INFO - else self.categories.get(record.name.split(".")[-1], "") + " | " - ) - + self.quotes_re.sub("`", msg) - ) + return (self.levels.get(record.levelno, '') + + ('' if record.levelno == logging.INFO else + self.categories.get(record.name.split('.')[-1], '') + ' | ') + + self.quotes_re.sub('`', msg)) def emit(self, record): msg = self.format(record) stream = self.stream try: - if isinstance(msg, unicode) and getattr(stream, "encoding", None): + if (isinstance(msg, unicode) and + getattr(stream, 'encoding', None)): stream.write(msg.encode(stream.encoding)) else: stream.write(msg) @@ -379,7 +313,7 @@ class MarkdownReportHandler(logging.StreamHandler): self.flush() def close(self): - self.stream.write("\n\n") + self.stream.write('\n\n') self.stream.flush() super(MarkdownReportHandler, self).close() @@ -394,10 +328,10 @@ class BenchmarkDoctor(object): consumption). """ - log = logging.getLogger("BenchmarkDoctor") - log_naming = log.getChild("naming") - log_runtime = log.getChild("runtime") - log_memory = log.getChild("memory") + log = logging.getLogger('BenchmarkDoctor') + log_naming = log.getChild('naming') + log_runtime = log.getChild('runtime') + log_memory = log.getChild('memory') log.setLevel(logging.DEBUG) def __init__(self, args, driver=None): @@ -409,25 +343,23 @@ class BenchmarkDoctor(object): self.driver = driver or BenchmarkDriver(args) self.results = {} - if hasattr(args, "markdown") and args.markdown: + if hasattr(args, 'markdown') and args.markdown: self.console_handler = MarkdownReportHandler(sys.stdout) else: self.console_handler = logging.StreamHandler(sys.stdout) self.console_handler.setFormatter( - LoggingReportFormatter(use_color=sys.stdout.isatty()) - ) - self.console_handler.setLevel( - logging.DEBUG if args.verbose else logging.INFO - ) + LoggingReportFormatter(use_color=sys.stdout.isatty())) + self.console_handler.setLevel(logging.DEBUG if args.verbose else + logging.INFO) self.log.addHandler(self.console_handler) - self.log.debug("Checking tests: %s", ", ".join(self.driver.tests)) + self.log.debug('Checking tests: %s', ', '.join(self.driver.tests)) self.requirements = [ self._name_matches_benchmark_naming_convention, self._name_is_at_most_40_chars_long, self._no_setup_overhead, self._reasonable_setup_time, self._optimized_runtime_in_range, - self._constant_memory_use, + self._constant_memory_use ] def __del__(self): @@ -436,122 +368,95 @@ class BenchmarkDoctor(object): handler.close() self.log.removeHandler(self.console_handler) - benchmark_naming_convention_re = re.compile(r"[A-Z][a-zA-Z0-9\-.!?]+") - camel_humps_re = re.compile(r"[a-z][A-Z]") + benchmark_naming_convention_re = re.compile(r'[A-Z][a-zA-Z0-9\-.!?]+') + camel_humps_re = re.compile(r'[a-z][A-Z]') @staticmethod def _name_matches_benchmark_naming_convention(measurements): - name = measurements["name"] + name = measurements['name'] match = BenchmarkDoctor.benchmark_naming_convention_re.match(name) - matched = match.group(0) if match else "" + matched = match.group(0) if match else '' composite_words = len(BenchmarkDoctor.camel_humps_re.findall(name)) + 1 if name != matched: BenchmarkDoctor.log_naming.error( - "'%s' name doesn't conform to benchmark naming convention.", name - ) - BenchmarkDoctor.log_naming.info("See 
http://bit.ly/BenchmarkNaming") + "'%s' name doesn't conform to benchmark naming convention.", + name) + BenchmarkDoctor.log_naming.info( + 'See http://bit.ly/BenchmarkNaming') if composite_words > 4: BenchmarkDoctor.log_naming.warning( - "'%s' name is composed of %d words.", name, composite_words - ) + "'%s' name is composed of %d words.", name, composite_words) BenchmarkDoctor.log_naming.info( "Split '%s' name into dot-separated groups and variants. " - "See http://bit.ly/BenchmarkNaming", - name, - ) + "See http://bit.ly/BenchmarkNaming", name) @staticmethod def _name_is_at_most_40_chars_long(measurements): - name = measurements["name"] + name = measurements['name'] if len(name) > 40: BenchmarkDoctor.log_naming.error( - "'%s' name is %d characters long.", name, len(name) - ) + "'%s' name is %d characters long.", name, len(name)) BenchmarkDoctor.log_naming.info( - "Benchmark name should not be longer than 40 characters." - ) + 'Benchmark name should not be longer than 40 characters.') @staticmethod - def _select(measurements, num_iters=None, opt_level="O"): - prefix = measurements["name"] + " " + opt_level - prefix += "" if num_iters is None else (" i" + str(num_iters)) - return [ - series for name, series in measurements.items() if name.startswith(prefix) - ] + def _select(measurements, num_iters=None, opt_level='O'): + prefix = measurements['name'] + ' ' + opt_level + prefix += '' if num_iters is None else (' i' + str(num_iters)) + return [series for name, series in measurements.items() + if name.startswith(prefix)] @staticmethod def _optimized_runtime_in_range(measurements): - name = measurements["name"] + name = measurements['name'] setup, ratio = BenchmarkDoctor._setup_overhead(measurements) setup = 0 if ratio < 0.05 else setup runtime = min( - [ - (result.samples.min - correction) - for i_series in [ - BenchmarkDoctor._select(measurements, num_iters=i) - for correction in [(setup / i) for i in [1, 2]] - ] - for result in i_series - ] - ) + [(result.samples.min - correction) for i_series in + [BenchmarkDoctor._select(measurements, num_iters=i) + for correction in [(setup / i) for i in [1, 2]] + ] for result in i_series]) threshold = 1000 if threshold < runtime: - log = ( - BenchmarkDoctor.log_runtime.warning - if runtime < 10000 - else BenchmarkDoctor.log_runtime.error - ) - caveat = "" if setup == 0 else " (excluding the setup overhead)" + log = (BenchmarkDoctor.log_runtime.warning if runtime < 10000 else + BenchmarkDoctor.log_runtime.error) + caveat = '' if setup == 0 else ' (excluding the setup overhead)' log("'%s' execution took at least %d μs%s.", name, runtime, caveat) def factor(base): # suitable divisior that's integer power of base - return int( - pow(base, math.ceil(math.log(runtime / float(threshold), base))) - ) + return int(pow(base, math.ceil( + math.log(runtime / float(threshold), base)))) BenchmarkDoctor.log_runtime.info( "Decrease the workload of '%s' by a factor of %d (%d), to be " - "less than %d μs.", - name, - factor(2), - factor(10), - threshold, - ) + "less than %d μs.", name, factor(2), factor(10), threshold) threshold = 20 if runtime < threshold: - log = ( - BenchmarkDoctor.log_runtime.error - if runtime == 0 - else BenchmarkDoctor.log_runtime.warning - ) + log = (BenchmarkDoctor.log_runtime.error if runtime == 0 else + BenchmarkDoctor.log_runtime.warning) log("'%s' execution took %d μs.", name, runtime) BenchmarkDoctor.log_runtime.info( "Ensure the workload of '%s' has a properly measurable size" " (runtime > %d μs) and is not eliminated by the compiler 
(use" - " `blackHole` function if necessary)." - if runtime == 0 - else "Increase the workload of '%s' to be more than %d μs.", - name, - threshold, - ) + " `blackHole` function if necessary)." if runtime == 0 else + "Increase the workload of '%s' to be more than %d μs.", + name, threshold) @staticmethod def _setup_overhead(measurements): select = BenchmarkDoctor._select - ti1, ti2 = [ - float(min(mins)) - for mins in [ - [result.samples.min for result in i_series] - for i_series in [select(measurements, num_iters=i) for i in [1, 2]] - ] - ] - setup = int(round(2.0 * (ti1 - ti2))) if ti2 > 20 else 0 # limit of accuracy + ti1, ti2 = [float(min(mins)) for mins in + [[result.samples.min for result in i_series] + for i_series in + [select(measurements, num_iters=i) for i in [1, 2]]]] + setup = (int(round(2.0 * (ti1 - ti2))) if ti2 > 20 # limit of accuracy + else 0) ratio = (setup / ti1) if ti1 > 0 else 0 return (setup, ratio) @@ -561,63 +466,52 @@ class BenchmarkDoctor(object): if ratio > 0.05: BenchmarkDoctor.log_runtime.error( "'%s' has setup overhead of %d μs (%.1f%%).", - measurements["name"], - setup, - round((100 * ratio), 1), - ) + measurements['name'], setup, round((100 * ratio), 1)) BenchmarkDoctor.log_runtime.info( - "Move initialization of benchmark data to the `setUpFunction` " - "registered in `BenchmarkInfo`." - ) + 'Move initialization of benchmark data to the `setUpFunction` ' + 'registered in `BenchmarkInfo`.') @staticmethod def _reasonable_setup_time(measurements): - setup = min([result.setup for result in BenchmarkDoctor._select(measurements)]) + setup = min([result.setup + for result in BenchmarkDoctor._select(measurements)]) if 200000 < setup: # 200 ms BenchmarkDoctor.log_runtime.error( - "'%s' setup took at least %d μs.", measurements["name"], setup - ) + "'%s' setup took at least %d μs.", + measurements['name'], setup) BenchmarkDoctor.log_runtime.info( - "The `setUpFunction` should take no more than 200 ms." 
- ) + 'The `setUpFunction` should take no more than 200 ms.') @staticmethod def _constant_memory_use(measurements): select = BenchmarkDoctor._select (min_i1, max_i1), (min_i2, max_i2) = [ - (min(memory_use), max(memory_use)) - for memory_use in [ - [r.mem_pages for r in i_series] - for i_series in [select(measurements, num_iters=i) for i in [1, 2]] - ] - ] + (min(memory_use), max(memory_use)) for memory_use in + [[r.mem_pages for r in i_series] for i_series in + [select(measurements, num_iters=i) for i in + [1, 2]]]] range_i1, range_i2 = max_i1 - min_i1, max_i2 - min_i2 normal_range = 15 # pages - name = measurements["name"] + name = measurements['name'] more_info = False if abs(min_i1 - min_i2) > max(range_i1, range_i2, normal_range): more_info = True BenchmarkDoctor.log_memory.error( "'%s' varies the memory footprint of the base " - "workload depending on the `num-iters`.", - name, - ) + "workload depending on the `num-iters`.", name) if max(range_i1, range_i2) > normal_range: more_info = True BenchmarkDoctor.log_memory.warning( "'%s' has very wide range of memory used between " - "independent, repeated measurements.", - name, - ) + "independent, repeated measurements.", name) if more_info: BenchmarkDoctor.log_memory.info( "'%s' mem_pages [i1, i2]: min=[%d, %d] 𝚫=%d R=[%d, %d]", name, - *[min_i1, min_i2, abs(min_i1 - min_i2), range_i1, range_i2] - ) + *[min_i1, min_i2, abs(min_i1 - min_i2), range_i1, range_i2]) @staticmethod def _adjusted_1s_samples(runtime): @@ -636,52 +530,38 @@ class BenchmarkDoctor(object): Returns a dictionary with benchmark name and `PerformanceTestResult`s. """ - self.log.debug("Calibrating num-samples for {0}:".format(benchmark)) - r = self.driver.run( - benchmark, num_samples=3, num_iters=1, verbose=True - ) # calibrate + self.log.debug('Calibrating num-samples for {0}:'.format(benchmark)) + r = self.driver.run(benchmark, num_samples=3, num_iters=1, + verbose=True) # calibrate num_samples = self._adjusted_1s_samples(r.samples.min) def capped(s): return min(s, 200) - run_args = [(capped(num_samples), 1), (capped(num_samples / 2), 2)] opts = self.driver.args.optimization opts = opts if isinstance(opts, list) else [opts] self.log.debug( - "Runtime {0} μs yields {1} adjusted samples per second.".format( - r.samples.min, num_samples - ) - ) + 'Runtime {0} μs yields {1} adjusted samples per second.'.format( + r.samples.min, num_samples)) self.log.debug( - "Measuring {0}, 5 x i1 ({1} samples), 5 x i2 ({2} samples)".format( - benchmark, run_args[0][0], run_args[1][0] - ) - ) + 'Measuring {0}, 5 x i1 ({1} samples), 5 x i2 ({2} samples)'.format( + benchmark, run_args[0][0], run_args[1][0])) measurements = dict( - [ - ( - "{0} {1} i{2}{3}".format(benchmark, o, i, suffix), - self.driver.run( - benchmark, - num_samples=s, - num_iters=i, - verbose=True, - measure_memory=True, - ), - ) - for o in opts - for s, i in run_args - for suffix in list("abcde") - ] + [('{0} {1} i{2}{3}'.format(benchmark, o, i, suffix), + self.driver.run(benchmark, num_samples=s, num_iters=i, + verbose=True, measure_memory=True)) + for o in opts + for s, i in run_args + for suffix in list('abcde') + ] ) - measurements["name"] = benchmark + measurements['name'] = benchmark return measurements def analyze(self, benchmark_measurements): """Analyze whether benchmark fullfills all requirtements.""" - self.log.debug("Analyzing %s", benchmark_measurements["name"]) + self.log.debug('Analyzing %s', benchmark_measurements['name']) for rule in self.requirements: rule(benchmark_measurements) @@ -702,137 +582,93 @@ 
class BenchmarkDoctor(object): def format_name(log_path): """Return the filename and directory for a log file.""" - return "/".join(log_path.split("/")[-2:]) + return '/'.join(log_path.split('/')[-2:]) def compare_logs(compare_script, new_log, old_log, log_dir, opt): """Return diff of log files at paths `new_log` and `old_log`.""" - print("Comparing %s %s ..." % (format_name(old_log), format_name(new_log))) - subprocess.call( - [ - compare_script, - "--old-file", - old_log, - "--new-file", - new_log, - "--format", - "markdown", - "--output", - os.path.join(log_dir, "latest_compare_{0}.md".format(opt)), - ] - ) + print('Comparing %s %s ...' % (format_name(old_log), format_name(new_log))) + subprocess.call([compare_script, '--old-file', old_log, + '--new-file', new_log, '--format', 'markdown', + '--output', os.path.join(log_dir, 'latest_compare_{0}.md' + .format(opt))]) def compare(args): log_dir = args.log_dir compare_script = args.compare_script baseline_branch = args.baseline_branch - current_branch = BenchmarkDriver(args, tests=[""])._git( - "rev-parse --abbrev-ref HEAD" - ) + current_branch = \ + BenchmarkDriver(args, tests=[''])._git('rev-parse --abbrev-ref HEAD') current_branch_dir = os.path.join(log_dir, current_branch) baseline_branch_dir = os.path.join(log_dir, baseline_branch) - if current_branch != baseline_branch and not os.path.isdir(baseline_branch_dir): - print( - ( - "Unable to find benchmark logs for {baseline_branch} branch. " - + "Set a baseline benchmark log by passing --benchmark to " - + "build-script while on {baseline_branch} branch." - ).format(baseline_branch=baseline_branch) - ) + if current_branch != baseline_branch and \ + not os.path.isdir(baseline_branch_dir): + print(('Unable to find benchmark logs for {baseline_branch} branch. 
' + + 'Set a baseline benchmark log by passing --benchmark to ' + + 'build-script while on {baseline_branch} branch.') + .format(baseline_branch=baseline_branch)) return 1 recent_logs = {} for branch_dir in [current_branch_dir, baseline_branch_dir]: - for opt in ["O", "Onone"]: - recent_logs[os.path.basename(branch_dir) + "_" + opt] = sorted( - glob.glob(os.path.join(branch_dir, "Benchmark_" + opt + "-*.log")), - key=os.path.getctime, - reverse=True, - ) + for opt in ['O', 'Onone']: + recent_logs[os.path.basename(branch_dir) + '_' + opt] = sorted( + glob.glob(os.path.join( + branch_dir, 'Benchmark_' + opt + '-*.log')), + key=os.path.getctime, reverse=True) if current_branch == baseline_branch: - if ( - len(recent_logs[baseline_branch + "_O"]) > 1 - and len(recent_logs[baseline_branch + "_Onone"]) > 1 - ): - compare_logs( - compare_script, - recent_logs[baseline_branch + "_O"][0], - recent_logs[baseline_branch + "_O"][1], - log_dir, - "O", - ) - compare_logs( - compare_script, - recent_logs[baseline_branch + "_Onone"][0], - recent_logs[baseline_branch + "_Onone"][1], - log_dir, - "Onone", - ) + if len(recent_logs[baseline_branch + '_O']) > 1 and \ + len(recent_logs[baseline_branch + '_Onone']) > 1: + compare_logs(compare_script, + recent_logs[baseline_branch + '_O'][0], + recent_logs[baseline_branch + '_O'][1], + log_dir, 'O') + compare_logs(compare_script, + recent_logs[baseline_branch + '_Onone'][0], + recent_logs[baseline_branch + '_Onone'][1], + log_dir, 'Onone') else: - print( - ( - "{baseline_branch}/{baseline_branch} comparison " - + "skipped: no previous {baseline_branch} logs" - ).format(baseline_branch=baseline_branch) - ) + print(('{baseline_branch}/{baseline_branch} comparison ' + + 'skipped: no previous {baseline_branch} logs') + .format(baseline_branch=baseline_branch)) else: # TODO: Check for outdated baseline branch log - if ( - len(recent_logs[current_branch + "_O"]) == 0 - or len(recent_logs[current_branch + "_Onone"]) == 0 - ): - print("branch sanity failure: missing branch logs") + if len(recent_logs[current_branch + '_O']) == 0 or \ + len(recent_logs[current_branch + '_Onone']) == 0: + print('branch sanity failure: missing branch logs') return 1 - if ( - len(recent_logs[current_branch + "_O"]) == 1 - or len(recent_logs[current_branch + "_Onone"]) == 1 - ): - print("branch/branch comparison skipped: no previous branch logs") + if len(recent_logs[current_branch + '_O']) == 1 or \ + len(recent_logs[current_branch + '_Onone']) == 1: + print('branch/branch comparison skipped: no previous branch logs') else: - compare_logs( - compare_script, - recent_logs[current_branch + "_O"][0], - recent_logs[current_branch + "_O"][1], - log_dir, - "O", - ) - compare_logs( - compare_script, - recent_logs[current_branch + "_Onone"][0], - recent_logs[current_branch + "_Onone"][1], - log_dir, - "Onone", - ) - - if ( - len(recent_logs[baseline_branch + "_O"]) == 0 - or len(recent_logs[baseline_branch + "_Onone"]) == 0 - ): - print( - ( - "branch/{baseline_branch} failure: no {baseline_branch} " + "logs" - ).format(baseline_branch=baseline_branch) - ) + compare_logs(compare_script, + recent_logs[current_branch + '_O'][0], + recent_logs[current_branch + '_O'][1], + log_dir, 'O') + compare_logs(compare_script, + recent_logs[current_branch + '_Onone'][0], + recent_logs[current_branch + '_Onone'][1], + log_dir, 'Onone') + + if len(recent_logs[baseline_branch + '_O']) == 0 or \ + len(recent_logs[baseline_branch + '_Onone']) == 0: + print(('branch/{baseline_branch} failure: no {baseline_branch} ' + + 
'logs') + .format(baseline_branch=baseline_branch)) return 1 else: - compare_logs( - compare_script, - recent_logs[current_branch + "_O"][0], - recent_logs[baseline_branch + "_O"][0], - log_dir, - "O", - ) - compare_logs( - compare_script, - recent_logs[current_branch + "_Onone"][0], - recent_logs[baseline_branch + "_Onone"][0], - log_dir, - "Onone", - ) + compare_logs(compare_script, + recent_logs[current_branch + '_O'][0], + recent_logs[baseline_branch + '_O'][0], + log_dir, 'O') + compare_logs(compare_script, + recent_logs[current_branch + '_Onone'][0], + recent_logs[baseline_branch + '_Onone'][0], + log_dir, 'Onone') # TODO: Fail on large regressions @@ -850,100 +686,79 @@ def positive_int(value): def parse_args(args): """Parse command line arguments and set default values.""" parser = argparse.ArgumentParser( - epilog="Example: ./Benchmark_Driver run -i 5 -f Prefix -f .*Suffix.*" + epilog='Example: ./Benchmark_Driver run -i 5 -f Prefix -f .*Suffix.*' ) subparsers = parser.add_subparsers( - title="Swift benchmark driver commands", - help="See COMMAND -h for additional arguments", - metavar="COMMAND", - ) + title='Swift benchmark driver commands', + help='See COMMAND -h for additional arguments', metavar='COMMAND') shared_benchmarks_parser = argparse.ArgumentParser(add_help=False) benchmarks_group = shared_benchmarks_parser.add_mutually_exclusive_group() benchmarks_group.add_argument( - "benchmarks", + 'benchmarks', default=[], - help="benchmark to run (default: all)", - nargs="*", - metavar="BENCHMARK", - ) + help='benchmark to run (default: all)', nargs='*', metavar="BENCHMARK") benchmarks_group.add_argument( - "-f", - "--filter", - dest="filters", - action="append", - help="run all tests whose name match regular expression PATTERN, " - + "multiple filters are supported", - metavar="PATTERN", - ) + '-f', '--filter', dest='filters', action='append', + help='run all tests whose name match regular expression PATTERN, ' + + 'multiple filters are supported', metavar="PATTERN") shared_benchmarks_parser.add_argument( - "-t", - "--tests", - help="directory containing Benchmark_O{,none,size} " + "(default: DRIVER_DIR)", - default=DRIVER_DIR, - ) + '-t', '--tests', + help='directory containing Benchmark_O{,none,size} ' + + '(default: DRIVER_DIR)', + default=DRIVER_DIR) shared_benchmarks_parser.add_argument( - "-o", - "--optimization", - metavar="OPT", - choices=["O", "Onone", "Osize"], - help="optimization level to use: {O,Onone,Osize}, (default: O)", - default="O", - ) + '-o', '--optimization', + metavar='OPT', + choices=['O', 'Onone', 'Osize'], + help='optimization level to use: {O,Onone,Osize}, (default: O)', + default='O') run_parser = subparsers.add_parser( - "run", - help="Run benchmarks and output results to stdout", - parents=[shared_benchmarks_parser], - ) + 'run', + help='Run benchmarks and output results to stdout', + parents=[shared_benchmarks_parser]) run_parser.add_argument( - "-i", - "--independent-samples", - help="number of times to run each test (default: 1)", - type=positive_int, - default=1, - ) + '-i', '--independent-samples', + help='number of times to run each test (default: 1)', + type=positive_int, default=1) run_parser.add_argument( - "--output-dir", help="log results to directory (default: no logging)" - ) + '--output-dir', + help='log results to directory (default: no logging)') run_parser.add_argument( - "--swift-repo", help="absolute path to the Swift source repository" - ) + '--swift-repo', + help='absolute path to the Swift source repository') 
run_parser.set_defaults(func=BenchmarkDriver.run_benchmarks) check_parser = subparsers.add_parser( - "check", help="", parents=[shared_benchmarks_parser] - ) + 'check', + help='', + parents=[shared_benchmarks_parser]) check_group = check_parser.add_mutually_exclusive_group() check_group.add_argument( - "-v", - "--verbose", - action="store_true", - help="show more details during benchmark analysis", - ) + '-v', '--verbose', action='store_true', + help='show more details during benchmark analysis') check_group.add_argument( - "-md", "--markdown", action="store_true", help="format report as Markdown table" - ) + '-md', '--markdown', action='store_true', + help='format report as Markdown table') check_parser.set_defaults(func=BenchmarkDoctor.run_check) - compare_parser = subparsers.add_parser("compare", help="Compare benchmark results") + compare_parser = subparsers.add_parser( + 'compare', + help='Compare benchmark results') compare_parser.add_argument( - "--log-dir", required=True, help="directory containing benchmark logs" - ) + '--log-dir', required=True, + help='directory containing benchmark logs') compare_parser.add_argument( - "--swift-repo", - required=True, - help="absolute path to the Swift source repository", - ) + '--swift-repo', required=True, + help='absolute path to the Swift source repository') compare_parser.add_argument( - "--compare-script", required=True, help="absolute path to compare script" - ) + '--compare-script', required=True, + help='absolute path to compare script') compare_parser.add_argument( - "--baseline-branch", - default="master", - help="attempt to compare results to baseline results for specified " - "branch (default: master)", - ) + '--baseline-branch', default='master', + help='attempt to compare results to baseline results for specified ' + 'branch (default: master)') compare_parser.set_defaults(func=compare) return parser.parse_args(args) @@ -955,5 +770,5 @@ def main(): return args.func(args) -if __name__ == "__main__": +if __name__ == '__main__': exit(main()) diff --git a/benchmark/scripts/Benchmark_GuardMalloc.in b/benchmark/scripts/Benchmark_GuardMalloc.in index 872179e1d28de..e7d001d4bfa1d 100644 --- a/benchmark/scripts/Benchmark_GuardMalloc.in +++ b/benchmark/scripts/Benchmark_GuardMalloc.in @@ -21,36 +21,37 @@ sys.path.append("@PATH_TO_DRIVER_LIBRARY@") import perf_test_driver # noqa (E402 module level import not at top of file) # Regexes for the XFAIL_LIST. 
Matches against '([Onone|O|Osize],TestName)' -XFAIL_LIST = [] +XFAIL_LIST = [ +] class GuardMallocResult(perf_test_driver.Result): + def __init__(self, name, status): perf_test_driver.Result.__init__(self, name, status, "", XFAIL_LIST) class GuardMallocBenchmarkDriver(perf_test_driver.BenchmarkDriver): + def __init__(self, binary, xfail_list): perf_test_driver.BenchmarkDriver.__init__( - self, binary, xfail_list, enable_parallel=True - ) + self, binary, xfail_list, + enable_parallel=True) self.new_env = os.environ.copy() - self.new_env["DYLD_INSERT_LIBRARIES"] = "/usr/lib/libgmalloc.dylib" + self.new_env['DYLD_INSERT_LIBRARIES'] = '/usr/lib/libgmalloc.dylib' def prepare_input(self, name): - return {"env": self.new_env} + return {'env': self.new_env} def process_input(self, data): - test_name = "({},{})".format(data["opt"], data["test_name"]) + test_name = '({},{})'.format(data['opt'], data['test_name']) print("Running {}...".format(test_name)) sys.stdout.flush() p = subprocess.Popen( - [data["path"], data["test_name"], "--num-iters=2"], - env=data["env"], - stderr=open("/dev/null", "w"), - stdout=open("/dev/null", "w"), - ) + [data['path'], data['test_name'], '--num-iters=2'], + env=data['env'], stderr=open('/dev/null', 'w'), + stdout=open('/dev/null', 'w')) status = p.wait() return GuardMallocResult(test_name, status) diff --git a/benchmark/scripts/Benchmark_QuickCheck.in b/benchmark/scripts/Benchmark_QuickCheck.in index a2cc257476240..0599d9eb2c8d5 100644 --- a/benchmark/scripts/Benchmark_QuickCheck.in +++ b/benchmark/scripts/Benchmark_QuickCheck.in @@ -12,6 +12,7 @@ # # ===---------------------------------------------------------------------===// +import json import os import subprocess import sys @@ -22,48 +23,47 @@ import perf_test_driver # noqa (E402 module level import not at top of file) # This is a hacked up XFAIL list. It should really be a json file, but it will # work for now. Add in the exact name of the pass to XFAIL. -XFAIL_LIST = [] +XFAIL_LIST = [ +] class QuickCheckResult(perf_test_driver.Result): + def __init__(self, name, success): - assert isinstance(success, bool) + assert(isinstance(success, bool)) did_fail = not success perf_test_driver.Result.__init__(self, name, did_fail, "", XFAIL_LIST) def print_data(self, max_test_len): - fmt = "{:<%d}{:<10}" % (max_test_len + 5) + fmt = '{:<%d}{:<10}' % (max_test_len + 5) print(fmt.format(self.get_name(), self.get_result())) class QuickCheckBenchmarkDriver(perf_test_driver.BenchmarkDriver): + def __init__(self, binary, xfail_list, num_iters, opt_levels): perf_test_driver.BenchmarkDriver.__init__( - self, binary, xfail_list, enable_parallel=True, opt_levels=opt_levels - ) + self, binary, xfail_list, + enable_parallel=True, + opt_levels=opt_levels) self.num_iters = num_iters def print_data_header(self, max_test_len): - fmt = "{:<%d}{:<10}" % (max_test_len + 5) - print(fmt.format("Name", "Result")) + fmt = '{:<%d}{:<10}' % (max_test_len + 5) + print(fmt.format('Name', 'Result')) # Propagate any data from this class that is needed for individual # tests. The reason this is needed is to avoid issues with attempting to # access a value in a different process. 
def prepare_input(self, name): - return {"num_samples": 1, "num_iters": self.num_iters} + return {'num_samples': 1, 'num_iters': self.num_iters} def run_test_inner(self, data, num_iters): - p = subprocess.Popen( - [ - data["path"], - "--num-samples={}".format(data["num_samples"]), - "--num-iters={}".format(num_iters), - data["test_name"], - ], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) + p = subprocess.Popen([ + data['path'], + "--num-samples={}".format(data['num_samples']), + "--num-iters={}".format(num_iters), data['test_name']], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) error_out = p.communicate()[1].split("\n") result = p.returncode if result is None: @@ -75,21 +75,20 @@ class QuickCheckBenchmarkDriver(perf_test_driver.BenchmarkDriver): def run_test(self, data, num_iters): try: args = [data, num_iters] - perf_test_driver.run_with_timeout(self.run_test_inner, args) + result = perf_test_driver.run_with_timeout(self.run_test_inner, + args) except Exception, e: - sys.stderr.write( - "Child Process Failed! (%s,%s). Error: %s\n" - % (data["path"], data["test_name"], e) - ) + sys.stderr.write("Child Process Failed! (%s,%s). Error: %s\n" % ( + data['path'], data['test_name'], e)) sys.stderr.flush() return None return True def process_input(self, data): - test_name = "({},{})".format(data["opt"], data["test_name"]) + test_name = '({},{})'.format(data['opt'], data['test_name']) print("Running {}...".format(test_name)) sys.stdout.flush() - if self.run_test(data, data["num_iters"]) is None: + if self.run_test(data, data['num_iters']) is None: return QuickCheckResult(test_name, success=False) return QuickCheckResult(test_name, success=True) @@ -99,17 +98,13 @@ SWIFT_BIN_DIR = os.path.dirname(os.path.abspath(__file__)) def parse_args(): import argparse - parser = argparse.ArgumentParser() parser.add_argument( - "--filter", - type=str, - default=None, - help="Filter out any test that does not match the given regex", - ) - parser.add_argument("--num-iters", type=int, default=2) + '--filter', type=str, default=None, + help='Filter out any test that does not match the given regex') + parser.add_argument('--num-iters', type=int, default=2) default_opt_levels = perf_test_driver.BenchmarkDriver_OptLevels - parser.add_argument("--opt-level", choices=default_opt_levels) + parser.add_argument('--opt-level', choices=default_opt_levels) return parser.parse_args() @@ -118,10 +113,9 @@ if __name__ == "__main__": opt_levels = perf_test_driver.BenchmarkDriver_OptLevels if args.opt_level is not None: opt_levels = [args.opt_level] - driver = QuickCheckBenchmarkDriver( - SWIFT_BIN_DIR, XFAIL_LIST, args.num_iters, opt_levels - ) - if driver.run(args.filter): + l = QuickCheckBenchmarkDriver(SWIFT_BIN_DIR, XFAIL_LIST, args.num_iters, + opt_levels) + if l.run(args.filter): sys.exit(0) else: sys.exit(-1) diff --git a/benchmark/scripts/Benchmark_RuntimeLeaksRunner.in b/benchmark/scripts/Benchmark_RuntimeLeaksRunner.in index 756af2348c6b5..2a7dd0d81d986 100644 --- a/benchmark/scripts/Benchmark_RuntimeLeaksRunner.in +++ b/benchmark/scripts/Benchmark_RuntimeLeaksRunner.in @@ -23,26 +23,26 @@ import perf_test_driver # noqa (E402 module level import not at top of file) # This is a hacked up XFAIL list. It should really be a json file, but it will # work for now. Add in the exact name of the pass to XFAIL. -XFAIL_LIST = [] +XFAIL_LIST = [ +] # Global Objective-C classes created by various frameworks. We do not care # about these. 
-IGNORABLE_GLOBAL_OBJC_CLASSES = set( - [ - "__NSPlaceholderDate", - "NSCache", - "__NSPlaceholderTimeZone", - "NSPlaceholderNumber", - "NSPlaceholderString", - "__NSPlaceholderArray", - "__NSPlaceholderDictionary", - "_NSPlaceholderData", - "_NSJSONReader", - ] -) +IGNORABLE_GLOBAL_OBJC_CLASSES = set([ + '__NSPlaceholderDate', + 'NSCache', + '__NSPlaceholderTimeZone', + 'NSPlaceholderNumber', + 'NSPlaceholderString', + '__NSPlaceholderArray', + '__NSPlaceholderDictionary', + '_NSPlaceholderData', + '_NSJSONReader' +]) class LeaksRunnerResult(perf_test_driver.Result): + def __init__(self, name, count=None): # True = 1, False = 0. # @@ -57,39 +57,36 @@ class LeaksRunnerResult(perf_test_driver.Result): return "N/A" def print_data(self, max_test_len): - fmt = "{:<%d}{:<10}{:}" % (max_test_len + 5) - print(fmt.format(self.get_name(), self.get_result(), self.get_count())) + fmt = '{:<%d}{:<10}{:}' % (max_test_len + 5) + print(fmt.format(self.get_name(), self.get_result(), + self.get_count())) class LeaksRunnerBenchmarkDriver(perf_test_driver.BenchmarkDriver): + def __init__(self, binary, xfail_list, num_samples, num_iters): perf_test_driver.BenchmarkDriver.__init__( - self, binary, xfail_list, enable_parallel=True - ) + self, binary, xfail_list, + enable_parallel=True) self.num_samples = num_samples self.num_iters = num_iters def print_data_header(self, max_test_len): - fmt = "{:<%d}{:<10}{:}" % (max_test_len + 5) - print(fmt.format("Name", "Result", "RC Delta")) + fmt = '{:<%d}{:<10}{:}' % (max_test_len + 5) + print(fmt.format('Name', 'Result', 'RC Delta')) # Propagate any data from this class that is needed for individual # tests. The reason this is needed is to avoid issues with attempting to # access a value in a different process. def prepare_input(self, name): - return {"num_samples": self.num_samples, "num_iters": self.num_iters} + return {'num_samples': self.num_samples, 'num_iters': self.num_iters} def run_test_inner(self, data, num_iters): - p = subprocess.Popen( - [ - data["path"], - "--num-samples={}".format(data["num_samples"]), - "--num-iters={}".format(num_iters), - data["test_name"], - ], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) + p = subprocess.Popen([ + data['path'], + "--num-samples={}".format(data['num_samples']), + "--num-iters={}".format(num_iters), data['test_name']], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) error_out = p.communicate()[1].split("\n") result = p.returncode if result is None: @@ -101,12 +98,11 @@ class LeaksRunnerBenchmarkDriver(perf_test_driver.BenchmarkDriver): def run_test(self, data, num_iters): try: args = [data, num_iters] - result = perf_test_driver.run_with_timeout(self.run_test_inner, args) + result = perf_test_driver.run_with_timeout(self.run_test_inner, + args) except Exception, e: - sys.stderr.write( - "Child Process Failed! (%s,%s). Error: %s\n" - % (data["path"], data["test_name"], e) - ) + sys.stderr.write("Child Process Failed! (%s,%s). Error: %s\n" % ( + data['path'], data['test_name'], e)) sys.stderr.flush() return None @@ -114,27 +110,26 @@ class LeaksRunnerBenchmarkDriver(perf_test_driver.BenchmarkDriver): # We grab the second line since swift globals get lazily created in # the first iteration. 
d = json.loads(result[1]) - d["objc_objects"] = [ - x for x in d["objc_objects"] if x not in IGNORABLE_GLOBAL_OBJC_CLASSES - ] - d["objc_count"] = len(d["objc_objects"]) + d['objc_objects'] = [x for x in d['objc_objects'] + if x not in IGNORABLE_GLOBAL_OBJC_CLASSES] + d['objc_count'] = len(d['objc_objects']) - total_count = d["objc_count"] + d["swift_count"] + total_count = d['objc_count'] + d['swift_count'] return total_count except Exception: - tmp = (data["path"], data["test_name"]) + tmp = (data['path'], data['test_name']) sys.stderr.write("Failed parse output! (%s,%s)\n" % tmp) sys.stderr.flush() return None def process_input(self, data): - test_name = "({},{})".format(data["opt"], data["test_name"]) + test_name = '({},{})'.format(data['opt'], data['test_name']) print("Running {}...".format(test_name)) sys.stdout.flush() - total_count1 = self.run_test(data, data["num_iters"]) + total_count1 = self.run_test(data, data['num_iters']) if total_count1 is None: return LeaksRunnerResult(test_name) - total_count2 = self.run_test(data, data["num_iters"] + 1) + total_count2 = self.run_test(data, data['num_iters'] + 1) if total_count2 is None: return LeaksRunnerResult(test_name) return LeaksRunnerResult(test_name, total_count2 - total_count1) @@ -145,24 +140,19 @@ SWIFT_BIN_DIR = os.path.dirname(os.path.abspath(__file__)) def parse_args(): import argparse - parser = argparse.ArgumentParser() parser.add_argument( - "-filter", - type=str, - default=None, - help="Filter out any test that does not match the given regex", - ) - parser.add_argument("-num-samples", type=int, default=2) - parser.add_argument("-num-iters", type=int, default=2) + '-filter', type=str, default=None, + help='Filter out any test that does not match the given regex') + parser.add_argument('-num-samples', type=int, default=2) + parser.add_argument('-num-iters', type=int, default=2) return parser.parse_args() if __name__ == "__main__": args = parse_args() driver = LeaksRunnerBenchmarkDriver( - SWIFT_BIN_DIR, XFAIL_LIST, args.num_samples, args.num_iters - ) + SWIFT_BIN_DIR, XFAIL_LIST, args.num_samples, args.num_iters) if driver.run(args.filter): sys.exit(0) else: diff --git a/benchmark/scripts/build_linux.py b/benchmark/scripts/build_linux.py index 4404815931182..64bee4692bbb1 100755 --- a/benchmark/scripts/build_linux.py +++ b/benchmark/scripts/build_linux.py @@ -7,45 +7,39 @@ def main(): p = argparse.ArgumentParser() - p.add_argument("cmake_path", help="The cmake binary to use") - p.add_argument("swift_src_dir", help="The swift source directory") - p.add_argument("clang", help="The path to the clang binary to use") - p.add_argument( - "swift_root_dir", - help="A path to a swift root produced by installing " - "Swift and Foundation together. We infer swiftc " - "from here", - ) - p.add_argument("destdir", help="The directory to perform the actual " "build in") - p.add_argument( - "--clean", action="store_true", help="Delete destdir before performing a build." - ) + p.add_argument('cmake_path', help='The cmake binary to use') + p.add_argument('swift_src_dir', help='The swift source directory') + p.add_argument('clang', help='The path to the clang binary to use') + p.add_argument('swift_root_dir', + help='A path to a swift root produced by installing ' + 'Swift and Foundation together. 
We infer swiftc ' + 'from here') + p.add_argument('destdir', help='The directory to perform the actual ' + 'build in') + p.add_argument('--clean', action='store_true', + help='Delete destdir before performing a build.') args = p.parse_args() if args.clean: print("Asked to clean... Cleaning!") - subprocess.check_output(["/bin/rm", "-rfv", args.destdir]) - subprocess.check_call(["/bin/mkdir", "-p", args.destdir]) + subprocess.check_output(['/bin/rm', '-rfv', args.destdir]) + subprocess.check_call(['/bin/mkdir', '-p', args.destdir]) os.chdir(args.destdir) configureInvocation = [ - args.cmake_path, - "-GNinja", - "-DSWIFT_EXEC={}/bin/swiftc".format(args.swift_root_dir), - "-DCLANG_EXEC={}".format(args.clang), - "-DSWIFT_LIBRARY_PATH={}/lib/swift".format(args.swift_root_dir), - "{}/benchmark".format(args.swift_src_dir), + args.cmake_path, '-GNinja', + '-DSWIFT_EXEC={}/bin/swiftc'.format(args.swift_root_dir), + '-DCLANG_EXEC={}'.format(args.clang), + '-DSWIFT_LIBRARY_PATH={}/lib/swift'.format(args.swift_root_dir), + '{}/benchmark'.format(args.swift_src_dir) ] - print("COMMAND: {}".format(" ".join(configureInvocation))) + print('COMMAND: {}'.format(' '.join(configureInvocation))) subprocess.check_call(configureInvocation) buildInvocation = [ - args.cmake_path, - "--build", - args.destdir, - "--", - "swift-benchmark-linux-x86_64", + args.cmake_path, '--build', args.destdir, '--', + 'swift-benchmark-linux-x86_64' ] - print("COMMAND: {}".format(" ".join(buildInvocation))) + print('COMMAND: {}'.format(' '.join(buildInvocation))) subprocess.check_call(buildInvocation) diff --git a/benchmark/scripts/build_script_helper.py b/benchmark/scripts/build_script_helper.py index 53bf7b19f6862..a3f999042289f 100755 --- a/benchmark/scripts/build_script_helper.py +++ b/benchmark/scripts/build_script_helper.py @@ -9,54 +9,50 @@ def perform_build(args, swiftbuild_path, config, binary_name, opt_flag): - assert config in ["debug", "release"] - assert binary_name in ["Benchmark_O", "Benchmark_Osize", "Benchmark_Onone"] - assert opt_flag in ["-O", "-Osize", "-Onone"] + assert(config in ['debug', 'release']) + assert(binary_name in ['Benchmark_O', 'Benchmark_Osize', + 'Benchmark_Onone']) + assert(opt_flag in ['-O', '-Osize', '-Onone']) inner_build_dir = os.path.join(args.build_path, binary_name) swiftbuild_args = [ swiftbuild_path, - "--package-path", - args.package_path, - "--build-path", - inner_build_dir, - "--configuration", - config, - "-Xswiftc", - "-Xllvm", - "-Xswiftc", - "-align-module-to-page-size", - "-Xswiftc", - opt_flag, + '--package-path', args.package_path, + '--build-path', inner_build_dir, + '--configuration', config, + '-Xswiftc', '-Xllvm', + '-Xswiftc', '-align-module-to-page-size', + '-Xswiftc', opt_flag, ] if args.verbose: - swiftbuild_args.append("--verbose") + swiftbuild_args.append('--verbose') subprocess.call(swiftbuild_args) # Copy the benchmark file into the final ./bin directory. 
- binpath = os.path.join(inner_build_dir, config, "SwiftBench") - finalpath = os.path.join(args.build_path, "bin", binary_name) + binpath = os.path.join(inner_build_dir, config, 'SwiftBench') + finalpath = os.path.join(args.build_path, 'bin', binary_name) shutil.copy(binpath, finalpath) def main(): parser = argparse.ArgumentParser() - parser.add_argument("--verbose", "-v", action="store_true") - parser.add_argument("--package-path", type=str, required=True) - parser.add_argument("--build-path", type=str, required=True) - parser.add_argument("--toolchain", type=str, required=True) + parser.add_argument('--verbose', '-v', action='store_true') + parser.add_argument('--package-path', type=str, required=True) + parser.add_argument('--build-path', type=str, required=True) + parser.add_argument('--toolchain', type=str, required=True) args = parser.parse_args() # Create our bin directory so we can copy in the binaries. - bin_dir = os.path.join(args.build_path, "bin") + bin_dir = os.path.join(args.build_path, 'bin') if not os.path.isdir(bin_dir): os.makedirs(bin_dir) - swiftbuild_path = os.path.join(args.toolchain, "usr", "bin", "swift-build") - perform_build(args, swiftbuild_path, "debug", "Benchmark_Onone", "-Onone") - perform_build(args, swiftbuild_path, "release", "Benchmark_Osize", "-Osize") - perform_build(args, swiftbuild_path, "release", "Benchmark_O", "-O") + swiftbuild_path = os.path.join(args.toolchain, 'usr', 'bin', 'swift-build') + perform_build(args, swiftbuild_path, 'debug', 'Benchmark_Onone', '-Onone') + perform_build(args, swiftbuild_path, 'release', 'Benchmark_Osize', + '-Osize') + perform_build(args, swiftbuild_path, 'release', 'Benchmark_O', '-O') if __name__ == "__main__": diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index 69450cb4b97b5..017ba24c10229 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -37,7 +37,7 @@ class `ReportFormatter` creates the test comparison report in specified format. from math import ceil, sqrt -class Sample(namedtuple("Sample", "i num_iters runtime")): +class Sample(namedtuple('Sample', 'i num_iters runtime')): u"""Single benchmark measurement. Initialized with: @@ -48,10 +48,10 @@ class Sample(namedtuple("Sample", "i num_iters runtime")): def __repr__(self): """Shorter Sample formating for debugging purposes.""" - return "s({0.i!r}, {0.num_iters!r}, {0.runtime!r})".format(self) + return 's({0.i!r}, {0.num_iters!r}, {0.runtime!r})'.format(self) -class Yield(namedtuple("Yield", "before_sample after")): +class Yield(namedtuple('Yield', 'before_sample after')): u"""Meta-measurement of when the Benchmark_X voluntarily yielded process. 
`before_sample`: index of measurement taken just after returning from yield @@ -79,14 +79,13 @@ def __init__(self, name, samples=None): def __str__(self): """Text summary of benchmark statistics.""" return ( - "{0.name!s} n={0.count!r} " - "Min={0.min!r} Q1={0.q1!r} M={0.median!r} Q3={0.q3!r} " - "Max={0.max!r} " - "R={0.range!r} {0.spread:.2%} IQR={0.iqr!r} " - "Mean={0.mean:.0f} SD={0.sd:.0f} CV={0.cv:.2%}".format(self) - if self.samples - else "{0.name!s} n=0".format(self) - ) + '{0.name!s} n={0.count!r} ' + 'Min={0.min!r} Q1={0.q1!r} M={0.median!r} Q3={0.q3!r} ' + 'Max={0.max!r} ' + 'R={0.range!r} {0.spread:.2%} IQR={0.iqr!r} ' + 'Mean={0.mean:.0f} SD={0.sd:.0f} CV={0.cv:.2%}' + .format(self) if self.samples else + '{0.name!s} n=0'.format(self)) def add(self, sample): """Add sample to collection and recompute statistics.""" @@ -98,9 +97,8 @@ def add(self, sample): def _update_stats(self, sample): old_stats = (self.count, self.mean, self.S_runtime) - _, self.mean, self.S_runtime = self.running_mean_variance( - old_stats, sample.runtime - ) + _, self.mean, self.S_runtime = ( + self.running_mean_variance(old_stats, sample.runtime)) def exclude_outliers(self, top_only=False): """Exclude outliers by applying Interquartile Range Rule. @@ -114,11 +112,8 @@ def exclude_outliers(self, top_only=False): benchmark runtimes in the microbenchmark range to filter out the environment noise caused by preemtive multitasking. """ - lo = ( - 0 - if top_only - else bisect_left(self._runtimes, int(self.q1 - 1.5 * self.iqr)) - ) + lo = (0 if top_only else + bisect_left(self._runtimes, int(self.q1 - 1.5 * self.iqr))) hi = bisect_right(self._runtimes, int(self.q3 + 1.5 * self.iqr)) outliers = self.samples[:lo] + self.samples[hi:] @@ -186,7 +181,8 @@ def iqr(self): @property def sd(self): u"""Standard Deviation (μs).""" - return 0 if self.count < 2 else sqrt(self.S_runtime / (self.count - 1)) + return (0 if self.count < 2 else + sqrt(self.S_runtime / (self.count - 1))) @staticmethod def running_mean_variance((k, M_, S_), x): @@ -233,13 +229,14 @@ class PerformanceTestResult(object): `--quantile`parameter. In both cases, the last column, MAX_RSS is optional. """ - def __init__(self, csv_row, quantiles=False, memory=False, delta=False, meta=False): + def __init__(self, csv_row, quantiles=False, memory=False, delta=False, + meta=False): """Initialize from a row of multiple columns with benchmark summary. The row is an iterable, such as a row provided by the CSV parser. 
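[Editor's aside, not part of the patch: the `exclude_outliers` hunk above applies the textbook interquartile-range rule, with a `top_only` mode that keeps the lower tail because microbenchmark noise only ever adds time. A self-contained sketch of the same rule over raw runtimes, with quartiles computed crudely rather than via the class's properties:]

def iqr_filter(runtimes, top_only=False):
    # Interquartile Range Rule: keep values within [Q1 - 1.5*IQR, Q3 + 1.5*IQR].
    # With top_only=True only the upper fence is applied, matching how the
    # script filters environment noise that can only inflate measurements.
    s = sorted(runtimes)
    n = len(s)
    q1, q3 = s[n // 4], s[(3 * n) // 4]   # crude quartiles, fine for a sketch
    iqr = q3 - q1
    lo = float('-inf') if top_only else q1 - 1.5 * iqr
    hi = q3 + 1.5 * iqr
    return [x for x in s if lo <= x <= hi]

# e.g. iqr_filter([10, 11, 11, 12, 90], top_only=True) -> [10, 11, 11, 12]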
""" - self.test_num = csv_row[0] # Ordinal number of the test - self.name = csv_row[1] # Name of the performance test + self.test_num = csv_row[0] # Ordinal number of the test + self.name = csv_row[1] # Name of the performance test self.num_samples = int(csv_row[2]) # Number of measurements taken if quantiles: # Variable number of columns representing quantiles @@ -247,63 +244,50 @@ def __init__(self, csv_row, quantiles=False, memory=False, delta=False, meta=Fal runtimes = csv_row[3:mem_index] if memory or meta else csv_row[3:] if delta: runtimes = [int(x) if x else 0 for x in runtimes] - runtimes = reduce( - lambda l, x: l.append(l[-1] + x) or l if l else [x], # runnin - runtimes, - None, - ) # total + runtimes = reduce(lambda l, x: l.append(l[-1] + x) or # runnin + l if l else [x], runtimes, None) # total num_values = len(runtimes) if self.num_samples < num_values: # remove repeated samples quantile = num_values - 1 qs = [float(i) / float(quantile) for i in range(0, num_values)] - indices = [ - max(0, int(ceil(self.num_samples * float(q))) - 1) for q in qs - ] - runtimes = [ - runtimes[indices.index(i)] for i in range(0, self.num_samples) - ] + indices = [max(0, int(ceil(self.num_samples * float(q))) - 1) + for q in qs] + runtimes = [runtimes[indices.index(i)] + for i in range(0, self.num_samples)] self.samples = PerformanceTestSamples( - self.name, [Sample(None, None, int(runtime)) for runtime in runtimes] - ) + self.name, + [Sample(None, None, int(runtime)) for runtime in runtimes]) self.samples.exclude_outliers(top_only=True) sams = self.samples - self.min, self.max, self.median, self.mean, self.sd = ( - sams.min, - sams.max, - sams.median, - sams.mean, - sams.sd, - ) - self.max_rss = ( # Maximum Resident Set Size (B) - int(csv_row[mem_index]) if memory else None - ) + self.min, self.max, self.median, self.mean, self.sd = \ + sams.min, sams.max, sams.median, sams.mean, sams.sd + self.max_rss = ( # Maximum Resident Set Size (B) + int(csv_row[mem_index]) if memory else None) else: # Legacy format with statistics for normal distribution. - self.min = int(csv_row[3]) # Minimum runtime (μs) - self.max = int(csv_row[4]) # Maximum runtime (μs) - self.mean = float(csv_row[5]) # Mean (average) runtime (μs) - self.sd = float(csv_row[6]) # Standard Deviation (μs) - self.median = int(csv_row[7]) # Median runtime (μs) - self.max_rss = ( # Maximum Resident Set Size (B) - int(csv_row[8]) if len(csv_row) > 8 else None - ) + self.min = int(csv_row[3]) # Minimum runtime (μs) + self.max = int(csv_row[4]) # Maximum runtime (μs) + self.mean = float(csv_row[5]) # Mean (average) runtime (μs) + self.sd = float(csv_row[6]) # Standard Deviation (μs) + self.median = int(csv_row[7]) # Median runtime (μs) + self.max_rss = ( # Maximum Resident Set Size (B) + int(csv_row[8]) if len(csv_row) > 8 else None) self.samples = None # Optional measurement metadata. The number of: # memory pages used, involuntary context switches and voluntary yields - self.mem_pages, self.involuntary_cs, self.yield_count = ( + self.mem_pages, self.involuntary_cs, self.yield_count = \ [int(x) for x in csv_row[-3:]] if meta else (None, None, None) - ) self.yields = None self.setup = None def __repr__(self): """Short summary for debugging purposes.""" return ( - "".format(self) - ) + '' + .format(self)) def merge(self, r): """Merge two results. 
@@ -318,13 +302,8 @@ def merge(self, r): map(self.samples.add, r.samples.samples) sams = self.samples self.num_samples = sams.num_samples - self.min, self.max, self.median, self.mean, self.sd = ( - sams.min, - sams.max, - sams.median, - sams.mean, - sams.sd, - ) + self.min, self.max, self.median, self.mean, self.sd = \ + sams.min, sams.max, sams.median, sams.mean, sams.sd else: self.min = min(self.min, r.min) self.max = max(self.max, r.max) @@ -336,8 +315,8 @@ def merge(self, r): # Metadata def minimum(a, b): # work around None being less than everything - return min(filter(lambda x: x is not None, [a, b])) if any([a, b]) else None - + return (min(filter(lambda x: x is not None, [a, b])) if any([a, b]) + else None) self.max_rss = minimum(self.max_rss, r.max_rss) self.setup = minimum(self.setup, r.setup) @@ -360,13 +339,12 @@ def __init__(self, old, new): # Test runtime improvement in % ratio = (new.min + 0.001) / (old.min + 0.001) - self.delta = (ratio - 1) * 100 + self.delta = ((ratio - 1) * 100) # Indication of dubious changes: when result's MIN falls inside the # (MIN, MAX) interval of result they are being compared with. - self.is_dubious = (old.min < new.min and new.min < old.max) or ( - new.min < old.min and old.min < new.max - ) + self.is_dubious = ((old.min < new.min and new.min < old.max) or + (new.min < old.min and old.min < new.max)) class LogParser(object): @@ -393,20 +371,15 @@ def _reset(self): # Parse lines like this # #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs) results_re = re.compile( - r"( *\d+[, \t]+[\w.\-\?!]+[, \t]+" - + r"[, \t]+".join([r"\d+"] * 2) # #,TEST - + r"(?:[, \t]+\d*)*)" # at least 2... - ) # ...or more numeric columns + r'( *\d+[, \t]+[\w.\-\?!]+[, \t]+' + # #,TEST + r'[, \t]+'.join([r'\d+'] * 2) + # at least 2... 
+ r'(?:[, \t]+\d*)*)') # ...or more numeric columns def _append_result(self, result): - columns = result.split(",") if "," in result else result.split() + columns = result.split(',') if ',' in result else result.split() r = PerformanceTestResult( - columns, - quantiles=self.quantiles, - memory=self.memory, - delta=self.delta, - meta=self.meta, - ) + columns, quantiles=self.quantiles, memory=self.memory, + delta=self.delta, meta=self.meta) r.setup = self.setup r.max_rss = r.max_rss or self.max_rss r.mem_pages = r.mem_pages or self.mem_pages @@ -424,43 +397,45 @@ def _store_memory_stats(self, max_rss, mem_pages): self.mem_pages = int(mem_pages) def _configure_format(self, header): - self.quantiles = "MEAN" not in header - self.memory = "MAX_RSS" in header - self.meta = "PAGES" in header - self.delta = "𝚫" in header + self.quantiles = 'MEAN' not in header + self.memory = 'MAX_RSS' in header + self.meta = 'PAGES' in header + self.delta = '𝚫' in header # Regular expression and action to take when it matches the parsed line state_actions = { results_re: _append_result, + # Verbose mode adds new productions: # Adaptively determined N; test loop multiple adjusting runtime to ~1s - re.compile(r"\s+Measuring with scale (\d+)."): ( - lambda self, num_iters: setattr(self, "num_iters", num_iters) - ), - re.compile(r"\s+Sample (\d+),(\d+)"): ( - lambda self, i, runtime: self.samples.append( - Sample(int(i), int(self.num_iters), int(runtime)) - ) - ), - re.compile(r"\s+SetUp (\d+)"): ( - lambda self, setup: setattr(self, "setup", int(setup)) - ), - re.compile(r"\s+Yielding after ~(\d+) μs"): ( - lambda self, since_last_yield: self.yields.append( - Yield(len(self.samples), int(since_last_yield)) - ) - ), - re.compile(r"( *#[, \t]+TEST[, \t]+SAMPLES[, \t]+MIN.*)"): _configure_format, + re.compile(r'\s+Measuring with scale (\d+).'): + (lambda self, num_iters: setattr(self, 'num_iters', num_iters)), + + re.compile(r'\s+Sample (\d+),(\d+)'): + (lambda self, i, runtime: + self.samples.append( + Sample(int(i), int(self.num_iters), int(runtime)))), + + re.compile(r'\s+SetUp (\d+)'): + (lambda self, setup: setattr(self, 'setup', int(setup))), + + re.compile(r'\s+Yielding after ~(\d+) μs'): + (lambda self, since_last_yield: + self.yields.append( + Yield(len(self.samples), int(since_last_yield)))), + + re.compile(r'( *#[, \t]+TEST[, \t]+SAMPLES[, \t]+MIN.*)'): + _configure_format, + # Environmental statistics: memory usage and context switches - re.compile( - r"\s+MAX_RSS \d+ - \d+ = (\d+) \((\d+) pages\)" - ): _store_memory_stats, - re.compile(r"\s+VCS \d+ - \d+ = (\d+)"): ( - lambda self, vcs: setattr(self, "voluntary_cs", int(vcs)) - ), - re.compile(r"\s+ICS \d+ - \d+ = (\d+)"): ( - lambda self, ics: setattr(self, "involuntary_cs", int(ics)) - ), + re.compile(r'\s+MAX_RSS \d+ - \d+ = (\d+) \((\d+) pages\)'): + _store_memory_stats, + + re.compile(r'\s+VCS \d+ - \d+ = (\d+)'): + (lambda self, vcs: setattr(self, 'voluntary_cs', int(vcs))), + + re.compile(r'\s+ICS \d+ - \d+ = (\d+)'): + (lambda self, ics: setattr(self, 'involuntary_cs', int(ics))), } def parse_results(self, lines): @@ -536,10 +511,10 @@ def __init__(self, old_results, new_results, delta_threshold): added_tests = new_tests.difference(old_tests) removed_tests = old_tests.difference(new_tests) - self.added = sorted([new_results[t] for t in added_tests], key=lambda r: r.name) - self.removed = sorted( - [old_results[t] for t in removed_tests], key=lambda r: r.name - ) + self.added = sorted([new_results[t] for t in added_tests], + key=lambda r: r.name) + 
self.removed = sorted([old_results[t] for t in removed_tests], + key=lambda r: r.name) def compare(name): return ResultComparison(old_results[name], new_results[name]) @@ -550,24 +525,19 @@ def partition(l, p): return reduce(lambda x, y: x[not p(y)].append(y) or x, l, ([], [])) decreased, not_decreased = partition( - comparisons, lambda c: c.ratio < (1 - delta_threshold) - ) + comparisons, lambda c: c.ratio < (1 - delta_threshold)) increased, unchanged = partition( - not_decreased, lambda c: c.ratio > (1 + delta_threshold) - ) + not_decreased, lambda c: c.ratio > (1 + delta_threshold)) # sorted partitions names = [c.name for c in comparisons] comparisons = dict(zip(names, comparisons)) - self.decreased = [ - comparisons[c.name] for c in sorted(decreased, key=lambda c: -c.delta) - ] - self.increased = [ - comparisons[c.name] for c in sorted(increased, key=lambda c: c.delta) - ] - self.unchanged = [ - comparisons[c.name] for c in sorted(unchanged, key=lambda c: c.name) - ] + self.decreased = [comparisons[c.name] + for c in sorted(decreased, key=lambda c: -c.delta)] + self.increased = [comparisons[c.name] + for c in sorted(increased, key=lambda c: c.delta)] + self.unchanged = [comparisons[c.name] + for c in sorted(unchanged, key=lambda c: c.name)] class ReportFormatter(object): @@ -579,25 +549,23 @@ class ReportFormatter(object): GitHub), `git` and `html`. """ - def __init__(self, comparator, changes_only, single_table=False): + def __init__(self, comparator, changes_only, + single_table=False): """Initialize with `TestComparator` and names of branches.""" self.comparator = comparator self.changes_only = changes_only self.single_table = single_table - PERFORMANCE_TEST_RESULT_HEADER = ("TEST", "MIN", "MAX", "MEAN", "MAX_RSS") - RESULT_COMPARISON_HEADER = ("TEST", "OLD", "NEW", "DELTA", "RATIO") + PERFORMANCE_TEST_RESULT_HEADER = ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS') + RESULT_COMPARISON_HEADER = ('TEST', 'OLD', 'NEW', 'DELTA', 'RATIO') @staticmethod def header_for(result): """Column labels for header row in results table.""" - return ( - ReportFormatter.PERFORMANCE_TEST_RESULT_HEADER - if isinstance(result, PerformanceTestResult) - else - # isinstance(result, ResultComparison) - ReportFormatter.RESULT_COMPARISON_HEADER - ) + return (ReportFormatter.PERFORMANCE_TEST_RESULT_HEADER + if isinstance(result, PerformanceTestResult) else + # isinstance(result, ResultComparison) + ReportFormatter.RESULT_COMPARISON_HEADER) @staticmethod def values(result): @@ -606,63 +574,53 @@ def values(result): Returns tuple of strings to display in the results table. 
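[Editor's aside, not part of the patch: the `TestComparator` hunks above partition comparisons purely by runtime ratio against a delta threshold; the sorting and the dubious-change flag are bookkeeping on top. Assuming `ratio` is the old-to-new runtime ratio computed in `ResultComparison`, the classification reduces to:]

def classify(comparisons, delta_threshold=0.05):
    # ratio < 1 - threshold: the new build got slower (regression);
    # ratio > 1 + threshold: it got faster (improvement); otherwise unchanged.
    decreased = [c for c in comparisons if c.ratio < 1 - delta_threshold]
    increased = [c for c in comparisons if c.ratio > 1 + delta_threshold]
    unchanged = [c for c in comparisons
                 if 1 - delta_threshold <= c.ratio <= 1 + delta_threshold]
    return decreased, increased, unchanged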
""" return ( - ( - result.name, - str(result.min), - str(result.max), - str(int(result.mean)), - str(result.max_rss) if result.max_rss else "—", - ) - if isinstance(result, PerformanceTestResult) - else + (result.name, + str(result.min), str(result.max), str(int(result.mean)), + str(result.max_rss) if result.max_rss else '—') + if isinstance(result, PerformanceTestResult) else # isinstance(result, ResultComparison) - ( - result.name, - str(result.old.min), - str(result.new.min), - "{0:+.1f}%".format(result.delta), - "{0:.2f}x{1}".format(result.ratio, " (?)" if result.is_dubious else ""), - ) + (result.name, + str(result.old.min), str(result.new.min), + '{0:+.1f}%'.format(result.delta), + '{0:.2f}x{1}'.format(result.ratio, + ' (?)' if result.is_dubious else '')) ) def markdown(self): """Report results of benchmark comparisons in Markdown format.""" return self._formatted_text( - label_formatter=lambda s: ("**" + s + "**"), - COLUMN_SEPARATOR=" | ", - DELIMITER_ROW=([":---"] + ["---:"] * 4), - SEPARATOR="  | | | | \n", + label_formatter=lambda s: ('**' + s + '**'), + COLUMN_SEPARATOR=' | ', + DELIMITER_ROW=([':---'] + ['---:'] * 4), + SEPARATOR='  | | | | \n', SECTION="""
<details {3}>
  <summary>{0} ({1})</summary>
  {2}
</details>
-""", - ) +""") def git(self): """Report results of benchmark comparisons in 'git' format.""" return self._formatted_text( label_formatter=lambda s: s.upper(), - COLUMN_SEPARATOR=" ", + COLUMN_SEPARATOR=' ', DELIMITER_ROW=None, - SEPARATOR="\n", + SEPARATOR='\n', SECTION=""" -{0} ({1}): \n{2}""", - ) +{0} ({1}): \n{2}""") def _column_widths(self): changed = self.comparator.decreased + self.comparator.increased - results = changed if self.changes_only else changed + self.comparator.unchanged + results = (changed if self.changes_only else + changed + self.comparator.unchanged) results += self.comparator.added + self.comparator.removed widths = [ - map(len, columns) - for columns in [ - ReportFormatter.PERFORMANCE_TEST_RESULT_HEADER, - ReportFormatter.RESULT_COMPARISON_HEADER, - ] - + [ReportFormatter.values(r) for r in results] + map(len, columns) for columns in + [ReportFormatter.PERFORMANCE_TEST_RESULT_HEADER, + ReportFormatter.RESULT_COMPARISON_HEADER] + + [ReportFormatter.values(r) for r in results] ] def max_widths(maximum, widths): @@ -670,9 +628,8 @@ def max_widths(maximum, widths): return reduce(max_widths, widths, [0] * 5) - def _formatted_text( - self, label_formatter, COLUMN_SEPARATOR, DELIMITER_ROW, SEPARATOR, SECTION - ): + def _formatted_text(self, label_formatter, COLUMN_SEPARATOR, + DELIMITER_ROW, SEPARATOR, SECTION): widths = self._column_widths() self.header_printed = False @@ -680,62 +637,43 @@ def justify_columns(contents): return [c.ljust(w) for w, c in zip(widths, contents)] def row(contents): - return ( - "" - if not contents - else COLUMN_SEPARATOR.join(justify_columns(contents)) + "\n" - ) + return ('' if not contents else + COLUMN_SEPARATOR.join(justify_columns(contents)) + '\n') def header(title, column_labels): - labels = ( - column_labels - if not self.single_table - else map(label_formatter, (title,) + column_labels[1:]) - ) - h = ( - ("" if not self.header_printed else SEPARATOR) - + row(labels) - + (row(DELIMITER_ROW) if not self.header_printed else "") - ) + labels = (column_labels if not self.single_table else + map(label_formatter, (title, ) + column_labels[1:])) + h = (('' if not self.header_printed else SEPARATOR) + + row(labels) + + (row(DELIMITER_ROW) if not self.header_printed else '')) if self.single_table and not self.header_printed: self.header_printed = True return h def format_columns(r, is_strong): - return r if not is_strong else r[:-1] + ("**" + r[-1] + "**",) + return (r if not is_strong else + r[:-1] + ('**' + r[-1] + '**', )) def table(title, results, is_strong=False, is_open=False): if not results: - return "" - rows = [ - row(format_columns(ReportFormatter.values(r), is_strong)) - for r in results - ] - table = header( - title if self.single_table else "", - ReportFormatter.header_for(results[0]), - ) + "".join(rows) - return ( - table - if self.single_table - else SECTION.format( - title, len(results), table, "open" if is_open else "" - ) - ) - - return "\n" + "".join( - [ - table("Regression", self.comparator.decreased, True, True), - table("Improvement", self.comparator.increased, True), - ( - "" - if self.changes_only - else table("No Changes", self.comparator.unchanged) - ), - table("Added", self.comparator.added, is_open=True), - table("Removed", self.comparator.removed, is_open=True), - ] - ) + return '' + rows = [row(format_columns(ReportFormatter.values(r), is_strong)) + for r in results] + table = (header(title if self.single_table else '', + ReportFormatter.header_for(results[0])) + + ''.join(rows)) + return (table if 
self.single_table else + SECTION.format( + title, len(results), table, 'open' if is_open else '')) + + return '\n' + ''.join([ + table('Regression', self.comparator.decreased, True, True), + table('Improvement', self.comparator.increased, True), + ('' if self.changes_only else + table('No Changes', self.comparator.unchanged)), + table('Added', self.comparator.added, is_open=True), + table('Removed', self.comparator.removed, is_open=True) + ]) HTML = """ @@ -784,90 +722,68 @@ def table(title, results, is_strong=False, is_open=False): def html(self): """Report results of benchmark comparisons in HTML format.""" - def row(name, old, new, delta, speedup, speedup_color): - return self.HTML_ROW.format(name, old, new, delta, speedup_color, speedup) + return self.HTML_ROW.format( + name, old, new, delta, speedup_color, speedup) def header(contents): - return self.HTML_HEADER_ROW.format(*contents) + return self.HTML_HEADER_ROW.format(* contents) def table(title, results, speedup_color): rows = [ - row(*(ReportFormatter.values(r) + (speedup_color,))) for r in results + row(*(ReportFormatter.values(r) + (speedup_color,))) + for r in results ] - return ( - "" - if not rows - else header( - (title, len(results)) + ReportFormatter.header_for(results[0])[1:] - ) - + "".join(rows) - ) + return ('' if not rows else + header((title, len(results)) + + ReportFormatter.header_for(results[0])[1:]) + + ''.join(rows)) return self.HTML.format( - "".join( - [ - table("Regression", self.comparator.decreased, "red"), - table("Improvement", self.comparator.increased, "green"), - ( - "" - if self.changes_only - else table("No Changes", self.comparator.unchanged, "black") - ), - table("Added", self.comparator.added, ""), - table("Removed", self.comparator.removed, ""), - ] - ) - ) + ''.join([ + table('Regression', self.comparator.decreased, 'red'), + table('Improvement', self.comparator.increased, 'green'), + ('' if self.changes_only else + table('No Changes', self.comparator.unchanged, 'black')), + table('Added', self.comparator.added, ''), + table('Removed', self.comparator.removed, '') + ])) def parse_args(args): """Parse command line arguments and set default values.""" - parser = argparse.ArgumentParser(description="Compare Performance tests.") - parser.add_argument( - "--old-file", help="Baseline performance test suite (csv file)", required=True - ) - parser.add_argument( - "--new-file", help="New performance test suite (csv file)", required=True - ) - parser.add_argument( - "--format", - choices=["markdown", "git", "html"], - help="Output format. Default is markdown.", - default="markdown", - ) - parser.add_argument("--output", help="Output file name") - parser.add_argument( - "--changes-only", help="Output only affected tests", action="store_true" - ) - parser.add_argument( - "--single-table", - help="Combine data in a single table in git and markdown formats", - action="store_true", - ) + parser = argparse.ArgumentParser(description='Compare Performance tests.') + parser.add_argument('--old-file', + help='Baseline performance test suite (csv file)', + required=True) + parser.add_argument('--new-file', + help='New performance test suite (csv file)', + required=True) + parser.add_argument('--format', + choices=['markdown', 'git', 'html'], + help='Output format. 
Default is markdown.', + default="markdown") + parser.add_argument('--output', help='Output file name') + parser.add_argument('--changes-only', + help='Output only affected tests', action='store_true') parser.add_argument( - "--delta-threshold", - help="Delta threshold. Default 0.05.", - type=float, - default=0.05, - ) + '--single-table', + help='Combine data in a single table in git and markdown formats', + action='store_true') + parser.add_argument('--delta-threshold', + help='Delta threshold. Default 0.05.', + type=float, default=0.05) return parser.parse_args(args) -def create_report( - old_results, - new_results, - delta_threshold, - format, - changes_only=True, - single_table=True, -): +def create_report(old_results, new_results, delta_threshold, format, + changes_only=True, single_table=True): comparator = TestComparator(old_results, new_results, delta_threshold) formatter = ReportFormatter(comparator, changes_only, single_table) formats = { - "markdown": formatter.markdown, - "git": formatter.git, - "html": formatter.html, + 'markdown': formatter.markdown, + 'git': formatter.git, + 'html': formatter.html } report = formats[format]() @@ -877,20 +793,16 @@ def create_report( def main(): """Compare benchmarks for changes in a formatted report.""" args = parse_args(sys.argv[1:]) - report = create_report( - LogParser.results_from_file(args.old_file), - LogParser.results_from_file(args.new_file), - args.delta_threshold, - args.format, - args.changes_only, - args.single_table, - ) + report = create_report(LogParser.results_from_file(args.old_file), + LogParser.results_from_file(args.new_file), + args.delta_threshold, args.format, + args.changes_only, args.single_table) print(report) if args.output: - with open(args.output, "w") as f: + with open(args.output, 'w') as f: f.write(report) -if __name__ == "__main__": +if __name__ == '__main__': sys.exit(main()) diff --git a/benchmark/scripts/create_benchmark.py b/benchmark/scripts/create_benchmark.py index cccaae23c76bd..2e2a4786752ae 100755 --- a/benchmark/scripts/create_benchmark.py +++ b/benchmark/scripts/create_benchmark.py @@ -7,7 +7,7 @@ def main(): p = argparse.ArgumentParser() - p.add_argument("name", help="The name of the new benchmark to be created") + p.add_argument('name', help='The name of the new benchmark to be created') args = p.parse_args() # adds benchmark to `CMakeLists.txt` @@ -24,19 +24,19 @@ def update_cmakelists(name): """Adds a new entry to the `CMakeLists.txt` file with the given benchmark name. """ - relative_path = create_relative_path("../CMakeLists.txt") + relative_path = create_relative_path('../CMakeLists.txt') file_contents = [] - with open(relative_path, "r") as f: + with open(relative_path, 'r') as f: file_contents = f.readlines() file_new_contents = insert_line_alphabetically( name, - " single-source/" + name + "\n", + ' single-source/' + name + '\n', file_contents, - r" single-source\/([a-zA-Z]+)", + r" single-source\/([a-zA-Z]+)" ) - with open(relative_path, "w") as f: + with open(relative_path, 'w') as f: for line in file_new_contents: f.write(line) @@ -46,17 +46,17 @@ def create_benchmark_file(name): and places it in the `single-source` directory. 
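[Editor's aside, not part of the patch: tying the compare_perf_tests pieces above together, the entry point boils down to parsing two result files and formatting a report. A rough usage sketch, with made-up file names:]

from compare_perf_tests import LogParser, create_report

old = LogParser.results_from_file('old.csv')    # hypothetical baseline log
new = LogParser.results_from_file('new.csv')    # hypothetical new log
report = create_report(old, new, delta_threshold=0.05, format='markdown',
                       changes_only=True, single_table=True)
print(report)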
""" - template_path = create_relative_path("Template.swift") - benchmark_template = "" - with open(template_path, "r") as f: - benchmark_template = "".join(f.readlines()) + template_path = create_relative_path('Template.swift') + benchmark_template = '' + with open(template_path, 'r') as f: + benchmark_template = ''.join(f.readlines()) # fill in template with benchmark name. formatted_template = benchmark_template.format(name=name) - relative_path = create_relative_path("../single-source/") - source_file_path = os.path.join(relative_path, name + ".swift") - with open(source_file_path, "w") as f: + relative_path = create_relative_path('../single-source/') + source_file_path = os.path.join(relative_path, name + '.swift') + with open(source_file_path, 'w') as f: f.write(formatted_template) @@ -64,14 +64,14 @@ def add_import_benchmark(name): """Adds an `import` statement to the `main.swift` file for the new benchmark. """ - relative_path = create_relative_path("../utils/main.swift") + relative_path = create_relative_path('../utils/main.swift') # read current contents into an array file_contents = [] - with open(relative_path, "r") as f: + with open(relative_path, 'r') as f: file_contents = f.readlines() - # the test dependencies are placed before all benchmarks, so we have to + # the test dependencies are placed before all benchmarks, so we have to # insert the benchmark in the right alphabetical order after we have seen # all test dependencies. read_test_dependencies = False @@ -82,27 +82,23 @@ def add_import_benchmark(name): match = re.search(r"import ([a-zA-Z]+)", line) if match and match.group(1): benchmark_name = match.group(1) - # find where to insert the new benchmark in the right alphabetical + # find where to insert the new benchmark in the right alphabetical # order. - if ( - name < benchmark_name - and previous_benchmark_name is None - or name < benchmark_name - and name > previous_benchmark_name - ): + if (name < benchmark_name and previous_benchmark_name is None or + name < benchmark_name and name > previous_benchmark_name): if read_test_dependencies: - file_new_contents.append("import " + name + "\n" + line) + file_new_contents.append('import ' + name + '\n' + line) else: - # all test dependencies are first specified, so from now + # all test dependencies are first specified, so from now # on we can look where to insert the new benchmark. read_test_dependencies = True file_new_contents.append(line) else: - file_new_contents.append(line) + file_new_contents.append(line) previous_benchmark_name = benchmark_name else: file_new_contents.append(line) - with open(relative_path, "w") as f: + with open(relative_path, 'w') as f: for line in file_new_contents: f.write(line) @@ -111,19 +107,19 @@ def add_register_benchmark(name): """Adds an `import` statement to the `main.swift` file for the new benchmark. 
""" - relative_path = create_relative_path("../utils/main.swift") + relative_path = create_relative_path('../utils/main.swift') file_contents = [] - with open(relative_path, "r") as f: + with open(relative_path, 'r') as f: file_contents = f.readlines() file_new_contents = insert_line_alphabetically( name, - "registerBenchmark(" + name + ")\n", - file_contents, - r"registerBenchmark\(([a-zA-Z]+)\)", + 'registerBenchmark(' + name + ')\n', + file_contents, + r"registerBenchmark\(([a-zA-Z]+)\)" ) - with open(relative_path, "w") as f: + with open(relative_path, 'w') as f: for line in file_new_contents: f.write(line) @@ -133,7 +129,7 @@ def insert_line_alphabetically(name, new_line, lines, regex): find where the new benchmark should be inserted with the given `new_line`. """ # the name of the previous seen benchmark in order to insert the new - # one at the correct position + # one at the correct position previous_benchmark_name = None # the new contents of the file updated_lines = [] @@ -144,15 +140,11 @@ def insert_line_alphabetically(name, new_line, lines, regex): benchmark_name = match.group(1) # check if we're at the line where we have to insert the new # benchmark in the correct alphabetical order - if ( - name < benchmark_name - and previous_benchmark_name is None - or name < benchmark_name - and name > previous_benchmark_name - ): + if (name < benchmark_name and previous_benchmark_name is None or + name < benchmark_name and name > previous_benchmark_name): updated_lines.append(new_line + line) else: - updated_lines.append(line) + updated_lines.append(line) previous_benchmark_name = benchmark_name else: updated_lines.append(line) diff --git a/benchmark/scripts/generate_harness/generate_harness.py b/benchmark/scripts/generate_harness/generate_harness.py index c5c6f87242133..6e4bc0f815c5e 100755 --- a/benchmark/scripts/generate_harness/generate_harness.py +++ b/benchmark/scripts/generate_harness/generate_harness.py @@ -21,12 +21,12 @@ import subprocess script_dir = os.path.dirname(os.path.realpath(__file__)) -perf_dir = os.path.realpath(os.path.join(script_dir, "../..")) -gyb = os.path.realpath(os.path.join(perf_dir, "../utils/gyb")) +perf_dir = os.path.realpath(os.path.join(script_dir, '../..')) +gyb = os.path.realpath(os.path.join(perf_dir, '../utils/gyb')) parser = argparse.ArgumentParser() -parser.add_argument( - "--output-dir", help="Output directory (for validation test)", default=perf_dir -) +parser.add_argument("--output-dir", + help="Output directory (for validation test)", + default=perf_dir) args = parser.parse_args() output_dir = args.output_dir @@ -35,8 +35,7 @@ def all_files(directory, extension): # matching: [directory]/**/*[extension] return [ os.path.join(root, f) for root, _, files in os.walk(directory) - for f in files - if f.endswith(extension) + for f in files if f.endswith(extension) ] @@ -47,13 +46,13 @@ def will_write(filename): # ensure path to file exists before writing os.makedirs(output_path) -if __name__ == "__main__": +if __name__ == '__main__': # Generate Your Boilerplate # Make sure longer paths are done first as CMakeLists.txt and main.swift # depend on the other gybs being generated first. 
- gyb_files = sorted(all_files(perf_dir, ".gyb"), key=len, reverse=True) + gyb_files = sorted(all_files(perf_dir, '.gyb'), key=len, reverse=True) for f in gyb_files: relative_path = os.path.relpath(f[:-4], perf_dir) out_file = os.path.join(output_dir, relative_path) will_write(out_file) - subprocess.call([gyb, "--line-directive", "", "-o", out_file, f]) + subprocess.call([gyb, '--line-directive', '', '-o', out_file, f]) diff --git a/benchmark/scripts/perf_test_driver/perf_test_driver.py b/benchmark/scripts/perf_test_driver/perf_test_driver.py index 7f8929f771764..449059b031d74 100644 --- a/benchmark/scripts/perf_test_driver/perf_test_driver.py +++ b/benchmark/scripts/perf_test_driver/perf_test_driver.py @@ -21,27 +21,29 @@ import subprocess -BENCHMARK_OUTPUT_RE = re.compile(r"\d+,([^,]+)") +BENCHMARK_OUTPUT_RE = re.compile(r'\d+,([^,]+)') class Result(object): + def __init__(self, name, status, output, xfail_list): self.name = name self.status = status self.output = output - self.is_xfailed = any((re.match(x, self.name) is not None for x in xfail_list)) + self.is_xfailed = any( + (re.match(x, self.name) is not None for x in xfail_list)) def is_failure(self): - return self.get_result() in ["FAIL", "XPASS"] + return self.get_result() in ['FAIL', 'XPASS'] def get_result(self): if self.is_xfailed: if self.status: - return "XFAIL" - return "XPASS" + return 'XFAIL' + return 'XPASS' if self.status: - return "FAIL" - return "PASS" + return 'FAIL' + return 'PASS' def get_name(self): return self.name @@ -51,7 +53,7 @@ def merge_in_extra_data(self, d): return d def print_data(self, max_test_len): - fmt = "{:<%d}{:}" % (max_test_len + 5) + fmt = '{:<%d}{:}' % (max_test_len + 5) print(fmt.format(self.get_name(), self.get_result())) @@ -63,44 +65,36 @@ def run_with_timeout(func, args): # we update to use python >= 3.3, use the timeout API on communicate # instead. import multiprocessing.dummy - fakeThreadPool = multiprocessing.dummy.Pool(1) try: result = fakeThreadPool.apply_async(func, args=args) return result.get(timeout_seconds) except multiprocessing.TimeoutError: fakeThreadPool.terminate() - raise RuntimeError( - "Child process aborted due to timeout. " - "Timeout: %s seconds" % timeout_seconds - ) + raise RuntimeError("Child process aborted due to timeout. 
" + "Timeout: %s seconds" % timeout_seconds) def _unwrap_self(args): return type(args[0]).process_input(*args) -BenchmarkDriver_OptLevels = ["Onone", "O", "Osize"] +BenchmarkDriver_OptLevels = ['Onone', 'O', 'Osize'] class BenchmarkDriver(object): - def __init__( - self, - binary_dir, - xfail_list, - enable_parallel=False, - opt_levels=BenchmarkDriver_OptLevels, - ): - self.targets = [ - (os.path.join(binary_dir, "Benchmark_%s" % o), o) for o in opt_levels - ] + + def __init__(self, binary_dir, xfail_list, enable_parallel=False, + opt_levels=BenchmarkDriver_OptLevels): + self.targets = [(os.path.join(binary_dir, 'Benchmark_%s' % o), o) + for o in opt_levels] self.xfail_list = xfail_list self.enable_parallel = enable_parallel self.data = None def print_data_header(self, max_test_len): - fmt = "{:<%d}{:}" % (max_test_len + 5) - print(fmt.format("Name", "Result")) + fmt = '{:<%d}{:}' % (max_test_len + 5) + print(fmt.format('Name', 'Result')) def prepare_input(self, name, opt_level): raise RuntimeError("Abstract method") @@ -121,7 +115,7 @@ def run_for_opt_level(self, binary, opt_level, test_filter): names = [n for n in names if regex.match(n)] def prepare_input_wrapper(name): - x = {"opt": opt_level, "path": binary, "test_name": name} + x = {'opt': opt_level, 'path': binary, 'test_name': name} x.update(self.prepare_input(name)) return x @@ -135,31 +129,33 @@ def prepare_input_wrapper(name): results = map(self.process_input, prepared_input) def reduce_results(acc, r): - acc["result"].append(r) - acc["has_failure"] = acc["has_failure"] or r.is_failure() - acc["max_test_len"] = max(acc["max_test_len"], len(r.get_name())) - acc["extra_data"] = r.merge_in_extra_data(acc["extra_data"]) + acc['result'].append(r) + acc['has_failure'] = acc['has_failure'] or r.is_failure() + acc['max_test_len'] = max(acc['max_test_len'], len(r.get_name())) + acc['extra_data'] = r.merge_in_extra_data(acc['extra_data']) return acc - return functools.reduce( - reduce_results, - results, - {"result": [], "has_failure": False, "max_test_len": 0, "extra_data": {}}, - ) + return functools.reduce(reduce_results, results, { + 'result': [], + 'has_failure': False, + 'max_test_len': 0, + 'extra_data': {} + }) def print_data(self, data, max_test_len): print("Results:") self.print_data_header(max_test_len) for d in data: - for r in d["result"]: + for r in d['result']: r.print_data(max_test_len) def run(self, test_filter=None): self.data = [ self.run_for_opt_level(binary, opt_level, test_filter) - for binary, opt_level in self.targets - ] - max_test_len = functools.reduce(max, [d["max_test_len"] for d in self.data]) - has_failure = functools.reduce(max, [d["has_failure"] for d in self.data]) + for binary, opt_level in self.targets] + max_test_len = functools.reduce(max, + [d['max_test_len'] for d in self.data]) + has_failure = functools.reduce(max, + [d['has_failure'] for d in self.data]) self.print_data(self.data, max_test_len) return not has_failure diff --git a/benchmark/scripts/run_smoke_bench b/benchmark/scripts/run_smoke_bench index 0facbe7b344f5..f478c7e95a869 100755 --- a/benchmark/scripts/run_smoke_bench +++ b/benchmark/scripts/run_smoke_bench @@ -28,15 +28,14 @@ import glob import os import subprocess import sys -from imp import load_source from compare_perf_tests import LogParser, TestComparator, create_report +from imp import load_source # import Benchmark_Driver # doesn't work because it misses '.py' extension Benchmark_Driver = load_source( - "Benchmark_Driver", - 
os.path.join(os.path.dirname(os.path.abspath(__file__)), "Benchmark_Driver"), -) + 'Benchmark_Driver', os.path.join(os.path.dirname( + os.path.abspath(__file__)), 'Benchmark_Driver')) # from Benchmark_Driver import BenchmarkDriver, BenchmarkDoctor, ... BenchmarkDriver = Benchmark_Driver.BenchmarkDriver BenchmarkDoctor = Benchmark_Driver.BenchmarkDoctor @@ -47,12 +46,11 @@ VERBOSE = False class DriverArgs(object): """Arguments for BenchmarkDriver.""" - - def __init__(self, tests, optimization="O"): + def __init__(self, tests, optimization='O'): """Initialize with path to the build-dir and optimization level.""" self.benchmarks = None self.filters = None - self.tests = os.path.join(tests, "bin") + self.tests = os.path.join(tests, 'bin') self.optimization = optimization @@ -65,76 +63,49 @@ def main(): global VERBOSE argparser = argparse.ArgumentParser() argparser.add_argument( - "-verbose", action="store_true", help="print verbose messages" - ) + '-verbose', action='store_true', + help='print verbose messages') argparser.add_argument( - "-O", - action="append_const", - const="O", - dest="opt_levels", - help="test -O benchmarks", - ) + '-O', action='append_const', const='O', dest='opt_levels', + help='test -O benchmarks') argparser.add_argument( - "-Osize", - action="append_const", - const="Osize", - dest="opt_levels", - help="test -Osize benchmarks", - ) + '-Osize', action='append_const', const='Osize', dest='opt_levels', + help='test -Osize benchmarks') argparser.add_argument( - "-Onone", - action="append_const", - const="Onone", - dest="opt_levels", - help="test -Onone benchmarks (except code size)", - ) + '-Onone', action='append_const', const='Onone', dest='opt_levels', + help='test -Onone benchmarks (except code size)') argparser.add_argument( - "-skip-code-size", - action="store_true", - help="Don't report code size differences", - ) + '-skip-code-size', action='store_true', + help="Don't report code size differences") argparser.add_argument( - "-skip-performance", - action="store_true", - help="Don't report performance differences", - ) + '-skip-performance', action='store_true', + help="Don't report performance differences") argparser.add_argument( - "-skip-check-added", - action="store_true", - help="Don't validate newly added benchmarks", - ) + '-skip-check-added', action='store_true', + help="Don't validate newly added benchmarks") argparser.add_argument( - "-o", - type=str, - help="In addition to stdout, write the results into a markdown file", - ) + '-o', type=str, + help='In addition to stdout, write the results into a markdown file') argparser.add_argument( - "-threshold", - type=float, - help="The performance threshold in %% which triggers a re-run", - default=5, - ) + '-threshold', type=float, + help='The performance threshold in %% which triggers a re-run', + default=5) argparser.add_argument( - "-num-samples", - type=int, - help="The (minimum) number of samples to run", - default=3, - ) + '-num-samples', type=int, + help='The (minimum) number of samples to run', default=3) argparser.add_argument( - "-num-reruns", - type=int, + '-num-reruns', type=int, help="The number of re-runs until it's assumed to be a real change", - default=8, - ) + default=8) argparser.add_argument( - "-platform", type=str, help="The benchmark build platform", default="macosx" - ) + '-platform', type=str, + help='The benchmark build platform', default='macosx') argparser.add_argument( - "oldbuilddir", nargs=1, type=str, help="old benchmark build directory" - ) + 'oldbuilddir', nargs=1, type=str, + 
help='old benchmark build directory') argparser.add_argument( - "newbuilddir", nargs=1, type=str, help="new benchmark build directory" - ) + 'newbuilddir', nargs=1, type=str, + help='new benchmark build directory') args = argparser.parse_args() VERBOSE = args.verbose @@ -144,42 +115,29 @@ def main(): def test_opt_levels(args): output_file = None if args.o: - output_file = open(args.o, "w") + output_file = open(args.o, 'w') changes = False - for opt_level in args.opt_levels or ["O", "Osize", "Onone"]: - log("####### Testing optimization level -" + opt_level + " #######") + for opt_level in args.opt_levels or ['O', 'Osize', 'Onone']: + log('####### Testing optimization level -' + opt_level + ' #######') if not args.skip_performance: - if test_performance( - opt_level, - args.oldbuilddir[0], - args.newbuilddir[0], - float(args.threshold) / 100, - args.num_samples, - args.num_reruns, - output_file, - ): + if test_performance(opt_level, args.oldbuilddir[0], + args.newbuilddir[0], + float(args.threshold) / 100, args.num_samples, + args.num_reruns, output_file): changes = True # There is no point in reporting code size for Onone. - if not args.skip_code_size and opt_level != "Onone": - if report_code_size( - opt_level, - args.oldbuilddir[0], - args.newbuilddir[0], - args.platform, - output_file, - ): + if not args.skip_code_size and opt_level != 'Onone': + if report_code_size(opt_level, args.oldbuilddir[0], + args.newbuilddir[0], + args.platform, output_file): changes = True if not args.skip_code_size: - if report_code_size( - "swiftlibs", - args.oldbuilddir[0], - args.newbuilddir[0], - args.platform, - output_file, - ): + if report_code_size('swiftlibs', args.oldbuilddir[0], + args.newbuilddir[0], + args.platform, output_file): changes = True if not args.skip_check_added: @@ -200,14 +158,10 @@ def measure(driver, tests, i): Collect increasing number of samples, depending on the iteration. """ num_samples = min(i + 3, 10) - msg = " Iteration {0} for {1}: num samples = {2}, ".format( - i, driver.args.tests, num_samples - ) - msg += ( - "running all tests" - if driver.all_tests == tests - else "re-testing {0} tests".format(len(tests)) - ) + msg = ' Iteration {0} for {1}: num samples = {2}, '.format( + i, driver.args.tests, num_samples) + msg += ('running all tests' if driver.all_tests == tests else + 're-testing {0} tests'.format(len(tests))) log(msg) driver.tests = tests return driver.run(num_samples=num_samples, sample_time=0.0025) @@ -220,9 +174,8 @@ def merge(results, other_results): return results -def test_performance( - opt_level, old_dir, new_dir, threshold, num_samples, num_reruns, output_file -): +def test_performance(opt_level, old_dir, new_dir, threshold, num_samples, + num_reruns, output_file): """Detect performance changes in benchmarks. 
Start fast with few samples per benchmark and gradually spend more time @@ -230,10 +183,8 @@ def test_performance( """ i, unchanged_length_count = 0, 0 - old, new = [ - BenchmarkDriver(DriverArgs(dir, optimization=opt_level)) - for dir in [old_dir, new_dir] - ] + old, new = [BenchmarkDriver(DriverArgs(dir, optimization=opt_level)) + for dir in [old_dir, new_dir]] results = [measure(driver, driver.tests, i) for driver in [old, new]] tests = TestComparator(results[0], results[1], threshold) changed = tests.decreased + tests.increased @@ -241,11 +192,10 @@ def test_performance( while len(changed) > 0 and unchanged_length_count < num_reruns: i += 1 if VERBOSE: - log(" test again: " + str([test.name for test in changed])) - results = [ - merge(the_results, measure(driver, [test.name for test in changed], i)) - for the_results, driver in zip(results, [old, new]) - ] + log(' test again: ' + str([test.name for test in changed])) + results = [merge(the_results, + measure(driver, [test.name for test in changed], i)) + for the_results, driver in zip(results, [old, new])] tests = TestComparator(results[0], results[1], threshold) changed = tests.decreased + tests.increased @@ -254,19 +204,19 @@ def test_performance( else: unchanged_length_count = 0 - log("") - return report_results( - "Performance: -" + opt_level, None, None, threshold * 1.4, output_file, *results - ) + log('') + return report_results("Performance: -" + opt_level, None, None, + threshold * 1.4, output_file, *results) def report_code_size(opt_level, old_dir, new_dir, platform, output_file): - if opt_level == "swiftlibs": - files = glob.glob(os.path.join(old_dir, "lib", "swift", platform, "*.dylib")) + if opt_level == 'swiftlibs': + files = glob.glob(os.path.join(old_dir, 'lib', 'swift', platform, + '*.dylib')) else: - files = glob.glob( - os.path.join(old_dir, opt_level + "-*" + platform + "*", "*.o") - ) + files = glob.glob(os.path.join(old_dir, + opt_level + '-*' + platform + '*', + '*.o')) idx = 1 old_lines = "" @@ -279,44 +229,37 @@ def report_code_size(opt_level, old_dir, new_dir, platform, output_file): bname = os.path.basename(oldfile) def result_line(value): - v = "," + str(value) - return str(idx) + "," + bname + ",1" + (v * 3) + ",0" + v + "\n" + v = ',' + str(value) + return (str(idx) + ',' + bname + ',1' + (v * 3) + + ',0' + v + '\n') old_lines += result_line(oldsize) new_lines += result_line(newsize) idx += 1 - return report_results( - "Code size: -" + opt_level, old_lines, new_lines, 0.01, output_file - ) + return report_results("Code size: -" + opt_level, + old_lines, new_lines, 0.01, output_file) def get_codesize(filename): - output = subprocess.check_output(["size", filename]).splitlines() + output = subprocess.check_output(['size', filename]).splitlines() header_line = output[0] data_line = output[1] - if header_line.find("__TEXT") != 0: - sys.exit("unexpected output from size command:\n" + output) - return int(data_line.split("\t")[0]) - - -def report_results( - title, - old_lines, - new_lines, - threshold, - output_file, - old_results=None, - new_results=None, -): + if header_line.find('__TEXT') != 0: + sys.exit('unexpected output from size command:\n' + output) + return int(data_line.split('\t')[0]) + + +def report_results(title, old_lines, new_lines, threshold, output_file, + old_results=None, new_results=None): old_results = old_results or LogParser.results_from_string(old_lines) new_results = new_results or LogParser.results_from_string(new_lines) print("------- " + title + " -------") - 
print(create_report(old_results, new_results, threshold, "git")) + print(create_report(old_results, new_results, threshold, 'git')) if output_file: - report = create_report(old_results, new_results, threshold, "markdown") + report = create_report(old_results, new_results, threshold, 'markdown') if report != "": output_file.write("### " + title + "\n") output_file.write(report) @@ -346,19 +289,17 @@ performance team (@eeckstein). Hardware Overview """ - po = subprocess.check_output(["system_profiler", "SPHardwareDataType"]) + po = subprocess.check_output(['system_profiler', 'SPHardwareDataType']) for line in po.splitlines(): - selection = [ - "Model Name", - "Model Identifier", - "Processor Name", - "Processor Speed", - "Number of Processors", - "Total Number of Cores", - "L2 Cache", - "L3 Cache", - "Memory:", - ] + selection = ['Model Name', + 'Model Identifier', + 'Processor Name', + 'Processor Speed', + 'Number of Processors', + 'Total Number of Cores', + 'L2 Cache', + 'L3 Cache', + 'Memory:'] if any(s in line for s in selection): text += line + "\n" @@ -379,5 +320,5 @@ def check_added(args, output_file=None): doctor.check() -if __name__ == "__main__": +if __name__ == '__main__': sys.exit(main()) diff --git a/benchmark/scripts/test_Benchmark_Driver.py b/benchmark/scripts/test_Benchmark_Driver.py index 570fee82f2f8b..32b1a9e527635 100644 --- a/benchmark/scripts/test_Benchmark_Driver.py +++ b/benchmark/scripts/test_Benchmark_Driver.py @@ -26,9 +26,8 @@ # import Benchmark_Driver # doesn't work because it misses '.py' extension Benchmark_Driver = load_source( - "Benchmark_Driver", - os.path.join(os.path.dirname(os.path.abspath(__file__)), "Benchmark_Driver"), -) + 'Benchmark_Driver', os.path.join(os.path.dirname( + os.path.abspath(__file__)), 'Benchmark_Driver')) # from Benchmark_Driver import parse_args parse_args = Benchmark_Driver.parse_args BenchmarkDriver = Benchmark_Driver.BenchmarkDriver @@ -46,122 +45,121 @@ def assert_contains(self, texts, output): def test_requires_command_argument(self): with captured_output() as (_, err): self.assertRaises(SystemExit, parse_args, []) - self.assert_contains(["usage:", "COMMAND", "too few arguments"], err.getvalue()) + self.assert_contains(['usage:', 'COMMAND', 'too few arguments'], + err.getvalue()) def test_command_help_lists_commands(self): with captured_output() as (out, _): - self.assertRaises(SystemExit, parse_args, ["-h"]) - self.assert_contains(["COMMAND", "run", "compare", "check"], out.getvalue()) + self.assertRaises(SystemExit, parse_args, ['-h']) + self.assert_contains(['COMMAND', 'run', 'compare', 'check'], + out.getvalue()) def test_run_benchmarks_by_name_or_ordinal(self): - benchmarks = ["AngryPhonebook", "42"] - self.assertEqual(parse_args(["run"] + benchmarks).benchmarks, benchmarks) + benchmarks = ['AngryPhonebook', '42'] + self.assertEqual( + parse_args(['run'] + benchmarks).benchmarks, benchmarks) def test_run_benchmarks_matching_pattern(self): - regexes = ["Prefix", ".*Suffix.*"] - filters = ["-f", regexes[0], "-f", regexes[1]] - self.assertEqual(parse_args(["run"] + filters).filters, regexes) + regexes = ['Prefix', '.*Suffix.*'] + filters = ['-f', regexes[0], '-f', regexes[1]] + self.assertEqual(parse_args(['run'] + filters).filters, regexes) def test_run_benchmarks_and_filters_are_exclusive(self): with captured_output() as (_, err): - self.assertRaises( - SystemExit, parse_args, "run -f Filter1 Benchmark1".split() - ) + self.assertRaises(SystemExit, + parse_args, 'run -f Filter1 Benchmark1'.split()) self.assert_contains( - 
["error", "argument BENCHMARK: not allowed with argument -f/--filter"], - err.getvalue(), - ) + ['error', + 'argument BENCHMARK: not allowed with argument -f/--filter'], + err.getvalue()) def test_tests_location(self): here = os.path.dirname(os.path.abspath(__file__)) - self.assertEqual(parse_args(["run"]).tests, here) - tests = "/benchmarks/are/here" - self.assertEqual(parse_args(["run", "-t", tests]).tests, tests) + self.assertEqual(parse_args(['run']).tests, here) + tests = '/benchmarks/are/here' + self.assertEqual(parse_args(['run', '-t', tests]).tests, tests) def test_optimization_argument(self): - self.assertEqual(parse_args(["run"]).optimization, "O") - self.assertEqual(parse_args(["run", "-o", "O"]).optimization, "O") - self.assertEqual(parse_args(["run", "-o", "Onone"]).optimization, "Onone") - self.assertEqual(parse_args(["run", "-o", "Osize"]).optimization, "Osize") + self.assertEqual(parse_args(['run']).optimization, 'O') + self.assertEqual( + parse_args(['run', '-o', 'O']).optimization, 'O') + self.assertEqual( + parse_args(['run', '-o', 'Onone']).optimization, 'Onone') + self.assertEqual( + parse_args(['run', '-o', 'Osize']).optimization, 'Osize') with captured_output() as (_, err): - self.assertRaises(SystemExit, parse_args, ["run", "-o", "bogus"]) + self.assertRaises(SystemExit, + parse_args, ['run', '-o', 'bogus']) self.assert_contains( - [ - "error:", - "argument -o/--optimization: invalid choice: 'bogus'", - "(choose from 'O', 'Onone', 'Osize')", - ], - err.getvalue(), - ) + ['error:', + "argument -o/--optimization: invalid choice: 'bogus'", + "(choose from 'O', 'Onone', 'Osize')"], + err.getvalue()) def test_independent_samples(self): - self.assertEqual(parse_args(["run"]).independent_samples, 1) - self.assertEqual(parse_args(["run", "-i", "3"]).independent_samples, 3) + self.assertEqual(parse_args(['run']).independent_samples, 1) + self.assertEqual(parse_args(['run', '-i', '3']).independent_samples, + 3) with captured_output() as (out, err): - self.assertRaises(SystemExit, parse_args, ["run", "-i", "-3"]) + self.assertRaises(SystemExit, + parse_args, ['run', '-i', '-3']) self.assert_contains( - [ - "error:", - "argument -i/--independent-samples: " - + "invalid positive_int value: '-3'", - ], - err.getvalue(), - ) + ['error:', "argument -i/--independent-samples: " + + "invalid positive_int value: '-3'"], + err.getvalue()) def test_output_dir(self): - self.assertIsNone(parse_args(["run"]).output_dir) - self.assertEqual(parse_args(["run", "--output-dir", "/log"]).output_dir, "/log") + self.assertIsNone(parse_args(['run']).output_dir) + self.assertEqual( + parse_args(['run', '--output-dir', '/log']).output_dir, '/log') def test_check_supports_vebose_output(self): - self.assertFalse(parse_args(["check"]).verbose) - self.assertTrue(parse_args(["check", "-v"]).verbose) - self.assertTrue(parse_args(["check", "--verbose"]).verbose) + self.assertFalse(parse_args(['check']).verbose) + self.assertTrue(parse_args(['check', '-v']).verbose) + self.assertTrue(parse_args(['check', '--verbose']).verbose) def test_check_supports_mardown_output(self): - self.assertFalse(parse_args(["check"]).markdown) - self.assertTrue(parse_args(["check", "-md"]).markdown) - self.assertTrue(parse_args(["check", "--markdown"]).markdown) + self.assertFalse(parse_args(['check']).markdown) + self.assertTrue(parse_args(['check', '-md']).markdown) + self.assertTrue(parse_args(['check', '--markdown']).markdown) def test_check_flags_are_mutually_exclusive(self): with captured_output() as (out, err): - 
self.assertRaises(SystemExit, parse_args, ["check", "-md", "-v"]) + self.assertRaises(SystemExit, + parse_args, ['check', '-md', '-v']) self.assert_contains( - [ - "error:", - "argument -v/--verbose: " + "not allowed with argument -md/--markdown", - ], - err.getvalue(), - ) + ['error:', 'argument -v/--verbose: ' + + 'not allowed with argument -md/--markdown'], + err.getvalue()) class ArgsStub(object): def __init__(self): self.benchmarks = None self.filters = None - self.tests = "/benchmarks/" - self.optimization = "O" + self.tests = '/benchmarks/' + self.optimization = 'O' class SubprocessMock(Mock): """Mock for subprocess module's `check_output` method.""" - STDOUT = object() def __init__(self, responses=None): super(SubprocessMock, self).__init__(responses) - def _check_output(args, stdin=None, stdout=None, stderr=None, shell=False): + def _check_output(args, stdin=None, stdout=None, stderr=None, + shell=False): return self.record_and_respond(args, stdin, stdout, stderr, shell) - self.check_output = _check_output def record_and_respond(self, args, stdin, stdout, stderr, shell): # _ = stdin, stdout, shell # ignored in mock - assert stderr == self.STDOUT, "Errors are NOT redirected to STDOUT" + assert stderr == self.STDOUT, 'Errors are NOT redirected to STDOUT' args = tuple(args) self.calls.append(args) - return self.respond.get(args, "") + return self.respond.get(args, '') class TestBenchmarkDriverInitialization(unittest.TestCase): @@ -171,95 +169,81 @@ def setUp(self): def test_test_harness(self): self.assertEqual( - BenchmarkDriver(self.args, tests=["ignored"]).test_harness, - "/benchmarks/Benchmark_O", - ) - self.args.tests = "/path" - self.args.optimization = "Suffix" + BenchmarkDriver(self.args, tests=['ignored']).test_harness, + '/benchmarks/Benchmark_O') + self.args.tests = '/path' + self.args.optimization = 'Suffix' self.assertEqual( - BenchmarkDriver(self.args, tests=["ignored"]).test_harness, - "/path/Benchmark_Suffix", - ) + BenchmarkDriver(self.args, tests=['ignored']).test_harness, + '/path/Benchmark_Suffix') def test_gets_list_of_precommit_benchmarks(self): self.subprocess_mock.expect( - "/benchmarks/Benchmark_O --list --delim=\t".split(" "), - "#\tTest\t[Tags]\n1\tBenchmark1\t[t1, t2]\n2\tBenchmark2\t[t3]\n", - ) - driver = BenchmarkDriver(self.args, _subprocess=self.subprocess_mock) + '/benchmarks/Benchmark_O --list --delim=\t'.split(' '), + '#\tTest\t[Tags]\n1\tBenchmark1\t[t1, t2]\n2\tBenchmark2\t[t3]\n') + driver = BenchmarkDriver( + self.args, _subprocess=self.subprocess_mock) self.subprocess_mock.assert_called_all_expected() - self.assertEqual(driver.tests, ["Benchmark1", "Benchmark2"]) - self.assertEqual(driver.all_tests, ["Benchmark1", "Benchmark2"]) - self.assertEquals(driver.test_number["Benchmark1"], "1") - self.assertEquals(driver.test_number["Benchmark2"], "2") + self.assertEqual(driver.tests, + ['Benchmark1', 'Benchmark2']) + self.assertEqual(driver.all_tests, + ['Benchmark1', 'Benchmark2']) + self.assertEquals(driver.test_number['Benchmark1'], "1") + self.assertEquals(driver.test_number['Benchmark2'], "2") list_all_tests = ( - "/benchmarks/Benchmark_O --list --delim=\t --skip-tags=".split(" "), + '/benchmarks/Benchmark_O --list --delim=\t --skip-tags='.split(' '), """# Test [Tags] 1 Benchmark1 [t1, t2] 2 Benchmark2 [t3] 3 Benchmark3 [t3, t4] -""", - ) +""") def test_gets_list_of_all_benchmarks_when_benchmarks_args_exist(self): """Filters tests by name or test number, ignoring unknown.""" - self.args.benchmarks = "1 Benchmark3 1 bogus".split() + 
self.args.benchmarks = '1 Benchmark3 1 bogus'.split() self.subprocess_mock.expect(*self.list_all_tests) - driver = BenchmarkDriver(self.args, _subprocess=self.subprocess_mock) + driver = BenchmarkDriver( + self.args, _subprocess=self.subprocess_mock) self.subprocess_mock.assert_called_all_expected() - self.assertEqual(driver.tests, ["Benchmark1", "Benchmark3"]) - self.assertEqual(driver.all_tests, ["Benchmark1", "Benchmark2", "Benchmark3"]) + self.assertEqual(driver.tests, ['Benchmark1', 'Benchmark3']) + self.assertEqual(driver.all_tests, + ['Benchmark1', 'Benchmark2', 'Benchmark3']) def test_filters_benchmarks_by_pattern(self): - self.args.filters = "-f .+3".split() + self.args.filters = '-f .+3'.split() self.subprocess_mock.expect(*self.list_all_tests) - driver = BenchmarkDriver(self.args, _subprocess=self.subprocess_mock) + driver = BenchmarkDriver( + self.args, _subprocess=self.subprocess_mock) self.subprocess_mock.assert_called_all_expected() - self.assertEqual(driver.tests, ["Benchmark3"]) - self.assertEqual(driver.all_tests, ["Benchmark1", "Benchmark2", "Benchmark3"]) + self.assertEqual(driver.tests, ['Benchmark3']) + self.assertEqual(driver.all_tests, + ['Benchmark1', 'Benchmark2', 'Benchmark3']) def test_log_file(self): """When swift-repo is set, log is tied to Git branch and revision.""" - self.assertIsNone( - BenchmarkDriver( - Stub(output_dir=None, tests="/bin/"), tests=["ignored"] - ).log_file - ) + self.assertIsNone(BenchmarkDriver( + Stub(output_dir=None, tests='/bin/'), tests=['ignored']).log_file) - now = time.strftime("%Y%m%d%H%M%S", time.localtime()) + now = time.strftime('%Y%m%d%H%M%S', time.localtime()) driver = BenchmarkDriver( - Stub( - output_dir="/path", - tests="/bin/", - optimization="Suffix", - swift_repo=None, - ), - tests=["ignored"], - ) - self.assertEqual(driver.log_file, "/path/Benchmark_Suffix-" + now + ".log") - - r = "/repo/" - subprocess_mock = SubprocessMock( - responses=[ - ( - "git -C {0} rev-parse --abbrev-ref HEAD".format(r).split(" "), - "branch\n", - ), - ( - "git -C {0} rev-parse --short HEAD".format(r).split(" "), - "short_hash\n", - ), - ] - ) + Stub(output_dir='/path', tests='/bin/', optimization='Suffix', + swift_repo=None,), tests=['ignored']) + self.assertEqual(driver.log_file, + '/path/Benchmark_Suffix-' + now + '.log') + + r = '/repo/' + subprocess_mock = SubprocessMock(responses=[ + ('git -C {0} rev-parse --abbrev-ref HEAD'.format(r).split(' '), + 'branch\n'), + ('git -C {0} rev-parse --short HEAD'.format(r).split(' '), + 'short_hash\n'), + ]) driver = BenchmarkDriver( - Stub(output_dir="/log/", tests="", optimization="S", swift_repo=r), - tests=["ignored"], - _subprocess=subprocess_mock, - ) - self.assertEqual( - driver.log_file, "/log/branch/Benchmark_S-" + now + "-short_hash.log" - ) + Stub(output_dir='/log/', tests='', optimization='S', swift_repo=r), + tests=['ignored'], _subprocess=subprocess_mock) + self.assertEqual(driver.log_file, + '/log/branch/Benchmark_S-' + now + '-short_hash.log') subprocess_mock.assert_called_all_expected() @@ -269,8 +253,8 @@ class LogParserStub(object): @staticmethod def results_from_string(log_contents): LogParserStub.results_from_string_called = True - r = PerformanceTestResult("3,b1,1,123,123,123,0,123".split(",")) - return {"b1": r} + r = PerformanceTestResult('3,b1,1,123,123,123,0,123'.split(',')) + return {'b1': r} class TestBenchmarkDriverRunningTests(unittest.TestCase): @@ -279,38 +263,34 @@ def setUp(self): self.parser_stub = LogParserStub() self.subprocess_mock = SubprocessMock() 
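The filtering tests above fix the expected selection behaviour: positional benchmark arguments may be either test names or test numbers, unknown entries are silently dropped, and -f patterns are matched as regular expressions against the full name. A minimal sketch that reproduces those asserted outcomes (the helper select_tests and its signature are illustrative, not the driver's actual code):

import re

def select_tests(all_tests, test_number, benchmarks=None, patterns=None):
    # Illustrative only: mirror the outcomes asserted in the tests above.
    if benchmarks:
        # accept a known name or its test number; ignore anything unknown
        by_number = dict((num, name) for name, num in test_number.items())
        chosen = set(by_number.get(b, b) for b in benchmarks)
        return [t for t in all_tests if t in chosen]
    if patterns:
        return [t for t in all_tests
                if any(re.match(p, t) for p in patterns)]
    return list(all_tests)

all_tests = ['Benchmark1', 'Benchmark2', 'Benchmark3']
numbers = {'Benchmark1': '1', 'Benchmark2': '2', 'Benchmark3': '3'}
assert select_tests(all_tests, numbers,
                    benchmarks='1 Benchmark3 1 bogus'.split()) == \
    ['Benchmark1', 'Benchmark3']
assert select_tests(all_tests, numbers, patterns=['.+3']) == ['Benchmark3']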
self.subprocess_mock.expect( - "/benchmarks/Benchmark_O --list --delim=\t".split(" "), - "#\tTest\t[Tags]\n1\tb1\t[tag]\n", - ) + '/benchmarks/Benchmark_O --list --delim=\t'.split(' '), + '#\tTest\t[Tags]\n1\tb1\t[tag]\n') self.driver = BenchmarkDriver( - self.args, _subprocess=self.subprocess_mock, parser=self.parser_stub - ) + self.args, _subprocess=self.subprocess_mock, + parser=self.parser_stub) def test_run_benchmark_with_multiple_samples(self): - self.driver.run("b1") - self.subprocess_mock.assert_called_with(("/benchmarks/Benchmark_O", "b1")) - self.driver.run("b2", num_samples=5) + self.driver.run('b1') + self.subprocess_mock.assert_called_with( + ('/benchmarks/Benchmark_O', 'b1')) + self.driver.run('b2', num_samples=5) self.subprocess_mock.assert_called_with( - ("/benchmarks/Benchmark_O", "b2", "--num-samples=5") - ) + ('/benchmarks/Benchmark_O', 'b2', '--num-samples=5')) def test_run_benchmark_with_specified_number_of_iterations(self): - self.driver.run("b", num_iters=1) + self.driver.run('b', num_iters=1) self.subprocess_mock.assert_called_with( - ("/benchmarks/Benchmark_O", "b", "--num-iters=1") - ) + ('/benchmarks/Benchmark_O', 'b', '--num-iters=1')) def test_run_benchmark_for_specified_time(self): - self.driver.run("b", sample_time=0.5) + self.driver.run('b', sample_time=0.5) self.subprocess_mock.assert_called_with( - ("/benchmarks/Benchmark_O", "b", "--sample-time=0.5") - ) + ('/benchmarks/Benchmark_O', 'b', '--sample-time=0.5')) def test_run_benchmark_in_verbose_mode(self): - self.driver.run("b", verbose=True) + self.driver.run('b', verbose=True) self.subprocess_mock.assert_called_with( - ("/benchmarks/Benchmark_O", "b", "--verbose") - ) + ('/benchmarks/Benchmark_O', 'b', '--verbose')) def test_run_batch(self): """Run all active tests in a single execution of the Benchmark_X. @@ -318,9 +298,10 @@ def test_run_batch(self): Known test names are passed to the harness in a compressed form as test numbers. """ - self.driver.tests = ["b1", "bx"] + self.driver.tests = ['b1', 'bx'] self.driver.run() - self.subprocess_mock.assert_called_with(("/benchmarks/Benchmark_O", "1", "bx")) + self.subprocess_mock.assert_called_with( + ('/benchmarks/Benchmark_O', '1', 'bx')) def test_parse_results_from_running_benchmarks(self): """Parse measurements results using LogParser. @@ -328,70 +309,55 @@ def test_parse_results_from_running_benchmarks(self): Individual test run returns the first PerformanceTestResult directly. Batch run returns the dictionary of PerformanceTestResults. 
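The test_run_batch expectation above, ('/benchmarks/Benchmark_O', '1', 'bx'), shows the name-to-number compression described in its docstring: names the driver knows are replaced by their registered test numbers, unknown names are passed through verbatim. A tiny illustrative sketch of that encoding (the helper name is hypothetical):

def compress_test_args(tests, test_number):
    # known names become their registered numbers, unknown names pass through
    return tuple(test_number.get(t, t) for t in tests)

assert compress_test_args(['b1', 'bx'], {'b1': '1'}) == ('1', 'bx')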
""" - r = self.driver.run("b") + r = self.driver.run('b') self.assertTrue(self.parser_stub.results_from_string_called) - self.assertEquals(r.name, "b1") # non-matching name, just 1st result + self.assertEquals(r.name, 'b1') # non-matching name, just 1st result r = self.driver.run() self.assertTrue(isinstance(r, dict)) - self.assertEquals(r["b1"].name, "b1") + self.assertEquals(r['b1'].name, 'b1') def test_measure_memory(self): - self.driver.run("b", measure_memory=True) + self.driver.run('b', measure_memory=True) self.subprocess_mock.assert_called_with( - ("/benchmarks/Benchmark_O", "b", "--memory") - ) + ('/benchmarks/Benchmark_O', 'b', '--memory')) def test_report_quantiles(self): """Use delta compression for quantile reports.""" - self.driver.run("b", quantile=4) + self.driver.run('b', quantile=4) self.subprocess_mock.assert_called_with( - ("/benchmarks/Benchmark_O", "b", "--quantile=4", "--delta") - ) + ('/benchmarks/Benchmark_O', 'b', '--quantile=4', '--delta')) def test_run_benchmark_independent_samples(self): """Extract up to 20 measurements from an independent run.""" self.driver.args.independent_samples = 3 - r = self.driver.run_independent_samples("b1") - self.assertEqual( - self.subprocess_mock.calls.count( - ( - "/benchmarks/Benchmark_O", - "b1", - "--num-iters=1", - "--memory", - "--quantile=20", - "--delta", - ) - ), - 3, - ) + r = self.driver.run_independent_samples('b1') + self.assertEqual(self.subprocess_mock.calls.count( + ('/benchmarks/Benchmark_O', 'b1', '--num-iters=1', '--memory', + '--quantile=20', '--delta')), 3) self.assertEqual(r.num_samples, 3) # results are merged def test_run_and_log(self): def mock_run(test): - self.assertEqual(test, "b1") + self.assertEqual(test, 'b1') return PerformanceTestResult( - "3,b1,5,101,1,1,1,1,888".split(","), - quantiles=True, - delta=True, - memory=True, - ) - - driver = BenchmarkDriver(tests=["b1"], args=Stub(output_dir=None)) + '3,b1,5,101,1,1,1,1,888'.split(','), + quantiles=True, delta=True, memory=True) + driver = BenchmarkDriver(tests=['b1'], args=Stub(output_dir=None)) driver.run_independent_samples = mock_run # patching with captured_output() as (out, _): log = driver.run_and_log() - header = ( - "#,TEST,SAMPLES,MIN(μs),Q1(μs),MEDIAN(μs),Q3(μs),MAX(μs)," + "MAX_RSS(B)\n" - ) - csv_log = "3,b1,5,101,102,103,104,105,888\n" + header = '#,TEST,SAMPLES,MIN(μs),Q1(μs),MEDIAN(μs),Q3(μs),MAX(μs),' +\ + 'MAX_RSS(B)\n' + csv_log = '3,b1,5,101,102,103,104,105,888\n' self.assertEqual(log, None) self.assertEqual( out.getvalue(), - header + csv_log + "\n" + "Total performance tests executed: 1\n", - ) + header + + csv_log + + '\n' + + 'Total performance tests executed: 1\n') with captured_output() as (out, _): log = driver.run_and_log(csv_console=False) @@ -399,72 +365,66 @@ def mock_run(test): self.assertEqual(log, header + csv_log) self.assertEqual( out.getvalue(), - " # TEST SAMPLES MIN(μs)" - + " Q1(μs) MEDIAN(μs) Q3(μs) MAX(μs) MAX_RSS(B)\n" - + " 3 b1 5 101" - + " 102 103 104 105 888\n" - + "\n" - + "Total performance tests executed: 1\n", - ) + ' # TEST SAMPLES MIN(μs)' + + ' Q1(μs) MEDIAN(μs) Q3(μs) MAX(μs) MAX_RSS(B)\n' + + ' 3 b1 5 101' + + ' 102 103 104 105 888\n' + + '\n' + + 'Total performance tests executed: 1\n') def test_log_results(self): """Create log directory if it doesn't exist and write the log file.""" - def assert_log_written(out, log_file, content): - self.assertEqual(out.getvalue(), "Logging results to: " + log_file + "\n") - with open(log_file, "rU") as f: + self.assertEqual(out.getvalue(), + 'Logging results 
to: ' + log_file + '\n') + with open(log_file, 'rU') as f: text = f.read() self.assertEqual(text, "formatted output") try: import tempfile # setUp - temp_dir = tempfile.mkdtemp() - log_dir = os.path.join(temp_dir, "sub-dir/") - driver = BenchmarkDriver(Stub(), tests=[""]) + log_dir = os.path.join(temp_dir, 'sub-dir/') + driver = BenchmarkDriver(Stub(), tests=['']) self.assertFalse(os.path.exists(log_dir)) content = "formatted output" - log_file = os.path.join(log_dir, "1.log") + log_file = os.path.join(log_dir, '1.log') with captured_output() as (out, _): driver.log_results(content, log_file=log_file) assert_log_written(out, log_file, content) self.assertTrue(os.path.exists(log_dir)) - log_file = os.path.join(log_dir, "2.log") + log_file = os.path.join(log_dir, '2.log') with captured_output() as (out, _): driver.log_results(content, log_file=log_file) assert_log_written(out, log_file, content) finally: import shutil # tearDown - shutil.rmtree(temp_dir) def test_deterministing_hashing(self): - cmd = ["printenv", "SWIFT_DETERMINISTIC_HASHING"] - driver = BenchmarkDriver(["no args"], tests=["ignored"]) - self.assertEqual(driver._invoke(cmd).strip(), "1") + cmd = ['printenv', 'SWIFT_DETERMINISTIC_HASHING'] + driver = BenchmarkDriver(['no args'], tests=['ignored']) + self.assertEqual(driver._invoke(cmd).strip(), '1') class BenchmarkDriverMock(Mock): """Mock for BenchmarkDriver's `run` method""" - def __init__(self, tests, responses=None): super(BenchmarkDriverMock, self).__init__(responses) self.tests = tests self.args = ArgsStub() - def _run( - test, num_samples=None, num_iters=None, verbose=None, measure_memory=False - ): - return self.record_and_respond( - test, num_samples, num_iters, verbose, measure_memory - ) - + def _run(test, num_samples=None, num_iters=None, + verbose=None, measure_memory=False): + return self.record_and_respond(test, num_samples, num_iters, + verbose, measure_memory) self.run = _run - def record_and_respond(self, test, num_samples, num_iters, verbose, measure_memory): + def record_and_respond(self, test, num_samples, num_iters, + verbose, measure_memory): args = (test, num_samples, num_iters, verbose, measure_memory) self.calls.append(args) return self.respond.get(args, _PTR(min=700)) @@ -472,53 +432,35 @@ def record_and_respond(self, test, num_samples, num_iters, verbose, measure_memo class TestLoggingReportFormatter(unittest.TestCase): def test_plain_log_format(self): - lr = logging.makeLogRecord( - { - "name": "Base.category", - "level": logging.DEBUG, - "levelname": "DEBUG", - "msg": "Hi!", - } - ) + lr = logging.makeLogRecord({ + 'name': 'Base.category', 'level': logging.DEBUG, + 'levelname': 'DEBUG', 'msg': 'Hi!'}) f = LoggingReportFormatter() - self.assertEqual(f.format(lr), "DEBUG category: Hi!") + self.assertEqual(f.format(lr), 'DEBUG category: Hi!') def test_colored_log_format(self): def record(level, level_name): - return logging.makeLogRecord( - { - "name": "Base.category", - "levelno": level, - "levelname": level_name, - "msg": "Hi!", - } - ) - + return logging.makeLogRecord({ + 'name': 'Base.category', 'levelno': level, + 'levelname': level_name, 'msg': 'Hi!'}) f = LoggingReportFormatter(use_color=True) - self.assertEqual( - f.format(record(logging.DEBUG, "DEBUG")), "\x1b[1;39mcategory: Hi!\x1b[1;0m" - ) - self.assertEqual( - f.format(record(logging.INFO, "INFO")), "\x1b[1;32mcategory: Hi!\x1b[1;0m" - ) - self.assertEqual( - f.format(record(logging.WARNING, "WARNING")), - "\x1b[1;33mcategory: Hi!\x1b[1;0m", - ) - self.assertEqual( - 
f.format(record(logging.ERROR, "ERROR")), "\x1b[1;31mcategory: Hi!\x1b[1;0m" - ) - self.assertEqual( - f.format(record(logging.CRITICAL, "CRITICAL")), - "\x1b[1;35mcategory: Hi!\x1b[1;0m", - ) + self.assertEqual(f.format(record(logging.DEBUG, 'DEBUG')), + '\x1b[1;39mcategory: Hi!\x1b[1;0m') + self.assertEqual(f.format(record(logging.INFO, 'INFO')), + '\x1b[1;32mcategory: Hi!\x1b[1;0m') + self.assertEqual(f.format(record(logging.WARNING, 'WARNING')), + '\x1b[1;33mcategory: Hi!\x1b[1;0m') + self.assertEqual(f.format(record(logging.ERROR, 'ERROR')), + '\x1b[1;31mcategory: Hi!\x1b[1;0m') + self.assertEqual(f.format(record(logging.CRITICAL, 'CRITICAL')), + '\x1b[1;35mcategory: Hi!\x1b[1;0m') def test_no_prefix_for_base_logging(self): - lr = logging.makeLogRecord( - {"name": "Base", "level": logging.INFO, "levelname": "INFO", "msg": "Hi!"} - ) + lr = logging.makeLogRecord({ + 'name': 'Base', 'level': logging.INFO, + 'levelname': 'INFO', 'msg': 'Hi!'}) f = LoggingReportFormatter() - self.assertEqual(f.format(lr), "INFO Hi!") + self.assertEqual(f.format(lr), 'INFO Hi!') class TestMarkdownReportHandler(unittest.TestCase): @@ -533,41 +475,44 @@ def assert_contains(self, texts): self.assertIn(text, self.stream.getvalue()) def record(self, level, category, msg): - return logging.makeLogRecord( - {"name": "BenchmarkDoctor." + category, "levelno": level, "msg": msg} - ) + return logging.makeLogRecord({ + 'name': 'BenchmarkDoctor.' + category, + 'levelno': level, 'msg': msg}) def test_init_writes_table_header(self): self.assertEqual(self.handler.level, logging.INFO) - self.assert_contains(["Benchmark Check Report\n", "---|---"]) + self.assert_contains(['Benchmark Check Report\n', '---|---']) def test_close_writes_final_newlines(self): self.handler.close() - self.assert_contains(["---|---\n\n"]) + self.assert_contains(['---|---\n\n']) def test_errors_and_warnings_start_new_rows_with_icons(self): - self.handler.emit(self.record(logging.ERROR, "", "Blunder")) - self.handler.emit(self.record(logging.WARNING, "", "Boo-boo")) - self.assert_contains(["\n⛔️ | Blunder", "\n⚠️ | Boo-boo"]) + self.handler.emit(self.record(logging.ERROR, '', 'Blunder')) + self.handler.emit(self.record(logging.WARNING, '', 'Boo-boo')) + self.assert_contains(['\n⛔️ | Blunder', + '\n⚠️ | Boo-boo']) def test_category_icons(self): - self.handler.emit(self.record(logging.WARNING, "naming", "naming")) - self.handler.emit(self.record(logging.WARNING, "runtime", "runtime")) - self.handler.emit(self.record(logging.WARNING, "memory", "memory")) - self.assert_contains(["🔤 | naming", "⏱ | runtime", "Ⓜ️ | memory"]) + self.handler.emit(self.record(logging.WARNING, 'naming', 'naming')) + self.handler.emit(self.record(logging.WARNING, 'runtime', 'runtime')) + self.handler.emit(self.record(logging.WARNING, 'memory', 'memory')) + self.assert_contains(['🔤 | naming', + '⏱ | runtime', + 'Ⓜ️ | memory']) def test_info_stays_in_table_cell_breaking_line_row_to_subscript(self): """Assuming Infos only follow after Errors and Warnings. Infos don't emit category icons. """ - self.handler.emit(self.record(logging.ERROR, "naming", "Blunder")) - self.handler.emit(self.record(logging.INFO, "naming", "Fixit")) - self.assert_contains(["Blunder
<br><sub> Fixit"])
+        self.handler.emit(self.record(logging.ERROR, 'naming', 'Blunder'))
+        self.handler.emit(self.record(logging.INFO, 'naming', 'Fixit'))
+        self.assert_contains(['Blunder <br><sub>
Fixit']) def test_names_in_code_format(self): - self.handler.emit(self.record(logging.WARNING, "", "'QuotedName'")) - self.assert_contains(["| `QuotedName`"]) + self.handler.emit(self.record(logging.WARNING, '', "'QuotedName'")) + self.assert_contains(['| `QuotedName`']) def _PTR(min=700, mem_pages=1000, setup=None): @@ -575,17 +520,19 @@ def _PTR(min=700, mem_pages=1000, setup=None): return Stub(samples=Stub(min=min), mem_pages=mem_pages, setup=setup) -def _run(test, num_samples=None, num_iters=None, verbose=None, measure_memory=False): +def _run(test, num_samples=None, num_iters=None, verbose=None, + measure_memory=False): """Helper function that constructs tuple with arguments for run method.""" - return (test, num_samples, num_iters, verbose, measure_memory) + return ( + test, num_samples, num_iters, verbose, measure_memory) class TestBenchmarkDoctor(unittest.TestCase): @classmethod def setUpClass(cls): super(TestBenchmarkDoctor, cls).setUpClass() - doctor_log = logging.getLogger("BenchmarkDoctor") - cls._doctor_log_handler = MockLoggingHandler(level="DEBUG") + doctor_log = logging.getLogger('BenchmarkDoctor') + cls._doctor_log_handler = MockLoggingHandler(level='DEBUG') doctor_log.addHandler(cls._doctor_log_handler) def setUp(self): @@ -600,33 +547,34 @@ def assert_contains(self, texts, output): self.assertIn(text, output) def test_uses_logging(self): - driver = BenchmarkDriverMock(tests=["B1", "B2"]) + driver = BenchmarkDriverMock(tests=['B1', 'B2']) with captured_output() as (out, _): BenchmarkDoctor(self.args, driver) - self.assert_contains(["Checking tests: B1, B2"], self.logs["debug"]) - self.assertEqual(out.getvalue(), "") + self.assert_contains(['Checking tests: B1, B2'], self.logs['debug']) + self.assertEqual(out.getvalue(), '') def test_supports_verbose_output(self): - driver = BenchmarkDriverMock(tests=["B1", "B2"]) + driver = BenchmarkDriverMock(tests=['B1', 'B2']) driver.verbose = True self.args.verbose = True with captured_output() as (out, _): BenchmarkDoctor(self.args, driver) - self.assert_contains(["Checking tests: B1, B2"], out.getvalue()) + self.assert_contains(['Checking tests: B1, B2'], out.getvalue()) def test_uses_report_formatter(self): - doctor = BenchmarkDoctor(self.args, BenchmarkDriverMock(tests=["B1"])) - console_handler = logging.getLogger("BenchmarkDoctor").handlers[1] + doctor = BenchmarkDoctor(self.args, BenchmarkDriverMock(tests=['B1'])) + console_handler = logging.getLogger('BenchmarkDoctor').handlers[1] self.assertTrue(doctor) self.assertTrue(isinstance(console_handler, logging.StreamHandler)) - self.assertTrue(isinstance(console_handler.formatter, LoggingReportFormatter)) + self.assertTrue(isinstance(console_handler.formatter, + LoggingReportFormatter)) def test_uses_optional_markdown_report_formatter(self): self.args.markdown = True with captured_output() as (_, _): - doc = BenchmarkDoctor(self.args, BenchmarkDriverMock(tests=["B1"])) + doc = BenchmarkDoctor(self.args, BenchmarkDriverMock(tests=['B1'])) self.assertTrue(doc) - console_handler = logging.getLogger("BenchmarkDoctor").handlers[1] + console_handler = logging.getLogger('BenchmarkDoctor').handlers[1] self.assertTrue(isinstance(console_handler, MarkdownReportHandler)) def test_measure_10_independent_1s_benchmark_series(self): @@ -636,155 +584,86 @@ def test_measure_10_independent_1s_benchmark_series(self): take measurements for approximately 1s based on short initial runtime sampling. Capped at 200 samples. 
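The measurement plan described above produces the ten dictionary keys asserted a few lines below ('B1 O i1a' through 'B1 O i2e'): five runs with one iteration and five with two, each labelled by optimization level, iteration count and a run letter. A small sketch of that key scheme (assumed formatting, shown only to make the naming explicit):

def series_labels(name, opt='O', iter_counts=(1, 2), runs='abcde'):
    # e.g. 'B1 O i1a' .. 'B1 O i2e'
    return ['{0} {1} i{2}{3}'.format(name, opt, i, run)
            for i in iter_counts for run in runs]

labels = series_labels('B1')
assert len(labels) == 10
assert labels[0] == 'B1 O i1a' and labels[-1] == 'B1 O i2e'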
""" - driver = BenchmarkDriverMock( - tests=["B1"], - responses=( - [ - # calibration run, returns a stand-in for PerformanceTestResult - ( - _run("B1", num_samples=3, num_iters=1, verbose=True), - _PTR(min=300), - ) - ] - + - # 5x i1 series, with 300 μs runtime its possible to take 4098 - # samples/s, but it should be capped at 2k - ( - [ - ( - _run( - "B1", - num_samples=200, - num_iters=1, - verbose=True, - measure_memory=True, - ), - _PTR(min=300), - ) - ] - * 5 - ) - + - # 5x i2 series - ( - [ - ( - _run( - "B1", - num_samples=200, - num_iters=2, - verbose=True, - measure_memory=True, - ), - _PTR(min=300), - ) - ] - * 5 - ) - ), - ) + driver = BenchmarkDriverMock(tests=['B1'], responses=([ + # calibration run, returns a stand-in for PerformanceTestResult + (_run('B1', num_samples=3, num_iters=1, + verbose=True), _PTR(min=300))] + + # 5x i1 series, with 300 μs runtime its possible to take 4098 + # samples/s, but it should be capped at 2k + ([(_run('B1', num_samples=200, num_iters=1, + verbose=True, measure_memory=True), _PTR(min=300))] * 5) + + # 5x i2 series + ([(_run('B1', num_samples=200, num_iters=2, + verbose=True, measure_memory=True), _PTR(min=300))] * 5) + )) doctor = BenchmarkDoctor(self.args, driver) with captured_output() as (out, _): - measurements = doctor.measure("B1") + measurements = doctor.measure('B1') driver.assert_called_all_expected() self.assert_contains( - [ - "name", - "B1 O i1a", - "B1 O i1b", - "B1 O i1c", - "B1 O i1d", - "B1 O i1e", - "B1 O i2a", - "B1 O i2b", - "B1 O i2c", - "B1 O i2d", - "B1 O i2e", - ], - measurements.keys(), - ) - self.assertEqual(measurements["name"], "B1") + ['name', + 'B1 O i1a', 'B1 O i1b', 'B1 O i1c', 'B1 O i1d', 'B1 O i1e', + 'B1 O i2a', 'B1 O i2b', 'B1 O i2c', 'B1 O i2d', 'B1 O i2e'], + measurements.keys()) + self.assertEqual(measurements['name'], 'B1') self.assert_contains( - [ - "Calibrating num-samples for B1:", - "Runtime 300 μs yields 4096 adjusted samples per second.", - "Measuring B1, 5 x i1 (200 samples), 5 x i2 (200 samples)", - ], - self.logs["debug"], - ) + ['Calibrating num-samples for B1:', + 'Runtime 300 μs yields 4096 adjusted samples per second.', + 'Measuring B1, 5 x i1 (200 samples), 5 x i2 (200 samples)'], + self.logs['debug']) def test_benchmark_name_matches_naming_conventions(self): - driver = BenchmarkDriverMock( - tests=[ - "BenchmarkName", - "CapitalWordsConvention", - "ABBRName", - "TooManyCamelCaseHumps", - "Existential.Array.method.1x.Val4", - "Flatten.Array.Array.Str.for-in.reserved", - "Flatten.Array.String?.as!.NSArray", - "wrongCase", - "Wrong_convention", - "Illegal._$%[]<>{}@^()", - ] - ) + driver = BenchmarkDriverMock(tests=[ + 'BenchmarkName', 'CapitalWordsConvention', 'ABBRName', + 'TooManyCamelCaseHumps', + 'Existential.Array.method.1x.Val4', + 'Flatten.Array.Array.Str.for-in.reserved', + 'Flatten.Array.String?.as!.NSArray', + 'wrongCase', 'Wrong_convention', 'Illegal._$%[]<>{}@^()']) with captured_output() as (out, _): doctor = BenchmarkDoctor(self.args, driver) doctor.check() output = out.getvalue() - self.assertIn("naming: ", output) - self.assertNotIn("BenchmarkName", output) - self.assertNotIn("CapitalWordsConvention", output) - self.assertNotIn("ABBRName", output) - self.assertNotIn("Existential.Array.method.1x.Val4", output) - self.assertNotIn("Flatten.Array.Array.Str.for-in.reserved", output) - self.assertNotIn("Flatten.Array.String?.as!.NSArray", output) + self.assertIn('naming: ', output) + self.assertNotIn('BenchmarkName', output) + self.assertNotIn('CapitalWordsConvention', output) + 
self.assertNotIn('ABBRName', output) + self.assertNotIn('Existential.Array.method.1x.Val4', output) + self.assertNotIn('Flatten.Array.Array.Str.for-in.reserved', output) + self.assertNotIn('Flatten.Array.String?.as!.NSArray', output) err_msg = " name doesn't conform to benchmark naming convention." self.assert_contains( - [ - "'wrongCase'" + err_msg, - "'Wrong_convention'" + err_msg, - "'Illegal._$%[]<>{}@^()'" + err_msg, - ], - self.logs["error"], - ) + ["'wrongCase'" + err_msg, "'Wrong_convention'" + err_msg, + "'Illegal._$%[]<>{}@^()'" + err_msg], self.logs['error']) self.assert_contains( ["'TooManyCamelCaseHumps' name is composed of 5 words."], - self.logs["warning"], - ) - self.assert_contains(["See http://bit.ly/BenchmarkNaming"], self.logs["info"]) + self.logs['warning']) self.assert_contains( - [ - "Split 'TooManyCamelCaseHumps' name into dot-separated groups " - "and variants. See http://bit.ly/BenchmarkNaming" - ], - self.logs["info"], - ) + ['See http://bit.ly/BenchmarkNaming'], self.logs['info']) + self.assert_contains( + ["Split 'TooManyCamelCaseHumps' name into dot-separated groups " + "and variants. See http://bit.ly/BenchmarkNaming"], + self.logs['info']) def test_benchmark_name_is_at_most_40_chars_long(self): - driver = BenchmarkDriverMock( - tests=["BenchmarkName", "ThisTestNameIsTooLongAndCausesOverflowsInReports"] - ) + driver = BenchmarkDriverMock(tests=[ + 'BenchmarkName', + 'ThisTestNameIsTooLongAndCausesOverflowsInReports']) with captured_output() as (out, _): doctor = BenchmarkDoctor(self.args, driver) doctor.check() output = out.getvalue() - self.assertIn("naming: ", output) - self.assertNotIn("BenchmarkName", output) + self.assertIn('naming: ', output) + self.assertNotIn('BenchmarkName', output) self.assert_contains( - [ - "'ThisTestNameIsTooLongAndCausesOverflowsInReports' name is " - "48 characters long." - ], - self.logs["error"], - ) + ["'ThisTestNameIsTooLongAndCausesOverflowsInReports' name is " + "48 characters long."], self.logs['error']) self.assert_contains( ["Benchmark name should not be longer than 40 characters."], - self.logs["info"], - ) + self.logs['info']) def test_benchmark_runtime_range(self): """Optimized benchmark should have runtime between 20 μs and 1000 μs. @@ -800,226 +679,159 @@ def test_benchmark_runtime_range(self): Warn about longer runtime. Runtimes over 10ms are an error. 
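The thresholds spelled out above are pinned down by the assertions that follow: runtimes under 20 μs draw a warning (0 μs is an error), runtimes over 1000 μs draw a warning, and runtimes past the hard limit (the Tortoise and OverheadTurtle cases below) are errors. The suggested reduction factors in those messages, 'factor of 2 (10)' for 1001 μs and 'factor of 512 (1000)' for 500000 μs, are consistent with rounding the ratio to the 1000 μs target up to the next power of two and power of ten; a worked sketch under that assumption (not necessarily the doctor's exact code):

import math

def correction_factors(runtime_us, target_us=1000):
    # round the overshoot ratio up to a power of 2 and a power of 10
    ratio = runtime_us / float(target_us)
    return (2 ** int(math.ceil(math.log(ratio, 2))),
            10 ** int(math.ceil(math.log(ratio, 10))))

assert correction_factors(1001) == (2, 10)
assert correction_factors(500000) == (512, 1000)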
""" - def measurements(name, runtime): - return { - "name": name, - name + " O i1a": _PTR(min=runtime + 2), - name + " O i2a": _PTR(min=runtime), - } + return {'name': name, + name + ' O i1a': _PTR(min=runtime + 2), + name + ' O i2a': _PTR(min=runtime)} with captured_output() as (out, _): doctor = BenchmarkDoctor(self.args, BenchmarkDriverMock([])) - doctor.analyze(measurements("Sylph", 0)) - doctor.analyze(measurements("Unicorn", 3)) - doctor.analyze(measurements("Cheetah", 200)) - doctor.analyze(measurements("Hare", 1001)) - doctor.analyze(measurements("Tortoise", 500000)) - doctor.analyze( - { - "name": "OverheadTurtle", - "OverheadTurtle O i1a": _PTR(min=800000), - "OverheadTurtle O i2a": _PTR(min=700000), - } - ) + doctor.analyze(measurements('Sylph', 0)) + doctor.analyze(measurements('Unicorn', 3)) + doctor.analyze(measurements('Cheetah', 200)) + doctor.analyze(measurements('Hare', 1001)) + doctor.analyze(measurements('Tortoise', 500000)) + doctor.analyze({'name': 'OverheadTurtle', + 'OverheadTurtle O i1a': _PTR(min=800000), + 'OverheadTurtle O i2a': _PTR(min=700000)}) output = out.getvalue() - self.assertIn("runtime: ", output) - self.assertNotIn("Cheetah", output) - self.assert_contains(["'Sylph' execution took 0 μs."], self.logs["error"]) + self.assertIn('runtime: ', output) + self.assertNotIn('Cheetah', output) + self.assert_contains(["'Sylph' execution took 0 μs."], + self.logs['error']) self.assert_contains( - [ - "Ensure the workload of 'Sylph' has a properly measurable size" - " (runtime > 20 μs) and is not eliminated by the compiler (use " - "`blackHole` function if necessary)." - ], - self.logs["info"], - ) - self.assert_contains(["'Unicorn' execution took 3 μs."], self.logs["warning"]) + ["Ensure the workload of 'Sylph' has a properly measurable size" + " (runtime > 20 μs) and is not eliminated by the compiler (use " + "`blackHole` function if necessary)."], + self.logs['info']) + self.assert_contains(["'Unicorn' execution took 3 μs."], + self.logs['warning']) self.assert_contains( ["Increase the workload of 'Unicorn' to be more than 20 μs."], - self.logs["info"], - ) - self.assert_contains( - ["'Hare' execution took at least 1001 μs."], self.logs["warning"] - ) + self.logs['info']) + self.assert_contains(["'Hare' execution took at least 1001 μs."], + self.logs['warning']) self.assert_contains( - [ - "Decrease the workload of 'Hare' by a factor of 2 (10), " - "to be less than 1000 μs." - ], - self.logs["info"], - ) + ["Decrease the workload of 'Hare' by a factor of 2 (10), " + "to be less than 1000 μs."], self.logs['info']) self.assert_contains( - ["'Tortoise' execution took at least 500000 μs."], self.logs["error"] - ) + ["'Tortoise' execution took at least 500000 μs."], + self.logs['error']) self.assert_contains( - [ - "Decrease the workload of 'Tortoise' by a factor of 512 (1000), " - "to be less than 1000 μs." - ], - self.logs["info"], - ) + ["Decrease the workload of 'Tortoise' by a factor of 512 (1000), " + "to be less than 1000 μs."], self.logs['info']) self.assert_contains( - [ - "'OverheadTurtle' execution took at least 600000 μs" - " (excluding the setup overhead)." 
- ], - self.logs["error"], - ) + ["'OverheadTurtle' execution took at least 600000 μs" + " (excluding the setup overhead)."], + self.logs['error']) def test_benchmark_has_no_significant_setup_overhead(self): with captured_output() as (out, _): doctor = BenchmarkDoctor(self.args, BenchmarkDriverMock([])) - doctor.analyze( - { - "name": "NoOverhead", # not 'significant' enough - # Based on DropFirstArray a10/e10: overhead 3.7% (6 μs) - "NoOverhead O i1a": _PTR(min=162), - "NoOverhead O i2a": _PTR(min=159), - } - ) - doctor.analyze( - { - "name": "SO", # Setup Overhead - # Based on SuffixArrayLazy a10/e10: overhead 5.8% (4 μs) - "SO O i1a": _PTR(min=69), - "SO O i1b": _PTR(min=70), - "SO O i2a": _PTR(min=67), - "SO O i2b": _PTR(min=68), - } - ) - doctor.analyze( - {"name": "Zero", "Zero O i1a": _PTR(min=0), "Zero O i2a": _PTR(min=0)} - ) - doctor.analyze( - { - "name": "LOA", # Limit of Accuracy - # Impossible to detect overhead: - # Even 1μs change in 20μs runtime is 5%. - "LOA O i1a": _PTR(min=21), - "LOA O i2a": _PTR(min=20), - } - ) + doctor.analyze({ + 'name': 'NoOverhead', # not 'significant' enough + # Based on DropFirstArray a10/e10: overhead 3.7% (6 μs) + 'NoOverhead O i1a': _PTR(min=162), + 'NoOverhead O i2a': _PTR(min=159)}) + doctor.analyze({ + 'name': 'SO', # Setup Overhead + # Based on SuffixArrayLazy a10/e10: overhead 5.8% (4 μs) + 'SO O i1a': _PTR(min=69), 'SO O i1b': _PTR(min=70), + 'SO O i2a': _PTR(min=67), 'SO O i2b': _PTR(min=68)}) + doctor.analyze({'name': 'Zero', 'Zero O i1a': _PTR(min=0), + 'Zero O i2a': _PTR(min=0)}) + doctor.analyze({ + 'name': 'LOA', # Limit of Accuracy + # Impossible to detect overhead: + # Even 1μs change in 20μs runtime is 5%. + 'LOA O i1a': _PTR(min=21), + 'LOA O i2a': _PTR(min=20)}) output = out.getvalue() - self.assertIn("runtime: ", output) - self.assertNotIn("NoOverhead", output) - self.assertNotIn("ZeroRuntime", output) - self.assertNotIn("LOA", output) + self.assertIn('runtime: ', output) + self.assertNotIn('NoOverhead', output) + self.assertNotIn('ZeroRuntime', output) + self.assertNotIn('LOA', output) self.assert_contains( - ["'SO' has setup overhead of 4 μs (5.8%)."], self.logs["error"] - ) + ["'SO' has setup overhead of 4 μs (5.8%)."], + self.logs['error']) self.assert_contains( - [ - "Move initialization of benchmark data to the `setUpFunction` " - "registered in `BenchmarkInfo`." 
- ], - self.logs["info"], - ) + ["Move initialization of benchmark data to the `setUpFunction` " + "registered in `BenchmarkInfo`."], self.logs['info']) def test_benchmark_setup_takes_reasonable_time(self): """Setup < 200 ms (20% extra on top of the typical 1 s measurement)""" with captured_output() as (out, _): doctor = BenchmarkDoctor(self.args, BenchmarkDriverMock([])) - doctor.analyze( - { - "name": "NormalSetup", - "NormalSetup O i1a": _PTR(setup=199999), - "NormalSetup O i2a": _PTR(setup=200001), - } - ) - doctor.analyze( - { - "name": "LongSetup", - "LongSetup O i1a": _PTR(setup=200001), - "LongSetup O i2a": _PTR(setup=200002), - } - ) + doctor.analyze({ + 'name': 'NormalSetup', + 'NormalSetup O i1a': _PTR(setup=199999), + 'NormalSetup O i2a': _PTR(setup=200001)}) + doctor.analyze({ + 'name': 'LongSetup', + 'LongSetup O i1a': _PTR(setup=200001), + 'LongSetup O i2a': _PTR(setup=200002)}) output = out.getvalue() - self.assertIn("runtime: ", output) - self.assertNotIn("NormalSetup", output) + self.assertIn('runtime: ', output) + self.assertNotIn('NormalSetup', output) self.assert_contains( - ["'LongSetup' setup took at least 200001 μs."], self.logs["error"] - ) + ["'LongSetup' setup took at least 200001 μs."], + self.logs['error']) self.assert_contains( - ["The `setUpFunction` should take no more than 200 ms."], self.logs["info"] - ) + ["The `setUpFunction` should take no more than 200 ms."], + self.logs['info']) def test_benchmark_has_constant_memory_use(self): """Benchmark's memory footprint must not vary with num-iters.""" with captured_output() as (out, _): doctor = BenchmarkDoctor(self.args, BenchmarkDriverMock([])) - doctor.analyze( - { - # The threshold of 15 pages was estimated from previous - # measurements. The normal range should be probably aproximated - # by a function instead of a simple constant. - # TODO: re-evaluate normal range from whole SBS - "name": "ConstantMemory", - "ConstantMemory O i1a": _PTR(mem_pages=1460), - "ConstantMemory O i2a": _PTR(mem_pages=(1460 + 15)), - } - ) - doctor.analyze( - { - "name": "VariableMemory", # ObserverForwardStruct - "VariableMemory O i1a": _PTR(mem_pages=1460), - "VariableMemory O i1b": _PTR(mem_pages=1472), - # i2 series start at 290 pages higher - "VariableMemory O i2a": _PTR(mem_pages=1750), - "VariableMemory O i2b": _PTR(mem_pages=1752), - } - ) - measurements = dict( - [ - ( - "HighVariance O i{0}{1}".format(num_iters, suffix), - _PTR(mem_pages=num_pages), - ) - for num_iters, pages in [ - (1, [6200, 5943, 4818, 5612, 5469]), - (2, [6244, 5832, 4674, 5176, 5490]), - ] - for num_pages, suffix in zip(pages, list("abcde")) - ] - ) - measurements["name"] = "HighVariance" # Array2D + doctor.analyze({ + # The threshold of 15 pages was estimated from previous + # measurements. The normal range should be probably aproximated + # by a function instead of a simple constant. 
+ # TODO: re-evaluate normal range from whole SBS + 'name': 'ConstantMemory', + 'ConstantMemory O i1a': _PTR(mem_pages=1460), + 'ConstantMemory O i2a': _PTR(mem_pages=(1460 + 15))}) + doctor.analyze({ + 'name': 'VariableMemory', # ObserverForwardStruct + 'VariableMemory O i1a': _PTR(mem_pages=1460), + 'VariableMemory O i1b': _PTR(mem_pages=1472), + # i2 series start at 290 pages higher + 'VariableMemory O i2a': _PTR(mem_pages=1750), + 'VariableMemory O i2b': _PTR(mem_pages=1752)}) + measurements = dict([ + ('HighVariance O i{0}{1}'.format(num_iters, suffix), + _PTR(mem_pages=num_pages)) + for num_iters, pages in [ + (1, [6200, 5943, 4818, 5612, 5469]), + (2, [6244, 5832, 4674, 5176, 5490])] + for num_pages, suffix in zip(pages, list('abcde'))]) + measurements['name'] = 'HighVariance' # Array2D doctor.analyze(measurements) output = out.getvalue() - self.assertIn("memory: ", output) - self.assertNotIn("ConstantMemory", output) + self.assertIn('memory: ', output) + self.assertNotIn('ConstantMemory', output) self.assert_contains( - [ - "'VariableMemory' varies the memory footprint of the base " - "workload depending on the `num-iters`." - ], - self.logs["error"], - ) + ["'VariableMemory' varies the memory footprint of the base " + "workload depending on the `num-iters`."], + self.logs['error']) self.assert_contains( - [ - "'VariableMemory' " - "mem_pages [i1, i2]: min=[1460, 1750] 𝚫=290 R=[12, 2]" - ], - self.logs["info"], - ) + ["'VariableMemory' " + "mem_pages [i1, i2]: min=[1460, 1750] 𝚫=290 R=[12, 2]"], + self.logs['info']) self.assert_contains( - [ - "'HighVariance' has very wide range of memory used between " - "independent, repeated measurements." - ], - self.logs["warning"], - ) + ["'HighVariance' has very wide range of memory used between " + "independent, repeated measurements."], + self.logs['warning']) self.assert_contains( - [ - "'HighVariance' " - "mem_pages [i1, i2]: min=[4818, 4674] 𝚫=144 R=[1382, 1570]" - ], - self.logs["info"], - ) + ["'HighVariance' " + "mem_pages [i1, i2]: min=[4818, 4674] 𝚫=144 R=[1382, 1570]"], + self.logs['info']) -if __name__ == "__main__": +if __name__ == '__main__': unittest.main() diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py index 2053e93c0b42b..4c1c6effffcd5 100644 --- a/benchmark/scripts/test_compare_perf_tests.py +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -48,11 +48,11 @@ def test_is_iterable(self): class TestPerformanceTestSamples(unittest.TestCase): def setUp(self): - self.samples = PerformanceTestSamples("B1") + self.samples = PerformanceTestSamples('B1') self.samples.add(Sample(7, 42, 1000)) def test_has_name(self): - self.assertEqual(self.samples.name, "B1") + self.assertEqual(self.samples.name, 'B1') def test_stores_samples(self): self.assertEqual(self.samples.count, 1) @@ -70,7 +70,7 @@ def test_quantile(self): self.assertEqual(self.samples.quantile(1), 1100) self.samples.add(Sample(3, 1, 1050)) self.assertEqual(self.samples.quantile(0), 1000) - self.assertEqual(self.samples.quantile(0.5), 1050) + self.assertEqual(self.samples.quantile(.5), 1050) self.assertEqual(self.samples.quantile(1), 1100) def assertEqualFiveNumberSummary(self, ss, expected_fns): @@ -82,15 +82,20 @@ def assertEqualFiveNumberSummary(self, ss, expected_fns): self.assertEqual(ss.max, e_max) def test_computes_five_number_summary(self): - self.assertEqualFiveNumberSummary(self.samples, (1000, 1000, 1000, 1000, 1000)) + self.assertEqualFiveNumberSummary( + self.samples, (1000, 1000, 1000, 1000, 1000)) 
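The five-number summaries expected in this test (and the quantile checks in test_quantile above) are consistent with a nearest-rank quantile estimate, the sorted runtime at index ceil(q * n) - 1; a short sketch under that assumption that reproduces the asserted tuples (illustrative, not necessarily the module's implementation):

import math

def quantile(runtimes, q):
    # nearest-rank estimate: value at index ceil(q * n) - 1, clamped at 0
    s = sorted(runtimes)
    return s[max(int(math.ceil(q * len(s))) - 1, 0)]

def five_number_summary(runtimes):
    return tuple(quantile(runtimes, q) for q in (0, 0.25, 0.5, 0.75, 1))

assert five_number_summary([1000, 1100, 1050, 1025]) == \
    (1000, 1000, 1025, 1050, 1100)
assert five_number_summary([1000, 1100, 1050, 1025, 1075]) == \
    (1000, 1025, 1050, 1075, 1100)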
self.samples.add(Sample(2, 1, 1100)) - self.assertEqualFiveNumberSummary(self.samples, (1000, 1000, 1000, 1100, 1100)) + self.assertEqualFiveNumberSummary( + self.samples, (1000, 1000, 1000, 1100, 1100)) self.samples.add(Sample(3, 1, 1050)) - self.assertEqualFiveNumberSummary(self.samples, (1000, 1000, 1050, 1100, 1100)) + self.assertEqualFiveNumberSummary( + self.samples, (1000, 1000, 1050, 1100, 1100)) self.samples.add(Sample(4, 1, 1025)) - self.assertEqualFiveNumberSummary(self.samples, (1000, 1000, 1025, 1050, 1100)) + self.assertEqualFiveNumberSummary( + self.samples, (1000, 1000, 1025, 1050, 1100)) self.samples.add(Sample(5, 1, 1075)) - self.assertEqualFiveNumberSummary(self.samples, (1000, 1025, 1050, 1075, 1100)) + self.assertEqualFiveNumberSummary( + self.samples, (1000, 1025, 1050, 1075, 1100)) def test_computes_inter_quartile_range(self): self.assertEqual(self.samples.iqr, 0) @@ -106,66 +111,59 @@ def assertEqualtats(self, stats, expected_stats): def test_computes_mean_sd_cv(self): ss = self.samples - self.assertEqualtats((ss.mean, ss.sd, ss.cv), (1000.0, 0.0, 0.0)) + self.assertEqualtats( + (ss.mean, ss.sd, ss.cv), (1000.0, 0.0, 0.0)) self.samples.add(Sample(2, 1, 1100)) - self.assertEqualtats((ss.mean, ss.sd, ss.cv), (1050.0, 70.71, 6.7 / 100)) + self.assertEqualtats( + (ss.mean, ss.sd, ss.cv), (1050.0, 70.71, 6.7 / 100)) def test_computes_range_spread(self): ss = self.samples - self.assertEqualtats((ss.range, ss.spread), (0, 0)) + self.assertEqualtats( + (ss.range, ss.spread), (0, 0)) self.samples.add(Sample(2, 1, 1100)) - self.assertEqualtats((ss.range, ss.spread), (100, 10.0 / 100)) + self.assertEqualtats( + (ss.range, ss.spread), (100, 10.0 / 100)) def test_init_with_samples(self): self.samples = PerformanceTestSamples( - "B2", [Sample(0, 1, 1000), Sample(1, 1, 1100)] - ) + 'B2', [Sample(0, 1, 1000), Sample(1, 1, 1100)]) self.assertEqual(self.samples.count, 2) self.assertEqualtats( - ( - self.samples.mean, - self.samples.sd, - self.samples.range, - self.samples.spread, - ), - (1050.0, 70.71, 100, 9.52 / 100), - ) + (self.samples.mean, self.samples.sd, + self.samples.range, self.samples.spread), + (1050.0, 70.71, 100, 9.52 / 100)) def test_can_handle_zero_runtime(self): # guard against dividing by 0 - self.samples = PerformanceTestSamples("Zero") + self.samples = PerformanceTestSamples('Zero') self.samples.add(Sample(0, 1, 0)) self.assertEqualtats( - ( - self.samples.mean, - self.samples.sd, - self.samples.cv, - self.samples.range, - self.samples.spread, - ), - (0, 0, 0.0, 0, 0.0), - ) + (self.samples.mean, self.samples.sd, self.samples.cv, + self.samples.range, self.samples.spread), + (0, 0, 0.0, 0, 0.0)) def test_excludes_outliers(self): - ss = [ - Sample(*map(int, s.split())) - for s in "0 1 1000, 1 1 1025, 2 1 1050, 3 1 1075, 4 1 1100, " - "5 1 1000, 6 1 1025, 7 1 1050, 8 1 1075, 9 1 1100, " - "10 1 1050, 11 1 949, 12 1 1151".split(",") - ] - self.samples = PerformanceTestSamples("Outliers", ss) + ss = [Sample(*map(int, s.split())) for s in + '0 1 1000, 1 1 1025, 2 1 1050, 3 1 1075, 4 1 1100, ' + '5 1 1000, 6 1 1025, 7 1 1050, 8 1 1075, 9 1 1100, ' + '10 1 1050, 11 1 949, 12 1 1151'.split(',')] + self.samples = PerformanceTestSamples('Outliers', ss) self.assertEqual(self.samples.count, 13) - self.assertEqualtats((self.samples.mean, self.samples.sd), (1050, 52.36)) + self.assertEqualtats( + (self.samples.mean, self.samples.sd), (1050, 52.36)) self.samples.exclude_outliers() self.assertEqual(self.samples.count, 11) self.assertEqual(self.samples.outliers, ss[11:]) - 
self.assertEqualFiveNumberSummary(self.samples, (1000, 1025, 1050, 1075, 1100)) - self.assertEqualtats((self.samples.mean, self.samples.sd), (1050, 35.36)) + self.assertEqualFiveNumberSummary( + self.samples, (1000, 1025, 1050, 1075, 1100)) + self.assertEqualtats( + (self.samples.mean, self.samples.sd), (1050, 35.36)) def test_excludes_outliers_zero_IQR(self): - self.samples = PerformanceTestSamples("Tight") + self.samples = PerformanceTestSamples('Tight') self.samples.add(Sample(0, 2, 23)) self.samples.add(Sample(1, 2, 18)) self.samples.add(Sample(2, 2, 18)) @@ -175,14 +173,13 @@ def test_excludes_outliers_zero_IQR(self): self.samples.exclude_outliers() self.assertEqual(self.samples.count, 3) - self.assertEqualtats((self.samples.min, self.samples.max), (18, 18)) + self.assertEqualtats( + (self.samples.min, self.samples.max), (18, 18)) def test_excludes_outliers_top_only(self): - ss = [ - Sample(*map(int, s.split())) - for s in "0 1 1, 1 1 2, 2 1 2, 3 1 2, 4 1 3".split(",") - ] - self.samples = PerformanceTestSamples("Top", ss) + ss = [Sample(*map(int, s.split())) for s in + '0 1 1, 1 1 2, 2 1 2, 3 1 2, 4 1 3'.split(',')] + self.samples = PerformanceTestSamples('Top', ss) self.assertEqualFiveNumberSummary(self.samples, (1, 2, 2, 2, 3)) self.assertEqual(self.samples.iqr, 0) @@ -194,52 +191,48 @@ def test_excludes_outliers_top_only(self): class TestPerformanceTestResult(unittest.TestCase): def test_init(self): - log_line = "1,AngryPhonebook,20,10664,12933,11035,576,10884" - r = PerformanceTestResult(log_line.split(",")) - self.assertEqual(r.test_num, "1") - self.assertEqual(r.name, "AngryPhonebook") + log_line = '1,AngryPhonebook,20,10664,12933,11035,576,10884' + r = PerformanceTestResult(log_line.split(',')) + self.assertEqual(r.test_num, '1') + self.assertEqual(r.name, 'AngryPhonebook') self.assertEqual( (r.num_samples, r.min, r.max, r.mean, r.sd, r.median), - (20, 10664, 12933, 11035, 576, 10884), - ) + (20, 10664, 12933, 11035, 576, 10884)) self.assertEqual(r.samples, None) - log_line = "1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336" - r = PerformanceTestResult(log_line.split(",")) + log_line = '1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336' + r = PerformanceTestResult(log_line.split(',')) self.assertEqual(r.max_rss, 10510336) def test_init_quantiles(self): # #,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs) - log = "1,Ackermann,3,54383,54512,54601" - r = PerformanceTestResult(log.split(","), quantiles=True) - self.assertEqual(r.test_num, "1") - self.assertEqual(r.name, "Ackermann") - self.assertEqual( - (r.num_samples, r.min, r.median, r.max), (3, 54383, 54512, 54601) - ) + log = '1,Ackermann,3,54383,54512,54601' + r = PerformanceTestResult(log.split(','), quantiles=True) + self.assertEqual(r.test_num, '1') + self.assertEqual(r.name, 'Ackermann') + self.assertEqual((r.num_samples, r.min, r.median, r.max), + (3, 54383, 54512, 54601)) self.assertAlmostEquals(r.mean, 54498.67, places=2) self.assertAlmostEquals(r.sd, 109.61, places=2) self.assertEqual(r.samples.count, 3) self.assertEqual(r.samples.num_samples, 3) - self.assertEqual( - [s.runtime for s in r.samples.all_samples], [54383, 54512, 54601] - ) + self.assertEqual([s.runtime for s in r.samples.all_samples], + [54383, 54512, 54601]) # #,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B) - log = "1,Ackermann,3,54529,54760,55807,266240" - r = PerformanceTestResult(log.split(","), quantiles=True, memory=True) + log = '1,Ackermann,3,54529,54760,55807,266240' + r = PerformanceTestResult(log.split(','), quantiles=True, memory=True) 
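The test that follows feeds PerformanceTestResult a delta-encoded quantile row, '202,DropWhileArray,2,265,,22': after MIN, each field stores only the difference from the previous value, and an empty field stands for a zero difference. A short illustrative decoder for that encoding (not the parser's actual code):

def decode_delta_quantiles(fields):
    # fields: MIN followed by delta-encoded quantiles; '' means +0
    values, total = [], 0
    for f in fields:
        total += int(f) if f else 0
        values.append(total)
    return values

# MIN(μs), 𝚫MEDIAN, 𝚫MAX from '202,DropWhileArray,2,265,,22'
assert decode_delta_quantiles(['265', '', '22']) == [265, 265, 287]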
self.assertEqual((r.samples.count, r.max_rss), (3, 266240)) # #,TEST,SAMPLES,MIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs) - log = "1,Ackermann,5,54570,54593,54644,57212,58304" - r = PerformanceTestResult(log.split(","), quantiles=True, memory=False) - self.assertEqual( - (r.num_samples, r.min, r.median, r.max), (5, 54570, 54644, 58304) - ) + log = '1,Ackermann,5,54570,54593,54644,57212,58304' + r = PerformanceTestResult(log.split(','), quantiles=True, memory=False) + self.assertEqual((r.num_samples, r.min, r.median, r.max), + (5, 54570, 54644, 58304)) self.assertEqual((r.samples.q1, r.samples.q3), (54593, 57212)) self.assertEqual(r.samples.count, 5) # #,TEST,SAMPLES,MIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs),MAX_RSS(B) - log = "1,Ackermann,5,54686,54731,54774,55030,63466,270336" - r = PerformanceTestResult(log.split(","), quantiles=True, memory=True) + log = '1,Ackermann,5,54686,54731,54774,55030,63466,270336' + r = PerformanceTestResult(log.split(','), quantiles=True, memory=True) self.assertEqual(r.samples.num_samples, 5) self.assertEqual(r.samples.count, 4) # outlier was excluded self.assertEqual(r.max_rss, 270336) @@ -248,9 +241,10 @@ def test_init_delta_quantiles(self): # #,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX # 2-quantile from 2 samples in repeated min, when delta encoded, # the difference is 0, which is ommited -- only separator remains - log = "202,DropWhileArray,2,265,,22" - r = PerformanceTestResult(log.split(","), quantiles=True, delta=True) - self.assertEqual((r.num_samples, r.min, r.median, r.max), (2, 265, 265, 287)) + log = '202,DropWhileArray,2,265,,22' + r = PerformanceTestResult(log.split(','), quantiles=True, delta=True) + self.assertEqual((r.num_samples, r.min, r.median, r.max), + (2, 265, 265, 287)) self.assertEqual(r.samples.count, 2) self.assertEqual(r.samples.num_samples, 2) @@ -266,17 +260,14 @@ def test_init_oversampled_quantiles(self): qs <- subsample(x, s); c(qs[1], diff(qs)) })) sapply(c(3, 5, 11, 21), tbl) """ - def validatePTR(deq): # construct from delta encoded quantiles string - deq = deq.split(",") - num_samples = deq.count("1") - r = PerformanceTestResult( - ["0", "B", str(num_samples)] + deq, quantiles=True, delta=True - ) + deq = deq.split(',') + num_samples = deq.count('1') + r = PerformanceTestResult(['0', 'B', str(num_samples)] + deq, + quantiles=True, delta=True) self.assertEqual(r.samples.num_samples, num_samples) - self.assertEqual( - [s.runtime for s in r.samples.all_samples], range(1, num_samples + 1) - ) + self.assertEqual([s.runtime for s in r.samples.all_samples], + range(1, num_samples + 1)) delta_encoded_quantiles = """ 1,, @@ -315,58 +306,55 @@ def validatePTR(deq): # construct from delta encoded quantiles string 1,,1,1,1,1,1,1,1,1,,1,1,1,1,1,1,1,1,1, 1,,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1""" - map(validatePTR, delta_encoded_quantiles.split("\n")[1:]) + map(validatePTR, delta_encoded_quantiles.split('\n')[1:]) def test_init_meta(self): # #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),… # …PAGES,ICS,YIELD - log = "1,Ackermann,200,715,1281,726,47,715,7,29,15" - r = PerformanceTestResult(log.split(","), meta=True) - self.assertEqual((r.test_num, r.name), ("1", "Ackermann")) + log = '1,Ackermann,200,715,1281,726,47,715,7,29,15' + r = PerformanceTestResult(log.split(','), meta=True) + self.assertEqual((r.test_num, r.name), ('1', 'Ackermann')) self.assertEqual( (r.num_samples, r.min, r.max, r.mean, r.sd, r.median), - (200, 715, 1281, 726, 47, 715), - ) - self.assertEqual((r.mem_pages, r.involuntary_cs, 
r.yield_count), (7, 29, 15)) + (200, 715, 1281, 726, 47, 715)) + self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), + (7, 29, 15)) # #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B),… # …PAGES,ICS,YIELD - log = "1,Ackermann,200,715,1951,734,97,715,36864,9,50,15" - r = PerformanceTestResult(log.split(","), memory=True, meta=True) + log = '1,Ackermann,200,715,1951,734,97,715,36864,9,50,15' + r = PerformanceTestResult(log.split(','), memory=True, meta=True) self.assertEqual( (r.num_samples, r.min, r.max, r.mean, r.sd, r.median), - (200, 715, 1951, 734, 97, 715), - ) + (200, 715, 1951, 734, 97, 715)) self.assertEqual( (r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss), - (9, 50, 15, 36864), - ) + (9, 50, 15, 36864)) # #,TEST,SAMPLES,MIN(μs),MAX(μs),PAGES,ICS,YIELD - log = "1,Ackermann,200,715,3548,8,31,15" - r = PerformanceTestResult(log.split(","), quantiles=True, meta=True) + log = '1,Ackermann,200,715,3548,8,31,15' + r = PerformanceTestResult(log.split(','), quantiles=True, meta=True) self.assertEqual((r.num_samples, r.min, r.max), (200, 715, 3548)) - self.assertEqual( - (r.samples.count, r.samples.min, r.samples.max), (2, 715, 3548) - ) - self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), (8, 31, 15)) + self.assertEqual((r.samples.count, r.samples.min, r.samples.max), + (2, 715, 3548)) + self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), + (8, 31, 15)) # #,TEST,SAMPLES,MIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD - log = "1,Ackermann,200,715,1259,32768,8,28,15" + log = '1,Ackermann,200,715,1259,32768,8,28,15' r = PerformanceTestResult( - log.split(","), quantiles=True, memory=True, meta=True - ) + log.split(','), quantiles=True, memory=True, meta=True) self.assertEqual((r.num_samples, r.min, r.max), (200, 715, 1259)) - self.assertEqual( - (r.samples.count, r.samples.min, r.samples.max), (2, 715, 1259) - ) + self.assertEqual((r.samples.count, r.samples.min, r.samples.max), + (2, 715, 1259)) self.assertEquals(r.max_rss, 32768) - self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), (8, 28, 15)) + self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), + (8, 28, 15)) def test_repr(self): - log_line = "1,AngryPhonebook,20,10664,12933,11035,576,10884" - r = PerformanceTestResult(log_line.split(",")) + log_line = '1,AngryPhonebook,20,10664,12933,11035,576,10884' + r = PerformanceTestResult(log_line.split(',')) self.assertEqual( str(r), - "", + '' ) def test_merge(self): @@ -374,70 +362,51 @@ def test_merge(self): 1,AngryPhonebook,1,12045,12045,12045,0,12045 1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336 1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144 -1,AngryPhonebook,1,12270,12270,12270,0,12270,10498048""".split( - "\n" - )[ - 1: - ] - results = map(PerformanceTestResult, [line.split(",") for line in tests]) +1,AngryPhonebook,1,12270,12270,12270,0,12270,10498048""".split('\n')[1:] + results = map(PerformanceTestResult, + [line.split(',') for line in tests]) results[2].setup = 9 results[3].setup = 7 def as_tuple(r): - return ( - r.num_samples, - r.min, - r.max, - round(r.mean, 2), - r.sd, - r.median, - r.max_rss, - r.setup, - ) + return (r.num_samples, r.min, r.max, round(r.mean, 2), + r.sd, r.median, r.max_rss, r.setup) r = results[0] - self.assertEqual(as_tuple(r), (1, 12045, 12045, 12045, 0, 12045, None, None)) + self.assertEqual(as_tuple(r), + (1, 12045, 12045, 12045, 0, 12045, None, None)) r.merge(results[1]) - self.assertEqual( - as_tuple(r), # drops SD and median, +max_rss - (2, 12045, 
12325, 12185, None, None, 10510336, None), - ) + self.assertEqual(as_tuple(r), # drops SD and median, +max_rss + (2, 12045, 12325, 12185, None, None, 10510336, None)) r.merge(results[2]) - self.assertEqual( - as_tuple(r), # picks smaller of the MAX_RSS, +setup - (3, 11616, 12325, 11995.33, None, None, 10502144, 9), - ) + self.assertEqual(as_tuple(r), # picks smaller of the MAX_RSS, +setup + (3, 11616, 12325, 11995.33, None, None, 10502144, 9)) r.merge(results[3]) - self.assertEqual( - as_tuple(r), # picks smaller of the setup values - (4, 11616, 12325, 12064, None, None, 10498048, 7), - ) + self.assertEqual(as_tuple(r), # picks smaller of the setup values + (4, 11616, 12325, 12064, None, None, 10498048, 7)) class TestResultComparison(unittest.TestCase): def setUp(self): self.r0 = PerformanceTestResult( - "101,GlobalClass,20,0,0,0,0,0,10185728".split(",") - ) + '101,GlobalClass,20,0,0,0,0,0,10185728'.split(',')) self.r01 = PerformanceTestResult( - "101,GlobalClass,20,20,20,20,0,0,10185728".split(",") - ) + '101,GlobalClass,20,20,20,20,0,0,10185728'.split(',')) self.r1 = PerformanceTestResult( - "1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336".split(",") - ) + '1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336'.split(',')) self.r2 = PerformanceTestResult( - "1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144".split(",") - ) + '1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144'.split(',')) def test_init(self): rc = ResultComparison(self.r1, self.r2) - self.assertEqual(rc.name, "AngryPhonebook") + self.assertEqual(rc.name, 'AngryPhonebook') self.assertAlmostEquals(rc.ratio, 12325.0 / 11616.0) - self.assertAlmostEquals(rc.delta, (((11616.0 / 12325.0) - 1) * 100), places=3) + self.assertAlmostEquals(rc.delta, (((11616.0 / 12325.0) - 1) * 100), + places=3) # handle test results that sometimes change to zero, when compiler # optimizes out the body of the incorrectly written test rc = ResultComparison(self.r0, self.r0) - self.assertEqual(rc.name, "GlobalClass") + self.assertEqual(rc.name, 'GlobalClass') self.assertAlmostEquals(rc.ratio, 1) self.assertAlmostEquals(rc.delta, 0, places=3) rc = ResultComparison(self.r0, self.r01) @@ -447,7 +416,10 @@ def test_init(self): self.assertAlmostEquals(rc.ratio, 20001) self.assertAlmostEquals(rc.delta, -99.995, places=3) # disallow comparison of different test results - self.assertRaises(AssertionError, ResultComparison, self.r0, self.r1) + self.assertRaises( + AssertionError, + ResultComparison, self.r0, self.r1 + ) def test_values_is_dubious(self): self.assertFalse(ResultComparison(self.r1, self.r2).is_dubious) @@ -469,7 +441,7 @@ def tearDown(self): def write_temp_file(self, file_name, data): temp_file_name = os.path.join(self.test_dir, file_name) - with open(temp_file_name, "w") as f: + with open(temp_file_name, 'w') as f: f.write(data) return temp_file_name @@ -489,25 +461,19 @@ class OldAndNewLog(unittest.TestCase): 3,Array2D,20,335831,400221,346622,0,346622 1,AngryPhonebook,20,10458,12714,11000,0,11000""" - old_results = dict( - [ - (r.name, r) - for r in map( - PerformanceTestResult, - [line.split(",") for line in old_log_content.splitlines()], - ) - ] - ) - - new_results = dict( - [ - (r.name, r) - for r in map( - PerformanceTestResult, - [line.split(",") for line in new_log_content.splitlines()], - ) - ] - ) + old_results = dict([(r.name, r) + for r in + map(PerformanceTestResult, + [line.split(',') + for line in + old_log_content.splitlines()])]) + + new_results = dict([(r.name, r) + for r in + map(PerformanceTestResult, + 
[line.split(',') + for line in + new_log_content.splitlines()])]) def assert_report_contains(self, texts, report): assert not isinstance(texts, str) @@ -528,108 +494,95 @@ def test_parse_results_csv(self): parser = LogParser() results = parser.parse_results(log.splitlines()) self.assertTrue(isinstance(results[0], PerformanceTestResult)) - self.assertEquals(results[0].name, "Array.append.Array.Int?") - self.assertEquals(results[1].name, "Bridging.NSArray.as!.Array.NSString") - self.assertEquals(results[2].name, "Flatten.Array.Tuple4.lazy.for-in.Reserve") + self.assertEquals(results[0].name, 'Array.append.Array.Int?') + self.assertEquals(results[1].name, + 'Bridging.NSArray.as!.Array.NSString') + self.assertEquals(results[2].name, + 'Flatten.Array.Tuple4.lazy.for-in.Reserve') def test_parse_results_tab_delimited(self): - log = "34\tBitCount\t20\t3\t4\t4\t0\t4" + log = '34\tBitCount\t20\t3\t4\t4\t0\t4' parser = LogParser() results = parser.parse_results(log.splitlines()) self.assertTrue(isinstance(results[0], PerformanceTestResult)) - self.assertEqual(results[0].name, "BitCount") + self.assertEqual(results[0].name, 'BitCount') def test_parse_results_formatted_text(self): """Parse format that Benchmark_Driver prints to console""" - log = """ + log = (""" # TEST SAMPLES MIN(μs) MAX(μs) MEAN(μs) SD(μs) MEDIAN(μs) MAX_RSS(B) 3 Array2D 20 2060 2188 2099 0 2099 20915200 Total performance tests executed: 1 -""" +""") parser = LogParser() results = parser.parse_results(log.splitlines()[1:]) # without 1st \n self.assertTrue(isinstance(results[0], PerformanceTestResult)) r = results[0] - self.assertEqual(r.name, "Array2D") + self.assertEqual(r.name, 'Array2D') self.assertEqual(r.max_rss, 20915200) def test_parse_quantiles(self): """Gathers samples from reported quantiles. Handles optional memory.""" r = LogParser.results_from_string( """#,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs) -1,Ackermann,3,54383,54512,54601""" - )["Ackermann"] - self.assertEqual( - [s.runtime for s in r.samples.all_samples], [54383, 54512, 54601] - ) +1,Ackermann,3,54383,54512,54601""")['Ackermann'] + self.assertEqual([s.runtime for s in r.samples.all_samples], + [54383, 54512, 54601]) r = LogParser.results_from_string( """#,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B) -1,Ackermann,3,54529,54760,55807,266240""" - )["Ackermann"] - self.assertEqual( - [s.runtime for s in r.samples.all_samples], [54529, 54760, 55807] - ) +1,Ackermann,3,54529,54760,55807,266240""")['Ackermann'] + self.assertEqual([s.runtime for s in r.samples.all_samples], + [54529, 54760, 55807]) self.assertEqual(r.max_rss, 266240) def test_parse_delta_quantiles(self): r = LogParser.results_from_string( # 2-quantile aka. median - "#,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,1,101,," - )["B"] + '#,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,1,101,,')['B'] self.assertEqual( (r.num_samples, r.min, r.median, r.max, r.samples.count), - (1, 101, 101, 101, 1), - ) + (1, 101, 101, 101, 1)) r = LogParser.results_from_string( - "#,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,2,101,,1" - )["B"] + '#,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,2,101,,1')['B'] self.assertEqual( (r.num_samples, r.min, r.median, r.max, r.samples.count), - (2, 101, 101, 102, 2), - ) + (2, 101, 101, 102, 2)) r = LogParser.results_from_string( # 20-quantiles aka. 
ventiles - "#,TEST,SAMPLES,MIN(μs),𝚫V1,𝚫V2,𝚫V3,𝚫V4,𝚫V5,𝚫V6,𝚫V7,𝚫V8," - + "𝚫V9,𝚫VA,𝚫VB,𝚫VC,𝚫VD,𝚫VE,𝚫VF,𝚫VG,𝚫VH,𝚫VI,𝚫VJ,𝚫MAX\n" - + "202,DropWhileArray,200,214,,,,,,,,,,,,1,,,,,,2,16,464" - )["DropWhileArray"] + '#,TEST,SAMPLES,MIN(μs),𝚫V1,𝚫V2,𝚫V3,𝚫V4,𝚫V5,𝚫V6,𝚫V7,𝚫V8,' + + '𝚫V9,𝚫VA,𝚫VB,𝚫VC,𝚫VD,𝚫VE,𝚫VF,𝚫VG,𝚫VH,𝚫VI,𝚫VJ,𝚫MAX\n' + + '202,DropWhileArray,200,214,,,,,,,,,,,,1,,,,,,2,16,464' + )['DropWhileArray'] self.assertEqual( (r.num_samples, r.min, r.max, r.samples.count), # last 3 ventiles were outliers and were excluded from the sample - (200, 214, 215, 18), - ) + (200, 214, 215, 18)) def test_parse_meta(self): r = LogParser.results_from_string( - "#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs)," - + "PAGES,ICS,YIELD\n" - + "0,B,1,2,2,2,0,2,7,29,15" - )["B"] + '#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),' + + 'PAGES,ICS,YIELD\n' + + '0,B,1,2,2,2,0,2,7,29,15')['B'] self.assertEqual( - (r.min, r.mem_pages, r.involuntary_cs, r.yield_count), (2, 7, 29, 15) - ) + (r.min, r.mem_pages, r.involuntary_cs, r.yield_count), + (2, 7, 29, 15)) r = LogParser.results_from_string( - "#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs)," - + "MAX_RSS(B),PAGES,ICS,YIELD\n" - + "0,B,1,3,3,3,0,3,36864,9,50,15" - )["B"] + '#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),' + + 'MAX_RSS(B),PAGES,ICS,YIELD\n' + + '0,B,1,3,3,3,0,3,36864,9,50,15')['B'] self.assertEqual( (r.min, r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss), - (3, 9, 50, 15, 36864), - ) + (3, 9, 50, 15, 36864)) r = LogParser.results_from_string( - "#,TEST,SAMPLES,MIN(μs),MAX(μs),PAGES,ICS,YIELD\n" + "0,B,1,4,4,8,31,15" - )["B"] - self.assertEqual( - (r.min, r.mem_pages, r.involuntary_cs, r.yield_count), (4, 8, 31, 15) - ) + '#,TEST,SAMPLES,MIN(μs),MAX(μs),PAGES,ICS,YIELD\n' + + '0,B,1,4,4,8,31,15')['B'] + self.assertEqual((r.min, r.mem_pages, r.involuntary_cs, r.yield_count), + (4, 8, 31, 15)) r = LogParser.results_from_string( - "#,TEST,SAMPLES,MIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD\n" - + "0,B,1,5,5,32768,8,28,15" - )["B"] + '#,TEST,SAMPLES,MIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD\n' + + '0,B,1,5,5,32768,8,28,15')['B'] self.assertEqual( (r.min, r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss), - (5, 8, 28, 15, 32768), - ) + (5, 8, 28, 15, 32768)) def test_parse_results_verbose(self): """Parse multiple performance test results with 2 sample formats: @@ -655,31 +608,27 @@ def test_parse_results_verbose(self): Totals,2""" parser = LogParser() - results = parser.parse_results(verbose_log.split("\n")) + results = parser.parse_results(verbose_log.split('\n')) r = results[0] self.assertEqual( (r.name, r.min, r.max, int(r.mean), int(r.sd), r.median), - ("AngryPhonebook", 11467, 13898, 12392, 1315, 11812), + ('AngryPhonebook', 11467, 13898, 12392, 1315, 11812) ) self.assertEqual(r.num_samples, r.samples.num_samples) - self.assertEqual( - results[0].samples.all_samples, - [(0, 78, 11812), (1, 90, 13898), (2, 90, 11467)], - ) + self.assertEqual(results[0].samples.all_samples, + [(0, 78, 11812), (1, 90, 13898), (2, 90, 11467)]) self.assertEqual(r.yields, None) r = results[1] self.assertEqual( (r.name, r.min, r.max, int(r.mean), int(r.sd), r.median), - ("Array2D", 369900, 381039, 373994, 6127, 371043), + ('Array2D', 369900, 381039, 373994, 6127, 371043) ) self.assertEqual(r.setup, 14444) self.assertEqual(r.num_samples, r.samples.num_samples) - self.assertEqual( - results[1].samples.all_samples, - [(0, 1, 369900), (1, 1, 381039), (2, 1, 371043)], - ) + 
self.assertEqual(results[1].samples.all_samples, + [(0, 1, 369900), (1, 1, 381039), (2, 1, 371043)]) yielded = r.yields[0] self.assertEqual(yielded.before_sample, 1) self.assertEqual(yielded.after, 369918) @@ -693,7 +642,7 @@ def test_parse_environment_verbose(self): 2,AngryPhonebook,3,11269,11884,11657,338,11820 """ parser = LogParser() - results = parser.parse_results(verbose_log.split("\n")) + results = parser.parse_results(verbose_log.split('\n')) r = results[0] self.assertEqual(r.max_rss, 32768) @@ -706,8 +655,8 @@ def test_results_from_merge(self): concatenated_logs = """4,ArrayAppend,20,23641,29000,24990,0,24990 4,ArrayAppend,1,20000,20000,20000,0,20000""" results = LogParser.results_from_string(concatenated_logs) - self.assertEqual(results.keys(), ["ArrayAppend"]) - result = results["ArrayAppend"] + self.assertEqual(results.keys(), ['ArrayAppend']) + result = results['ArrayAppend'] self.assertTrue(isinstance(result, PerformanceTestResult)) self.assertEqual(result.min, 20000) self.assertEqual(result.max, 29000) @@ -728,8 +677,8 @@ def test_results_from_merge_verbose(self): Sample 3,364245 3,Array2D,4,363094,376131,368159,5931,369169""" results = LogParser.results_from_string(concatenated_logs) - self.assertEqual(results.keys(), ["Array2D"]) - result = results["Array2D"] + self.assertEqual(results.keys(), ['Array2D']) + result = results['Array2D'] self.assertTrue(isinstance(result, PerformanceTestResult)) self.assertEqual(result.min, 350815) self.assertEqual(result.max, 376131) @@ -766,7 +715,7 @@ def test_excludes_outliers_from_samples(self): 65,DropFirstAnySeqCntRangeLazy,10,184,455,228,79,206 """ parser = LogParser() - result = parser.parse_results(verbose_log.split("\n"))[0] + result = parser.parse_results(verbose_log.split('\n'))[0] self.assertEqual(result.num_samples, 10) self.assertEqual(result.samples.count, 8) self.assertEqual(len(result.samples.outliers), 2) @@ -778,26 +727,26 @@ def names(tests): return [t.name for t in tests] tc = TestComparator(self.old_results, self.new_results, 0.05) - self.assertEqual(names(tc.unchanged), ["AngryPhonebook", "Array2D"]) - self.assertEqual(names(tc.increased), ["ByteSwap", "ArrayAppend"]) - self.assertEqual(names(tc.decreased), ["BitCount"]) - self.assertEqual(names(tc.added), ["TwoSum"]) - self.assertEqual(names(tc.removed), ["AnyHashableWithAClass"]) + self.assertEqual(names(tc.unchanged), ['AngryPhonebook', 'Array2D']) + self.assertEqual(names(tc.increased), ['ByteSwap', 'ArrayAppend']) + self.assertEqual(names(tc.decreased), ['BitCount']) + self.assertEqual(names(tc.added), ['TwoSum']) + self.assertEqual(names(tc.removed), ['AnyHashableWithAClass']) # other way around tc = TestComparator(self.new_results, self.old_results, 0.05) - self.assertEqual(names(tc.unchanged), ["AngryPhonebook", "Array2D"]) - self.assertEqual(names(tc.increased), ["BitCount"]) - self.assertEqual(names(tc.decreased), ["ByteSwap", "ArrayAppend"]) - self.assertEqual(names(tc.added), ["AnyHashableWithAClass"]) - self.assertEqual(names(tc.removed), ["TwoSum"]) + self.assertEqual(names(tc.unchanged), ['AngryPhonebook', 'Array2D']) + self.assertEqual(names(tc.increased), ['BitCount']) + self.assertEqual(names(tc.decreased), ['ByteSwap', 'ArrayAppend']) + self.assertEqual(names(tc.added), ['AnyHashableWithAClass']) + self.assertEqual(names(tc.removed), ['TwoSum']) # delta_threshold determines the sorting into change groups; # report only change above 100% (ByteSwap's runtime went to 0): tc = TestComparator(self.old_results, self.new_results, 1) self.assertEqual( 
names(tc.unchanged), - ["AngryPhonebook", "Array2D", "ArrayAppend", "BitCount"], + ['AngryPhonebook', 'Array2D', 'ArrayAppend', 'BitCount'] ) - self.assertEqual(names(tc.increased), ["ByteSwap"]) + self.assertEqual(names(tc.increased), ['ByteSwap']) self.assertEqual(tc.decreased, []) @@ -821,58 +770,45 @@ def assert_html_contains(self, texts): def test_values(self): self.assertEqual( - ReportFormatter.values( - PerformanceTestResult( - "1,AngryPhonebook,20,10664,12933,11035,576,10884".split(",") - ) - ), - ("AngryPhonebook", "10664", "12933", "11035", "—"), + ReportFormatter.values(PerformanceTestResult( + '1,AngryPhonebook,20,10664,12933,11035,576,10884'.split(','))), + ('AngryPhonebook', '10664', '12933', '11035', '—') ) self.assertEqual( - ReportFormatter.values( - PerformanceTestResult( - "1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336".split(",") - ) - ), - ("AngryPhonebook", "12045", "12045", "12045", "10510336"), + ReportFormatter.values(PerformanceTestResult( + '1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336' + .split(','))), + ('AngryPhonebook', '12045', '12045', '12045', '10510336') ) r1 = PerformanceTestResult( - "1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336".split(",") - ) + '1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336'.split(',')) r2 = PerformanceTestResult( - "1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144".split(",") - ) + '1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144'.split(',')) self.assertEqual( ReportFormatter.values(ResultComparison(r1, r2)), - ("AngryPhonebook", "12325", "11616", "-5.8%", "1.06x"), + ('AngryPhonebook', '12325', '11616', '-5.8%', '1.06x') ) self.assertEqual( ReportFormatter.values(ResultComparison(r2, r1)), - ("AngryPhonebook", "11616", "12325", "+6.1%", "0.94x"), + ('AngryPhonebook', '11616', '12325', '+6.1%', '0.94x') ) r2.max = r1.min + 1 self.assertEqual( ReportFormatter.values(ResultComparison(r1, r2))[4], - "1.06x (?)", # is_dubious + '1.06x (?)' # is_dubious ) def test_justified_columns(self): """Table columns are all formated with same width, defined by the longest value. 
""" - self.assert_markdown_contains( - [ - "AnyHashableWithAClass | 247027 | 319065 | 259056 | 10250445", - "Array2D | 335831 | 335831 | +0.0% | 1.00x", - ] - ) - self.assert_git_contains( - [ - "AnyHashableWithAClass 247027 319065 259056 10250445", - "Array2D 335831 335831 +0.0% 1.00x", - ] - ) + self.assert_markdown_contains([ + 'AnyHashableWithAClass | 247027 | 319065 | 259056 | 10250445', + 'Array2D | 335831 | 335831 | +0.0% | 1.00x']) + self.assert_git_contains([ + 'AnyHashableWithAClass 247027 319065 259056 10250445', + 'Array2D 335831 335831 +0.0% 1.00x']) def test_column_headers(self): """Report contains table headers for ResultComparisons and changed @@ -881,63 +817,49 @@ def test_column_headers(self): performance_test_result = self.tc.added[0] self.assertEqual( ReportFormatter.header_for(performance_test_result), - ("TEST", "MIN", "MAX", "MEAN", "MAX_RSS"), + ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS') ) comparison_result = self.tc.increased[0] self.assertEqual( ReportFormatter.header_for(comparison_result), - ("TEST", "OLD", "NEW", "DELTA", "RATIO"), - ) - self.assert_markdown_contains( - [ - "TEST | OLD | NEW | DELTA | RATIO", - ":--- | ---: | ---: | ---: | ---: ", - "TEST | MIN | MAX | MEAN | MAX_RSS", - ] - ) - self.assert_git_contains( - [ - "TEST OLD NEW DELTA RATIO", - "TEST MIN MAX MEAN MAX_RSS", - ] - ) - self.assert_html_contains( - [ - """ + ('TEST', 'OLD', 'NEW', 'DELTA', 'RATIO') + ) + self.assert_markdown_contains([ + 'TEST | OLD | NEW | DELTA | RATIO', + ':--- | ---: | ---: | ---: | ---: ', + 'TEST | MIN | MAX | MEAN | MAX_RSS']) + self.assert_git_contains([ + 'TEST OLD NEW DELTA RATIO', + 'TEST MIN MAX MEAN MAX_RSS']) + self.assert_html_contains([ + """ OLD NEW DELTA RATIO""", - """ + """ MIN MAX MEAN - MAX_RSS""", - ] - ) + MAX_RSS"""]) def test_emphasize_speedup(self): """Emphasize speedup values for regressions and improvements""" # tests in No Changes don't have emphasized speedup - self.assert_markdown_contains( - [ - "BitCount | 3 | 9 | +199.9% | **0.33x**", - "ByteSwap | 4 | 0 | -100.0% | **4001.00x**", - "AngryPhonebook | 10458 | 10458 | +0.0% | 1.00x ", - "ArrayAppend | 23641 | 20000 | -15.4% | **1.18x (?)**", - ] - ) - self.assert_git_contains( - [ - "BitCount 3 9 +199.9% **0.33x**", - "ByteSwap 4 0 -100.0% **4001.00x**", - "AngryPhonebook 10458 10458 +0.0% 1.00x", - "ArrayAppend 23641 20000 -15.4% **1.18x (?)**", - ] - ) - self.assert_html_contains( - [ - """ + self.assert_markdown_contains([ + 'BitCount | 3 | 9 | +199.9% | **0.33x**', + 'ByteSwap | 4 | 0 | -100.0% | **4001.00x**', + 'AngryPhonebook | 10458 | 10458 | +0.0% | 1.00x ', + 'ArrayAppend | 23641 | 20000 | -15.4% | **1.18x (?)**' + ]) + self.assert_git_contains([ + 'BitCount 3 9 +199.9% **0.33x**', + 'ByteSwap 4 0 -100.0% **4001.00x**', + 'AngryPhonebook 10458 10458 +0.0% 1.00x', + 'ArrayAppend 23641 20000 -15.4% **1.18x (?)**' + ]) + self.assert_html_contains([ + """ BitCount 3 @@ -945,7 +867,7 @@ def test_emphasize_speedup(self): +199.9% 0.33x """, - """ + """ ByteSwap 4 @@ -953,221 +875,182 @@ def test_emphasize_speedup(self): -100.0% 4001.00x """, - """ + """ AngryPhonebook 10458 10458 +0.0% 1.00x - """, - ] - ) + """ + ]) def test_sections(self): """Report is divided into sections with summaries.""" - self.assert_markdown_contains( - [ - """
+ self.assert_markdown_contains([ + """
Regression (1)""", - """
+ """
Improvement (2)""", - """
+ """
No Changes (2)""", - """
+ """
Added (1)""", - """
- Removed (1)""", - ] - ) - self.assert_git_contains( - [ - "Regression (1): \n", - "Improvement (2): \n", - "No Changes (2): \n", - "Added (1): \n", - "Removed (1): \n", - ] - ) - self.assert_html_contains( - [ - "Regression (1)", - "Improvement (2)", - "No Changes (2)", - "Added (1)", - "Removed (1)", - ] - ) + """
+ Removed (1)"""]) + self.assert_git_contains([ + 'Regression (1): \n', + 'Improvement (2): \n', + 'No Changes (2): \n', + 'Added (1): \n', + 'Removed (1): \n']) + self.assert_html_contains([ + "Regression (1)", + "Improvement (2)", + "No Changes (2)", + "Added (1)", + "Removed (1)"]) def test_report_only_changes(self): """Leave out tests without significant change.""" rf = ReportFormatter(self.tc, changes_only=True) markdown, git, html = rf.markdown(), rf.git(), rf.html() - self.assertNotIn("No Changes", markdown) - self.assertNotIn("AngryPhonebook", markdown) - self.assertNotIn("No Changes", git) - self.assertNotIn("AngryPhonebook", git) - self.assertNotIn("No Changes", html) - self.assertNotIn("AngryPhonebook", html) + self.assertNotIn('No Changes', markdown) + self.assertNotIn('AngryPhonebook', markdown) + self.assertNotIn('No Changes', git) + self.assertNotIn('AngryPhonebook', git) + self.assertNotIn('No Changes', html) + self.assertNotIn('AngryPhonebook', html) def test_single_table_report(self): """Single table report has inline headers and no elaborate sections.""" self.tc.removed = [] # test handling empty section rf = ReportFormatter(self.tc, changes_only=True, single_table=True) markdown = rf.markdown() - self.assertNotIn("Regression (1)", - "TEST | OLD | NEW | DELTA | RATIO", - "BitCount | 3 | 9 | +199.9% | **0.33x**", + 'Regression (1)', + 'TEST | OLD | NEW | DELTA | RATIO', + 'BitCount | 3 | 9 | +199.9% | **0.33x**', ] git = [ - "Regression (1):", - "TEST OLD NEW DELTA RATIO", - "BitCount 3 9 +199.9% **0.33x**", + 'Regression (1):', + 'TEST OLD NEW DELTA RATIO', + 'BitCount 3 9 +199.9% **0.33x**', ] - html = ["", "BitCount"] + html = ['', "BitCount"] def setUp(self): super(Test_compare_perf_tests_main, self).setUp() - self.old_log = self.write_temp_file("old.log", self.old_log_content) - self.new_log = self.write_temp_file("new.log", self.new_log_content) + self.old_log = self.write_temp_file('old.log', self.old_log_content) + self.new_log = self.write_temp_file('new.log', self.new_log_content) def execute_main_with_format(self, report_format, test_output=False): - report_file = self.test_dir + "report.log" - args = [ - "compare_perf_tests.py", - "--old-file", - self.old_log, - "--new-file", - self.new_log, - "--format", - report_format, - ] - - sys.argv = args if not test_output else args + ["--output", report_file] + report_file = self.test_dir + 'report.log' + args = ['compare_perf_tests.py', + '--old-file', self.old_log, + '--new-file', self.new_log, + '--format', report_format] + + sys.argv = (args if not test_output else + args + ['--output', report_file]) with captured_output() as (out, _): main() report_out = out.getvalue() if test_output: - with open(report_file, "r") as f: + with open(report_file, 'r') as f: report = f.read() # because print adds newline, add one here, too: - report_file = str(report + "\n") + report_file = str(report + '\n') else: report_file = None @@ -1175,41 +1058,40 @@ def execute_main_with_format(self, report_format, test_output=False): def test_markdown(self): """Writes Markdown formatted report to stdout""" - report_out, _ = self.execute_main_with_format("markdown") + report_out, _ = self.execute_main_with_format('markdown') self.assert_report_contains(self.markdown, report_out) def test_markdown_output(self): """Writes Markdown formatted report to stdout and `--output` file.""" - report_out, report_file = self.execute_main_with_format( - "markdown", test_output=True - ) + report_out, report_file = ( + 
self.execute_main_with_format('markdown', test_output=True)) self.assertEqual(report_out, report_file) self.assert_report_contains(self.markdown, report_file) def test_git(self): """Writes Git formatted report to stdout.""" - report_out, _ = self.execute_main_with_format("git") + report_out, _ = self.execute_main_with_format('git') self.assert_report_contains(self.git, report_out) def test_git_output(self): """Writes Git formatted report to stdout and `--output` file.""" - report_out, report_file = self.execute_main_with_format("git", test_output=True) + report_out, report_file = ( + self.execute_main_with_format('git', test_output=True)) self.assertEqual(report_out, report_file) self.assert_report_contains(self.git, report_file) def test_html(self): """Writes HTML formatted report to stdout.""" - report_out, _ = self.execute_main_with_format("html") + report_out, _ = self.execute_main_with_format('html') self.assert_report_contains(self.html, report_out) def test_html_output(self): """Writes HTML formatted report to stdout and `--output` file.""" - report_out, report_file = self.execute_main_with_format( - "html", test_output=True - ) + report_out, report_file = ( + self.execute_main_with_format('html', test_output=True)) self.assertEqual(report_out, report_file) self.assert_report_contains(self.html, report_file) -if __name__ == "__main__": +if __name__ == '__main__': unittest.main() diff --git a/benchmark/scripts/test_utils.py b/benchmark/scripts/test_utils.py index 4b675d9d82582..6a2bf8856a99f 100644 --- a/benchmark/scripts/test_utils.py +++ b/benchmark/scripts/test_utils.py @@ -78,15 +78,14 @@ def expect(self, call_args, response): def assert_called_with(self, expected_args): """Verify that the tested method was called with provided arguments.""" expected_args = tuple(expected_args) - assert expected_args in self.calls, "Expected: {0} in Called: {1}".format( - expected_args, self.calls - ) + assert expected_args in self.calls, ( + 'Expected: {0} in Called: {1}'.format(expected_args, self.calls)) def assert_called_all_expected(self): """Verify that all expeced invocations of tested method were called.""" - assert self.calls == self.expected, "\nExpected: {0}, \n Called: {1}".format( - self.expected, self.calls - ) + assert self.calls == self.expected, ( + '\nExpected: {0}, \n Called: {1}'.format( + self.expected, self.calls)) class MockLoggingHandler(logging.Handler): @@ -104,9 +103,5 @@ def emit(self, record): def reset(self): """Clear all log messages.""" self.messages = { - "debug": [], - "info": [], - "warning": [], - "error": [], - "critical": [], + 'debug': [], 'info': [], 'warning': [], 'error': [], 'critical': [] } diff --git a/benchmark/utils/convertToJSON.py b/benchmark/utils/convertToJSON.py index b7a547116d7ba..54aedc7270068 100644 --- a/benchmark/utils/convertToJSON.py +++ b/benchmark/utils/convertToJSON.py @@ -74,9 +74,9 @@ if __name__ == "__main__": data = {} - data["Tests"] = [] - data["Machine"] = {} - data["Run"] = {} + data['Tests'] = [] + data['Machine'] = {} + data['Run'] = {} for line in sys.stdin: m = SCORERE.match(line) if not m: @@ -84,8 +84,8 @@ if not m: continue test = {} - test["Data"] = [int(m.group(VALGROUP))] - test["Info"] = {} - test["Name"] = [m.group(KEYGROUP)] - data["Tests"].append(test) + test['Data'] = [int(m.group(VALGROUP))] + test['Info'] = {} + test['Name'] = [m.group(KEYGROUP)] + data['Tests'].append(test) print(json.dumps(data, sort_keys=True, indent=4)) From ac294f39867559ea8c1373d5451e01595d907a83 Mon Sep 17 00:00:00 2001 From: Pavol 
Vaskovic Date: Mon, 29 Jul 2019 14:04:25 +0200 Subject: [PATCH 02/21] [benchmark] Fix parsing delta zeroed metadata --- benchmark/scripts/compare_perf_tests.py | 5 +++-- benchmark/scripts/test_compare_perf_tests.py | 5 +++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index 017ba24c10229..4efc16804f5da 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -276,8 +276,9 @@ def __init__(self, csv_row, quantiles=False, memory=False, delta=False, # Optional measurement metadata. The number of: # memory pages used, involuntary context switches and voluntary yields - self.mem_pages, self.involuntary_cs, self.yield_count = \ - [int(x) for x in csv_row[-3:]] if meta else (None, None, None) + self.mem_pages, self.involuntary_cs, self.yield_count = ( + [int(x) if x else 0 for x in csv_row[-3:]] if meta else + (None, None, None)) self.yields = None self.setup = None diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py index 4c1c6effffcd5..5b66b7a762c12 100644 --- a/benchmark/scripts/test_compare_perf_tests.py +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -347,6 +347,11 @@ def test_init_meta(self): self.assertEquals(r.max_rss, 32768) self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), (8, 28, 15)) + log = '1,Ackermann,2,715,,16,9,,' # --delta erased 0s + r = PerformanceTestResult( + log.split(','), quantiles=True, meta=True) + self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), + (9, 0, 0)) def test_repr(self): log_line = '1,AngryPhonebook,20,10664,12933,11035,576,10884' From 9f7e78288fed0bf9565fd86a7785b04b9759b512 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Sat, 3 Aug 2019 20:48:30 +0200 Subject: [PATCH 03/21] [benchmark] PerformanceTestResults merge samples Adjusted how merged PerformanceTestResults track the number of underlying samples when using quantile subsampling. 
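
To illustrate the intended accounting, here is a minimal standalone sketch
(a hypothetical MergedResult class, not the actual PerformanceTestResult):
the reported sample count is summed across merged runs, while the retained
quantile subsamples grow only by the values that were actually reported.

    # Hypothetical sketch of the merge accounting; not the real class.
    class MergedResult(object):
        def __init__(self, num_samples, subsamples):
            self.num_samples = num_samples      # samples measured by the harness
            self.subsamples = list(subsamples)  # quantile values actually reported

        def merge(self, other):
            self.num_samples += other.num_samples     # sum measured counts
            self.subsamples.extend(other.subsamples)  # keep only reported values

    a = MergedResult(200, range(21))  # 200 samples summarized as 21 ventiles
    b = MergedResult(200, range(21))
    a.merge(b)
    assert (a.num_samples, len(a.subsamples)) == (400, 42)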
--- benchmark/scripts/compare_perf_tests.py | 9 +++---- benchmark/scripts/test_compare_perf_tests.py | 27 ++++++++++++++++++++ 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index 4efc16804f5da..67e8e7af09016 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -302,7 +302,8 @@ def merge(self, r): if self.samples and r.samples: map(self.samples.add, r.samples.samples) sams = self.samples - self.num_samples = sams.num_samples + self.num_samples += r.num_samples + sams.outliers += r.samples.outliers self.min, self.max, self.median, self.mean, self.sd = \ sams.min, sams.max, sams.median, sams.mean, sams.sd else: @@ -517,10 +518,8 @@ def __init__(self, old_results, new_results, delta_threshold): self.removed = sorted([old_results[t] for t in removed_tests], key=lambda r: r.name) - def compare(name): - return ResultComparison(old_results[name], new_results[name]) - - comparisons = map(compare, comparable_tests) + comparisons = [ResultComparison(old_results[name], new_results[name]) + for name in comparable_tests] def partition(l, p): return reduce(lambda x, y: x[not p(y)].append(y) or x, l, ([], [])) diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py index 5b66b7a762c12..8683340edf0b5 100644 --- a/benchmark/scripts/test_compare_perf_tests.py +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -390,6 +390,33 @@ def as_tuple(r): self.assertEqual(as_tuple(r), # picks smaller of the setup values (4, 11616, 12325, 12064, None, None, 10498048, 7)) + def test_merge_with_samples(self): + # --quantile=20 --delta + log = """ +684,B,200,967,,14,5,3,3,2,1,1,,,,,1,,3,3,5,11,76,1827 +684,B,200,972,,,,,,,,2,2,3,1,,,3,6,21,30,146,694,4590 +684,B,200,986,,,1,1,,1,,,,1,,2,2,9,5,6,15,28,224,2902 +""".split('\n')[1:-1] + results = [ + PerformanceTestResult(line.split(','), quantiles=True, delta=True) + for line in log] + self.assertEqual([r.num_samples for r in results], [200, 200, 200]) + self.assertEqual( + [r.samples.num_samples for r in results], [21, 21, 21]) + # after excluding outliers, the real sample count is lower + self.assertEqual([r.samples.count for r in results], [18, 17, 18]) + + def as_tuple(r): + return (r.num_samples, r.samples.num_samples, r.samples.count, + r.min, r.samples.median, r.max) + + r = results[0] + self.assertEqual(as_tuple(r), (200, 21, 18, 967, 996, 1008)) + r.merge(results[1]) # 18 + 17 = 35, after merge using only ventiles + self.assertEqual(as_tuple(r), (400, 42, 35, 967, 983, 1010)) + r.merge(results[2]) # 35 + 18 = 53 + self.assertEqual(as_tuple(r), (600, 63, 53, 967, 989, 1029)) + class TestResultComparison(unittest.TestCase): def setUp(self): From e3a639a8ffc50598a65c5a667061c277a5fa112d Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Mon, 16 Sep 2019 08:34:01 +0200 Subject: [PATCH 04/21] =?UTF-8?q?[benchmark]=20Don=E2=80=99t=20justify=20l?= =?UTF-8?q?ast=20column=20in=20reports?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In text reports, don’t justify the last columns with unnecessary spaces. 
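
Roughly, the change boils down to padding every column except the last one.
A small sketch of that helper follows; widths is passed explicitly here,
unlike the closure inside ReportFormatter.

    # Illustrative sketch: left-justify all columns, but leave the last as-is.
    def justify_columns(contents, widths):
        return ([c.ljust(w) for w, c in zip(widths, contents[:-1])] +
                [contents[-1]])

    row = justify_columns(['BitCount', '3', '9', '+199.9%', '**0.33x**'],
                          [21, 6, 6, 7])
    assert row[0] == 'BitCount'.ljust(21)  # padded to the column width
    assert row[-1] == '**0.33x**'          # no trailing spaces appended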
--- benchmark/scripts/compare_perf_tests.py | 7 ++++--- benchmark/scripts/test_compare_perf_tests.py | 22 ++++++++++---------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index 67e8e7af09016..e18ef0e39e2ba 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -617,7 +617,7 @@ def _column_widths(self): results += self.comparator.added + self.comparator.removed widths = [ - map(len, columns) for columns in + map(len, row[:-1]) for row in [ReportFormatter.PERFORMANCE_TEST_RESULT_HEADER, ReportFormatter.RESULT_COMPARISON_HEADER] + [ReportFormatter.values(r) for r in results] @@ -626,7 +626,7 @@ def _column_widths(self): def max_widths(maximum, widths): return map(max, zip(maximum, widths)) - return reduce(max_widths, widths, [0] * 5) + return reduce(max_widths, widths, [0] * 4) def _formatted_text(self, label_formatter, COLUMN_SEPARATOR, DELIMITER_ROW, SEPARATOR, SECTION): @@ -634,7 +634,8 @@ def _formatted_text(self, label_formatter, COLUMN_SEPARATOR, self.header_printed = False def justify_columns(contents): - return [c.ljust(w) for w, c in zip(widths, contents)] + return ([c.ljust(w) for w, c in zip(widths, contents[:-1])] + + [contents[-1]]) def row(contents): return ('' if not contents else diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py index 8683340edf0b5..5cc97bb1b4c47 100644 --- a/benchmark/scripts/test_compare_perf_tests.py +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -833,14 +833,14 @@ def test_values(self): def test_justified_columns(self): """Table columns are all formated with same width, defined by the - longest value. + longest value, except the last column. 
""" self.assert_markdown_contains([ - 'AnyHashableWithAClass | 247027 | 319065 | 259056 | 10250445', - 'Array2D | 335831 | 335831 | +0.0% | 1.00x']) + 'AnyHashableWithAClass | 247027 | 319065 | 259056 | 10250445\n', + 'Array2D | 335831 | 335831 | +0.0% | 1.00x\n']) self.assert_git_contains([ - 'AnyHashableWithAClass 247027 319065 259056 10250445', - 'Array2D 335831 335831 +0.0% 1.00x']) + 'AnyHashableWithAClass 247027 319065 259056 10250445\n', + 'Array2D 335831 335831 +0.0% 1.00x\n']) def test_column_headers(self): """Report contains table headers for ResultComparisons and changed @@ -857,9 +857,9 @@ def test_column_headers(self): ('TEST', 'OLD', 'NEW', 'DELTA', 'RATIO') ) self.assert_markdown_contains([ - 'TEST | OLD | NEW | DELTA | RATIO', - ':--- | ---: | ---: | ---: | ---: ', - 'TEST | MIN | MAX | MEAN | MAX_RSS']) + 'TEST | OLD | NEW | DELTA | RATIO\n' + ':--- | ---: | ---: | ---: | ---:\n', + 'TEST | MIN | MAX | MEAN | MAX_RSS\n']) self.assert_git_contains([ 'TEST OLD NEW DELTA RATIO', 'TEST MIN MAX MEAN MAX_RSS']) @@ -881,7 +881,7 @@ def test_emphasize_speedup(self): self.assert_markdown_contains([ 'BitCount | 3 | 9 | +199.9% | **0.33x**', 'ByteSwap | 4 | 0 | -100.0% | **4001.00x**', - 'AngryPhonebook | 10458 | 10458 | +0.0% | 1.00x ', + 'AngryPhonebook | 10458 | 10458 | +0.0% | 1.00x', 'ArrayAppend | 23641 | 20000 | -15.4% | **1.18x (?)**' ]) self.assert_git_contains([ @@ -978,8 +978,8 @@ def test_single_table_report(self): self.assertNotIn('): \n', git) # no sections self.assertNotIn('REMOVED', git) self.assert_report_contains([ - '\nREGRESSION ', ' OLD ', ' NEW ', ' DELTA ', ' RATIO ', - '\n\nADDED ', ' MIN ', ' MAX ', ' MEAN ', ' MAX_RSS ' + '\nREGRESSION ', ' OLD ', ' NEW ', ' DELTA ', ' RATIO', + '\n\nADDED ', ' MIN ', ' MAX ', ' MEAN ', ' MAX_RSS' ], git) # Separator before every inline header (new section): self.assertEqual(git.count('\n\n'), 2) From 979aced53e286edcbd414388ab961b27304203f2 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Tue, 30 Jul 2019 12:01:51 +0200 Subject: [PATCH 05/21] [benchmark] BenchmarkDriver min-samples & metadata Support for invoking benchmark drivers with min-samples and gathering environmental metadata. 
--- benchmark/scripts/Benchmark_Driver | 28 +++++++-------- benchmark/scripts/test_Benchmark_Driver.py | 42 +++++++++++++++------- 2 files changed, 43 insertions(+), 27 deletions(-) diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver index 31808852bcf22..8588cc292e2cc 100755 --- a/benchmark/scripts/Benchmark_Driver +++ b/benchmark/scripts/Benchmark_Driver @@ -137,37 +137,37 @@ class BenchmarkDriver(object): def run(self, test=None, num_samples=None, num_iters=None, sample_time=None, verbose=None, measure_memory=False, - quantile=None): + quantile=None, min_samples=None, gather_metadata=False): """Execute benchmark and gather results.""" - num_samples = num_samples or 0 - num_iters = num_iters or 0 # automatically determine N to run for 1s - sample_time = sample_time or 0 # default is 1s - cmd = self._cmd_run( - test, num_samples, num_iters, sample_time, - verbose, measure_memory, quantile) + test, num_samples, num_iters, sample_time, min_samples, + verbose, measure_memory, quantile, gather_metadata) output = self._invoke(cmd) results = self.parser.results_from_string(output) return results.items()[0][1] if test else results - def _cmd_run(self, test, num_samples, num_iters, sample_time, - verbose, measure_memory, quantile): + def _cmd_run(self, test, num_samples, num_iters, sample_time, min_samples, + verbose, measure_memory, quantile, gather_metadata): cmd = [self.test_harness] if test: cmd.append(test) else: cmd.extend([self.test_number.get(name, name) for name in self.tests]) - if num_samples > 0: + if num_samples: cmd.append('--num-samples={0}'.format(num_samples)) - if num_iters > 0: + if min_samples: + cmd.append('--min-samples={0}'.format(min_samples)) + if num_iters: cmd.append('--num-iters={0}'.format(num_iters)) - if sample_time > 0: + if sample_time: cmd.append('--sample-time={0}'.format(sample_time)) if verbose: cmd.append('--verbose') if measure_memory: cmd.append('--memory') + if gather_metadata: + cmd.append('--meta') if quantile: cmd.append('--quantile={0}'.format(quantile)) cmd.append('--delta') @@ -183,8 +183,8 @@ class BenchmarkDriver(object): return a return reduce(merge_results, - [self.run(test, measure_memory=True, - num_iters=1, quantile=20) + [self.run(test, num_iters=1, quantile=20, + measure_memory=True, gather_metadata=True) for _ in range(self.args.independent_samples)]) def log_results(self, output, log_file=None): diff --git a/benchmark/scripts/test_Benchmark_Driver.py b/benchmark/scripts/test_Benchmark_Driver.py index 32b1a9e527635..af1fd61ef5018 100644 --- a/benchmark/scripts/test_Benchmark_Driver.py +++ b/benchmark/scripts/test_Benchmark_Driver.py @@ -277,6 +277,11 @@ def test_run_benchmark_with_multiple_samples(self): self.subprocess_mock.assert_called_with( ('/benchmarks/Benchmark_O', 'b2', '--num-samples=5')) + def test_run_benchmark_with_minimum_samples(self): + self.driver.run('b', min_samples=7) + self.subprocess_mock.assert_called_with( + ('/benchmarks/Benchmark_O', 'b', '--min-samples=7')) + def test_run_benchmark_with_specified_number_of_iterations(self): self.driver.run('b', num_iters=1) self.subprocess_mock.assert_called_with( @@ -321,6 +326,11 @@ def test_measure_memory(self): self.subprocess_mock.assert_called_with( ('/benchmarks/Benchmark_O', 'b', '--memory')) + def test_gather_metadata(self): + self.driver.run('b', gather_metadata=True) + self.subprocess_mock.assert_called_with( + ('/benchmarks/Benchmark_O', 'b', '--meta')) + def test_report_quantiles(self): """Use delta compression for quantile reports.""" 
self.driver.run('b', quantile=4) @@ -333,7 +343,7 @@ def test_run_benchmark_independent_samples(self): r = self.driver.run_independent_samples('b1') self.assertEqual(self.subprocess_mock.calls.count( ('/benchmarks/Benchmark_O', 'b1', '--num-iters=1', '--memory', - '--quantile=20', '--delta')), 3) + '--meta', '--quantile=20', '--delta')), 3) self.assertEqual(r.num_samples, 3) # results are merged def test_run_and_log(self): @@ -411,21 +421,26 @@ def test_deterministing_hashing(self): class BenchmarkDriverMock(Mock): - """Mock for BenchmarkDriver's `run` method""" + """Mock for BenchmarkDriver's `run` method.""" + def __init__(self, tests, responses=None): super(BenchmarkDriverMock, self).__init__(responses) self.tests = tests self.args = ArgsStub() - def _run(test, num_samples=None, num_iters=None, - verbose=None, measure_memory=False): - return self.record_and_respond(test, num_samples, num_iters, - verbose, measure_memory) + def _run(test=None, num_samples=None, num_iters=None, + sample_time=None, verbose=None, measure_memory=False, + quantile=None, min_samples=None, gather_metadata=False): + return self._record_and_respond( + test, num_samples, num_iters, sample_time, min_samples, + verbose, measure_memory, quantile, gather_metadata) self.run = _run - def record_and_respond(self, test, num_samples, num_iters, - verbose, measure_memory): - args = (test, num_samples, num_iters, verbose, measure_memory) + def _record_and_respond( + self, test, num_samples, num_iters, sample_time, min_samples, + verbose, measure_memory, quantile, gather_metadata): + args = (test, num_samples, num_iters, sample_time, min_samples, + verbose, measure_memory, quantile, gather_metadata) self.calls.append(args) return self.respond.get(args, _PTR(min=700)) @@ -520,11 +535,12 @@ def _PTR(min=700, mem_pages=1000, setup=None): return Stub(samples=Stub(min=min), mem_pages=mem_pages, setup=setup) -def _run(test, num_samples=None, num_iters=None, verbose=None, - measure_memory=False): +def _run(test=None, num_samples=None, num_iters=None, + sample_time=None, verbose=None, measure_memory=False, + quantile=None, min_samples=None, gather_metadata=False): """Helper function that constructs tuple with arguments for run method.""" - return ( - test, num_samples, num_iters, verbose, measure_memory) + return (test, num_samples, num_iters, sample_time, min_samples, + verbose, measure_memory, quantile, gather_metadata) class TestBenchmarkDoctor(unittest.TestCase): From 6ee22de3fab6f3ea5f8089c5d5890b60546ecba9 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Wed, 13 Nov 2019 09:23:23 +0100 Subject: [PATCH 06/21] [benchmark] [Gardening] Fix assertEqual naming --- benchmark/scripts/test_Benchmark_Driver.py | 8 +-- benchmark/scripts/test_compare_perf_tests.py | 62 ++++++++++---------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/benchmark/scripts/test_Benchmark_Driver.py b/benchmark/scripts/test_Benchmark_Driver.py index af1fd61ef5018..552dec85481cd 100644 --- a/benchmark/scripts/test_Benchmark_Driver.py +++ b/benchmark/scripts/test_Benchmark_Driver.py @@ -188,8 +188,8 @@ def test_gets_list_of_precommit_benchmarks(self): ['Benchmark1', 'Benchmark2']) self.assertEqual(driver.all_tests, ['Benchmark1', 'Benchmark2']) - self.assertEquals(driver.test_number['Benchmark1'], "1") - self.assertEquals(driver.test_number['Benchmark2'], "2") + self.assertEqual(driver.test_number['Benchmark1'], "1") + self.assertEqual(driver.test_number['Benchmark2'], "2") list_all_tests = ( '/benchmarks/Benchmark_O --list --delim=\t 
--skip-tags='.split(' '), @@ -316,10 +316,10 @@ def test_parse_results_from_running_benchmarks(self): """ r = self.driver.run('b') self.assertTrue(self.parser_stub.results_from_string_called) - self.assertEquals(r.name, 'b1') # non-matching name, just 1st result + self.assertEqual(r.name, 'b1') # non-matching name, just 1st result r = self.driver.run() self.assertTrue(isinstance(r, dict)) - self.assertEquals(r['b1'].name, 'b1') + self.assertEqual(r['b1'].name, 'b1') def test_measure_memory(self): self.driver.run('b', measure_memory=True) diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py index 5cc97bb1b4c47..3dc087a5944d5 100644 --- a/benchmark/scripts/test_compare_perf_tests.py +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -105,31 +105,31 @@ def test_computes_inter_quartile_range(self): self.samples.add(Sample(5, 1, 1100)) self.assertEqual(self.samples.iqr, 50) - def assertEqualtats(self, stats, expected_stats): + def assertEqualStats(self, stats, expected_stats): for actual, expected in zip(stats, expected_stats): - self.assertAlmostEquals(actual, expected, places=2) + self.assertAlmostEqual(actual, expected, places=2) def test_computes_mean_sd_cv(self): ss = self.samples - self.assertEqualtats( + self.assertEqualStats( (ss.mean, ss.sd, ss.cv), (1000.0, 0.0, 0.0)) self.samples.add(Sample(2, 1, 1100)) - self.assertEqualtats( + self.assertEqualStats( (ss.mean, ss.sd, ss.cv), (1050.0, 70.71, 6.7 / 100)) def test_computes_range_spread(self): ss = self.samples - self.assertEqualtats( + self.assertEqualStats( (ss.range, ss.spread), (0, 0)) self.samples.add(Sample(2, 1, 1100)) - self.assertEqualtats( + self.assertEqualStats( (ss.range, ss.spread), (100, 10.0 / 100)) def test_init_with_samples(self): self.samples = PerformanceTestSamples( 'B2', [Sample(0, 1, 1000), Sample(1, 1, 1100)]) self.assertEqual(self.samples.count, 2) - self.assertEqualtats( + self.assertEqualStats( (self.samples.mean, self.samples.sd, self.samples.range, self.samples.spread), (1050.0, 70.71, 100, 9.52 / 100)) @@ -138,7 +138,7 @@ def test_can_handle_zero_runtime(self): # guard against dividing by 0 self.samples = PerformanceTestSamples('Zero') self.samples.add(Sample(0, 1, 0)) - self.assertEqualtats( + self.assertEqualStats( (self.samples.mean, self.samples.sd, self.samples.cv, self.samples.range, self.samples.spread), (0, 0, 0.0, 0, 0.0)) @@ -150,7 +150,7 @@ def test_excludes_outliers(self): '10 1 1050, 11 1 949, 12 1 1151'.split(',')] self.samples = PerformanceTestSamples('Outliers', ss) self.assertEqual(self.samples.count, 13) - self.assertEqualtats( + self.assertEqualStats( (self.samples.mean, self.samples.sd), (1050, 52.36)) self.samples.exclude_outliers() @@ -159,7 +159,7 @@ def test_excludes_outliers(self): self.assertEqual(self.samples.outliers, ss[11:]) self.assertEqualFiveNumberSummary( self.samples, (1000, 1025, 1050, 1075, 1100)) - self.assertEqualtats( + self.assertEqualStats( (self.samples.mean, self.samples.sd), (1050, 35.36)) def test_excludes_outliers_zero_IQR(self): @@ -173,7 +173,7 @@ def test_excludes_outliers_zero_IQR(self): self.samples.exclude_outliers() self.assertEqual(self.samples.count, 3) - self.assertEqualtats( + self.assertEqualStats( (self.samples.min, self.samples.max), (18, 18)) def test_excludes_outliers_top_only(self): @@ -186,7 +186,7 @@ def test_excludes_outliers_top_only(self): self.samples.exclude_outliers(top_only=True) self.assertEqual(self.samples.count, 4) - self.assertEqualtats((self.samples.min, self.samples.max), 
(1, 2)) + self.assertEqualStats((self.samples.min, self.samples.max), (1, 2)) class TestPerformanceTestResult(unittest.TestCase): @@ -212,8 +212,8 @@ def test_init_quantiles(self): self.assertEqual(r.name, 'Ackermann') self.assertEqual((r.num_samples, r.min, r.median, r.max), (3, 54383, 54512, 54601)) - self.assertAlmostEquals(r.mean, 54498.67, places=2) - self.assertAlmostEquals(r.sd, 109.61, places=2) + self.assertAlmostEqual(r.mean, 54498.67, places=2) + self.assertAlmostEqual(r.sd, 109.61, places=2) self.assertEqual(r.samples.count, 3) self.assertEqual(r.samples.num_samples, 3) self.assertEqual([s.runtime for s in r.samples.all_samples], @@ -344,7 +344,7 @@ def test_init_meta(self): self.assertEqual((r.num_samples, r.min, r.max), (200, 715, 1259)) self.assertEqual((r.samples.count, r.samples.min, r.samples.max), (2, 715, 1259)) - self.assertEquals(r.max_rss, 32768) + self.assertEqual(r.max_rss, 32768) self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), (8, 28, 15)) log = '1,Ackermann,2,715,,16,9,,' # --delta erased 0s @@ -432,21 +432,21 @@ def setUp(self): def test_init(self): rc = ResultComparison(self.r1, self.r2) self.assertEqual(rc.name, 'AngryPhonebook') - self.assertAlmostEquals(rc.ratio, 12325.0 / 11616.0) - self.assertAlmostEquals(rc.delta, (((11616.0 / 12325.0) - 1) * 100), - places=3) + self.assertAlmostEqual(rc.ratio, 12325.0 / 11616.0) + self.assertAlmostEqual(rc.delta, (((11616.0 / 12325.0) - 1) * 100), + places=3) # handle test results that sometimes change to zero, when compiler # optimizes out the body of the incorrectly written test rc = ResultComparison(self.r0, self.r0) self.assertEqual(rc.name, 'GlobalClass') - self.assertAlmostEquals(rc.ratio, 1) - self.assertAlmostEquals(rc.delta, 0, places=3) + self.assertAlmostEqual(rc.ratio, 1) + self.assertAlmostEqual(rc.delta, 0, places=3) rc = ResultComparison(self.r0, self.r01) - self.assertAlmostEquals(rc.ratio, 0, places=3) - self.assertAlmostEquals(rc.delta, 2000000, places=3) + self.assertAlmostEqual(rc.ratio, 0, places=3) + self.assertAlmostEqual(rc.delta, 2000000, places=3) rc = ResultComparison(self.r01, self.r0) - self.assertAlmostEquals(rc.ratio, 20001) - self.assertAlmostEquals(rc.delta, -99.995, places=3) + self.assertAlmostEqual(rc.ratio, 20001) + self.assertAlmostEqual(rc.delta, -99.995, places=3) # disallow comparison of different test results self.assertRaises( AssertionError, @@ -526,11 +526,11 @@ def test_parse_results_csv(self): parser = LogParser() results = parser.parse_results(log.splitlines()) self.assertTrue(isinstance(results[0], PerformanceTestResult)) - self.assertEquals(results[0].name, 'Array.append.Array.Int?') - self.assertEquals(results[1].name, - 'Bridging.NSArray.as!.Array.NSString') - self.assertEquals(results[2].name, - 'Flatten.Array.Tuple4.lazy.for-in.Reserve') + self.assertEqual(results[0].name, 'Array.append.Array.Int?') + self.assertEqual(results[1].name, + 'Bridging.NSArray.as!.Array.NSString') + self.assertEqual(results[2].name, + 'Flatten.Array.Tuple4.lazy.for-in.Reserve') def test_parse_results_tab_delimited(self): log = '34\tBitCount\t20\t3\t4\t4\t0\t4' @@ -715,8 +715,8 @@ def test_results_from_merge_verbose(self): self.assertEqual(result.min, 350815) self.assertEqual(result.max, 376131) self.assertEqual(result.median, 358817) - self.assertAlmostEquals(result.sd, 8443.37, places=2) - self.assertAlmostEquals(result.mean, 361463.25, places=2) + self.assertAlmostEqual(result.sd, 8443.37, places=2) + self.assertAlmostEqual(result.mean, 361463.25, places=2) 
self.assertEqual(result.num_samples, 8) samples = result.samples self.assertTrue(isinstance(samples, PerformanceTestSamples)) From ed5940edc4ab5054e6896aaf3bb8fbf2039dfcb8 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Wed, 19 Feb 2020 20:56:34 +0100 Subject: [PATCH 07/21] [benchmark] Report ventiles for dubious results For dubious result comparisons, print out empirical sample distribution (ventiles) to enable humans to reach informed decisions about these performance changes. --- benchmark/scripts/compare_perf_tests.py | 60 +++++++++++++++++--- benchmark/scripts/test_compare_perf_tests.py | 46 ++++++++++++--- 2 files changed, 88 insertions(+), 18 deletions(-) diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index e18ef0e39e2ba..d4c21682724cd 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -568,10 +568,12 @@ def header_for(result): ReportFormatter.RESULT_COMPARISON_HEADER) @staticmethod - def values(result): + def values(result, dubious_formatter=lambda r: ' (?)'): """Format values from PerformanceTestResult or ResultComparison. - Returns tuple of strings to display in the results table. + Returns tuple of strings to display in the results table. Uses the + supplied `dubious_formatter` to display the empirical sample + distribution of the dubious result comparison. """ return ( (result.name, @@ -582,14 +584,22 @@ def values(result): (result.name, str(result.old.min), str(result.new.min), '{0:+.1f}%'.format(result.delta), - '{0:.2f}x{1}'.format(result.ratio, - ' (?)' if result.is_dubious else '')) + '{0:.2f}x{1}'.format( + result.ratio, + dubious_formatter(result) if result.is_dubious else '')) ) def markdown(self): """Report results of benchmark comparisons in Markdown format.""" return self._formatted_text( label_formatter=lambda s: ('**' + s + '**'), + ventile_formatter=lambda r: ReportFormatter.ventiles( + r, + START='O: ', + MIDDLE='
N: ', + END='', + OLD_QUARTILE='{0}', + NEW_QUARTILE='{0}'), COLUMN_SEPARATOR=' | ', DELIMITER_ROW=([':---'] + ['---:'] * 4), SEPARATOR='  | | | | \n', @@ -604,12 +614,39 @@ def git(self): """Report results of benchmark comparisons in 'git' format.""" return self._formatted_text( label_formatter=lambda s: s.upper(), + ventile_formatter=lambda r: ReportFormatter.ventiles( + r, + START='\n O: ', + MIDDLE='\n N: ', + END='', + OLD_QUARTILE=' {0} ', + NEW_QUARTILE=' {0} '), COLUMN_SEPARATOR=' ', DELIMITER_ROW=None, SEPARATOR='\n', SECTION=""" {0} ({1}): \n{2}""") + @staticmethod + def ventiles(result, START, MIDDLE, END, OLD_QUARTILE, NEW_QUARTILE): + v = ' (?)' + if not (result.old.samples and result.new.samples): + return v + + def ventiles(samples, QUARTILE): + vs = [str(samples.quantile(ventile)) for ventile in + [v / 100.0 for v in range(5, 100, 5)]] + for i in [4, 9, 14]: + vs[i] = QUARTILE.format(vs[i]) + return ' '.join(vs) + + v += START + v += ventiles(result.old.samples, OLD_QUARTILE) + v += MIDDLE + v += ventiles(result.new.samples, NEW_QUARTILE) + v += END + return v + def _column_widths(self): changed = self.comparator.decreased + self.comparator.increased results = (changed if self.changes_only else @@ -628,8 +665,8 @@ def max_widths(maximum, widths): return reduce(max_widths, widths, [0] * 4) - def _formatted_text(self, label_formatter, COLUMN_SEPARATOR, - DELIMITER_ROW, SEPARATOR, SECTION): + def _formatted_text(self, label_formatter, ventile_formatter, + COLUMN_SEPARATOR, DELIMITER_ROW, SEPARATOR, SECTION): widths = self._column_widths() self.header_printed = False @@ -651,15 +688,20 @@ def header(title, column_labels): self.header_printed = True return h + def bold_first(value): + first, sep, rest = value.partition(' ') + return '**' + first + '**' + sep + rest + def format_columns(r, is_strong): return (r if not is_strong else - r[:-1] + ('**' + r[-1] + '**', )) + r[:-1] + (bold_first(r[-1]), )) def table(title, results, is_strong=False, is_open=False): if not results: return '' - rows = [row(format_columns(ReportFormatter.values(r), is_strong)) - for r in results] + rows = [row(format_columns( + ReportFormatter.values(r, ventile_formatter), is_strong)) + for r in results] table = (header(title if self.single_table else '', ReportFormatter.header_for(results[0])) + ''.join(rows)) diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py index 3dc087a5944d5..f0ae78b4c1416 100644 --- a/benchmark/scripts/test_compare_perf_tests.py +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -507,6 +507,13 @@ class OldAndNewLog(unittest.TestCase): for line in new_log_content.splitlines()])]) + old_results['D'] = PerformanceTestResult( + '184,D,200,648,4,1,5,9,5,3,45,40,3,1,,,,1,1,,4,4,4,268'.split(','), + quantiles=True, delta=True) + new_results['D'] = PerformanceTestResult( + '184,D,200,746,1,3,1,3,2,1,,2,3,1,,1,1,3,1,3,7,9,61,1792'.split(','), + quantiles=True, delta=True) + def assert_report_contains(self, texts, report): assert not isinstance(texts, str) for text in texts: @@ -761,13 +768,13 @@ def names(tests): tc = TestComparator(self.old_results, self.new_results, 0.05) self.assertEqual(names(tc.unchanged), ['AngryPhonebook', 'Array2D']) self.assertEqual(names(tc.increased), ['ByteSwap', 'ArrayAppend']) - self.assertEqual(names(tc.decreased), ['BitCount']) + self.assertEqual(names(tc.decreased), ['BitCount', 'D']) self.assertEqual(names(tc.added), ['TwoSum']) self.assertEqual(names(tc.removed), ['AnyHashableWithAClass']) # other 
way around tc = TestComparator(self.new_results, self.old_results, 0.05) self.assertEqual(names(tc.unchanged), ['AngryPhonebook', 'Array2D']) - self.assertEqual(names(tc.increased), ['BitCount']) + self.assertEqual(names(tc.increased), ['BitCount', 'D']) self.assertEqual(names(tc.decreased), ['ByteSwap', 'ArrayAppend']) self.assertEqual(names(tc.added), ['AnyHashableWithAClass']) self.assertEqual(names(tc.removed), ['TwoSum']) @@ -776,7 +783,7 @@ def names(tests): tc = TestComparator(self.old_results, self.new_results, 1) self.assertEqual( names(tc.unchanged), - ['AngryPhonebook', 'Array2D', 'ArrayAppend', 'BitCount'] + ['AngryPhonebook', 'Array2D', 'ArrayAppend', 'BitCount', 'D'] ) self.assertEqual(names(tc.increased), ['ByteSwap']) self.assertEqual(tc.decreased, []) @@ -876,19 +883,19 @@ def test_column_headers(self): MAX_RSS"""]) def test_emphasize_speedup(self): - """Emphasize speedup values for regressions and improvements""" + """Emphasize speedup values for regressions and improvements.""" # tests in No Changes don't have emphasized speedup self.assert_markdown_contains([ 'BitCount | 3 | 9 | +199.9% | **0.33x**', 'ByteSwap | 4 | 0 | -100.0% | **4001.00x**', 'AngryPhonebook | 10458 | 10458 | +0.0% | 1.00x', - 'ArrayAppend | 23641 | 20000 | -15.4% | **1.18x (?)**' + 'ArrayAppend | 23641 | 20000 | -15.4% | **1.18x** (?)' ]) self.assert_git_contains([ 'BitCount 3 9 +199.9% **0.33x**', 'ByteSwap 4 0 -100.0% **4001.00x**', 'AngryPhonebook 10458 10458 +0.0% 1.00x', - 'ArrayAppend 23641 20000 -15.4% **1.18x (?)**' + 'ArrayAppend 23641 20000 -15.4% **1.18x** (?)' ]) self.assert_html_contains([ """ @@ -917,11 +924,32 @@ def test_emphasize_speedup(self): """ ]) + def test_print_quantiles_for_dubious_changes_with_samples(self): + self.assert_markdown_contains([ + 'D | 648 | 746 | +15.1% | **0.87x** (?)' + '' + 'O: 648 652 653 658 667 672 675 720 760 ' + '763 764 764 764 764 765 766 766 770 774' + '
' + 'N: 746 747 750 751 754 756 757 757 759 ' + '762 763 763 764 765 768 769 772 779 788' + '' + '\n' + ]) + self.assert_git_contains([ + 'D 648 746 +15.1% **0.87x** (?)' + '\n O: 648 652 653 658 667 672 675 720 760 ' # ventiles, old + ' 763 764 764 764 764 765 766 766 770 774' + '\n N: 746 747 750 751 754 756 757 757 759 ' # ventiles, new + ' 762 763 763 764 765 768 769 772 779 788' + '\n' + ]) + def test_sections(self): """Report is divided into sections with summaries.""" self.assert_markdown_contains([ """
- Regression (1)""", + Regression (2)""", """
Improvement (2)""", """
@@ -931,13 +959,13 @@ def test_sections(self): """
Removed (1)"""]) self.assert_git_contains([ - 'Regression (1): \n', + 'Regression (2): \n', 'Improvement (2): \n', 'No Changes (2): \n', 'Added (1): \n', 'Removed (1): \n']) self.assert_html_contains([ - "Regression (1)", + "Regression (2)", "Improvement (2)", "No Changes (2)", "Added (1)", From ef2993aca5738381f3b77af6fa0a888059fc6265 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Wed, 13 Nov 2019 09:06:35 +0100 Subject: [PATCH 08/21] [benchmark] Retire old num-iters in verbose format After commit 331c0bf772a4626aa1f8e1f8aae7629e10d350db from a year ago, all samples from the same run have the same num-iters. --- benchmark/scripts/test_compare_perf_tests.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py index f0ae78b4c1416..460e392ad9eea 100644 --- a/benchmark/scripts/test_compare_perf_tests.py +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -632,7 +632,6 @@ def test_parse_results_verbose(self): Running AngryPhonebook for 3 samples. Measuring with scale 78. Sample 0,11812 - Measuring with scale 90. Sample 1,13898 Sample 2,11467 1,AngryPhonebook,3,11467,13898,12392,1315,11812 @@ -656,7 +655,7 @@ def test_parse_results_verbose(self): ) self.assertEqual(r.num_samples, r.samples.num_samples) self.assertEqual(results[0].samples.all_samples, - [(0, 78, 11812), (1, 90, 13898), (2, 90, 11467)]) + [(0, 78, 11812), (1, 78, 13898), (2, 78, 11467)]) self.assertEqual(r.yields, None) r = results[1] @@ -731,25 +730,15 @@ def test_results_from_merge_verbose(self): def test_excludes_outliers_from_samples(self): verbose_log = """Running DropFirstAnySeqCntRangeLazy for 10 samples. - Measuring with scale 2. Sample 0,455 - Measuring with scale 2. Sample 1,203 - Measuring with scale 2. Sample 2,205 - Measuring with scale 2. Sample 3,207 - Measuring with scale 2. Sample 4,208 - Measuring with scale 2. Sample 5,206 - Measuring with scale 2. Sample 6,205 - Measuring with scale 2. Sample 7,206 - Measuring with scale 2. Sample 8,208 - Measuring with scale 2. 
Sample 9,184 65,DropFirstAnySeqCntRangeLazy,10,184,455,228,79,206 """ From 05b20098d55f6522ed95ac2e0c91a59923fea70a Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Wed, 13 Nov 2019 12:59:42 +0100 Subject: [PATCH 09/21] [benchmark] Add tests for all_samples --- benchmark/scripts/test_compare_perf_tests.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py index 460e392ad9eea..ab9c490a79e9b 100644 --- a/benchmark/scripts/test_compare_perf_tests.py +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -157,6 +157,7 @@ def test_excludes_outliers(self): self.assertEqual(self.samples.count, 11) self.assertEqual(self.samples.outliers, ss[11:]) + self.assertEqual(self.samples.all_samples, ss) self.assertEqualFiveNumberSummary( self.samples, (1000, 1025, 1050, 1075, 1100)) self.assertEqualStats( @@ -175,6 +176,9 @@ def test_excludes_outliers_zero_IQR(self): self.assertEqual(self.samples.count, 3) self.assertEqualStats( (self.samples.min, self.samples.max), (18, 18)) + self.assertEqual(self.samples.all_samples, + [Sample(0, 2, 23), Sample(1, 2, 18), + Sample(2, 2, 18), Sample(3, 2, 18)]) def test_excludes_outliers_top_only(self): ss = [Sample(*map(int, s.split())) for s in From 5342ab3b95622263701c30aa79a1c31f90678e82 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Wed, 13 Nov 2019 13:24:10 +0100 Subject: [PATCH 10/21] [benchmark] Refactor: store num_iters on PTS Store the number of iterations averaged in each sample on the PerformanceTestSamples. --- benchmark/scripts/compare_perf_tests.py | 10 ++++++---- benchmark/scripts/test_compare_perf_tests.py | 10 ++++++++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index d4c21682724cd..a5a4a2b93c679 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -65,9 +65,10 @@ class PerformanceTestSamples(object): Computes the sample population statistics. 
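
A minimal sketch of the idea (a hypothetical Samples class, not the real
PerformanceTestSamples): the iteration count is kept as metadata and
survives the re-initialization that happens when outliers are excluded.

    # Hypothetical sketch: num_iters is carried over when stats are rebuilt.
    class Samples(object):
        def __init__(self, name, samples=None, num_iters=None):
            self.name = name
            self.num_iters = num_iters  # iterations averaged in each sample
            self.samples = list(samples or [])

        def exclude_outliers(self, keep):
            retained = self.samples[:keep]
            # Re-initialize, preserving the num_iters metadata.
            self.__init__(self.name, retained, num_iters=self.num_iters)

    s = Samples('B1', [1000, 1100, 9999], num_iters=78)
    s.exclude_outliers(keep=2)
    assert (s.num_iters, s.samples) == (78, [1000, 1100])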
""" - def __init__(self, name, samples=None): + def __init__(self, name, samples=None, num_iters=None): """Initialize with benchmark name and optional list of Samples.""" self.name = name # Name of the performance test + self.num_iters = num_iters # Number of iterations averaged in sample self.samples = [] self.outliers = [] self._runtimes = [] @@ -119,7 +120,7 @@ def exclude_outliers(self, top_only=False): outliers = self.samples[:lo] + self.samples[hi:] samples = self.samples[lo:hi] - self.__init__(self.name) # re-initialize + self.__init__(self.name, num_iters=self.num_iters) # re-initialize for sample in samples: # and self.add(sample) # re-compute stats self.outliers = outliers @@ -388,7 +389,8 @@ def _append_result(self, result): r.voluntary_cs = self.voluntary_cs r.involuntary_cs = r.involuntary_cs or self.involuntary_cs if self.samples: - r.samples = PerformanceTestSamples(r.name, self.samples) + r.samples = PerformanceTestSamples( + r.name, self.samples, self.num_iters) r.samples.exclude_outliers() self.results.append(r) r.yields = self.yields or None @@ -411,7 +413,7 @@ def _configure_format(self, header): # Verbose mode adds new productions: # Adaptively determined N; test loop multiple adjusting runtime to ~1s re.compile(r'\s+Measuring with scale (\d+).'): - (lambda self, num_iters: setattr(self, 'num_iters', num_iters)), + (lambda self, num_iters: setattr(self, 'num_iters', int(num_iters))), re.compile(r'\s+Sample (\d+),(\d+)'): (lambda self, i, runtime: diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py index ab9c490a79e9b..9d1307ffdd922 100644 --- a/benchmark/scripts/test_compare_perf_tests.py +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -62,6 +62,11 @@ def test_stores_samples(self): self.assertEqual(s.num_iters, 42) self.assertEqual(s.runtime, 1000) + def test_num_iters(self): + self.assertIsNone(self.samples.num_iters) + averaged = PerformanceTestSamples('B1', num_iters=42) + self.assertEqual(averaged.num_iters, 42) + def test_quantile(self): self.assertEqual(self.samples.quantile(1), 1000) self.assertEqual(self.samples.quantile(0), 1000) @@ -658,7 +663,8 @@ def test_parse_results_verbose(self): ('AngryPhonebook', 11467, 13898, 12392, 1315, 11812) ) self.assertEqual(r.num_samples, r.samples.num_samples) - self.assertEqual(results[0].samples.all_samples, + self.assertEqual(r.samples.num_iters, 78) + self.assertEqual(r.samples.all_samples, [(0, 78, 11812), (1, 78, 13898), (2, 78, 11467)]) self.assertEqual(r.yields, None) @@ -669,7 +675,7 @@ def test_parse_results_verbose(self): ) self.assertEqual(r.setup, 14444) self.assertEqual(r.num_samples, r.samples.num_samples) - self.assertEqual(results[1].samples.all_samples, + self.assertEqual(r.samples.all_samples, [(0, 1, 369900), (1, 1, 381039), (2, 1, 371043)]) yielded = r.yields[0] self.assertEqual(yielded.before_sample, 1) From 15dcdaf831c06157f6eefbe99b2af7f31786788c Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Wed, 13 Nov 2019 14:55:14 +0100 Subject: [PATCH 11/21] [benchmark] Refactor: remove class Sample Removed Sample class, that was previously holding num_iters and the ordinal number of the sample. 
--- benchmark/scripts/compare_perf_tests.py | 42 ++++------ benchmark/scripts/test_compare_perf_tests.py | 88 +++++++------------- 2 files changed, 45 insertions(+), 85 deletions(-) diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index a5a4a2b93c679..8e6ff7af3131e 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -17,7 +17,6 @@ Invoke `$ compare_perf_tests.py -h ` for complete list of options. -class `Sample` is single benchmark measurement. class `PerformanceTestSamples` is collection of `Sample`s and their statistics. class `PerformanceTestResult` is a summary of performance test execution. class `LogParser` converts log files into `PerformanceTestResult`s. @@ -37,20 +36,6 @@ class `ReportFormatter` creates the test comparison report in specified format. from math import ceil, sqrt -class Sample(namedtuple('Sample', 'i num_iters runtime')): - u"""Single benchmark measurement. - - Initialized with: - `i`: ordinal number of the sample taken, - `num-num_iters`: number or iterations used to compute it, - `runtime`: in microseconds (μs). - """ - - def __repr__(self): - """Shorter Sample formating for debugging purposes.""" - return 's({0.i!r}, {0.num_iters!r}, {0.runtime!r})'.format(self) - - class Yield(namedtuple('Yield', 'before_sample after')): u"""Meta-measurement of when the Benchmark_X voluntarily yielded process. @@ -70,6 +55,7 @@ def __init__(self, name, samples=None, num_iters=None): self.name = name # Name of the performance test self.num_iters = num_iters # Number of iterations averaged in sample self.samples = [] + self._all_samples = [] self.outliers = [] self._runtimes = [] self.mean = 0.0 @@ -90,16 +76,17 @@ def __str__(self): def add(self, sample): """Add sample to collection and recompute statistics.""" - assert isinstance(sample, Sample) + assert isinstance(sample, int) self._update_stats(sample) - i = bisect(self._runtimes, sample.runtime) - self._runtimes.insert(i, sample.runtime) + i = bisect(self._runtimes, sample) + self._runtimes.insert(i, sample) self.samples.insert(i, sample) + self._all_samples.append(sample) def _update_stats(self, sample): old_stats = (self.count, self.mean, self.S_runtime) _, self.mean, self.S_runtime = ( - self.running_mean_variance(old_stats, sample.runtime)) + self.running_mean_variance(old_stats, sample)) def exclude_outliers(self, top_only=False): """Exclude outliers by applying Interquartile Range Rule. @@ -119,11 +106,13 @@ def exclude_outliers(self, top_only=False): outliers = self.samples[:lo] + self.samples[hi:] samples = self.samples[lo:hi] + all = self._all_samples self.__init__(self.name, num_iters=self.num_iters) # re-initialize for sample in samples: # and self.add(sample) # re-compute stats self.outliers = outliers + self._all_samples = all @property def count(self): @@ -138,17 +127,17 @@ def num_samples(self): @property def all_samples(self): """List of all samples in ascending order.""" - return sorted(self.samples + self.outliers, key=lambda s: s.i) + return self._all_samples @property def min(self): """Minimum sampled value.""" - return self.samples[0].runtime + return self.samples[0] @property def max(self): """Maximum sampled value.""" - return self.samples[-1].runtime + return self.samples[-1] def quantile(self, q): """Return runtime for given quantile. 
@@ -157,7 +146,7 @@ def quantile(self, q): https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample """ index = max(0, int(ceil(self.count * float(q))) - 1) - return self.samples[index].runtime + return self.samples[index] @property def median(self): @@ -257,8 +246,7 @@ def __init__(self, csv_row, quantiles=False, memory=False, delta=False, for i in range(0, self.num_samples)] self.samples = PerformanceTestSamples( - self.name, - [Sample(None, None, int(runtime)) for runtime in runtimes]) + self.name, [int(runtime) for runtime in runtimes]) self.samples.exclude_outliers(top_only=True) sams = self.samples self.min, self.max, self.median, self.mean, self.sd = \ @@ -416,9 +404,7 @@ def _configure_format(self, header): (lambda self, num_iters: setattr(self, 'num_iters', int(num_iters))), re.compile(r'\s+Sample (\d+),(\d+)'): - (lambda self, i, runtime: - self.samples.append( - Sample(int(i), int(self.num_iters), int(runtime)))), + (lambda self, i, runtime: self.samples.append(int(runtime))), re.compile(r'\s+SetUp (\d+)'): (lambda self, setup: setattr(self, 'setup', int(setup))), diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py index 9d1307ffdd922..fa4e7c054afd5 100644 --- a/benchmark/scripts/test_compare_perf_tests.py +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -24,7 +24,6 @@ from compare_perf_tests import PerformanceTestSamples from compare_perf_tests import ReportFormatter from compare_perf_tests import ResultComparison -from compare_perf_tests import Sample from compare_perf_tests import TestComparator from compare_perf_tests import main from compare_perf_tests import parse_args @@ -32,35 +31,17 @@ from test_utils import captured_output -class TestSample(unittest.TestCase): - def test_has_named_fields(self): - s = Sample(1, 2, 3) - self.assertEqual(s.i, 1) - self.assertEqual(s.num_iters, 2) - self.assertEqual(s.runtime, 3) - - def test_is_iterable(self): - s = Sample(1, 2, 3) - self.assertEqual(s[0], 1) - self.assertEqual(s[1], 2) - self.assertEqual(s[2], 3) - - class TestPerformanceTestSamples(unittest.TestCase): def setUp(self): self.samples = PerformanceTestSamples('B1') - self.samples.add(Sample(7, 42, 1000)) + self.samples.add(1000) def test_has_name(self): self.assertEqual(self.samples.name, 'B1') def test_stores_samples(self): self.assertEqual(self.samples.count, 1) - s = self.samples.samples[0] - self.assertTrue(isinstance(s, Sample)) - self.assertEqual(s.i, 7) - self.assertEqual(s.num_iters, 42) - self.assertEqual(s.runtime, 1000) + self.assertEqual(self.samples.samples[0], 1000) def test_num_iters(self): self.assertIsNone(self.samples.num_iters) @@ -70,10 +51,10 @@ def test_num_iters(self): def test_quantile(self): self.assertEqual(self.samples.quantile(1), 1000) self.assertEqual(self.samples.quantile(0), 1000) - self.samples.add(Sample(2, 1, 1100)) + self.samples.add(1100) self.assertEqual(self.samples.quantile(0), 1000) self.assertEqual(self.samples.quantile(1), 1100) - self.samples.add(Sample(3, 1, 1050)) + self.samples.add(1050) self.assertEqual(self.samples.quantile(0), 1000) self.assertEqual(self.samples.quantile(.5), 1050) self.assertEqual(self.samples.quantile(1), 1100) @@ -89,25 +70,25 @@ def assertEqualFiveNumberSummary(self, ss, expected_fns): def test_computes_five_number_summary(self): self.assertEqualFiveNumberSummary( self.samples, (1000, 1000, 1000, 1000, 1000)) - self.samples.add(Sample(2, 1, 1100)) + self.samples.add(1100) self.assertEqualFiveNumberSummary( self.samples, (1000, 
1000, 1000, 1100, 1100)) - self.samples.add(Sample(3, 1, 1050)) + self.samples.add(1050) self.assertEqualFiveNumberSummary( self.samples, (1000, 1000, 1050, 1100, 1100)) - self.samples.add(Sample(4, 1, 1025)) + self.samples.add(1025) self.assertEqualFiveNumberSummary( self.samples, (1000, 1000, 1025, 1050, 1100)) - self.samples.add(Sample(5, 1, 1075)) + self.samples.add(1075) self.assertEqualFiveNumberSummary( self.samples, (1000, 1025, 1050, 1075, 1100)) def test_computes_inter_quartile_range(self): self.assertEqual(self.samples.iqr, 0) - self.samples.add(Sample(2, 1, 1025)) - self.samples.add(Sample(3, 1, 1050)) - self.samples.add(Sample(4, 1, 1075)) - self.samples.add(Sample(5, 1, 1100)) + self.samples.add(1025) + self.samples.add(1050) + self.samples.add(1075) + self.samples.add(1100) self.assertEqual(self.samples.iqr, 50) def assertEqualStats(self, stats, expected_stats): @@ -118,7 +99,7 @@ def test_computes_mean_sd_cv(self): ss = self.samples self.assertEqualStats( (ss.mean, ss.sd, ss.cv), (1000.0, 0.0, 0.0)) - self.samples.add(Sample(2, 1, 1100)) + self.samples.add(1100) self.assertEqualStats( (ss.mean, ss.sd, ss.cv), (1050.0, 70.71, 6.7 / 100)) @@ -126,13 +107,13 @@ def test_computes_range_spread(self): ss = self.samples self.assertEqualStats( (ss.range, ss.spread), (0, 0)) - self.samples.add(Sample(2, 1, 1100)) + self.samples.add(1100) self.assertEqualStats( (ss.range, ss.spread), (100, 10.0 / 100)) def test_init_with_samples(self): self.samples = PerformanceTestSamples( - 'B2', [Sample(0, 1, 1000), Sample(1, 1, 1100)]) + 'B2', [1000, 1100]) self.assertEqual(self.samples.count, 2) self.assertEqualStats( (self.samples.mean, self.samples.sd, @@ -142,17 +123,16 @@ def test_init_with_samples(self): def test_can_handle_zero_runtime(self): # guard against dividing by 0 self.samples = PerformanceTestSamples('Zero') - self.samples.add(Sample(0, 1, 0)) + self.samples.add(0) self.assertEqualStats( (self.samples.mean, self.samples.sd, self.samples.cv, self.samples.range, self.samples.spread), (0, 0, 0.0, 0, 0.0)) def test_excludes_outliers(self): - ss = [Sample(*map(int, s.split())) for s in - '0 1 1000, 1 1 1025, 2 1 1050, 3 1 1075, 4 1 1100, ' - '5 1 1000, 6 1 1025, 7 1 1050, 8 1 1075, 9 1 1100, ' - '10 1 1050, 11 1 949, 12 1 1151'.split(',')] + ss = [1000, 1025, 1050, 1075, 1100, + 1000, 1025, 1050, 1075, 1100, + 1050, 949, 1151] self.samples = PerformanceTestSamples('Outliers', ss) self.assertEqual(self.samples.count, 13) self.assertEqualStats( @@ -170,10 +150,10 @@ def test_excludes_outliers(self): def test_excludes_outliers_zero_IQR(self): self.samples = PerformanceTestSamples('Tight') - self.samples.add(Sample(0, 2, 23)) - self.samples.add(Sample(1, 2, 18)) - self.samples.add(Sample(2, 2, 18)) - self.samples.add(Sample(3, 2, 18)) + self.samples.add(23) + self.samples.add(18) + self.samples.add(18) + self.samples.add(18) self.assertEqual(self.samples.iqr, 0) self.samples.exclude_outliers() @@ -181,14 +161,10 @@ def test_excludes_outliers_zero_IQR(self): self.assertEqual(self.samples.count, 3) self.assertEqualStats( (self.samples.min, self.samples.max), (18, 18)) - self.assertEqual(self.samples.all_samples, - [Sample(0, 2, 23), Sample(1, 2, 18), - Sample(2, 2, 18), Sample(3, 2, 18)]) + self.assertEqual(self.samples.all_samples, [23, 18, 18, 18]) def test_excludes_outliers_top_only(self): - ss = [Sample(*map(int, s.split())) for s in - '0 1 1, 1 1 2, 2 1 2, 3 1 2, 4 1 3'.split(',')] - self.samples = PerformanceTestSamples('Top', ss) + self.samples = PerformanceTestSamples('Top', [1, 2, 
2, 2, 3]) self.assertEqualFiveNumberSummary(self.samples, (1, 2, 2, 2, 3)) self.assertEqual(self.samples.iqr, 0) @@ -225,7 +201,7 @@ def test_init_quantiles(self): self.assertAlmostEqual(r.sd, 109.61, places=2) self.assertEqual(r.samples.count, 3) self.assertEqual(r.samples.num_samples, 3) - self.assertEqual([s.runtime for s in r.samples.all_samples], + self.assertEqual(r.samples.all_samples, [54383, 54512, 54601]) # #,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B) @@ -275,7 +251,7 @@ def validatePTR(deq): # construct from delta encoded quantiles string r = PerformanceTestResult(['0', 'B', str(num_samples)] + deq, quantiles=True, delta=True) self.assertEqual(r.samples.num_samples, num_samples) - self.assertEqual([s.runtime for s in r.samples.all_samples], + self.assertEqual(r.samples.all_samples, range(1, num_samples + 1)) delta_encoded_quantiles = """ @@ -575,12 +551,12 @@ def test_parse_quantiles(self): r = LogParser.results_from_string( """#,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs) 1,Ackermann,3,54383,54512,54601""")['Ackermann'] - self.assertEqual([s.runtime for s in r.samples.all_samples], + self.assertEqual(r.samples.all_samples, [54383, 54512, 54601]) r = LogParser.results_from_string( """#,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B) 1,Ackermann,3,54529,54760,55807,266240""")['Ackermann'] - self.assertEqual([s.runtime for s in r.samples.all_samples], + self.assertEqual(r.samples.all_samples, [54529, 54760, 55807]) self.assertEqual(r.max_rss, 266240) @@ -664,8 +640,7 @@ def test_parse_results_verbose(self): ) self.assertEqual(r.num_samples, r.samples.num_samples) self.assertEqual(r.samples.num_iters, 78) - self.assertEqual(r.samples.all_samples, - [(0, 78, 11812), (1, 78, 13898), (2, 78, 11467)]) + self.assertEqual(r.samples.all_samples, [11812, 13898, 11467]) self.assertEqual(r.yields, None) r = results[1] @@ -675,8 +650,7 @@ def test_parse_results_verbose(self): ) self.assertEqual(r.setup, 14444) self.assertEqual(r.num_samples, r.samples.num_samples) - self.assertEqual(r.samples.all_samples, - [(0, 1, 369900), (1, 1, 381039), (2, 1, 371043)]) + self.assertEqual(r.samples.all_samples, [369900, 381039, 371043]) yielded = r.yields[0] self.assertEqual(yielded.before_sample, 1) self.assertEqual(yielded.after, 369918) From 6f0eb7b15ca9e25075ef0b332419cf08432f019c Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Thu, 14 Nov 2019 13:21:12 +0100 Subject: [PATCH 12/21] [benchmark] Refactor: simpler exclude_outliers --- benchmark/scripts/compare_perf_tests.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index 8e6ff7af3131e..b19bcdb274f2e 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -104,15 +104,11 @@ def exclude_outliers(self, top_only=False): bisect_left(self._runtimes, int(self.q1 - 1.5 * self.iqr))) hi = bisect_right(self._runtimes, int(self.q3 + 1.5 * self.iqr)) - outliers = self.samples[:lo] + self.samples[hi:] - samples = self.samples[lo:hi] - all = self._all_samples - - self.__init__(self.name, num_iters=self.num_iters) # re-initialize - for sample in samples: # and - self.add(sample) # re-compute stats - self.outliers = outliers - self._all_samples = all + self.outliers = self.samples[:lo] + self.samples[hi:] + self.samples = self.samples[lo:hi] + # re-compute stats + _, self.mean, self.S_runtime = reduce( + self.running_mean_variance, self.samples, (0, 0.0, 0.0)) @property def count(self): From 
abacc9f4edf63688a662ece37c3658fed21170cc Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Wed, 19 Feb 2020 21:17:30 +0100 Subject: [PATCH 13/21] [benchmark] Refactor: remove redundant _runtimes --- benchmark/scripts/compare_perf_tests.py | 28 ++++++++++++++----------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index b19bcdb274f2e..3a518a9d2d87e 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -54,14 +54,17 @@ def __init__(self, name, samples=None, num_iters=None): """Initialize with benchmark name and optional list of Samples.""" self.name = name # Name of the performance test self.num_iters = num_iters # Number of iterations averaged in sample - self.samples = [] - self._all_samples = [] self.outliers = [] - self._runtimes = [] self.mean = 0.0 self.S_runtime = 0.0 # For computing running variance - for sample in samples or []: - self.add(sample) + if samples: + self._all_samples = samples + ascending = sorted(samples) + self.samples = samples if samples == ascending else ascending + self._recompute_stats() + else: + self.samples = [] + self._all_samples = [] def __str__(self): """Text summary of benchmark statistics.""" @@ -78,8 +81,7 @@ def add(self, sample): """Add sample to collection and recompute statistics.""" assert isinstance(sample, int) self._update_stats(sample) - i = bisect(self._runtimes, sample) - self._runtimes.insert(i, sample) + i = bisect(self.samples, sample) self.samples.insert(i, sample) self._all_samples.append(sample) @@ -88,6 +90,10 @@ def _update_stats(self, sample): _, self.mean, self.S_runtime = ( self.running_mean_variance(old_stats, sample)) + def _recompute_stats(self): + _, self.mean, self.S_runtime = reduce( + self.running_mean_variance, self.samples, (0, 0.0, 0.0)) + def exclude_outliers(self, top_only=False): """Exclude outliers by applying Interquartile Range Rule. @@ -101,14 +107,12 @@ def exclude_outliers(self, top_only=False): the environment noise caused by preemtive multitasking. """ lo = (0 if top_only else - bisect_left(self._runtimes, int(self.q1 - 1.5 * self.iqr))) - hi = bisect_right(self._runtimes, int(self.q3 + 1.5 * self.iqr)) + bisect_left(self.samples, int(self.q1 - 1.5 * self.iqr))) + hi = bisect_right(self.samples, int(self.q3 + 1.5 * self.iqr)) self.outliers = self.samples[:lo] + self.samples[hi:] self.samples = self.samples[lo:hi] - # re-compute stats - _, self.mean, self.S_runtime = reduce( - self.running_mean_variance, self.samples, (0, 0.0, 0.0)) + self._recompute_stats() @property def count(self): From cafb644388ea829d2d4103f4fe4ae586e8d5f07c Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Thu, 14 Nov 2019 18:35:17 +0100 Subject: [PATCH 14/21] [benchmark] More immutable PerformanceTestSamples Removed the ability to add individual samples to PTS. PerformanceTestSamples are technically not really immutable, because of exclude_outliers. --- benchmark/scripts/compare_perf_tests.py | 19 ++--- benchmark/scripts/test_compare_perf_tests.py | 74 +++++++++----------- 2 files changed, 38 insertions(+), 55 deletions(-) diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index 3a518a9d2d87e..92d092da0408f 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -31,7 +31,7 @@ class `ReportFormatter` creates the test comparison report in specified format. 
import argparse import re import sys -from bisect import bisect, bisect_left, bisect_right +from bisect import bisect_left, bisect_right from collections import namedtuple from math import ceil, sqrt @@ -77,19 +77,6 @@ def __str__(self): .format(self) if self.samples else '{0.name!s} n=0'.format(self)) - def add(self, sample): - """Add sample to collection and recompute statistics.""" - assert isinstance(sample, int) - self._update_stats(sample) - i = bisect(self.samples, sample) - self.samples.insert(i, sample) - self._all_samples.append(sample) - - def _update_stats(self, sample): - old_stats = (self.count, self.mean, self.S_runtime) - _, self.mean, self.S_runtime = ( - self.running_mean_variance(old_stats, sample)) - def _recompute_stats(self): _, self.mean, self.S_runtime = reduce( self.running_mean_variance, self.samples, (0, 0.0, 0.0)) @@ -289,7 +276,9 @@ def merge(self, r): """ # Statistics if self.samples and r.samples: - map(self.samples.add, r.samples.samples) + self.samples.samples = sorted( + self.samples.samples + r.samples.samples) + self.samples._recompute_stats() sams = self.samples self.num_samples += r.num_samples sams.outliers += r.samples.outliers diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py index fa4e7c054afd5..63eed97561d81 100644 --- a/benchmark/scripts/test_compare_perf_tests.py +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -33,8 +33,7 @@ class TestPerformanceTestSamples(unittest.TestCase): def setUp(self): - self.samples = PerformanceTestSamples('B1') - self.samples.add(1000) + self.samples = PerformanceTestSamples('B1', [1000]) def test_has_name(self): self.assertEqual(self.samples.name, 'B1') @@ -42,22 +41,29 @@ def test_has_name(self): def test_stores_samples(self): self.assertEqual(self.samples.count, 1) self.assertEqual(self.samples.samples[0], 1000) + self.assertEqual(self.samples.samples, [1000]) + self.assertEqual(self.samples.all_samples, [1000]) + + s = PerformanceTestSamples('B1', [1000, 1100, 1050]) + self.assertEqual(s.samples, [1000, 1050, 1100]) # sorted + self.assertEqual(s.all_samples, [1000, 1100, 1050]) # maintains order def test_num_iters(self): self.assertIsNone(self.samples.num_iters) - averaged = PerformanceTestSamples('B1', num_iters=42) + averaged = PerformanceTestSamples('B', num_iters=42) self.assertEqual(averaged.num_iters, 42) def test_quantile(self): - self.assertEqual(self.samples.quantile(1), 1000) - self.assertEqual(self.samples.quantile(0), 1000) - self.samples.add(1100) - self.assertEqual(self.samples.quantile(0), 1000) - self.assertEqual(self.samples.quantile(1), 1100) - self.samples.add(1050) - self.assertEqual(self.samples.quantile(0), 1000) - self.assertEqual(self.samples.quantile(.5), 1050) - self.assertEqual(self.samples.quantile(1), 1100) + s = PerformanceTestSamples('B1', [1000]) + self.assertEqual(s.quantile(1), 1000) + self.assertEqual(s.quantile(0), 1000) + s = PerformanceTestSamples('B1', [1000, 1100]) + self.assertEqual(s.quantile(0), 1000) + self.assertEqual(s.quantile(1), 1100) + s = PerformanceTestSamples('B1', [1000, 1100, 1050]) + self.assertEqual(s.quantile(0), 1000) + self.assertEqual(s.quantile(.5), 1050) + self.assertEqual(s.quantile(1), 1100) def assertEqualFiveNumberSummary(self, ss, expected_fns): e_min, e_q1, e_median, e_q3, e_max = expected_fns @@ -68,28 +74,21 @@ def assertEqualFiveNumberSummary(self, ss, expected_fns): self.assertEqual(ss.max, e_max) def test_computes_five_number_summary(self): - self.assertEqualFiveNumberSummary( - 
self.samples, (1000, 1000, 1000, 1000, 1000)) - self.samples.add(1100) - self.assertEqualFiveNumberSummary( - self.samples, (1000, 1000, 1000, 1100, 1100)) - self.samples.add(1050) - self.assertEqualFiveNumberSummary( - self.samples, (1000, 1000, 1050, 1100, 1100)) - self.samples.add(1025) - self.assertEqualFiveNumberSummary( - self.samples, (1000, 1000, 1025, 1050, 1100)) - self.samples.add(1075) - self.assertEqualFiveNumberSummary( - self.samples, (1000, 1025, 1050, 1075, 1100)) + s = PerformanceTestSamples('B', [1000]) + self.assertEqualFiveNumberSummary(s, (1000, 1000, 1000, 1000, 1000)) + s = PerformanceTestSamples('B', [1000, 1100]) + self.assertEqualFiveNumberSummary(s, (1000, 1000, 1000, 1100, 1100)) + s = PerformanceTestSamples('B', [1000, 1100, 1050]) + self.assertEqualFiveNumberSummary(s, (1000, 1000, 1050, 1100, 1100)) + s = PerformanceTestSamples('B', [1000, 1100, 1050, 1025]) + self.assertEqualFiveNumberSummary(s, (1000, 1000, 1025, 1050, 1100)) + s = PerformanceTestSamples('B', [1000, 1100, 1050, 1025, 1075]) + self.assertEqualFiveNumberSummary(s, (1000, 1025, 1050, 1075, 1100)) def test_computes_inter_quartile_range(self): self.assertEqual(self.samples.iqr, 0) - self.samples.add(1025) - self.samples.add(1050) - self.samples.add(1075) - self.samples.add(1100) - self.assertEqual(self.samples.iqr, 50) + s = PerformanceTestSamples('B', [1000, 1025, 1050, 1075, 1100]) + self.assertEqual(s.iqr, 50) def assertEqualStats(self, stats, expected_stats): for actual, expected in zip(stats, expected_stats): @@ -99,7 +98,7 @@ def test_computes_mean_sd_cv(self): ss = self.samples self.assertEqualStats( (ss.mean, ss.sd, ss.cv), (1000.0, 0.0, 0.0)) - self.samples.add(1100) + ss = PerformanceTestSamples('B', [1000, 1100]) self.assertEqualStats( (ss.mean, ss.sd, ss.cv), (1050.0, 70.71, 6.7 / 100)) @@ -107,7 +106,7 @@ def test_computes_range_spread(self): ss = self.samples self.assertEqualStats( (ss.range, ss.spread), (0, 0)) - self.samples.add(1100) + ss = PerformanceTestSamples('B', [1000, 1100]) self.assertEqualStats( (ss.range, ss.spread), (100, 10.0 / 100)) @@ -122,8 +121,7 @@ def test_init_with_samples(self): def test_can_handle_zero_runtime(self): # guard against dividing by 0 - self.samples = PerformanceTestSamples('Zero') - self.samples.add(0) + self.samples = PerformanceTestSamples('Zero', [0]) self.assertEqualStats( (self.samples.mean, self.samples.sd, self.samples.cv, self.samples.range, self.samples.spread), @@ -149,11 +147,7 @@ def test_excludes_outliers(self): (self.samples.mean, self.samples.sd), (1050, 35.36)) def test_excludes_outliers_zero_IQR(self): - self.samples = PerformanceTestSamples('Tight') - self.samples.add(23) - self.samples.add(18) - self.samples.add(18) - self.samples.add(18) + self.samples = PerformanceTestSamples('Tight', [23, 18, 18, 18]) self.assertEqual(self.samples.iqr, 0) self.samples.exclude_outliers() From bbdcaf818b9f9d95807adf6f23a65b67c46f2ef4 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Thu, 21 Nov 2019 09:35:41 +0100 Subject: [PATCH 15/21] [benchmark] Fix crash in oversampled results Gracefully handle the parsing of oversampled values in critical configuration, when the sampling error causes an ommision of certain quantiles from the report. 
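The reconstruction of individual runtimes from a quantile summary uses R's type 1
(discontinuous) sample quantile estimate; the guard added here drops the sample indices that
the estimate fails to cover, instead of crashing on indices.index(i). A rough standalone
sketch, assuming a hypothetical helper name and already-decoded quantile runtimes (the patched
code lives in PerformanceTestResult.__init__):

    from math import ceil

    def runtimes_from_quantiles(num_samples, quantile_runtimes):
        """Recover individual sample runtimes from a quantile summary."""
        num_quantiles = len(quantile_runtimes) - 1
        qs = [float(i) / num_quantiles for i in range(num_quantiles + 1)]
        # index of the sample each quantile points at (R's type 1 estimate)
        indices = [max(0, int(ceil(num_samples * q)) - 1) for q in qs]
        return [quantile_runtimes[indices.index(i)]
                for i in range(num_samples)
                if i in indices]   # skip indices lost to sampling error

    # 3 samples summarized as MIN, MEDIAN, MAX round-trip unchanged:
    assert runtimes_from_quantiles(3, [54383, 54512, 54601]) == [54383, 54512, 54601]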
--- benchmark/scripts/compare_perf_tests.py | 3 +- benchmark/scripts/test_compare_perf_tests.py | 42 +++++++++++++++++--- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index 92d092da0408f..99baa359f6c19 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -230,7 +230,8 @@ def __init__(self, csv_row, quantiles=False, memory=False, delta=False, indices = [max(0, int(ceil(self.num_samples * float(q))) - 1) for q in qs] runtimes = [runtimes[indices.index(i)] - for i in range(0, self.num_samples)] + for i in range(0, self.num_samples) + if i in indices] self.samples = PerformanceTestSamples( self.name, [int(runtime) for runtime in runtimes]) diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py index 63eed97561d81..4e953fa7ffc37 100644 --- a/benchmark/scripts/test_compare_perf_tests.py +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -232,12 +232,14 @@ def test_init_oversampled_quantiles(self): repeated in the report summary. Samples should contain only true values, discarding the repetated artifacts from quantile estimation. - The test string is slightly massaged output of the following R script: - subsample <- function(x, q) { - quantile(1:x, probs=((0:(q-1))/(q-1)), type=1)} - tbl <- function(s) t(sapply(1:s, function(x) { - qs <- subsample(x, s); c(qs[1], diff(qs)) })) - sapply(c(3, 5, 11, 21), tbl) + The test string is generated by the following R script: + subsample <- function(x, q) { quantile(1:x, probs=((0:q)/q), type=1)} + drop0s <- function(x) switch(x+1, '', '1') + tbl <- function(s) paste0(sapply(1:s, function(x) { + qs <- subsample(x, s); + paste0(sapply(c(qs[1], diff(qs)), drop0s), collapse=',') + }), collapse='\n') + cat(paste0(sapply(c(2, 4, 10, 20), tbl), collapse='\n')) """ def validatePTR(deq): # construct from delta encoded quantiles string deq = deq.split(',') @@ -287,6 +289,34 @@ def validatePTR(deq): # construct from delta encoded quantiles string 1,,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1""" map(validatePTR, delta_encoded_quantiles.split('\n')[1:]) + def test_init_oversampled_percentiles_100_samples(self): + """Test that we don't crash parsing result with sampling error. + + Our chosen discontinuous sample quantile type (R's type 1) combined + with the imprecise nature of the floating point arithmetic is, in + certain cases, causing sampling errors. + + For example when working with percentiles and there are precisely 100 + samples: the 7th, 14th and 28th samples are skipped and the following + samples are reported twice; the 55th sample is skipped (reporting 56th + instead) and 57th is reported twice. + + Since the issue is even present in R's quantile function (type=1) as + well as our Swift and Python implementations, we will ignore these + sampling errors. 
+ """ + dep = ('1,,1,1,1,1,1,2,,1,1,1,1,1,2,,1,1,1,1,1,1,1,1,1,1,1,1,2,,1,1,' + + '1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,,1,1,1,1,1' + + ',1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1' + + ',1,1,1,1,1,1,1,1') + dep = dep.split(',') # delta encoded percentiles + self.assertEqual(len(dep), 101) # min + 99 percentiles + max + num_samples = '100' + r = PerformanceTestResult(['0', 'B', num_samples] + dep, + quantiles=True, delta=True) + self.assertEqual(r.num_samples, 100) + self.assertEqual(r.samples.count, 96) # missing indexes 7, 14, 28, 55 + def test_init_meta(self): # #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),… # …PAGES,ICS,YIELD From dd2d83d90ec96e7a021b5d6941ebd1f5b1d71b8f Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Thu, 27 Feb 2020 23:09:42 +0100 Subject: [PATCH 16/21] [benchmark] Collate metadata when merging PTRs Handle optional `--meta` data in `merge` of `PerformanceTestResults`. Pick minimum from memory pages, sum the number of involuntrary contex switches and yield counts. --- benchmark/scripts/compare_perf_tests.py | 9 +++++++- benchmark/scripts/test_compare_perf_tests.py | 23 ++++++++++++++------ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index 99baa359f6c19..dd028d0383bbe 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -294,12 +294,19 @@ def merge(self, r): self.num_samples += r.num_samples self.median, self.sd = None, None - # Metadata + # Collate the Metadata def minimum(a, b): # work around None being less than everything return (min(filter(lambda x: x is not None, [a, b])) if any([a, b]) else None) + + def sum_none(a, b): # work around None being less than everything + return (sum(filter(lambda x: x is not None, [a, b])) if any([a, b]) + else None) self.max_rss = minimum(self.max_rss, r.max_rss) + self.mem_pages = minimum(self.mem_pages, r.mem_pages) self.setup = minimum(self.setup, r.setup) + self.involuntary_cs = sum_none(self.involuntary_cs, r.involuntary_cs) + self.yield_count = sum_none(self.yield_count, r.yield_count) class ResultComparison(object): diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py index 4e953fa7ffc37..a8e4e0200fa24 100644 --- a/benchmark/scripts/test_compare_perf_tests.py +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -379,8 +379,6 @@ def test_merge(self): 1,AngryPhonebook,1,12270,12270,12270,0,12270,10498048""".split('\n')[1:] results = map(PerformanceTestResult, [line.split(',') for line in tests]) - results[2].setup = 9 - results[3].setup = 7 def as_tuple(r): return (r.num_samples, r.min, r.max, round(r.mean, 2), @@ -392,9 +390,11 @@ def as_tuple(r): r.merge(results[1]) self.assertEqual(as_tuple(r), # drops SD and median, +max_rss (2, 12045, 12325, 12185, None, None, 10510336, None)) + results[2].setup = 9 r.merge(results[2]) self.assertEqual(as_tuple(r), # picks smaller of the MAX_RSS, +setup (3, 11616, 12325, 11995.33, None, None, 10502144, 9)) + results[3].setup = 7 r.merge(results[3]) self.assertEqual(as_tuple(r), # picks smaller of the setup values (4, 11616, 12325, 12064, None, None, 10498048, 7)) @@ -417,14 +417,23 @@ def test_merge_with_samples(self): def as_tuple(r): return (r.num_samples, r.samples.num_samples, r.samples.count, - r.min, r.samples.median, r.max) + r.min, r.samples.median, r.max, + r.mem_pages, r.involuntary_cs, r.yield_count) r = results[0] - 
self.assertEqual(as_tuple(r), (200, 21, 18, 967, 996, 1008)) + self.assertEqual(as_tuple(r), + (200, 21, 18, 967, 996, 1008, None, None, None)) + # merging optional --meta data + results[1].mem_pages = 9 + results[1].involuntary_cs = 1 + results[1].yield_count = 4 r.merge(results[1]) # 18 + 17 = 35, after merge using only ventiles - self.assertEqual(as_tuple(r), (400, 42, 35, 967, 983, 1010)) - r.merge(results[2]) # 35 + 18 = 53 - self.assertEqual(as_tuple(r), (600, 63, 53, 967, 989, 1029)) + self.assertEqual(as_tuple(r), (400, 42, 35, 967, 983, 1010, 9, 1, 4)) + results[2].mem_pages = 7 + results[2].involuntary_cs = 2 + results[2].yield_count = 6 + r.merge(results[2]) # 35 + 18 = 53; sum yields and context switches + self.assertEqual(as_tuple(r), (600, 63, 53, 967, 989, 1029, 7, 3, 10)) class TestResultComparison(unittest.TestCase): From 7d25484f79edcf0574f8c96a5f7d5f0f318ab1e4 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Fri, 28 Feb 2020 00:46:10 +0100 Subject: [PATCH 17/21] [benchmark] Keep merged independent run samples When merging `PerformanceTestResults`s keep the original `PerformanceTestSample`s from all independent runs. These will be used to choose the most stable (least variable) location estimate for the `ResultComparison` down the road. --- benchmark/scripts/compare_perf_tests.py | 17 ++++++++++++----- benchmark/scripts/test_compare_perf_tests.py | 14 ++++++++++++++ 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index dd028d0383bbe..434ea34b07726 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -113,7 +113,7 @@ def num_samples(self): @property def all_samples(self): - """List of all samples in ascending order.""" + """List of all samples in original order.""" return self._all_samples @property @@ -236,6 +236,7 @@ def __init__(self, csv_row, quantiles=False, memory=False, delta=False, self.samples = PerformanceTestSamples( self.name, [int(runtime) for runtime in runtimes]) self.samples.exclude_outliers(top_only=True) + self.independent_runs = [self.samples] sams = self.samples self.min, self.max, self.median, self.mean, self.sd = \ sams.min, sams.max, sams.median, sams.mean, sams.sd @@ -277,12 +278,18 @@ def merge(self, r): """ # Statistics if self.samples and r.samples: - self.samples.samples = sorted( - self.samples.samples + r.samples.samples) - self.samples._recompute_stats() + if hasattr(self, 'independent_runs'): + self.independent_runs.append(r.samples) + else: + self.independent_runs = [self.samples, r.samples] + outliers = self.samples.outliers + r.samples.outliers + all_samples = self.samples.all_samples + r.samples.all_samples + self.samples = PerformanceTestSamples( + self.name, sorted(self.samples.samples + r.samples.samples)) sams = self.samples self.num_samples += r.num_samples - sams.outliers += r.samples.outliers + sams.outliers = outliers + sams._all_samples = all_samples self.min, self.max, self.median, self.mean, self.sd = \ sams.min, sams.max, sams.median, sams.mean, sams.sd else: diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py index a8e4e0200fa24..843ad76ea9f5d 100644 --- a/benchmark/scripts/test_compare_perf_tests.py +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -423,18 +423,29 @@ def as_tuple(r): r = results[0] self.assertEqual(as_tuple(r), (200, 21, 18, 967, 996, 1008, None, None, None)) + self.assertEqual(len(r.independent_runs), 1) 
+ self.assertEqual(r.independent_runs[0], r.samples) + # merging optional --meta data results[1].mem_pages = 9 results[1].involuntary_cs = 1 results[1].yield_count = 4 r.merge(results[1]) # 18 + 17 = 35, after merge using only ventiles self.assertEqual(as_tuple(r), (400, 42, 35, 967, 983, 1010, 9, 1, 4)) + results[2].mem_pages = 7 results[2].involuntary_cs = 2 results[2].yield_count = 6 r.merge(results[2]) # 35 + 18 = 53; sum yields and context switches self.assertEqual(as_tuple(r), (600, 63, 53, 967, 989, 1029, 7, 3, 10)) + self.assertEqual(len(r.samples.all_samples), 63) + self.assertEqual(r.samples.outliers, [1019, 1095, 2922, 1040, 1186, + 1880, 6470, 1057, 1281, 4183]) + self.assertEqual(len(r.independent_runs), 3) + self.assertEqual([i.count for i in r.independent_runs], [18, 17, 18]) + self.assertEqual([i.min for i in r.independent_runs], [967, 972, 986]) + class TestResultComparison(unittest.TestCase): def setUp(self): @@ -744,6 +755,9 @@ def test_results_from_merge_verbose(self): samples = result.samples self.assertTrue(isinstance(samples, PerformanceTestSamples)) self.assertEqual(samples.count, 8) + self.assertEqual( + samples.all_samples, + [355883, 358817, 353552, 350815, 363094, 369169, 376131, 364245]) def test_excludes_outliers_from_samples(self): verbose_log = """Running DropFirstAnySeqCntRangeLazy for 10 samples. From 3f93e3145aed2432e44080823e43300464ca51e5 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Sat, 29 Feb 2020 18:46:52 +0100 Subject: [PATCH 18/21] [benchmark] Override all_samples on merged results MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To save on memory used by merged `PerformanceTestResult`s, the rarely used `PerformanceTestSample.all_samples` can gather the samples on demand from the result’s `independent_runs` instead of keeping another copy. 
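In effect, all_samples becomes a lazily computed view over the per-run sample collections.
A toy sketch of the idea (an illustrative class, not the patched PerformanceTestSamples),
reusing runtimes that appear in test_results_from_merge_verbose:

    class MergedSamples(object):
        """Toy stand-in: keeps per-run sample lists, flattens them on demand."""

        def __init__(self, independent_runs):
            self.independent_runs = independent_runs   # list of runtime lists, one per run

        @property
        def all_samples(self):
            # gathered on demand; no merged copy is stored on the object
            return [s for run in self.independent_runs for s in run]

    merged = MergedSamples([[355883, 358817, 353552], [350815, 363094]])
    assert merged.all_samples == [355883, 358817, 353552, 350815, 363094]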
--- benchmark/scripts/compare_perf_tests.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index 434ea34b07726..15dd58ba8dadd 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -58,6 +58,7 @@ def __init__(self, name, samples=None, num_iters=None): self.mean = 0.0 self.S_runtime = 0.0 # For computing running variance if samples: + self._override_all_samples = None self._all_samples = samples ascending = sorted(samples) self.samples = samples if samples == ascending else ascending @@ -114,7 +115,8 @@ def num_samples(self): @property def all_samples(self): """List of all samples in original order.""" - return self._all_samples + return (self._all_samples if not self._override_all_samples else + self._override_all_samples()) @property def min(self): @@ -282,14 +284,19 @@ def merge(self, r): self.independent_runs.append(r.samples) else: self.independent_runs = [self.samples, r.samples] + outliers = self.samples.outliers + r.samples.outliers - all_samples = self.samples.all_samples + r.samples.all_samples self.samples = PerformanceTestSamples( self.name, sorted(self.samples.samples + r.samples.samples)) sams = self.samples + + def all_samples(): + return [s for samples in self.independent_runs + for s in samples.all_samples] + + sams._override_all_samples = all_samples self.num_samples += r.num_samples sams.outliers = outliers - sams._all_samples = all_samples self.min, self.max, self.median, self.mean, self.sd = \ sams.min, sams.max, sams.median, sams.mean, sams.sd else: From 85260cab7a8a3de11ac8e04bbb044f4883d9374d Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Sun, 1 Mar 2020 21:31:34 +0100 Subject: [PATCH 19/21] [benchmark] Dubious indicator for changes only Display the `(?)` indicator for dubious result only for changes, never for the unchanged results. Refactored `ResultComparison.init` with simplyfied range check. --- benchmark/scripts/compare_perf_tests.py | 50 ++++++++++++-------- benchmark/scripts/test_compare_perf_tests.py | 8 ++++ 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index 15dd58ba8dadd..4939cd86e37fe 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -329,24 +329,24 @@ class ResultComparison(object): It computes speedup ratio and improvement delta (%). """ - def __init__(self, old, new): + def __init__(self, old_result, new_result): """Initialize with old and new `PerformanceTestResult`s to compare.""" - self.old = old - self.new = new - assert old.name == new.name - self.name = old.name # Test name, convenience accessor + self.old = old_result + self.new = new_result + assert old_result.name == new_result.name + self.name = old_result.name # Test name, convenience accessor - # Speedup ratio - self.ratio = (old.min + 0.001) / (new.min + 0.001) + # Location estimates + "epsilon" to prevent division by 0 + old = old_result.min + 0.001 + new = new_result.min + 0.001 - # Test runtime improvement in % - ratio = (new.min + 0.001) / (old.min + 0.001) - self.delta = ((ratio - 1) * 100) + self.ratio = old / new # Speedup ratio + self.delta = ((new / old) - 1) * 100 # Test runtime improvement in % - # Indication of dubious changes: when result's MIN falls inside the - # (MIN, MAX) interval of result they are being compared with. 
- self.is_dubious = ((old.min < new.min and new.min < old.max) or - (new.min < old.min and old.min < new.max)) + # Indication of dubious changes: when results' ranges overlap + o_min, o_max, n_min, n_max = \ + self.old.min, self.old.max, self.new.min, self.new.max + self.is_dubious = (o_min <= n_max and n_min <= o_max) class LogParser(object): @@ -695,11 +695,16 @@ def format_columns(r, is_strong): return (r if not is_strong else r[:-1] + (bold_first(r[-1]), )) - def table(title, results, is_strong=False, is_open=False): + def table(title, results, is_strong=False, is_open=False, + mark_dubious=True): if not results: return '' + + def dubious(r): + return ventile_formatter(r) if mark_dubious else '' + rows = [row(format_columns( - ReportFormatter.values(r, ventile_formatter), is_strong)) + ReportFormatter.values(r, dubious), is_strong)) for r in results] table = (header(title if self.single_table else '', ReportFormatter.header_for(results[0])) + @@ -712,7 +717,8 @@ def table(title, results, is_strong=False, is_open=False): table('Regression', self.comparator.decreased, True, True), table('Improvement', self.comparator.increased, True), ('' if self.changes_only else - table('No Changes', self.comparator.unchanged)), + table('No Changes', self.comparator.unchanged, + mark_dubious=False)), table('Added', self.comparator.added, is_open=True), table('Removed', self.comparator.removed, is_open=True) ]) @@ -771,9 +777,12 @@ def row(name, old, new, delta, speedup, speedup_color): def header(contents): return self.HTML_HEADER_ROW.format(* contents) - def table(title, results, speedup_color): + def table(title, results, speedup_color, mark_dubious=True): + def dubious(r): + return ' (?)' if mark_dubious else '' + rows = [ - row(*(ReportFormatter.values(r) + (speedup_color,))) + row(*(ReportFormatter.values(r, dubious) + (speedup_color,))) for r in results ] return ('' if not rows else @@ -786,7 +795,8 @@ def table(title, results, speedup_color): table('Regression', self.comparator.decreased, 'red'), table('Improvement', self.comparator.increased, 'green'), ('' if self.changes_only else - table('No Changes', self.comparator.unchanged, 'black')), + table('No Changes', self.comparator.unchanged, 'black', + mark_dubious=False)), table('Added', self.comparator.added, ''), table('Removed', self.comparator.removed, '') ])) diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py index 843ad76ea9f5d..ce8fb6000f027 100644 --- a/benchmark/scripts/test_compare_perf_tests.py +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -941,6 +941,14 @@ def test_emphasize_speedup(self): 10458 +0.0% 1.00x + """, + """ + + ArrayAppend + 23641 + 20000 + -15.4% + 1.18x (?) 
""" ]) From 08047e491162e48b125456c7132cb9eca5d9147b Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Sun, 1 Mar 2020 05:15:52 +0100 Subject: [PATCH 20/21] [benchmark] Python 3 Support --- benchmark/scripts/Benchmark_Driver | 30 ++++++++++++-------- benchmark/scripts/compare_perf_tests.py | 19 +++++++++---- benchmark/scripts/test_Benchmark_Driver.py | 11 ++++--- benchmark/scripts/test_compare_perf_tests.py | 29 +++++++++---------- benchmark/scripts/test_utils.py | 16 +++++++++-- 5 files changed, 64 insertions(+), 41 deletions(-) diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver index 8588cc292e2cc..30ff98c73d6d4 100755 --- a/benchmark/scripts/Benchmark_Driver +++ b/benchmark/scripts/Benchmark_Driver @@ -34,6 +34,7 @@ import re import subprocess import sys import time +from functools import reduce from compare_perf_tests import LogParser @@ -65,7 +66,7 @@ class BenchmarkDriver(object): def _invoke(self, cmd): return self._subprocess.check_output( - cmd, stderr=self._subprocess.STDOUT) + cmd, stderr=self._subprocess.STDOUT, universal_newlines=True) @property def test_harness(self): @@ -144,7 +145,7 @@ class BenchmarkDriver(object): verbose, measure_memory, quantile, gather_metadata) output = self._invoke(cmd) results = self.parser.results_from_string(output) - return results.items()[0][1] if test else results + return list(results.items())[0][1] if test else results def _cmd_run(self, test, num_samples, num_iters, sample_time, min_samples, verbose, measure_memory, quantile, gather_metadata): @@ -219,9 +220,9 @@ class BenchmarkDriver(object): print(format(values)) def result_values(r): - return map(str, [r.test_num, r.name, r.num_samples, r.min, - r.samples.q1, r.median, r.samples.q3, r.max, - r.max_rss]) + return [str(value) for value in + [r.test_num, r.name, r.num_samples, r.min, + r.samples.q1, r.median, r.samples.q3, r.max, r.max_rss]] header = ['#', 'TEST', 'SAMPLES', 'MIN(μs)', 'Q1(μs)', 'MEDIAN(μs)', 'Q3(μs)', 'MAX(μs)', 'MAX_RSS(B)'] @@ -303,7 +304,11 @@ class MarkdownReportHandler(logging.StreamHandler): msg = self.format(record) stream = self.stream try: - if (isinstance(msg, unicode) and + unicode_type = unicode # Python 2 + except NameError: + unicode_type = str # Python 3 + try: + if (isinstance(msg, unicode_type) and getattr(stream, 'encoding', None)): stream.write(msg.encode(stream.encoding)) else: @@ -415,10 +420,10 @@ class BenchmarkDoctor(object): setup, ratio = BenchmarkDoctor._setup_overhead(measurements) setup = 0 if ratio < 0.05 else setup runtime = min( - [(result.samples.min - correction) for i_series in - [BenchmarkDoctor._select(measurements, num_iters=i) - for correction in [(setup / i) for i in [1, 2]] - ] for result in i_series]) + [(result.samples.min - correction) for correction, i_series in + [(correction, BenchmarkDoctor._select(measurements, num_iters=i)) + for i, correction in [(i, setup // i) for i in [1, 2]]] + for result in i_series]) threshold = 1000 if threshold < runtime: @@ -473,7 +478,7 @@ class BenchmarkDoctor(object): @staticmethod def _reasonable_setup_time(measurements): - setup = min([result.setup + setup = min([result.setup or 0 for result in BenchmarkDoctor._select(measurements)]) if 200000 < setup: # 200 ms BenchmarkDoctor.log_runtime.error( @@ -537,7 +542,7 @@ class BenchmarkDoctor(object): def capped(s): return min(s, 200) - run_args = [(capped(num_samples), 1), (capped(num_samples / 2), 2)] + run_args = [(capped(num_samples), 1), (capped(num_samples // 2), 2)] opts = self.driver.args.optimization opts 
= opts if isinstance(opts, list) else [opts] self.log.debug( @@ -691,6 +696,7 @@ def parse_args(args): subparsers = parser.add_subparsers( title='Swift benchmark driver commands', help='See COMMAND -h for additional arguments', metavar='COMMAND') + subparsers.required = True shared_benchmarks_parser = argparse.ArgumentParser(add_help=False) benchmarks_group = shared_benchmarks_parser.add_mutually_exclusive_group() diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index 4939cd86e37fe..f2a1af460cf76 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # -*- coding: utf-8 -*- # ===--- compare_perf_tests.py -------------------------------------------===// @@ -33,6 +33,7 @@ class `ReportFormatter` creates the test comparison report in specified format. import sys from bisect import bisect_left, bisect_right from collections import namedtuple +from functools import reduce from math import ceil, sqrt @@ -164,13 +165,14 @@ def sd(self): sqrt(self.S_runtime / (self.count - 1))) @staticmethod - def running_mean_variance((k, M_, S_), x): + def running_mean_variance(stats, x): """Compute running variance, B. P. Welford's method. See Knuth TAOCP vol 2, 3rd edition, page 232, or https://www.johndcook.com/blog/standard_deviation/ M is mean, Standard Deviation is defined as sqrt(S/k-1) """ + (k, M_, S_) = stats k = float(k + 1) M = M_ + (x - M_) / k S = S_ + (x - M_) * (x - M) @@ -662,7 +664,7 @@ def _column_widths(self): def max_widths(maximum, widths): return map(max, zip(maximum, widths)) - return reduce(max_widths, widths, [0] * 4) + return list(reduce(max_widths, widths, [0] * 4)) def _formatted_text(self, label_formatter, ventile_formatter, COLUMN_SEPARATOR, DELIMITER_ROW, SEPARATOR, SECTION): @@ -679,7 +681,8 @@ def row(contents): def header(title, column_labels): labels = (column_labels if not self.single_table else - map(label_formatter, (title, ) + column_labels[1:])) + [label_formatter(c) + for c in (title, ) + column_labels[1:]]) h = (('' if not self.header_printed else SEPARATOR) + row(labels) + (row(DELIMITER_ROW) if not self.header_printed else '')) @@ -852,8 +855,12 @@ def main(): print(report) if args.output: - with open(args.output, 'w') as f: - f.write(report) + if sys.version_info < (3, 0): + with open(args.output, 'w') as f: + f.write(report) + else: + with open(args.output, 'w', encoding='utf-8') as f: + f.write(report) if __name__ == '__main__': diff --git a/benchmark/scripts/test_Benchmark_Driver.py b/benchmark/scripts/test_Benchmark_Driver.py index 552dec85481cd..08479d353afab 100644 --- a/benchmark/scripts/test_Benchmark_Driver.py +++ b/benchmark/scripts/test_Benchmark_Driver.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # -*- coding: utf-8 -*- # ===--- test_Benchmark_Driver.py ----------------------------------------===// @@ -17,12 +17,11 @@ import os import time import unittest -from StringIO import StringIO -from imp import load_source from compare_perf_tests import PerformanceTestResult from test_utils import Mock, MockLoggingHandler, Stub, captured_output +from test_utils import StringIO, load_source # import Benchmark_Driver # doesn't work because it misses '.py' extension Benchmark_Driver = load_source( @@ -45,7 +44,7 @@ def assert_contains(self, texts, output): def test_requires_command_argument(self): with captured_output() as (_, err): self.assertRaises(SystemExit, parse_args, []) - 
self.assert_contains(['usage:', 'COMMAND', 'too few arguments'], + self.assert_contains(['usage:', 'COMMAND', 'error:', 'arguments'], err.getvalue()) def test_command_help_lists_commands(self): @@ -150,7 +149,7 @@ def __init__(self, responses=None): super(SubprocessMock, self).__init__(responses) def _check_output(args, stdin=None, stdout=None, stderr=None, - shell=False): + shell=False, universal_newlines=False): return self.record_and_respond(args, stdin, stdout, stderr, shell) self.check_output = _check_output @@ -387,7 +386,7 @@ def test_log_results(self): def assert_log_written(out, log_file, content): self.assertEqual(out.getvalue(), 'Logging results to: ' + log_file + '\n') - with open(log_file, 'rU') as f: + with open(log_file, 'r') as f: text = f.read() self.assertEqual(text, "formatted output") diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py index ce8fb6000f027..cad8d2e932d62 100644 --- a/benchmark/scripts/test_compare_perf_tests.py +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # -*- coding: utf-8 -*- # ===--- test_compare_perf_tests.py --------------------------------------===// @@ -377,8 +377,7 @@ def test_merge(self): 1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336 1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144 1,AngryPhonebook,1,12270,12270,12270,0,12270,10498048""".split('\n')[1:] - results = map(PerformanceTestResult, - [line.split(',') for line in tests]) + results = [PerformanceTestResult(line.split(',')) for line in tests] def as_tuple(r): return (r.num_samples, r.min, r.max, round(r.mean, 2), @@ -524,17 +523,13 @@ class OldAndNewLog(unittest.TestCase): old_results = dict([(r.name, r) for r in - map(PerformanceTestResult, - [line.split(',') - for line in - old_log_content.splitlines()])]) + [PerformanceTestResult(line.split(',')) + for line in old_log_content.splitlines()]]) new_results = dict([(r.name, r) for r in - map(PerformanceTestResult, - [line.split(',') - for line in - new_log_content.splitlines()])]) + [PerformanceTestResult(line.split(',')) + for line in new_log_content.splitlines()]]) old_results['D'] = PerformanceTestResult( '184,D,200,648,4,1,5,9,5,3,45,40,3,1,,,,1,1,,4,4,4,268'.split(','), @@ -721,7 +716,7 @@ def test_results_from_merge(self): concatenated_logs = """4,ArrayAppend,20,23641,29000,24990,0,24990 4,ArrayAppend,1,20000,20000,20000,0,20000""" results = LogParser.results_from_string(concatenated_logs) - self.assertEqual(results.keys(), ['ArrayAppend']) + self.assertEqual(list(results.keys()), ['ArrayAppend']) result = results['ArrayAppend'] self.assertTrue(isinstance(result, PerformanceTestResult)) self.assertEqual(result.min, 20000) @@ -743,7 +738,7 @@ def test_results_from_merge_verbose(self): Sample 3,364245 3,Array2D,4,363094,376131,368159,5931,369169""" results = LogParser.results_from_string(concatenated_logs) - self.assertEqual(results.keys(), ['Array2D']) + self.assertEqual(list(results.keys()), ['Array2D']) result = results['Array2D'] self.assertTrue(isinstance(result, PerformanceTestResult)) self.assertEqual(result.min, 350815) @@ -1135,8 +1130,12 @@ def execute_main_with_format(self, report_format, test_output=False): report_out = out.getvalue() if test_output: - with open(report_file, 'r') as f: - report = f.read() + if sys.version_info < (3, 0): + with open(report_file, 'r') as f: + report = f.read() + else: + with open(report_file, 'r', encoding='utf-8') as f: + report = f.read() # because print adds 
newline, add one here, too: report_file = str(report + '\n') else: diff --git a/benchmark/scripts/test_utils.py b/benchmark/scripts/test_utils.py index 6a2bf8856a99f..150928f0aa1c4 100644 --- a/benchmark/scripts/test_utils.py +++ b/benchmark/scripts/test_utils.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # -*- coding: utf-8 -*- # ===--- test_utils.py ---------------------------------------------------===// @@ -24,9 +24,21 @@ import logging import sys -from StringIO import StringIO + +# Cross-version compatibility layer +try: + from StringIO import StringIO # for Python 2 +except ImportError: + from io import StringIO # for Python 3 from contextlib import contextmanager +if sys.version_info < (3, 4): # imp.load_source is deprecated in Python 3.4 + from imp import load_source +else: + def load_source(name, path): + from importlib.machinery import SourceFileLoader + return SourceFileLoader(name, path).load_module() + @contextmanager def captured_output(): From bf06df68ec0b595e1000ae2c92990ab43772ba03 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Tue, 3 Mar 2020 18:52:29 +0100 Subject: [PATCH 21/21] [Gardening] Refactored runtimes correction for SO Replaced list comprehension that computes the minimum from runtimes corrected for setup overhead with a procedural style that is easier to understand. --- benchmark/scripts/Benchmark_Driver | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver index 30ff98c73d6d4..fbb8d326a7dc7 100755 --- a/benchmark/scripts/Benchmark_Driver +++ b/benchmark/scripts/Benchmark_Driver @@ -419,11 +419,12 @@ class BenchmarkDoctor(object): name = measurements['name'] setup, ratio = BenchmarkDoctor._setup_overhead(measurements) setup = 0 if ratio < 0.05 else setup - runtime = min( - [(result.samples.min - correction) for correction, i_series in - [(correction, BenchmarkDoctor._select(measurements, num_iters=i)) - for i, correction in [(i, setup // i) for i in [1, 2]]] - for result in i_series]) + runtimes = [] + for i in [1, 2]: + correction = setup // i + for result in BenchmarkDoctor._select(measurements, num_iters=i): + runtimes.append(result.samples.min - correction) + runtime = min(runtimes) threshold = 1000 if threshold < runtime: