From b414e7f24bc20df6b9d5ce58fe55f7a47550ea72 Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Thu, 3 Jun 2021 14:24:21 +0300
Subject: [PATCH] compare.py: sort the results

Currently, the tooling just keeps whatever benchmark order was present
in the results. This is fine nowadays, but once benchmarks can
optionally be run interleaved, that order will be rather suboptimal.

So, now that I have introduced the family index and the per-family
instance index, we can define an order for the benchmarks and sort
them accordingly.

There is a caveat with aggregates: they carry no repetition index, so
we assume they are already in the correct relative order, and rely on
stable sorting not to mess that order up.
---
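
Notes: sort_benchmark_results() in tools/gbench/util.py (below) chains
four stable sorts, innermost key first. Since Python's sorted() is
guaranteed stable, this should be equivalent to a single sort on a
composite key tuple written outermost field first. A minimal sketch of
that equivalence; the helper names sort_key and one_pass_sort are
illustrative only, not part of the patch:

    def sort_key(benchmark):
        # One tuple field per stable-sort pass, outermost key first.
        return (
            benchmark.get('family_index', -1),
            benchmark.get('per_family_instance_index', -1),
            # Aggregates sort after the iterations they summarize.
            1 if benchmark.get('run_type') == 'aggregate' else 0,
            # Aggregates lack repetition_index; the shared -1 plus
            # sort stability preserves their original relative order.
            benchmark.get('repetition_index', -1),
        )

    def one_pass_sort(result):
        result['benchmarks'] = sorted(result['benchmarks'],
                                      key=sort_key)
        return result

The patch keeps the chained form, which spells out the inner-key to
outer-key progression one pass at a time.
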
aggregate" + }, + + + { + "name": "90 family 1 instance 1 repetition 0", + "run_type": "iteration", + "family_index": 1, + "per_family_instance_index": 1, + "repetition_index": 0 + }, + { + "name": "89 family 1 instance 1 repetition 1", + "run_type": "iteration", + "family_index": 1, + "per_family_instance_index": 1, + "repetition_index": 1 + }, + { + "name": "88 family 1 instance 1 aggregate", + "run_type": "aggregate", + "family_index": 1, + "per_family_instance_index": 1, + "aggregate_name": "9 aggregate" + } + ] +} diff --git a/tools/gbench/report.py b/tools/gbench/report.py index 69b643e92b..6bea82f6bf 100644 --- a/tools/gbench/report.py +++ b/tools/gbench/report.py @@ -1,9 +1,11 @@ -import unittest """report.py - Utilities for reporting statistics about benchmark results """ + +import unittest import os import re import copy +import random from scipy.stats import mannwhitneyu @@ -912,6 +914,49 @@ def test_json_diff_report(self): assert_measurements(self, out, expected) +class TestReportSorting(unittest.TestCase): + @classmethod + def setUpClass(cls): + def load_result(): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput = os.path.join(testInputs, 'test4_run.json') + with open(testOutput, 'r') as f: + json = json.load(f) + return json + + cls.json = load_result() + + def test_json_diff_report_pretty_printing(self): + import util + + expected_names = [ + "99 family 0 instance 0 repetition 0", + "98 family 0 instance 0 repetition 1", + "97 family 0 instance 0 aggregate", + "96 family 0 instance 1 repetition 0", + "95 family 0 instance 1 repetition 1", + "94 family 0 instance 1 aggregate", + "93 family 1 instance 0 repetition 0", + "92 family 1 instance 0 repetition 1", + "91 family 1 instance 0 aggregate", + "90 family 1 instance 1 repetition 0", + "89 family 1 instance 1 repetition 1", + "88 family 1 instance 1 aggregate" + ] + + for n in range(len(self.json['benchmarks']) ** 2): + random.shuffle(self.json['benchmarks']) + sorted_benchmarks = util.sort_benchmark_results(self.json)[ + 'benchmarks'] + self.assertEqual(len(expected_names), len(sorted_benchmarks)) + for out, expected in zip(sorted_benchmarks, expected_names): + self.assertEqual(out['name'], expected) + + def assert_utest(unittest_instance, lhs, rhs): if lhs['utest']: unittest_instance.assertAlmostEqual( diff --git a/tools/gbench/util.py b/tools/gbench/util.py index 661c4bad8d..5d0012c0cb 100644 --- a/tools/gbench/util.py +++ b/tools/gbench/util.py @@ -5,6 +5,7 @@ import tempfile import subprocess import sys +import functools # Input file type enumeration IT_Invalid = 0 @@ -119,6 +120,23 @@ def load_benchmark_results(fname): return json.load(f) +def sort_benchmark_results(result): + benchmarks = result['benchmarks'] + + # From inner key to the outer key! + benchmarks = sorted( + benchmarks, key=lambda benchmark: benchmark['repetition_index'] if 'repetition_index' in benchmark else -1) + benchmarks = sorted( + benchmarks, key=lambda benchmark: 1 if 'run_type' in benchmark and benchmark['run_type'] == "aggregate" else 0) + benchmarks = sorted( + benchmarks, key=lambda benchmark: benchmark['per_family_instance_index'] if 'per_family_instance_index' in benchmark else -1) + benchmarks = sorted( + benchmarks, key=lambda benchmark: benchmark['family_index'] if 'family_index' in benchmark else -1) + + result['benchmarks'] = benchmarks + return result + + def run_benchmark(exe_name, benchmark_flags): """ Run a benchmark specified by 'exe_name' with the specified