compare.py: sort the results
Currently, the tooling just keeps whatever benchmark order was present in the input. That is fine for now, but once the benchmarks can optionally be run interleaved, it will be rather suboptimal.

So, now that I have introduced the family index and the per-family instance index, we can define a canonical order for the benchmarks and sort them accordingly.

There is a caveat with aggregates: we assume that they are already in order, and we rely on the sort not disturbing that order.
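For intuition, here is a minimal, self-contained sketch of that ordering scheme (not the committed code; the tiny records and names below are invented for illustration). Chained stable sorts, applied from the innermost key to the outermost, group entries by family, then by per-family instance, and place per-repetition iterations before their aggregates; because each pass is stable, aggregates that tie on every key keep their original relative order, which is exactly the assumption mentioned above.

# Sketch: chained stable sorts, innermost key first. Records are made up.
entries = [
    {"name": "b/1 rep1", "family_index": 1, "per_family_instance_index": 0,
     "run_type": "iteration", "repetition_index": 1},
    {"name": "a/0 mean", "family_index": 0, "per_family_instance_index": 0,
     "run_type": "aggregate"},  # aggregates carry no repetition_index
    {"name": "a/0 rep0", "family_index": 0, "per_family_instance_index": 0,
     "run_type": "iteration", "repetition_index": 0},
]

# Each later sorted() call is stable, so ties keep the order established by
# the previous pass; missing keys fall back to -1.
entries = sorted(entries, key=lambda e: e.get("repetition_index", -1))
entries = sorted(entries, key=lambda e: e.get("run_type") == "aggregate")
entries = sorted(entries, key=lambda e: e.get("per_family_instance_index", -1))
entries = sorted(entries, key=lambda e: e.get("family_index", -1))

print([e["name"] for e in entries])
# ['a/0 rep0', 'a/0 mean', 'b/1 rep1']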
LebedevRI committed Jun 3, 2021
1 parent 0c1da0a commit 8d70c3b
Showing 4 changed files with 164 additions and 5 deletions.
8 changes: 4 additions & 4 deletions tools/compare.py
@@ -238,10 +238,10 @@ def main():
options_contender = ['--benchmark_filter=%s' % filter_contender]

# Run the benchmarks and report the results
json1 = json1_orig = gbench.util.run_or_load_benchmark(
test_baseline, benchmark_options + options_baseline)
json2 = json2_orig = gbench.util.run_or_load_benchmark(
test_contender, benchmark_options + options_contender)
json1 = json1_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark(
test_baseline, benchmark_options + options_baseline))
json2 = json2_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark(
test_contender, benchmark_options + options_contender))

# Now, filter the benchmarks so that the difference report can work
if filter_baseline and filter_contender:
96 changes: 96 additions & 0 deletions tools/gbench/Inputs/test4_run.json
@@ -0,0 +1,96 @@
{
"benchmarks": [
{
"name": "99 family 0 instance 0 repetition 0",
"run_type": "iteration",
"family_index": 0,
"per_family_instance_index": 0,
"repetition_index": 0
},
{
"name": "98 family 0 instance 0 repetition 1",
"run_type": "iteration",
"family_index": 0,
"per_family_instance_index": 0,
"repetition_index": 1
},
{
"name": "97 family 0 instance 0 aggregate",
"run_type": "aggregate",
"family_index": 0,
"per_family_instance_index": 0,
"aggregate_name": "9 aggregate"
},


{
"name": "96 family 0 instance 1 repetition 0",
"run_type": "iteration",
"family_index": 0,
"per_family_instance_index": 1,
"repetition_index": 0
},
{
"name": "95 family 0 instance 1 repetition 1",
"run_type": "iteration",
"family_index": 0,
"per_family_instance_index": 1,
"repetition_index": 1
},
{
"name": "94 family 0 instance 1 aggregate",
"run_type": "aggregate",
"family_index": 0,
"per_family_instance_index": 1,
"aggregate_name": "9 aggregate"
},




{
"name": "93 family 1 instance 0 repetition 0",
"run_type": "iteration",
"family_index": 1,
"per_family_instance_index": 0,
"repetition_index": 0
},
{
"name": "92 family 1 instance 0 repetition 1",
"run_type": "iteration",
"family_index": 1,
"per_family_instance_index": 0,
"repetition_index": 1
},
{
"name": "91 family 1 instance 0 aggregate",
"run_type": "aggregate",
"family_index": 1,
"per_family_instance_index": 0,
"aggregate_name": "9 aggregate"
},


{
"name": "90 family 1 instance 1 repetition 0",
"run_type": "iteration",
"family_index": 1,
"per_family_instance_index": 1,
"repetition_index": 0
},
{
"name": "89 family 1 instance 1 repetition 1",
"run_type": "iteration",
"family_index": 1,
"per_family_instance_index": 1,
"repetition_index": 1
},
{
"name": "88 family 1 instance 1 aggregate",
"run_type": "aggregate",
"family_index": 1,
"per_family_instance_index": 1,
"aggregate_name": "9 aggregate"
}
]
}
47 changes: 46 additions & 1 deletion tools/gbench/report.py
@@ -1,9 +1,11 @@
import unittest
"""report.py - Utilities for reporting statistics about benchmark results
"""

import unittest
import os
import re
import copy
import random

from scipy.stats import mannwhitneyu

@@ -912,6 +914,49 @@ def test_json_diff_report(self):
assert_measurements(self, out, expected)


class TestReportSorting(unittest.TestCase):
@classmethod
def setUpClass(cls):
def load_result():
import json
testInputs = os.path.join(
os.path.dirname(
os.path.realpath(__file__)),
'Inputs')
testOutput = os.path.join(testInputs, 'test4_run.json')
with open(testOutput, 'r') as f:
json = json.load(f)
return json

cls.json = load_result()

def test_json_diff_report_pretty_printing(self):
import util

expected_names = [
"99 family 0 instance 0 repetition 0",
"98 family 0 instance 0 repetition 1",
"97 family 0 instance 0 aggregate",
"96 family 0 instance 1 repetition 0",
"95 family 0 instance 1 repetition 1",
"94 family 0 instance 1 aggregate",
"93 family 1 instance 0 repetition 0",
"92 family 1 instance 0 repetition 1",
"91 family 1 instance 0 aggregate",
"90 family 1 instance 1 repetition 0",
"89 family 1 instance 1 repetition 1",
"88 family 1 instance 1 aggregate"
]

for n in range(len(self.json['benchmarks']) ** 2):
random.shuffle(self.json['benchmarks'])
sorted_benchmarks = util.sort_benchmark_results(self.json)[
'benchmarks']
self.assertEqual(len(expected_names), len(sorted_benchmarks))
for out, expected in zip(sorted_benchmarks, expected_names):
self.assertEqual(out['name'], expected)


def assert_utest(unittest_instance, lhs, rhs):
if lhs['utest']:
unittest_instance.assertAlmostEqual(
18 changes: 18 additions & 0 deletions tools/gbench/util.py
@@ -5,6 +5,7 @@
import tempfile
import subprocess
import sys
import functools

# Input file type enumeration
IT_Invalid = 0
@@ -119,6 +120,23 @@ def load_benchmark_results(fname):
return json.load(f)


def sort_benchmark_results(result):
benchmarks = result['benchmarks']

# From inner key to the outer key!
benchmarks = sorted(
benchmarks, key=lambda benchmark: benchmark['repetition_index'] if 'repetition_index' in benchmark else -1)
benchmarks = sorted(
benchmarks, key=lambda benchmark: 1 if 'run_type' in benchmark and benchmark['run_type'] == "aggregate" else 0)
benchmarks = sorted(
benchmarks, key=lambda benchmark: benchmark['per_family_instance_index'] if 'per_family_instance_index' in benchmark else -1)
benchmarks = sorted(
benchmarks, key=lambda benchmark: benchmark['family_index'] if 'family_index' in benchmark else -1)

result['benchmarks'] = benchmarks
return result


def run_benchmark(exe_name, benchmark_flags):
"""
Run a benchmark specified by 'exe_name' with the specified
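As an aside, since Python's sorted() is stable, the four chained passes in sort_benchmark_results above should be equivalent to a single sort on a composite key listed from the outermost field to the innermost. A hypothetical, behavior-equivalent sketch (the function name is invented here and is not part of the commit):

def sort_benchmark_results_single_pass(result):
    # One stable sort on an (outer, ..., inner) tuple key; entries that tie
    # on every field keep their original relative order, which is what
    # preserves the assumed in-order aggregates.
    result['benchmarks'] = sorted(
        result['benchmarks'],
        key=lambda b: (
            b.get('family_index', -1),
            b.get('per_family_instance_index', -1),
            1 if b.get('run_type') == 'aggregate' else 0,
            b.get('repetition_index', -1),
        ))
    return result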
