
Clean up the benchmark suite (#668)
jwiggins authored Mar 3, 2021
1 parent fb2c6a1 commit 1700cbf
Showing 3 changed files with 228 additions and 103 deletions.
enable/gcbench/bench.py (61 changes: 36 additions & 25 deletions)
@@ -14,10 +14,12 @@

import numpy as np

from enable.gcbench.data import BenchResult, BenchTiming

_MAX_DURATION = 1.0
_SIZE = (512, 512)
_BACKENDS = {
"ui": {
"gui": {
"kiva.agg": "enable.null.image",
"cairo": "enable.null.cairo",
"celiagg": "enable.null.celiagg",
@@ -37,8 +39,8 @@ def benchmark(outdir=None):
""" Benchmark all backends
"""
suite = gen_suite()
results = {btype: {} for btype in _BACKENDS}

results = {t: {} for t in _BACKENDS}
for btype, backends in _BACKENDS.items():
for name, mod_name in backends.items():
print(f"Benchmarking backend: {name}", end="")
@@ -48,12 +50,13 @@ def benchmark(outdir=None):
print(" ... Not available")
continue

# UI backends are checked for performance, File backends are not.
if btype == "ui":
if btype == "gui":
# GUI backends are checked for performance (and features).
results[btype][name] = benchmark_backend(
suite, name, module, outdir=outdir
)
else:
# File backends are checked for feature coverage.
# XXX: Use the fact that `name` is the same as the file ext.
results[btype][name] = exercise_backend(
suite, name, module, extension=name, outdir=outdir
@@ -70,32 +73,35 @@ def benchmark_backend(suite, mod_name, module, outdir=None):

results = {}
for name, symbol in suite.items():
# Result `summary` defaults to "fail"
results[name] = result = BenchResult()

print(f"\n\tBenchmark {name}", end="")
try:
instance = symbol(gc, module)
except Exception:
print(f" ... Failed", end="")
continue

if name.endswith("2x"):
# Double sized
with gc:
gc.scale_ctm(2, 2)
stats = gen_timings(gc, instance)
timing = gen_timing(gc, instance)
else:
# Normal scale
stats = gen_timings(gc, instance)
timing = gen_timing(gc, instance)

if stats is None:
if timing is None:
print(f" ... Failed", end="")
results[name] = None
continue

results[name] = {"times": stats}
result.timing = timing
result.summary = "success"
if outdir is not None:
fname = os.path.join(outdir, f"{mod_name}.{name}.png")
gc.save(fname)
results[name]["format"] = "png"
results[name]["filename"] = os.path.basename(fname)
result.output = os.path.basename(fname)

print() # End the line that was left
return results
@@ -106,11 +112,14 @@ def exercise_backend(suite, mod_name, module, extension, outdir=None):
"""
GraphicsContext = getattr(module, "GraphicsContext")

results = {name: None for name in suite}
results = {}
for name, symbol in suite.items():
# Result `summary` defaults to "fail"
results[name] = result = BenchResult()

# Skip 2x versions
if name.endswith("2x"):
results[name] = {"skip": True}
result.summary = "skip"
continue

# Use a fresh context each time
@@ -120,20 +129,21 @@ def exercise_backend(suite, mod_name, module, extension, outdir=None):
try:
instance = symbol(gc, module)
except Exception:
print(f" ... Failed", end="")
continue

try:
instance()
result.summary = "success"
except Exception:
print(f" ... Failed", end="")
continue

results[name] = {"times": {}}
if outdir is not None:
fname = os.path.join(outdir, f"{mod_name}.{name}.{extension}")
gc.save(fname)
results[name]["format"] = extension
results[name]["filename"] = os.path.basename(fname)
# Record the output
result.output = os.path.basename(fname)

print() # End the line that was left
return results
@@ -142,19 +152,20 @@ def exercise_backend(suite, mod_name, module, extension, outdir=None):
def gen_suite():
""" Create a suite of benchmarks to run against each backend
"""
# Import here so we can use `suite` as a name elsewhere.
from enable.gcbench import suite

benchmarks = {}
for name in dir(suite):
symbol = getattr(suite, name)
if inspect.isclass(symbol):
benchmarks[name] = symbol
benchmarks[f"{name} 2x"] = symbol
benchmarks[f"{name}_2x"] = symbol

return benchmarks


def gen_timings(gc, func):
def gen_timing(gc, func):
""" Run a function multiple times and generate some stats
"""
duration = 0.0
@@ -174,10 +185,10 @@ def gen_timings(gc, func):
return None

times = np.array(times)
return {
"mean": times.mean() * 1000,
"min": times.min() * 1000,
"max": times.max() * 1000,
"std": times.std() * 1000,
"count": len(times),
}
return BenchTiming(
count=len(times),
mean=times.mean() * 1000,
minimum=times.min() * 1000,
maximum=times.max() * 1000,
stddev=times.std() * 1000,
)
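For context, the refactored entry point in bench.py can be driven directly from Python. The following is a hypothetical usage sketch, not part of the commit: the output directory name is invented, and the import path simply mirrors the file location enable/gcbench/bench.py.

# Hypothetical driver for the cleaned-up benchmark runner.
# Assumes enable is installed; "bench_output" is an arbitrary directory name.
import os

from enable.gcbench.bench import benchmark

outdir = "bench_output"
os.makedirs(outdir, exist_ok=True)

# Runs every benchmark produced by gen_suite() against each available backend.
# GUI backends are timed via benchmark_backend(); file backends are only
# exercised for feature coverage.  Rendered output lands in `outdir`.
benchmark(outdir=outdir)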
enable/gcbench/data.py (97 changes: 97 additions & 0 deletions)
@@ -0,0 +1,97 @@
# (C) Copyright 2005-2021 Enthought, Inc., Austin, TX
# All rights reserved.
#
# This software is provided without warranty under the terms of the BSD
# license included in LICENSE.txt and may be redistributed only under
# the conditions described in the aforementioned license. The license
# is also available online at http://www.enthought.com/licenses/BSD.txt
#
# Thanks for using Enthought open source!
import os

from traits.api import (
Enum, File, Float, HasStrictTraits, Instance, Int, Property, Str
)


class BenchResult(HasStrictTraits):
""" The result of a benchmark run on a single backend
"""
#: Short status field for checking the outcome of a benchmark
# Default to "fail"!
summary = Enum("fail", "skip", "success")

#: A path to an output file and its format
output = File()
output_format = Property(Str(), observe="output")

#: Timing results
timing = Instance("BenchTiming")

def _get_output_format(self):
if self.output:
return os.path.splitext(self.output)[-1]
return ""

def compare_to(self, other):
return BenchComparison.from_pair(self, baseline=other)


class BenchComparison(HasStrictTraits):
""" A comparison table entry.
"""
#: CSS class to use for `td`
css_class = Enum("valid", "invalid", "skipped")

#: The content for the `td`
value = Str()

@classmethod
def from_pair(cls, result, baseline=None):
""" Create an instance from two BenchResult instances.
"""
if result.summary == "fail":
return cls(value="\N{HEAVY BALLOT X}", css_class="invalid")

elif result.summary == "skip":
return cls(value="\N{HEAVY MINUS SIGN}", css_class="skipped")

elif result.summary == "success":
if result.timing is not None:
# Compare timing to the baseline result
relvalue = baseline.timing.mean / result.timing.mean
return cls(value=f"{relvalue:0.2f}", css_class="valid")
else:
# No timing, but the result was successful
return cls(value="\N{HEAVY CHECK MARK}", css_class="valid")

else:
raise RuntimeError("Unhandled result `summary`")

return None


class BenchTiming(HasStrictTraits):
""" The timing results of a single benchmark.
"""
#: How many times the benchmark ran
count = Int(0)

#: avg/min/max/std
mean = Float(0.0)
minimum = Float(0.0)
maximum = Float(0.0)
stddev = Float(0.0)

def to_html(self):
""" Format this instance as an HTML <table>
"""
names = ("mean", "minimum", "maximum", "stddev", "count")
rows = [
(f"<tr><td>{name.capitalize()}</td>"
f"<td>{getattr(self, name):0.4f}</td></tr>")
for name in names
]

rows = "\n".join(rows)
return f'<table>{rows}</table>'
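To make the new data model in data.py concrete, here is a minimal, hypothetical sketch (not part of the commit; the timing numbers are invented) of how one result could be compared against a baseline and rendered for a report:

# Illustrative only: constructs two successful results with made-up timings,
# compares them, and renders the timing table.
from enable.gcbench.data import BenchResult, BenchTiming

baseline = BenchResult(
    summary="success",
    timing=BenchTiming(count=20, mean=4.0, minimum=3.5, maximum=5.1, stddev=0.4),
)
candidate = BenchResult(
    summary="success",
    timing=BenchTiming(count=18, mean=2.0, minimum=1.8, maximum=2.6, stddev=0.2),
)

# BenchComparison.from_pair() divides the baseline mean by the result mean,
# so a value above 1.0 means the candidate is faster than the baseline.
cell = candidate.compare_to(baseline)
print(cell.value, cell.css_class)   # -> 2.00 valid

# BenchTiming.to_html() emits a small <table> of mean/min/max/stddev/count.
print(candidate.timing.to_html())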
