diff --git a/tools/benchcomp/.gitignore b/tools/benchcomp/.gitignore new file mode 100644 index 000000000000..8f272745239e --- /dev/null +++ b/tools/benchcomp/.gitignore @@ -0,0 +1,2 @@ +# the regression tests write result.yaml files into their directories +result.yaml diff --git a/tools/benchcomp/README.md b/tools/benchcomp/README.md new file mode 100644 index 000000000000..ca363a3cb9d3 --- /dev/null +++ b/tools/benchcomp/README.md @@ -0,0 +1,11 @@ +# Benchcomp + +This directory contains `bin/benchcomp`, a tool for comparing one or +more suites of benchmarks using two or more 'variants' (command line +arguments and environment variables). + +`benchcomp` runs all combinations of suite x variant, parsing the unique +output formats of each of these runs. `benchcomp` then combines the +parsed outputs and writes them into a single file. `benchcomp` can +post-process that combined file to create visualizations, exit if the +results are not as expected, or perform other actions. diff --git a/tools/benchcomp/benchcomp/__init__.py b/tools/benchcomp/benchcomp/__init__.py new file mode 100644 index 000000000000..eedd6ebc1f32 --- /dev/null +++ b/tools/benchcomp/benchcomp/__init__.py @@ -0,0 +1,119 @@ +# Copyright Kani Contributors +# SPDX-License-Identifier: Apache-2.0 OR MIT +# +# Common utilities for benchcomp + + +import argparse +import collections +import contextlib +import dataclasses +import logging +import pathlib +import sys +import textwrap + +import yaml + + +class ConfigFile(collections.UserDict): + _schema: str = textwrap.dedent("""\ +variants: + type: dict + keysrules: + type: string + valuesrules: + schema: + config: + type: dict + keysrules: + type: string + valuesrules: + allow_unknown: true + schema: + command_line: + type: string + directory: + type: string + env: + type: dict + keysrules: + type: string + valuesrules: + type: string +run: + type: dict + keysrules: + type: string + schema: + suites: + type: dict + keysrules: + type: string + valuesrules: + schema: + variants: + type: list + parser: + type: dict + keysrules: + type: string + valuesrules: + anyof: + - schema: + type: {} +filter: {} +visualize: {} +""") + + def __init__(self, path): + super().__init__() + + try: + with open(path, encoding="utf-8") as handle: + data = yaml.safe_load(handle) + except (FileNotFoundError, OSError) as exc: + raise argparse.ArgumentTypeError( + f"{path}: file not found") from exc + + schema = yaml.safe_load(self._schema) + try: + import cerberus + validate = cerberus.Validator(schema) + if not validate(data): + for error in validate._errors: + doc_path = "/".join(error.document_path) + msg = ( + f"config file '{path}': key " + f"'{doc_path}': expected " + f"{error.constraint}, got '{error.value}'") + if error.rule: + msg += f" (rule {error.rule})" + msg += f" while traversing {error.schema_path}" + logging.error(msg) + logging.error(validate.document_error_tree["variants"]) + raise argparse.ArgumentTypeError( + "failed to validate configuration file") + except ImportError: + pass + self.data = data + + +@dataclasses.dataclass +class Outfile: + """Return a handle to a file on disk or stdout if given '-'""" + + path: str + + def __str__(self): + return str(self.path) + + @contextlib.contextmanager + def __call__(self): + if self.path == "-": + yield sys.stdout + return + path = pathlib.Path(self.path) + path.parent.mkdir(exist_ok=True) + with open(path, "w", encoding="utf-8") as handle: + yield handle diff --git a/tools/benchcomp/benchcomp/cmd_args.py b/tools/benchcomp/benchcomp/cmd_args.py 
new file mode 100644
index 000000000000..13bb0c39844e
--- /dev/null
+++ b/tools/benchcomp/benchcomp/cmd_args.py
@@ -0,0 +1,225 @@
+# Copyright Kani Contributors
+# SPDX-License-Identifier: Apache-2.0 OR MIT
+#
+# Command line argument processing
+
+
+import argparse
+import importlib
+import pathlib
+import re
+import textwrap
+
+import benchcomp
+import benchcomp.entry.benchcomp
+import benchcomp.entry.run
+
+
+def _get_epilogs():
+    epilogs = {
+        "top_level": """\
+            benchcomp can help you to understand the difference between two or
+            more toolchains, by running benchmarks that use those toolchains and
+            comparing the results.
+
+            benchcomp runs two or more 'variants' of a set of benchmark suites,
+            and compares and visualizes the results of these variants. This
+            allows you to understand the differences between the variants,
+            for example how they affect the benchmarks' performance or output or
+            even whether they pass at all.
+
+            benchcomp is structured as a pipeline of several commands. Running
+            `benchcomp` runs each of them sequentially. You can run the
+            subcommands manually to dump the intermediate files if required.""",
+        "run": """\
+            The run command writes one YAML file for each (suite, variant) pair.
+            These YAML files are in "suite.yaml" format. Typically, users
+            should read the combined YAML file emitted by `benchcomp collate`
+            rather than the multiple YAML files written by `benchcomp run`.
+
+            The `run` command writes its output files into a directory, which
+            `collate` then reads from. By default, `run` writes the files into a
+            new directory with a common prefix on each invocation, meaning that
+            all previous runs are preserved without the user needing to specify
+            a different directory each time. Benchcomp also creates a symbolic
+            link to the latest run. Thus, the directories after several runs
+            will look something like this:
+
+            /tmp/benchcomp/suites/2F0D3DC4-0D02-4E95-B887-4759F08FA90D
+            /tmp/benchcomp/suites/119F11EB-9BC0-42D8-9EC1-47DFD661AC88
+            /tmp/benchcomp/suites/A3E83FE8-CD42-4118-BED3-ED89EC88BFB0
+            /tmp/benchcomp/suites/latest -> /tmp/benchcomp/suites/119F11EB...
+
+            '/tmp/benchcomp/suites' is the "out-prefix"; the UUID is the
+            "out-dir"; and '/tmp/benchcomp/suites/latest' is the "out-symlink".
+            Users can set each of these manually by passing the corresponding
+            flag, if needed.
+
+            Passing `--out-symlink ./latest` will place the symbolic link in the
+            current directory, while keeping all runs under /tmp to avoid
+            clutter.
If you wish to keep all previous runs in a local directory, + you can do so with + + `--out-prefix ./output --out-symlink ./output/latest`""", + "filter": "", # TODO + "visualize": "", # TODO + "collate": "", + } + + wrapper = textwrap.TextWrapper() + ret = {} + for subcommand, epilog in epilogs.items(): + paragraphs = re.split(r"\n\s*\n", epilog) + buf = [] + for p in paragraphs: + p = textwrap.dedent(p) + buf.extend(wrapper.wrap(p)) + buf.append("") + ret[subcommand] = "\n".join(buf) + return ret + + +def _existing_directory(arg): + path = pathlib.Path(arg) + if not path.exists(): + raise ValueError(f"directory '{arg}' must already exist") + return path + + +def _get_args_dict(): + epilogs = _get_epilogs() + ret = { + "top_level": { + "description": + "Run and compare variants of a set of benchmark suites", + "epilog": epilogs["top_level"], + "formatter_class": argparse.RawDescriptionHelpFormatter, + }, + "args": [], + "subparsers": { + "title": "benchcomp subcommands", + "description": + "You can invoke each stage of the benchcomp pipeline " + "separately if required", + "parsers": { + "run": { + "help": "run all variants of all benchmark suites", + "args": [{ + "flags": ["--out-prefix"], + "metavar": "D", + "type": pathlib.Path, + "default": benchcomp.entry.run.get_default_out_prefix(), + "help": + "write suite.yaml files to a new directory under D " + "(default: %(default)s)", + }, { + "flags": ["--out-dir"], + "metavar": "D", + "type": str, + "default": benchcomp.entry.run.get_default_out_dir(), + "help": + "write suite.yaml files to D relative to " + "--out-prefix (must not exist) " + "(default: %(default)s)", + }, { + "flags": ["--out-symlink"], + "metavar": "D", + "type": pathlib.Path, + "default": + benchcomp.entry.run.get_default_out_prefix() / + benchcomp.entry.run.get_default_out_symlink(), + "help": + "symbolically link D to the output directory " + "(default: %(default)s)", + }], + }, + "collate": { + "args": [{ + "flags": ["--suites-dir"], + "metavar": "D", + "type": _existing_directory, + "default": + benchcomp.entry.run.get_default_out_prefix() / + benchcomp.entry.run.get_default_out_symlink(), + "help": + "directory containing suite.yaml files " + "(default: %(default)s)" + }, { + "flags": ["--out-file"], + "metavar": "F", + "default": benchcomp.Outfile("result.yaml"), + "type": benchcomp.Outfile, + "help": + "write result to F instead of %(default)s. " + "'-' means print to stdout", + }], + }, + "filter": { + "help": "transform a result by piping it through a program", + "args": [], + }, + "visualize": { + "help": "render a result in various formats", + "args": [{ + "flags": ["--result-file"], + "metavar": "F", + "default": pathlib.Path("result.yaml"), + "type": pathlib.Path, + "help": + "read result from F instead of %(default)s. 
" + }], + }, + } + } + } + for subcommand, info in ret["subparsers"]["parsers"].items(): + info["epilog"] = epilogs[subcommand] + info["formatter_class"] = argparse.RawDescriptionHelpFormatter + return ret + + +def _get_global_args(): + return [{ + "flags": ["-c", "--config"], + "default": "benchcomp.yaml", + "type": benchcomp.ConfigFile, + "metavar": "F", + "help": "read configuration from file F (default: %(default)s)", + }, { + "flags": ["-v", "--verbose"], + "action": "store_true", + "help": "enable verbose output", + }] + + +def get(): + ad = _get_args_dict() + parser = argparse.ArgumentParser(**ad["top_level"]) + + parser.set_defaults(func=benchcomp.entry.benchcomp.main) + + global_args = _get_global_args() + + ad["args"].extend(global_args) + for arg in ad["args"]: + flags = arg.pop("flags") + parser.add_argument(*flags, **arg) + + subparsers = ad["subparsers"].pop("parsers") + subs = parser.add_subparsers(**ad["subparsers"]) + for subcommand, info in subparsers.items(): + args = info.pop("args") + subparser = subs.add_parser(name=subcommand, **info) + + # Set entrypoint to benchcomp.entry.visualize.main() + # when user invokes `benchcomp visualize`, etc + mod = importlib.import_module(f"benchcomp.entry.{subcommand}") + subparser.set_defaults(func=mod.main) + + for arg in args: + flags = arg.pop("flags") + subparser.add_argument(*flags, **arg) + if arg not in global_args: + parser.add_argument(*flags, **arg) + + return parser.parse_args() diff --git a/tools/benchcomp/benchcomp/entry/README.md b/tools/benchcomp/benchcomp/entry/README.md new file mode 100644 index 000000000000..ef610f1d80b4 --- /dev/null +++ b/tools/benchcomp/benchcomp/entry/README.md @@ -0,0 +1,3 @@ +Each file X.py in this directory contains a `main` method, which +bin/benchcomp will call when you run `benchcomp X`. Running `benchcomp` +with no arguments will invoke the `main` method in `benchcomp.py`. diff --git a/tools/benchcomp/benchcomp/entry/__init__.py b/tools/benchcomp/benchcomp/entry/__init__.py new file mode 100644 index 000000000000..01d6d4c73b1c --- /dev/null +++ b/tools/benchcomp/benchcomp/entry/__init__.py @@ -0,0 +1,2 @@ +# Copyright Kani Contributors +# SPDX-License-Identifier: Apache-2.0 OR MIT diff --git a/tools/benchcomp/benchcomp/entry/benchcomp.py b/tools/benchcomp/benchcomp/entry/benchcomp.py new file mode 100644 index 000000000000..f92b9f1eac05 --- /dev/null +++ b/tools/benchcomp/benchcomp/entry/benchcomp.py @@ -0,0 +1,17 @@ +# Copyright Kani Contributors +# SPDX-License-Identifier: Apache-2.0 OR MIT +# +# Entrypoint when running `benchcomp` with no arguments. This runs the other +# subcommands in sequence, for a single-command way of running, comparing, and +# post-processing the suites from a single reproducible config file. + + +import benchcomp.entry.collate +import benchcomp.entry.run + + +def main(args): + run_result = benchcomp.entry.run.main(args) + + args.suites_dir = run_result.out_prefix / run_result.out_symlink + results = benchcomp.entry.collate.main(args) diff --git a/tools/benchcomp/benchcomp/entry/collate.py b/tools/benchcomp/benchcomp/entry/collate.py new file mode 100644 index 000000000000..1647755932d3 --- /dev/null +++ b/tools/benchcomp/benchcomp/entry/collate.py @@ -0,0 +1,65 @@ +# Copyright Kani Contributors +# SPDX-License-Identifier: Apache-2.0 OR MIT +# +# Entrypoint for `benchcomp collate`. This command turns a directory of +# `suite.yaml` files into a single `result.yaml` file. 
`suite.yaml` files are +# emitted by `benchcomp run` when it runs a single combination of suite x variant; +# the `collate` command is used to combine those files for all combinations. + +import logging +import sys + +import yaml + + +class _ResultsCollator: + """Incrementally add suite x variant results, return combined results""" + + def __init__(self): + self.result = { + "metrics": {}, + "benchmarks": {}, + } + + def __call__(self): + return self.result + + def _union_benchmarks(self, suite): + for bench_name, suite_result in suite["benchmarks"].items(): + if bench_name not in self.result["benchmarks"]: + self.result["benchmarks"][bench_name] = {"variants": {}} + self.result["benchmarks"][bench_name]["variants"][suite["variant_id"]] = { + **suite_result + } + + def _union_metrics(self, suite): + for metric, details in suite["metrics"].items(): + if metric not in self.result["metrics"]: + self.result["metrics"][metric] = dict(details) + continue + if self.result["metrics"][metric] == details: + continue + logging.error( + "two suite.yaml files inconsistently defined metric '%s'", + metric) + logging.error( + "old definition: %s", str(self.result["metrics"][metric])) + logging.error("new definition: %s", str(details)) + sys.exit(1) + + def add_suite(self, suite): + self._union_metrics(suite) + self._union_benchmarks(suite) + + +def main(args): + results = _ResultsCollator() + for suite_file in args.suites_dir.iterdir(): + with open(suite_file, encoding="utf-8") as handle: + suite = yaml.safe_load(handle) + results.add_suite(suite) + + with args.out_file() as handle: + yaml.dump(results(), handle, default_flow_style=False) + + return results() diff --git a/tools/benchcomp/benchcomp/entry/filter.py b/tools/benchcomp/benchcomp/entry/filter.py new file mode 100644 index 000000000000..8523a198ce6a --- /dev/null +++ b/tools/benchcomp/benchcomp/entry/filter.py @@ -0,0 +1,8 @@ +# Copyright Kani Contributors +# SPDX-License-Identifier: Apache-2.0 OR MIT +# +# Entrypoint for `benchcomp filter` + + +def main(_): + raise NotImplementedError # TODO diff --git a/tools/benchcomp/benchcomp/entry/run.py b/tools/benchcomp/benchcomp/entry/run.py new file mode 100644 index 000000000000..05581c18e7e9 --- /dev/null +++ b/tools/benchcomp/benchcomp/entry/run.py @@ -0,0 +1,132 @@ +# Copyright Kani Contributors +# SPDX-License-Identifier: Apache-2.0 OR MIT +# +# Entrypoint for `benchcomp run`. This command runs all combinations of +# benchmark suites x variants that are defined in a config file. After each +# combination, this command uses a 'parser' to write the list of benchmarks and +# their associated metrics to a file using a unified schema called +# `suite.yaml`. Parsers are python submodules of benchcomp.parsers; the +# configuration file describes which parser to use for each benchmark suite. 
+ + +import dataclasses +import importlib +import logging +import os +import pathlib +import shutil +import subprocess +import uuid + +import yaml + +import benchcomp + + +@dataclasses.dataclass +class _SingleInvocation: + """Run and parse the result of a single suite x variant""" + + suite_id: str + variant_id: str + + parser: str + + suite_yaml_out_dir: pathlib.Path + + command_line: str + directory: pathlib.Path + + env: dict = dataclasses.field(default_factory=dict) + timeout: int = None + memout: int = None + patches: list = dataclasses.field(default_factory=list) + + def __post_init__(self): + self.working_copy: pathlib.Path = pathlib.Path( + f"/tmp/benchcomp/suites/{uuid.uuid4()}") + + def __call__(self): + env = dict(os.environ) + env.update(self.env) + + shutil.copytree(self.directory, self.working_copy) + + try: + subprocess.run( + self.command_line, shell=True, env=env, cwd=self.working_copy, + check=True) + except subprocess.CalledProcessError as exc: + logging.warning( + "Invocation of suite %s with variant %s exited with code %d", + self.suite_id, self.variant_id, exc.returncode) + return + except (OSError, subprocess.SubprocessError): + logging.error( + "Invocation of suite %s with variant %s failed", self.suite_id, + self.variant_id) + return + + parser_mod_name = f"benchcomp.parsers.{self.parser}" + parser = importlib.import_module(parser_mod_name) + suite = parser.main(self.working_copy) + + suite["suite_id"] = self.suite_id + suite["variant_id"] = self.variant_id + + out_file = f"{self.suite_id}@{self.variant_id}_suite.yaml" + with open( + self.suite_yaml_out_dir / out_file, "w", + encoding="utf-8") as handle: + yaml.dump(suite, handle, default_flow_style=False) + + +@dataclasses.dataclass +class _Run: + """Run all suite x variant combinations, write results to a directory""" + + config: benchcomp.ConfigFile + out_prefix: pathlib.Path + out_dir: str + out_symlink: str + result: dict = None + + def __call__(self): + out_path = (self.out_prefix / self.out_dir) + out_path.mkdir(parents=True) + + for suite_id, suite in self.config["run"]["suites"].items(): + for variant_id in suite["variants"]: + variant = self.config["variants"][variant_id] + config = dict(variant).pop("config") + invoke = _SingleInvocation( + suite_id, variant_id, + suite["parser"]["module"], + suite_yaml_out_dir=out_path, + **config) + invoke() + + # Atomically symlink the symlink dir to the output dir, even if + # there is already an existing symlink with that name + tmp_symlink = self.out_symlink.with_suffix(f".{uuid.uuid4()}") + tmp_symlink.parent.mkdir(exist_ok=True) + tmp_symlink.symlink_to(out_path) + tmp_symlink.rename(self.out_symlink) + + +def get_default_out_symlink(): + return "latest" + + +def get_default_out_dir(): + return str(uuid.uuid4()) + + +def get_default_out_prefix(): + return pathlib.Path("/tmp") / "benchcomp" / "suites" + + +def main(args): + run = _Run(args.config, args.out_prefix, args.out_dir, args.out_symlink) + run() + return run diff --git a/tools/benchcomp/benchcomp/entry/visualize.py b/tools/benchcomp/benchcomp/entry/visualize.py new file mode 100644 index 000000000000..b2cf352048e0 --- /dev/null +++ b/tools/benchcomp/benchcomp/entry/visualize.py @@ -0,0 +1,8 @@ +# Copyright Kani Contributors +# SPDX-License-Identifier: Apache-2.0 OR MIT +# +# Entrypoint for `benchcomp visualize` + + +def main(_): + pass diff --git a/tools/benchcomp/benchcomp/parsers/README.md b/tools/benchcomp/benchcomp/parsers/README.md new file mode 100644 index 000000000000..7b41926f5864 --- /dev/null 
+++ b/tools/benchcomp/benchcomp/parsers/README.md
@@ -0,0 +1,11 @@
+Each file in this directory implements a 'parser' that is intended to
+parse the results of a single suite x variant run. Each suite has a
+different output format and exposes different metrics; the parsers' job
+is to read the suites' output files and return a dict in a unified
+format.
+
+Each parser implements a `main` method that takes the root directory
+where the suite was run as an argument. The parser attempts to read the
+suite's results from that directory and returns the results in
+suite.yaml format (which `benchcomp collate` will subsequently merge
+with other suites into a single result.yaml file).
diff --git a/tools/benchcomp/benchcomp/parsers/__init__.py b/tools/benchcomp/benchcomp/parsers/__init__.py
new file mode 100644
index 000000000000..01d6d4c73b1c
--- /dev/null
+++ b/tools/benchcomp/benchcomp/parsers/__init__.py
@@ -0,0 +1,2 @@
+# Copyright Kani Contributors
+# SPDX-License-Identifier: Apache-2.0 OR MIT
diff --git a/tools/benchcomp/benchcomp/parsers/test.py b/tools/benchcomp/benchcomp/parsers/test.py
new file mode 100644
index 000000000000..b5e36d1880f2
--- /dev/null
+++ b/tools/benchcomp/benchcomp/parsers/test.py
@@ -0,0 +1,25 @@
+# Copyright Kani Contributors
+# SPDX-License-Identifier: Apache-2.0 OR MIT
+#
+# Count and return the number of 'foo's in a file.
+
+
+def main(root_dir):
+    try:
+        with open(root_dir / "out", encoding="utf-8") as handle:
+            data = handle.read().splitlines()
+    except FileNotFoundError:
+        data = []
+
+    return {
+        "metrics": {
+            "foos": {},
+        },
+        "benchmarks": {
+            "suite_1": {
+                "metrics": {
+                    "foos": len([l for l in data if l.strip() == "foo"]),
+                },
+            }
+        },
+    }
diff --git a/tools/benchcomp/bin/benchcomp b/tools/benchcomp/bin/benchcomp
new file mode 100755
index 000000000000..8e9f856878c9
--- /dev/null
+++ b/tools/benchcomp/bin/benchcomp
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+# Copyright Kani Contributors
+# SPDX-License-Identifier: Apache-2.0 OR MIT
+#
+# Run and compare variants of a set of benchmark suites
+
+
+import logging
+import pathlib
+import sys
+
+# autopep8: off
+sys.path.append(str(pathlib.Path(__file__).parent.parent))
+import benchcomp.cmd_args
+# autopep8: on
+
+
+def main():
+    fmt = "benchcomp: %(levelname)s: %(message)s"
+    logging.basicConfig(format=fmt)
+
+    args = benchcomp.cmd_args.get()
+
+    level = logging.INFO if args.verbose else logging.WARNING
+    logging.getLogger().setLevel(level)  # basicConfig() only applies once
+
+    args.func(args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/benchcomp/test/README.md b/tools/benchcomp/test/README.md
new file mode 100644
index 000000000000..43f920622c8d
--- /dev/null
+++ b/tools/benchcomp/test/README.md
@@ -0,0 +1,3 @@
+# Benchcomp unit & regression tests
+
+To run, invoke `./test/run` from the tools/benchcomp directory.
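As a point of reference for the parser contract described in parsers/README.md above, here is roughly what a single suite.yaml file might look like for the bundled `test` parser, after `benchcomp run` adds the `suite_id` and `variant_id` keys and dumps it to `<suite>@<variant>_suite.yaml` (the metric values are illustrative):

    metrics:
      foos: {}
    benchmarks:
      suite_1:
        metrics:
          foos: 1
    suite_id: suite_1
    variant_id: variant_1

After `benchcomp collate` merges one such file per suite x variant combination, the combined `result.yaml` keys each benchmark's results by variant, which is the layout the regression tests in `test_regression.py` below read back:

    metrics:
      foos: {}
    benchmarks:
      suite_1:
        variants:
          variant_1:
            metrics:
              foos: 1
          variant_2:
            metrics:
              foos: 0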
diff --git a/tools/benchcomp/test/__init__.py b/tools/benchcomp/test/__init__.py new file mode 100644 index 000000000000..8eab96f09b00 --- /dev/null +++ b/tools/benchcomp/test/__init__.py @@ -0,0 +1,10 @@ +# Copyright Kani Contributors +# SPDX-License-Identifier: Apache-2.0 OR MIT +# +# Set up import path for unit tests + + +import sys +import pathlib + +sys.path.append(str(pathlib.Path(__file__).parent.parent.parent)) diff --git a/tools/benchcomp/test/run b/tools/benchcomp/test/run new file mode 100755 index 000000000000..4b8d0c9ff746 --- /dev/null +++ b/tools/benchcomp/test/run @@ -0,0 +1,8 @@ +# Copyright Kani Contributors +# SPDX-License-Identifier: Apache-2.0 OR MIT +# +# Run benchcomp unit and regression tests. To use this script, run +# ./test/run from the tools/benchcomp directory. + +python3 -m unittest discover +exit $? diff --git a/tools/benchcomp/test/test_regression.py b/tools/benchcomp/test/test_regression.py new file mode 100644 index 000000000000..5399061eb290 --- /dev/null +++ b/tools/benchcomp/test/test_regression.py @@ -0,0 +1,164 @@ +# Copyright Kani Contributors +# SPDX-License-Identifier: Apache-2.0 OR MIT +# +# Benchcomp regression testing suite. This suite uses Python's stdlib unittest +# module, but nevertheless actually runs the binary rather than running unit +# tests. + +import pathlib +import subprocess +import tempfile +import unittest + +import yaml + + +class Benchcomp: + """Invocation of benchcomp binary with optional subcommand and flags""" + + def __init__(self, config): + self.proc, self.stdout, self.stderr = None, None, None + + with tempfile.NamedTemporaryFile( + mode="w", delete=False, suffix=".yaml") as tmp: + yaml.dump(config, tmp, default_flow_style=False) + self.config_file = tmp.name + + self.bc = str(pathlib.Path(__file__).parent.parent / + "bin" / "benchcomp") + + wd = tempfile.mkdtemp() + self.working_directory = pathlib.Path(wd) + + def __call__(self, subcommand=None, default_flags=None, *flags): + subcommand = subcommand or [] + default_flags = default_flags or [ + "--out-prefix", "/tmp/benchcomp/test"] + config_flags = ["--config", str(self.config_file)] + + cmd = [self.bc, *config_flags, *subcommand, *default_flags, *flags] + self.proc = subprocess.Popen( + cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + cwd=self.working_directory) + self.stdout, self.stderr = self.proc.communicate() + + +class RegressionTests(unittest.TestCase): + def test_return_0(self): + """Ensure that benchcomp terminates with return code 0""" + + with tempfile.TemporaryDirectory() as tmp: + run_bc = Benchcomp({ + "variants": { + "variant_1": { + "config": { + "directory": tmp, + "command_line": "true", + } + }, + "variant_2": { + "config": { + "directory": tmp, + "command_line": "true", + } + } + }, + "run": { + "suites": { + "suite_1": { + "parser": {"module": "test"}, + "variants": ["variant_1", "variant_2"] + } + } + }, + "visualize": [], + }) + run_bc() + self.assertEqual( + run_bc.proc.returncode, 0, msg=run_bc.stderr) + + with open(run_bc.working_directory / "result.yaml") as handle: + result = yaml.safe_load(handle) + + def test_return_0_on_fail(self): + """Ensure that benchcomp terminates with 0 even if a suite fails""" + + with tempfile.TemporaryDirectory() as tmp: + run_bc = Benchcomp({ + "variants": { + "variant_1": { + "config": { + "directory": tmp, + "command_line": "false", + } + }, + "variant_2": { + "config": { + "directory": tmp, + "command_line": "true", + } + } + }, + "run": { + "suites": { + "suite_1": { + "parser": {"module": 
"test"}, + "variants": ["variant_1", "variant_2"] + } + } + }, + "visualize": [], + }) + run_bc() + self.assertEqual( + run_bc.proc.returncode, 0, msg=run_bc.stderr) + + with open(run_bc.working_directory / "result.yaml") as handle: + result = yaml.safe_load(handle) + + def test_env(self): + """Ensure that benchcomp reads the 'env' key of variant config""" + + with tempfile.TemporaryDirectory() as tmp: + run_bc = Benchcomp({ + "variants": { + "env_set": { + "config": { + "command_line": "echo $QJTX > out", + "directory": tmp, + "env": {"QJTX": "foo"} + } + }, + "env_unset": { + "config": { + "command_line": "echo $QJTX > out", + "directory": tmp, + } + } + }, + "run": { + "suites": { + "suite_1": { + "parser": {"module": "test"}, + "variants": ["env_unset", "env_set"] + } + } + }, + "visualize": [], + }) + run_bc() + self.assertEqual( + run_bc.proc.returncode, 0, msg=run_bc.stderr) + + with open(run_bc.working_directory / "result.yaml") as handle: + result = yaml.safe_load(handle) + + self.assertEqual( + result["benchmarks"]["suite_1"]["variants"][ + "env_set"]["metrics"]["foos"], 1, + msg=yaml.dump(result, default_flow_style=False)) + + self.assertEqual( + result["benchmarks"]["suite_1"]["variants"][ + "env_unset"]["metrics"]["foos"], 0, + msg=yaml.dump(result, default_flow_style=False)) diff --git a/tools/benchcomp/test/unit/__init__.py b/tools/benchcomp/test/unit/__init__.py new file mode 100644 index 000000000000..01d6d4c73b1c --- /dev/null +++ b/tools/benchcomp/test/unit/__init__.py @@ -0,0 +1,2 @@ +# Copyright Kani Contributors +# SPDX-License-Identifier: Apache-2.0 OR MIT diff --git a/tools/benchcomp/test/unit/test_utils.py b/tools/benchcomp/test/unit/test_utils.py new file mode 100644 index 000000000000..95c42b258d3e --- /dev/null +++ b/tools/benchcomp/test/unit/test_utils.py @@ -0,0 +1,46 @@ +# Copyright Kani Contributors +# SPDX-License-Identifier: Apache-2.0 OR MIT + + +import pathlib +import tempfile +import textwrap +import unittest + +import yaml + +import benchcomp +import benchcomp.cmd_args + + +class TestConfigFile(unittest.TestCase): + def validate_against_schema(self, data): + with tempfile.NamedTemporaryFile(mode="w") as tmp: + yaml.dump(data, tmp, default_flow_style=False) + benchcomp.ConfigFile(pathlib.Path(tmp.name)) + + def test_1(self): + self.validate_against_schema(yaml.safe_load(textwrap.dedent("""\ + variants: + variant_1: + config: + command_line: cmd_1 + directory: dir_1 + + variant_2: + config: + command_line: cmd_1 + directory: dir_1 + env: + ENV_VAR_1: value + ENV_VAR_2: value + + run: + suites: + suite_1: + variants: + - variant_1 + - variant_2 + parser: + module: test + """)))