From 76b1accb91745cbda0cf59e841dbd3843ba0a240 Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 6 Oct 2021 12:58:19 +0100
Subject: [PATCH 01/17] [util] Move create_logs_dir() to runfiles_path.

This moves the create_logs_dir() function into the runfile_path module
so that all of the functions for generating filesystem paths are in
the same module. This also improves the implementation by enabling
thread safe logging directory generation, and renames the function to
create_user_logs_dir() to better communicate the type of logs.
---
 compiler_gym/util/logs.py          | 38 +++++------------
 compiler_gym/util/runfiles_path.py | 48 +++++++++++++++++++++
 tests/util/BUILD                   | 10 +++++
 tests/util/runfiles_path_test.py   | 67 ++++++++++++++++++++++++++++++
 4 files changed, 136 insertions(+), 27 deletions(-)
 create mode 100644 tests/util/runfiles_path_test.py

diff --git a/compiler_gym/util/logs.py b/compiler_gym/util/logs.py
index e4f6599c5..0340fc744 100644
--- a/compiler_gym/util/logs.py
+++ b/compiler_gym/util/logs.py
@@ -2,11 +2,13 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
-import os
-from datetime import datetime
 from pathlib import Path
 from typing import NamedTuple
 
+from deprecated import deprecated
+
+from compiler_gym.util.runfiles_path import create_user_logs_dir
+
 # File names of generated logs.
 METADATA_NAME = "random_search.json"
 PROGRESS_LOG_NAME = "random_search_progress.csv"
@@ -16,34 +18,16 @@
 BEST_ACTIONS_PROGRESS_NAME = "random_search_best_actions_progress.csv"
 
 
+@deprecated(
+    version="0.2.1",
+    reason="Use compiler_gym.util.create_user_logs_dir() instead",
+)
 def create_logging_dir(name: str) -> Path:
-    """Create a directory for writing logs to.
-
-    Defaults to ~/logs/compiler_gym base directory, set
-    $COMPILER_GYM_LOGS=/path/to/dir to override this.
-
-    Usage:
-        >>> create_logging_dir("my_script")
-        Path("~/logs/compiler_gym/my_script/2020-11-03T11:00:00")
+    """Deprecated function to create a directory for writing logs to.
 
-    :param name: The grouping name for the logs.
-    :return: The path of a logging directory.
+    Use :code:`compiler_gym.util.runfiles_path.create_user_logs_dir()` instead.
     """
-    logging_base_dir = os.environ.get("COMPILER_GYM_LOGS", "~/logs/compiler_gym")
-    logging_base_dir = Path(logging_base_dir).expanduser()
-    logging_base_dir = logging_base_dir / name
-
-    timestamp = datetime.now().isoformat()
-
-    logging_dir = logging_base_dir / timestamp
-    logging_dir.mkdir(parents=True)
-
-    # Create a symlink to the "latest" logs results.
-    if (logging_base_dir / "latest").exists():
-        os.unlink(logging_base_dir / "latest")
-    os.symlink(timestamp, logging_base_dir / "latest")
-
-    return logging_dir
+    return create_user_logs_dir(name)
 
 
 class ProgressLogEntry(NamedTuple):
diff --git a/compiler_gym/util/runfiles_path.py b/compiler_gym/util/runfiles_path.py
index 50145c0d3..ce74bf3b2 100644
--- a/compiler_gym/util/runfiles_path.py
+++ b/compiler_gym/util/runfiles_path.py
@@ -4,14 +4,20 @@
 # LICENSE file in the root directory of this source tree.
 """Module for resolving a runfiles path."""
 import os
+from datetime import datetime
 from getpass import getuser
 from pathlib import Path
+from threading import Lock
+from time import sleep
+from typing import Optional
 
 # NOTE(cummins): Moving this file may require updating this relative path.
 _PACKAGE_ROOT = Path(os.path.join(os.path.dirname(__file__), "../../")).resolve(
     strict=True
 )
 
+_CREATE_LOGGING_DIR_LOCK = Lock()
+
 
 def runfiles_path(relpath: str) -> Path:
     """Resolve the path to a runfiles data path.
@@ -124,3 +130,45 @@ def transient_cache_path(relpath: str) -> Path:
     else:
         # Fallback to using the regular cache.
         return cache_path(relpath)
+
+
+def create_user_logs_dir(name: str, dir: Optional[Path] = None) -> Path:
+    """Create a directory for writing logs to.
+
+    Defaults to ~/logs/compiler_gym base directory, set the
+    :code:`COMPILER_GYM_LOGS` environment variable to override this.
+
+    Example use:
+
+        >>> create_user_logs_dir("my_experiment")
+        Path("~/logs/compiler_gym/my_experiment/2020-11-03T11:00:00")
+
+    :param name: The grouping name for the logs.
+
+    :return: A unique timestamped directory for logging. This directory exists.
+    """
+    base_dir = Path(
+        os.environ.get("COMPILER_GYM_LOGS", dir or "~/logs/compiler_gym")
+    ).expanduser()
+    group_dir = base_dir / name
+
+    with _CREATE_LOGGING_DIR_LOCK:
+        # Require that logging directory timestamps are unique by waiting until
+        # a unique timestamp is generated.
+        while True:
+            now = datetime.now()
+            subdirs = now.strftime("%Y-%m-%d/%H-%M-%S")
+
+            logs_dir = group_dir / subdirs
+            if logs_dir.is_dir():
+                sleep(0.3)
+                continue
+
+            logs_dir.mkdir(parents=True, exist_ok=False)
+
+            # Create a symlink to the "latest" logs results.
+            if (group_dir / "latest").exists():
+                os.unlink(group_dir / "latest")
+            os.symlink(subdirs, group_dir / "latest")
+
+            return logs_dir
diff --git a/tests/util/BUILD b/tests/util/BUILD
index dbdb5b5af..d92b1ad08 100644
--- a/tests/util/BUILD
+++ b/tests/util/BUILD
@@ -75,6 +75,16 @@ py_test(
     ],
 )
 
+py_test(
+    name = "runfiles_path_test",
+    srcs = ["runfiles_path_test.py"],
+    deps = [
+        "//compiler_gym/util",
+        "//tests:test_main",
+        "//tests/pytest_plugins:common",
+    ],
+)
+
 py_test(
     name = "statistics_test",
     timeout = "short",
diff --git a/tests/util/runfiles_path_test.py b/tests/util/runfiles_path_test.py
new file mode 100644
index 000000000..8105fa609
--- /dev/null
+++ b/tests/util/runfiles_path_test.py
@@ -0,0 +1,67 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+"""Unit tests for compiler_gym/util/locks.py"""
+from datetime import datetime
+from pathlib import Path
+from threading import Thread
+
+from flaky import flaky
+
+from compiler_gym.util.runfiles_path import create_user_logs_dir
+from tests.test_main import main
+
+pytest_plugins = ["tests.pytest_plugins.common"]
+
+
+@flaky  # Unlikely event that timestamps change
+def test_create_user_logs_dir(temporary_environ, tmpdir):
+    tmpdir = Path(tmpdir)
+    temporary_environ["COMPILER_GYM_LOGS"] = str(tmpdir)
+
+    dir = create_user_logs_dir("foo")
+    now = datetime.now()
+
+    assert dir.parent.parent == tmpdir / "foo"
+
+    year, month, day = dir.parent.name.split("-")
+    assert int(year) == now.year
+    assert int(month) == now.month
+    assert int(day) == now.day
+
+    hour, minute, second = dir.name.split("-")
+    assert int(hour) == now.hour
+    assert int(minute) == now.minute
+    assert int(second) == now.second
+
+
+def test_create_user_logs_dir_multithreaded(temporary_environ, tmpdir):
+    tmpdir = Path(tmpdir)
+    temporary_environ["COMPILER_GYM_LOGS"] = str(tmpdir)
+
+    class MakeDir(Thread):
+        def __init__(self):
+            super().__init__()
+            self.dir = None
+
+        def run(self):
+            self.dir = create_user_logs_dir("foo")
+
+        def join(self):
+            super().join()
+            return self.dir
+
+    threads = [MakeDir() for _ in range(5)]
+    for t in threads:
+        t.start()
+
+    dirs = [t.join() for t in threads]
+
+    # Every directory should be unique.
+    print(dirs)
+    assert len(set(dirs)) == len(dirs)
+
+
+if __name__ == "__main__":
+    main()

From 4591d8196c1b173f4c7ce1a2ee015ec75355dc1b Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 6 Oct 2021 13:17:06 +0100
Subject: [PATCH 02/17] Refactor and merge random search utils into
 random_search module.

This aggregates the random search utility classes and functions into
the random_search module. This effectively deprecates the
compiler_gym.random_replay and compiler_gym.util.logs modules.
---
 compiler_gym/BUILD                |   3 +-
 compiler_gym/bin/BUILD            |   3 +-
 compiler_gym/bin/random_eval.py   |   3 +-
 compiler_gym/bin/random_replay.py |   2 +-
 compiler_gym/random_replay.py     |  81 ++++-----------------
 compiler_gym/random_search.py     | 113 +++++++++++++++++++++++++++++-
 compiler_gym/util/logs.py         |  41 +----------
 tests/random_search_test.py       |   3 +-
 8 files changed, 133 insertions(+), 116 deletions(-)

diff --git a/compiler_gym/BUILD b/compiler_gym/BUILD
index 1a4626ba0..b12e5bb2f 100644
--- a/compiler_gym/BUILD
+++ b/compiler_gym/BUILD
@@ -12,6 +12,7 @@ py_library(
     srcs = ["__init__.py"],
     visibility = ["//visibility:public"],
     deps = [
+        ":random_replay",
         ":random_search",
         ":validate",
         "//compiler_gym/bin",
@@ -41,6 +42,7 @@ py_library(
     srcs = ["random_replay.py"],
     visibility = ["//visibility:public"],
     deps = [
+        ":random_search",
         "//compiler_gym/envs",
         "//compiler_gym/util",
     ],
@@ -52,7 +54,6 @@ py_library(
     data = ["//compiler_gym/envs/llvm/service"],
     visibility = ["//visibility:public"],
     deps = [
-        ":random_replay",
         "//compiler_gym/envs",
         "//compiler_gym/service:connection",
         "//compiler_gym/util",
diff --git a/compiler_gym/bin/BUILD b/compiler_gym/bin/BUILD
index 090908051..27e05e66f 100644
--- a/compiler_gym/bin/BUILD
+++ b/compiler_gym/bin/BUILD
@@ -45,6 +45,7 @@ py_binary(
     name = "random_eval",
     srcs = ["random_eval.py"],
     deps = [
+        "//compiler_gym:random_search",
         "//compiler_gym/util",
         "//compiler_gym/util/flags",
     ],
@@ -65,7 +66,7 @@ py_binary(
     srcs = ["random_replay.py"],
     visibility = ["//visibility:public"],
     deps = [
-        "//compiler_gym:random_replay",
+        "//compiler_gym:random_search",
         "//compiler_gym/util",
         "//compiler_gym/util/flags",
     ],
diff --git a/compiler_gym/bin/random_eval.py b/compiler_gym/bin/random_eval.py
index 93031f947..65b41dd8d 100644
--- a/compiler_gym/bin/random_eval.py
+++ b/compiler_gym/bin/random_eval.py
@@ -11,6 +11,7 @@
 from absl import app, flags
 
 import compiler_gym.util.flags.output_dir  # noqa Flag definition.
+from compiler_gym.random_search import RandomSearchProgressLogEntry
 from compiler_gym.util import logs
 from compiler_gym.util.statistics import geometric_mean
 from compiler_gym.util.tabulate import tabulate
@@ -46,7 +47,7 @@ def eval_logs(outdir: Path) -> None:
 
         with open(str(progress_path)) as f:
             final_line = f.readlines()[-1]
-        best = logs.ProgressLogEntry.from_csv(final_line)
+        best = RandomSearchProgressLogEntry.from_csv(final_line)
 
         totals["instructions"] += meta["num_instructions"]
         totals["init_reward"].append(meta["init_reward"])
diff --git a/compiler_gym/bin/random_replay.py b/compiler_gym/bin/random_replay.py
index c5079246f..f6e0f171d 100644
--- a/compiler_gym/bin/random_replay.py
+++ b/compiler_gym/bin/random_replay.py
@@ -17,7 +17,7 @@
 from absl import app, flags
 
 import compiler_gym.util.flags.output_dir  # noqa Flag definition.
-from compiler_gym.random_replay import replay_actions_from_logs
+from compiler_gym.random_search import replay_actions_from_logs
 from compiler_gym.util import logs
 from compiler_gym.util.flags.benchmark_from_flags import benchmark_from_flags
 from compiler_gym.util.flags.env_from_flags import env_from_flags
diff --git a/compiler_gym/random_replay.py b/compiler_gym/random_replay.py
index f00cc63f6..063ec9ee9 100644
--- a/compiler_gym/random_replay.py
+++ b/compiler_gym/random_replay.py
@@ -3,79 +3,26 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 """Replay the sequence of actions that produced the best reward."""
-import json
 from pathlib import Path
-from time import time
 from typing import List
 
-from compiler_gym.envs import CompilerEnv, LlvmEnv
-from compiler_gym.util import logs
-from compiler_gym.util.tabulate import tabulate
+from deprecated import deprecated
 
+from compiler_gym.envs.compiler_env import CompilerEnv
+from compiler_gym.random_search import replay_actions as replay_actions_
+from compiler_gym.random_search import (
+    replay_actions_from_logs as replay_actions_from_logs_,
+)
 
-def replay_actions(env: CompilerEnv, action_names: List[str], outdir: Path):
-    logs_path = outdir / logs.BEST_ACTIONS_PROGRESS_NAME
-    start_time = time()
-
-    if isinstance(env, LlvmEnv):
-        env.write_bitcode(outdir / "unoptimized.bc")
 
-    with open(str(logs_path), "w") as f:
-        ep_reward = 0
-        for i, action in enumerate(action_names, start=1):
-            _, reward, done, _ = env.step(env.action_space.names.index(action))
-            assert not done
-            ep_reward += reward
-            print(
-                f"Step [{i:03d} / {len(action_names):03d}]: reward={reward:.4f}   \t"
-                f"episode={ep_reward:.4f}   \taction={action}"
-            )
-            progress = logs.ProgressLogEntry(
-                runtime_seconds=time() - start_time,
-                total_episode_count=1,
-                total_step_count=i,
-                num_passes=i,
-                reward=reward,
-            )
-            print(progress.to_csv(), action, file=f, sep=",")
-
-    if isinstance(env, LlvmEnv):
-        env.write_bitcode(outdir / "optimized.bc")
-        print(
-            tabulate(
-                [
-                    (
-                        "IR instruction count",
-                        env.observation["IrInstructionCountO0"],
-                        env.observation["IrInstructionCountOz"],
-                        env.observation["IrInstructionCount"],
-                    ),
-                    (
-                        "Object .text size (bytes)",
-                        env.observation["ObjectTextSizeO0"],
-                        env.observation["ObjectTextSizeOz"],
-                        env.observation["ObjectTextSizeBytes"],
-                    ),
-                ],
-                headers=("", "-O0", "-Oz", "final"),
-            )
-        )
+@deprecated(version="0.2.1", reason="Use env.step(actions) instead")
+def replay_actions(env: CompilerEnv, action_names: List[str], outdir: Path):
+    return replay_actions_(env, action_names, outdir)
 
 
+@deprecated(
+    version="0.2.1",
+    reason="Use compiler_gym.random_search.replay_actions_from_logs() instead",
+)
 def replay_actions_from_logs(env: CompilerEnv, logdir: Path, benchmark=None) -> None:
-    best_actions_path = logdir / logs.BEST_ACTIONS_NAME
-    meta_path = logdir / logs.METADATA_NAME
-
-    assert best_actions_path.is_file(), f"File not found: {best_actions_path}"
-    assert meta_path.is_file(), f"File not found: {meta_path}"
-
-    with open(meta_path, "rb") as f:
-        meta = json.load(f)
-
-    with open(best_actions_path) as f:
-        actions = [ln.strip() for ln in f.readlines() if ln.strip()]
-
-    benchmark = benchmark or meta["benchmark"]
-    env.reward_space = meta["reward"]
-    env.reset(benchmark=benchmark)
-    replay_actions(env, actions, logdir)
+    return replay_actions_from_logs_(env, logdir, benchmark)
diff --git a/compiler_gym/random_search.py b/compiler_gym/random_search.py
index d1aae0296..f80983cb6 100644
--- a/compiler_gym/random_search.py
+++ b/compiler_gym/random_search.py
@@ -8,15 +8,54 @@
 from pathlib import Path
 from threading import Thread
 from time import sleep, time
-from typing import Callable, List, Optional, Union
+from typing import Callable, List, NamedTuple, Optional, Union
 
 import humanize
 
 from compiler_gym.envs import CompilerEnv
-from compiler_gym.random_replay import replay_actions
+from compiler_gym.envs.llvm import LlvmEnv
 from compiler_gym.service.connection import ServiceError
 from compiler_gym.util import logs
 from compiler_gym.util.logs import create_logging_dir
+from compiler_gym.util.tabulate import tabulate
+
+
+class RandomSearchProgressLogEntry(NamedTuple):
+    """A snapshot of incremental search progress."""
+
+    runtime_seconds: float
+    total_episode_count: int
+    total_step_count: int
+    num_passes: int
+    reward: float
+
+    def to_csv(self) -> str:
+        return ",".join(
+            [
+                f"{self.runtime_seconds:.3f}",
+                str(self.total_episode_count),
+                str(self.total_step_count),
+                str(self.num_passes),
+                str(self.reward),
+            ]
+        )
+
+    @classmethod
+    def from_csv(cls, line: str) -> "RandomSearchProgressLogEntry":
+        (
+            runtime_seconds,
+            total_episode_count,
+            total_step_count,
+            num_passes,
+            reward,
+        ) = line.split(",")
+        return RandomSearchProgressLogEntry(
+            float(runtime_seconds),
+            int(total_episode_count),
+            int(total_step_count),
+            int(num_passes),
+            float(reward),
+        )
 
 
 class RandomAgentWorker(Thread):
@@ -213,7 +252,7 @@ def random_search(
 
                 # Log the incremental progress improvements.
                 if best_returns > last_best_returns:
-                    entry = logs.ProgressLogEntry(
+                    entry = RandomSearchProgressLogEntry(
                         runtime_seconds=runtime,
                         total_episode_count=total_episode_count,
                         total_step_count=total_step_count,
@@ -252,3 +291,71 @@ def random_search(
         replay_actions(env, best_action_names, outdir)
 
     return env
+
+
+def replay_actions(env: CompilerEnv, action_names: List[str], outdir: Path):
+    logs_path = outdir / logs.BEST_ACTIONS_PROGRESS_NAME
+    start_time = time()
+
+    if isinstance(env, LlvmEnv):
+        env.write_bitcode(outdir / "unoptimized.bc")
+
+    with open(str(logs_path), "w") as f:
+        ep_reward = 0
+        for i, action in enumerate(action_names, start=1):
+            _, reward, done, _ = env.step(env.action_space.names.index(action))
+            assert not done
+            ep_reward += reward
+            print(
+                f"Step [{i:03d} / {len(action_names):03d}]: reward={reward:.4f}   \t"
+                f"episode={ep_reward:.4f}   \taction={action}"
+            )
+            progress = RandomSearchProgressLogEntry(
+                runtime_seconds=time() - start_time,
+                total_episode_count=1,
+                total_step_count=i,
+                num_passes=i,
+                reward=reward,
+            )
+            print(progress.to_csv(), action, file=f, sep=",")
+
+    if isinstance(env, LlvmEnv):
+        env.write_bitcode(outdir / "optimized.bc")
+        print(
+            tabulate(
+                [
+                    (
+                        "IR instruction count",
+                        env.observation["IrInstructionCountO0"],
+                        env.observation["IrInstructionCountOz"],
+                        env.observation["IrInstructionCount"],
+                    ),
+                    (
+                        "Object .text size (bytes)",
+                        env.observation["ObjectTextSizeO0"],
+                        env.observation["ObjectTextSizeOz"],
+                        env.observation["ObjectTextSizeBytes"],
+                    ),
+                ],
+                headers=("", "-O0", "-Oz", "final"),
+            )
+        )
+
+
+def replay_actions_from_logs(env: CompilerEnv, logdir: Path, benchmark=None) -> None:
+    best_actions_path = logdir / logs.BEST_ACTIONS_NAME
+    meta_path = logdir / logs.METADATA_NAME
+
+    assert best_actions_path.is_file(), f"File not found: {best_actions_path}"
+    assert meta_path.is_file(), f"File not found: {meta_path}"
+
+    with open(meta_path, "rb") as f:
+        meta = json.load(f)
+
+    with open(best_actions_path) as f:
+        actions = [ln.strip() for ln in f.readlines() if ln.strip()]
+
+    benchmark = benchmark or meta["benchmark"]
+    env.reward_space = meta["reward"]
+    env.reset(benchmark=benchmark)
+    replay_actions(env, actions, logdir)
diff --git a/compiler_gym/util/logs.py b/compiler_gym/util/logs.py
index 0340fc744..6dfbda7b8 100644
--- a/compiler_gym/util/logs.py
+++ b/compiler_gym/util/logs.py
@@ -3,9 +3,8 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 from pathlib import Path
-from typing import NamedTuple
 
-from deprecated import deprecated
+from deprecated.sphinx import deprecated
 
 from compiler_gym.util.runfiles_path import create_user_logs_dir
 
@@ -28,41 +27,3 @@ def create_logging_dir(name: str) -> Path:
     Use :code:`compiler_gym.util.runfiles_path.create_user_logs_dir()` instead.
     """
     return create_user_logs_dir(name)
-
-
-class ProgressLogEntry(NamedTuple):
-    """A snapshot of incremental search progress."""
-
-    runtime_seconds: float
-    total_episode_count: int
-    total_step_count: int
-    num_passes: int
-    reward: float
-
-    def to_csv(self) -> str:
-        return ",".join(
-            [
-                f"{self.runtime_seconds:.3f}",
-                str(self.total_episode_count),
-                str(self.total_step_count),
-                str(self.num_passes),
-                str(self.reward),
-            ]
-        )
-
-    @classmethod
-    def from_csv(cls, line: str) -> "ProgressLogEntry":
-        (
-            runtime_seconds,
-            total_episode_count,
-            total_step_count,
-            num_passes,
-            reward,
-        ) = line.split(",")
-        return ProgressLogEntry(
-            float(runtime_seconds),
-            int(total_episode_count),
-            int(total_step_count),
-            int(num_passes),
-            float(reward),
-        )
diff --git a/tests/random_search_test.py b/tests/random_search_test.py
index 4680d6416..24c532872 100644
--- a/tests/random_search_test.py
+++ b/tests/random_search_test.py
@@ -8,8 +8,7 @@
 
 import gym
 
-from compiler_gym.random_replay import replay_actions_from_logs
-from compiler_gym.random_search import random_search
+from compiler_gym.random_search import random_search, replay_actions_from_logs
 from tests.pytest_plugins.common import set_command_line_flags
 from tests.test_main import main
 

From 6275b06e281936acd211253101d0ff5ee66185e0 Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 6 Oct 2021 13:23:11 +0100
Subject: [PATCH 03/17] [tests] Refactor `e_ctx` pattern to use regex error
 matchers.

---
 tests/datasets/benchmark_test.py     |  5 +----
 tests/datasets/dataset_test.py       | 13 +++++++------
 tests/datasets/datasets_test.py      |  6 ++----
 tests/datasets/files_dataset_test.py |  8 +++-----
 tests/llvm/invalid_ir_test.py        |  4 +---
 tests/validation_result_test.py      |  3 +--
 6 files changed, 15 insertions(+), 24 deletions(-)

diff --git a/tests/datasets/benchmark_test.py b/tests/datasets/benchmark_test.py
index a18c4d5f9..6392d5528 100644
--- a/tests/datasets/benchmark_test.py
+++ b/tests/datasets/benchmark_test.py
@@ -294,12 +294,9 @@ def test_benchmark_from_file(tmpwd: Path):
 
 def test_benchmark_from_file_not_found(tmpwd: Path):
     path = tmpwd / "foo.txt"
-    with pytest.raises(FileNotFoundError) as e_ctx:
+    with pytest.raises(FileNotFoundError, match=str(path)):
         Benchmark.from_file("benchmark://example-v0/foo", path)
 
-    # Use  endswith() because macOS can add a /private prefix to paths.
-    assert str(e_ctx.value).endswith(str(path))
-
 
 def test_dataset_equality_and_sorting():
     """Test comparison operators between datasets."""
diff --git a/tests/datasets/dataset_test.py b/tests/datasets/dataset_test.py
index 979752e72..fccb3385d 100644
--- a/tests/datasets/dataset_test.py
+++ b/tests/datasets/dataset_test.py
@@ -21,7 +21,13 @@
 def test_dataset__invalid_name(invalid_name: str):
     """Test that invalid dataset names raise an error on init."""
 
-    with pytest.raises(ValueError) as e_ctx:
+    with pytest.raises(
+        ValueError,
+        match=(
+            f"Invalid dataset name: '{invalid_name}'. "
+            "Dataset name must be in the form: '{{protocol}}://{{name}}-v{{version}}'"
+        ),
+    ):
         Dataset(
             name=invalid_name,
             description="A test dataset",
@@ -29,11 +35,6 @@ def test_dataset__invalid_name(invalid_name: str):
             site_data_base="test",
         )
 
-    assert str(e_ctx.value) == (
-        f"Invalid dataset name: '{invalid_name}'. "
-        "Dataset name must be in the form: '{{protocol}}://{{name}}-v{{version}}'"
-    )
-
 
 def test_dataset_properties():
     """Test the dataset property values."""
diff --git a/tests/datasets/datasets_test.py b/tests/datasets/datasets_test.py
index 2402eea71..4a38ac7b8 100644
--- a/tests/datasets/datasets_test.py
+++ b/tests/datasets/datasets_test.py
@@ -147,13 +147,11 @@ def test_datasets_get_item_lookup_miss():
     da = MockDataset("benchmark://foo-v0")
     datasets = Datasets([da])
 
-    with pytest.raises(LookupError) as e_ctx:
+    with pytest.raises(LookupError, match=r"^Dataset not found: benchmark://bar-v0$"):
         datasets.dataset("benchmark://bar-v0")
-    assert str(e_ctx.value) == "Dataset not found: benchmark://bar-v0"
 
-    with pytest.raises(LookupError) as e_ctx:
+    with pytest.raises(LookupError, match=r"^Dataset not found: benchmark://bar-v0$"):
         _ = datasets["benchmark://bar-v0"]
-    assert str(e_ctx.value) == "Dataset not found: benchmark://bar-v0"
 
 
 def test_benchmark_lookup_by_uri():
diff --git a/tests/datasets/files_dataset_test.py b/tests/datasets/files_dataset_test.py
index f81716d72..93a545dff 100644
--- a/tests/datasets/files_dataset_test.py
+++ b/tests/datasets/files_dataset_test.py
@@ -96,13 +96,11 @@ def test_populated_dataset_first_file(populated_dataset: FilesDataset):
 
 
 def test_populated_dataset_benchmark_lookup_not_found(populated_dataset: FilesDataset):
-    with pytest.raises(LookupError) as e_ctx:
+    with pytest.raises(
+        LookupError, match=r"^Benchmark not found: benchmark://test-v0/not/a/file"
+    ):
         populated_dataset.benchmark("benchmark://test-v0/not/a/file")
 
-    assert str(e_ctx.value).startswith(
-        "Benchmark not found: benchmark://test-v0/not/a/file"
-    )
-
 
 def test_populated_dataset_with_file_extension_filter(populated_dataset: FilesDataset):
     populated_dataset.benchmark_file_suffix = ".jpg"
diff --git a/tests/llvm/invalid_ir_test.py b/tests/llvm/invalid_ir_test.py
index 1fc7c8123..62fe9bb07 100644
--- a/tests/llvm/invalid_ir_test.py
+++ b/tests/llvm/invalid_ir_test.py
@@ -22,11 +22,9 @@ def test_reset_invalid_ir(env: LlvmEnv):
     """Test that setting the $CXX to an invalid binary raises an error."""
     benchmark = llvm.make_benchmark(INVALID_IR_PATH)
 
-    with pytest.raises(BenchmarkInitError) as e_ctx:
+    with pytest.raises(BenchmarkInitError, match="Failed to compute .text size cost"):
         env.reset(benchmark=benchmark)
 
-    assert "Failed to compute .text size cost" in str(e_ctx.value)
-
 
 if __name__ == "__main__":
     main()
diff --git a/tests/validation_result_test.py b/tests/validation_result_test.py
index 44473dbf7..da0105fd8 100644
--- a/tests/validation_result_test.py
+++ b/tests/validation_result_test.py
@@ -144,9 +144,8 @@ def test_validation_result_equality_different_errors_order():
 
 
 def test_validation_result_join_no_inputs():
-    with pytest.raises(ValueError) as e_ctx:
+    with pytest.raises(ValueError, match=r"^No states to join$"):
         ValidationResult.join([])
-    assert str(e_ctx.value) == "No states to join"
 
 
 def test_validation_result_join_one_input():

From 5e833bf8f43456cd5af1dde21ab6a6f2b5b91c7a Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 6 Oct 2021 14:31:31 +0100
Subject: [PATCH 04/17] Replace owned logging instances with module-level
 loggers.

This replaces the per-object instances of loggers with a shared
per-module logger, accessed using logging.getLogger(__name__). The
idea is to improve the consistency of logging.
---
 compiler_gym/datasets/dataset.py              | 27 ++++----
 compiler_gym/datasets/tar_dataset.py          | 15 ++---
 compiler_gym/envs/compiler_env.py             | 61 +++++++++++--------
 compiler_gym/envs/gcc/datasets/csmith.py      |  8 ++-
 compiler_gym/envs/gcc/gcc.py                  | 24 ++++----
 compiler_gym/envs/gcc/service/gcc_service.py  | 10 +--
 compiler_gym/envs/llvm/datasets/cbench.py     |  9 +--
 compiler_gym/envs/llvm/datasets/clgen.py      |  7 ++-
 compiler_gym/envs/llvm/datasets/csmith.py     |  8 ++-
 compiler_gym/envs/llvm/datasets/poj104.py     |  5 +-
 compiler_gym/envs/llvm/llvm_benchmark.py      |  4 +-
 .../extract_passes_from_llvm_source_tree.py   |  8 ++-
 .../service/passes/filter_action_space.py     |  4 +-
 .../passes/make_action_space_genfiles.py      | 14 +++--
 .../service/loop_tool_compilation_session.py  |  6 +-
 compiler_gym/leaderboard/llvm_instcount.py    |  1 -
 compiler_gym/service/connection.py            | 32 +++++-----
 .../service/runtime/benchmark_cache.py        | 12 ++--
 .../service/runtime/compiler_gym_service.py   | 12 ++--
 compiler_gym/util/debug_util.py               | 15 +++++
 compiler_gym/util/download.py                 | 10 +--
 compiler_gym/util/minimize_trajectory.py      | 34 ++++++-----
 .../example_compiler_gym_service/env_tests.py | 10 ---
 tests/compiler_env_test.py                    | 13 ++--
 tests/datasets/dataset_test.py                |  6 ++
 tests/llvm/llvm_env_test.py                   |  5 --
 tests/service/runtime/benchmark_cache_test.py | 12 ++--
 27 files changed, 206 insertions(+), 166 deletions(-)

diff --git a/compiler_gym/datasets/dataset.py b/compiler_gym/datasets/dataset.py
index 3ee7bb71e..1ecc1a184 100644
--- a/compiler_gym/datasets/dataset.py
+++ b/compiler_gym/datasets/dataset.py
@@ -10,11 +10,18 @@
 from typing import Dict, Iterable, Optional, Union
 
 import numpy as np
+from deprecated.sphinx import deprecated
 from deprecated.sphinx import deprecated as mark_deprecated
 
 from compiler_gym.datasets.benchmark import Benchmark
 from compiler_gym.datasets.uri import DATASET_NAME_RE
-from compiler_gym.util.debug_util import get_logging_level
+
+logger = logging.getLogger(__name__)
+
+# NOTE(cummins): This is only required to prevent a name conflict with the now
+# deprecated Dataset.logger attribute. This can be removed once the logger
+# attribute is removed, scheduled for release 0.2.3.
+_logger = logger
 
 
 class Dataset:
@@ -43,7 +50,6 @@ def __init__(
         references: Optional[Dict[str, str]] = None,
         deprecated: Optional[str] = None,
         sort_order: int = 0,
-        logger: Optional[logging.Logger] = None,
         validatable: str = "No",
     ):
         """Constructor.
@@ -100,7 +106,6 @@ def __init__(
         self._deprecation_message = deprecated
         self._validatable = validatable
 
-        self._logger = logger
         self.sort_order = sort_order
         self.benchmark_class = benchmark_class
 
@@ -112,19 +117,19 @@ def __repr__(self):
         return self.name
 
     @property
+    @deprecated(
+        version="0.2.1",
+        reason=(
+            "The `Dataset.logger` attribute is deprecated. All Dataset "
+            "instances share a logger named compiler_gym.datasets"
+        ),
+    )
     def logger(self) -> logging.Logger:
         """The logger for this dataset.
 
         :type: logging.Logger
         """
-        # NOTE(cummins): Default logger instantiation is deferred since in
-        # Python 3.6 the Logger instance contains an un-pickle-able Rlock()
-        # which can prevent gym.make() from working. This is a workaround that
-        # can be removed once Python 3.6 support is dropped.
-        if self._logger is None:
-            self._logger = logging.getLogger("compiler_gym.datasets")
-            self._logger.setLevel(get_logging_level())
-        return self._logger
+        return _logger
 
     @property
     def name(self) -> str:
diff --git a/compiler_gym/datasets/tar_dataset.py b/compiler_gym/datasets/tar_dataset.py
index 4059a0d90..b6b8ab80e 100644
--- a/compiler_gym/datasets/tar_dataset.py
+++ b/compiler_gym/datasets/tar_dataset.py
@@ -5,6 +5,7 @@
 import bz2
 import gzip
 import io
+import logging
 import shutil
 import tarfile
 from threading import Lock
@@ -17,6 +18,8 @@
 from compiler_gym.util.download import download
 from compiler_gym.util.filesystem import atomic_file_write
 
+logger = logging.getLogger(__name__)
+
 # Module-level locks that ensures exclusive access to install routines across
 # threads. Note that these lock are shared across all TarDataset instances. We
 # don't use per-dataset locks as locks cannot be pickled.
@@ -89,9 +92,9 @@ def install(self) -> None:
             # Remove any partially-completed prior extraction.
             shutil.rmtree(self.site_data_path / "contents", ignore_errors=True)
 
-            self.logger.info("Downloading %s dataset", self.name)
+            logger.info("Downloading %s dataset", self.name)
             tar_data = io.BytesIO(download(self.tar_urls, self.tar_sha256))
-            self.logger.info("Unpacking %s dataset", self.name)
+            logger.info("Unpacking %s dataset", self.name)
             with tarfile.open(
                 fileobj=tar_data, mode=f"r:{self.tar_compression}"
             ) as arc:
@@ -165,7 +168,7 @@ def _read_manifest_file(self) -> List[str]:
         """
         with open(self._manifest_path, encoding="utf-8") as f:
             uris = self._read_manifest(f.read())
-        self.logger.debug("Read %s manifest, %d entries", self.name, len(uris))
+        logger.debug("Read %s manifest, %d entries", self.name, len(uris))
         return uris
 
     @memoized_property
@@ -192,7 +195,7 @@ def _benchmark_uris(self) -> List[str]:
                 )
 
             # Decompress the manifest data.
-            self.logger.debug("Downloading %s manifest", self.name)
+            logger.debug("Downloading %s manifest", self.name)
             manifest_data = io.BytesIO(
                 download(self.manifest_urls, self.manifest_sha256)
             )
@@ -206,9 +209,7 @@ def _benchmark_uris(self) -> List[str]:
                 f.write(manifest_data)
 
             uris = self._read_manifest(manifest_data.decode("utf-8"))
-            self.logger.debug(
-                "Downloaded %s manifest, %d entries", self.name, len(uris)
-            )
+            logger.debug("Downloaded %s manifest, %d entries", self.name, len(uris))
             return uris
 
     @memoized_property
diff --git a/compiler_gym/envs/compiler_env.py b/compiler_gym/envs/compiler_env.py
index 99bbca777..a6325dd32 100644
--- a/compiler_gym/envs/compiler_env.py
+++ b/compiler_gym/envs/compiler_env.py
@@ -47,7 +47,6 @@
     proto_to_action_space,
 )
 from compiler_gym.spaces import DefaultRewardFromObservation, NamedDiscrete, Reward
-from compiler_gym.util.debug_util import get_logging_level
 from compiler_gym.util.gym_type_hints import (
     ActionType,
     ObservationType,
@@ -60,6 +59,13 @@
 from compiler_gym.validation_result import ValidationResult
 from compiler_gym.views import ObservationSpaceSpec, ObservationView, RewardView
 
+logger = logging.getLogger(__name__)
+
+# NOTE(cummins): This is only required to prevent a name conflict with the now
+# deprecated CompilerEnv.logger attribute. This can be removed once the logger
+# attribute is removed, scheduled for release 0.2.3.
+_logger = logger
+
 
 def _wrapped_step(
     service: CompilerGymServiceConnection, request: StepRequest
@@ -113,11 +119,6 @@ class CompilerEnv(gym.Env):
 
     :vartype service: compiler_gym.service.CompilerGymServiceConnection
 
-    :ivar logger: A Logger instance used by the environment for communicating
-        info and warnings.
-
-    :vartype logger: logging.Logger
-
     :ivar action_spaces: A list of supported action space names.
 
     :vartype action_spaces: List[str]
@@ -210,23 +211,23 @@ def __init__(
         :param service_connection: An existing compiler gym service connection
             to use.
 
-        :param logger: The logger to use for this environment. If not provided,
-            a :code:`compiler_gym.envs` logger is used and assigned the
-            verbosity returned by :func:`get_logging_level()
-            <compiler_gym.get_logging_level>`.
-
         :raises FileNotFoundError: If service is a path to a file that is not
             found.
 
         :raises TimeoutError: If the compiler service fails to initialize within
             the parameters provided in :code:`connection_settings`.
         """
-        self.metadata = {"render.modes": ["human", "ansi"]}
+        # NOTE(cummins): Logger argument deprecated and scheduled to be removed
+        # in release 0.2.3.
+        if logger:
+            warnings.warn(
+                "The `logger` argument is deprecated on CompilerEnv.__init__() "
+                "and will be removed in a future release. All CompilerEnv "
+                "instances share a logger named compiler_gym.envs.compiler_env",
+                DeprecationWarning,
+            )
 
-        if logger is None:
-            logger = logging.getLogger("compiler_gym.envs")
-            logger.setLevel(get_logging_level())
-        self.logger = logger
+        self.metadata = {"render.modes": ["human", "ansi"]}
 
         # A compiler service supports multiple simultaneous environments. This
         # session ID is used to identify this environment.
@@ -238,7 +239,6 @@ def __init__(
         self.service = service_connection or CompilerGymServiceConnection(
             endpoint=self._service_endpoint,
             opts=self._connection_settings,
-            logger=self.logger,
         )
         self.datasets = Datasets(datasets or [])
 
@@ -330,6 +330,17 @@ def available_datasets(self) -> Dict[str, Dataset]:
         """A dictionary of datasets."""
         return {d.name: d for d in self.datasets}
 
+    @property
+    @deprecated(
+        version="0.2.1",
+        reason=(
+            "The `CompilerEnv.logger` attribute is deprecated. All CompilerEnv "
+            "instances share a logger named compiler_gym.envs.compiler_env"
+        ),
+    )
+    def logger(self):
+        return _logger
+
     @property
     def versions(self) -> GetVersionReply:
         """Get the version numbers from the compiler service."""
@@ -447,10 +458,10 @@ def benchmark(self, benchmark: Union[str, Benchmark]):
             )
         if isinstance(benchmark, str):
             benchmark_object = self.datasets.benchmark(benchmark)
-            self.logger.debug("Setting benchmark by name: %s", benchmark_object)
+            logger.debug("Setting benchmark by name: %s", benchmark_object)
             self._next_benchmark = benchmark_object
         elif isinstance(benchmark, Benchmark):
-            self.logger.debug("Setting benchmark: %s", benchmark.uri)
+            logger.debug("Setting benchmark: %s", benchmark.uri)
             self._next_benchmark = benchmark
         else:
             raise TypeError(
@@ -573,9 +584,7 @@ def fork(self) -> "CompilerEnv":
             actions = self.actions.copy()
             self.reset()
             if actions:
-                self.logger.warning(
-                    "Parent service of fork() has died, replaying state"
-                )
+                logger.warning("Parent service of fork() has died, replaying state")
                 _, _, done, _ = self.step(actions)
                 assert not done, "Failed to replay action sequence"
 
@@ -668,7 +677,7 @@ def close(self):
                 if reply.remaining_sessions:
                     close_service = False
             except Exception as e:
-                self.logger.warning(
+                logger.warning(
                     "Failed to end active compiler session on close(): %s (%s)",
                     e,
                     type(e).__name__,
@@ -720,7 +729,7 @@ def reset(  # pylint: disable=arguments-differ
 
         def _retry(error) -> Optional[ObservationType]:
             """Abort and retry on error."""
-            self.logger.warning("%s during reset(): %s", type(error).__name__, error)
+            logger.warning("%s during reset(): %s", type(error).__name__, error)
             if self.service:
                 self.service.close()
             self.service = None
@@ -763,13 +772,13 @@ def _call_with_error(
 
         # Stop an existing episode.
         if self.in_episode:
-            self.logger.debug("Ending session %d", self._session_id)
+            logger.debug("Ending session %d", self._session_id)
             error, _ = _call_with_error(
                 self.service.stub.EndSession,
                 EndSessionRequest(session_id=self._session_id),
             )
             if error:
-                self.logger.warning(
+                logger.warning(
                     "Failed to stop session %d with %s: %s",
                     self._session_id,
                     type(error).__name__,
diff --git a/compiler_gym/envs/gcc/datasets/csmith.py b/compiler_gym/envs/gcc/datasets/csmith.py
index 460899f19..2041e4d60 100644
--- a/compiler_gym/envs/gcc/datasets/csmith.py
+++ b/compiler_gym/envs/gcc/datasets/csmith.py
@@ -21,6 +21,8 @@
 from compiler_gym.util.shell_format import plural
 from compiler_gym.util.truncate import truncate
 
+logger = logging.getLogger(__name__)
+
 # The maximum value for the --seed argument to csmith.
 UINT_MAX = (2 ** 32) - 1
 
@@ -194,7 +196,7 @@ def benchmark_from_seed(
 
         # Run csmith with the given seed and pipe the output to clang to
         # assemble a bitcode.
-        self.logger.debug("Exec csmith --seed %d", seed)
+        logger.debug("Exec csmith --seed %d", seed)
         csmith = subprocess.Popen(
             [str(self.csmith_bin_path), "--seed", str(seed)],
             stdout=subprocess.PIPE,
@@ -208,11 +210,11 @@ def benchmark_from_seed(
                 stderr = "\n".join(
                     truncate(stderr.decode("utf-8"), max_line_len=200, max_lines=20)
                 )
-                logging.warning("Csmith failed with seed %d: %s", seed, stderr)
+                logger.warning("Csmith failed with seed %d: %s", seed, stderr)
             except UnicodeDecodeError:
                 # Failed to interpret the stderr output, generate a generic
                 # error message.
-                logging.warning("Csmith failed with seed %d", seed)
+                logger.warning("Csmith failed with seed %d", seed)
             return self.benchmark_from_seed(
                 seed, max_retries=max_retries, retry_count=retry_count + 1
             )
diff --git a/compiler_gym/envs/gcc/gcc.py b/compiler_gym/envs/gcc/gcc.py
index 41f875426..e4561611f 100755
--- a/compiler_gym/envs/gcc/gcc.py
+++ b/compiler_gym/envs/gcc/gcc.py
@@ -32,6 +32,8 @@
 from compiler_gym.util.filesystem import atomic_file_write
 from compiler_gym.util.runfiles_path import site_data_path
 
+logger = logging.getLogger(__name__)
+
 
 class Option:
     """An Option is either a command line optimization setting or a parameter.
@@ -156,7 +158,7 @@ class GccFlagAlignOption(Option):
     """Alignment flags. These take several forms. See the GCC documentation."""
 
     def __init__(self, name: str):
-        logging.warning("Alignment options not properly handled %s", name)
+        logger.warning("Alignment options not properly handled %s", name)
         self.name = name
 
     def __len__(self):
@@ -390,7 +392,7 @@ def size(self) -> int:
 def _gcc_parse_optimize(gcc: Gcc) -> List[Option]:
     """Parse the optimization help string from the GCC binary to find options."""
 
-    logging.debug("Parsing GCC optimization space")
+    logger.debug("Parsing GCC optimization space")
 
     # Call 'gcc --help=optimize -Q'
     result = gcc("--help=optimize", "-Q", timeout=60)
@@ -519,7 +521,7 @@ def parse_line(line: str):
             add_gcc_flag_int(name, min, max)
             return
 
-        logging.warning("Unknown GCC optimization flag spec, '%s'", line)
+        logger.warning("Unknown GCC optimization flag spec, '%s'", line)
 
     # Parse all the lines
     for line in out:
@@ -533,7 +535,7 @@ def _gcc_parse_params(gcc: Gcc) -> List[Option]:
     """Parse the param help string from the GCC binary to find options."""
 
     # Pretty much identical to _gcc_parse_optimize
-    logging.debug("Parsing GCC param space")
+    logger.debug("Parsing GCC param space")
 
     result = gcc("--help=param", "-Q", timeout=60)
     out = result.split("\n")[1:]
@@ -622,7 +624,7 @@ def parse_line(line: str):
                 add_gcc_param_int(name, min, max)
                 return
 
-        logging.warning("Unknown GCC param flag spec, '%s'", line)
+        logger.warning("Unknown GCC param flag spec, '%s'", line)
 
     # breakpoint()
     for line in out:
@@ -682,13 +684,13 @@ def keep(option: Option) -> bool:
 
 def _gcc_get_version(gcc: Gcc) -> str:
     """Get the version string"""
-    logging.debug("Getting GCC version for %s", gcc.bin)
+    logger.debug("Getting GCC version for %s", gcc.bin)
     try:
         result = gcc("--version", timeout=60)
     except ServiceError as e:
         raise EnvironmentNotSupported(f"Failed to run GCC binary: {gcc.bin}") from e
     version = result.split("\n")[0]
-    logging.debug("GCC version is %s", version)
+    logger.debug("GCC version is %s", version)
     if "gcc" not in version:
         raise ServiceInitError(f"Invalid GCC version string: {version}")
     return version
@@ -724,9 +726,9 @@ def _get_spec(gcc: Gcc, cache_dir: Path) -> Optional[GccSpec]:
             with open(spec_path, "rb") as f:
                 spec = pickle.load(f)
             spec = GccSpec(gcc=gcc, version=spec.version, options=spec.options)
-            logging.debug("GccSpec for version '%s' read from %s", version, spec_path)
+            logger.debug("GccSpec for version '%s' read from %s", version, spec_path)
         except (pickle.UnpicklingError, EOFError) as e:
-            logging.warning("Unable to read spec from '%s': %s", spec_path, e)
+            logger.warning("Unable to read spec from '%s': %s", spec_path, e)
 
     if spec is None:
         # Pickle doesn't exist, parse
@@ -741,8 +743,8 @@ def _get_spec(gcc: Gcc, cache_dir: Path) -> Optional[GccSpec]:
         spec_path.parent.mkdir(exist_ok=True, parents=True)
         with atomic_file_write(spec_path, fileobj=True) as f:
             pickle.dump(spec, f)
-        logging.debug("GccSpec for %s written to %s", version, spec_path)
+        logger.debug("GccSpec for %s written to %s", version, spec_path)
 
-    logging.debug("GccSpec size is approximately 10^%.0f", round(math.log(spec.size)))
+    logger.debug("GccSpec size is approximately 10^%.0f", round(math.log(spec.size)))
 
     return spec
diff --git a/compiler_gym/envs/gcc/service/gcc_service.py b/compiler_gym/envs/gcc/service/gcc_service.py
index b4db8ef4d..df78f866d 100755
--- a/compiler_gym/envs/gcc/service/gcc_service.py
+++ b/compiler_gym/envs/gcc/service/gcc_service.py
@@ -32,6 +32,8 @@
     ScalarRangeList,
 )
 
+logger = logging.getLogger(__name__)
+
 
 def make_gcc_compilation_session(gcc_bin: str):
     """Create a class to represent a GCC compilation service.
@@ -402,7 +404,7 @@ def compile(self) -> Optional[str]:
             """Compile the benchmark"""
             if not self._obj:
                 self.prepare_files()
-                logging.debug(
+                logger.debug(
                     "Compiling: %s", " ".join(map(str, self.obj_command_line()))
                 )
                 gcc(
@@ -420,7 +422,7 @@ def assemble(self) -> Optional[str]:
             """Assemble the benchmark"""
             if not self._asm:
                 self.prepare_files()
-                logging.debug(
+                logger.debug(
                     "Assembling: %s", " ".join(map(str, self.asm_command_line()))
                 )
                 gcc(
@@ -439,7 +441,7 @@ def dump_rtl(self) -> Optional[str]:
             """Dump the RTL (and assemble the benchmark)"""
             if not self._rtl:
                 self.prepare_files()
-                logging.debug(
+                logger.debug(
                     "Dumping RTL: %s", " ".join(map(str, self.rtl_command_line()))
                 )
                 gcc(
@@ -484,7 +486,7 @@ def apply_action(
             # Apply the action to this session and check if we changed anything
             old_choices = self.choices.copy()
             action(self)
-            logging.debug("Applied action %s", action)
+            logger.debug("Applied action %s", action)
 
             # Reset the internal variables if this action has caused a change in the
             # choices
diff --git a/compiler_gym/envs/llvm/datasets/cbench.py b/compiler_gym/envs/llvm/datasets/cbench.py
index 3aad537f3..6a7fcf595 100644
--- a/compiler_gym/envs/llvm/datasets/cbench.py
+++ b/compiler_gym/envs/llvm/datasets/cbench.py
@@ -27,6 +27,8 @@
 from compiler_gym.util.timer import Timer
 from compiler_gym.validation_result import ValidationError
 
+logger = logging.getLogger(__name__)
+
 _CBENCH_TARS = {
     "macos": (
         "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-cBench-v1-macos.tar.bz2",
@@ -94,7 +96,6 @@ def _compile_and_run_bitcode_file(
     linkopts: List[str],
     env: Dict[str, str],
     num_runs: int,
-    logger: logging.Logger,
     sanitizer: Optional[LlvmSanitizer] = None,
     timeout_seconds: float = 300,
     compilation_timeout_seconds: float = 60,
@@ -317,7 +318,6 @@ def validator_cb(env: "LlvmEnv") -> Optional[ValidationError]:  # noqa: F821
                         linkopts=linkopts + ["-O2"],
                         # Always assume safe.
                         sanitizer=None,
-                        logger=env.logger,
                         env=os_env,
                     )
                     if gold_standard.error:
@@ -354,7 +354,6 @@ def validator_cb(env: "LlvmEnv") -> Optional[ValidationError]:  # noqa: F821
                 num_runs=num_runs,
                 linkopts=linkopts,
                 sanitizer=sanitizer,
-                logger=env.logger,
                 env=os_env,
             )
 
@@ -409,9 +408,7 @@ def flaky_wrapped_cb(env: "LlvmEnv") -> Optional[ValidationError]:  # noqa: F821
                 # Timeout errors can be raised by the environment in case of a
                 # slow step / observation, and should be retried.
                 pass
-            env.logger.warning(
-                "Validation callback failed, attempt=%d/%d", j, flakiness
-            )
+            logger.warning("Validation callback failed, attempt=%d/%d", j, flakiness)
         return error
 
     return flaky_wrapped_cb
diff --git a/compiler_gym/envs/llvm/datasets/clgen.py b/compiler_gym/envs/llvm/datasets/clgen.py
index 006d56c24..6283c90fe 100644
--- a/compiler_gym/envs/llvm/datasets/clgen.py
+++ b/compiler_gym/envs/llvm/datasets/clgen.py
@@ -3,6 +3,7 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 import io
+import logging
 import shutil
 import subprocess
 import tarfile
@@ -19,6 +20,8 @@
 from compiler_gym.util.filesystem import atomic_file_write
 from compiler_gym.util.truncate import truncate
 
+logger = logging.getLogger(__name__)
+
 _CLGEN_INSTALL_LOCK = Lock()
 
 
@@ -93,7 +96,7 @@ def install(self):
 
             # Download the libclc headers.
             shutil.rmtree(self.libclc_dir, ignore_errors=True)
-            self.logger.info("Downloading OpenCL headers")
+            logger.info("Downloading OpenCL headers")
             tar_data = io.BytesIO(
                 download(
                     "https://dl.fbaipublicfiles.com/compiler_gym/libclc-v0.tar.bz2",
@@ -148,7 +151,7 @@ def benchmark(self, uri: str) -> Benchmark:
                         "-w",  # No warnings.
                     ],
                 ).command(outpath=tmp_bc_path)
-                self.logger.debug("Exec %s", compile_command)
+                logger.debug("Exec %s", compile_command)
                 clang = subprocess.Popen(
                     compile_command,
                     stdin=subprocess.PIPE,
diff --git a/compiler_gym/envs/llvm/datasets/csmith.py b/compiler_gym/envs/llvm/datasets/csmith.py
index 6050d85bc..c7adb5220 100644
--- a/compiler_gym/envs/llvm/datasets/csmith.py
+++ b/compiler_gym/envs/llvm/datasets/csmith.py
@@ -18,6 +18,8 @@
 from compiler_gym.util.shell_format import plural
 from compiler_gym.util.truncate import truncate
 
+logger = logging.getLogger(__name__)
+
 # The maximum value for the --seed argument to csmith.
 UINT_MAX = (2 ** 32) - 1
 
@@ -178,7 +180,7 @@ def benchmark_from_seed(
 
         # Run csmith with the given seed and pipe the output to clang to
         # assemble a bitcode.
-        self.logger.debug("Exec csmith --seed %d", seed)
+        logger.debug("Exec csmith --seed %d", seed)
         csmith = subprocess.Popen(
             [str(self.csmith_bin_path), "--seed", str(seed)],
             stdout=subprocess.PIPE,
@@ -192,11 +194,11 @@ def benchmark_from_seed(
                 stderr = "\n".join(
                     truncate(stderr.decode("utf-8"), max_line_len=200, max_lines=20)
                 )
-                logging.warning("Csmith failed with seed %d: %s", seed, stderr)
+                logger.warning("Csmith failed with seed %d: %s", seed, stderr)
             except UnicodeDecodeError:
                 # Failed to interpret the stderr output, generate a generic
                 # error message.
-                logging.warning("Csmith failed with seed %d", seed)
+                logger.warning("Csmith failed with seed %d", seed)
             return self.benchmark_from_seed(
                 seed, max_retries=max_retries, retry_count=retry_count + 1
             )
diff --git a/compiler_gym/envs/llvm/datasets/poj104.py b/compiler_gym/envs/llvm/datasets/poj104.py
index 39fc8e9c1..140d73174 100644
--- a/compiler_gym/envs/llvm/datasets/poj104.py
+++ b/compiler_gym/envs/llvm/datasets/poj104.py
@@ -2,6 +2,7 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
+import logging
 import subprocess
 import sys
 from concurrent.futures import as_completed
@@ -16,6 +17,8 @@
 from compiler_gym.util.filesystem import atomic_file_write
 from compiler_gym.util.truncate import truncate
 
+logger = logging.getLogger(__name__)
+
 
 class POJ104Dataset(TarDatasetWithManifest):
     """The POJ-104 dataset contains 52000 C++ programs implementing 104
@@ -101,7 +104,7 @@ def benchmark(self, uri: Optional[str] = None) -> Benchmark:
                         "-std=c++11",
                     ],
                 ).command(outpath=tmp_bitcode_path)
-                self.logger.debug("Exec %s", compile_cmd)
+                logger.debug("Exec %s", compile_cmd)
                 clang = subprocess.Popen(
                     compile_cmd,
                     stdin=subprocess.PIPE,
diff --git a/compiler_gym/envs/llvm/llvm_benchmark.py b/compiler_gym/envs/llvm/llvm_benchmark.py
index 4a58361a8..5a36dbf4d 100644
--- a/compiler_gym/envs/llvm/llvm_benchmark.py
+++ b/compiler_gym/envs/llvm/llvm_benchmark.py
@@ -20,6 +20,8 @@
 from compiler_gym.util.runfiles_path import transient_cache_path
 from compiler_gym.util.thread_pool import get_thread_pool_executor
 
+logger = logging.getLogger(__name__)
+
 
 def _communicate(process, input=None, timeout=None):
     """subprocess.communicate() which kills subprocess on timeout."""
@@ -115,7 +117,7 @@ def get_system_includes() -> List[Path]:
         try:
             _SYSTEM_INCLUDES = list(get_compiler_includes(system_compiler))
         except OSError as e:
-            logging.warning("%s", e)
+            logger.warning("%s", e)
             _SYSTEM_INCLUDES = []
     return _SYSTEM_INCLUDES
 
diff --git a/compiler_gym/envs/llvm/service/passes/extract_passes_from_llvm_source_tree.py b/compiler_gym/envs/llvm/service/passes/extract_passes_from_llvm_source_tree.py
index e03aab0cd..f75136528 100644
--- a/compiler_gym/envs/llvm/service/passes/extract_passes_from_llvm_source_tree.py
+++ b/compiler_gym/envs/llvm/service/passes/extract_passes_from_llvm_source_tree.py
@@ -36,6 +36,8 @@
 from compiler_gym.envs.llvm.service.passes.common import Pass
 from compiler_gym.envs.llvm.service.passes.config import CREATE_PASS_NAME_MAP
 
+logger = logging.getLogger(__name__)
+
 # A regular expression to match the start of an invocation of one of the
 # InitializePass helper macros.
 INITIALIZE_PASS_RE = r"(INITIALIZE_PASS|INITIALIZE_PASS_BEGIN|INITIALIZE_PASS_WITH_OPTIONS|INITIALIZE_PASS_WITH_OPTIONS_BEGIN)\("
@@ -221,11 +223,11 @@ def handle_file(source_path: Path) -> Tuple[Path, List[Pass]]:
             sys.exit(1)
 
     if passes:
-        logging.debug(
+        logger.debug(
             f"Extracted {len(passes)} {'passes' if len(passes) - 1 else 'pass'} from {source_path}",
         )
     else:
-        logging.debug(f"Found no passes in {source_path}")
+        logger.debug(f"Found no passes in {source_path}")
 
     return passes
 
@@ -245,7 +247,7 @@ def main(argv):
             universal_newlines=True,
         )
         matching_paths += grep.strip().split("\n")
-        logging.debug("Processing %s files ...", len(matching_paths))
+        logger.debug("Processing %s files ...", len(matching_paths))
         paths = [Path(path) for path in matching_paths]
 
     # Build a list of pass entries.
diff --git a/compiler_gym/envs/llvm/service/passes/filter_action_space.py b/compiler_gym/envs/llvm/service/passes/filter_action_space.py
index d5126ac84..9204e6ad4 100644
--- a/compiler_gym/envs/llvm/service/passes/filter_action_space.py
+++ b/compiler_gym/envs/llvm/service/passes/filter_action_space.py
@@ -15,6 +15,8 @@
 from compiler_gym.envs.llvm.service.passes import config
 from compiler_gym.envs.llvm.service.passes.common import Pass
 
+logger = logging.getLogger(__name__)
+
 
 def filter_passes(pass_iterator: Iterable[Pass]) -> Iterable[Pass]:
     """Apply config.include_pass() to an input sequence of passes.
@@ -29,7 +31,7 @@ def filter_passes(pass_iterator: Iterable[Pass]) -> Iterable[Pass]:
         total_count += 1
         if config.include_pass(pass_):
             selected_count += 1
-            logging.debug(
+            logger.debug(
                 f"Selected {pass_.name} pass ({pass_.flag}) from {pass_.source}",
             )
             yield pass_
diff --git a/compiler_gym/envs/llvm/service/passes/make_action_space_genfiles.py b/compiler_gym/envs/llvm/service/passes/make_action_space_genfiles.py
index 1e3a57bbd..58d5e800e 100644
--- a/compiler_gym/envs/llvm/service/passes/make_action_space_genfiles.py
+++ b/compiler_gym/envs/llvm/service/passes/make_action_space_genfiles.py
@@ -84,6 +84,8 @@
 from compiler_gym.envs.llvm.service.passes.common import Pass
 from compiler_gym.envs.llvm.service.passes.config import EXTRA_LLVM_HEADERS
 
+logger = logging.getLogger(__name__)
+
 
 def process_pass(pass_, headers, enum_f, switch_f):
     """Extract and process transform passes in header."""
@@ -121,8 +123,8 @@ def make_action_sources(pass_iterator, outpath: Path):
         print("};", file=enum_f)
         print("  }", file=switch_f)
 
-    logging.debug("Generated %s", switch_path.name)
-    logging.debug("Generated %s", enum_path.name)
+    logger.debug("Generated %s", switch_path.name)
+    logger.debug("Generated %s", enum_path.name)
 
     with open(include_path, "w") as f:
         print("#pragma once", file=f)
@@ -141,17 +143,17 @@ def make_action_sources(pass_iterator, outpath: Path):
 """,
             file=f,
         )
-    logging.debug("Generated %s", include_path.name)
+    logger.debug("Generated %s", include_path.name)
 
     with open(flags_path, "w") as f:
         print("\n".join(p.flag for p in passes), file=f)
-    logging.debug("Generated %s", flags_path.name)
+    logger.debug("Generated %s", flags_path.name)
 
     with open(descriptions_path, "w") as f:
         print("\n".join(p.description for p in passes), file=f)
-    logging.debug("Generated %s", descriptions_path.name)
+    logger.debug("Generated %s", descriptions_path.name)
 
-    logging.debug("Created genfiles for %s pass actions", total_passes)
+    logger.debug("Created genfiles for %s pass actions", total_passes)
 
 
 def main(argv):
diff --git a/compiler_gym/envs/loop_tool/service/loop_tool_compilation_session.py b/compiler_gym/envs/loop_tool/service/loop_tool_compilation_session.py
index a794077b6..dfb64b265 100644
--- a/compiler_gym/envs/loop_tool/service/loop_tool_compilation_session.py
+++ b/compiler_gym/envs/loop_tool/service/loop_tool_compilation_session.py
@@ -28,6 +28,8 @@
     ScalarRangeList,
 )
 
+logger = logging.getLogger(__name__)
+
 
 class LoopToolCompilationSession(CompilationSession):
     """Represents an instance of an interactive loop_tool session."""
@@ -128,7 +130,7 @@ def __init__(
         self.thread = [1, 0, 0]
         self.cursor = 0
         self.mode = "size"
-        logging.info("Started a compilation session for %s", benchmark.uri)
+        logger.info("Started a compilation session for %s", benchmark.uri)
 
     def resize(self, increment):
         """
@@ -219,7 +221,7 @@ def apply_action(self, action: Action) -> Tuple[bool, Optional[ActionSpace], boo
         ):
             raise ValueError("Out-of-range")
 
-        logging.info("Applied action %d", choice_index)
+        logger.info("Applied action %d", choice_index)
 
         act = self.action_space.choice[0].named_discrete_space.value[choice_index]
         if self.mode not in ["size", "select"]:
diff --git a/compiler_gym/leaderboard/llvm_instcount.py b/compiler_gym/leaderboard/llvm_instcount.py
index f071b3c1e..26e3c0ad0 100644
--- a/compiler_gym/leaderboard/llvm_instcount.py
+++ b/compiler_gym/leaderboard/llvm_instcount.py
@@ -230,7 +230,6 @@ def main(argv):
             # Stream verbose CompilerGym logs to file.
             logger = logging.getLogger("compiler_gym")
             logger.setLevel(logging.DEBUG)
-            env.logger.setLevel(logging.DEBUG)
             log_handler = logging.FileHandler(FLAGS.leaderboard_logfile)
             logger.addHandler(log_handler)
             logger.propagate = False
diff --git a/compiler_gym/service/connection.py b/compiler_gym/service/connection.py
index 8ac52f52a..033cb4571 100644
--- a/compiler_gym/service/connection.py
+++ b/compiler_gym/service/connection.py
@@ -25,7 +25,7 @@
     GetSpacesRequest,
     ObservationSpace,
 )
-from compiler_gym.util.debug_util import get_debug_level
+from compiler_gym.util.debug_util import get_debug_level, logging_level_to_debug_level
 from compiler_gym.util.runfiles_path import (
     runfiles_path,
     site_data_path,
@@ -42,6 +42,8 @@
     ("grpc.enable_http_proxy", 0),
 ]
 
+logger = logging.getLogger(__name__)
+
 
 class ConnectionOpts(BaseModel):
     """The options used to configure a connection to a service."""
@@ -158,16 +160,14 @@ def __call__(
 class Connection:
     """Base class for service connections."""
 
-    def __init__(self, channel, url: str, logger: logging.Logger):
+    def __init__(self, channel, url: str):
         """Constructor. Don't instantiate this directly, use the subclasses.
 
         :param channel: The RPC channel to use.
         :param url: The URL of the RPC service.
-        :param logger: A logger instance that will be used for logging messages.
         """
         self.channel = channel
         self.url = url
-        self.logger = logger
         self.stub = CompilerGymServiceStub(self.channel)
         self.spaces: GetSpacesReply = self(self.stub.GetSpaces, GetSpacesRequest())
 
@@ -223,7 +223,7 @@ def __call__(
                             f"{self.url} {e.details()} ({max_retries} retries)"
                         ) from None
                     remaining = max_retries - attempt
-                    self.logger.warning(
+                    logger.warning(
                         "%s %s (%d %s remaining)",
                         self.url,
                         e.details(),
@@ -297,7 +297,6 @@ def __init__(
         port_init_max_seconds: float,
         rpc_init_max_seconds: float,
         process_exit_max_seconds: float,
-        logger: logging.Logger,
         script_args: List[str],
         script_env: Dict[str, str],
     ):
@@ -333,7 +332,9 @@ def __init__(
         # Set the verbosity of the service. The logging level of the service is
         # the debug level - 1, so that COMPILER_GYM_DEBUG=3 will cause VLOG(2)
         # and lower to be logged to stdout.
-        debug_level = get_debug_level()
+        debug_level = max(
+            get_debug_level(), logging_level_to_debug_level(logger.getEffectiveLevel())
+        )
         if debug_level > 0:
             cmd.append("--alsologtostderr")
             cmd.append(f"-v={debug_level - 1}")
@@ -445,7 +446,7 @@ def __init__(
                 f"{rpc_init_max_seconds:.1f} seconds.{logs_message}"
             )
 
-        super().__init__(channel, url, logger)
+        super().__init__(channel, url)
 
     def loglines(self) -> Iterable[str]:
         """Fetch any available log lines from the service backend.
@@ -478,14 +479,14 @@ def close(self):
                     f"Service exited with returncode {self.process.returncode}"
                 )
         except ProcessLookupError:
-            self.logger.warning("Service process not found at %s", self.working_dir)
+            logger.warning("Service process not found at %s", self.working_dir)
         except subprocess.TimeoutExpired:
             # Try and kill it and then walk away.
             try:
                 self.process.kill()
             except:  # noqa
                 pass
-            self.logger.warning("Abandoning orphan service at %s", self.working_dir)
+            logger.warning("Abandoning orphan service at %s", self.working_dir)
         finally:
             shutil.rmtree(self.working_dir, ignore_errors=True)
             super().close()
@@ -498,7 +499,7 @@ def __repr__(self):
 class UnmanagedConnection(Connection):
     """A connection to a service that is not managed by this process."""
 
-    def __init__(self, url: str, rpc_init_max_seconds: float, logger: logging.Logger):
+    def __init__(self, url: str, rpc_init_max_seconds: float):
         """Constructor.
 
         :param url: The URL of the service to connect to.
@@ -529,7 +530,7 @@ def __init__(self, url: str, rpc_init_max_seconds: float, logger: logging.Logger
                 f"{rpc_init_max_seconds:.1f} seconds"
             )
 
-        super().__init__(channel, url, logger)
+        super().__init__(channel, url)
 
     def __repr__(self):
         return self.url
@@ -588,7 +589,6 @@ def __init__(
         self,
         endpoint: Union[str, Path],
         opts: ConnectionOpts = None,
-        logger: Optional[logging.Logger] = None,
     ):
         """Constructor.
 
@@ -604,7 +604,6 @@ def __init__(
         self.opts = opts or ConnectionOpts()
         self.connection = None
         self.stub = None
-        self.logger = logger or logging.getLogger("")
         self._establish_connection()
 
         self.action_spaces: List[ActionSpace] = list(
@@ -616,7 +615,7 @@ def __init__(
 
     def _establish_connection(self) -> None:
         """Create and establish a connection."""
-        self.connection = self._create_connection(self.endpoint, self.opts, self.logger)
+        self.connection = self._create_connection(self.endpoint, self.opts)
         self.stub = self.connection.stub
 
     @classmethod
@@ -624,7 +623,6 @@ def _create_connection(
         cls,
         endpoint: Union[str, Path],
         opts: ConnectionOpts,
-        logger: logging.Logger,
     ) -> Connection:
         """Initialize the service connection, either by connecting to an RPC
         service or by starting a locally-managed subprocess.
@@ -656,7 +654,6 @@ def _create_connection(
                         process_exit_max_seconds=opts.local_service_exit_max_seconds,
                         rpc_init_max_seconds=opts.rpc_init_max_seconds,
                         port_init_max_seconds=opts.local_service_port_init_max_seconds,
-                        logger=logger,
                         script_args=opts.script_args,
                         script_env=opts.script_env,
                     )
@@ -665,7 +662,6 @@ def _create_connection(
                     return UnmanagedConnection(
                         url=endpoint,
                         rpc_init_max_seconds=opts.rpc_init_max_seconds,
-                        logger=logger,
                     )
             except (TimeoutError, ServiceError, NotImplementedError) as e:
                 # Catch preventable errors so that we can retry:
diff --git a/compiler_gym/service/runtime/benchmark_cache.py b/compiler_gym/service/runtime/benchmark_cache.py
index 72a862b75..add4eea2b 100644
--- a/compiler_gym/service/runtime/benchmark_cache.py
+++ b/compiler_gym/service/runtime/benchmark_cache.py
@@ -11,6 +11,8 @@
 
 MAX_SIZE_IN_BYTES = 512 * 104 * 1024
 
+logger = logging.getLogger(__name__)
+
 
 class BenchmarkCache:
     """An in-memory cache of Benchmark messages.
@@ -24,11 +26,9 @@ def __init__(
         self,
         max_size_in_bytes: int = MAX_SIZE_IN_BYTES,
         rng: Optional[np.random.Generator] = None,
-        logger: Optional[logging.Logger] = None,
     ):
         self._max_size_in_bytes = max_size_in_bytes
         self.rng = rng or np.random.default_rng()
-        self.logger = logger or logging.getLogger("compiler_gym")
 
         self._benchmarks: Dict[str, Benchmark] = {}
         self._size_in_bytes = 0
@@ -46,7 +46,7 @@ def __contains__(self, uri: str):
 
     def __setitem__(self, uri: str, benchmark: Benchmark):
         """Add benchmark to cache."""
-        self.logger.debug(
+        logger.debug(
             "Caching benchmark %s. Cache size = %d bytes, %d items",
             uri,
             self.size_in_bytes,
@@ -61,14 +61,14 @@ def __setitem__(self, uri: str, benchmark: Benchmark):
         size = benchmark.ByteSize()
         if self.size_in_bytes + size > self.max_size_in_bytes:
             if size > self.max_size_in_bytes:
-                self.logger.warning(
+                logger.warning(
                     "Adding new benchmark with size %d bytes exceeds total "
                     "target cache size of %d bytes",
                     size,
                     self.max_size_in_bytes,
                 )
             else:
-                self.logger.debug(
+                logger.debug(
                     "Adding new benchmark with size %d bytes "
                     "exceeds maximum size %d bytes, %d items",
                     size,
@@ -96,7 +96,7 @@ def evict_to_capacity(self, target_size_in_bytes: Optional[int] = None) -> None:
             del self._benchmarks[key]
 
         if evicted:
-            self.logger.info(
+            logger.info(
                 "Evicted %d benchmarks from cache. "
                 "Benchmark cache size now %d bytes, %d items",
                 evicted,
diff --git a/compiler_gym/service/runtime/compiler_gym_service.py b/compiler_gym/service/runtime/compiler_gym_service.py
index 9fb507b97..50c4b5af4 100644
--- a/compiler_gym/service/runtime/compiler_gym_service.py
+++ b/compiler_gym/service/runtime/compiler_gym_service.py
@@ -32,6 +32,8 @@
 from compiler_gym.service.runtime.benchmark_cache import BenchmarkCache
 from compiler_gym.util.version import __version__
 
+logger = logging.getLogger(__name__)
+
 # NOTE(cummins): The CompilerGymService class is used in a subprocess by a
 # compiler service, so code coverage tracking does not work. As such we use "#
 # pragma: no cover" annotation for all definitions in this file.
@@ -85,7 +87,7 @@ def __init__(self, working_directory: Path, compilation_session_type):
     def GetVersion(self, request: GetVersionRequest, context) -> GetVersionReply:
         del context  # Unused
         del request  # Unused
-        logging.debug("GetVersion()")
+        logger.debug("GetVersion()")
         return GetVersionReply(
             service_version=__version__,
             compiler_version=self.compilation_session_type.compiler_version,
@@ -93,7 +95,7 @@ def GetVersion(self, request: GetVersionRequest, context) -> GetVersionReply:
 
     def GetSpaces(self, request: GetSpacesRequest, context) -> GetSpacesReply:
         del request  # Unused
-        logging.debug("GetSpaces()")
+        logger.debug("GetSpaces()")
         with exception_to_grpc_status(context):
             return GetSpacesReply(
                 action_space_list=self.action_spaces,
@@ -102,7 +104,7 @@ def GetSpaces(self, request: GetSpacesRequest, context) -> GetSpacesReply:
 
     def StartSession(self, request: StartSessionRequest, context) -> StartSessionReply:
         """Create a new compilation session."""
-        logging.debug(
+        logger.debug(
             "StartSession(id=%d, benchmark=%s), %d active sessions",
             self.next_session_id,
             request.benchmark.uri,
@@ -148,7 +150,7 @@ def StartSession(self, request: StartSessionRequest, context) -> StartSessionRep
 
     def EndSession(self, request: EndSessionRequest, context) -> EndSessionReply:
         del context  # Unused
-        logging.debug(
+        logger.debug(
             "EndSession(id=%d), %d sessions remaining",
             request.session_id,
             len(self.sessions) - 1,
@@ -160,7 +162,7 @@ def EndSession(self, request: EndSessionRequest, context) -> EndSessionReply:
             return EndSessionReply(remaining_sessions=len(self.sessions))
 
     def Step(self, request: StepRequest, context) -> StepReply:
-        logging.debug("Step()")
+        logger.debug("Step()")
         reply = StepReply()
 
         if request.session_id not in self.sessions:
diff --git a/compiler_gym/util/debug_util.py b/compiler_gym/util/debug_util.py
index 27ff77586..edc87db40 100644
--- a/compiler_gym/util/debug_util.py
+++ b/compiler_gym/util/debug_util.py
@@ -68,3 +68,18 @@ def set_debug_level(level: int) -> None:
     :param level: The debugging level to use.
     """
     os.environ["COMPILER_GYM_DEBUG"] = str(level)
+    logging.getLogger("compiler_gym").setLevel(
+        _DEBUG_LEVEL_LOGGING_LEVEL_MAP.get(level, logging.DEBUG)
+    )
+
+
+def logging_level_to_debug_level(logging_level: int) -> int:
+    """Convert a python logging level to a debug level.
+
+    See :func:`get_debug_level` for a description of the debug levels.
+
+    :param logging_level: A python logging level.
+
+    :returns: An integer logging level in the range :code:`[0,3]`.
+    """
+    return max(_LOGGING_LEVEL_DEBUG_LEVEL_MAP.get(logging_level, 1) - 1, 0)
diff --git a/compiler_gym/util/download.py b/compiler_gym/util/download.py
index 2171e525b..c68a54ace 100644
--- a/compiler_gym/util/download.py
+++ b/compiler_gym/util/download.py
@@ -14,6 +14,8 @@
 from compiler_gym.util.runfiles_path import cache_path
 from compiler_gym.util.truncate import truncate
 
+logger = logging.getLogger(__name__)
+
 
 class DownloadFailed(IOError):
     """Error thrown if a download fails."""
@@ -42,7 +44,7 @@ def _get_url_data(url: str) -> bytes:
 
 
 def _do_download_attempt(url: str, sha256: Optional[str]) -> bytes:
-    logging.info("Downloading %s ...", url)
+    logger.info("Downloading %s ...", url)
     content = _get_url_data(url)
     if sha256:
         # Validate the checksum.
@@ -63,7 +65,7 @@ def _do_download_attempt(url: str, sha256: Optional[str]) -> bytes:
         with atomic_file_write(path, fileobj=True) as f:
             f.write(content)
 
-    logging.debug(f"Downloaded {url}")
+    logger.debug(f"Downloaded {url}")
     return content
 
 
@@ -85,7 +87,7 @@ def _download(urls: List[str], sha256: Optional[str], max_retries: int) -> bytes
                 return _do_download_attempt(url, sha256)
             except TooManyRequests as e:
                 last_exception = e
-                logging.info(
+                logger.info(
                     "Download attempt failed with Too Many Requests error. "
                     "Watiting %.1f seconds",
                     wait_time,
@@ -93,7 +95,7 @@ def _download(urls: List[str], sha256: Optional[str], max_retries: int) -> bytes
                 sleep(wait_time)
                 wait_time *= 1.5
             except DownloadFailed as e:
-                logging.info("Download attempt failed: %s", truncate(e))
+                logger.info("Download attempt failed: %s", truncate(e))
                 last_exception = e
     raise last_exception
 
diff --git a/compiler_gym/util/minimize_trajectory.py b/compiler_gym/util/minimize_trajectory.py
index face9be21..0de687699 100644
--- a/compiler_gym/util/minimize_trajectory.py
+++ b/compiler_gym/util/minimize_trajectory.py
@@ -17,6 +17,8 @@
 
 from compiler_gym.util.truncate import truncate
 
+logger = logging.getLogger(__name__)
+
 
 class MinimizationError(OSError):
     """Error raised if trajectory minimization fails."""
@@ -30,7 +32,7 @@ class MinimizationError(OSError):
 def environment_validation_fails(env: "CompilerEnv") -> bool:  # noqa: F821
     """A hypothesis that holds true if environment validation fails."""
     validation_result = env.validate()
-    logging.debug(truncate(str(validation_result), max_lines=1, max_line_len=120))
+    logger.debug(truncate(str(validation_result), max_lines=1, max_line_len=120))
     return not validation_result.okay()
 
 
@@ -38,13 +40,13 @@ def _apply_and_test(env, actions, hypothesis, flakiness) -> bool:
     """Run specific actions on environment and return whether hypothesis holds."""
     env.reset(benchmark=env.benchmark)
     for _ in range(flakiness):
-        logging.debug("Applying %d actions ...", len(actions))
+        logger.debug("Applying %d actions ...", len(actions))
         _, _, done, info = env.step(actions)
         if done:
             raise MinimizationError(
                 f"Failed to replay actions: {info.get('error_details', '')}"
             )
-        logging.debug("Applied %d actions", len(actions))
+        logger.debug("Applied %d actions", len(actions))
         if hypothesis(env):
             return True
     return False
@@ -87,7 +89,7 @@ def apply_and_test(actions):
     if not all_actions:
         return env
 
-    logging.info(
+    logger.info(
         "%sisecting sequence of %d actions",
         "Reverse b" if reverse else "B",
         len(all_actions),
@@ -104,13 +106,13 @@ def apply_and_test(actions):
         step += 1
         remaining_steps = int(log(max(right - left, 1), 2))
         mid = left + ((right - left) // 2)
-        logging.debug(
+        logger.debug(
             "Bisect step=%d, left=%d, right=%d, mid=%d", step, left, right, mid
         )
 
         actions = all_actions[mid:] if reverse else all_actions[:mid]
         if apply_and_test(actions):
-            logging.info(
+            logger.info(
                 "🟢 Hypothesis holds at num_actions=%d, remaining bisect steps=%d",
                 mid,
                 remaining_steps,
@@ -121,7 +123,7 @@ def apply_and_test(actions):
             else:
                 right = mid - 1
         else:
-            logging.info(
+            logger.info(
                 "🔴 Hypothesis does not hold at num_actions=%d, remaining bisect steps=%d",
                 mid,
                 remaining_steps,
@@ -134,10 +136,10 @@ def apply_and_test(actions):
     mid = max(left, right) - 1 if reverse else min(left, right) + 1
     if (reverse and mid < 0) or (not reverse and mid >= len(all_actions)):
         actions = all_actions
-        logging.info("Failed to reduce trajectory length using bisection")
+        logger.info("Failed to reduce trajectory length using bisection")
     else:
         actions = all_actions[mid:] if reverse else all_actions[:mid]
-        logging.info(
+        logger.info(
             "Determined that action %d of %d is the first at which the hypothesis holds: %s",
             mid,
             len(all_actions),
@@ -216,7 +218,7 @@ def apply_and_test(actions):
         for _ in range(num_to_remove):
             del candidate_actions[random.randint(0, len(candidate_actions) - 1)]
         if apply_and_test(candidate_actions):
-            logging.info(
+            logger.info(
                 "🟢 Hypothesis holds with %s of %s actions randomly removed, continuing",
                 num_to_remove,
                 len(actions),
@@ -225,14 +227,14 @@ def apply_and_test(actions):
             discard_ratio = init_discard_ratio
             yield env
         else:
-            logging.info(
+            logger.info(
                 "🔴 Hypothesis does not hold with %s of %s actions randomly removed, rolling back",
                 num_to_remove,
                 len(actions),
             )
             discard_ratio *= discard_ratio_decay
             if num_to_remove == 1:
-                logging.info(
+                logger.info(
                     "Terminating random minimization after failing with only a single action removed"
                 )
                 break
@@ -290,7 +292,7 @@ def apply_and_test(actions):
         pass_num += 1
         action_has_been_pruned = False
         action_mask = [True] * len(all_actions)
-        logging.info("Minimization pass on sequence of %d actions", len(all_actions))
+        logger.info("Minimization pass on sequence of %d actions", len(all_actions))
 
         # Inner loop. Go through every action and see if it can be removed.
         for i in range(len(action_mask)):
@@ -298,7 +300,7 @@ def apply_and_test(actions):
             action_name = env.action_space.flags[all_actions[i]]
             actions = [action for action, mask in zip(all_actions, action_mask) if mask]
             if apply_and_test(actions):
-                logging.info(
+                logger.info(
                     "🟢 Hypothesis holds with action %s removed, %d actions remaining",
                     action_name,
                     sum(action_mask),
@@ -308,7 +310,7 @@ def apply_and_test(actions):
                 yield env
             else:
                 action_mask[i] = True
-                logging.info(
+                logger.info(
                     "🔴 Hypothesis does not hold with action %s removed, %d actions remaining",
                     action_name,
                     sum(action_mask),
@@ -316,7 +318,7 @@ def apply_and_test(actions):
 
         all_actions = [action for action, mask in zip(all_actions, action_mask) if mask]
 
-    logging.info(
+    logger.info(
         "Minimization halted after %d passes, %d of %d actions removed",
         pass_num,
         actions_removed,
diff --git a/examples/example_compiler_gym_service/env_tests.py b/examples/example_compiler_gym_service/env_tests.py
index 91e9d8d61..8f471e0d5 100644
--- a/examples/example_compiler_gym_service/env_tests.py
+++ b/examples/example_compiler_gym_service/env_tests.py
@@ -3,7 +3,6 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 """Tests for the example CompilerGym service."""
-import logging
 import subprocess
 from pathlib import Path
 
@@ -15,7 +14,6 @@
 from compiler_gym.envs import CompilerEnv
 from compiler_gym.service import SessionNotFound
 from compiler_gym.spaces import Box, NamedDiscrete, Scalar, Sequence
-from compiler_gym.util.debug_util import set_debug_level
 from tests.test_main import main
 
 # Given that the C++ and Python service implementations have identical
@@ -39,14 +37,6 @@ def bin(request) -> Path:
     yield request.param
 
 
-@pytest.mark.parametrize("env_id", EXAMPLE_ENVIRONMENTS)
-def test_debug_level(env_id: str):
-    """Test that debug level is set."""
-    set_debug_level(3)
-    with gym.make(env_id) as env:
-        assert env.logger.level == logging.DEBUG
-
-
 def test_invalid_arguments(bin: Path):
     """Test that running the binary with unrecognized arguments is an error."""
 
diff --git a/tests/compiler_env_test.py b/tests/compiler_env_test.py
index 97b68114e..9ff33814d 100644
--- a/tests/compiler_env_test.py
+++ b/tests/compiler_env_test.py
@@ -3,8 +3,6 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 """Unit tests for //compiler_gym/envs."""
-import logging
-
 import gym
 import pytest
 from flaky import flaky
@@ -36,12 +34,11 @@ def test_benchmark_set_in_reset(env: LlvmEnv):
     assert env.benchmark == "benchmark://cbench-v1/dijkstra"
 
 
-def test_logger_forced():
-    logger = logging.getLogger("test_logger")
-    with gym.make("llvm-v0") as env_a:
-        with gym.make("llvm-v0", logger=logger) as env_b:
-            assert env_a.logger != logger
-            assert env_b.logger == logger
+def test_logger_is_deprecated(env: LlvmEnv):
+    with pytest.deprecated_call(
+        match="The `CompilerEnv.logger` attribute is deprecated"
+    ):
+        env.logger
 
 
 def test_uri_substring_no_match(env: LlvmEnv):
diff --git a/tests/datasets/dataset_test.py b/tests/datasets/dataset_test.py
index fccb3385d..99645c256 100644
--- a/tests/datasets/dataset_test.py
+++ b/tests/datasets/dataset_test.py
@@ -237,5 +237,11 @@ def test_benchmarks_iter():
     assert list(dataset) == [1, 2, 3]
 
 
+def test_logger_is_deprecated():
+    dataset = DatasetForTesting()
+    with pytest.deprecated_call(match="The `Dataset.logger` attribute is deprecated"):
+        dataset.logger
+
+
 if __name__ == "__main__":
     main()
diff --git a/tests/llvm/llvm_env_test.py b/tests/llvm/llvm_env_test.py
index 19feab19a..f112f55b1 100644
--- a/tests/llvm/llvm_env_test.py
+++ b/tests/llvm/llvm_env_test.py
@@ -20,7 +20,6 @@
 from compiler_gym.envs.llvm.llvm_env import LlvmEnv
 from compiler_gym.service import ServiceError
 from compiler_gym.service.connection import CompilerGymServiceConnection
-from compiler_gym.util import debug_util as dbg
 from tests.pytest_plugins import llvm as llvm_plugin
 from tests.test_main import main
 
@@ -212,10 +211,6 @@ def test_generate_enum_declarations(env: LlvmEnv):
     assert issubclass(llvm.reward_spaces, Enum)
 
 
-def test_logging_default_level(env: LlvmEnv):
-    assert env.logger.level == dbg.get_logging_level()
-
-
 def test_step_multiple_actions_list(env: LlvmEnv):
     """Pass a list of actions to step()."""
     env.reset(benchmark="cbench-v1/crc32")
diff --git a/tests/service/runtime/benchmark_cache_test.py b/tests/service/runtime/benchmark_cache_test.py
index e3036e3dd..7bed096a9 100644
--- a/tests/service/runtime/benchmark_cache_test.py
+++ b/tests/service/runtime/benchmark_cache_test.py
@@ -7,7 +7,7 @@
 import pytest
 
 from compiler_gym.service.proto import Benchmark, File
-from compiler_gym.service.runtime.benchmark_cache import BenchmarkCache
+from compiler_gym.service.runtime.benchmark_cache import BenchmarkCache, logger
 from tests.test_main import main
 
 
@@ -57,7 +57,7 @@ def test_evict_to_capacity_on_max_size_reached(mocker):
     cache = BenchmarkCache(max_size_in_bytes=100)
 
     mocker.spy(cache, "evict_to_capacity")
-    mocker.spy(cache.logger, "info")
+    mocker.spy(logger, "info")
 
     cache["a"] = make_benchmark_of_size(30)
     cache["b"] = make_benchmark_of_size(30)
@@ -70,7 +70,7 @@ def test_evict_to_capacity_on_max_size_reached(mocker):
     assert cache.size == 2
     assert cache.size_in_bytes == 60
 
-    cache.logger.info.assert_called_once_with(
+    logger.info.assert_called_once_with(
         "Evicted %d benchmarks from cache. Benchmark cache size now %d bytes, "
         "%d items",
         2,
@@ -85,11 +85,11 @@ def test_oversized_benchmark_emits_warning(mocker):
     """
     cache = BenchmarkCache(max_size_in_bytes=10)
 
-    mocker.spy(cache.logger, "warning")
+    mocker.spy(logger, "warning")
 
     cache["test"] = make_benchmark_of_size(50)
 
-    cache.logger.warning.assert_called_once_with(
+    logger.warning.assert_called_once_with(
         "Adding new benchmark with size %d bytes exceeds total target cache "
         "size of %d bytes",
         50,
@@ -128,7 +128,7 @@ def test_evict_to_capacity_on_maximum_size_update(mocker):
     cache = BenchmarkCache(max_size_in_bytes=100)
 
     mocker.spy(cache, "evict_to_capacity")
-    mocker.spy(cache.logger, "info")
+    mocker.spy(logger, "info")
 
     cache["a"] = make_benchmark_of_size(30)
     cache["b"] = make_benchmark_of_size(30)

From c74bb2e55fe40e2418e54a57cab7be4b1c742beb Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 6 Oct 2021 15:44:57 +0100
Subject: [PATCH 05/17] More verbose logging when downloading large datasets.

Issue #445.
---
 compiler_gym/datasets/tar_dataset.py      | 7 +++++--
 compiler_gym/third_party/llvm/__init__.py | 7 +++++++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/compiler_gym/datasets/tar_dataset.py b/compiler_gym/datasets/tar_dataset.py
index b6b8ab80e..6b2040343 100644
--- a/compiler_gym/datasets/tar_dataset.py
+++ b/compiler_gym/datasets/tar_dataset.py
@@ -92,9 +92,12 @@ def install(self) -> None:
             # Remove any partially-completed prior extraction.
             shutil.rmtree(self.site_data_path / "contents", ignore_errors=True)
 
-            logger.info("Downloading %s dataset", self.name)
+            logger.warning(
+                "Installing the %s dataset. This may take a few moments ...", self.name
+            )
+
             tar_data = io.BytesIO(download(self.tar_urls, self.tar_sha256))
-            logger.info("Unpacking %s dataset", self.name)
+            logger.info("Unpacking %s dataset to %s", self.name, self.site_data_path)
             with tarfile.open(
                 fileobj=tar_data, mode=f"r:{self.tar_compression}"
             ) as arc:
diff --git a/compiler_gym/third_party/llvm/__init__.py b/compiler_gym/third_party/llvm/__init__.py
index 58be6ba2c..80781616f 100644
--- a/compiler_gym/third_party/llvm/__init__.py
+++ b/compiler_gym/third_party/llvm/__init__.py
@@ -4,6 +4,7 @@
 # LICENSE file in the root directory of this source tree.
 """Module for resolving paths to LLVM binaries and libraries."""
 import io
+import logging
 import shutil
 import sys
 import tarfile
@@ -16,6 +17,8 @@
 from compiler_gym.util.download import download
 from compiler_gym.util.runfiles_path import cache_path, site_data_path
 
+logger = logging.getLogger(__name__)
+
 # The data archive containing LLVM binaries and libraries.
 _LLVM_URL, _LLVM_SHA256 = {
     "darwin": (
@@ -37,6 +40,10 @@
 
 def _download_llvm_files(destination: Path) -> Path:
     """Download and unpack the LLVM data pack."""
+    logger.warning(
+        "Installing the CompilerGym LLVM environment runtime. This may take a few moments ..."
+    )
+
     # Tidy up an incomplete unpack.
     shutil.rmtree(destination, ignore_errors=True)
 

From dd272c811e6d48170b387c1b2d193fa2d51e5764 Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 29 Sep 2021 21:02:41 +0100
Subject: [PATCH 06/17] [tests] Bump pytest dependency versions.

---
 tests/requirements.txt | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/requirements.txt b/tests/requirements.txt
index 623fd95ef..a93d91ec0 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1,11 +1,11 @@
 flaky==3.7.0
 psutil==5.8.0  # Implicit dependency of pytest-xdist
-pytest==6.1.0
-pytest-benchmark==3.2.3
-pytest-cov==2.11.1
-pytest-mock==3.6.0
-pytest-shard==0.1.1
+pytest==6.2.5
+pytest-benchmark==3.4.1
+pytest-cov==2.12.1
+pytest-mock==3.6.1
+pytest-shard==0.1.2
 pytest-stress==1.0.1
 pytest-sugar==0.9.4
 pytest-timeout==1.4.2
-pytest-xdist==2.2.1
+pytest-xdist==2.4.0

From 4963a3efa02ea0ce3b63879acd6a2f1f3a31b2cf Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 6 Oct 2021 17:23:03 +0100
Subject: [PATCH 07/17] Be consistent in use of process.kill().

Always check the Python version and use terminate() on py < 3.7, and
always block until the process has actually terminated.
---
 compiler_gym/envs/llvm/compute_observation.py |  7 +++++++
 compiler_gym/envs/llvm/datasets/cbench.py     | 14 ++++++++++++--
 compiler_gym/envs/llvm/llvm_benchmark.py      |  1 +
 compiler_gym/service/connection.py            | 19 ++++++++++++++++---
 tests/llvm/multiprocessing_test.py            |  2 +-
 5 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/compiler_gym/envs/llvm/compute_observation.py b/compiler_gym/envs/llvm/compute_observation.py
index 1585fc78d..426b9fce6 100644
--- a/compiler_gym/envs/llvm/compute_observation.py
+++ b/compiler_gym/envs/llvm/compute_observation.py
@@ -4,6 +4,7 @@
 # LICENSE file in the root directory of this source tree.
 """This module defines a utility function for computing LLVM observations."""
 import subprocess
+import sys
 from pathlib import Path
 from typing import List
 
@@ -81,6 +82,12 @@ def compute_observation(
     try:
         stdout, stderr = process.communicate(timeout=timeout)
     except subprocess.TimeoutExpired as e:
+        # kill() was added in Python 3.7.
+        if sys.version_info >= (3, 7, 0):
+            process.kill()
+        else:
+            process.terminate()
+        process.communicate(timeout=timeout)  # Wait for shutdown to complete.
         raise TimeoutError(
             f"Failed to compute {observation_space.id} observation in "
             f"{timeout:.1f} {plural(int(round(timeout)), 'second', 'seconds')}"
diff --git a/compiler_gym/envs/llvm/datasets/cbench.py b/compiler_gym/envs/llvm/datasets/cbench.py
index 6a7fcf595..f344f085c 100644
--- a/compiler_gym/envs/llvm/datasets/cbench.py
+++ b/compiler_gym/envs/llvm/datasets/cbench.py
@@ -145,7 +145,12 @@ def _compile_and_run_bitcode_file(
         try:
             output, _ = clang.communicate(timeout=compilation_timeout_seconds)
         except subprocess.TimeoutExpired:
-            clang.kill()
+            # kill() was added in Python 3.7.
+            if sys.version_info >= (3, 7, 0):
+                clang.kill()
+            else:
+                clang.terminate()
+            clang.communicate(timeout=30)  # Wait for shutdown to complete.
             error_data["timeout"] = compilation_timeout_seconds
             return BenchmarkExecutionResult(
                 walltime_seconds=timeout_seconds,
@@ -183,7 +188,12 @@ def _compile_and_run_bitcode_file(
         with Timer() as timer:
             stdout, _ = process.communicate(timeout=timeout_seconds)
     except subprocess.TimeoutExpired:
-        process.kill()
+        # kill() was added in Python 3.7.
+        if sys.version_info >= (3, 7, 0):
+            process.kill()
+        else:
+            process.terminate()
+        process.communicate(timeout=30)  # Wait for shutdown to complete.
         error_data["timeout_seconds"] = timeout_seconds
         return BenchmarkExecutionResult(
             walltime_seconds=timeout_seconds,
diff --git a/compiler_gym/envs/llvm/llvm_benchmark.py b/compiler_gym/envs/llvm/llvm_benchmark.py
index 5a36dbf4d..0e354749b 100644
--- a/compiler_gym/envs/llvm/llvm_benchmark.py
+++ b/compiler_gym/envs/llvm/llvm_benchmark.py
@@ -33,6 +33,7 @@ def _communicate(process, input=None, timeout=None):
             process.kill()
         else:
             process.terminate()
+        process.communicate(timeout=timeout)  # Wait for shutdown to complete.
         raise
 
 
diff --git a/compiler_gym/service/connection.py b/compiler_gym/service/connection.py
index 033cb4571..af4f7072e 100644
--- a/compiler_gym/service/connection.py
+++ b/compiler_gym/service/connection.py
@@ -401,7 +401,11 @@ def __init__(
             sleep(wait_secs)
             wait_secs *= 1.2
         else:
-            self.process.kill()
+            # kill() was added in Python 3.7.
+            if sys.version_info >= (3, 7, 0):
+                self.process.kill()
+            else:
+                self.process.terminate()
             self.process.communicate(timeout=rpc_init_max_seconds)
             shutil.rmtree(self.working_dir)
             raise TimeoutError(
@@ -430,7 +434,11 @@ def __init__(
                 )
                 wait_secs *= 1.2
         else:
-            self.process.kill()
+            # kill() was added in Python 3.7.
+            if sys.version_info >= (3, 7, 0):
+                self.process.kill()
+            else:
+                self.process.terminate()
             self.process.communicate(timeout=process_exit_max_seconds)
 
             # Include the last few lines of logs generated by the compiler
@@ -483,7 +491,12 @@ def close(self):
         except subprocess.TimeoutExpired:
             # Try and kill it and then walk away.
             try:
-                self.process.kill()
+                # kill() was added in Python 3.7.
+                if sys.version_info >= (3, 7, 0):
+                    self.process.kill()
+                else:
+                    self.process.terminate()
+                self.process.communicate(timeout=60)
             except:  # noqa
                 pass
             logger.warning("Abandoning orphan service at %s", self.working_dir)
diff --git a/tests/llvm/multiprocessing_test.py b/tests/llvm/multiprocessing_test.py
index bca182aba..d2ae8f4d1 100644
--- a/tests/llvm/multiprocessing_test.py
+++ b/tests/llvm/multiprocessing_test.py
@@ -62,7 +62,7 @@ def test_running_environment_in_background_process():
             process.kill()
         else:
             process.terminate()
-        process.join()
+        process.join(timeout=60)
 
 
 @macos_only

From 3580bb1be792034eabb99eaf5d8298b22ff90cb5 Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 6 Oct 2021 17:23:58 +0100
Subject: [PATCH 08/17] [Makefile] Don't double quote paths.

---
 Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index c12f88b63..975f963e6 100644
--- a/Makefile
+++ b/Makefile
@@ -275,12 +275,12 @@ livedocs: gendocs doxygen
 # Testing #
 ###########
 
-COMPILER_GYM_SITE_DATA ?= "/tmp/compiler_gym_$(USER)/tests/site_data"
-COMPILER_GYM_CACHE ?= "/tmp/compiler_gym_$(USER)/tests/cache"
+COMPILER_GYM_SITE_DATA ?= /tmp/compiler_gym_$(USER)/tests/site_data
+COMPILER_GYM_CACHE ?= /tmp/compiler_gym_$(USER)/tests/cache
 
 # A directory that is used as the working directory for running pytest tests
 # by symlinking the tests directory into it.
-INSTALL_TEST_ROOT ?= "/tmp/compiler_gym_$(USER)/install_tests"
+INSTALL_TEST_ROOT ?= /tmp/compiler_gym_$(USER)/install_tests
 
 # The target to use. If not provided, all tests will be run. For `make test` and
 # related, this is a bazel target pattern, with default value '//...'. For `make

From c4f2c27a7b12a45ac4641d3ba7e989a0645d7331 Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 6 Oct 2021 17:24:06 +0100
Subject: [PATCH 09/17] [Makefile] Fix redundant instruction.

---
 Makefile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Makefile b/Makefile
index 975f963e6..bab14cb64 100644
--- a/Makefile
+++ b/Makefile
@@ -307,7 +307,6 @@ itest: bazel-fetch
 install-test-setup:
 	mkdir -p "$(INSTALL_TEST_ROOT)"
 	rm -f "$(INSTALL_TEST_ROOT)/tests" "$(INSTALL_TEST_ROOT)/tox.ini"
-	ln -s "$(INSTALL_TEST_ROOT)"
 	ln -s "$(ROOT)/tests" "$(INSTALL_TEST_ROOT)"
 	ln -s "$(ROOT)/tox.ini" "$(INSTALL_TEST_ROOT)"
 

From 380c5422c22807aad2b9ff1bc1144a6c8795786e Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 6 Oct 2021 17:25:33 +0100
Subject: [PATCH 10/17] [Makefile] Update list of data file locations.

---
 Makefile | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index bab14cb64..05c598f09 100644
--- a/Makefile
+++ b/Makefile
@@ -359,10 +359,12 @@ install: |  init-runtime-requirements bazel-build pip-install
 # A list of all filesystem locations that CompilerGym may use for storing
 # files and data.
 COMPILER_GYM_DATA_FILE_LOCATIONS = \
-    $(HOME)/.cache/compiler_gym \
-    $(HOME)/.local/share/compiler_gym \
-    $(HOME)/logs/compiler_gym \
+    "$(HOME)/.cache/compiler_gym" \
+    "$(HOME)/.local/share/compiler_gym" \
+    "$(HOME)/logs/compiler_gym" \
     /dev/shm/compiler_gym \
+    /dev/shm/compiler_gym_$(USER) \
+    /tmp/compiler_gym \
     /tmp/compiler_gym_$(USER) \
     $(NULL)
 

From 4c0d5c96bbe51ed5592f5000438a2bddb37e693a Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 6 Oct 2021 17:29:45 +0100
Subject: [PATCH 11/17] [llvm] Fix orphaned subprocess.

---
 compiler_gym/envs/llvm/datasets/llvm_stress.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/compiler_gym/envs/llvm/datasets/llvm_stress.py b/compiler_gym/envs/llvm/datasets/llvm_stress.py
index 130add7a7..949b17c49 100644
--- a/compiler_gym/envs/llvm/datasets/llvm_stress.py
+++ b/compiler_gym/envs/llvm/datasets/llvm_stress.py
@@ -86,6 +86,7 @@ def benchmark_from_seed(self, seed: int) -> Benchmark:
         )
 
         stdout, _ = llvm_as.communicate(timeout=60)
+        llvm_stress.communicate(timeout=60)
         if llvm_stress.returncode or llvm_as.returncode:
             raise BenchmarkInitError("Failed to generate benchmark")
 

From 9d2d2061942a40c73d9c70680b8504edd6496d72 Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 6 Oct 2021 18:33:20 +0100
Subject: [PATCH 12/17] [tests] Mark a flaky test.

---
 tests/llvm/llvm_env_test.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/llvm/llvm_env_test.py b/tests/llvm/llvm_env_test.py
index f112f55b1..a1efc22a6 100644
--- a/tests/llvm/llvm_env_test.py
+++ b/tests/llvm/llvm_env_test.py
@@ -10,6 +10,7 @@
 
 import gym
 import pytest
+from flaky import flaky
 
 import compiler_gym
 from compiler_gym.compiler_env_state import (
@@ -75,6 +76,7 @@ def test_double_reset(env: LlvmEnv, always_send_benchmark_on_reset: bool):
     assert env.in_episode
 
 
+@flaky
 def test_connection_dies_default_reward(env: LlvmEnv):
     env.reward_space = "IrInstructionCount"
     env.reset(benchmark="cbench-v1/crc32")
@@ -98,6 +100,7 @@ def test_connection_dies_default_reward(env: LlvmEnv):
     assert reward == 2.5
 
 
+@flaky
 def test_connection_dies_default_reward_negated(env: LlvmEnv):
     env.reward_space = "IrInstructionCount"
     env.reset(benchmark="cbench-v1/crc32")

From a247b40e3f403b562e76e608db57310c249c029f Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Thu, 7 Oct 2021 18:00:41 +0100
Subject: [PATCH 13/17] [requirements] Pin gym at < 0.21.

Fixes #456.
---
 compiler_gym/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler_gym/requirements.txt b/compiler_gym/requirements.txt
index c76c2edbb..721c9590d 100644
--- a/compiler_gym/requirements.txt
+++ b/compiler_gym/requirements.txt
@@ -3,7 +3,7 @@ deprecated>=1.2.12
 docker>=4.0.0
 fasteners>=0.15
 grpcio>=1.32.0
-gym>=0.18.0
+gym>=0.18.0,<0.21
 humanize>=2.6.0
 loop_tool_py==0.0.7
 networkx>=2.5

From 2f00774ceae7ce3009dc528e4342d1fdf0eb6f35 Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Mon, 11 Oct 2021 16:48:31 +0100
Subject: [PATCH 14/17] [tests] Add missing __init__.py file

---
 tests/loop_tool/__init__.py | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 tests/loop_tool/__init__.py

diff --git a/tests/loop_tool/__init__.py b/tests/loop_tool/__init__.py
new file mode 100644
index 000000000..768f77b7e
--- /dev/null
+++ b/tests/loop_tool/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the LICENSE file
+# in the root directory of this source tree.
+#
+# tests/**/__init__.py files are needed for pytest Python path resolution. See:
+# https://docs.pytest.org/en/latest/explanation/pythonpath.html

From 7c10cffb4332c135f1b0d87aa142dd107676584d Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Mon, 11 Oct 2021 16:51:17 +0100
Subject: [PATCH 15/17] [examples] Fix scoped thread pool.

This ensures that the thread pool is destroyed after use.
---
 examples/explore.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/examples/explore.py b/examples/explore.py
index c943f9cb8..6ae6668d0 100644
--- a/examples/explore.py
+++ b/examples/explore.py
@@ -285,13 +285,11 @@ def number_list(stats):
 # Compute an action graph and use it to find the optimal sequence
 # within episode_length actions. Uses as many threads as there are
 # elements in envs.
-def compute_action_graph(envs, episode_length):
+def compute_action_graph(pool, envs, episode_length):
     assert len(envs) >= 1
     env_queue = Queue()
     for env in envs:
         env_queue.put(env)
-    pool = ThreadPool(len(envs))
-
     stats = NodeTypeStats(action_count=env.action_space.n)
     graph = StateGraph(edges_per_node=env.action_space.n)
 
@@ -448,7 +446,8 @@ def main(argv):
     try:
         for _ in range(FLAGS.nproc):
             envs.append(make_env())
-        compute_action_graph(envs, episode_length=FLAGS.episode_length)
+        with ThreadPool(len(envs)) as pool:
+            compute_action_graph(pool, envs, episode_length=FLAGS.episode_length)
     finally:
         for env in envs:
             env.close()

From edf915bc307e3b0fdb9bb6f1f47ae73c7498754c Mon Sep 17 00:00:00 2001
From: Chris Cummins <cummins@fb.com>
Date: Mon, 11 Oct 2021 18:57:47 +0100
Subject: [PATCH 16/17] Mark test regressions introduced in #453.

Issue #459.
---
 .../action_sensitivity_analysis_test.py                  | 7 +++++++
 tests/bin/service_bin_test.py                            | 6 ++++++
 tests/gcc/gcc_env_test.py                                | 9 +++++++--
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/examples/sensitivity_analysis/action_sensitivity_analysis_test.py b/examples/sensitivity_analysis/action_sensitivity_analysis_test.py
index cd586321b..554f432a3 100644
--- a/examples/sensitivity_analysis/action_sensitivity_analysis_test.py
+++ b/examples/sensitivity_analysis/action_sensitivity_analysis_test.py
@@ -4,9 +4,11 @@
 # LICENSE file in the root directory of this source tree.
 """End-to-end test of //compiler_gym/bin:action_sensitivity_analysis."""
 
+import sys
 import tempfile
 from pathlib import Path
 
+import pytest
 from absl.flags import FLAGS
 from sensitivity_analysis.action_sensitivity_analysis import (
     run_action_sensitivity_analysis,
@@ -14,6 +16,11 @@
 from sensitivity_analysis.sensitivity_analysis_eval import run_sensitivity_analysis_eval
 
 
+@pytest.mark.xfail(
+    sys.platform == "darwin",
+    strict=True,
+    reason="github.com/facebookresearch/CompilerGym/issues/459",
+)
 def test_run_action_sensitivity_analysis():
     actions = [0, 1]
     env = "llvm-v0"
diff --git a/tests/bin/service_bin_test.py b/tests/bin/service_bin_test.py
index 53c7e9011..083eae4d1 100644
--- a/tests/bin/service_bin_test.py
+++ b/tests/bin/service_bin_test.py
@@ -3,6 +3,8 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 """Unit tests for //compiler_gym/bin:service."""
+import sys
+
 import gym
 import pytest
 from absl import flags
@@ -14,6 +16,10 @@
 
 
 @pytest.mark.parametrize("env_name", compiler_gym.COMPILER_GYM_ENVS)
+@pytest.mark.xfail(
+    sys.platform == "darwin",
+    reason="github.com/facebookresearch/CompilerGym/issues/459",
+)
 def test_print_service_capabilities_smoke_test(env_name: str):
     flags.FLAGS(["argv0"])
     try:
diff --git a/tests/gcc/gcc_env_test.py b/tests/gcc/gcc_env_test.py
index 3f457565b..3777c5832 100644
--- a/tests/gcc/gcc_env_test.py
+++ b/tests/gcc/gcc_env_test.py
@@ -14,7 +14,7 @@
 from compiler_gym.service.connection import ServiceError
 from compiler_gym.spaces import Scalar, Sequence
 from tests.pytest_plugins.common import with_docker, without_docker
-from tests.pytest_plugins.gcc import with_gcc_support
+from tests.pytest_plugins.gcc import docker_is_available, with_gcc_support
 from tests.test_main import main
 
 pytest_plugins = ["tests.pytest_plugins.gcc"]
@@ -35,7 +35,12 @@ def test_docker_default_action_space():
         assert env.action_spaces[0].names[0] == "-O0"
 
 
-def test_observation_spaces(gcc_bin: str):
+@pytest.mark.xfail(
+    not docker_is_available(),
+    strict=True,
+    reason="github.com/facebookresearch/CompilerGym/issues/459",
+)
+def test_observation_spaces_failing_because_of_bug(gcc_bin: str):
     """Test that the environment reports the service's observation spaces."""
     with gym.make("gcc-v0", gcc_bin=gcc_bin) as env:
         env.reset()

From 7594a7179907aeff40537a2baf895bd0d302dcce Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Mon, 11 Oct 2021 22:13:08 +0100
Subject: [PATCH 17/17] [tests] Mark macOS test regressions.

Issue #459.
---
 tests/llvm/datasets/csmith_test.py            | 11 ++++++++++
 ...esh_environment_observation_reward_test.py |  7 +++++++
 tests/llvm/llvm_session_parameters_test.py    |  7 +++++++
 tests/llvm/observation_spaces_test.py         | 20 +++++++++++++++++++
 tests/llvm/runtime_test.py                    | 11 ++++++++++
 5 files changed, 56 insertions(+)

diff --git a/tests/llvm/datasets/csmith_test.py b/tests/llvm/datasets/csmith_test.py
index 50a8d09f2..955f85276 100644
--- a/tests/llvm/datasets/csmith_test.py
+++ b/tests/llvm/datasets/csmith_test.py
@@ -3,6 +3,7 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 """Tests for the Csmith dataset."""
+import sys
 from itertools import islice
 from pathlib import Path
 
@@ -62,6 +63,11 @@ def test_csmith_from_seed_retry_count_exceeded(csmith_dataset: CsmithDataset):
         csmith_dataset.benchmark_from_seed(seed=1, max_retries=3, retry_count=5)
 
 
+@pytest.mark.xfail(
+    sys.platform == "darwin",
+    strict=True,
+    reason="github.com/facebookresearch/CompilerGym/issues/459",
+)
 @flaky(rerun_filter=lambda err, *args: issubclass(err[0], ServiceError))
 def test_csmith_positive_runtimes(env: LlvmEnv, csmith_dataset: CsmithDataset):
     benchmark = next(csmith_dataset.benchmarks())
@@ -71,6 +77,11 @@ def test_csmith_positive_runtimes(env: LlvmEnv, csmith_dataset: CsmithDataset):
     assert np.all(np.greater(val, 0))
 
 
+@pytest.mark.xfail(
+    sys.platform == "darwin",
+    strict=True,
+    reason="github.com/facebookresearch/CompilerGym/issues/459",
+)
 @flaky(rerun_filter=lambda err, *args: issubclass(err[0], ServiceError))
 def test_csmith_positive_buildtimes(env: LlvmEnv, csmith_dataset: CsmithDataset):
     benchmark = next(csmith_dataset.benchmarks())
diff --git a/tests/llvm/fresh_environment_observation_reward_test.py b/tests/llvm/fresh_environment_observation_reward_test.py
index 8e5957e9d..0ec955090 100644
--- a/tests/llvm/fresh_environment_observation_reward_test.py
+++ b/tests/llvm/fresh_environment_observation_reward_test.py
@@ -3,7 +3,9 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 """Integrations tests for the LLVM CompilerGym environments."""
+import sys
 
+import pytest
 from flaky import flaky
 
 from compiler_gym.envs import CompilerEnv
@@ -12,6 +14,11 @@
 pytest_plugins = ["tests.pytest_plugins.llvm"]
 
 
+@pytest.mark.xfail(
+    sys.platform == "darwin",
+    strict=True,
+    reason="github.com/facebookresearch/CompilerGym/issues/459",
+)
 @flaky  # Runtime can timeout
 def test_step(env: CompilerEnv, observation_space: str, reward_space: str):
     """Request every combination of observation and reward in a fresh environment."""
diff --git a/tests/llvm/llvm_session_parameters_test.py b/tests/llvm/llvm_session_parameters_test.py
index 1d72cde52..56a8db6de 100644
--- a/tests/llvm/llvm_session_parameters_test.py
+++ b/tests/llvm/llvm_session_parameters_test.py
@@ -3,6 +3,8 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 """Tests for LLVM session parameter handlers."""
+import sys
+
 import pytest
 from flaky import flaky
 
@@ -67,6 +69,11 @@ def test_benchmarks_cache_parameter_invalid_int_type(env: LlvmEnv):
         env.send_params(("service.benchmark_cache.set_max_size_in_bytes", "not an int"))
 
 
+@pytest.mark.xfail(
+    sys.platform == "darwin",
+    strict=True,
+    reason="github.com/facebookresearch/CompilerGym/issues/459",
+)
 @flaky  # Runtime can timeout.
 @pytest.mark.parametrize("n", [1, 3, 10])
 def test_runtime_observation_parameters(env: LlvmEnv, n: int):
diff --git a/tests/llvm/observation_spaces_test.py b/tests/llvm/observation_spaces_test.py
index a98fd4b74..2210e1f2b 100644
--- a/tests/llvm/observation_spaces_test.py
+++ b/tests/llvm/observation_spaces_test.py
@@ -1202,6 +1202,11 @@ def test_object_text_size_observation_spaces(env: LlvmEnv):
     assert value == crc32_code_sizes[sys.platform][2]
 
 
+@pytest.mark.xfail(
+    sys.platform == "darwin",
+    strict=True,
+    reason="github.com/facebookresearch/CompilerGym/issues/459",
+)
 @flaky  # Runtimes can timeout
 def test_runtime_observation_space(env: LlvmEnv):
     env.reset("cbench-v1/crc32")
@@ -1226,6 +1231,11 @@ def test_runtime_observation_space(env: LlvmEnv):
     assert len(set(value)) > 1
 
 
+@pytest.mark.xfail(
+    sys.platform == "darwin",
+    strict=True,
+    reason="github.com/facebookresearch/CompilerGym/issues/459",
+)
 @flaky  # Runtimes can timeout
 def test_runtime_observation_space_different_observation_count(env: LlvmEnv):
     """Test setting a custom observation count for LLVM runtimes."""
@@ -1247,6 +1257,11 @@ def test_runtime_observation_space_different_observation_count(env: LlvmEnv):
     assert value.shape == (5,)
 
 
+@pytest.mark.xfail(
+    sys.platform == "darwin",
+    strict=True,
+    reason="github.com/facebookresearch/CompilerGym/issues/459",
+)
 @flaky  # Runtimes can timeout
 def test_runtime_observation_space_invalid_observation_count(env: LlvmEnv):
     """Test setting an invalid custom observation count for LLVM runtimes."""
@@ -1273,6 +1288,11 @@ def test_runtime_observation_space_not_runnable(env: LlvmEnv):
     assert space.space.contains(value)
 
 
+@pytest.mark.xfail(
+    sys.platform == "darwin",
+    strict=True,
+    reason="github.com/facebookresearch/CompilerGym/issues/459",
+)
 @flaky  # Build can timeout
 def test_buildtime_observation_space(env: LlvmEnv):
     env.reset("cbench-v1/crc32")
diff --git a/tests/llvm/runtime_test.py b/tests/llvm/runtime_test.py
index 72ad3a120..21c774a67 100644
--- a/tests/llvm/runtime_test.py
+++ b/tests/llvm/runtime_test.py
@@ -3,6 +3,7 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 """Integrations tests for LLVM runtime support."""
+import sys
 from pathlib import Path
 
 import numpy as np
@@ -16,6 +17,11 @@
 pytest_plugins = ["tests.pytest_plugins.llvm"]
 
 
+@pytest.mark.xfail(
+    sys.platform == "darwin",
+    strict=True,
+    reason="github.com/facebookresearch/CompilerGym/issues/459",
+)
 @pytest.mark.parametrize("runtime_observation_count", [1, 3, 5])
 def test_custom_benchmark_runtime(env: LlvmEnv, tmpdir, runtime_observation_count: int):
     env.reset()
@@ -50,6 +56,11 @@ def test_custom_benchmark_runtime(env: LlvmEnv, tmpdir, runtime_observation_coun
     assert np.all(runtimes > 0)
 
 
+@pytest.mark.xfail(
+    sys.platform == "darwin",
+    strict=True,
+    reason="github.com/facebookresearch/CompilerGym/issues/459",
+)
 @flaky
 def test_custom_benchmark_runtimes_differ(env: LlvmEnv, tmpdir):
     """Same as above, but test that runtimes differ from run to run."""