From a203b97f12a954434ae54f92397fde8c9a91da3c Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Sun, 6 Mar 2022 22:35:55 +0000 Subject: [PATCH 1/8] Add annotation to step() about differing arguments. --- compiler_gym/envs/compiler_env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler_gym/envs/compiler_env.py b/compiler_gym/envs/compiler_env.py index 62b7f29fe..1c5c41883 100644 --- a/compiler_gym/envs/compiler_env.py +++ b/compiler_gym/envs/compiler_env.py @@ -1029,7 +1029,7 @@ def raw_step( return observations, rewards, reply.end_of_session, info - def step( + def step( # pylint: disable=arguments-differ self, action: Union[ActionType, Iterable[ActionType]], observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, From c6cc16b4b213751254a99aefed6f8539b2c53473 Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Sun, 6 Mar 2022 23:03:03 +0000 Subject: [PATCH 2/8] [core] Add a CompilerEnv.multistep() method. CompilerEnv.step() currently accepts two types for the "action" argument: a scalar action, or an iterable of actions. This kind of type overloading does not work for list types. This adds a new method, CompilerEnv.multistep(), that explicitly takes takes an iterable sequence of actions. If you want to run multiple actions in a single step, call this new method. Calling CompilerEnv.step() with a list of actions still works, though with a deprecation warning. In the v0.2.4 release support for lists of actions in CompilerEnv.step() will be removed. Fixes #610. --- compiler_gym/envs/compiler_env.py | 43 ++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/compiler_gym/envs/compiler_env.py b/compiler_gym/envs/compiler_env.py index 1c5c41883..d804c7c56 100644 --- a/compiler_gym/envs/compiler_env.py +++ b/compiler_gym/envs/compiler_env.py @@ -1031,7 +1031,7 @@ def raw_step( def step( # pylint: disable=arguments-differ self, - action: Union[ActionType, Iterable[ActionType]], + action: ActionType, observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, rewards: Optional[Iterable[Union[str, Reward]]] = None, ) -> StepType: @@ -1058,9 +1058,46 @@ def step( # pylint: disable=arguments-differ :raises SessionNotFound: If :meth:`reset() ` has not been called. """ - # Coerce actions into a list. - actions = action if isinstance(action, IterableType) else [action] + # NOTE(github.com/facebookresearch/CompilerGym/issues/610): This + # workaround for accepting a list of actions will be removed in v0.2.4. + if isinstance(action, IterableType): + warnings.warn( + "env.step() only takes a single action. Use env.multistep() " + "for an iterable of actions", + category=DeprecationWarning, + ) + else: + action = [action] + + return self.multistep(action, observations, rewards) + + def multistep( + self, + actions: Iterable[ActionType], + observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + rewards: Optional[Iterable[Union[str, Reward]]] = None, + ): + """Take a sequence of steps and return the final observation and reward. + :param action: A sequence of actions to apply in order. + + :param observations: A list of observation spaces to compute + observations from. If provided, this changes the :code:`observation` + element of the return tuple to be a list of observations from the + requested spaces. The default :code:`env.observation_space` is not + returned. + + :param rewards: A list of reward spaces to compute rewards from. 
If + provided, this changes the :code:`reward` element of the return + tuple to be a list of rewards from the requested spaces. The default + :code:`env.reward_space` is not returned. + + :return: A tuple of observation, reward, done, and info. Observation and + reward are None if default observation/reward is not set. + + :raises SessionNotFound: If :meth:`reset() + ` has not been called. + """ # Coerce observation spaces into a list of ObservationSpaceSpec instances. if observations: observation_spaces: List[ObservationSpaceSpec] = [ From d685562cab227db106704fec3547908d41076cae Mon Sep 17 00:00:00 2001 From: Boian Petkantchin Date: Fri, 4 Mar 2022 20:00:37 -0800 Subject: [PATCH 3/8] [core] Deprecations and updates to env.step() arguments. This makes the following changes: - Changes env.step() `action` to accept only a single action, with a deprecation warning if a list of actions are provided. - Renames env.step() `observations` to `observation_spaces`. The old parameter name is still accepted with a deprecation warning. - Renames env.step() `rewards` to `reward_spaces`. The old parameter name is still accepted with a deprecation warning. --- compiler_gym/bin/service.py | 7 +- compiler_gym/envs/compiler_env.py | 153 +++++++++++------ compiler_gym/envs/llvm/llvm_rewards.py | 6 +- compiler_gym/random_replay.py | 2 +- compiler_gym/random_search.py | 13 +- compiler_gym/spaces/named_discrete.py | 14 +- compiler_gym/spaces/reward.py | 4 +- compiler_gym/util/gym_type_hints.py | 2 +- compiler_gym/util/minimize_trajectory.py | 2 +- compiler_gym/views/observation.py | 2 +- compiler_gym/wrappers/commandline.py | 51 +++--- compiler_gym/wrappers/core.py | 157 +++++++++++++++--- compiler_gym/wrappers/llvm.py | 4 +- compiler_gym/wrappers/time_limit.py | 5 +- compiler_gym/wrappers/validation.py | 16 +- examples/brute_force.py | 3 +- .../llvm_autotuning/autotuners/nevergrad_.py | 11 +- .../llvm_autotuning/optimization_target.py | 2 +- examples/llvm_rl/wrappers.py | 18 +- .../action_sensitivity_analysis.py | 5 +- tests/llvm/episode_reward_test.py | 2 +- tests/llvm/fork_regression_test.py | 6 +- tests/llvm/llvm_env_test.py | 4 +- tests/llvm/multiprocessing_test.py | 7 +- tests/llvm/threading_test.py | 5 +- tests/util/minimize_trajectory_test.py | 5 +- tests/views/observation_test.py | 8 +- tests/wrappers/commandline_wrappers_test.py | 21 +-- tests/wrappers/core_wrappers_test.py | 36 +++- tests/wrappers/time_limit_wrappers_test.py | 4 +- www/www.py | 12 +- 31 files changed, 399 insertions(+), 188 deletions(-) diff --git a/compiler_gym/bin/service.py b/compiler_gym/bin/service.py index 25f868467..3542ae22b 100644 --- a/compiler_gym/bin/service.py +++ b/compiler_gym/bin/service.py @@ -105,7 +105,7 @@ from compiler_gym.datasets import Dataset from compiler_gym.envs import CompilerEnv from compiler_gym.service.connection import ConnectionOpts -from compiler_gym.spaces import Commandline +from compiler_gym.spaces import Commandline, NamedDiscrete from compiler_gym.util.flags.env_from_flags import env_from_flags from compiler_gym.util.tabulate import tabulate from compiler_gym.util.truncate import truncate @@ -249,12 +249,13 @@ def print_service_capabilities(env: CompilerEnv): ], headers=("Action", "Description"), ) - else: + print(table) + elif isinstance(action_space, NamedDiscrete): table = tabulate( [(a,) for a in sorted(action_space.names)], headers=("Action",), ) - print(table) + print(table) def main(argv): diff --git a/compiler_gym/envs/compiler_env.py b/compiler_gym/envs/compiler_env.py index 
d804c7c56..a00f86947 100644 --- a/compiler_gym/envs/compiler_env.py +++ b/compiler_gym/envs/compiler_env.py @@ -11,7 +11,7 @@ from math import isclose from pathlib import Path from time import time -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union import gym import numpy as np @@ -128,7 +128,7 @@ class CompilerEnv(gym.Env): :ivar actions: The list of actions that have been performed since the previous call to :func:`reset`. - :vartype actions: List[int] + :vartype actions: List[ActionType] :ivar reward_range: A tuple indicating the range of reward values. Default range is (-inf, +inf). @@ -321,7 +321,7 @@ def __init__( self.reward_range: Tuple[float, float] = (-np.inf, np.inf) self.episode_reward: Optional[float] = None self.episode_start_time: float = time() - self.actions: List[int] = [] + self.actions: List[ActionType] = [] # Initialize the default observation/reward spaces. self.observation_space_spec: Optional[ObservationSpaceSpec] = None @@ -375,7 +375,7 @@ def commandline(self) -> str: """ raise NotImplementedError("abstract method") - def commandline_to_actions(self, commandline: str) -> List[int]: + def commandline_to_actions(self, commandline: str) -> List[ActionType]: """Interface for :class:`CompilerEnv ` subclasses to convert from a commandline invocation to a sequence of actions. @@ -409,7 +409,7 @@ def state(self) -> CompilerEnvState: ) @property - def action_space(self) -> NamedDiscrete: + def action_space(self) -> Space: """The current action space. :getter: Get the current action space. @@ -587,7 +587,7 @@ def fork(self) -> "CompilerEnv": self.reset() if actions: logger.warning("Parent service of fork() has died, replaying state") - _, _, done, _ = self.step(actions) + _, _, done, _ = self.multistep(actions) assert not done, "Failed to replay action sequence" request = ForkSessionRequest(session_id=self._session_id) @@ -620,7 +620,7 @@ def fork(self) -> "CompilerEnv": # replay the state. new_env = type(self)(**self._init_kwargs()) new_env.reset() - _, _, done, _ = new_env.step(self.actions) + _, _, done, _ = new_env.multistep(self.actions) assert not done, "Failed to replay action sequence in forked environment" # Create copies of the mutable reward and observation spaces. This @@ -885,9 +885,9 @@ def _call_with_error( def raw_step( self, - actions: Iterable[int], - observations: Iterable[ObservationSpaceSpec], - rewards: Iterable[Reward], + actions: Iterable[ActionType], + observation_spaces: List[ObservationSpaceSpec], + reward_spaces: List[Reward], ) -> StepType: """Take a step. @@ -908,18 +908,15 @@ def raw_step( .. warning:: - Prefer :meth:`step() ` to - :meth:`raw_step() `. - :meth:`step() ` has equivalent - functionality, and is less likely to change in the future. + Don't call this method directly, use :meth:`step() + ` or :meth:`multistep() + ` instead. The + :meth:`raw_step() ` method is an + implementation detail. 
""" if not self.in_episode: raise SessionNotFound("Must call reset() before step()") - # Build the list of observations that must be computed by the backend - user_observation_spaces: List[ObservationSpaceSpec] = list(observations) - reward_spaces: List[Reward] = list(rewards) - reward_observation_spaces: List[ObservationSpaceSpec] = [] for reward_space in reward_spaces: reward_observation_spaces += [ @@ -927,7 +924,7 @@ def raw_step( ] observations_to_compute: List[ObservationSpaceSpec] = list( - set(user_observation_spaces).union(set(reward_observation_spaces)) + set(observation_spaces).union(set(reward_observation_spaces)) ) observation_space_index_map: Dict[ObservationSpaceSpec, int] = { observation_space: i @@ -974,7 +971,7 @@ def raw_step( default_observations = [ observation_space.default_value - for observation_space in user_observation_spaces + for observation_space in observation_spaces ] default_rewards = [ float(reward_space.reward_on_error(self.episode_reward)) @@ -1002,7 +999,7 @@ def raw_step( # Get the user-requested observation. observations: List[ObservationType] = [ computed_observations[observation_space_index_map[observation_space]] - for observation_space in user_observation_spaces + for observation_space in observation_spaces ] # Update and compute the rewards. @@ -1032,22 +1029,22 @@ def raw_step( def step( # pylint: disable=arguments-differ self, action: ActionType, + observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + reward_spaces: Optional[Iterable[Union[str, Reward]]] = None, observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, rewards: Optional[Iterable[Union[str, Reward]]] = None, ) -> StepType: """Take a step. - :param action: An action, or a sequence of actions. When multiple - actions are provided the observation and reward are returned after - running all of the actions. + :param action: An action. - :param observations: A list of observation spaces to compute + :param observation_spaces: A list of observation spaces to compute observations from. If provided, this changes the :code:`observation` element of the return tuple to be a list of observations from the requested spaces. The default :code:`env.observation_space` is not returned. - :param rewards: A list of reward spaces to compute rewards from. If + :param reward_spaces: A list of reward spaces to compute rewards from. If provided, this changes the :code:`reward` element of the return tuple to be a list of rewards from the requested spaces. The default :code:`env.reward_space` is not returned. @@ -1058,22 +1055,42 @@ def step( # pylint: disable=arguments-differ :raises SessionNotFound: If :meth:`reset() ` has not been called. """ - # NOTE(github.com/facebookresearch/CompilerGym/issues/610): This - # workaround for accepting a list of actions will be removed in v0.2.4. if isinstance(action, IterableType): warnings.warn( - "env.step() only takes a single action. Use env.multistep() " - "for an iterable of actions", + "Argument `action` of CompilerEnv.step no longer accepts a list " + " of actions. Please use CompilerEnv.multistep instead", category=DeprecationWarning, ) - else: - action = [action] - - return self.multistep(action, observations, rewards) + return self.multistep( + action, + observation_spaces=observation_spaces, + reward_spaces=reward_spaces, + observations=observations, + rewards=rewards, + ) + if observations is not None: + warnings.warn( + "Argument `observations` of CompilerEnv.step has been " + "renamed `observation_spaces`. 
Please update your code", + category=DeprecationWarning, + ) + observation_spaces = observations + if rewards is not None: + warnings.warn( + "Argument `rewards` of CompilerEnv.step has been renamed " + "`reward_spaces`. Please update your code", + category=DeprecationWarning, + ) + reward_spaces = rewards + return self._multistep( + self.raw_step, [action], observation_spaces, reward_spaces + ) def multistep( self, actions: Iterable[ActionType], + observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + reward_spaces: Optional[Iterable[Union[str, Reward]]] = None, observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, rewards: Optional[Iterable[Union[str, Reward]]] = None, ): @@ -1081,13 +1098,13 @@ def multistep( :param action: A sequence of actions to apply in order. - :param observations: A list of observation spaces to compute + :param observation_spaces: A list of observation spaces to compute observations from. If provided, this changes the :code:`observation` element of the return tuple to be a list of observations from the requested spaces. The default :code:`env.observation_space` is not returned. - :param rewards: A list of reward spaces to compute rewards from. If + :param reward_spaces: A list of reward spaces to compute rewards from. If provided, this changes the :code:`reward` element of the return tuple to be a list of rewards from the requested spaces. The default :code:`env.reward_space` is not returned. @@ -1098,49 +1115,77 @@ def multistep( :raises SessionNotFound: If :meth:`reset() ` has not been called. """ + if observations is not None: + warnings.warn( + "Argument `observations` of CompilerEnv.multistep has been " + "renamed `observation_spaces`. Please update your code", + category=DeprecationWarning, + ) + observation_spaces = observations + if rewards is not None: + warnings.warn( + "Argument `rewards` of CompilerEnv.multistep has been renamed " + "`reward_spaces`. Please update your code", + category=DeprecationWarning, + ) + reward_spaces = rewards + return self._multistep( + self.raw_step, list(actions), observation_spaces, reward_spaces + ) + + def _multistep( + self, + raw_step: Callable[ + [Iterable[ActionType], Iterable[ObservationSpaceSpec], Iterable[Reward]], + StepType, + ], + actions: Iterable[ActionType], + observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]], + reward_spaces: Optional[Iterable[Union[str, Reward]]], + ) -> StepType: # Coerce observation spaces into a list of ObservationSpaceSpec instances. - if observations: - observation_spaces: List[ObservationSpaceSpec] = [ + if observation_spaces: + observation_spaces_to_compute: List[ObservationSpaceSpec] = [ obs if isinstance(obs, ObservationSpaceSpec) else self.observation.spaces[obs] - for obs in observations + for obs in observation_spaces ] elif self.observation_space_spec: - observation_spaces: List[ObservationSpaceSpec] = [ + observation_spaces_to_compute: List[ObservationSpaceSpec] = [ self.observation_space_spec ] else: - observation_spaces: List[ObservationSpaceSpec] = [] + observation_spaces_to_compute: List[ObservationSpaceSpec] = [] # Coerce reward spaces into a list of Reward instances. 
- if rewards: - reward_spaces: List[Reward] = [ + if reward_spaces: + reward_spaces_to_compute: List[Reward] = [ rew if isinstance(rew, Reward) else self.reward.spaces[rew] - for rew in rewards + for rew in reward_spaces ] elif self.reward_space: - reward_spaces: List[Reward] = [self.reward_space] + reward_spaces_to_compute: List[Reward] = [self.reward_space] else: - reward_spaces: List[Reward] = [] + reward_spaces_to_compute: List[Reward] = [] # Perform the underlying environment step. - observation_values, reward_values, done, info = self.raw_step( - actions, observation_spaces, reward_spaces + observation_values, reward_values, done, info = raw_step( + actions, observation_spaces_to_compute, reward_spaces_to_compute ) # Translate observations lists back to the appropriate types. - if observations is None and self.observation_space_spec: + if observation_spaces is None and self.observation_space_spec: observation_values = observation_values[0] - elif not observation_spaces: + elif not observation_spaces_to_compute: observation_values = None # Translate reward lists back to the appropriate types. - if rewards is None and self.reward_space: + if reward_spaces is None and self.reward_space: reward_values = reward_values[0] # Update the cumulative episode reward self.episode_reward += reward_values - elif not reward_spaces: + elif not reward_spaces_to_compute: reward_values = None return observation_values, reward_values, done, info @@ -1213,7 +1258,9 @@ def apply(self, state: CompilerEnvState) -> None: # noqa ) actions = self.commandline_to_actions(state.commandline) - _, _, done, info = self.step(actions) + done = False + for action in actions: + _, _, done, info = self.step(action) if done: raise ValueError( f"Environment terminated with error: `{info.get('error_details')}`" diff --git a/compiler_gym/envs/llvm/llvm_rewards.py b/compiler_gym/envs/llvm/llvm_rewards.py index e674591ee..1cc045ab1 100644 --- a/compiler_gym/envs/llvm/llvm_rewards.py +++ b/compiler_gym/envs/llvm/llvm_rewards.py @@ -7,7 +7,7 @@ from compiler_gym.datasets import Benchmark from compiler_gym.spaces.reward import Reward -from compiler_gym.util.gym_type_hints import ObservationType, RewardType +from compiler_gym.util.gym_type_hints import ActionType, ObservationType, RewardType from compiler_gym.views.observation import ObservationView @@ -44,7 +44,7 @@ def reset(self, benchmark: Benchmark, observation_view: ObservationView) -> None def update( self, - actions: List[int], + actions: List[ActionType], observations: List[ObservationType], observation_view: ObservationView, ) -> RewardType: @@ -81,7 +81,7 @@ def reset(self, benchmark: str, observation_view: ObservationView) -> None: def update( self, - actions: List[int], + actions: List[ActionType], observations: List[ObservationType], observation_view: ObservationView, ) -> RewardType: diff --git a/compiler_gym/random_replay.py b/compiler_gym/random_replay.py index 063ec9ee9..81be67ba2 100644 --- a/compiler_gym/random_replay.py +++ b/compiler_gym/random_replay.py @@ -15,7 +15,7 @@ ) -@deprecated(version="0.2.1", reason="Use env.step(actions) instead") +@deprecated(version="0.2.1", reason="Use env.step(action) instead") def replay_actions(env: CompilerEnv, action_names: List[str], outdir: Path): return replay_actions_(env, action_names, outdir) diff --git a/compiler_gym/random_search.py b/compiler_gym/random_search.py index 7b86dc7d3..a81b0bdb2 100644 --- a/compiler_gym/random_search.py +++ b/compiler_gym/random_search.py @@ -17,6 +17,7 @@ from compiler_gym.envs.llvm 
import LlvmEnv from compiler_gym.service.connection import ServiceError from compiler_gym.util import logs +from compiler_gym.util.gym_type_hints import ActionType from compiler_gym.util.logs import create_logging_dir from compiler_gym.util.tabulate import tabulate @@ -79,8 +80,8 @@ def __init__( self.total_episode_count = 0 self.total_step_count = 0 self.best_returns = -float("inf") - self.best_actions: List[int] = [] - self.best_commandline: List[int] = [] + self.best_actions: List[ActionType] = [] + self.best_commandline: str = [] self.best_found_at_time = time() self.alive = True # Set this to False to signal the thread to stop. @@ -112,17 +113,17 @@ def run_one_episode(self, env: CompilerEnv) -> bool: :return: True if the episode ended gracefully, else False. """ observation = env.reset() - actions: List[int] = [] + actions: List[ActionType] = [] patience = self._patience total_returns = 0 while patience >= 0: patience -= 1 self.total_step_count += 1 # === Your agent here! === - action_index = env.action_space.sample() + action = env.action_space.sample() # === End of agent. === - actions.append(action_index) - observation, reward, done, _ = env.step(action_index) + actions.append(action) + observation, reward, done, _ = env.step(action) if done: return False total_returns += reward diff --git a/compiler_gym/spaces/named_discrete.py b/compiler_gym/spaces/named_discrete.py index 64419efc4..afd9a51c7 100644 --- a/compiler_gym/spaces/named_discrete.py +++ b/compiler_gym/spaces/named_discrete.py @@ -2,9 +2,11 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. +from collections.abc import Iterable as IterableType from typing import Iterable, List, Union from compiler_gym.spaces.discrete import Discrete +from compiler_gym.util.gym_type_hints import ActionType class NamedDiscrete(Discrete): @@ -51,18 +53,20 @@ def __getitem__(self, name: str) -> int: def __repr__(self) -> str: return f"NamedDiscrete([{', '.join(self.names)}])" - def to_string(self, values: Union[int, Iterable[int]]) -> str: + def to_string(self, values: Union[int, Iterable[ActionType]]) -> str: """Convert an action, or sequence of actions, to string. :param values: A numeric value, or list of numeric values. :return: A string representing the values. """ - if isinstance(values, int): - return self.names[values] - else: + if isinstance(values, IterableType): return " ".join([self.names[v] for v in values]) + else: + return self.names[values] - def from_string(self, values: Union[str, Iterable[str]]) -> Union[int, List[int]]: + def from_string( + self, values: Union[str, Iterable[str]] + ) -> Union[ActionType, List[ActionType]]: """Convert a name, or list of names, to numeric values. :param values: A name, or list of names. 
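At this point in the series the user-facing contract is settled: env.step() applies exactly one action, env.multistep() applies an iterable of actions, and the extra-space keyword arguments are spelled observation_spaces and reward_spaces. The sketch below is illustrative only and is not part of the patch; the environment name, benchmark, and space names ("llvm-v0", "cbench-v1/crc32", "Ir", "IrInstructionCount") are assumed from the existing LLVM environment rather than introduced by this diff.

    import compiler_gym

    env = compiler_gym.make("llvm-v0")
    env.reset(benchmark="cbench-v1/crc32")

    # One action per call: step() no longer accepts a list of actions.
    obs, reward, done, info = env.step(env.action_space.sample())

    # A sequence of actions in a single call: use multistep().
    obs, reward, done, info = env.multistep(
        [env.action_space.sample() for _ in range(3)]
    )

    # Extra observation/reward spaces use the renamed keyword arguments. The
    # observation and reward elements become lists over the requested spaces.
    (ir, ic), (icr,), done, info = env.step(
        env.action_space.sample(),
        observation_spaces=["Ir", "IrInstructionCount"],
        reward_spaces=["IrInstructionCount"],
    )

    # Deprecated but still accepted, each with a DeprecationWarning:
    #   env.step([a, b, c])                        -> env.multistep([a, b, c])
    #   env.step(a, observations=..., rewards=...) -> observation_spaces=..., reward_spaces=...
    env.close()
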
diff --git a/compiler_gym/spaces/reward.py b/compiler_gym/spaces/reward.py index cccc2169d..75c9d5dad 100644 --- a/compiler_gym/spaces/reward.py +++ b/compiler_gym/spaces/reward.py @@ -9,7 +9,7 @@ import compiler_gym from compiler_gym.spaces.scalar import Scalar -from compiler_gym.util.gym_type_hints import ObservationType, RewardType +from compiler_gym.util.gym_type_hints import ActionType, ObservationType, RewardType class Reward(Scalar): @@ -132,7 +132,7 @@ def reset( def update( self, - actions: List[int], + actions: List[ActionType], observations: List[ObservationType], observation_view: "compiler_gym.views.ObservationView", # noqa: F821 ) -> RewardType: diff --git a/compiler_gym/util/gym_type_hints.py b/compiler_gym/util/gym_type_hints.py index cc592de45..ba7b6ef8c 100644 --- a/compiler_gym/util/gym_type_hints.py +++ b/compiler_gym/util/gym_type_hints.py @@ -9,7 +9,7 @@ # Type hints for the values returned by gym.Env.step(). ObservationType = TypeVar("ObservationType") -ActionType = int +ActionType = TypeVar("ActionType") RewardType = float DoneType = bool InfoType = JsonDictType diff --git a/compiler_gym/util/minimize_trajectory.py b/compiler_gym/util/minimize_trajectory.py index 0de687699..ffd2b947f 100644 --- a/compiler_gym/util/minimize_trajectory.py +++ b/compiler_gym/util/minimize_trajectory.py @@ -41,7 +41,7 @@ def _apply_and_test(env, actions, hypothesis, flakiness) -> bool: env.reset(benchmark=env.benchmark) for _ in range(flakiness): logger.debug("Applying %d actions ...", len(actions)) - _, _, done, info = env.step(actions) + _, _, done, info = env.multistep(actions) if done: raise MinimizationError( f"Failed to replay actions: {info.get('error_details', '')}" diff --git a/compiler_gym/views/observation.py b/compiler_gym/views/observation.py index e6e72b0e6..e743bf6ad 100644 --- a/compiler_gym/views/observation.py +++ b/compiler_gym/views/observation.py @@ -67,7 +67,7 @@ def __getitem__(self, observation_space: str) -> ObservationType: """ observation_space: ObservationSpaceSpec = self.spaces[observation_space] observations, _, done, info = self._raw_step( - actions=[], observations=[observation_space], rewards=[] + actions=[], observation_spaces=[observation_space], reward_spaces=[] ) if done: diff --git a/compiler_gym/wrappers/commandline.py b/compiler_gym/wrappers/commandline.py index 30606a00f..a961c878c 100644 --- a/compiler_gym/wrappers/commandline.py +++ b/compiler_gym/wrappers/commandline.py @@ -6,8 +6,9 @@ from typing import Dict, Iterable, List, Optional, Union from compiler_gym.envs import CompilerEnv -from compiler_gym.spaces import Commandline, CommandlineFlag -from compiler_gym.util.gym_type_hints import StepType +from compiler_gym.spaces import Commandline, CommandlineFlag, Reward +from compiler_gym.util.gym_type_hints import ActionType, StepType +from compiler_gym.views import ObservationSpaceSpec from compiler_gym.wrappers.core import ActionWrapper, CompilerEnvWrapper @@ -40,8 +41,7 @@ def __init__( # Redefine the action space, inserting the terminal action at the start. 
self.action_space = Commandline( - items=[terminal] - + [ + items=[ CommandlineFlag( name=name, flag=flag, @@ -52,25 +52,36 @@ def __init__( env.action_space.flags, env.action_space.descriptions, ) - ], + ] + + [terminal], name=f"{type(self).__name__}<{env.action_space.name}>", ) - def step(self, action: int) -> StepType: - if isinstance(action, int): - end_of_episode = action == 0 - action = [] if end_of_episode else action - 1 - else: - try: - index = action.index(0) - end_of_episode = True - except ValueError: - index = len(action) - end_of_episode = False - action = [a - 1 for a in action[:index]] - - observation, reward, done, info = self.env.step(action) - if end_of_episode and not done: + def raw_step( + self, + actions: List[ActionType], + observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + reward_spaces: Optional[Iterable[Union[str, Reward]]] = None, + ) -> StepType: + terminal_action: int = len(self.action_space.flags) - 1 + + try: + index_of_terminal = actions.index(terminal_action) + except ValueError: + index_of_terminal = -1 + + # Run only the actions up to the terminal action. + if index_of_terminal >= 0: + actions = actions[:index_of_terminal] + + observation, reward, done, info = self.env.raw_step( + actions, + observation_spaces=observation_spaces, + reward_spaces=reward_spaces, + ) + + # Communicate back to the frontend. + if index_of_terminal >= 0 and not done: done = True info["terminal_action"] = True diff --git a/compiler_gym/wrappers/core.py b/compiler_gym/wrappers/core.py index 56bb5ecff..f6d0809ef 100644 --- a/compiler_gym/wrappers/core.py +++ b/compiler_gym/wrappers/core.py @@ -2,13 +2,15 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. +import warnings +from collections.abc import Iterable as IterableType from typing import Iterable, Optional, Union import gym from compiler_gym.envs import CompilerEnv from compiler_gym.spaces.reward import Reward -from compiler_gym.util.gym_type_hints import ObservationType, StepType +from compiler_gym.util.gym_type_hints import ActionType, ObservationType from compiler_gym.views import ObservationSpaceSpec @@ -21,7 +23,7 @@ class CompilerEnvWrapper(gym.Wrapper): such as the :code:`fork()` method. """ - def __init__(self, env: CompilerEnv): + def __init__(self, env: CompilerEnv): # pylint: disable=super-init-not-called """Constructor. :param env: The environment to wrap. @@ -38,8 +40,15 @@ def __init__(self, env: CompilerEnv): self.reward_range = self.env.reward_range self.metadata = self.env.metadata - def step(self, action, observations=None, rewards=None): - return self.env.step(action, observations=observations, rewards=rewards) + def raw_step( + self, + actions: Iterable[ActionType], + observation_spaces: Iterable[ObservationSpaceSpec], + reward_spaces: Iterable[Reward], + ): + return self.env.raw_step( + actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces + ) def reset(self, *args, **kwargs) -> ObservationType: return self.env.reset(*args, **kwargs) @@ -47,6 +56,81 @@ def reset(self, *args, **kwargs) -> ObservationType: def fork(self) -> CompilerEnv: return type(self)(env=self.env.fork()) + # NOTE(cummins): This step() method is provided only because + # CompilerEnv.step accepts additional arguments over gym.Env.step. Users who + # wish to modify the behavior of CompilerEnv.step should overload + # raw_step(). 
+ def step( # pylint: disable=arguments-differ + self, + action: ActionType, + observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + reward_spaces: Optional[Iterable[Union[str, Reward]]] = None, + observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + rewards: Optional[Iterable[Union[str, Reward]]] = None, + ): + if isinstance(action, IterableType): + warnings.warn( + "Argument `action` of CompilerEnv.step no longer accepts a list " + " of actions. Please use CompilerEnv.multistep instead", + category=DeprecationWarning, + ) + return self.multistep( + action, + observation_spaces=observation_spaces, + reward_spaces=reward_spaces, + observations=observations, + rewards=rewards, + ) + if observations is not None: + warnings.warn( + "Argument `observations` of CompilerEnv.multistep has been " + "renamed `observation_spaces`. Please update your code", + category=DeprecationWarning, + ) + observation_spaces = observations + if rewards is not None: + warnings.warn( + "Argument `rewards` of CompilerEnv.multistep has been renamed " + "`reward_spaces`. Please update your code", + category=DeprecationWarning, + ) + reward_spaces = rewards + return self.env._multistep( + raw_step=self.raw_step, + actions=[action], + observation_spaces=observation_spaces, + reward_spaces=reward_spaces, + ) + + def multistep( + self, + actions: Iterable[ActionType], + observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + reward_spaces: Optional[Iterable[Union[str, Reward]]] = None, + observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + rewards: Optional[Iterable[Union[str, Reward]]] = None, + ): + if observations is not None: + warnings.warn( + "Argument `observations` of CompilerEnv.multistep has been " + "renamed `observation_spaces`. Please update your code", + category=DeprecationWarning, + ) + observation_spaces = observations + if rewards is not None: + warnings.warn( + "Argument `rewards` of CompilerEnv.multistep has been renamed " + "`reward_spaces`. Please update your code", + category=DeprecationWarning, + ) + reward_spaces = rewards + return self.env._multistep( # pylint: disable=protected-access + raw_step=self.raw_step, + actions=actions, + observation_spaces=observation_spaces, + reward_spaces=reward_spaces, + ) + @property def observation_space(self): if self.env.observation_space_spec: @@ -82,18 +166,23 @@ class ActionWrapper(CompilerEnvWrapper): to allow an action space transformation. 
""" - def step( - self, action: Union[int, Iterable[int]], observations=None, rewards=None - ) -> StepType: - return self.env.step( - self.action(action), observations=observations, rewards=rewards + def raw_step( + self, + actions: Iterable[ActionType], + observation_spaces: Iterable[ObservationSpaceSpec], + reward_spaces: Iterable[Reward], + ): + return self.env.raw_step( + [self.action(a) for a in actions], + observation_spaces=observation_spaces, + reward_spaces=reward_spaces, ) - def action(self, action): + def action(self, action: ActionType) -> ActionType: """Translate the action to the new space.""" raise NotImplementedError - def reverse_action(self, action): + def reverse_action(self, action: ActionType) -> ActionType: """Translate an action from the new space to the wrapped space.""" raise NotImplementedError @@ -107,9 +196,22 @@ def reset(self, *args, **kwargs): observation = self.env.reset(*args, **kwargs) return self.observation(observation) - def step(self, *args, **kwargs): - observation, reward, done, info = self.env.step(*args, **kwargs) - return self.observation(observation), reward, done, info + def raw_step( + self, + actions: Iterable[ActionType], + observation_spaces: Iterable[ObservationSpaceSpec], + reward_spaces: Iterable[Reward], + ): + observation, reward, done, info = self.env.raw_step( + actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces + ) + + # Only apply observation transformation if we are using the default + # observation space. + if observation_spaces == [self.observation_space_spec]: + observation = [self.observation(observation)] + + return observation, reward, done, info def observation(self, observation): """Translate an observation to the new space.""" @@ -124,18 +226,21 @@ class RewardWrapper(CompilerEnvWrapper): def reset(self, *args, **kwargs): return self.env.reset(*args, **kwargs) - def step(self, *args, **kwargs): - observation, reward, done, info = self.env.step(*args, **kwargs) - # Undo the episode_reward update and reapply it once we have transformed - # the reward. - # - # TODO(cummins): Refactor step() so that we don't have to do this - # recalculation of episode_reward, as this is prone to errors if, say, - # the base reward returns NaN or an invalid type. - if reward is not None and self.episode_reward is not None: - self.unwrapped.episode_reward -= reward - reward = self.reward(reward) - self.unwrapped.episode_reward += reward + def raw_step( + self, + actions: Iterable[ActionType], + observation_spaces: Iterable[ObservationSpaceSpec], + reward_spaces: Iterable[Reward], + ): + observation, reward, done, info = self.env.step( + actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces + ) + + # Only apply rewards transformation if we are using the default + # reward space. 
+ if reward_spaces == [self.reward_space]: + reward = [self.reward(reward)] + return observation, reward, done, info def reward(self, reward): diff --git a/compiler_gym/wrappers/llvm.py b/compiler_gym/wrappers/llvm.py index 08174ed9a..1f91ba6be 100644 --- a/compiler_gym/wrappers/llvm.py +++ b/compiler_gym/wrappers/llvm.py @@ -11,7 +11,7 @@ from compiler_gym.envs.llvm import LlvmEnv from compiler_gym.service.connection import ServiceError from compiler_gym.spaces import Reward -from compiler_gym.util.gym_type_hints import ObservationType +from compiler_gym.util.gym_type_hints import ActionType, ObservationType from compiler_gym.wrappers import CompilerEnvWrapper @@ -65,7 +65,7 @@ def reset(self, benchmark, observation_view) -> None: def update( self, - actions: List[int], + actions: List[ActionType], observations: List[ObservationType], observation_view, ) -> float: diff --git a/compiler_gym/wrappers/time_limit.py b/compiler_gym/wrappers/time_limit.py index 743853915..2e5fda2a3 100644 --- a/compiler_gym/wrappers/time_limit.py +++ b/compiler_gym/wrappers/time_limit.py @@ -2,9 +2,10 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -from typing import Iterable, Optional, Union +from typing import Optional from compiler_gym.envs import CompilerEnv +from compiler_gym.util.gym_type_hints import ActionType from compiler_gym.wrappers.core import CompilerEnvWrapper @@ -31,7 +32,7 @@ def __init__(self, env: CompilerEnv, max_episode_steps: Optional[int] = None): self._max_episode_steps = max_episode_steps self._elapsed_steps = None - def step(self, action: Union[int, Iterable[int]], **kwargs): + def step(self, action: ActionType, **kwargs): assert ( self._elapsed_steps is not None ), "Cannot call env.step() before calling reset()" diff --git a/compiler_gym/wrappers/validation.py b/compiler_gym/wrappers/validation.py index a493187cf..2c64f6579 100644 --- a/compiler_gym/wrappers/validation.py +++ b/compiler_gym/wrappers/validation.py @@ -2,7 +2,10 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. +from typing import List + from compiler_gym.envs import CompilerEnv +from compiler_gym.util.gym_type_hints import ActionType from compiler_gym.wrappers.core import CompilerEnvWrapper @@ -26,9 +29,16 @@ def __init__( super().__init__(env) self.reward_penalty = reward_penalty - def step(self, action, observations=None, rewards=None): - observation, reward, done, info = self.env.step( - action, observations=observations, rewards=rewards + def raw_step( + self, + actions: List[ActionType], + observation_spaces=None, + reward_spaces=None, + ): + observation, reward, done, info = self.env.raw_step( + actions, + observation_spaces=observation_spaces, + reward_spaces=reward_spaces, ) # Early exit if environment reaches terminal state. 
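For wrapper authors, the practical upshot (made explicit by the later "Use multistep instead of raw_step in wrappers" patch in this series) is that per-step behaviour is customised by overriding multistep(), which CompilerEnvWrapper.step() delegates to. The wrapper below is a hypothetical sketch rather than part of the patch: the class name StepCounter and its step_count attribute are invented for illustration.

    from typing import Iterable, Optional, Union

    from compiler_gym.envs import CompilerEnv
    from compiler_gym.spaces import Reward
    from compiler_gym.util.gym_type_hints import ActionType
    from compiler_gym.views import ObservationSpaceSpec
    from compiler_gym.wrappers import CompilerEnvWrapper


    class StepCounter(CompilerEnvWrapper):
        """Count how many actions have been applied to the wrapped environment."""

        def __init__(self, env: CompilerEnv):
            super().__init__(env)
            self.step_count = 0

        def reset(self, *args, **kwargs):
            # Start each episode with a fresh count.
            self.step_count = 0
            return self.env.reset(*args, **kwargs)

        def multistep(
            self,
            actions: Iterable[ActionType],
            observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None,
            reward_spaces: Optional[Iterable[Union[str, Reward]]] = None,
            **kwargs,
        ):
            # Count the actions, then forward everything to the wrapped environment.
            actions = list(actions)
            self.step_count += len(actions)
            return self.env.multistep(
                actions,
                observation_spaces=observation_spaces,
                reward_spaces=reward_spaces,
                **kwargs,
            )

Because the base class's step() funnels single actions through multistep() with a one-element action list, this single override observes both env.step(a) and env.multistep([a, b]) without duplicating logic.
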
diff --git a/examples/brute_force.py b/examples/brute_force.py index 50f809a64..1f75545dc 100644 --- a/examples/brute_force.py +++ b/examples/brute_force.py @@ -42,6 +42,7 @@ from compiler_gym.envs import CompilerEnv from compiler_gym.util.flags.benchmark_from_flags import benchmark_from_flags from compiler_gym.util.flags.env_from_flags import env_from_flags +from compiler_gym.util.gym_type_hints import ActionType from compiler_gym.util.logs import create_logging_dir flags.DEFINE_list( @@ -68,7 +69,7 @@ class BruteForceProducer(Thread): def __init__( self, in_q: Queue, - actions: List[int], + actions: List[ActionType], episode_length: int, nproc: int, chunksize: int = 128, diff --git a/examples/llvm_autotuning/autotuners/nevergrad_.py b/examples/llvm_autotuning/autotuners/nevergrad_.py index f7b8fd043..bacea33d8 100644 --- a/examples/llvm_autotuning/autotuners/nevergrad_.py +++ b/examples/llvm_autotuning/autotuners/nevergrad_.py @@ -10,6 +10,7 @@ from llvm_autotuning.optimization_target import OptimizationTarget from compiler_gym.envs import CompilerEnv +from compiler_gym.util.gym_type_hints import ActionType def nevergrad( @@ -30,17 +31,17 @@ def nevergrad( """ if optimization_target == OptimizationTarget.RUNTIME: - def calculate_negative_reward(actions: Tuple[int]) -> float: + def calculate_negative_reward(actions: Tuple[ActionType]) -> float: env.reset() - env.step(actions) + env.multistep(actions) return -env.episode_reward else: # Only cache the deterministic non-runtime rewards. @lru_cache(maxsize=int(1e4)) - def calculate_negative_reward(actions: Tuple[int]) -> float: + def calculate_negative_reward(actions: Tuple[ActionType]) -> float: env.reset() - env.step(actions) + env.multistep(actions) return -env.episode_reward params = ng.p.Choice( @@ -61,4 +62,4 @@ def calculate_negative_reward(actions: Tuple[int]) -> float: # Get best solution and replay it. recommendation = optimizer.provide_recommendation() env.reset() - env.step(recommendation.value) + env.multistep(recommendation.value) diff --git a/examples/llvm_autotuning/optimization_target.py b/examples/llvm_autotuning/optimization_target.py index 58feddfc4..7baeba1cb 100644 --- a/examples/llvm_autotuning/optimization_target.py +++ b/examples/llvm_autotuning/optimization_target.py @@ -68,7 +68,7 @@ def final_reward(self, env: LlvmEnv, runtime_count: int = 30) -> float: actions = list(env.actions) env.reset() for i in range(1, 5 + 1): - _, _, done, info = env.step(actions) + _, _, done, info = env.multistep(actions) if not done: break logger.warning( diff --git a/examples/llvm_rl/wrappers.py b/examples/llvm_rl/wrappers.py index 4ee1b0619..d14c82e53 100644 --- a/examples/llvm_rl/wrappers.py +++ b/examples/llvm_rl/wrappers.py @@ -3,13 +3,13 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
"""Environment wrappers to closer replicate the MLSys'20 Autophase paper.""" -from collections.abc import Iterable as IterableType -from typing import List, Union +from typing import List import gym import numpy as np from compiler_gym.envs import CompilerEnv, LlvmEnv +from compiler_gym.util.gym_type_hints import ActionType from compiler_gym.wrappers import ( ConstrainedCommandline, ObservationWrapper, @@ -126,12 +126,16 @@ def reset(self, *args, **kwargs): ) return super().reset(*args, **kwargs) - def step(self, action: Union[int, List[int]], observations=None, **kwargs): - if not isinstance(action, IterableType): - action = [action] - for a in action: + def raw_step( + self, + actions: List[ActionType], + observation_spaces=None, + observations=None, + **kwargs, + ): + for a in actions: self.histogram[a] += self.increment - return super().step(action, **kwargs) + return self.env.raw_step(actions, **kwargs) def observation(self, observation): return np.concatenate((observation, self.histogram)).astype( diff --git a/examples/sensitivity_analysis/action_sensitivity_analysis.py b/examples/sensitivity_analysis/action_sensitivity_analysis.py index 52c4e409a..05431de74 100644 --- a/examples/sensitivity_analysis/action_sensitivity_analysis.py +++ b/examples/sensitivity_analysis/action_sensitivity_analysis.py @@ -40,6 +40,7 @@ from compiler_gym.envs import CompilerEnv from compiler_gym.util.flags.benchmark_from_flags import benchmark_from_flags from compiler_gym.util.flags.env_from_flags import env_from_flags +from compiler_gym.util.gym_type_hints import ActionType from compiler_gym.util.logs import create_logging_dir from compiler_gym.util.timer import Timer @@ -118,12 +119,12 @@ def run_one_trial( _, _, done, _ = env.step(warmup_actions) if done: return None - _, (reward,), done, _ = env.step(action, rewards=[reward_space]) + _, (reward,), done, _ = env.step(action, reward_spaces=[reward_space]) return None if done else reward def run_action_sensitivity_analysis( - actions: List[int], + actions: List[ActionType], rewards_path: Path, runtimes_path: Path, reward_space: str, diff --git a/tests/llvm/episode_reward_test.py b/tests/llvm/episode_reward_test.py index ba5d6e3d7..7c1a0e7cb 100644 --- a/tests/llvm/episode_reward_test.py +++ b/tests/llvm/episode_reward_test.py @@ -28,7 +28,7 @@ def test_episode_reward_with_non_default_reward_space(env: LlvmEnv): assert env.episode_reward == 0 _, rewards, _, _ = env.step( env.action_space["-mem2reg"], - rewards=["IrInstructionCount"], + reward_spaces=["IrInstructionCount"], ) assert rewards[0] > 0 assert env.episode_reward == 0 diff --git a/tests/llvm/fork_regression_test.py b/tests/llvm/fork_regression_test.py index febc8b851..f5333dde5 100644 --- a/tests/llvm/fork_regression_test.py +++ b/tests/llvm/fork_regression_test.py @@ -57,14 +57,14 @@ def test_fork_regression_test(env: LlvmEnv, test: ForkRegressionTest): pre_fork = [env.action_space[f] for f in test.pre_fork.split()] post_fork = [env.action_space[f] for f in test.post_fork.split()] - _, _, done, info = env.step(pre_fork) + _, _, done, info = env.multistep(pre_fork) assert not done, info with env.fork() as fkd: assert env.state == fkd.state # Sanity check - env.step(post_fork) - fkd.step(post_fork) + env.multistep(post_fork) + fkd.multistep(post_fork) # Verify that the environment states no longer line up. 
assert env.state != fkd.state diff --git a/tests/llvm/llvm_env_test.py b/tests/llvm/llvm_env_test.py index a1efc22a6..36fe74bba 100644 --- a/tests/llvm/llvm_env_test.py +++ b/tests/llvm/llvm_env_test.py @@ -221,7 +221,7 @@ def test_step_multiple_actions_list(env: LlvmEnv): env.action_space.flags.index("-mem2reg"), env.action_space.flags.index("-reg2mem"), ] - _, _, done, _ = env.step(actions) + _, _, done, _ = env.multistep(actions) assert not done assert env.actions == actions @@ -233,7 +233,7 @@ def test_step_multiple_actions_generator(env: LlvmEnv): env.action_space.flags.index("-mem2reg"), env.action_space.flags.index("-reg2mem"), ) - _, _, done, _ = env.step(actions) + _, _, done, _ = env.multistep(actions) assert not done assert env.actions == [ env.action_space.flags.index("-mem2reg"), diff --git a/tests/llvm/multiprocessing_test.py b/tests/llvm/multiprocessing_test.py index d2ae8f4d1..e93fb21e5 100644 --- a/tests/llvm/multiprocessing_test.py +++ b/tests/llvm/multiprocessing_test.py @@ -12,11 +12,14 @@ from flaky import flaky from compiler_gym.envs import LlvmEnv +from compiler_gym.util.gym_type_hints import ActionType from tests.pytest_plugins.common import macos_only from tests.test_main import main -def process_worker(env_name: str, benchmark: str, actions: List[int], queue: mp.Queue): +def process_worker( + env_name: str, benchmark: str, actions: List[ActionType], queue: mp.Queue +): assert actions with gym.make(env_name) as env: env.reset(benchmark=benchmark) @@ -28,7 +31,7 @@ def process_worker(env_name: str, benchmark: str, actions: List[int], queue: mp. queue.put((observation, reward, done, info)) -def process_worker_with_env(env: LlvmEnv, actions: List[int], queue: mp.Queue): +def process_worker_with_env(env: LlvmEnv, actions: List[ActionType], queue: mp.Queue): assert actions for action in actions: diff --git a/tests/llvm/threading_test.py b/tests/llvm/threading_test.py index e125230db..a347ffc6d 100644 --- a/tests/llvm/threading_test.py +++ b/tests/llvm/threading_test.py @@ -10,13 +10,14 @@ from flaky import flaky from compiler_gym import CompilerEnv +from compiler_gym.util.gym_type_hints import ActionType from tests.test_main import main class ThreadedWorker(Thread): """Create an environment and run through a set of actions in a background thread.""" - def __init__(self, env_name: str, benchmark: str, actions: List[int]): + def __init__(self, env_name: str, benchmark: str, actions: List[ActionType]): super().__init__() self.done = False self.env_name = env_name @@ -38,7 +39,7 @@ def run(self) -> None: class ThreadedWorkerWithEnv(Thread): """Create an environment and run through a set of actions in a background thread.""" - def __init__(self, env: CompilerEnv, actions: List[int]): + def __init__(self, env: CompilerEnv, actions: List[ActionType]): super().__init__() self.done = False self.env = env diff --git a/tests/util/minimize_trajectory_test.py b/tests/util/minimize_trajectory_test.py index b9e7597e4..292cb9e09 100644 --- a/tests/util/minimize_trajectory_test.py +++ b/tests/util/minimize_trajectory_test.py @@ -10,6 +10,7 @@ import pytest from compiler_gym.util import minimize_trajectory as mt +from compiler_gym.util.gym_type_hints import ActionType from tests.test_main import main pytest_plugins = ["tests.pytest_plugins.llvm"] @@ -38,7 +39,7 @@ def okay(self): class MockEnv: """A mock environment for testing trajectory minimization.""" - def __init__(self, actions: List[int], validate=lambda env: True): + def __init__(self, actions: List[ActionType], validate=lambda 
env: True): self.original_trajectory = actions self.actions = actions.copy() self.validate = lambda: MockValidationResult(validate(self)) @@ -49,7 +50,7 @@ def reset(self, benchmark): self.actions = [] assert benchmark == self.benchmark - def step(self, actions): + def multistep(self, actions): for action in actions: assert action in self.original_trajectory self.actions += actions diff --git a/tests/views/observation_test.py b/tests/views/observation_test.py index 2d73e27d9..5f30fee2c 100644 --- a/tests/views/observation_test.py +++ b/tests/views/observation_test.py @@ -28,11 +28,11 @@ def __init__(self, ret=None): self.called_observation_spaces = [] self.ret = list(reversed(ret or [None])) - def __call__(self, actions, observations, rewards): + def __call__(self, actions, observation_spaces, reward_spaces): assert not actions - assert len(observations) == 1 - assert not rewards - self.called_observation_spaces.append(observations[0].id) + assert len(observation_spaces) == 1 + assert not reward_spaces + self.called_observation_spaces.append(observation_spaces[0].id) ret = self.ret[-1] del self.ret[-1] return [ret], [], False, {} diff --git a/tests/wrappers/commandline_wrappers_test.py b/tests/wrappers/commandline_wrappers_test.py index dba8b509a..64095c31d 100644 --- a/tests/wrappers/commandline_wrappers_test.py +++ b/tests/wrappers/commandline_wrappers_test.py @@ -18,17 +18,17 @@ def test_commandline_with_terminal_action(env: LlvmEnv): mem2reg_index = env.action_space["-mem2reg"] reg2mem_index = env.action_space["-reg2mem"] - assert mem2reg_index == mem2reg_unwrapped_index + 1 + assert mem2reg_index == mem2reg_unwrapped_index env.reset() - _, _, done, info = env.step(mem2reg_index + 1) + _, _, done, info = env.step(mem2reg_index) assert not done, info - _, _, done, info = env.step([reg2mem_index + 1, reg2mem_index + 1]) + _, _, done, info = env.multistep([reg2mem_index, reg2mem_index]) assert not done, info assert env.actions == [mem2reg_index, reg2mem_index, reg2mem_index] - _, _, done, info = env.step(0) + _, _, done, info = env.step(len(env.action_space.flags) - 1) assert done assert "terminal_action" in info @@ -36,17 +36,14 @@ def test_commandline_with_terminal_action(env: LlvmEnv): def test_commandline_with_terminal_action_fork(env: LlvmEnv): env = CommandlineWithTerminalAction(env) assert env.unwrapped.action_space != env.action_space # Sanity check. - fkd = env.fork() - try: + with env.fork() as fkd: assert fkd.action_space == env.action_space - _, _, done, info = env.step(0) + _, _, done, _ = env.step(len(env.action_space.flags) - 1) assert done - _, _, done, info = fkd.step(0) + _, _, done, _ = fkd.step(len(env.action_space.flags) - 1) assert done - finally: - fkd.close() def test_constrained_action_space(env: LlvmEnv): @@ -63,7 +60,7 @@ def test_constrained_action_space(env: LlvmEnv): env.reset() env.step(0) - env.step([1, 1]) + env.multistep([1, 1]) assert env.actions == [0, 1, 1] @@ -84,7 +81,7 @@ def test_constrained_action_space_fork(env: LlvmEnv): fkd.reset() fkd.step(0) - fkd.step([1, 1]) + fkd.multistep([1, 1]) assert fkd.actions == [0, 1, 1] finally: diff --git a/tests/wrappers/core_wrappers_test.py b/tests/wrappers/core_wrappers_test.py index a682fe7d2..cc577bac4 100644 --- a/tests/wrappers/core_wrappers_test.py +++ b/tests/wrappers/core_wrappers_test.py @@ -3,6 +3,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
"""Unit tests for //compiler_gym/wrappers.""" +import numpy as np import pytest from compiler_gym.datasets import Datasets @@ -92,7 +93,7 @@ def test_wrapped_step_multi_step(env: LlvmEnv): """Test passing a list of actions to step().""" env = CompilerEnvWrapper(env) env.reset() - env.step([0, 0, 0]) + env.multistep([0, 0, 0]) assert env.actions == [0, 0, 0] @@ -109,10 +110,10 @@ def action(self, action): env = MyWrapper(env) env.reset() - (ir, ic), (icr, icroz), _, _ = env.step( - action=[0, 0, 0], - observations=["Ir", "IrInstructionCount"], - rewards=["IrInstructionCount", "IrInstructionCountOz"], + (ir, ic), (icr, icroz), _, _ = env.multistep( + actions=[0, 0, 0], + observation_spaces=["Ir", "IrInstructionCount"], + reward_spaces=["IrInstructionCount", "IrInstructionCountOz"], ) assert isinstance(ir, str) assert isinstance(ic, int) @@ -213,7 +214,7 @@ def observation(self, observation): assert env.reward_space.name == "IrInstructionCount" -def test_wrapped_action(env: LlvmEnv): +def test_wrapped_action(mocker, env: LlvmEnv): class MyWrapper(ActionWrapper): def action(self, action): return action - 1 @@ -222,14 +223,17 @@ def reverse_action(self, action): return action + 1 env = MyWrapper(env) + mocker.spy(env, "action") + env.reset() env.step(1) env.step(2) + assert env.action.call_count == 2 # pylint: disable=no-member assert env.actions == [0, 1] -def test_wrapped_observation(env: LlvmEnv): +def test_wrapped_observation(mocker, env: LlvmEnv): """Test using an ObservationWrapper that returns the length of the Ir string.""" class MyWrapper(ObservationWrapper): @@ -241,8 +245,10 @@ def observation(self, observation): return len(observation) env = MyWrapper(env) + assert env.reset() > 0 observation, _, _, _ = env.step(0) + assert observation > 0 @@ -253,6 +259,22 @@ def test_wrapped_observation_missing_definition(env: LlvmEnv): env.reset() +def test_wrapped_observation_not_applied_to_non_default_observations(env: LlvmEnv): + class MyWrapper(ObservationWrapper): + def __init__(self, env): + super().__init__(env) + self.observation_space = "Ir" + + def observation(self, observation): + return len(observation) + + env = MyWrapper(env) + env.reset() + (observation,), _, _, _ = env.step(0, observation_spaces=["Autophase"]) + print(observation) + assert isinstance(observation, np.ndarray) + + def test_wrapped_reward(env: LlvmEnv): class MyWrapper(RewardWrapper): def reward(self, reward): diff --git a/tests/wrappers/time_limit_wrappers_test.py b/tests/wrappers/time_limit_wrappers_test.py index f74c76ea5..83b356535 100644 --- a/tests/wrappers/time_limit_wrappers_test.py +++ b/tests/wrappers/time_limit_wrappers_test.py @@ -28,7 +28,7 @@ def test_wrapped_fork_type(env: LlvmEnv): def test_wrapped_step_multi_step(env: LlvmEnv): env = TimeLimit(env, max_episode_steps=5) env.reset(benchmark="benchmark://cbench-v1/dijkstra") - env.step([0, 0, 0]) + env.multistep([0, 0, 0]) assert env.benchmark == "benchmark://cbench-v1/dijkstra" assert env.actions == [0, 0, 0] @@ -37,7 +37,7 @@ def test_wrapped_step_multi_step(env: LlvmEnv): def test_wrapped_custom_step_args(env: LlvmEnv): env = TimeLimit(env, max_episode_steps=5) env.reset(benchmark="benchmark://cbench-v1/dijkstra") - (ic,), _, _, _ = env.step(0, observations=["IrInstructionCount"]) + (ic,), _, _, _ = env.step(0, observation_spaces=["IrInstructionCount"]) assert isinstance(ic, int) diff --git a/www/www.py b/www/www.py index 0586b3274..951aa34f3 100644 --- a/www/www.py +++ b/www/www.py @@ -217,9 +217,9 @@ def _step(request: StepRequest) -> StepReply: if 
request.all_states: # Replay actions one at a time to receive incremental rewards. The # first item represents the state prior to any actions. - (instcount, autophase), _, done, info = env.step( - action=[], - observations=[ + (instcount, autophase), _, done, info = env.raw_step( + actions=[], + observation_spaces=[ env.observation.spaces["InstCountDict"], env.observation.spaces["AutophaseDict"], ], @@ -238,7 +238,7 @@ def _step(request: StepRequest) -> StepReply: for action in request.actions[:-1]: (instcount, autophase), reward, done, info = env.step( action, - observations=[ + observation_spaces=[ env.observation.spaces["InstCountDict"], env.observation.spaces["AutophaseDict"], ], @@ -265,12 +265,12 @@ def _step(request: StepRequest) -> StepReply: # Perform the final action. (ir, instcount, autophase), (reward,), done, _ = env.raw_step( actions=request.actions[-1:], - observations=[ + observation_spaces=[ env.observation.spaces["Ir"], env.observation.spaces["InstCountDict"], env.observation.spaces["AutophaseDict"], ], - rewards=[env.reward_space], + reward_spaces=[env.reward_space], ) states.append( From 48380bba390e4e27316dc624cd9361a1b9540bb2 Mon Sep 17 00:00:00 2001 From: Boian Petkantchin Date: Thu, 10 Mar 2022 18:36:20 -0800 Subject: [PATCH 4/8] Use multistep instead of raw_step in wrappers Fix tests --- compiler_gym/wrappers/commandline.py | 8 +- compiler_gym/wrappers/core.py | 84 ++++++++----------- compiler_gym/wrappers/validation.py | 4 +- .../llvm_autotuning/autotuners/opentuner_.py | 2 +- examples/llvm_rl/wrappers.py | 4 +- .../service_py/CMakeLists.txt | 2 + examples/op_benchmarks.py | 2 +- .../benchmark_sensitivity_analysis.py | 2 +- tests/util/minimize_trajectory_test.py | 2 +- tests/wrappers/core_wrappers_test.py | 22 +---- www/www.py | 4 +- 11 files changed, 58 insertions(+), 78 deletions(-) diff --git a/compiler_gym/wrappers/commandline.py b/compiler_gym/wrappers/commandline.py index a961c878c..21a2e2fc9 100644 --- a/compiler_gym/wrappers/commandline.py +++ b/compiler_gym/wrappers/commandline.py @@ -57,11 +57,13 @@ def __init__( name=f"{type(self).__name__}<{env.action_space.name}>", ) - def raw_step( + def multistep( self, actions: List[ActionType], observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, reward_spaces: Optional[Iterable[Union[str, Reward]]] = None, + observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + rewards: Optional[Iterable[Union[str, Reward]]] = None, ) -> StepType: terminal_action: int = len(self.action_space.flags) - 1 @@ -74,10 +76,12 @@ def raw_step( if index_of_terminal >= 0: actions = actions[:index_of_terminal] - observation, reward, done, info = self.env.raw_step( + observation, reward, done, info = self.env.multistep( actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces, + observations=observations, + rewards=rewards, ) # Communicate back to the frontend. diff --git a/compiler_gym/wrappers/core.py b/compiler_gym/wrappers/core.py index f6d0809ef..34674fb8d 100644 --- a/compiler_gym/wrappers/core.py +++ b/compiler_gym/wrappers/core.py @@ -4,7 +4,7 @@ # LICENSE file in the root directory of this source tree. 
import warnings from collections.abc import Iterable as IterableType -from typing import Iterable, Optional, Union +from typing import Iterable, List, Optional, Union import gym @@ -40,26 +40,12 @@ def __init__(self, env: CompilerEnv): # pylint: disable=super-init-not-called self.reward_range = self.env.reward_range self.metadata = self.env.metadata - def raw_step( - self, - actions: Iterable[ActionType], - observation_spaces: Iterable[ObservationSpaceSpec], - reward_spaces: Iterable[Reward], - ): - return self.env.raw_step( - actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces - ) - def reset(self, *args, **kwargs) -> ObservationType: return self.env.reset(*args, **kwargs) def fork(self) -> CompilerEnv: return type(self)(env=self.env.fork()) - # NOTE(cummins): This step() method is provided only because - # CompilerEnv.step accepts additional arguments over gym.Env.step. Users who - # wish to modify the behavior of CompilerEnv.step should overload - # raw_step(). def step( # pylint: disable=arguments-differ self, action: ActionType, @@ -95,8 +81,7 @@ def step( # pylint: disable=arguments-differ category=DeprecationWarning, ) reward_spaces = rewards - return self.env._multistep( - raw_step=self.raw_step, + return self.multistep( actions=[action], observation_spaces=observation_spaces, reward_spaces=reward_spaces, @@ -124,8 +109,7 @@ def multistep( category=DeprecationWarning, ) reward_spaces = rewards - return self.env._multistep( # pylint: disable=protected-access - raw_step=self.raw_step, + return self.env.multistep( actions=actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces, @@ -166,16 +150,20 @@ class ActionWrapper(CompilerEnvWrapper): to allow an action space transformation. """ - def raw_step( + def multistep( self, actions: Iterable[ActionType], - observation_spaces: Iterable[ObservationSpaceSpec], - reward_spaces: Iterable[Reward], + observation_spaces: Optional[Iterable[ObservationSpaceSpec]] = None, + reward_spaces: Optional[Iterable[Reward]] = None, + observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + rewards: Optional[Iterable[Union[str, Reward]]] = None, ): - return self.env.raw_step( + return self.env.multistep( [self.action(a) for a in actions], observation_spaces=observation_spaces, reward_spaces=reward_spaces, + observations=observations, + rewards=rewards, ) def action(self, action: ActionType) -> ActionType: @@ -196,22 +184,23 @@ def reset(self, *args, **kwargs): observation = self.env.reset(*args, **kwargs) return self.observation(observation) - def raw_step( + def multistep( self, - actions: Iterable[ActionType], - observation_spaces: Iterable[ObservationSpaceSpec], - reward_spaces: Iterable[Reward], + actions: List[ActionType], + observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + reward_spaces: Optional[Iterable[Union[str, Reward]]] = None, + observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + rewards: Optional[Iterable[Union[str, Reward]]] = None, ): - observation, reward, done, info = self.env.raw_step( - actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces + observation, reward, done, info = self.env.multistep( + actions, + observation_spaces=observation_spaces, + reward_spaces=reward_spaces, + observations=observations, + rewards=rewards, ) - # Only apply observation transformation if we are using the default - # observation space. 
- if observation_spaces == [self.observation_space_spec]: - observation = [self.observation(observation)] - - return observation, reward, done, info + return self.observation(observation), reward, done, info def observation(self, observation): """Translate an observation to the new space.""" @@ -226,22 +215,23 @@ class RewardWrapper(CompilerEnvWrapper): def reset(self, *args, **kwargs): return self.env.reset(*args, **kwargs) - def raw_step( + def multistep( self, - actions: Iterable[ActionType], - observation_spaces: Iterable[ObservationSpaceSpec], - reward_spaces: Iterable[Reward], + actions: List[ActionType], + observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + reward_spaces: Optional[Iterable[Union[str, Reward]]] = None, + observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + rewards: Optional[Iterable[Union[str, Reward]]] = None, ): - observation, reward, done, info = self.env.step( - actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces + observation, reward, done, info = self.env.multistep( + actions, + observation_spaces=observation_spaces, + reward_spaces=reward_spaces, + observations=observations, + rewards=rewards, ) - # Only apply rewards transformation if we are using the default - # reward space. - if reward_spaces == [self.reward_space]: - reward = [self.reward(reward)] - - return observation, reward, done, info + return observation, self.reward(reward), done, info def reward(self, reward): """Translate a reward to the new space.""" diff --git a/compiler_gym/wrappers/validation.py b/compiler_gym/wrappers/validation.py index 2c64f6579..3c3fd9c62 100644 --- a/compiler_gym/wrappers/validation.py +++ b/compiler_gym/wrappers/validation.py @@ -29,13 +29,13 @@ def __init__( super().__init__(env) self.reward_penalty = reward_penalty - def raw_step( + def multistep( self, actions: List[ActionType], observation_spaces=None, reward_spaces=None, ): - observation, reward, done, info = self.env.raw_step( + observation, reward, done, info = self.env.multistep( actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces, diff --git a/examples/llvm_autotuning/autotuners/opentuner_.py b/examples/llvm_autotuning/autotuners/opentuner_.py index 3850de8aa..9d506093b 100644 --- a/examples/llvm_autotuning/autotuners/opentuner_.py +++ b/examples/llvm_autotuning/autotuners/opentuner_.py @@ -93,7 +93,7 @@ def __init__(self, data) -> None: wrapped = DesiredResult(Configuration(manipulator.best_config)) manipulator.run(wrapped, None, None) env.reset() - env.step(manipulator.serialize_actions(manipulator.best_config)) + env.multistep(manipulator.serialize_actions(manipulator.best_config)) class LlvmOptFlagsTuner(MeasurementInterface): diff --git a/examples/llvm_rl/wrappers.py b/examples/llvm_rl/wrappers.py index d14c82e53..04aa5b8b4 100644 --- a/examples/llvm_rl/wrappers.py +++ b/examples/llvm_rl/wrappers.py @@ -126,7 +126,7 @@ def reset(self, *args, **kwargs): ) return super().reset(*args, **kwargs) - def raw_step( + def multistep( self, actions: List[ActionType], observation_spaces=None, @@ -135,7 +135,7 @@ def raw_step( ): for a in actions: self.histogram[a] += self.increment - return self.env.raw_step(actions, **kwargs) + return self.env.multistep(actions, **kwargs) def observation(self, observation): return np.concatenate((observation, self.histogram)).astype( diff --git a/examples/loop_optimizations_service/service_py/CMakeLists.txt b/examples/loop_optimizations_service/service_py/CMakeLists.txt index 
c10e61828..3ddacda70 100644 --- a/examples/loop_optimizations_service/service_py/CMakeLists.txt +++ b/examples/loop_optimizations_service/service_py/CMakeLists.txt @@ -5,6 +5,8 @@ cg_add_all_subdirs() +return() + cg_py_library( NAME loops_opt_service diff --git a/examples/op_benchmarks.py b/examples/op_benchmarks.py index faa53ca05..1d63d04d8 100644 --- a/examples/op_benchmarks.py +++ b/examples/op_benchmarks.py @@ -267,7 +267,7 @@ def get_step_times(env: CompilerEnv, num_steps: int, batched=False): # Run all actions in a single step(). steps = [env.action_space.sample() for _ in range(num_steps)] with Timer() as timer: - _, _, done, _ = env.step(steps) + _, _, done, _ = env.multistep(steps) if not done: return [timer.time / num_steps] * num_steps env.reset() diff --git a/examples/sensitivity_analysis/benchmark_sensitivity_analysis.py b/examples/sensitivity_analysis/benchmark_sensitivity_analysis.py index 065b5bc52..00c6d688f 100644 --- a/examples/sensitivity_analysis/benchmark_sensitivity_analysis.py +++ b/examples/sensitivity_analysis/benchmark_sensitivity_analysis.py @@ -116,7 +116,7 @@ def run_one_trial( num_steps = random.randint(min_steps, max_steps) warmup_actions = [env.action_space.sample() for _ in range(num_steps)] env.reward_space = reward_space - _, _, done, _ = env.step(warmup_actions) + _, _, done, _ = env.multistep(warmup_actions) if done: return None return env.episode_reward diff --git a/tests/util/minimize_trajectory_test.py b/tests/util/minimize_trajectory_test.py index 292cb9e09..782518bc2 100644 --- a/tests/util/minimize_trajectory_test.py +++ b/tests/util/minimize_trajectory_test.py @@ -152,7 +152,7 @@ def hypothesis(env): def test_minimize_trajectory_iteratively_llvm_crc32(env): """Test trajectory minimization on a real environment.""" env.reset(benchmark="cbench-v1/crc32") - env.step( + env.multistep( [ env.action_space["-mem2reg"], env.action_space["-gvn"], diff --git a/tests/wrappers/core_wrappers_test.py b/tests/wrappers/core_wrappers_test.py index cc577bac4..1aac66170 100644 --- a/tests/wrappers/core_wrappers_test.py +++ b/tests/wrappers/core_wrappers_test.py @@ -3,7 +3,6 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
"""Unit tests for //compiler_gym/wrappers.""" -import numpy as np import pytest from compiler_gym.datasets import Datasets @@ -108,6 +107,9 @@ def observation(self, observation): def action(self, action): return action # pass thru + def reward(self, reward): + return reward + env = MyWrapper(env) env.reset() (ir, ic), (icr, icroz), _, _ = env.multistep( @@ -259,22 +261,6 @@ def test_wrapped_observation_missing_definition(env: LlvmEnv): env.reset() -def test_wrapped_observation_not_applied_to_non_default_observations(env: LlvmEnv): - class MyWrapper(ObservationWrapper): - def __init__(self, env): - super().__init__(env) - self.observation_space = "Ir" - - def observation(self, observation): - return len(observation) - - env = MyWrapper(env) - env.reset() - (observation,), _, _, _ = env.step(0, observation_spaces=["Autophase"]) - print(observation) - assert isinstance(observation, np.ndarray) - - def test_wrapped_reward(env: LlvmEnv): class MyWrapper(RewardWrapper): def reward(self, reward): @@ -286,11 +272,9 @@ def reward(self, reward): env.reset() _, reward, _, _ = env.step(0) assert reward == -5 - assert env.episode_reward == -5 _, reward, _, _ = env.step(0) assert reward == -5 - assert env.episode_reward == -10 if __name__ == "__main__": diff --git a/www/www.py b/www/www.py index 951aa34f3..d080c07e4 100644 --- a/www/www.py +++ b/www/www.py @@ -217,7 +217,7 @@ def _step(request: StepRequest) -> StepReply: if request.all_states: # Replay actions one at a time to receive incremental rewards. The # first item represents the state prior to any actions. - (instcount, autophase), _, done, info = env.raw_step( + (instcount, autophase), _, done, info = env.multistep( actions=[], observation_spaces=[ env.observation.spaces["InstCountDict"], @@ -263,7 +263,7 @@ def _step(request: StepRequest) -> StepReply: ) # Perform the final action. - (ir, instcount, autophase), (reward,), done, _ = env.raw_step( + (ir, instcount, autophase), (reward,), done, _ = env.multistep( actions=request.actions[-1:], observation_spaces=[ env.observation.spaces["Ir"], From bc1f9e3fa57519bbf0a71d2573f786417a298e36 Mon Sep 17 00:00:00 2001 From: Boian Petkantchin Date: Wed, 16 Mar 2022 19:41:09 -0700 Subject: [PATCH 5/8] Fix failing tests --- compiler_gym/bin/service.py | 4 ++++ compiler_gym/envs/compiler_env.py | 21 +++---------------- examples/llvm_rl/wrappers.py | 2 +- .../action_sensitivity_analysis.py | 2 +- 4 files changed, 9 insertions(+), 20 deletions(-) diff --git a/compiler_gym/bin/service.py b/compiler_gym/bin/service.py index 3542ae22b..cf4c10c38 100644 --- a/compiler_gym/bin/service.py +++ b/compiler_gym/bin/service.py @@ -256,6 +256,10 @@ def print_service_capabilities(env: CompilerEnv): headers=("Action",), ) print(table) + else: + raise NotImplementedError( + "Only Commandline and NamedDiscrete are supported." 
+ ) def main(argv): diff --git a/compiler_gym/envs/compiler_env.py b/compiler_gym/envs/compiler_env.py index a00f86947..9d3c5a2ed 100644 --- a/compiler_gym/envs/compiler_env.py +++ b/compiler_gym/envs/compiler_env.py @@ -11,7 +11,7 @@ from math import isclose from pathlib import Path from time import time -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union import gym import numpy as np @@ -1082,9 +1082,7 @@ def step( # pylint: disable=arguments-differ category=DeprecationWarning, ) reward_spaces = rewards - return self._multistep( - self.raw_step, [action], observation_spaces, reward_spaces - ) + return self.multistep([action], observation_spaces, reward_spaces) def multistep( self, @@ -1129,20 +1127,7 @@ def multistep( category=DeprecationWarning, ) reward_spaces = rewards - return self._multistep( - self.raw_step, list(actions), observation_spaces, reward_spaces - ) - def _multistep( - self, - raw_step: Callable[ - [Iterable[ActionType], Iterable[ObservationSpaceSpec], Iterable[Reward]], - StepType, - ], - actions: Iterable[ActionType], - observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]], - reward_spaces: Optional[Iterable[Union[str, Reward]]], - ) -> StepType: # Coerce observation spaces into a list of ObservationSpaceSpec instances. if observation_spaces: observation_spaces_to_compute: List[ObservationSpaceSpec] = [ @@ -1170,7 +1155,7 @@ def _multistep( reward_spaces_to_compute: List[Reward] = [] # Perform the underlying environment step. - observation_values, reward_values, done, info = raw_step( + observation_values, reward_values, done, info = self.raw_step( actions, observation_spaces_to_compute, reward_spaces_to_compute ) diff --git a/examples/llvm_rl/wrappers.py b/examples/llvm_rl/wrappers.py index 04aa5b8b4..4a2a7e6d9 100644 --- a/examples/llvm_rl/wrappers.py +++ b/examples/llvm_rl/wrappers.py @@ -135,7 +135,7 @@ def multistep( ): for a in actions: self.histogram[a] += self.increment - return self.env.multistep(actions, **kwargs) + return super().multistep(actions, **kwargs) def observation(self, observation): return np.concatenate((observation, self.histogram)).astype( diff --git a/examples/sensitivity_analysis/action_sensitivity_analysis.py b/examples/sensitivity_analysis/action_sensitivity_analysis.py index 05431de74..d979cee65 100644 --- a/examples/sensitivity_analysis/action_sensitivity_analysis.py +++ b/examples/sensitivity_analysis/action_sensitivity_analysis.py @@ -116,7 +116,7 @@ def run_one_trial( num_warmup_steps = random.randint(0, max_warmup_steps) warmup_actions = [env.action_space.sample() for _ in range(num_warmup_steps)] env.reward_space = reward_space - _, _, done, _ = env.step(warmup_actions) + _, _, done, _ = env.multistep(warmup_actions) if done: return None _, (reward,), done, _ = env.step(action, reward_spaces=[reward_space]) From 5eb13814915851c5f975f95acb8310f7948a677f Mon Sep 17 00:00:00 2001 From: Boian Petkantchin Date: Thu, 17 Mar 2022 08:54:08 -0700 Subject: [PATCH 6/8] Fix RewardWrapper episode_reward --- compiler_gym/wrappers/core.py | 12 +++++++++++- tests/wrappers/core_wrappers_test.py | 2 ++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/compiler_gym/wrappers/core.py b/compiler_gym/wrappers/core.py index 34674fb8d..62b0c679d 100644 --- a/compiler_gym/wrappers/core.py +++ b/compiler_gym/wrappers/core.py @@ -231,7 +231,17 @@ def multistep( rewards=rewards, ) - return observation, self.reward(reward), done, info + 
# Undo the episode_reward update and reapply it once we have transformed + # the reward. + # + # TODO(cummins): Refactor step() so that we don't have to do this + # recalculation of episode_reward, as this is prone to errors if, say, + # the base reward returns NaN or an invalid type. + if reward is not None and self.episode_reward is not None: + self.unwrapped.episode_reward -= reward + reward = self.reward(reward) + self.unwrapped.episode_reward += reward + return observation, reward, done, info def reward(self, reward): """Translate a reward to the new space.""" diff --git a/tests/wrappers/core_wrappers_test.py b/tests/wrappers/core_wrappers_test.py index 1aac66170..bcaffa42f 100644 --- a/tests/wrappers/core_wrappers_test.py +++ b/tests/wrappers/core_wrappers_test.py @@ -272,9 +272,11 @@ def reward(self, reward): env.reset() _, reward, _, _ = env.step(0) assert reward == -5 + assert env.episode_reward == -5 _, reward, _, _ = env.step(0) assert reward == -5 + assert env.episode_reward == -10 if __name__ == "__main__": From f7f8fc4d8c84032a191d19d338d63c93d9e293a7 Mon Sep 17 00:00:00 2001 From: Boian Petkantchin Date: Thu, 17 Mar 2022 08:55:28 -0700 Subject: [PATCH 7/8] Fix build of examples/loop_optimizations_service/service_py --- examples/loop_optimizations_service/service_py/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/loop_optimizations_service/service_py/CMakeLists.txt b/examples/loop_optimizations_service/service_py/CMakeLists.txt index 3ddacda70..c10e61828 100644 --- a/examples/loop_optimizations_service/service_py/CMakeLists.txt +++ b/examples/loop_optimizations_service/service_py/CMakeLists.txt @@ -5,8 +5,6 @@ cg_add_all_subdirs() -return() - cg_py_library( NAME loops_opt_service From 8cd9679a53831305359fd9dfcd823a85463804bd Mon Sep 17 00:00:00 2001 From: Boian Petkantchin Date: Thu, 17 Mar 2022 08:56:29 -0700 Subject: [PATCH 8/8] Add missing arguments to ValidateBenchmarkAfterEveryStep.multistep --- compiler_gym/wrappers/validation.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/compiler_gym/wrappers/validation.py b/compiler_gym/wrappers/validation.py index 3c3fd9c62..85427a8ce 100644 --- a/compiler_gym/wrappers/validation.py +++ b/compiler_gym/wrappers/validation.py @@ -34,11 +34,15 @@ def multistep( actions: List[ActionType], observation_spaces=None, reward_spaces=None, + observations=None, + rewards=None, ): observation, reward, done, info = self.env.multistep( actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces, + observations=observations, + rewards=rewards, ) # Early exit if environment reaches terminal state.
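
Note: a minimal usage sketch of the API after this series is applied, assuming a local compiler_gym install with the LLVM environment registered as "llvm-v0"; the benchmark and space names below are the ones exercised by the tests in these patches.

    import compiler_gym

    with compiler_gym.make("llvm-v0") as env:
        env.reset(benchmark="benchmark://cbench-v1/dijkstra")

        # step() now takes exactly one action; passing a list of actions
        # still works but raises a DeprecationWarning.
        observation, reward, done, info = env.step(env.action_space.sample())

        # multistep() takes an iterable of actions and applies them in order.
        # The observation_spaces / reward_spaces arguments (renamed from
        # observations / rewards, which are still accepted with a
        # DeprecationWarning) select which spaces to compute for the return
        # tuple instead of the default observation_space / reward_space.
        (ir, ic), (icroz,), done, info = env.multistep(
            actions=[0, 0, 0],
            observation_spaces=["Ir", "IrInstructionCount"],
            reward_spaces=["IrInstructionCountOz"],
        )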