From a203b97f12a954434ae54f92397fde8c9a91da3c Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Sun, 6 Mar 2022 22:35:55 +0000 Subject: [PATCH 1/8] Add annotation to step() about differing arguments. --- compiler_gym/envs/compiler_env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler_gym/envs/compiler_env.py b/compiler_gym/envs/compiler_env.py index 62b7f29fe..1c5c41883 100644 --- a/compiler_gym/envs/compiler_env.py +++ b/compiler_gym/envs/compiler_env.py @@ -1029,7 +1029,7 @@ def raw_step( return observations, rewards, reply.end_of_session, info - def step( + def step( # pylint: disable=arguments-differ self, action: Union[ActionType, Iterable[ActionType]], observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, From c6cc16b4b213751254a99aefed6f8539b2c53473 Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Sun, 6 Mar 2022 23:03:03 +0000 Subject: [PATCH 2/8] [core] Add a CompilerEnv.multistep() method. CompilerEnv.step() currently accepts two types for the "action" argument: a scalar action, or an iterable of actions. This kind of type overloading does not work for list types. This adds a new method, CompilerEnv.multistep(), that explicitly takes takes an iterable sequence of actions. If you want to run multiple actions in a single step, call this new method. Calling CompilerEnv.step() with a list of actions still works, though with a deprecation warning. In the v0.2.4 release support for lists of actions in CompilerEnv.step() will be removed. Fixes #610. --- compiler_gym/envs/compiler_env.py | 43 ++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/compiler_gym/envs/compiler_env.py b/compiler_gym/envs/compiler_env.py index 1c5c41883..d804c7c56 100644 --- a/compiler_gym/envs/compiler_env.py +++ b/compiler_gym/envs/compiler_env.py @@ -1031,7 +1031,7 @@ def raw_step( def step( # pylint: disable=arguments-differ self, - action: Union[ActionType, Iterable[ActionType]], + action: ActionType, observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, rewards: Optional[Iterable[Union[str, Reward]]] = None, ) -> StepType: @@ -1058,9 +1058,46 @@ def step( # pylint: disable=arguments-differ :raises SessionNotFound: If :meth:`reset() ` has not been called. """ - # Coerce actions into a list. - actions = action if isinstance(action, IterableType) else [action] + # NOTE(github.com/facebookresearch/CompilerGym/issues/610): This + # workaround for accepting a list of actions will be removed in v0.2.4. + if isinstance(action, IterableType): + warnings.warn( + "env.step() only takes a single action. Use env.multistep() " + "for an iterable of actions", + category=DeprecationWarning, + ) + else: + action = [action] + + return self.multistep(action, observations, rewards) + + def multistep( + self, + actions: Iterable[ActionType], + observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + rewards: Optional[Iterable[Union[str, Reward]]] = None, + ): + """Take a sequence of steps and return the final observation and reward. + :param action: A sequence of actions to apply in order. + + :param observations: A list of observation spaces to compute + observations from. If provided, this changes the :code:`observation` + element of the return tuple to be a list of observations from the + requested spaces. The default :code:`env.observation_space` is not + returned. + + :param rewards: A list of reward spaces to compute rewards from. 
If + provided, this changes the :code:`reward` element of the return + tuple to be a list of rewards from the requested spaces. The default + :code:`env.reward_space` is not returned. + + :return: A tuple of observation, reward, done, and info. Observation and + reward are None if default observation/reward is not set. + + :raises SessionNotFound: If :meth:`reset() + ` has not been called. + """ # Coerce observation spaces into a list of ObservationSpaceSpec instances. if observations: observation_spaces: List[ObservationSpaceSpec] = [ From d685562cab227db106704fec3547908d41076cae Mon Sep 17 00:00:00 2001 From: Boian Petkantchin Date: Fri, 4 Mar 2022 20:00:37 -0800 Subject: [PATCH 3/8] [core] Deprecations and updates to env.step() arguments. This makes the following changes: - Changes env.step() `action` to accept only a single action, with a deprecation warning if a list of actions are provided. - Renames env.step() `observations` to `observation_spaces`. The old parameter name is still accepted with a deprecation warning. - Renames env.step() `rewards` to `reward_spaces`. The old parameter name is still accepted with a deprecation warning. --- compiler_gym/bin/service.py | 7 +- compiler_gym/envs/compiler_env.py | 153 +++++++++++------ compiler_gym/envs/llvm/llvm_rewards.py | 6 +- compiler_gym/random_replay.py | 2 +- compiler_gym/random_search.py | 13 +- compiler_gym/spaces/named_discrete.py | 14 +- compiler_gym/spaces/reward.py | 4 +- compiler_gym/util/gym_type_hints.py | 2 +- compiler_gym/util/minimize_trajectory.py | 2 +- compiler_gym/views/observation.py | 2 +- compiler_gym/wrappers/commandline.py | 51 +++--- compiler_gym/wrappers/core.py | 157 +++++++++++++++--- compiler_gym/wrappers/llvm.py | 4 +- compiler_gym/wrappers/time_limit.py | 5 +- compiler_gym/wrappers/validation.py | 16 +- examples/brute_force.py | 3 +- .../llvm_autotuning/autotuners/nevergrad_.py | 11 +- .../llvm_autotuning/optimization_target.py | 2 +- examples/llvm_rl/wrappers.py | 18 +- .../action_sensitivity_analysis.py | 5 +- tests/llvm/episode_reward_test.py | 2 +- tests/llvm/fork_regression_test.py | 6 +- tests/llvm/llvm_env_test.py | 4 +- tests/llvm/multiprocessing_test.py | 7 +- tests/llvm/threading_test.py | 5 +- tests/util/minimize_trajectory_test.py | 5 +- tests/views/observation_test.py | 8 +- tests/wrappers/commandline_wrappers_test.py | 21 +-- tests/wrappers/core_wrappers_test.py | 36 +++- tests/wrappers/time_limit_wrappers_test.py | 4 +- www/www.py | 12 +- 31 files changed, 399 insertions(+), 188 deletions(-) diff --git a/compiler_gym/bin/service.py b/compiler_gym/bin/service.py index 25f868467..3542ae22b 100644 --- a/compiler_gym/bin/service.py +++ b/compiler_gym/bin/service.py @@ -105,7 +105,7 @@ from compiler_gym.datasets import Dataset from compiler_gym.envs import CompilerEnv from compiler_gym.service.connection import ConnectionOpts -from compiler_gym.spaces import Commandline +from compiler_gym.spaces import Commandline, NamedDiscrete from compiler_gym.util.flags.env_from_flags import env_from_flags from compiler_gym.util.tabulate import tabulate from compiler_gym.util.truncate import truncate @@ -249,12 +249,13 @@ def print_service_capabilities(env: CompilerEnv): ], headers=("Action", "Description"), ) - else: + print(table) + elif isinstance(action_space, NamedDiscrete): table = tabulate( [(a,) for a in sorted(action_space.names)], headers=("Action",), ) - print(table) + print(table) def main(argv): diff --git a/compiler_gym/envs/compiler_env.py b/compiler_gym/envs/compiler_env.py index 
d804c7c56..a00f86947 100644 --- a/compiler_gym/envs/compiler_env.py +++ b/compiler_gym/envs/compiler_env.py @@ -11,7 +11,7 @@ from math import isclose from pathlib import Path from time import time -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union import gym import numpy as np @@ -128,7 +128,7 @@ class CompilerEnv(gym.Env): :ivar actions: The list of actions that have been performed since the previous call to :func:`reset`. - :vartype actions: List[int] + :vartype actions: List[ActionType] :ivar reward_range: A tuple indicating the range of reward values. Default range is (-inf, +inf). @@ -321,7 +321,7 @@ def __init__( self.reward_range: Tuple[float, float] = (-np.inf, np.inf) self.episode_reward: Optional[float] = None self.episode_start_time: float = time() - self.actions: List[int] = [] + self.actions: List[ActionType] = [] # Initialize the default observation/reward spaces. self.observation_space_spec: Optional[ObservationSpaceSpec] = None @@ -375,7 +375,7 @@ def commandline(self) -> str: """ raise NotImplementedError("abstract method") - def commandline_to_actions(self, commandline: str) -> List[int]: + def commandline_to_actions(self, commandline: str) -> List[ActionType]: """Interface for :class:`CompilerEnv ` subclasses to convert from a commandline invocation to a sequence of actions. @@ -409,7 +409,7 @@ def state(self) -> CompilerEnvState: ) @property - def action_space(self) -> NamedDiscrete: + def action_space(self) -> Space: """The current action space. :getter: Get the current action space. @@ -587,7 +587,7 @@ def fork(self) -> "CompilerEnv": self.reset() if actions: logger.warning("Parent service of fork() has died, replaying state") - _, _, done, _ = self.step(actions) + _, _, done, _ = self.multistep(actions) assert not done, "Failed to replay action sequence" request = ForkSessionRequest(session_id=self._session_id) @@ -620,7 +620,7 @@ def fork(self) -> "CompilerEnv": # replay the state. new_env = type(self)(**self._init_kwargs()) new_env.reset() - _, _, done, _ = new_env.step(self.actions) + _, _, done, _ = new_env.multistep(self.actions) assert not done, "Failed to replay action sequence in forked environment" # Create copies of the mutable reward and observation spaces. This @@ -885,9 +885,9 @@ def _call_with_error( def raw_step( self, - actions: Iterable[int], - observations: Iterable[ObservationSpaceSpec], - rewards: Iterable[Reward], + actions: Iterable[ActionType], + observation_spaces: List[ObservationSpaceSpec], + reward_spaces: List[Reward], ) -> StepType: """Take a step. @@ -908,18 +908,15 @@ def raw_step( .. warning:: - Prefer :meth:`step() ` to - :meth:`raw_step() `. - :meth:`step() ` has equivalent - functionality, and is less likely to change in the future. + Don't call this method directly, use :meth:`step() + ` or :meth:`multistep() + ` instead. The + :meth:`raw_step() ` method is an + implementation detail. 
""" if not self.in_episode: raise SessionNotFound("Must call reset() before step()") - # Build the list of observations that must be computed by the backend - user_observation_spaces: List[ObservationSpaceSpec] = list(observations) - reward_spaces: List[Reward] = list(rewards) - reward_observation_spaces: List[ObservationSpaceSpec] = [] for reward_space in reward_spaces: reward_observation_spaces += [ @@ -927,7 +924,7 @@ def raw_step( ] observations_to_compute: List[ObservationSpaceSpec] = list( - set(user_observation_spaces).union(set(reward_observation_spaces)) + set(observation_spaces).union(set(reward_observation_spaces)) ) observation_space_index_map: Dict[ObservationSpaceSpec, int] = { observation_space: i @@ -974,7 +971,7 @@ def raw_step( default_observations = [ observation_space.default_value - for observation_space in user_observation_spaces + for observation_space in observation_spaces ] default_rewards = [ float(reward_space.reward_on_error(self.episode_reward)) @@ -1002,7 +999,7 @@ def raw_step( # Get the user-requested observation. observations: List[ObservationType] = [ computed_observations[observation_space_index_map[observation_space]] - for observation_space in user_observation_spaces + for observation_space in observation_spaces ] # Update and compute the rewards. @@ -1032,22 +1029,22 @@ def raw_step( def step( # pylint: disable=arguments-differ self, action: ActionType, + observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + reward_spaces: Optional[Iterable[Union[str, Reward]]] = None, observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, rewards: Optional[Iterable[Union[str, Reward]]] = None, ) -> StepType: """Take a step. - :param action: An action, or a sequence of actions. When multiple - actions are provided the observation and reward are returned after - running all of the actions. + :param action: An action. - :param observations: A list of observation spaces to compute + :param observation_spaces: A list of observation spaces to compute observations from. If provided, this changes the :code:`observation` element of the return tuple to be a list of observations from the requested spaces. The default :code:`env.observation_space` is not returned. - :param rewards: A list of reward spaces to compute rewards from. If + :param reward_spaces: A list of reward spaces to compute rewards from. If provided, this changes the :code:`reward` element of the return tuple to be a list of rewards from the requested spaces. The default :code:`env.reward_space` is not returned. @@ -1058,22 +1055,42 @@ def step( # pylint: disable=arguments-differ :raises SessionNotFound: If :meth:`reset() ` has not been called. """ - # NOTE(github.com/facebookresearch/CompilerGym/issues/610): This - # workaround for accepting a list of actions will be removed in v0.2.4. if isinstance(action, IterableType): warnings.warn( - "env.step() only takes a single action. Use env.multistep() " - "for an iterable of actions", + "Argument `action` of CompilerEnv.step no longer accepts a list " + " of actions. Please use CompilerEnv.multistep instead", category=DeprecationWarning, ) - else: - action = [action] - - return self.multistep(action, observations, rewards) + return self.multistep( + action, + observation_spaces=observation_spaces, + reward_spaces=reward_spaces, + observations=observations, + rewards=rewards, + ) + if observations is not None: + warnings.warn( + "Argument `observations` of CompilerEnv.step has been " + "renamed `observation_spaces`. 
Please update your code", + category=DeprecationWarning, + ) + observation_spaces = observations + if rewards is not None: + warnings.warn( + "Argument `rewards` of CompilerEnv.step has been renamed " + "`reward_spaces`. Please update your code", + category=DeprecationWarning, + ) + reward_spaces = rewards + return self._multistep( + self.raw_step, [action], observation_spaces, reward_spaces + ) def multistep( self, actions: Iterable[ActionType], + observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + reward_spaces: Optional[Iterable[Union[str, Reward]]] = None, observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, rewards: Optional[Iterable[Union[str, Reward]]] = None, ): @@ -1081,13 +1098,13 @@ def multistep( :param action: A sequence of actions to apply in order. - :param observations: A list of observation spaces to compute + :param observation_spaces: A list of observation spaces to compute observations from. If provided, this changes the :code:`observation` element of the return tuple to be a list of observations from the requested spaces. The default :code:`env.observation_space` is not returned. - :param rewards: A list of reward spaces to compute rewards from. If + :param reward_spaces: A list of reward spaces to compute rewards from. If provided, this changes the :code:`reward` element of the return tuple to be a list of rewards from the requested spaces. The default :code:`env.reward_space` is not returned. @@ -1098,49 +1115,77 @@ def multistep( :raises SessionNotFound: If :meth:`reset() ` has not been called. """ + if observations is not None: + warnings.warn( + "Argument `observations` of CompilerEnv.multistep has been " + "renamed `observation_spaces`. Please update your code", + category=DeprecationWarning, + ) + observation_spaces = observations + if rewards is not None: + warnings.warn( + "Argument `rewards` of CompilerEnv.multistep has been renamed " + "`reward_spaces`. Please update your code", + category=DeprecationWarning, + ) + reward_spaces = rewards + return self._multistep( + self.raw_step, list(actions), observation_spaces, reward_spaces + ) + + def _multistep( + self, + raw_step: Callable[ + [Iterable[ActionType], Iterable[ObservationSpaceSpec], Iterable[Reward]], + StepType, + ], + actions: Iterable[ActionType], + observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]], + reward_spaces: Optional[Iterable[Union[str, Reward]]], + ) -> StepType: # Coerce observation spaces into a list of ObservationSpaceSpec instances. - if observations: - observation_spaces: List[ObservationSpaceSpec] = [ + if observation_spaces: + observation_spaces_to_compute: List[ObservationSpaceSpec] = [ obs if isinstance(obs, ObservationSpaceSpec) else self.observation.spaces[obs] - for obs in observations + for obs in observation_spaces ] elif self.observation_space_spec: - observation_spaces: List[ObservationSpaceSpec] = [ + observation_spaces_to_compute: List[ObservationSpaceSpec] = [ self.observation_space_spec ] else: - observation_spaces: List[ObservationSpaceSpec] = [] + observation_spaces_to_compute: List[ObservationSpaceSpec] = [] # Coerce reward spaces into a list of Reward instances. 
- if rewards: - reward_spaces: List[Reward] = [ + if reward_spaces: + reward_spaces_to_compute: List[Reward] = [ rew if isinstance(rew, Reward) else self.reward.spaces[rew] - for rew in rewards + for rew in reward_spaces ] elif self.reward_space: - reward_spaces: List[Reward] = [self.reward_space] + reward_spaces_to_compute: List[Reward] = [self.reward_space] else: - reward_spaces: List[Reward] = [] + reward_spaces_to_compute: List[Reward] = [] # Perform the underlying environment step. - observation_values, reward_values, done, info = self.raw_step( - actions, observation_spaces, reward_spaces + observation_values, reward_values, done, info = raw_step( + actions, observation_spaces_to_compute, reward_spaces_to_compute ) # Translate observations lists back to the appropriate types. - if observations is None and self.observation_space_spec: + if observation_spaces is None and self.observation_space_spec: observation_values = observation_values[0] - elif not observation_spaces: + elif not observation_spaces_to_compute: observation_values = None # Translate reward lists back to the appropriate types. - if rewards is None and self.reward_space: + if reward_spaces is None and self.reward_space: reward_values = reward_values[0] # Update the cumulative episode reward self.episode_reward += reward_values - elif not reward_spaces: + elif not reward_spaces_to_compute: reward_values = None return observation_values, reward_values, done, info @@ -1213,7 +1258,9 @@ def apply(self, state: CompilerEnvState) -> None: # noqa ) actions = self.commandline_to_actions(state.commandline) - _, _, done, info = self.step(actions) + done = False + for action in actions: + _, _, done, info = self.step(action) if done: raise ValueError( f"Environment terminated with error: `{info.get('error_details')}`" diff --git a/compiler_gym/envs/llvm/llvm_rewards.py b/compiler_gym/envs/llvm/llvm_rewards.py index e674591ee..1cc045ab1 100644 --- a/compiler_gym/envs/llvm/llvm_rewards.py +++ b/compiler_gym/envs/llvm/llvm_rewards.py @@ -7,7 +7,7 @@ from compiler_gym.datasets import Benchmark from compiler_gym.spaces.reward import Reward -from compiler_gym.util.gym_type_hints import ObservationType, RewardType +from compiler_gym.util.gym_type_hints import ActionType, ObservationType, RewardType from compiler_gym.views.observation import ObservationView @@ -44,7 +44,7 @@ def reset(self, benchmark: Benchmark, observation_view: ObservationView) -> None def update( self, - actions: List[int], + actions: List[ActionType], observations: List[ObservationType], observation_view: ObservationView, ) -> RewardType: @@ -81,7 +81,7 @@ def reset(self, benchmark: str, observation_view: ObservationView) -> None: def update( self, - actions: List[int], + actions: List[ActionType], observations: List[ObservationType], observation_view: ObservationView, ) -> RewardType: diff --git a/compiler_gym/random_replay.py b/compiler_gym/random_replay.py index 063ec9ee9..81be67ba2 100644 --- a/compiler_gym/random_replay.py +++ b/compiler_gym/random_replay.py @@ -15,7 +15,7 @@ ) -@deprecated(version="0.2.1", reason="Use env.step(actions) instead") +@deprecated(version="0.2.1", reason="Use env.step(action) instead") def replay_actions(env: CompilerEnv, action_names: List[str], outdir: Path): return replay_actions_(env, action_names, outdir) diff --git a/compiler_gym/random_search.py b/compiler_gym/random_search.py index 7b86dc7d3..a81b0bdb2 100644 --- a/compiler_gym/random_search.py +++ b/compiler_gym/random_search.py @@ -17,6 +17,7 @@ from compiler_gym.envs.llvm 
import LlvmEnv from compiler_gym.service.connection import ServiceError from compiler_gym.util import logs +from compiler_gym.util.gym_type_hints import ActionType from compiler_gym.util.logs import create_logging_dir from compiler_gym.util.tabulate import tabulate @@ -79,8 +80,8 @@ def __init__( self.total_episode_count = 0 self.total_step_count = 0 self.best_returns = -float("inf") - self.best_actions: List[int] = [] - self.best_commandline: List[int] = [] + self.best_actions: List[ActionType] = [] + self.best_commandline: str = [] self.best_found_at_time = time() self.alive = True # Set this to False to signal the thread to stop. @@ -112,17 +113,17 @@ def run_one_episode(self, env: CompilerEnv) -> bool: :return: True if the episode ended gracefully, else False. """ observation = env.reset() - actions: List[int] = [] + actions: List[ActionType] = [] patience = self._patience total_returns = 0 while patience >= 0: patience -= 1 self.total_step_count += 1 # === Your agent here! === - action_index = env.action_space.sample() + action = env.action_space.sample() # === End of agent. === - actions.append(action_index) - observation, reward, done, _ = env.step(action_index) + actions.append(action) + observation, reward, done, _ = env.step(action) if done: return False total_returns += reward diff --git a/compiler_gym/spaces/named_discrete.py b/compiler_gym/spaces/named_discrete.py index 64419efc4..afd9a51c7 100644 --- a/compiler_gym/spaces/named_discrete.py +++ b/compiler_gym/spaces/named_discrete.py @@ -2,9 +2,11 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. +from collections.abc import Iterable as IterableType from typing import Iterable, List, Union from compiler_gym.spaces.discrete import Discrete +from compiler_gym.util.gym_type_hints import ActionType class NamedDiscrete(Discrete): @@ -51,18 +53,20 @@ def __getitem__(self, name: str) -> int: def __repr__(self) -> str: return f"NamedDiscrete([{', '.join(self.names)}])" - def to_string(self, values: Union[int, Iterable[int]]) -> str: + def to_string(self, values: Union[int, Iterable[ActionType]]) -> str: """Convert an action, or sequence of actions, to string. :param values: A numeric value, or list of numeric values. :return: A string representing the values. """ - if isinstance(values, int): - return self.names[values] - else: + if isinstance(values, IterableType): return " ".join([self.names[v] for v in values]) + else: + return self.names[values] - def from_string(self, values: Union[str, Iterable[str]]) -> Union[int, List[int]]: + def from_string( + self, values: Union[str, Iterable[str]] + ) -> Union[ActionType, List[ActionType]]: """Convert a name, or list of names, to numeric values. :param values: A name, or list of names. 
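At this point in the series the user-facing contract is settled: env.step() applies exactly one action, env.multistep() applies an iterable of actions, and the extra-space keyword arguments are spelled observation_spaces and reward_spaces. The sketch below is illustrative only and is not part of the patch; the environment name, benchmark, and space names ("llvm-v0", "cbench-v1/crc32", "Ir", "IrInstructionCount") are assumed from the existing LLVM environment rather than introduced by this diff.

    import compiler_gym

    env = compiler_gym.make("llvm-v0")
    env.reset(benchmark="cbench-v1/crc32")

    # One action per call: step() no longer accepts a list of actions.
    obs, reward, done, info = env.step(env.action_space.sample())

    # A sequence of actions in a single call: use multistep().
    obs, reward, done, info = env.multistep(
        [env.action_space.sample() for _ in range(3)]
    )

    # Extra observation/reward spaces use the renamed keyword arguments. The
    # observation and reward elements become lists over the requested spaces.
    (ir, ic), (icr,), done, info = env.step(
        env.action_space.sample(),
        observation_spaces=["Ir", "IrInstructionCount"],
        reward_spaces=["IrInstructionCount"],
    )

    # Deprecated but still accepted, each with a DeprecationWarning:
    #   env.step([a, b, c])                        -> env.multistep([a, b, c])
    #   env.step(a, observations=..., rewards=...) -> observation_spaces=..., reward_spaces=...
    env.close()
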
diff --git a/compiler_gym/spaces/reward.py b/compiler_gym/spaces/reward.py index cccc2169d..75c9d5dad 100644 --- a/compiler_gym/spaces/reward.py +++ b/compiler_gym/spaces/reward.py @@ -9,7 +9,7 @@ import compiler_gym from compiler_gym.spaces.scalar import Scalar -from compiler_gym.util.gym_type_hints import ObservationType, RewardType +from compiler_gym.util.gym_type_hints import ActionType, ObservationType, RewardType class Reward(Scalar): @@ -132,7 +132,7 @@ def reset( def update( self, - actions: List[int], + actions: List[ActionType], observations: List[ObservationType], observation_view: "compiler_gym.views.ObservationView", # noqa: F821 ) -> RewardType: diff --git a/compiler_gym/util/gym_type_hints.py b/compiler_gym/util/gym_type_hints.py index cc592de45..ba7b6ef8c 100644 --- a/compiler_gym/util/gym_type_hints.py +++ b/compiler_gym/util/gym_type_hints.py @@ -9,7 +9,7 @@ # Type hints for the values returned by gym.Env.step(). ObservationType = TypeVar("ObservationType") -ActionType = int +ActionType = TypeVar("ActionType") RewardType = float DoneType = bool InfoType = JsonDictType diff --git a/compiler_gym/util/minimize_trajectory.py b/compiler_gym/util/minimize_trajectory.py index 0de687699..ffd2b947f 100644 --- a/compiler_gym/util/minimize_trajectory.py +++ b/compiler_gym/util/minimize_trajectory.py @@ -41,7 +41,7 @@ def _apply_and_test(env, actions, hypothesis, flakiness) -> bool: env.reset(benchmark=env.benchmark) for _ in range(flakiness): logger.debug("Applying %d actions ...", len(actions)) - _, _, done, info = env.step(actions) + _, _, done, info = env.multistep(actions) if done: raise MinimizationError( f"Failed to replay actions: {info.get('error_details', '')}" diff --git a/compiler_gym/views/observation.py b/compiler_gym/views/observation.py index e6e72b0e6..e743bf6ad 100644 --- a/compiler_gym/views/observation.py +++ b/compiler_gym/views/observation.py @@ -67,7 +67,7 @@ def __getitem__(self, observation_space: str) -> ObservationType: """ observation_space: ObservationSpaceSpec = self.spaces[observation_space] observations, _, done, info = self._raw_step( - actions=[], observations=[observation_space], rewards=[] + actions=[], observation_spaces=[observation_space], reward_spaces=[] ) if done: diff --git a/compiler_gym/wrappers/commandline.py b/compiler_gym/wrappers/commandline.py index 30606a00f..a961c878c 100644 --- a/compiler_gym/wrappers/commandline.py +++ b/compiler_gym/wrappers/commandline.py @@ -6,8 +6,9 @@ from typing import Dict, Iterable, List, Optional, Union from compiler_gym.envs import CompilerEnv -from compiler_gym.spaces import Commandline, CommandlineFlag -from compiler_gym.util.gym_type_hints import StepType +from compiler_gym.spaces import Commandline, CommandlineFlag, Reward +from compiler_gym.util.gym_type_hints import ActionType, StepType +from compiler_gym.views import ObservationSpaceSpec from compiler_gym.wrappers.core import ActionWrapper, CompilerEnvWrapper @@ -40,8 +41,7 @@ def __init__( # Redefine the action space, inserting the terminal action at the start. 
self.action_space = Commandline( - items=[terminal] - + [ + items=[ CommandlineFlag( name=name, flag=flag, @@ -52,25 +52,36 @@ def __init__( env.action_space.flags, env.action_space.descriptions, ) - ], + ] + + [terminal], name=f"{type(self).__name__}<{env.action_space.name}>", ) - def step(self, action: int) -> StepType: - if isinstance(action, int): - end_of_episode = action == 0 - action = [] if end_of_episode else action - 1 - else: - try: - index = action.index(0) - end_of_episode = True - except ValueError: - index = len(action) - end_of_episode = False - action = [a - 1 for a in action[:index]] - - observation, reward, done, info = self.env.step(action) - if end_of_episode and not done: + def raw_step( + self, + actions: List[ActionType], + observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + reward_spaces: Optional[Iterable[Union[str, Reward]]] = None, + ) -> StepType: + terminal_action: int = len(self.action_space.flags) - 1 + + try: + index_of_terminal = actions.index(terminal_action) + except ValueError: + index_of_terminal = -1 + + # Run only the actions up to the terminal action. + if index_of_terminal >= 0: + actions = actions[:index_of_terminal] + + observation, reward, done, info = self.env.raw_step( + actions, + observation_spaces=observation_spaces, + reward_spaces=reward_spaces, + ) + + # Communicate back to the frontend. + if index_of_terminal >= 0 and not done: done = True info["terminal_action"] = True diff --git a/compiler_gym/wrappers/core.py b/compiler_gym/wrappers/core.py index 56bb5ecff..f6d0809ef 100644 --- a/compiler_gym/wrappers/core.py +++ b/compiler_gym/wrappers/core.py @@ -2,13 +2,15 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. +import warnings +from collections.abc import Iterable as IterableType from typing import Iterable, Optional, Union import gym from compiler_gym.envs import CompilerEnv from compiler_gym.spaces.reward import Reward -from compiler_gym.util.gym_type_hints import ObservationType, StepType +from compiler_gym.util.gym_type_hints import ActionType, ObservationType from compiler_gym.views import ObservationSpaceSpec @@ -21,7 +23,7 @@ class CompilerEnvWrapper(gym.Wrapper): such as the :code:`fork()` method. """ - def __init__(self, env: CompilerEnv): + def __init__(self, env: CompilerEnv): # pylint: disable=super-init-not-called """Constructor. :param env: The environment to wrap. @@ -38,8 +40,15 @@ def __init__(self, env: CompilerEnv): self.reward_range = self.env.reward_range self.metadata = self.env.metadata - def step(self, action, observations=None, rewards=None): - return self.env.step(action, observations=observations, rewards=rewards) + def raw_step( + self, + actions: Iterable[ActionType], + observation_spaces: Iterable[ObservationSpaceSpec], + reward_spaces: Iterable[Reward], + ): + return self.env.raw_step( + actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces + ) def reset(self, *args, **kwargs) -> ObservationType: return self.env.reset(*args, **kwargs) @@ -47,6 +56,81 @@ def reset(self, *args, **kwargs) -> ObservationType: def fork(self) -> CompilerEnv: return type(self)(env=self.env.fork()) + # NOTE(cummins): This step() method is provided only because + # CompilerEnv.step accepts additional arguments over gym.Env.step. Users who + # wish to modify the behavior of CompilerEnv.step should overload + # raw_step(). 
+ def step( # pylint: disable=arguments-differ + self, + action: ActionType, + observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + reward_spaces: Optional[Iterable[Union[str, Reward]]] = None, + observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + rewards: Optional[Iterable[Union[str, Reward]]] = None, + ): + if isinstance(action, IterableType): + warnings.warn( + "Argument `action` of CompilerEnv.step no longer accepts a list " + " of actions. Please use CompilerEnv.multistep instead", + category=DeprecationWarning, + ) + return self.multistep( + action, + observation_spaces=observation_spaces, + reward_spaces=reward_spaces, + observations=observations, + rewards=rewards, + ) + if observations is not None: + warnings.warn( + "Argument `observations` of CompilerEnv.multistep has been " + "renamed `observation_spaces`. Please update your code", + category=DeprecationWarning, + ) + observation_spaces = observations + if rewards is not None: + warnings.warn( + "Argument `rewards` of CompilerEnv.multistep has been renamed " + "`reward_spaces`. Please update your code", + category=DeprecationWarning, + ) + reward_spaces = rewards + return self.env._multistep( + raw_step=self.raw_step, + actions=[action], + observation_spaces=observation_spaces, + reward_spaces=reward_spaces, + ) + + def multistep( + self, + actions: Iterable[ActionType], + observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + reward_spaces: Optional[Iterable[Union[str, Reward]]] = None, + observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + rewards: Optional[Iterable[Union[str, Reward]]] = None, + ): + if observations is not None: + warnings.warn( + "Argument `observations` of CompilerEnv.multistep has been " + "renamed `observation_spaces`. Please update your code", + category=DeprecationWarning, + ) + observation_spaces = observations + if rewards is not None: + warnings.warn( + "Argument `rewards` of CompilerEnv.multistep has been renamed " + "`reward_spaces`. Please update your code", + category=DeprecationWarning, + ) + reward_spaces = rewards + return self.env._multistep( # pylint: disable=protected-access + raw_step=self.raw_step, + actions=actions, + observation_spaces=observation_spaces, + reward_spaces=reward_spaces, + ) + @property def observation_space(self): if self.env.observation_space_spec: @@ -82,18 +166,23 @@ class ActionWrapper(CompilerEnvWrapper): to allow an action space transformation. 
""" - def step( - self, action: Union[int, Iterable[int]], observations=None, rewards=None - ) -> StepType: - return self.env.step( - self.action(action), observations=observations, rewards=rewards + def raw_step( + self, + actions: Iterable[ActionType], + observation_spaces: Iterable[ObservationSpaceSpec], + reward_spaces: Iterable[Reward], + ): + return self.env.raw_step( + [self.action(a) for a in actions], + observation_spaces=observation_spaces, + reward_spaces=reward_spaces, ) - def action(self, action): + def action(self, action: ActionType) -> ActionType: """Translate the action to the new space.""" raise NotImplementedError - def reverse_action(self, action): + def reverse_action(self, action: ActionType) -> ActionType: """Translate an action from the new space to the wrapped space.""" raise NotImplementedError @@ -107,9 +196,22 @@ def reset(self, *args, **kwargs): observation = self.env.reset(*args, **kwargs) return self.observation(observation) - def step(self, *args, **kwargs): - observation, reward, done, info = self.env.step(*args, **kwargs) - return self.observation(observation), reward, done, info + def raw_step( + self, + actions: Iterable[ActionType], + observation_spaces: Iterable[ObservationSpaceSpec], + reward_spaces: Iterable[Reward], + ): + observation, reward, done, info = self.env.raw_step( + actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces + ) + + # Only apply observation transformation if we are using the default + # observation space. + if observation_spaces == [self.observation_space_spec]: + observation = [self.observation(observation)] + + return observation, reward, done, info def observation(self, observation): """Translate an observation to the new space.""" @@ -124,18 +226,21 @@ class RewardWrapper(CompilerEnvWrapper): def reset(self, *args, **kwargs): return self.env.reset(*args, **kwargs) - def step(self, *args, **kwargs): - observation, reward, done, info = self.env.step(*args, **kwargs) - # Undo the episode_reward update and reapply it once we have transformed - # the reward. - # - # TODO(cummins): Refactor step() so that we don't have to do this - # recalculation of episode_reward, as this is prone to errors if, say, - # the base reward returns NaN or an invalid type. - if reward is not None and self.episode_reward is not None: - self.unwrapped.episode_reward -= reward - reward = self.reward(reward) - self.unwrapped.episode_reward += reward + def raw_step( + self, + actions: Iterable[ActionType], + observation_spaces: Iterable[ObservationSpaceSpec], + reward_spaces: Iterable[Reward], + ): + observation, reward, done, info = self.env.step( + actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces + ) + + # Only apply rewards transformation if we are using the default + # reward space. 
+ if reward_spaces == [self.reward_space]: + reward = [self.reward(reward)] + return observation, reward, done, info def reward(self, reward): diff --git a/compiler_gym/wrappers/llvm.py b/compiler_gym/wrappers/llvm.py index 08174ed9a..1f91ba6be 100644 --- a/compiler_gym/wrappers/llvm.py +++ b/compiler_gym/wrappers/llvm.py @@ -11,7 +11,7 @@ from compiler_gym.envs.llvm import LlvmEnv from compiler_gym.service.connection import ServiceError from compiler_gym.spaces import Reward -from compiler_gym.util.gym_type_hints import ObservationType +from compiler_gym.util.gym_type_hints import ActionType, ObservationType from compiler_gym.wrappers import CompilerEnvWrapper @@ -65,7 +65,7 @@ def reset(self, benchmark, observation_view) -> None: def update( self, - actions: List[int], + actions: List[ActionType], observations: List[ObservationType], observation_view, ) -> float: diff --git a/compiler_gym/wrappers/time_limit.py b/compiler_gym/wrappers/time_limit.py index 743853915..2e5fda2a3 100644 --- a/compiler_gym/wrappers/time_limit.py +++ b/compiler_gym/wrappers/time_limit.py @@ -2,9 +2,10 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -from typing import Iterable, Optional, Union +from typing import Optional from compiler_gym.envs import CompilerEnv +from compiler_gym.util.gym_type_hints import ActionType from compiler_gym.wrappers.core import CompilerEnvWrapper @@ -31,7 +32,7 @@ def __init__(self, env: CompilerEnv, max_episode_steps: Optional[int] = None): self._max_episode_steps = max_episode_steps self._elapsed_steps = None - def step(self, action: Union[int, Iterable[int]], **kwargs): + def step(self, action: ActionType, **kwargs): assert ( self._elapsed_steps is not None ), "Cannot call env.step() before calling reset()" diff --git a/compiler_gym/wrappers/validation.py b/compiler_gym/wrappers/validation.py index a493187cf..2c64f6579 100644 --- a/compiler_gym/wrappers/validation.py +++ b/compiler_gym/wrappers/validation.py @@ -2,7 +2,10 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. +from typing import List + from compiler_gym.envs import CompilerEnv +from compiler_gym.util.gym_type_hints import ActionType from compiler_gym.wrappers.core import CompilerEnvWrapper @@ -26,9 +29,16 @@ def __init__( super().__init__(env) self.reward_penalty = reward_penalty - def step(self, action, observations=None, rewards=None): - observation, reward, done, info = self.env.step( - action, observations=observations, rewards=rewards + def raw_step( + self, + actions: List[ActionType], + observation_spaces=None, + reward_spaces=None, + ): + observation, reward, done, info = self.env.raw_step( + actions, + observation_spaces=observation_spaces, + reward_spaces=reward_spaces, ) # Early exit if environment reaches terminal state. 
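For wrapper authors, the practical upshot (made explicit by the later "Use multistep instead of raw_step in wrappers" patch in this series) is that per-step behaviour is customised by overriding multistep(), which CompilerEnvWrapper.step() delegates to. The wrapper below is a hypothetical sketch rather than part of the patch: the class name StepCounter and its step_count attribute are invented for illustration.

    from typing import Iterable, Optional, Union

    from compiler_gym.envs import CompilerEnv
    from compiler_gym.spaces import Reward
    from compiler_gym.util.gym_type_hints import ActionType
    from compiler_gym.views import ObservationSpaceSpec
    from compiler_gym.wrappers import CompilerEnvWrapper


    class StepCounter(CompilerEnvWrapper):
        """Count how many actions have been applied to the wrapped environment."""

        def __init__(self, env: CompilerEnv):
            super().__init__(env)
            self.step_count = 0

        def reset(self, *args, **kwargs):
            # Start each episode with a fresh count.
            self.step_count = 0
            return self.env.reset(*args, **kwargs)

        def multistep(
            self,
            actions: Iterable[ActionType],
            observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None,
            reward_spaces: Optional[Iterable[Union[str, Reward]]] = None,
            **kwargs,
        ):
            # Count the actions, then forward everything to the wrapped environment.
            actions = list(actions)
            self.step_count += len(actions)
            return self.env.multistep(
                actions,
                observation_spaces=observation_spaces,
                reward_spaces=reward_spaces,
                **kwargs,
            )

Because the base class's step() funnels single actions through multistep() with a one-element action list, this single override observes both env.step(a) and env.multistep([a, b]) without duplicating logic.
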
diff --git a/examples/brute_force.py b/examples/brute_force.py index 50f809a64..1f75545dc 100644 --- a/examples/brute_force.py +++ b/examples/brute_force.py @@ -42,6 +42,7 @@ from compiler_gym.envs import CompilerEnv from compiler_gym.util.flags.benchmark_from_flags import benchmark_from_flags from compiler_gym.util.flags.env_from_flags import env_from_flags +from compiler_gym.util.gym_type_hints import ActionType from compiler_gym.util.logs import create_logging_dir flags.DEFINE_list( @@ -68,7 +69,7 @@ class BruteForceProducer(Thread): def __init__( self, in_q: Queue, - actions: List[int], + actions: List[ActionType], episode_length: int, nproc: int, chunksize: int = 128, diff --git a/examples/llvm_autotuning/autotuners/nevergrad_.py b/examples/llvm_autotuning/autotuners/nevergrad_.py index f7b8fd043..bacea33d8 100644 --- a/examples/llvm_autotuning/autotuners/nevergrad_.py +++ b/examples/llvm_autotuning/autotuners/nevergrad_.py @@ -10,6 +10,7 @@ from llvm_autotuning.optimization_target import OptimizationTarget from compiler_gym.envs import CompilerEnv +from compiler_gym.util.gym_type_hints import ActionType def nevergrad( @@ -30,17 +31,17 @@ def nevergrad( """ if optimization_target == OptimizationTarget.RUNTIME: - def calculate_negative_reward(actions: Tuple[int]) -> float: + def calculate_negative_reward(actions: Tuple[ActionType]) -> float: env.reset() - env.step(actions) + env.multistep(actions) return -env.episode_reward else: # Only cache the deterministic non-runtime rewards. @lru_cache(maxsize=int(1e4)) - def calculate_negative_reward(actions: Tuple[int]) -> float: + def calculate_negative_reward(actions: Tuple[ActionType]) -> float: env.reset() - env.step(actions) + env.multistep(actions) return -env.episode_reward params = ng.p.Choice( @@ -61,4 +62,4 @@ def calculate_negative_reward(actions: Tuple[int]) -> float: # Get best solution and replay it. recommendation = optimizer.provide_recommendation() env.reset() - env.step(recommendation.value) + env.multistep(recommendation.value) diff --git a/examples/llvm_autotuning/optimization_target.py b/examples/llvm_autotuning/optimization_target.py index 58feddfc4..7baeba1cb 100644 --- a/examples/llvm_autotuning/optimization_target.py +++ b/examples/llvm_autotuning/optimization_target.py @@ -68,7 +68,7 @@ def final_reward(self, env: LlvmEnv, runtime_count: int = 30) -> float: actions = list(env.actions) env.reset() for i in range(1, 5 + 1): - _, _, done, info = env.step(actions) + _, _, done, info = env.multistep(actions) if not done: break logger.warning( diff --git a/examples/llvm_rl/wrappers.py b/examples/llvm_rl/wrappers.py index 4ee1b0619..d14c82e53 100644 --- a/examples/llvm_rl/wrappers.py +++ b/examples/llvm_rl/wrappers.py @@ -3,13 +3,13 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
"""Environment wrappers to closer replicate the MLSys'20 Autophase paper.""" -from collections.abc import Iterable as IterableType -from typing import List, Union +from typing import List import gym import numpy as np from compiler_gym.envs import CompilerEnv, LlvmEnv +from compiler_gym.util.gym_type_hints import ActionType from compiler_gym.wrappers import ( ConstrainedCommandline, ObservationWrapper, @@ -126,12 +126,16 @@ def reset(self, *args, **kwargs): ) return super().reset(*args, **kwargs) - def step(self, action: Union[int, List[int]], observations=None, **kwargs): - if not isinstance(action, IterableType): - action = [action] - for a in action: + def raw_step( + self, + actions: List[ActionType], + observation_spaces=None, + observations=None, + **kwargs, + ): + for a in actions: self.histogram[a] += self.increment - return super().step(action, **kwargs) + return self.env.raw_step(actions, **kwargs) def observation(self, observation): return np.concatenate((observation, self.histogram)).astype( diff --git a/examples/sensitivity_analysis/action_sensitivity_analysis.py b/examples/sensitivity_analysis/action_sensitivity_analysis.py index 52c4e409a..05431de74 100644 --- a/examples/sensitivity_analysis/action_sensitivity_analysis.py +++ b/examples/sensitivity_analysis/action_sensitivity_analysis.py @@ -40,6 +40,7 @@ from compiler_gym.envs import CompilerEnv from compiler_gym.util.flags.benchmark_from_flags import benchmark_from_flags from compiler_gym.util.flags.env_from_flags import env_from_flags +from compiler_gym.util.gym_type_hints import ActionType from compiler_gym.util.logs import create_logging_dir from compiler_gym.util.timer import Timer @@ -118,12 +119,12 @@ def run_one_trial( _, _, done, _ = env.step(warmup_actions) if done: return None - _, (reward,), done, _ = env.step(action, rewards=[reward_space]) + _, (reward,), done, _ = env.step(action, reward_spaces=[reward_space]) return None if done else reward def run_action_sensitivity_analysis( - actions: List[int], + actions: List[ActionType], rewards_path: Path, runtimes_path: Path, reward_space: str, diff --git a/tests/llvm/episode_reward_test.py b/tests/llvm/episode_reward_test.py index ba5d6e3d7..7c1a0e7cb 100644 --- a/tests/llvm/episode_reward_test.py +++ b/tests/llvm/episode_reward_test.py @@ -28,7 +28,7 @@ def test_episode_reward_with_non_default_reward_space(env: LlvmEnv): assert env.episode_reward == 0 _, rewards, _, _ = env.step( env.action_space["-mem2reg"], - rewards=["IrInstructionCount"], + reward_spaces=["IrInstructionCount"], ) assert rewards[0] > 0 assert env.episode_reward == 0 diff --git a/tests/llvm/fork_regression_test.py b/tests/llvm/fork_regression_test.py index febc8b851..f5333dde5 100644 --- a/tests/llvm/fork_regression_test.py +++ b/tests/llvm/fork_regression_test.py @@ -57,14 +57,14 @@ def test_fork_regression_test(env: LlvmEnv, test: ForkRegressionTest): pre_fork = [env.action_space[f] for f in test.pre_fork.split()] post_fork = [env.action_space[f] for f in test.post_fork.split()] - _, _, done, info = env.step(pre_fork) + _, _, done, info = env.multistep(pre_fork) assert not done, info with env.fork() as fkd: assert env.state == fkd.state # Sanity check - env.step(post_fork) - fkd.step(post_fork) + env.multistep(post_fork) + fkd.multistep(post_fork) # Verify that the environment states no longer line up. 
assert env.state != fkd.state diff --git a/tests/llvm/llvm_env_test.py b/tests/llvm/llvm_env_test.py index a1efc22a6..36fe74bba 100644 --- a/tests/llvm/llvm_env_test.py +++ b/tests/llvm/llvm_env_test.py @@ -221,7 +221,7 @@ def test_step_multiple_actions_list(env: LlvmEnv): env.action_space.flags.index("-mem2reg"), env.action_space.flags.index("-reg2mem"), ] - _, _, done, _ = env.step(actions) + _, _, done, _ = env.multistep(actions) assert not done assert env.actions == actions @@ -233,7 +233,7 @@ def test_step_multiple_actions_generator(env: LlvmEnv): env.action_space.flags.index("-mem2reg"), env.action_space.flags.index("-reg2mem"), ) - _, _, done, _ = env.step(actions) + _, _, done, _ = env.multistep(actions) assert not done assert env.actions == [ env.action_space.flags.index("-mem2reg"), diff --git a/tests/llvm/multiprocessing_test.py b/tests/llvm/multiprocessing_test.py index d2ae8f4d1..e93fb21e5 100644 --- a/tests/llvm/multiprocessing_test.py +++ b/tests/llvm/multiprocessing_test.py @@ -12,11 +12,14 @@ from flaky import flaky from compiler_gym.envs import LlvmEnv +from compiler_gym.util.gym_type_hints import ActionType from tests.pytest_plugins.common import macos_only from tests.test_main import main -def process_worker(env_name: str, benchmark: str, actions: List[int], queue: mp.Queue): +def process_worker( + env_name: str, benchmark: str, actions: List[ActionType], queue: mp.Queue +): assert actions with gym.make(env_name) as env: env.reset(benchmark=benchmark) @@ -28,7 +31,7 @@ def process_worker(env_name: str, benchmark: str, actions: List[int], queue: mp. queue.put((observation, reward, done, info)) -def process_worker_with_env(env: LlvmEnv, actions: List[int], queue: mp.Queue): +def process_worker_with_env(env: LlvmEnv, actions: List[ActionType], queue: mp.Queue): assert actions for action in actions: diff --git a/tests/llvm/threading_test.py b/tests/llvm/threading_test.py index e125230db..a347ffc6d 100644 --- a/tests/llvm/threading_test.py +++ b/tests/llvm/threading_test.py @@ -10,13 +10,14 @@ from flaky import flaky from compiler_gym import CompilerEnv +from compiler_gym.util.gym_type_hints import ActionType from tests.test_main import main class ThreadedWorker(Thread): """Create an environment and run through a set of actions in a background thread.""" - def __init__(self, env_name: str, benchmark: str, actions: List[int]): + def __init__(self, env_name: str, benchmark: str, actions: List[ActionType]): super().__init__() self.done = False self.env_name = env_name @@ -38,7 +39,7 @@ def run(self) -> None: class ThreadedWorkerWithEnv(Thread): """Create an environment and run through a set of actions in a background thread.""" - def __init__(self, env: CompilerEnv, actions: List[int]): + def __init__(self, env: CompilerEnv, actions: List[ActionType]): super().__init__() self.done = False self.env = env diff --git a/tests/util/minimize_trajectory_test.py b/tests/util/minimize_trajectory_test.py index b9e7597e4..292cb9e09 100644 --- a/tests/util/minimize_trajectory_test.py +++ b/tests/util/minimize_trajectory_test.py @@ -10,6 +10,7 @@ import pytest from compiler_gym.util import minimize_trajectory as mt +from compiler_gym.util.gym_type_hints import ActionType from tests.test_main import main pytest_plugins = ["tests.pytest_plugins.llvm"] @@ -38,7 +39,7 @@ def okay(self): class MockEnv: """A mock environment for testing trajectory minimization.""" - def __init__(self, actions: List[int], validate=lambda env: True): + def __init__(self, actions: List[ActionType], validate=lambda 
env: True): self.original_trajectory = actions self.actions = actions.copy() self.validate = lambda: MockValidationResult(validate(self)) @@ -49,7 +50,7 @@ def reset(self, benchmark): self.actions = [] assert benchmark == self.benchmark - def step(self, actions): + def multistep(self, actions): for action in actions: assert action in self.original_trajectory self.actions += actions diff --git a/tests/views/observation_test.py b/tests/views/observation_test.py index 2d73e27d9..5f30fee2c 100644 --- a/tests/views/observation_test.py +++ b/tests/views/observation_test.py @@ -28,11 +28,11 @@ def __init__(self, ret=None): self.called_observation_spaces = [] self.ret = list(reversed(ret or [None])) - def __call__(self, actions, observations, rewards): + def __call__(self, actions, observation_spaces, reward_spaces): assert not actions - assert len(observations) == 1 - assert not rewards - self.called_observation_spaces.append(observations[0].id) + assert len(observation_spaces) == 1 + assert not reward_spaces + self.called_observation_spaces.append(observation_spaces[0].id) ret = self.ret[-1] del self.ret[-1] return [ret], [], False, {} diff --git a/tests/wrappers/commandline_wrappers_test.py b/tests/wrappers/commandline_wrappers_test.py index dba8b509a..64095c31d 100644 --- a/tests/wrappers/commandline_wrappers_test.py +++ b/tests/wrappers/commandline_wrappers_test.py @@ -18,17 +18,17 @@ def test_commandline_with_terminal_action(env: LlvmEnv): mem2reg_index = env.action_space["-mem2reg"] reg2mem_index = env.action_space["-reg2mem"] - assert mem2reg_index == mem2reg_unwrapped_index + 1 + assert mem2reg_index == mem2reg_unwrapped_index env.reset() - _, _, done, info = env.step(mem2reg_index + 1) + _, _, done, info = env.step(mem2reg_index) assert not done, info - _, _, done, info = env.step([reg2mem_index + 1, reg2mem_index + 1]) + _, _, done, info = env.multistep([reg2mem_index, reg2mem_index]) assert not done, info assert env.actions == [mem2reg_index, reg2mem_index, reg2mem_index] - _, _, done, info = env.step(0) + _, _, done, info = env.step(len(env.action_space.flags) - 1) assert done assert "terminal_action" in info @@ -36,17 +36,14 @@ def test_commandline_with_terminal_action(env: LlvmEnv): def test_commandline_with_terminal_action_fork(env: LlvmEnv): env = CommandlineWithTerminalAction(env) assert env.unwrapped.action_space != env.action_space # Sanity check. - fkd = env.fork() - try: + with env.fork() as fkd: assert fkd.action_space == env.action_space - _, _, done, info = env.step(0) + _, _, done, _ = env.step(len(env.action_space.flags) - 1) assert done - _, _, done, info = fkd.step(0) + _, _, done, _ = fkd.step(len(env.action_space.flags) - 1) assert done - finally: - fkd.close() def test_constrained_action_space(env: LlvmEnv): @@ -63,7 +60,7 @@ def test_constrained_action_space(env: LlvmEnv): env.reset() env.step(0) - env.step([1, 1]) + env.multistep([1, 1]) assert env.actions == [0, 1, 1] @@ -84,7 +81,7 @@ def test_constrained_action_space_fork(env: LlvmEnv): fkd.reset() fkd.step(0) - fkd.step([1, 1]) + fkd.multistep([1, 1]) assert fkd.actions == [0, 1, 1] finally: diff --git a/tests/wrappers/core_wrappers_test.py b/tests/wrappers/core_wrappers_test.py index a682fe7d2..cc577bac4 100644 --- a/tests/wrappers/core_wrappers_test.py +++ b/tests/wrappers/core_wrappers_test.py @@ -3,6 +3,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
"""Unit tests for //compiler_gym/wrappers.""" +import numpy as np import pytest from compiler_gym.datasets import Datasets @@ -92,7 +93,7 @@ def test_wrapped_step_multi_step(env: LlvmEnv): """Test passing a list of actions to step().""" env = CompilerEnvWrapper(env) env.reset() - env.step([0, 0, 0]) + env.multistep([0, 0, 0]) assert env.actions == [0, 0, 0] @@ -109,10 +110,10 @@ def action(self, action): env = MyWrapper(env) env.reset() - (ir, ic), (icr, icroz), _, _ = env.step( - action=[0, 0, 0], - observations=["Ir", "IrInstructionCount"], - rewards=["IrInstructionCount", "IrInstructionCountOz"], + (ir, ic), (icr, icroz), _, _ = env.multistep( + actions=[0, 0, 0], + observation_spaces=["Ir", "IrInstructionCount"], + reward_spaces=["IrInstructionCount", "IrInstructionCountOz"], ) assert isinstance(ir, str) assert isinstance(ic, int) @@ -213,7 +214,7 @@ def observation(self, observation): assert env.reward_space.name == "IrInstructionCount" -def test_wrapped_action(env: LlvmEnv): +def test_wrapped_action(mocker, env: LlvmEnv): class MyWrapper(ActionWrapper): def action(self, action): return action - 1 @@ -222,14 +223,17 @@ def reverse_action(self, action): return action + 1 env = MyWrapper(env) + mocker.spy(env, "action") + env.reset() env.step(1) env.step(2) + assert env.action.call_count == 2 # pylint: disable=no-member assert env.actions == [0, 1] -def test_wrapped_observation(env: LlvmEnv): +def test_wrapped_observation(mocker, env: LlvmEnv): """Test using an ObservationWrapper that returns the length of the Ir string.""" class MyWrapper(ObservationWrapper): @@ -241,8 +245,10 @@ def observation(self, observation): return len(observation) env = MyWrapper(env) + assert env.reset() > 0 observation, _, _, _ = env.step(0) + assert observation > 0 @@ -253,6 +259,22 @@ def test_wrapped_observation_missing_definition(env: LlvmEnv): env.reset() +def test_wrapped_observation_not_applied_to_non_default_observations(env: LlvmEnv): + class MyWrapper(ObservationWrapper): + def __init__(self, env): + super().__init__(env) + self.observation_space = "Ir" + + def observation(self, observation): + return len(observation) + + env = MyWrapper(env) + env.reset() + (observation,), _, _, _ = env.step(0, observation_spaces=["Autophase"]) + print(observation) + assert isinstance(observation, np.ndarray) + + def test_wrapped_reward(env: LlvmEnv): class MyWrapper(RewardWrapper): def reward(self, reward): diff --git a/tests/wrappers/time_limit_wrappers_test.py b/tests/wrappers/time_limit_wrappers_test.py index f74c76ea5..83b356535 100644 --- a/tests/wrappers/time_limit_wrappers_test.py +++ b/tests/wrappers/time_limit_wrappers_test.py @@ -28,7 +28,7 @@ def test_wrapped_fork_type(env: LlvmEnv): def test_wrapped_step_multi_step(env: LlvmEnv): env = TimeLimit(env, max_episode_steps=5) env.reset(benchmark="benchmark://cbench-v1/dijkstra") - env.step([0, 0, 0]) + env.multistep([0, 0, 0]) assert env.benchmark == "benchmark://cbench-v1/dijkstra" assert env.actions == [0, 0, 0] @@ -37,7 +37,7 @@ def test_wrapped_step_multi_step(env: LlvmEnv): def test_wrapped_custom_step_args(env: LlvmEnv): env = TimeLimit(env, max_episode_steps=5) env.reset(benchmark="benchmark://cbench-v1/dijkstra") - (ic,), _, _, _ = env.step(0, observations=["IrInstructionCount"]) + (ic,), _, _, _ = env.step(0, observation_spaces=["IrInstructionCount"]) assert isinstance(ic, int) diff --git a/www/www.py b/www/www.py index 0586b3274..951aa34f3 100644 --- a/www/www.py +++ b/www/www.py @@ -217,9 +217,9 @@ def _step(request: StepRequest) -> StepReply: if 
request.all_states: # Replay actions one at a time to receive incremental rewards. The # first item represents the state prior to any actions. - (instcount, autophase), _, done, info = env.step( - action=[], - observations=[ + (instcount, autophase), _, done, info = env.raw_step( + actions=[], + observation_spaces=[ env.observation.spaces["InstCountDict"], env.observation.spaces["AutophaseDict"], ], @@ -238,7 +238,7 @@ def _step(request: StepRequest) -> StepReply: for action in request.actions[:-1]: (instcount, autophase), reward, done, info = env.step( action, - observations=[ + observation_spaces=[ env.observation.spaces["InstCountDict"], env.observation.spaces["AutophaseDict"], ], @@ -265,12 +265,12 @@ def _step(request: StepRequest) -> StepReply: # Perform the final action. (ir, instcount, autophase), (reward,), done, _ = env.raw_step( actions=request.actions[-1:], - observations=[ + observation_spaces=[ env.observation.spaces["Ir"], env.observation.spaces["InstCountDict"], env.observation.spaces["AutophaseDict"], ], - rewards=[env.reward_space], + reward_spaces=[env.reward_space], ) states.append( From 48380bba390e4e27316dc624cd9361a1b9540bb2 Mon Sep 17 00:00:00 2001 From: Boian Petkantchin Date: Thu, 10 Mar 2022 18:36:20 -0800 Subject: [PATCH 4/8] Use multistep instead of raw_step in wrappers Fix tests --- compiler_gym/wrappers/commandline.py | 8 +- compiler_gym/wrappers/core.py | 84 ++++++++----------- compiler_gym/wrappers/validation.py | 4 +- .../llvm_autotuning/autotuners/opentuner_.py | 2 +- examples/llvm_rl/wrappers.py | 4 +- .../service_py/CMakeLists.txt | 2 + examples/op_benchmarks.py | 2 +- .../benchmark_sensitivity_analysis.py | 2 +- tests/util/minimize_trajectory_test.py | 2 +- tests/wrappers/core_wrappers_test.py | 22 +---- www/www.py | 4 +- 11 files changed, 58 insertions(+), 78 deletions(-) diff --git a/compiler_gym/wrappers/commandline.py b/compiler_gym/wrappers/commandline.py index a961c878c..21a2e2fc9 100644 --- a/compiler_gym/wrappers/commandline.py +++ b/compiler_gym/wrappers/commandline.py @@ -57,11 +57,13 @@ def __init__( name=f"{type(self).__name__}<{env.action_space.name}>", ) - def raw_step( + def multistep( self, actions: List[ActionType], observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, reward_spaces: Optional[Iterable[Union[str, Reward]]] = None, + observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + rewards: Optional[Iterable[Union[str, Reward]]] = None, ) -> StepType: terminal_action: int = len(self.action_space.flags) - 1 @@ -74,10 +76,12 @@ def raw_step( if index_of_terminal >= 0: actions = actions[:index_of_terminal] - observation, reward, done, info = self.env.raw_step( + observation, reward, done, info = self.env.multistep( actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces, + observations=observations, + rewards=rewards, ) # Communicate back to the frontend. diff --git a/compiler_gym/wrappers/core.py b/compiler_gym/wrappers/core.py index f6d0809ef..34674fb8d 100644 --- a/compiler_gym/wrappers/core.py +++ b/compiler_gym/wrappers/core.py @@ -4,7 +4,7 @@ # LICENSE file in the root directory of this source tree. 
import warnings from collections.abc import Iterable as IterableType -from typing import Iterable, Optional, Union +from typing import Iterable, List, Optional, Union import gym @@ -40,26 +40,12 @@ def __init__(self, env: CompilerEnv): # pylint: disable=super-init-not-called self.reward_range = self.env.reward_range self.metadata = self.env.metadata - def raw_step( - self, - actions: Iterable[ActionType], - observation_spaces: Iterable[ObservationSpaceSpec], - reward_spaces: Iterable[Reward], - ): - return self.env.raw_step( - actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces - ) - def reset(self, *args, **kwargs) -> ObservationType: return self.env.reset(*args, **kwargs) def fork(self) -> CompilerEnv: return type(self)(env=self.env.fork()) - # NOTE(cummins): This step() method is provided only because - # CompilerEnv.step accepts additional arguments over gym.Env.step. Users who - # wish to modify the behavior of CompilerEnv.step should overload - # raw_step(). def step( # pylint: disable=arguments-differ self, action: ActionType, @@ -95,8 +81,7 @@ def step( # pylint: disable=arguments-differ category=DeprecationWarning, ) reward_spaces = rewards - return self.env._multistep( - raw_step=self.raw_step, + return self.multistep( actions=[action], observation_spaces=observation_spaces, reward_spaces=reward_spaces, @@ -124,8 +109,7 @@ def multistep( category=DeprecationWarning, ) reward_spaces = rewards - return self.env._multistep( # pylint: disable=protected-access - raw_step=self.raw_step, + return self.env.multistep( actions=actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces, @@ -166,16 +150,20 @@ class ActionWrapper(CompilerEnvWrapper): to allow an action space transformation. """ - def raw_step( + def multistep( self, actions: Iterable[ActionType], - observation_spaces: Iterable[ObservationSpaceSpec], - reward_spaces: Iterable[Reward], + observation_spaces: Optional[Iterable[ObservationSpaceSpec]] = None, + reward_spaces: Optional[Iterable[Reward]] = None, + observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + rewards: Optional[Iterable[Union[str, Reward]]] = None, ): - return self.env.raw_step( + return self.env.multistep( [self.action(a) for a in actions], observation_spaces=observation_spaces, reward_spaces=reward_spaces, + observations=observations, + rewards=rewards, ) def action(self, action: ActionType) -> ActionType: @@ -196,22 +184,23 @@ def reset(self, *args, **kwargs): observation = self.env.reset(*args, **kwargs) return self.observation(observation) - def raw_step( + def multistep( self, - actions: Iterable[ActionType], - observation_spaces: Iterable[ObservationSpaceSpec], - reward_spaces: Iterable[Reward], + actions: List[ActionType], + observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + reward_spaces: Optional[Iterable[Union[str, Reward]]] = None, + observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + rewards: Optional[Iterable[Union[str, Reward]]] = None, ): - observation, reward, done, info = self.env.raw_step( - actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces + observation, reward, done, info = self.env.multistep( + actions, + observation_spaces=observation_spaces, + reward_spaces=reward_spaces, + observations=observations, + rewards=rewards, ) - # Only apply observation transformation if we are using the default - # observation space. 
- if observation_spaces == [self.observation_space_spec]: - observation = [self.observation(observation)] - - return observation, reward, done, info + return self.observation(observation), reward, done, info def observation(self, observation): """Translate an observation to the new space.""" @@ -226,22 +215,23 @@ class RewardWrapper(CompilerEnvWrapper): def reset(self, *args, **kwargs): return self.env.reset(*args, **kwargs) - def raw_step( + def multistep( self, - actions: Iterable[ActionType], - observation_spaces: Iterable[ObservationSpaceSpec], - reward_spaces: Iterable[Reward], + actions: List[ActionType], + observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + reward_spaces: Optional[Iterable[Union[str, Reward]]] = None, + observations: Optional[Iterable[Union[str, ObservationSpaceSpec]]] = None, + rewards: Optional[Iterable[Union[str, Reward]]] = None, ): - observation, reward, done, info = self.env.step( - actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces + observation, reward, done, info = self.env.multistep( + actions, + observation_spaces=observation_spaces, + reward_spaces=reward_spaces, + observations=observations, + rewards=rewards, ) - # Only apply rewards transformation if we are using the default - # reward space. - if reward_spaces == [self.reward_space]: - reward = [self.reward(reward)] - - return observation, reward, done, info + return observation, self.reward(reward), done, info def reward(self, reward): """Translate a reward to the new space.""" diff --git a/compiler_gym/wrappers/validation.py b/compiler_gym/wrappers/validation.py index 2c64f6579..3c3fd9c62 100644 --- a/compiler_gym/wrappers/validation.py +++ b/compiler_gym/wrappers/validation.py @@ -29,13 +29,13 @@ def __init__( super().__init__(env) self.reward_penalty = reward_penalty - def raw_step( + def multistep( self, actions: List[ActionType], observation_spaces=None, reward_spaces=None, ): - observation, reward, done, info = self.env.raw_step( + observation, reward, done, info = self.env.multistep( actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces, diff --git a/examples/llvm_autotuning/autotuners/opentuner_.py b/examples/llvm_autotuning/autotuners/opentuner_.py index 3850de8aa..9d506093b 100644 --- a/examples/llvm_autotuning/autotuners/opentuner_.py +++ b/examples/llvm_autotuning/autotuners/opentuner_.py @@ -93,7 +93,7 @@ def __init__(self, data) -> None: wrapped = DesiredResult(Configuration(manipulator.best_config)) manipulator.run(wrapped, None, None) env.reset() - env.step(manipulator.serialize_actions(manipulator.best_config)) + env.multistep(manipulator.serialize_actions(manipulator.best_config)) class LlvmOptFlagsTuner(MeasurementInterface): diff --git a/examples/llvm_rl/wrappers.py b/examples/llvm_rl/wrappers.py index d14c82e53..04aa5b8b4 100644 --- a/examples/llvm_rl/wrappers.py +++ b/examples/llvm_rl/wrappers.py @@ -126,7 +126,7 @@ def reset(self, *args, **kwargs): ) return super().reset(*args, **kwargs) - def raw_step( + def multistep( self, actions: List[ActionType], observation_spaces=None, @@ -135,7 +135,7 @@ def raw_step( ): for a in actions: self.histogram[a] += self.increment - return self.env.raw_step(actions, **kwargs) + return self.env.multistep(actions, **kwargs) def observation(self, observation): return np.concatenate((observation, self.histogram)).astype( diff --git a/examples/loop_optimizations_service/service_py/CMakeLists.txt b/examples/loop_optimizations_service/service_py/CMakeLists.txt index 
c10e61828..3ddacda70 100644 --- a/examples/loop_optimizations_service/service_py/CMakeLists.txt +++ b/examples/loop_optimizations_service/service_py/CMakeLists.txt @@ -5,6 +5,8 @@ cg_add_all_subdirs() +return() + cg_py_library( NAME loops_opt_service diff --git a/examples/op_benchmarks.py b/examples/op_benchmarks.py index faa53ca05..1d63d04d8 100644 --- a/examples/op_benchmarks.py +++ b/examples/op_benchmarks.py @@ -267,7 +267,7 @@ def get_step_times(env: CompilerEnv, num_steps: int, batched=False): # Run all actions in a single step(). steps = [env.action_space.sample() for _ in range(num_steps)] with Timer() as timer: - _, _, done, _ = env.step(steps) + _, _, done, _ = env.multistep(steps) if not done: return [timer.time / num_steps] * num_steps env.reset() diff --git a/examples/sensitivity_analysis/benchmark_sensitivity_analysis.py b/examples/sensitivity_analysis/benchmark_sensitivity_analysis.py index 065b5bc52..00c6d688f 100644 --- a/examples/sensitivity_analysis/benchmark_sensitivity_analysis.py +++ b/examples/sensitivity_analysis/benchmark_sensitivity_analysis.py @@ -116,7 +116,7 @@ def run_one_trial( num_steps = random.randint(min_steps, max_steps) warmup_actions = [env.action_space.sample() for _ in range(num_steps)] env.reward_space = reward_space - _, _, done, _ = env.step(warmup_actions) + _, _, done, _ = env.multistep(warmup_actions) if done: return None return env.episode_reward diff --git a/tests/util/minimize_trajectory_test.py b/tests/util/minimize_trajectory_test.py index 292cb9e09..782518bc2 100644 --- a/tests/util/minimize_trajectory_test.py +++ b/tests/util/minimize_trajectory_test.py @@ -152,7 +152,7 @@ def hypothesis(env): def test_minimize_trajectory_iteratively_llvm_crc32(env): """Test trajectory minimization on a real environment.""" env.reset(benchmark="cbench-v1/crc32") - env.step( + env.multistep( [ env.action_space["-mem2reg"], env.action_space["-gvn"], diff --git a/tests/wrappers/core_wrappers_test.py b/tests/wrappers/core_wrappers_test.py index cc577bac4..1aac66170 100644 --- a/tests/wrappers/core_wrappers_test.py +++ b/tests/wrappers/core_wrappers_test.py @@ -3,7 +3,6 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
"""Unit tests for //compiler_gym/wrappers.""" -import numpy as np import pytest from compiler_gym.datasets import Datasets @@ -108,6 +107,9 @@ def observation(self, observation): def action(self, action): return action # pass thru + def reward(self, reward): + return reward + env = MyWrapper(env) env.reset() (ir, ic), (icr, icroz), _, _ = env.multistep( @@ -259,22 +261,6 @@ def test_wrapped_observation_missing_definition(env: LlvmEnv): env.reset() -def test_wrapped_observation_not_applied_to_non_default_observations(env: LlvmEnv): - class MyWrapper(ObservationWrapper): - def __init__(self, env): - super().__init__(env) - self.observation_space = "Ir" - - def observation(self, observation): - return len(observation) - - env = MyWrapper(env) - env.reset() - (observation,), _, _, _ = env.step(0, observation_spaces=["Autophase"]) - print(observation) - assert isinstance(observation, np.ndarray) - - def test_wrapped_reward(env: LlvmEnv): class MyWrapper(RewardWrapper): def reward(self, reward): @@ -286,11 +272,9 @@ def reward(self, reward): env.reset() _, reward, _, _ = env.step(0) assert reward == -5 - assert env.episode_reward == -5 _, reward, _, _ = env.step(0) assert reward == -5 - assert env.episode_reward == -10 if __name__ == "__main__": diff --git a/www/www.py b/www/www.py index 951aa34f3..d080c07e4 100644 --- a/www/www.py +++ b/www/www.py @@ -217,7 +217,7 @@ def _step(request: StepRequest) -> StepReply: if request.all_states: # Replay actions one at a time to receive incremental rewards. The # first item represents the state prior to any actions. - (instcount, autophase), _, done, info = env.raw_step( + (instcount, autophase), _, done, info = env.multistep( actions=[], observation_spaces=[ env.observation.spaces["InstCountDict"], @@ -263,7 +263,7 @@ def _step(request: StepRequest) -> StepReply: ) # Perform the final action. - (ir, instcount, autophase), (reward,), done, _ = env.raw_step( + (ir, instcount, autophase), (reward,), done, _ = env.multistep( actions=request.actions[-1:], observation_spaces=[ env.observation.spaces["Ir"], From bc1f9e3fa57519bbf0a71d2573f786417a298e36 Mon Sep 17 00:00:00 2001 From: Boian Petkantchin Date: Wed, 16 Mar 2022 19:41:09 -0700 Subject: [PATCH 5/8] Fix failing tests --- compiler_gym/bin/service.py | 4 ++++ compiler_gym/envs/compiler_env.py | 21 +++---------------- examples/llvm_rl/wrappers.py | 2 +- .../action_sensitivity_analysis.py | 2 +- 4 files changed, 9 insertions(+), 20 deletions(-) diff --git a/compiler_gym/bin/service.py b/compiler_gym/bin/service.py index 3542ae22b..cf4c10c38 100644 --- a/compiler_gym/bin/service.py +++ b/compiler_gym/bin/service.py @@ -256,6 +256,10 @@ def print_service_capabilities(env: CompilerEnv): headers=("Action",), ) print(table) + else: + raise NotImplementedError( + "Only Commandline and NamedDiscrete are supported." 
+ ) def main(argv): diff --git a/compiler_gym/envs/compiler_env.py b/compiler_gym/envs/compiler_env.py index a00f86947..9d3c5a2ed 100644 --- a/compiler_gym/envs/compiler_env.py +++ b/compiler_gym/envs/compiler_env.py @@ -11,7 +11,7 @@ from math import isclose from pathlib import Path from time import time -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union import gym import numpy as np @@ -1082,9 +1082,7 @@ def step( # pylint: disable=arguments-differ category=DeprecationWarning, ) reward_spaces = rewards - return self._multistep( - self.raw_step, [action], observation_spaces, reward_spaces - ) + return self.multistep([action], observation_spaces, reward_spaces) def multistep( self, @@ -1129,20 +1127,7 @@ def multistep( category=DeprecationWarning, ) reward_spaces = rewards - return self._multistep( - self.raw_step, list(actions), observation_spaces, reward_spaces - ) - def _multistep( - self, - raw_step: Callable[ - [Iterable[ActionType], Iterable[ObservationSpaceSpec], Iterable[Reward]], - StepType, - ], - actions: Iterable[ActionType], - observation_spaces: Optional[Iterable[Union[str, ObservationSpaceSpec]]], - reward_spaces: Optional[Iterable[Union[str, Reward]]], - ) -> StepType: # Coerce observation spaces into a list of ObservationSpaceSpec instances. if observation_spaces: observation_spaces_to_compute: List[ObservationSpaceSpec] = [ @@ -1170,7 +1155,7 @@ def _multistep( reward_spaces_to_compute: List[Reward] = [] # Perform the underlying environment step. - observation_values, reward_values, done, info = raw_step( + observation_values, reward_values, done, info = self.raw_step( actions, observation_spaces_to_compute, reward_spaces_to_compute ) diff --git a/examples/llvm_rl/wrappers.py b/examples/llvm_rl/wrappers.py index 04aa5b8b4..4a2a7e6d9 100644 --- a/examples/llvm_rl/wrappers.py +++ b/examples/llvm_rl/wrappers.py @@ -135,7 +135,7 @@ def multistep( ): for a in actions: self.histogram[a] += self.increment - return self.env.multistep(actions, **kwargs) + return super().multistep(actions, **kwargs) def observation(self, observation): return np.concatenate((observation, self.histogram)).astype( diff --git a/examples/sensitivity_analysis/action_sensitivity_analysis.py b/examples/sensitivity_analysis/action_sensitivity_analysis.py index 05431de74..d979cee65 100644 --- a/examples/sensitivity_analysis/action_sensitivity_analysis.py +++ b/examples/sensitivity_analysis/action_sensitivity_analysis.py @@ -116,7 +116,7 @@ def run_one_trial( num_warmup_steps = random.randint(0, max_warmup_steps) warmup_actions = [env.action_space.sample() for _ in range(num_warmup_steps)] env.reward_space = reward_space - _, _, done, _ = env.step(warmup_actions) + _, _, done, _ = env.multistep(warmup_actions) if done: return None _, (reward,), done, _ = env.step(action, reward_spaces=[reward_space]) From 5eb13814915851c5f975f95acb8310f7948a677f Mon Sep 17 00:00:00 2001 From: Boian Petkantchin Date: Thu, 17 Mar 2022 08:54:08 -0700 Subject: [PATCH 6/8] Fix RewardWrapper episode_reward --- compiler_gym/wrappers/core.py | 12 +++++++++++- tests/wrappers/core_wrappers_test.py | 2 ++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/compiler_gym/wrappers/core.py b/compiler_gym/wrappers/core.py index 34674fb8d..62b0c679d 100644 --- a/compiler_gym/wrappers/core.py +++ b/compiler_gym/wrappers/core.py @@ -231,7 +231,17 @@ def multistep( rewards=rewards, ) - return observation, self.reward(reward), done, info + 
# Undo the episode_reward update and reapply it once we have transformed + # the reward. + # + # TODO(cummins): Refactor step() so that we don't have to do this + # recalculation of episode_reward, as this is prone to errors if, say, + # the base reward returns NaN or an invalid type. + if reward is not None and self.episode_reward is not None: + self.unwrapped.episode_reward -= reward + reward = self.reward(reward) + self.unwrapped.episode_reward += reward + return observation, reward, done, info def reward(self, reward): """Translate a reward to the new space.""" diff --git a/tests/wrappers/core_wrappers_test.py b/tests/wrappers/core_wrappers_test.py index 1aac66170..bcaffa42f 100644 --- a/tests/wrappers/core_wrappers_test.py +++ b/tests/wrappers/core_wrappers_test.py @@ -272,9 +272,11 @@ def reward(self, reward): env.reset() _, reward, _, _ = env.step(0) assert reward == -5 + assert env.episode_reward == -5 _, reward, _, _ = env.step(0) assert reward == -5 + assert env.episode_reward == -10 if __name__ == "__main__": From f7f8fc4d8c84032a191d19d338d63c93d9e293a7 Mon Sep 17 00:00:00 2001 From: Boian Petkantchin Date: Thu, 17 Mar 2022 08:55:28 -0700 Subject: [PATCH 7/8] Fix build of examples/loop_optimizations_service/service_py --- examples/loop_optimizations_service/service_py/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/loop_optimizations_service/service_py/CMakeLists.txt b/examples/loop_optimizations_service/service_py/CMakeLists.txt index 3ddacda70..c10e61828 100644 --- a/examples/loop_optimizations_service/service_py/CMakeLists.txt +++ b/examples/loop_optimizations_service/service_py/CMakeLists.txt @@ -5,8 +5,6 @@ cg_add_all_subdirs() -return() - cg_py_library( NAME loops_opt_service From 8cd9679a53831305359fd9dfcd823a85463804bd Mon Sep 17 00:00:00 2001 From: Boian Petkantchin Date: Thu, 17 Mar 2022 08:56:29 -0700 Subject: [PATCH 8/8] Add missing arguments to ValidateBenchmarkAfterEveryStep.multistep --- compiler_gym/wrappers/validation.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/compiler_gym/wrappers/validation.py b/compiler_gym/wrappers/validation.py index 3c3fd9c62..85427a8ce 100644 --- a/compiler_gym/wrappers/validation.py +++ b/compiler_gym/wrappers/validation.py @@ -34,11 +34,15 @@ def multistep( actions: List[ActionType], observation_spaces=None, reward_spaces=None, + observations=None, + rewards=None, ): observation, reward, done, info = self.env.multistep( actions, observation_spaces=observation_spaces, reward_spaces=reward_spaces, + observations=observations, + rewards=rewards, ) # Early exit if environment reaches terminal state.
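
Note: a minimal usage sketch of the API after this series is applied, assuming a local compiler_gym install with the LLVM environment registered as "llvm-v0"; the benchmark and space names below are the ones exercised by the tests in these patches.

    import compiler_gym

    with compiler_gym.make("llvm-v0") as env:
        env.reset(benchmark="benchmark://cbench-v1/dijkstra")

        # step() now takes exactly one action; passing a list of actions
        # still works but raises a DeprecationWarning.
        observation, reward, done, info = env.step(env.action_space.sample())

        # multistep() takes an iterable of actions and applies them in order.
        # The observation_spaces / reward_spaces arguments (renamed from
        # observations / rewards, which are still accepted with a
        # DeprecationWarning) select which spaces to compute for the return
        # tuple instead of the default observation_space / reward_space.
        (ir, ic), (icroz,), done, info = env.multistep(
            actions=[0, 0, 0],
            observation_spaces=["Ir", "IrInstructionCount"],
            reward_spaces=["IrInstructionCountOz"],
        )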