diff --git a/.github/workflows/fuzz.yaml b/.github/workflows/fuzz.yaml index 84a031c01..1ae439a14 100644 --- a/.github/workflows/fuzz.yaml +++ b/.github/workflows/fuzz.yaml @@ -42,4 +42,4 @@ jobs: BAZEL_TEST_OPTS: --config=ci - name: Test - run: FUZZ_TIME=600 make fuzz + run: FUZZ_TIME=600 make install-fuzz diff --git a/compiler_gym/envs/compiler_env.py b/compiler_gym/envs/compiler_env.py index 16db59fb3..d819512f4 100644 --- a/compiler_gym/envs/compiler_env.py +++ b/compiler_gym/envs/compiler_env.py @@ -973,23 +973,25 @@ def step( reward_spaces: List[Reward] = [] # Perform the underlying environment step. - observations, rewards, done, info = self.raw_step( + observation_values, reward_values, done, info = self.raw_step( actions, observation_spaces, reward_spaces ) # Translate observations lists back to the appropriate types. - if self.observation_space_spec and len(observations) == 1: - observations = observations[0] + if observations is None and self.observation_space_spec: + observation_values = observation_values[0] elif not observation_spaces: - observations = None + observation_values = None # Translate reward lists back to the appropriate types. - if self.reward_space_spec and len(rewards) == 1: - rewards = rewards[0] + if rewards is None and self.reward_space: + reward_values = reward_values[0] + # Update the cumulative episode reward + self.episode_reward += reward_values elif not reward_spaces: - rewards = None + reward_values = None - return observations, rewards, done, info + return observation_values, reward_values, done, info def render( self, diff --git a/compiler_gym/spaces/reward.py b/compiler_gym/spaces/reward.py index 1576d5154..7c2ea57ea 100644 --- a/compiler_gym/spaces/reward.py +++ b/compiler_gym/spaces/reward.py @@ -2,7 +2,7 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -from typing import List, Optional, Tuple +from typing import List, Optional, Tuple, Union import numpy as np @@ -137,6 +137,14 @@ def range(self) -> Tuple[RewardType, RewardType]: def __repr__(self): return self.id + def __eq__(self, other: Union["Reward", str]) -> bool: + if isinstance(other, str): + return self.id == other + elif isinstance(other, Reward): + return self.id == other.id + else: + return False + class DefaultRewardFromObservation(Reward): def __init__(self, observation_name: str, **kwargs): diff --git a/compiler_gym/third_party/llvm/__init__.py b/compiler_gym/third_party/llvm/__init__.py index d832b7a89..58be6ba2c 100644 --- a/compiler_gym/third_party/llvm/__init__.py +++ b/compiler_gym/third_party/llvm/__init__.py @@ -107,6 +107,11 @@ def llvm_stress_path() -> Path: return download_llvm_files() / "bin/llvm-stress" +def llvm_diff_path() -> Path: + """Return the path of llvm-diff.""" + return download_llvm_files() / "bin/llvm-diff" + + def opt_path() -> Path: """Return the path of opt.""" return download_llvm_files() / "bin/opt" diff --git a/compiler_gym/wrappers/core.py b/compiler_gym/wrappers/core.py index fb9fddf99..f74b46fa5 100644 --- a/compiler_gym/wrappers/core.py +++ b/compiler_gym/wrappers/core.py @@ -81,7 +81,16 @@ def reset(self, *args, **kwargs): def step(self, *args, **kwargs): observation, reward, done, info = self.env.step(*args, **kwargs) - return observation, self.reward(reward), done, info + # Undo the episode_reward update and reapply it once we have transformed + # the reward. + # + # TODO(cummins): Refactor step() so that we don't have to do this + # recalculation of episode_reward, as this is prone to errors if, say, + # the base reward returns NaN or an invalid type. + self.unwrapped.episode_reward -= reward + reward = self.reward(reward) + self.unwrapped.episode_reward += reward + return observation, reward, done, info def reward(self, reward): """Translate a reward to the new space.""" diff --git a/tests/fuzzing/BUILD b/tests/fuzzing/BUILD index ac4d27556..682779ff4 100644 --- a/tests/fuzzing/BUILD +++ b/tests/fuzzing/BUILD @@ -6,6 +6,17 @@ # LICENSE file in the root directory of this source tree. load("@rules_python//python:defs.bzl", "py_test") +py_test( + name = "llvm_cbench_validate_fuzz_test", + srcs = ["llvm_cbench_validate_fuzz_test.py"], + tags = ["manual"], + deps = [ + "//compiler_gym", + "//tests:test_main", + "//tests/pytest_plugins:llvm", + ], +) + py_test( name = "llvm_commandline_opt_equivalence_fuzz_test", srcs = ["llvm_commandline_opt_equivalence_fuzz_test.py"], @@ -68,17 +79,6 @@ py_test( ], ) -py_test( - name = "llvm_validate_fuzz_test", - srcs = ["llvm_validate_fuzz_test.py"], - tags = ["manual"], - deps = [ - "//compiler_gym", - "//tests:test_main", - "//tests/pytest_plugins:llvm", - ], -) - py_test( name = "llvm_stress_fuzz_test", timeout = "long", diff --git a/tests/fuzzing/llvm_validate_fuzz_test.py b/tests/fuzzing/llvm_cbench_validate_fuzz_test.py similarity index 90% rename from tests/fuzzing/llvm_validate_fuzz_test.py rename to tests/fuzzing/llvm_cbench_validate_fuzz_test.py index eae460a0b..8a5faa67e 100644 --- a/tests/fuzzing/llvm_validate_fuzz_test.py +++ b/tests/fuzzing/llvm_cbench_validate_fuzz_test.py @@ -5,6 +5,8 @@ """Fuzz test for LlvmEnv.validate().""" import random +import pytest + from compiler_gym.envs import LlvmEnv from tests.pytest_plugins.llvm import VALIDATABLE_CBENCH_URIS from tests.test_main import main @@ -16,6 +18,7 @@ RANDOM_TRAJECTORY_LENGTH_RANGE = (1, 50) +@pytest.mark.timeout(600) def test_fuzz(env: LlvmEnv): """This test generates a random trajectory and validates the semantics.""" benchmark = random.choice(VALIDATABLE_CBENCH_URIS) @@ -29,7 +32,8 @@ def test_fuzz(env: LlvmEnv): break # Broken trajectory, retry. else: print(f"Validating state {env.state}") - assert env.validate() == [] + result = env.validate() + assert result.okay(), result # Stop the test. break diff --git a/tests/fuzzing/llvm_commandline_opt_equivalence_fuzz_test.py b/tests/fuzzing/llvm_commandline_opt_equivalence_fuzz_test.py index 12a9a9122..57b852b23 100644 --- a/tests/fuzzing/llvm_commandline_opt_equivalence_fuzz_test.py +++ b/tests/fuzzing/llvm_commandline_opt_equivalence_fuzz_test.py @@ -3,8 +3,8 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. """Fuzz test for LlvmEnv.commandline().""" +import os import subprocess -from difflib import unified_diff from pathlib import Path import pytest @@ -22,10 +22,13 @@ RANDOM_TRAJECTORY_LENGTH_RANGE = (1, 50) +@pytest.mark.timeout(600) def test_fuzz(env: LlvmEnv, tmpwd: Path, llvm_opt: Path, llvm_diff: Path): """This test produces a random trajectory and then uses the commandline() generated with opt to check that the states are equivalent. """ + del tmpwd + env.reset() env.write_ir("input.ll") assert Path("input.ll").is_file() @@ -47,20 +50,21 @@ def test_fuzz(env: LlvmEnv, tmpwd: Path, llvm_opt: Path, llvm_diff: Path): commandline, env={"PATH": str(llvm_opt.parent)}, shell=True, timeout=60 ) assert Path("output.ll").is_file() + os.rename("output.ll", "opt.ll") - with open("output.ll") as f1, open("env.ll") as f2: - # Diff the IR files but exclude the first line which is the module name. - diff = list(unified_diff(f1.readlines()[1:], f2.readlines()[1:])) + diff = subprocess.Popen( + [llvm_diff, "opt.ll", "env.ll"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + ) + stdout, stderr = diff.communicate(timeout=300) - if diff and len(diff) < 25: - diff = "\n".join(diff) - pytest.fail(f"Opt produced different output to CompilerGym:\n{diff}") - elif diff: - # If it's a big diff then we will require the user to reproduce it - # themselves using the environment state we printed earlier. - pytest.fail( - f"Opt produced different output to CompilerGym ({len(diff)}-line diff)" - ) + if diff.returncode: + pytest.fail( + f"Opt produced different output to CompilerGym " + f"(returncode: {diff.returncode}):\n{stdout}\n{stderr}" + ) if __name__ == "__main__": diff --git a/tests/fuzzing/llvm_deterministic_action_fuzz_test.py b/tests/fuzzing/llvm_deterministic_action_fuzz_test.py index 6e5b314a9..3c4af9617 100644 --- a/tests/fuzzing/llvm_deterministic_action_fuzz_test.py +++ b/tests/fuzzing/llvm_deterministic_action_fuzz_test.py @@ -24,6 +24,7 @@ def sha1(string: str): return sha1.hexdigest() +@pytest.mark.timeout(600) def test_fuzz(env: LlvmEnv): """Run an action multiple times from the same starting state and check that the generated LLVM-IR is the same. diff --git a/tests/fuzzing/llvm_fork_env_fuzz_test.py b/tests/fuzzing/llvm_fork_env_fuzz_test.py index 800838e87..ac73a5f5b 100644 --- a/tests/fuzzing/llvm_fork_env_fuzz_test.py +++ b/tests/fuzzing/llvm_fork_env_fuzz_test.py @@ -17,6 +17,7 @@ POST_FORK_ACTIONS = 10 +@pytest.mark.timeout(600) def test_fuzz(env: LlvmEnv, reward_space: str): """This test generates a random trajectory and checks that fork() produces an equivalent state. It then runs a second trajectory on the two diff --git a/tests/fuzzing/llvm_random_actions_fuzz_test.py b/tests/fuzzing/llvm_random_actions_fuzz_test.py index f4c68ceb7..aabff3455 100644 --- a/tests/fuzzing/llvm_random_actions_fuzz_test.py +++ b/tests/fuzzing/llvm_random_actions_fuzz_test.py @@ -8,6 +8,7 @@ import gym import numpy as np +import pytest from compiler_gym.third_party.autophase import AUTOPHASE_FEATURE_DIM from tests.test_main import main @@ -18,7 +19,8 @@ FUZZ_TIME_SECONDS = 2 -def test_benchmark_random_actions(benchmark_name: str): +@pytest.mark.timeout(600) +def test_fuzz(benchmark_name: str): """Run randomly selected actions on a benchmark until a minimum amount of time has elapsed.""" env = gym.make( "llvm-v0", diff --git a/tests/fuzzing/llvm_stress_fuzz_test.py b/tests/fuzzing/llvm_stress_fuzz_test.py index 751fc4284..9960d67a4 100644 --- a/tests/fuzzing/llvm_stress_fuzz_test.py +++ b/tests/fuzzing/llvm_stress_fuzz_test.py @@ -3,6 +3,9 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. """Fuzz test LLVM backend using llvm-stress.""" +import pytest + +from compiler_gym.datasets import BenchmarkInitError from compiler_gym.envs import LlvmEnv from tests.pytest_plugins.random_util import apply_random_trajectory from tests.test_main import main @@ -13,22 +16,28 @@ RANDOM_TRAJECTORY_LENGTH_RANGE = (1, 10) +@pytest.mark.timeout(600) def test_fuzz(env: LlvmEnv, observation_space: str, reward_space: str): """This test produces a random trajectory using a program generated using llvm-stress. """ - env.benchmark = env.datasets["llvm-stress-v0"].random_benchmark() + benchmark = env.datasets["generator://llvm-stress-v0"].random_benchmark() + print(benchmark.uri) # For debugging in case of failure. env.observation_space = observation_space env.reward_space = reward_space - env.reset() - apply_random_trajectory( - env, - random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE, - timeout=10, - ) - print(env.state) # For debugging in case of failure. + try: + env.reset(benchmark=benchmark) + apply_random_trajectory( + env, + random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE, + timeout=10, + ) + print(env.state) # For debugging in case of failure. + except BenchmarkInitError: + # Benchmark is invalid. + pass if __name__ == "__main__": diff --git a/tests/fuzzing/llvm_trajectory_replay_fuzz_test.py b/tests/fuzzing/llvm_trajectory_replay_fuzz_test.py index a95486f6a..b0c8b07cf 100644 --- a/tests/fuzzing/llvm_trajectory_replay_fuzz_test.py +++ b/tests/fuzzing/llvm_trajectory_replay_fuzz_test.py @@ -3,12 +3,11 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. """Fuzz test for LlvmEnv.validate().""" -import random - import numpy as np +import pytest +from compiler_gym.datasets import BenchmarkInitError from compiler_gym.envs import LlvmEnv -from tests.pytest_plugins.llvm import BENCHMARK_NAMES from tests.pytest_plugins.random_util import apply_random_trajectory from tests.test_main import main @@ -19,19 +18,26 @@ RANDOM_TRAJECTORY_LENGTH_RANGE = (1, 50) +@pytest.mark.timeout(600) def test_fuzz(env: LlvmEnv, reward_space: str): """This test produces a random trajectory, resets the environment, then replays the trajectory and checks that it produces the same state. """ env.observation_space = "Autophase" env.reward_space = reward_space + benchmark = env.datasets["generator://csmith-v0"].random_benchmark() + print(benchmark.uri) # For debugging in case of failure. + + try: + env.reset(benchmark=benchmark) + except BenchmarkInitError: + return - env.reset(benchmark=random.choice(BENCHMARK_NAMES)) trajectory = apply_random_trajectory( env, random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE ) print(env.state) # For debugging in case of failure. - env.reset() + env.reset(benchmark=benchmark) for i, (action, observation, reward, done) in enumerate(trajectory, start=1): print(f"Replaying step {i}: {env.action_space.flags[action]}") diff --git a/tests/llvm/BUILD b/tests/llvm/BUILD index 22f35fbdb..f37795922 100644 --- a/tests/llvm/BUILD +++ b/tests/llvm/BUILD @@ -68,6 +68,17 @@ py_test( ], ) +py_test( + name = "episode_reward_test", + timeout = "long", + srcs = ["episode_reward_test.py"], + deps = [ + "//compiler_gym/envs", + "//tests:test_main", + "//tests/pytest_plugins:llvm", + ], +) + py_test( name = "fork_env_test", timeout = "long", diff --git a/tests/llvm/episode_reward_test.py b/tests/llvm/episode_reward_test.py new file mode 100644 index 000000000..ba5d6e3d7 --- /dev/null +++ b/tests/llvm/episode_reward_test.py @@ -0,0 +1,38 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +"""Tests for LlvmEnv.episode_reward.""" +from compiler_gym.envs import LlvmEnv +from tests.test_main import main + +pytest_plugins = ["tests.pytest_plugins.llvm"] + + +def test_episode_reward_init_zero(env: LlvmEnv): + env.reward_space = "IrInstructionCount" + env.reset("cbench-v1/crc32") + assert env.episode_reward == 0 + _, reward, _, _ = env.step(env.action_space["-mem2reg"]) + assert reward > 0 + assert env.episode_reward == reward + env.reset() + assert env.episode_reward == 0 + + +def test_episode_reward_with_non_default_reward_space(env: LlvmEnv): + """Test that episode_reward is not updated when custom rewards passed to + step().""" + env.reward_space = "IrInstructionCountOz" + env.reset("cbench-v1/crc32") + assert env.episode_reward == 0 + _, rewards, _, _ = env.step( + env.action_space["-mem2reg"], + rewards=["IrInstructionCount"], + ) + assert rewards[0] > 0 + assert env.episode_reward == 0 + + +if __name__ == "__main__": + main() diff --git a/tests/requirements.txt b/tests/requirements.txt index c99c25b9c..6eadb637d 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -5,4 +5,5 @@ pytest-mock==3.6.0 pytest-shard==0.1.1 pytest-stress==1.0.1 pytest-sugar==0.9.4 +pytest-timeout==1.4.2 pytest-xdist==2.2.1 diff --git a/tests/wrappers/core_wrappers_test.py b/tests/wrappers/core_wrappers_test.py index a4c8f7a0b..8080288a7 100644 --- a/tests/wrappers/core_wrappers_test.py +++ b/tests/wrappers/core_wrappers_test.py @@ -124,6 +124,11 @@ def reward(self, reward): env.reset() _, reward, _, _ = env.step(0) assert reward == -5 + assert env.episode_reward == -5 + + _, reward, _, _ = env.step(0) + assert reward == -5 + assert env.episode_reward == -10 if __name__ == "__main__":