Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v0.1.9 pre-release fixups #287

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/fuzz.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,4 @@ jobs:
BAZEL_TEST_OPTS: --config=ci

- name: Test
run: FUZZ_TIME=600 make fuzz
run: FUZZ_TIME=600 make install-fuzz
18 changes: 10 additions & 8 deletions compiler_gym/envs/compiler_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -973,23 +973,25 @@ def step(
reward_spaces: List[Reward] = []

# Perform the underlying environment step.
observations, rewards, done, info = self.raw_step(
observation_values, reward_values, done, info = self.raw_step(
actions, observation_spaces, reward_spaces
)

# Translate observations lists back to the appropriate types.
if self.observation_space_spec and len(observations) == 1:
observations = observations[0]
if observations is None and self.observation_space_spec:
observation_values = observation_values[0]
elif not observation_spaces:
observations = None
observation_values = None

# Translate reward lists back to the appropriate types.
if self.reward_space_spec and len(rewards) == 1:
rewards = rewards[0]
if rewards is None and self.reward_space:
reward_values = reward_values[0]
# Update the cumulative episode reward
self.episode_reward += reward_values
elif not reward_spaces:
rewards = None
reward_values = None

return observations, rewards, done, info
return observation_values, reward_values, done, info

def render(
self,
Expand Down
10 changes: 9 additions & 1 deletion compiler_gym/spaces/reward.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from typing import List, Optional, Tuple
from typing import List, Optional, Tuple, Union

import numpy as np

Expand Down Expand Up @@ -137,6 +137,14 @@ def range(self) -> Tuple[RewardType, RewardType]:
def __repr__(self):
return self.id

def __eq__(self, other: Union["Reward", str]) -> bool:
if isinstance(other, str):
return self.id == other
elif isinstance(other, Reward):
return self.id == other.id
else:
return False


class DefaultRewardFromObservation(Reward):
def __init__(self, observation_name: str, **kwargs):
Expand Down
5 changes: 5 additions & 0 deletions compiler_gym/third_party/llvm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@ def llvm_stress_path() -> Path:
return download_llvm_files() / "bin/llvm-stress"


def llvm_diff_path() -> Path:
    """Return the path of llvm-diff."""
    install_root = download_llvm_files()
    return install_root / "bin/llvm-diff"


def opt_path() -> Path:
    """Return the path of opt."""
    install_root = download_llvm_files()
    return install_root / "bin/opt"
11 changes: 10 additions & 1 deletion compiler_gym/wrappers/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,16 @@ def reset(self, *args, **kwargs):

def step(self, *args, **kwargs):
    """Step the wrapped environment and translate the reward.

    The diff hunk as captured here retained the superseded one-line
    return alongside its replacement; only the replacement is kept.

    :return: The ``(observation, reward, done, info)`` tuple from the
        wrapped environment, with ``reward`` passed through
        :meth:`reward` and ``episode_reward`` corrected to accumulate
        the transformed value instead of the base value.
    """
    observation, reward, done, info = self.env.step(*args, **kwargs)
    # Undo the episode_reward update and reapply it once we have transformed
    # the reward.
    #
    # TODO(cummins): Refactor step() so that we don't have to do this
    # recalculation of episode_reward, as this is prone to errors if, say,
    # the base reward returns NaN or an invalid type.
    self.unwrapped.episode_reward -= reward
    reward = self.reward(reward)
    self.unwrapped.episode_reward += reward
    return observation, reward, done, info

def reward(self, reward):
"""Translate a reward to the new space."""
Expand Down
22 changes: 11 additions & 11 deletions tests/fuzzing/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,17 @@
# LICENSE file in the root directory of this source tree.
load("@rules_python//python:defs.bzl", "py_test")

# Manual-only fuzz test target: excluded from wildcard builds via the
# "manual" tag; run explicitly to validate random trajectories on cbench.
py_test(
    name = "llvm_cbench_validate_fuzz_test",
    srcs = ["llvm_cbench_validate_fuzz_test.py"],
    tags = ["manual"],
    deps = [
        "//compiler_gym",
        "//tests:test_main",
        "//tests/pytest_plugins:llvm",
    ],
)

py_test(
name = "llvm_commandline_opt_equivalence_fuzz_test",
srcs = ["llvm_commandline_opt_equivalence_fuzz_test.py"],
Expand Down Expand Up @@ -68,17 +79,6 @@ py_test(
],
)

py_test(
name = "llvm_validate_fuzz_test",
srcs = ["llvm_validate_fuzz_test.py"],
tags = ["manual"],
deps = [
"//compiler_gym",
"//tests:test_main",
"//tests/pytest_plugins:llvm",
],
)

py_test(
name = "llvm_stress_fuzz_test",
timeout = "long",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
"""Fuzz test for LlvmEnv.validate()."""
import random

import pytest

from compiler_gym.envs import LlvmEnv
from tests.pytest_plugins.llvm import VALIDATABLE_CBENCH_URIS
from tests.test_main import main
Expand All @@ -16,6 +18,7 @@
RANDOM_TRAJECTORY_LENGTH_RANGE = (1, 50)


@pytest.mark.timeout(600)
def test_fuzz(env: LlvmEnv):
"""This test generates a random trajectory and validates the semantics."""
benchmark = random.choice(VALIDATABLE_CBENCH_URIS)
Expand All @@ -29,7 +32,8 @@ def test_fuzz(env: LlvmEnv):
break # Broken trajectory, retry.
else:
print(f"Validating state {env.state}")
assert env.validate() == []
result = env.validate()
assert result.okay(), result
# Stop the test.
break

Expand Down
30 changes: 17 additions & 13 deletions tests/fuzzing/llvm_commandline_opt_equivalence_fuzz_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Fuzz test for LlvmEnv.commandline()."""
import os
import subprocess
from difflib import unified_diff
from pathlib import Path

import pytest
Expand All @@ -22,10 +22,13 @@
RANDOM_TRAJECTORY_LENGTH_RANGE = (1, 50)


@pytest.mark.timeout(600)
def test_fuzz(env: LlvmEnv, tmpwd: Path, llvm_opt: Path, llvm_diff: Path):
"""This test produces a random trajectory and then uses the commandline()
generated with opt to check that the states are equivalent.
"""
del tmpwd

env.reset()
env.write_ir("input.ll")
assert Path("input.ll").is_file()
Expand All @@ -47,20 +50,21 @@ def test_fuzz(env: LlvmEnv, tmpwd: Path, llvm_opt: Path, llvm_diff: Path):
commandline, env={"PATH": str(llvm_opt.parent)}, shell=True, timeout=60
)
assert Path("output.ll").is_file()
os.rename("output.ll", "opt.ll")

with open("output.ll") as f1, open("env.ll") as f2:
# Diff the IR files but exclude the first line which is the module name.
diff = list(unified_diff(f1.readlines()[1:], f2.readlines()[1:]))
diff = subprocess.Popen(
[llvm_diff, "opt.ll", "env.ll"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
)
stdout, stderr = diff.communicate(timeout=300)

if diff and len(diff) < 25:
diff = "\n".join(diff)
pytest.fail(f"Opt produced different output to CompilerGym:\n{diff}")
elif diff:
# If it's a big diff then we will require the user to reproduce it
# themselves using the environment state we printed earlier.
pytest.fail(
f"Opt produced different output to CompilerGym ({len(diff)}-line diff)"
)
if diff.returncode:
pytest.fail(
f"Opt produced different output to CompilerGym "
f"(returncode: {diff.returncode}):\n{stdout}\n{stderr}"
)


if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions tests/fuzzing/llvm_deterministic_action_fuzz_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def sha1(string: str):
return sha1.hexdigest()


@pytest.mark.timeout(600)
def test_fuzz(env: LlvmEnv):
"""Run an action multiple times from the same starting state and check that
the generated LLVM-IR is the same.
Expand Down
1 change: 1 addition & 0 deletions tests/fuzzing/llvm_fork_env_fuzz_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
POST_FORK_ACTIONS = 10


@pytest.mark.timeout(600)
def test_fuzz(env: LlvmEnv, reward_space: str):
"""This test generates a random trajectory and checks that fork() produces
an equivalent state. It then runs a second trajectory on the two
Expand Down
4 changes: 3 additions & 1 deletion tests/fuzzing/llvm_random_actions_fuzz_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import gym
import numpy as np
import pytest

from compiler_gym.third_party.autophase import AUTOPHASE_FEATURE_DIM
from tests.test_main import main
Expand All @@ -18,7 +19,8 @@
FUZZ_TIME_SECONDS = 2


def test_benchmark_random_actions(benchmark_name: str):
@pytest.mark.timeout(600)
def test_fuzz(benchmark_name: str):
"""Run randomly selected actions on a benchmark until a minimum amount of time has elapsed."""
env = gym.make(
"llvm-v0",
Expand Down
25 changes: 17 additions & 8 deletions tests/fuzzing/llvm_stress_fuzz_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Fuzz test LLVM backend using llvm-stress."""
import pytest

from compiler_gym.datasets import BenchmarkInitError
from compiler_gym.envs import LlvmEnv
from tests.pytest_plugins.random_util import apply_random_trajectory
from tests.test_main import main
Expand All @@ -13,22 +16,28 @@
RANDOM_TRAJECTORY_LENGTH_RANGE = (1, 10)


@pytest.mark.timeout(600)
def test_fuzz(env: LlvmEnv, observation_space: str, reward_space: str):
    """This test produces a random trajectory using a program generated using
    llvm-stress.

    The diff hunk as captured here interleaved the superseded lines
    (direct ``env.benchmark`` assignment and an unguarded reset) with
    their replacements; only the replacement version is kept.
    """
    benchmark = env.datasets["generator://llvm-stress-v0"].random_benchmark()
    print(benchmark.uri)  # For debugging in case of failure.

    env.observation_space = observation_space
    env.reward_space = reward_space

    try:
        env.reset(benchmark=benchmark)
        apply_random_trajectory(
            env,
            random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE,
            timeout=10,
        )
        print(env.state)  # For debugging in case of failure.
    except BenchmarkInitError:
        # Benchmark is invalid.
        pass


if __name__ == "__main__":
Expand Down
16 changes: 11 additions & 5 deletions tests/fuzzing/llvm_trajectory_replay_fuzz_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Fuzz test for LlvmEnv.validate()."""
import random

import numpy as np
import pytest

from compiler_gym.datasets import BenchmarkInitError
from compiler_gym.envs import LlvmEnv
from tests.pytest_plugins.llvm import BENCHMARK_NAMES
from tests.pytest_plugins.random_util import apply_random_trajectory
from tests.test_main import main

Expand All @@ -19,19 +18,26 @@
RANDOM_TRAJECTORY_LENGTH_RANGE = (1, 50)


@pytest.mark.timeout(600)
def test_fuzz(env: LlvmEnv, reward_space: str):
"""This test produces a random trajectory, resets the environment, then
replays the trajectory and checks that it produces the same state.
"""
env.observation_space = "Autophase"
env.reward_space = reward_space
benchmark = env.datasets["generator://csmith-v0"].random_benchmark()
print(benchmark.uri) # For debugging in case of failure.

try:
env.reset(benchmark=benchmark)
except BenchmarkInitError:
return

env.reset(benchmark=random.choice(BENCHMARK_NAMES))
trajectory = apply_random_trajectory(
env, random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE
)
print(env.state) # For debugging in case of failure.
env.reset()
env.reset(benchmark=benchmark)

for i, (action, observation, reward, done) in enumerate(trajectory, start=1):
print(f"Replaying step {i}: {env.action_space.flags[action]}")
Expand Down
11 changes: 11 additions & 0 deletions tests/llvm/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,17 @@ py_test(
],
)

# Unit tests for CompilerEnv.episode_reward accumulation semantics.
py_test(
    name = "episode_reward_test",
    timeout = "long",
    srcs = ["episode_reward_test.py"],
    deps = [
        "//compiler_gym/envs",
        "//tests:test_main",
        "//tests/pytest_plugins:llvm",
    ],
)

py_test(
name = "fork_env_test",
timeout = "long",
Expand Down
38 changes: 38 additions & 0 deletions tests/llvm/episode_reward_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Tests for LlvmEnv.episode_reward."""
from compiler_gym.envs import LlvmEnv
from tests.test_main import main

pytest_plugins = ["tests.pytest_plugins.llvm"]


def test_episode_reward_init_zero(env: LlvmEnv):
    """episode_reward starts at zero, tracks step rewards, and resets to zero."""
    env.reward_space = "IrInstructionCount"
    env.reset("cbench-v1/crc32")
    assert env.episode_reward == 0

    _, step_reward, _, _ = env.step(env.action_space["-mem2reg"])
    assert step_reward > 0
    assert env.episode_reward == step_reward

    # A fresh episode discards the accumulated reward.
    env.reset()
    assert env.episode_reward == 0


def test_episode_reward_with_non_default_reward_space(env: LlvmEnv):
    """Test that episode_reward is not updated when custom rewards passed to
    step()."""
    env.reward_space = "IrInstructionCountOz"
    env.reset("cbench-v1/crc32")
    assert env.episode_reward == 0

    # Request a reward space other than the configured one for this step.
    step_result = env.step(
        env.action_space["-mem2reg"],
        rewards=["IrInstructionCount"],
    )
    custom_rewards = step_result[1]
    assert custom_rewards[0] > 0
    # The custom reward must not leak into the episode accumulator.
    assert env.episode_reward == 0


# Allow running this test file directly via the project's test entry point.
if __name__ == "__main__":
    main()
Loading