Skip to content

Commit

Permalink
Load crash coverage from crash dir on startup
Browse files Browse the repository at this point in the history
Closes #25
  • Loading branch information
senier committed Feb 10, 2024
1 parent e0cbabb commit 95a1a3c
Show file tree
Hide file tree
Showing 6 changed files with 168 additions and 148 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

- Saving fuzzer state to file (#15)
- Loading crash coverage from crash dir on startup (#25)

## [2.1.0] - 2024-02-10

Expand Down
89 changes: 51 additions & 38 deletions cobrafuzz/fuzzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,34 +185,53 @@ def __init__( # noqa: PLR0913, ref:#2
self._workers: list[tuple[MPProcess, mp.Queue[Update]]] = []

self._crash_dir = crash_dir
self._target = pickle.dumps(target)
self._target_bytes = pickle.dumps(target)

self._close_stderr = close_stderr
self._close_stdout = close_stdout
self._stat_frequency = stat_frequency
self._max_crashes = max_crashes
self._max_input_size = max_input_size
self._max_runs = max_runs
self._max_time = max_time
self._num_workers: int = num_workers or self._mp_ctx.cpu_count() - 1
self._seeds = seeds or []
self._state_file = state_file
self._state = st.State(seeds=seeds, max_input_size=max_input_size, file=state_file)

if regression:
for error_file in crash_dir.glob("*"):
if not error_file.is_file():
continue
with error_file.open("br") as f:
try:
target(f.read())
except Exception:
logging.exception(
"\n========================================================================\n"
"Testing %s:",
error_file,
)
self._load_crashes(regression=regression)

def _load_crashes(self, regression: bool) -> None:
    """
    Replay every file in the crash directory against the target.

    In normal mode the coverage of each raised exception is stored in the
    fuzzer state. In regression mode coverage is tracked in a throw-away
    state instead, each error with new coverage is logged, files that raise
    nothing are reported, and the process exits with status 0 afterwards.

    Arguments:
    ---------
    regression: Output unique errors and then exit.
    """

    run_target = cast(Callable[[bytes], None], pickle.loads(self._target_bytes))  # noqa: S301
    seen = st.State()

    for crash_file in self._crash_dir.glob("*"):
        # Only regular files are replayed; subdirectories are ignored.
        if crash_file.is_file():
            with crash_file.open("br") as handle:
                try:
                    run_target(handle.read())
                except Exception as error:  # noqa: BLE001
                    if not regression:
                        self._state.store_coverage(covered(error))
                    elif seen.store_coverage(covered(error)):
                        # Only errors contributing new coverage are reported.
                        logging.exception(
                            "\n========================================================================\n"
                            "Testing %s:",
                            crash_file,
                        )
                else:
                    if regression:
                        logging.error("No error when testing %s", crash_file)

    if regression:
        sys.exit(0)

def _log_stats(self, log_type: str, total_coverage: int, corpus_size: int) -> None:
Expand Down Expand Up @@ -250,40 +269,34 @@ def _write_sample(self, buf: bytes, prefix: str = "crash-") -> None:
if len(buf) < 200:
logging.info("sample = %s", buf.hex())

def _initialize_process(self, wid: int, state: st.State) -> tuple[MPProcess, mp.Queue[Update]]:
def _initialize_process(self, wid: int) -> tuple[MPProcess, mp.Queue[Update]]:
queue: mp.Queue[Update] = self._mp_ctx.Queue()
result = self._mp_ctx.Process(
target=worker,
args=(
wid,
self._target,
self._target_bytes,
queue,
self._result_queue,
self._close_stdout,
self._close_stderr,
self._stat_frequency,
state,
self._state,
),
)
result.start()
return result, queue

def start(self) -> None: # noqa: PLR0912
start_time = time.time()
state = st.State(self._seeds, self._max_input_size)

if self._state_file:
state.load(self._state_file)

self._workers = [
self._initialize_process(wid=wid, state=state) for wid in range(self._num_workers)
]
self._workers = [self._initialize_process(wid=wid) for wid in range(self._num_workers)]

logging.info(
"#0 READ units: %d workers: %d seeds: %d",
state.size,
self._state.size,
self._num_workers,
len(self._seeds),
self._state.num_seeds,
)

while True:
Expand Down Expand Up @@ -311,19 +324,17 @@ def start(self) -> None: # noqa: PLR0912
self._current_runs += result.runs

if isinstance(result, Error):
improvement = state.store_coverage(result.covered)
improvement = self._state.store_coverage(result.covered)
if improvement:
self._current_crashes += 1
self._write_sample(result.data)

elif isinstance(result, Report):
improvement = state.store_coverage(result.covered)
improvement = self._state.store_coverage(result.covered)
if improvement:
self._log_stats("NEW", state.total_coverage, state.size)
state.put_input(bytearray(result.data))

if self._state_file:
state.save(self._state_file)
self._log_stats("NEW", self._state.total_coverage, self._state.size)
self._state.put_input(bytearray(result.data))
self._state.save()

for wid, (_, queue) in enumerate(self._workers):
if wid != result.wid:
Expand All @@ -336,7 +347,9 @@ def start(self) -> None: # noqa: PLR0912
assert False, f"Unhandled result type: {type(result)}"

if (time.time() - self._last_stats_time) > self._stat_frequency:
self._log_stats("PULSE", state.total_coverage, state.size)
self._log_stats("PULSE", self._state.total_coverage, self._state.size)

self._state.save()

for _, queue in self._workers:
queue.cancel_join_thread()
Expand All @@ -347,4 +360,4 @@ def start(self) -> None: # noqa: PLR0912

for p, _ in self._workers:
p.join()
sys.exit(0)
sys.exit(0 if self._current_crashes == 0 else 1)
52 changes: 34 additions & 18 deletions cobrafuzz/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,39 +18,40 @@ def __init__(
self,
seeds: Optional[list[Path]] = None,
max_input_size: int = 4096,
file: Optional[Path] = None,
):
seeds = seeds or []

self._VERSION = 1
self._max_input_size = max_input_size
self._covered: set[tuple[Optional[str], Optional[int], str, int]] = set()
self._inputs: list[bytearray] = []
self._num_seeds = len(seeds)
self._file = file

for path in [p for p in seeds or [] if p.is_file()] + [
f for p in seeds or [] if not p.is_file() for f in p.glob("*") if f.is_file()
for path in [p for p in seeds if p.is_file()] + [
f for p in seeds if not p.is_file() for f in p.glob("*") if f.is_file()
]:
with path.open("rb") as f:
self._inputs.append(bytearray(f.read()))
if not self._inputs:
self._inputs.append(bytearray(0))
self._load()

def save(self, filename: Path) -> None:
with filename.open(mode="w+") as sf:
json.dump(
obj={
"version": self._VERSION,
"coverage": list(self._covered),
"population": [str(bytes(i))[2:-1] for i in self._inputs],
},
fp=sf,
ensure_ascii=True,
)
@property
def num_seeds(self) -> int:
    """Return the stored seed count (`_num_seeds`)."""
    count: int = self._num_seeds
    return count

def _load(self) -> None:
if not self._file:
return

def load(self, filename: Path) -> None:
try:
with filename.open() as sf:
with self._file.open() as sf:
data = json.load(sf)
if "version" not in data or data["version"] != self._VERSION:
raise LoadError(
f"Invalid version in state file {filename} (expected {self._VERSION})",
f"Invalid version in state file {self._file} (expected {self._VERSION})",
)
self._covered |= {tuple(e) for e in data["coverage"]}
self._inputs.extend(
Expand All @@ -59,10 +60,25 @@ def load(self, filename: Path) -> None:
except FileNotFoundError:
pass
except (json.JSONDecodeError, TypeError):
filename.unlink()
logging.info("Malformed state file: %s", filename)
self._file.unlink()
logging.info("Malformed state file: %s", self._file)
except OSError as e:
logging.info("Error opening state file: %s", e)
self._file = None

def save(self) -> None:
    """Write version, coverage and population as JSON to the state file, if one is set."""
    state_file = self._file
    if not state_file:
        return

    with state_file.open(mode="w+") as handle:
        snapshot = {
            "version": self._VERSION,
            "coverage": list(self._covered),
            # Each input is stored as the text between the quotes of its bytes repr.
            "population": [str(bytes(entry))[2:-1] for entry in self._inputs],
        }
        json.dump(obj=snapshot, fp=handle, ensure_ascii=True)

def store_coverage(
self,
Expand Down
80 changes: 73 additions & 7 deletions tests/unit/test_fuzzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,19 @@ def non_crashing_target(data: bytes) -> None:
return # pragma: no cover


def crashing_target(data: bytes) -> None:
def crashing_target_simple(data: bytes) -> None:
print("Failing Target\n", flush=True) # noqa: T201
print("Failing Target\n", file=sys.stderr, flush=True) # noqa: T201
if len(data) > 0 and data[0] == 42:
raise DummyError


def crashing_target_hard(data: bytes) -> None:
    """Raise DummyError when the first byte is > 128 and the second byte is > 200."""
    # The two checks are intentionally left as nested `if` statements (SIM102 is
    # suppressed) instead of being merged into one condition.
    # NOTE(review): presumably so the fuzzer has to reach the crash in two
    # separate steps - confirm.
    if len(data) > 0 and data[0] > 128:  # noqa: SIM102
        if len(data) > 1 and data[1] > 200:
            raise DummyError


def test_no_crash(tmp_path: Path) -> None:
f = fuzzer.Fuzzer(target=non_crashing_target, crash_dir=tmp_path, stat_frequency=1, max_time=3)
with pytest.raises(SystemExit, match="^0$"):
Expand Down Expand Up @@ -60,22 +66,82 @@ def test_regression(tmp_path: Path) -> None:
cf.write(b"*foo")
with (tmp_path / "crash2").open("wb") as cf:
cf.write(b"*bar")
with (tmp_path / "crash2").open("wb") as cf:
with (tmp_path / "crash3").open("wb") as cf:
cf.write(b"*bar")
with (tmp_path / "crash4").open("wb") as cf:
cf.write(b"baz")
(tmp_path / "subdir").mkdir()
with pytest.raises(SystemExit, match="^0$"):
fuzzer.Fuzzer(target=crashing_target, crash_dir=tmp_path, regression=True)
fuzzer.Fuzzer(target=crashing_target_simple, crash_dir=tmp_path, regression=True)


def test_load_crashes(tmp_path: Path) -> None:
    """A crash file in the crash dir is replayed in regression mode, which exits with status 0."""
    crash_file = tmp_path / "crash"
    crash_file.write_bytes(b"*foo")

    with pytest.raises(SystemExit, match="^0$"):
        fuzzer.Fuzzer(
            target=crashing_target_simple,
            crash_dir=tmp_path,
            regression=True,
        )


def test_state(tmp_path: Path) -> None:
state_file = tmp_path / "state.json"
for _ in range(2):
assert not state_file.exists()
for i in range(2):
f = fuzzer.Fuzzer(
target=non_crashing_target,
target=crashing_target_simple,
crash_dir=tmp_path,
max_runs=10,
max_runs=1000,
state_file=state_file,
close_stderr=True,
close_stdout=True,
)
with pytest.raises(SystemExit, match="^0$"):
with pytest.raises(SystemExit, match="^1$" if i == 0 else "^0$"):
f.start()
assert state_file.exists()


def test_crash_simple(tmp_path: Path) -> None:
    """Fuzzing the simple crashing target exits with status 1 and leaves a crash dir behind."""
    crashes = tmp_path.joinpath("crashes")
    instance = fuzzer.Fuzzer(
        target=crashing_target_simple,
        crash_dir=crashes,
        max_crashes=1,
    )

    with pytest.raises(SystemExit, match="^1$"):
        instance.start()

    assert crashes.is_dir()


def test_crash_hard(tmp_path: Path) -> None:
    """Fuzzing the hard crashing target with two workers exits with status 1 and creates the crash dir."""
    crashes = tmp_path.joinpath("crashes")
    options = {
        "crash_dir": crashes,
        "target": crashing_target_hard,
        "max_time": 5,
        "num_workers": 2,
        "stat_frequency": 1,
    }
    instance = fuzzer.Fuzzer(**options)

    with pytest.raises(SystemExit, match="^1$"):
        instance.start()

    assert crashes.is_dir()


def test_crash_with_crash_dir(tmp_path: Path) -> None:
    """Fuzzing with an explicit crash dir exits with status 1 and the directory exists afterwards."""
    crashes = tmp_path.joinpath("crashes")
    instance = fuzzer.Fuzzer(target=crashing_target_simple, crash_dir=crashes, max_crashes=1)

    with pytest.raises(SystemExit, match="^1$"):
        instance.start()

    assert crashes.is_dir()


def test_crash_stderr_stdout_closed(tmp_path: Path) -> None:
    """With close_stdout and close_stderr set, a crash still exits with status 1 and creates the crash dir."""
    crashes = tmp_path.joinpath("crashes")
    options = {
        "target": crashing_target_simple,
        "close_stderr": True,
        "close_stdout": True,
        "crash_dir": crashes,
        "max_crashes": 1,
    }
    instance = fuzzer.Fuzzer(**options)

    with pytest.raises(SystemExit, match="^1$"):
        instance.start()

    assert crashes.is_dir()
Loading

0 comments on commit 95a1a3c

Please sign in to comment.