Skip to content

Commit 1d2c8d1

Browse files
javanlacerdaalhijazijonathanmetzman
authored
[Centipede] Implement corpus prune for centipede on corpus prune task (#4707)
It implements corpus pruning for centipede fuzzer. --------- Signed-off-by: Javan Lacerda <javanlacerda@google.com> Co-authored-by: Ali Hijazi <ahijazi@google.com> Co-authored-by: Jonathan Metzman <metzman@chromium.org>
1 parent 956647f commit 1d2c8d1

File tree

6 files changed

+171
-282
lines changed

6 files changed

+171
-282
lines changed

src/clusterfuzz/_internal/bot/fuzzers/centipede/engine.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -551,8 +551,8 @@ def minimize_corpus(self,
551551
# Only remove this directory if it was created in this method.
552552
shutil.rmtree(full_corpus_workdir)
553553

554-
return engine.ReproduceResult(result.command, result.return_code,
555-
result.time_executed, result.output)
554+
return engine.FuzzResult(result.output, result.command, [], None,
555+
result.time_executed, result.timed_out)
556556

557557
def _get_smallest_crasher(self, workdir_path):
558558
"""Returns the path to the smallest crash in Centipede's |workdir_path|."""

src/clusterfuzz/_internal/bot/tasks/utasks/corpus_pruning_task.py

+121-85
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import os
2020
import random
2121
import shutil
22-
from typing import Dict
2322
from typing import List
2423
import zipfile
2524

@@ -301,8 +300,8 @@ def _cross_pollinate_other_fuzzer_corpuses(self):
301300
'Failed to unpack corpus backup from url %s.' % corpus_backup_url)
302301

303302

304-
class Runner:
305-
"""Runner for libFuzzer."""
303+
class BaseRunner:
304+
"""Base Runner"""
306305

307306
def __init__(self, build_directory, context):
308307
self.build_directory = build_directory
@@ -312,12 +311,44 @@ def __init__(self, build_directory, context):
312311
self.build_directory, self.context.fuzz_target.binary)
313312
if not self.target_path:
314313
raise CorpusPruningError(
315-
'Failed to get fuzzer path for %s.' % self.context.fuzz_target.binary)
316-
314+
f'Failed to get fuzzer path for {self.context.fuzz_target.binary}')
317315
self.fuzzer_options = options.get_fuzz_target_options(self.target_path)
318316

319-
def get_libfuzzer_flags(self):
320-
"""Get default libFuzzer options."""
317+
def get_fuzzer_flags(self):
318+
return []
319+
320+
def process_sanitizer_options(self):
321+
"""Process sanitizer options overrides."""
322+
if not self.fuzzer_options:
323+
return
324+
325+
# Only need to look at ASan, as that's what we prune with.
326+
overrides = self.fuzzer_options.get_asan_options()
327+
if not overrides:
328+
return
329+
330+
asan_options = environment.get_memory_tool_options('ASAN_OPTIONS')
331+
if not asan_options:
332+
return
333+
asan_options.update(overrides)
334+
environment.set_memory_tool_options('ASAN_OPTIONS', asan_options)
335+
336+
def reproduce(self, input_path, arguments, max_time):
337+
return self.context.engine.reproduce(self.target_path, input_path,
338+
arguments, max_time)
339+
340+
def minimize_corpus(self, arguments, input_dirs, output_dir, reproducers_dir,
341+
max_time):
342+
return self.context.engine.minimize_corpus(self.target_path, arguments,
343+
input_dirs, output_dir,
344+
reproducers_dir, max_time)
345+
346+
347+
class LibFuzzerRunner(BaseRunner):
348+
"""Runner for libFuzzer."""
349+
350+
def get_fuzzer_flags(self):
351+
"""Get default libFuzzer options for pruning."""
321352
rss_limit = RSS_LIMIT
322353
max_len = engine_common.CORPUS_INPUT_SIZE_LIMIT
323354
detect_leaks = 1
@@ -352,22 +383,6 @@ def get_libfuzzer_flags(self):
352383

353384
return arguments.list()
354385

355-
def process_sanitizer_options(self):
356-
"""Process sanitizer options overrides."""
357-
if not self.fuzzer_options:
358-
return
359-
360-
# Only need to look at ASan, as that's what we prune with.
361-
overrides = self.fuzzer_options.get_asan_options()
362-
if not overrides:
363-
return
364-
365-
asan_options = environment.get_memory_tool_options('ASAN_OPTIONS')
366-
if not asan_options:
367-
return
368-
asan_options.update(overrides)
369-
environment.set_memory_tool_options('ASAN_OPTIONS', asan_options)
370-
371386
def reproduce(self, input_path, arguments, max_time):
372387
return self.context.engine.reproduce(self.target_path, input_path,
373388
arguments, max_time)
@@ -379,33 +394,81 @@ def minimize_corpus(self, arguments, input_dirs, output_dir, reproducers_dir,
379394
reproducers_dir, max_time)
380395

381396

382-
class CorpusPruner:
383-
"""Class that handles corpus pruning."""
397+
class CentipedeRunner(BaseRunner):
398+
"""Runner implementation for Centipede fuzzing engine."""
399+
400+
401+
class CorpusPrunerBase:
402+
"""Base class for corpus pruning that is engine-agnostic."""
384403

385404
def __init__(self, runner):
386405
self.runner = runner
387-
self.context = self.runner.context
406+
self.context = runner.context
407+
408+
def run(self, initial_corpus_path, minimized_corpus_path, bad_units_path):
409+
"""Run generic corpus pruning."""
410+
if not shell.get_directory_file_count(initial_corpus_path):
411+
# Empty corpus, nothing to do.
412+
return None
413+
414+
# Unpack seed corpus if needed.
415+
engine_common.unpack_seed_corpus_if_needed(
416+
self.runner.target_path, initial_corpus_path, force_unpack=True)
417+
418+
environment.reset_current_memory_tool_options(
419+
redzone_size=MIN_REDZONE, leaks=True)
420+
self.runner.process_sanitizer_options()
421+
422+
additional_args = self.runner.get_fuzzer_flags()
423+
logs.info('Running merge...')
424+
try:
425+
result = self.runner.minimize_corpus(
426+
additional_args, [initial_corpus_path], minimized_corpus_path,
427+
bad_units_path, CORPUS_PRUNING_TIMEOUT)
428+
except TimeoutError as e:
429+
raise CorpusPruningError(
430+
'Corpus pruning timed out while minimizing corpus\n' + repr(e))
431+
except engine.Error as e:
432+
raise CorpusPruningError('Corpus pruning failed to minimize corpus\n' +
433+
repr(e))
434+
435+
symbolized_output = stack_symbolizer.symbolize_stacktrace(result.logs)
436+
437+
if not shell.get_directory_file_count(minimized_corpus_path):
438+
raise CorpusPruningError('Corpus pruning failed to minimize corpus\n' +
439+
symbolized_output)
440+
441+
logs.info('Corpus merge finished successfully.', output=symbolized_output)
442+
return result.stats
443+
444+
def process_bad_units(self, bad_units_path, quarantine_corpus_path):
445+
del bad_units_path
446+
del quarantine_corpus_path
447+
return {}
448+
449+
450+
class LibFuzzerPruner(CorpusPrunerBase):
451+
"""
452+
LibFuzzerPruner is a specialized pruner for libFuzzer that handles
453+
quarantining of problematic units and related special cases.
454+
"""
388455

389456
def _run_single_unit(self, unit_path):
390-
"""Run a single unit, and return the result."""
391-
arguments = self.runner.get_libfuzzer_flags()
457+
arguments = self.runner.get_fuzzer_flags() # Expect libFuzzer flags.
392458
return self.runner.reproduce(unit_path, arguments, SINGLE_UNIT_TIMEOUT)
393459

394460
def _quarantine_unit(self, unit_path, quarantine_corpus_path):
395-
"""Moves the given unit to the quarantine, and returns the path to the unit
396-
in the quarantine."""
397461
quarantined_unit_path = os.path.join(quarantine_corpus_path,
398462
os.path.basename(unit_path))
399463
shutil.move(unit_path, quarantined_unit_path)
400-
401464
return quarantined_unit_path
402465

403-
def process_bad_units(self, bad_units_path, quarantine_corpus_path
404-
) -> Dict[str, uworker_msg_pb2.CrashInfo]: # pylint: disable=no-member
405-
"""Process bad units found during merge."""
406-
# TODO(ochang): A lot of this function is similar to parts of fuzz_task.
407-
# Ideally fuzz_task can be refactored in a way that lets us share the common
408-
# code.
466+
def process_bad_units(self, bad_units_path, quarantine_corpus_path):
467+
"""
468+
Process bad units by running each test case individually,
469+
quarantining those that timeout, OOM, or crash due to memory sanitizer
470+
errors.
471+
"""
409472
crashes = {}
410473

411474
environment.reset_current_memory_tool_options(redzone_size=DEFAULT_REDZONE)
@@ -415,41 +478,36 @@ def process_bad_units(self, bad_units_path, quarantine_corpus_path
415478
corpus_file_paths = _get_corpus_file_paths(bad_units_path)
416479
num_bad_units = 0
417480

418-
# Run each corpus item individually.
419481
for i, unit_path in enumerate(corpus_file_paths, 1):
420482
if i % 100 == 0:
421483
logs.info('Up to %d' % i)
422484

423485
unit_name = os.path.basename(unit_path)
424486
if unit_name.startswith('timeout-') or unit_name.startswith('oom-'):
425-
# Don't waste time re-running timeout or oom testcases.
487+
# Immediately quarantine timeouts/oom testcases.
426488
self._quarantine_unit(unit_path, quarantine_corpus_path)
427489
num_bad_units += 1
428490
continue
429491

430492
try:
431493
result = self._run_single_unit(unit_path)
432494
except TimeoutError:
433-
# Slow unit. Quarantine it.
434495
self._quarantine_unit(unit_path, quarantine_corpus_path)
435496
num_bad_units += 1
436497
continue
437498

438499
if not crash_analyzer.is_memory_tool_crash(result.output):
439-
# Didn't crash.
440500
continue
441501

442-
# Get memory tool crash information.
443502
state = stack_analyzer.get_crash_data(result.output, symbolize_flag=True)
444503

445-
# Crashed or caused a leak. Quarantine it.
504+
# Quarantine the crashing unit.
446505
unit_path = self._quarantine_unit(unit_path, quarantine_corpus_path)
447506
num_bad_units += 1
448507

449508
if crash_analyzer.ignore_stacktrace(state.crash_stacktrace):
450509
continue
451510

452-
# Local de-duplication.
453511
if state.crash_state not in crashes:
454512
security_flag = crash_analyzer.is_security_issue(
455513
state.crash_stacktrace, state.crash_type, state.crash_address)
@@ -460,49 +518,13 @@ def process_bad_units(self, bad_units_path, quarantine_corpus_path
460518
crash_stacktrace=state.crash_stacktrace,
461519
unit_path=unit_path,
462520
security_flag=security_flag)
463-
464521
logs.info(
465522
f'Found {num_bad_units} bad units, {len(crashes)} unique crashes.')
466523
return crashes
467524

468-
def run(self, initial_corpus_path, minimized_corpus_path, bad_units_path):
469-
"""Run corpus pruning. Output result to directory."""
470-
if not shell.get_directory_file_count(initial_corpus_path):
471-
logs.info('Empty corpus, nothing to do.')
472-
return None
473525

474-
# Set memory tool options and fuzzer arguments.
475-
engine_common.unpack_seed_corpus_if_needed(
476-
self.runner.target_path, initial_corpus_path, force_unpack=True)
477-
478-
environment.reset_current_memory_tool_options(
479-
redzone_size=MIN_REDZONE, leaks=True)
480-
self.runner.process_sanitizer_options()
481-
additional_args = self.runner.get_libfuzzer_flags()
482-
483-
# Execute fuzzer with arguments for corpus pruning.
484-
logs.info('Running merge...')
485-
try:
486-
result = self.runner.minimize_corpus(
487-
additional_args, [initial_corpus_path], minimized_corpus_path,
488-
bad_units_path, CORPUS_PRUNING_TIMEOUT)
489-
except TimeoutError as e:
490-
raise CorpusPruningError(
491-
'Corpus pruning timed out while minimizing corpus\n' + repr(e))
492-
except engine.Error as e:
493-
raise CorpusPruningError('Corpus pruning failed to minimize corpus\n' +
494-
repr(e))
495-
496-
symbolized_output = stack_symbolizer.symbolize_stacktrace(result.logs)
497-
498-
# Sanity check that there are files in minimized corpus after merging.
499-
if not shell.get_directory_file_count(minimized_corpus_path):
500-
raise CorpusPruningError('Corpus pruning failed to minimize corpus\n' +
501-
symbolized_output)
502-
503-
logs.info('Corpus merge finished successfully.', output=symbolized_output)
504-
505-
return result.stats
526+
class CentipedePruner(CorpusPrunerBase):
527+
"""Centipede pruner."""
506528

507529

508530
class CrossPollinator:
@@ -597,6 +619,22 @@ def _record_cross_pollination_stats(output):
597619
client.insert([big_query.Insert(row=bigquery_row, insert_id=None)])
598620

599621

622+
def _get_pruner_and_runner(context):
623+
"""Get pruner and runner objects according to the FuzzTarget in the context
624+
"""
625+
build_directory = environment.get_value('BUILD_DIR')
626+
if context.fuzz_target.engine.lower() == 'libfuzzer':
627+
runner = LibFuzzerRunner(build_directory, context)
628+
pruner = LibFuzzerPruner(runner)
629+
elif context.fuzz_target.engine.lower() == 'centipede':
630+
runner = CentipedeRunner(build_directory, context)
631+
pruner = CentipedePruner(runner)
632+
else:
633+
raise CorpusPruningError(
634+
'Corpus pruner task does not support the given engine.')
635+
return pruner, runner
636+
637+
600638
def do_corpus_pruning(uworker_input, context, revision) -> CorpusPruningResult:
601639
"""Run corpus pruning."""
602640
# Set |FUZZ_TARGET| environment variable to help with unarchiving only fuzz
@@ -611,10 +649,8 @@ def do_corpus_pruning(uworker_input, context, revision) -> CorpusPruningResult:
611649
revision=revision, fuzz_target=context.fuzz_target.binary):
612650
raise CorpusPruningError('Failed to setup build.')
613651

614-
build_directory = environment.get_value('BUILD_DIR')
615652
start_time = datetime.datetime.utcnow()
616-
runner = Runner(build_directory, context)
617-
pruner = CorpusPruner(runner)
653+
pruner, runner = _get_pruner_and_runner(context)
618654
fuzzer_binary_name = os.path.basename(runner.target_path)
619655

620656
logs.info('Getting the initial corpus to process from GCS.')

src/clusterfuzz/_internal/tests/core/bot/fuzzers/centipede/centipede_engine_test.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,6 @@ def test_crash_timeout(self):
383383
'slo',
384384
timeout_per_input=_TIMEOUT_PER_INPUT_TEST)
385385

386-
@unittest.skip('This test is failing, blocking deploy.')
387386
def test_minimize_corpus(self):
388387
"""Tests minimizing a corpus."""
389388
unminimized_corpus = setup_testcase('unmin_corpus', self.test_paths)
@@ -402,7 +401,9 @@ def test_minimize_corpus(self):
402401
self.assertEqual(len(os.listdir(crash_corpus)), 1)
403402
crasher = os.path.join(crash_corpus, os.listdir(crash_corpus)[0])
404403
with open(crasher) as crasher_file:
405-
self.assertEqual(crasher_file.read(), '?f???u???z?')
404+
crash_content = crasher_file.read()
405+
crasher_file.close()
406+
self.assertEqual(crash_content, '?f???u???z?')
406407

407408
def test_minimize_testcase(self):
408409
"""Tests minimizing a testcase."""

0 commit comments

Comments
 (0)