19
19
import os
20
20
import random
21
21
import shutil
22
- from typing import Dict
23
22
from typing import List
24
23
import zipfile
25
24
@@ -301,8 +300,8 @@ def _cross_pollinate_other_fuzzer_corpuses(self):
301
300
'Failed to unpack corpus backup from url %s.' % corpus_backup_url )
302
301
303
302
304
- class Runner :
305
- """Runner for libFuzzer. """
303
+ class BaseRunner :
304
+ """Base Runner """
306
305
307
306
def __init__ (self , build_directory , context ):
308
307
self .build_directory = build_directory
@@ -312,12 +311,44 @@ def __init__(self, build_directory, context):
312
311
self .build_directory , self .context .fuzz_target .binary )
313
312
if not self .target_path :
314
313
raise CorpusPruningError (
315
- 'Failed to get fuzzer path for %s.' % self .context .fuzz_target .binary )
316
-
314
+ f'Failed to get fuzzer path for { self .context .fuzz_target .binary } ' )
317
315
self .fuzzer_options = options .get_fuzz_target_options (self .target_path )
318
316
319
- def get_libfuzzer_flags (self ):
320
- """Get default libFuzzer options."""
317
+ def get_fuzzer_flags (self ):
318
+ return []
319
+
320
+ def process_sanitizer_options (self ):
321
+ """Process sanitizer options overrides."""
322
+ if not self .fuzzer_options :
323
+ return
324
+
325
+ # Only need to look at ASan, as that's what we prune with.
326
+ overrides = self .fuzzer_options .get_asan_options ()
327
+ if not overrides :
328
+ return
329
+
330
+ asan_options = environment .get_memory_tool_options ('ASAN_OPTIONS' )
331
+ if not asan_options :
332
+ return
333
+ asan_options .update (overrides )
334
+ environment .set_memory_tool_options ('ASAN_OPTIONS' , asan_options )
335
+
336
+ def reproduce (self , input_path , arguments , max_time ):
337
+ return self .context .engine .reproduce (self .target_path , input_path ,
338
+ arguments , max_time )
339
+
340
+ def minimize_corpus (self , arguments , input_dirs , output_dir , reproducers_dir ,
341
+ max_time ):
342
+ return self .context .engine .minimize_corpus (self .target_path , arguments ,
343
+ input_dirs , output_dir ,
344
+ reproducers_dir , max_time )
345
+
346
+
347
+ class LibFuzzerRunner (BaseRunner ):
348
+ """Runner for libFuzzer."""
349
+
350
+ def get_fuzzer_flags (self ):
351
+ """Get default libFuzzer options for pruning."""
321
352
rss_limit = RSS_LIMIT
322
353
max_len = engine_common .CORPUS_INPUT_SIZE_LIMIT
323
354
detect_leaks = 1
@@ -352,22 +383,6 @@ def get_libfuzzer_flags(self):
352
383
353
384
return arguments .list ()
354
385
355
- def process_sanitizer_options (self ):
356
- """Process sanitizer options overrides."""
357
- if not self .fuzzer_options :
358
- return
359
-
360
- # Only need to look at ASan, as that's what we prune with.
361
- overrides = self .fuzzer_options .get_asan_options ()
362
- if not overrides :
363
- return
364
-
365
- asan_options = environment .get_memory_tool_options ('ASAN_OPTIONS' )
366
- if not asan_options :
367
- return
368
- asan_options .update (overrides )
369
- environment .set_memory_tool_options ('ASAN_OPTIONS' , asan_options )
370
-
371
386
def reproduce (self , input_path , arguments , max_time ):
372
387
return self .context .engine .reproduce (self .target_path , input_path ,
373
388
arguments , max_time )
@@ -379,33 +394,81 @@ def minimize_corpus(self, arguments, input_dirs, output_dir, reproducers_dir,
379
394
reproducers_dir , max_time )
380
395
381
396
382
- class CorpusPruner :
383
- """Class that handles corpus pruning."""
397
+ class CentipedeRunner (BaseRunner ):
398
+ """Runner implementation for Centipede fuzzing engine."""
399
+
400
+
401
+ class CorpusPrunerBase :
402
+ """Base class for corpus pruning that is engine-agnostic."""
384
403
385
404
def __init__ (self , runner ):
386
405
self .runner = runner
387
- self .context = self .runner .context
406
+ self .context = runner .context
407
+
408
+ def run (self , initial_corpus_path , minimized_corpus_path , bad_units_path ):
409
+ """Run generic corpus pruning."""
410
+ if not shell .get_directory_file_count (initial_corpus_path ):
411
+ # Empty corpus, nothing to do.
412
+ return None
413
+
414
+ # Unpack seed corpus if needed.
415
+ engine_common .unpack_seed_corpus_if_needed (
416
+ self .runner .target_path , initial_corpus_path , force_unpack = True )
417
+
418
+ environment .reset_current_memory_tool_options (
419
+ redzone_size = MIN_REDZONE , leaks = True )
420
+ self .runner .process_sanitizer_options ()
421
+
422
+ additional_args = self .runner .get_fuzzer_flags ()
423
+ logs .info ('Running merge...' )
424
+ try :
425
+ result = self .runner .minimize_corpus (
426
+ additional_args , [initial_corpus_path ], minimized_corpus_path ,
427
+ bad_units_path , CORPUS_PRUNING_TIMEOUT )
428
+ except TimeoutError as e :
429
+ raise CorpusPruningError (
430
+ 'Corpus pruning timed out while minimizing corpus\n ' + repr (e ))
431
+ except engine .Error as e :
432
+ raise CorpusPruningError ('Corpus pruning failed to minimize corpus\n ' +
433
+ repr (e ))
434
+
435
+ symbolized_output = stack_symbolizer .symbolize_stacktrace (result .logs )
436
+
437
+ if not shell .get_directory_file_count (minimized_corpus_path ):
438
+ raise CorpusPruningError ('Corpus pruning failed to minimize corpus\n ' +
439
+ symbolized_output )
440
+
441
+ logs .info ('Corpus merge finished successfully.' , output = symbolized_output )
442
+ return result .stats
443
+
444
+ def process_bad_units (self , bad_units_path , quarantine_corpus_path ):
445
+ del bad_units_path
446
+ del quarantine_corpus_path
447
+ return {}
448
+
449
+
450
+ class LibFuzzerPruner (CorpusPrunerBase ):
451
+ """
452
+ LibFuzzerPruner is a specialized pruner for libFuzzer that handles
453
+ quarantining of problematic units and related special cases.
454
+ """
388
455
389
456
def _run_single_unit (self , unit_path ):
390
- """Run a single unit, and return the result."""
391
- arguments = self .runner .get_libfuzzer_flags ()
457
+ arguments = self .runner .get_fuzzer_flags () # Expect libFuzzer flags.
392
458
return self .runner .reproduce (unit_path , arguments , SINGLE_UNIT_TIMEOUT )
393
459
394
460
def _quarantine_unit (self , unit_path , quarantine_corpus_path ):
395
- """Moves the given unit to the quarantine, and returns the path to the unit
396
- in the quarantine."""
397
461
quarantined_unit_path = os .path .join (quarantine_corpus_path ,
398
462
os .path .basename (unit_path ))
399
463
shutil .move (unit_path , quarantined_unit_path )
400
-
401
464
return quarantined_unit_path
402
465
403
- def process_bad_units (self , bad_units_path , quarantine_corpus_path
404
- ) -> Dict [ str , uworker_msg_pb2 . CrashInfo ]: # pylint: disable=no-member
405
- """ Process bad units found during merge."""
406
- # TODO(ochang): A lot of this function is similar to parts of fuzz_task.
407
- # Ideally fuzz_task can be refactored in a way that lets us share the common
408
- # code.
466
+ def process_bad_units (self , bad_units_path , quarantine_corpus_path ):
467
+ """
468
+ Process bad units by running each test case individually,
469
+ quarantining those that timeout, OOM, or crash due to memory sanitizer
470
+ errors.
471
+ """
409
472
crashes = {}
410
473
411
474
environment .reset_current_memory_tool_options (redzone_size = DEFAULT_REDZONE )
@@ -415,41 +478,36 @@ def process_bad_units(self, bad_units_path, quarantine_corpus_path
415
478
corpus_file_paths = _get_corpus_file_paths (bad_units_path )
416
479
num_bad_units = 0
417
480
418
- # Run each corpus item individually.
419
481
for i , unit_path in enumerate (corpus_file_paths , 1 ):
420
482
if i % 100 == 0 :
421
483
logs .info ('Up to %d' % i )
422
484
423
485
unit_name = os .path .basename (unit_path )
424
486
if unit_name .startswith ('timeout-' ) or unit_name .startswith ('oom-' ):
425
- # Don't waste time re-running timeout or oom testcases.
487
+ # Immediately quarantine timeouts/oom testcases.
426
488
self ._quarantine_unit (unit_path , quarantine_corpus_path )
427
489
num_bad_units += 1
428
490
continue
429
491
430
492
try :
431
493
result = self ._run_single_unit (unit_path )
432
494
except TimeoutError :
433
- # Slow unit. Quarantine it.
434
495
self ._quarantine_unit (unit_path , quarantine_corpus_path )
435
496
num_bad_units += 1
436
497
continue
437
498
438
499
if not crash_analyzer .is_memory_tool_crash (result .output ):
439
- # Didn't crash.
440
500
continue
441
501
442
- # Get memory tool crash information.
443
502
state = stack_analyzer .get_crash_data (result .output , symbolize_flag = True )
444
503
445
- # Crashed or caused a leak. Quarantine it .
504
+ # Quarantine the crashing unit.
446
505
unit_path = self ._quarantine_unit (unit_path , quarantine_corpus_path )
447
506
num_bad_units += 1
448
507
449
508
if crash_analyzer .ignore_stacktrace (state .crash_stacktrace ):
450
509
continue
451
510
452
- # Local de-duplication.
453
511
if state .crash_state not in crashes :
454
512
security_flag = crash_analyzer .is_security_issue (
455
513
state .crash_stacktrace , state .crash_type , state .crash_address )
@@ -460,49 +518,13 @@ def process_bad_units(self, bad_units_path, quarantine_corpus_path
460
518
crash_stacktrace = state .crash_stacktrace ,
461
519
unit_path = unit_path ,
462
520
security_flag = security_flag )
463
-
464
521
logs .info (
465
522
f'Found { num_bad_units } bad units, { len (crashes )} unique crashes.' )
466
523
return crashes
467
524
468
- def run (self , initial_corpus_path , minimized_corpus_path , bad_units_path ):
469
- """Run corpus pruning. Output result to directory."""
470
- if not shell .get_directory_file_count (initial_corpus_path ):
471
- logs .info ('Empty corpus, nothing to do.' )
472
- return None
473
525
474
- # Set memory tool options and fuzzer arguments.
475
- engine_common .unpack_seed_corpus_if_needed (
476
- self .runner .target_path , initial_corpus_path , force_unpack = True )
477
-
478
- environment .reset_current_memory_tool_options (
479
- redzone_size = MIN_REDZONE , leaks = True )
480
- self .runner .process_sanitizer_options ()
481
- additional_args = self .runner .get_libfuzzer_flags ()
482
-
483
- # Execute fuzzer with arguments for corpus pruning.
484
- logs .info ('Running merge...' )
485
- try :
486
- result = self .runner .minimize_corpus (
487
- additional_args , [initial_corpus_path ], minimized_corpus_path ,
488
- bad_units_path , CORPUS_PRUNING_TIMEOUT )
489
- except TimeoutError as e :
490
- raise CorpusPruningError (
491
- 'Corpus pruning timed out while minimizing corpus\n ' + repr (e ))
492
- except engine .Error as e :
493
- raise CorpusPruningError ('Corpus pruning failed to minimize corpus\n ' +
494
- repr (e ))
495
-
496
- symbolized_output = stack_symbolizer .symbolize_stacktrace (result .logs )
497
-
498
- # Sanity check that there are files in minimized corpus after merging.
499
- if not shell .get_directory_file_count (minimized_corpus_path ):
500
- raise CorpusPruningError ('Corpus pruning failed to minimize corpus\n ' +
501
- symbolized_output )
502
-
503
- logs .info ('Corpus merge finished successfully.' , output = symbolized_output )
504
-
505
- return result .stats
526
+ class CentipedePruner (CorpusPrunerBase ):
527
+ """Centipede pruner."""
506
528
507
529
508
530
class CrossPollinator :
@@ -597,6 +619,22 @@ def _record_cross_pollination_stats(output):
597
619
client .insert ([big_query .Insert (row = bigquery_row , insert_id = None )])
598
620
599
621
622
+ def _get_pruner_and_runner (context ):
623
+ """Get pruner and runner objects according to the FuzzTarget in the context
624
+ """
625
+ build_directory = environment .get_value ('BUILD_DIR' )
626
+ if context .fuzz_target .engine .lower () == 'libfuzzer' :
627
+ runner = LibFuzzerRunner (build_directory , context )
628
+ pruner = LibFuzzerPruner (runner )
629
+ elif context .fuzz_target .engine .lower () == 'centipede' :
630
+ runner = CentipedeRunner (build_directory , context )
631
+ pruner = CentipedePruner (runner )
632
+ else :
633
+ raise CorpusPruningError (
634
+ 'Corpus pruner task does not support the given engine.' )
635
+ return pruner , runner
636
+
637
+
600
638
def do_corpus_pruning (uworker_input , context , revision ) -> CorpusPruningResult :
601
639
"""Run corpus pruning."""
602
640
# Set |FUZZ_TARGET| environment variable to help with unarchiving only fuzz
@@ -611,10 +649,8 @@ def do_corpus_pruning(uworker_input, context, revision) -> CorpusPruningResult:
611
649
revision = revision , fuzz_target = context .fuzz_target .binary ):
612
650
raise CorpusPruningError ('Failed to setup build.' )
613
651
614
- build_directory = environment .get_value ('BUILD_DIR' )
615
652
start_time = datetime .datetime .utcnow ()
616
- runner = Runner (build_directory , context )
617
- pruner = CorpusPruner (runner )
653
+ pruner , runner = _get_pruner_and_runner (context )
618
654
fuzzer_binary_name = os .path .basename (runner .target_path )
619
655
620
656
logs .info ('Getting the initial corpus to process from GCS.' )
0 commit comments