From e7e5123b58b981c972a44db2db14ed5438e5a89c Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Thu, 24 Jun 2021 16:54:24 +0200 Subject: [PATCH 01/36] Removed python version 3.5 from build system since it is end-of-life --- .gitignore | 5 ++++- deployment/Dockerfile | 1 - 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 410d3ed7..489f35f3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ dist atheris.egg-info -.hypothesis \ No newline at end of file +.hypothesis +/.eggs +/build +/tmp diff --git a/deployment/Dockerfile b/deployment/Dockerfile index 91998938..3ebf4dfa 100644 --- a/deployment/Dockerfile +++ b/deployment/Dockerfile @@ -38,7 +38,6 @@ RUN set -e -x -v; \ WORKDIR /atheris CMD export LIBFUZZER_LIB="/root/llvm-project/build/lib/clang/$(ls /root/llvm-project/build/lib/clang/)/lib/linux/libclang_rt.fuzzer_no_main-x86_64.a"; \ - /opt/python/cp35-cp35m/bin/python3 setup.py bdist_wheel -d /tmp/dist && \ /opt/python/cp36-cp36m/bin/python3 setup.py bdist_wheel -d /tmp/dist && \ /opt/python/cp37-cp37m/bin/python3 setup.py bdist_wheel -d /tmp/dist && \ /opt/python/cp38-cp38/bin/python3 setup.py bdist_wheel -d /tmp/dist && \ From 62f8c15c73ad0fdd10c63dcf561f2d807845118e Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Fri, 25 Jun 2021 17:48:48 +0200 Subject: [PATCH 02/36] Added bytecode instrumentation functionality Removed sys.settrace stuff --- atheris.cc | 12 +- atheris.h | 6 + atheris/__init__.py | 3 + atheris/import_hook.py | 92 ++++ atheris/instrument_bytecode.py | 762 +++++++++++++++++++++++++++++++++ atheris/version_dependent.py | 137 ++++++ atheris_no_libfuzzer | 1 + libfuzzer.cc | 74 ++-- libfuzzer.h | 0 setup.py | 11 +- tracer.cc | 304 ++----------- tracer.h | 11 +- 12 files changed, 1080 insertions(+), 333 deletions(-) create mode 100644 atheris/__init__.py create mode 100644 atheris/import_hook.py create mode 100644 atheris/instrument_bytecode.py create mode 100644 atheris/version_dependent.py create mode 120000 atheris_no_libfuzzer create mode 100644 libfuzzer.h diff --git a/atheris.cc b/atheris.cc index 89ddc6f2..0b1b29c2 100644 --- a/atheris.cc +++ b/atheris.cc @@ -21,7 +21,6 @@ #include "pybind11/functional.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" -#include "tracer.h" #include "util.h" namespace atheris { @@ -35,14 +34,9 @@ PYBIND11_MODULE(ATHERIS_MODULE_NAME, m) { m.def("Setup", &Setup); m.def("Fuzz", &Fuzz); - m.def("TraceThisThread", [](pybind11::kwargs kwargs){ - bool enable_python_opcode_coverage = true; - if (kwargs.contains("enable_python_opcode_coverage")) { - enable_python_opcode_coverage = - kwargs["enable_python_opcode_coverage"].cast(); - } - TraceThisThread(enable_python_opcode_coverage); - }); + m.def("_loc", &_loc); + m.def("_reg", &_reg); + m.def("_cmp", &_cmp); py::class_(m, "FuzzedDataProvider") .def(py::init()) diff --git a/atheris.h b/atheris.h index 3ae563d2..e86bfc10 100644 --- a/atheris.h +++ b/atheris.h @@ -30,6 +30,8 @@ #include "pybind11/stl.h" namespace atheris { + +namespace py = pybind11; void Init(); @@ -40,6 +42,10 @@ std::vector Setup( void Fuzz(); +py::handle _cmp (py::handle left, py::handle right, int opid, unsigned long long idx, bool left_is_const); +void _reg(unsigned long long num); +void _loc(unsigned long long idx); + } // namespace atheris #endif // THIRD_PARTY_PY_ATHERIS_LIBFUZZER_H_ diff --git a/atheris/__init__.py b/atheris/__init__.py new file mode 100644 index 00000000..4b0e454f --- /dev/null +++ b/atheris/__init__.py @@ -0,0 +1,3 @@ +from .atheris import * +from .atheris import _loc, _reg, _cmp +from .import_hook import set_target_module as SetTarget, unregister_import_hook as UnregisterImportHook diff --git a/atheris/import_hook.py b/atheris/import_hook.py new file mode 100644 index 00000000..1ef6a88f --- /dev/null +++ b/atheris/import_hook.py @@ -0,0 +1,92 @@ +""" +Atheris instruments all modules that get imported after atheris. +It does so by installing an import hook into sys.meta_path. +A filter can be set by calling SetTarget() with the name of the +target module. +The hook can be unregistered by calling UnregisterImportHook() +and be manually re-registered by calling RegisterImportHook(). +""" + +import sys +from importlib.abc import MetaPathFinder +from importlib.machinery import SourceFileLoader, SourcelessFileLoader, PathFinder +from _frozen_importlib_external import SourceFileLoader, SourcelessFileLoader +from _frozen_importlib import BuiltinImporter, FrozenImporter + +from .instrument_bytecode import patch_code + +TARGET_PACKAGES = None + +class AtherisMetaPathFinder(MetaPathFinder): + def find_spec(self, fullname, path, target=None): + package_name = fullname.split(".")[0] + + if TARGET_PACKAGES is None or package_name in TARGET_PACKAGES: + spec = PathFinder.find_spec(fullname, path, target) + + if spec is None or spec.loader is None: + return None + + if isinstance(spec.loader, SourceFileLoader): + spec.loader = AtherisSourceFileLoader(spec.loader.name, spec.loader.path) + elif isinstance(spec.loader, SourcelessFileLoader): + spec.loader = AtherisSourcelessFileLoader(spec.loader.name, spec.loader.path) + else: + return None + + spec.loader_state = None + + #TODO: better output ? + print(f"Instrumenting {fullname}") + + return spec + + else: + return None + + def invalidate_caches(self): + return PathFinder.invalidate_caches() + +class AtherisSourceFileLoader(SourceFileLoader): + def get_code(self, fullname): + return patch_code(super().get_code(fullname), True) + +class AtherisSourcelessFileLoader(SourcelessFileLoader): + def get_code(self, fullname): + return patch_code(super().get_code(fullname), True) + +def set_target_module(module_name): + global TARGET_PACKAGES + + if TARGET_PACKAGES is None: + TARGET_PACKAGES = set() + + if "." in module_name: + module_name = module_name.split(".")[0] + + TARGET_PACKAGES.add(module_name) + +def unregister_import_hook(): + i = 0 + while i < len(sys.meta_path): + if isinstance(sys.meta_path[i], AtherisMetaPathFinder): + sys.meta_path.pop(i) + else: + i += 1 + +def register_import_hook(): + # Don't register twice + i = 0 + while i < len(sys.meta_path): + if isinstance(sys.meta_path[i], AtherisMetaPathFinder): + return + i += 1 + + i = 0 + while i < len(sys.meta_path) and sys.meta_path[i] in [BuiltinImporter, FrozenImporter]: + i += 1 + + sys.meta_path.insert(i, AtherisMetaPathFinder()) + +# Automatically register import hook +register_import_hook() diff --git a/atheris/instrument_bytecode.py b/atheris/instrument_bytecode.py new file mode 100644 index 00000000..f82918a0 --- /dev/null +++ b/atheris/instrument_bytecode.py @@ -0,0 +1,762 @@ +""" +This module provides the instrumentation functionality for atheris. +Mainly the function patch_code(), which can instrument a code object +and the helper class Instrumentor. +""" + +import os +import sys +import types +import importlib +import imp +import dis +from collections import OrderedDict + +from .version_dependent import get_code_object, CONDITIONAL_JUMPS, UNCONDITIONAL_JUMPS, ENDS_FUNCTION, HAVE_REL_REFERENCE, HAVE_ABS_REFERENCE, REVERSE_CMP_OP + +current_index = 0 +current_pc = 0 + +TARGET_MODULE = "atheris" +REGISTER_FUNCTION = "_reg" +COVERAGE_FUNCTION = "_loc" +COMPARE_FUNCTION = "_cmp" + +class Instruction: + """ + This class represents a single instruction after every + EXTENDED_ARG has been resolved in the bytecode. + It is assumed that all instructions are always 2*n bytes long. + Sometimes the Python-Interpreter pads instructions with + 'EXTENDED_ARG 0' so instructions must have a minimum size. + """ + + @classmethod + def get_fixed_size(cls): + return 2 + + def __init__(self, lineno, offset, opcode, arg=None, min_size=None): + self.lineno = lineno + self.offset = offset + self.opcode = opcode + self.mnemonic = dis.opname[opcode] + + if arg is None: + self.arg = 0 + else: + self.arg = arg + + if min_size is not None: + self._min_size = min_size + else: + self._min_size = 0 + + if self.mnemonic in HAVE_REL_REFERENCE: + self._is_relative = True + self.reference = self.offset + self.get_size() + self.arg + elif self.mnemonic in HAVE_ABS_REFERENCE: + self._is_relative = False + self.reference = self.arg + else: + self._is_relative = None + self.reference = None + + self.check_state() + + def has_argument(self): + return self.opcode >= dis.HAVE_ARGUMENT + + def _get_arg_size(self): + if self.arg >= (1 << 24): + return 8 + elif self.arg >= (1 << 16): + return 6 + elif self.arg >= (1 << 8): + return 4 + else: + return 2 + + def get_size(self): + return max(self._get_arg_size(), self._min_size) + + def get_stack_effect(self): + # dis.stack_effect does not work for EXTENDED_ARG and NOP + if self.mnemonic in ["EXTENDED_ARG", "NOP"]: + return 0 + + return dis.stack_effect(self.opcode, (self.arg if self.has_argument() else None)) + + def to_bytes(self): + size = self._get_arg_size() + arg = self.arg + ret = [ self.opcode, arg & 0xff ] + + for _ in range(size // 2 - 1): + arg >>= 8 + ret = [ dis.opmap["EXTENDED_ARG"], arg & 0xff ] + ret + + while len(ret) < self._min_size: + ret = [ dis.opmap["EXTENDED_ARG"], 0 ] + ret + + assert(len(ret) == self.get_size()) + + return bytes(ret) + + def adjust(self, changed_offset, size, keep_ref): + """ + This function can be used to signal two different events: + (1) Insertion of instructions + (2) Change of size of a single, already existing instruction + + (1) Signal this instruction that some instructions of size + `size` (in bytes) have been inserted at offset `changed_offset` + in the instruction listing. + + (2) Signal this instruction that an instruction at offset `changed_offset` - 0.5 + has increased in size. If `changed_offset` is self.offset + 0.5, this + instruction increased in size. + + Either way, adjust the current offset, reference and argument accordingly. + """ + old_offset = self.offset + old_reference = self.reference + old_size = self.get_size() + + if changed_offset == old_offset + 0.5: + if old_reference is not None: + if self._is_relative: + self.reference += size + elif old_reference > old_offset: + self.reference += size + self.arg += size + + return + + if changed_offset <= old_offset: + self.offset += size + + if old_reference is not None and not keep_ref: + if changed_offset <= old_reference: + self.reference += size + + if self._is_relative: + if old_offset < changed_offset <= old_reference: + self.arg += size + else: + if changed_offset <= old_reference: + self.arg += size + + def check_state(self): + assert(self.mnemonic != "EXTENDED_ARG") + assert(0 <= self.arg <= 0x7fffffff) + assert(0 <= self.opcode < 256) + + if self.reference is not None: + if self._is_relative: + assert(self.offset + self.get_size() + self.arg == self.reference) + else: + assert(self.arg == self.reference) + + def is_jump(self): + return self.mnemonic in CONDITIONAL_JUMPS or self.mnemonic in UNCONDITIONAL_JUMPS + + def make_nop(self): + self.opcode = dis.opmap["NOP"] + self.mnemonic = "NOP" + self.arg = 0 + self._is_relative = None + self.reference = None + self.check_state() + +class BasicBlock: + def __init__(self, instructions, last_one): + self.instructions = instructions + self.id = instructions[0].offset + + last_instr = instructions[-1] + + if last_one or last_instr.mnemonic in ENDS_FUNCTION: + self.edges = [] + elif last_instr.mnemonic in CONDITIONAL_JUMPS: + self.edges = list(set([ last_instr.reference, last_instr.offset + last_instr.get_size() ])) + else: + if last_instr.reference is not None: + self.edges = [ last_instr.reference ] + else: + self.edges = [ last_instr.offset + last_instr.get_size() ] + + def __iter__(self): + return iter(self.instructions) + + def __repr__(self): + return f"BasicBlock(id={self.id}, edges={self.edges})" + +class Instrumentor: + """ + This class implements the core instrumentation functionality. + It gets a single code object, builds a CFG of the bytecode and + can instrument the code for coverage collection via trace_control_flow() + and for data-flow tracing via trace_data_flow(). + + How to insert code: + 1. Select a target basic block + 2. Build up the new code as a list of `Instruction` objects. + Make sure to get the offsets right. + 3. Calculate the overall size needed by your new code (in bytes) + 4. Call _adjust() with your target offset and calculated size + 5. Insert your instruction list into the instruction list of the basic block + 6. Call _handle_size_changes() + Take a look at trace_control_flow() and trace_data_flow() for examples. + + Note that this calls only supports insertions, not deletions. + """ + def __init__(self, code, start_idx, start_pc): + self._start_idx = start_idx + self._start_pc = start_pc + self._cfg = OrderedDict() + self.consts = list(code.co_consts) + self._names = list(code.co_names) + self.num_counters = 0 + self.num_pcs = 0 + self._changes = [] + self._code = code + + self._build_cfg() + self._check_state() + + def _build_cfg(self): + lineno = self._code.co_firstlineno + arg = None + offset = None + length = Instruction.get_fixed_size() + instr_list = [] + basic_block_borders = [] + did_jump = False + jump_targets = set() + + for instruction in dis.get_instructions(self._code): + if instruction.starts_line is not None: + lineno = instruction.starts_line + + if instruction.opname == "EXTENDED_ARG": + if arg is None: + arg = 0 + offset = instruction.offset + + arg <<= 8 + arg |= instruction.arg + length += Instruction.get_fixed_size() + + continue + + elif arg is not None: + instr_list.append(Instruction(lineno, offset, instruction.opcode, (arg << 8) | instruction.arg, min_size=length)) + arg = None + offset = None + length = Instruction.get_fixed_size() + + else: + instr_list.append(Instruction(lineno, instruction.offset, instruction.opcode, instruction.arg)) + + if instr_list[-1].reference is not None: + jump_targets.add(instr_list[-1].reference) + + for c, instr in enumerate(instr_list): + if instr.offset == 0 or instr.offset in jump_targets or did_jump: + basic_block_borders.append(c) + + if instr.is_jump(): + did_jump = True + else: + did_jump = False + + basic_block_borders.append(len(instr_list)) + + for i in range(len(basic_block_borders) - 1): + start_of_bb = basic_block_borders[i] + end_of_bb = basic_block_borders[i + 1] + bb = BasicBlock(instr_list[start_of_bb:end_of_bb], i == len(basic_block_borders) - 2) + self._cfg[bb.id] = bb + + def _check_state(self): + assert(len(self._cfg) > 0) + seen_ids = set() + + for basic_block in self._cfg.values(): + assert(len(basic_block.instructions) > 0) + + assert(basic_block.id not in seen_ids) + seen_ids.add(basic_block.id) + + for edge in basic_block.edges: + assert(edge in self._cfg) + + listing = self._get_linear_instruction_listing() + i = 0 + + assert(listing[0].offset == 0) + + while i < len(listing) - 1: + assert(listing[i].offset + listing[i].get_size() == listing[i + 1].offset) + listing[i].check_state() + i += 1 + + def _get_name(self, name): + """ + Get an offset into the co_names list or + create a new entry if `name` is not found. + """ + try: + return self._names.index(name) + except ValueError: + self._names.append(name) + return len(self._names) - 1 + + def _get_const(self, constant): + """ + Get an offset into the co_consts list or + create a new entry if `const` is not found. + """ + try: + return self.consts.index(constant) + except ValueError: + self.consts.append(constant) + return len(self.consts) - 1 + + def _get_counter(self): + counter = self._start_idx + self.num_counters + self.num_counters += 1 + return self._get_const(counter) + + def _get_pc(self): + pc = self._start_pc + self.num_pcs + self.num_pcs += 1 + return self._get_const(pc) + + def _adjust(self, offset, size, *keep_refs): + """ + Signal all instructions that some instructions of size + `size` (in bytes) will be inserted at offset `offset`. + Sometimes it is necessary that some instructions do not + change their reference when a new insertion happens. + All those Instruction-objects whose reference shall not change + must be in `keep_refs`. + """ + for basic_block in self._cfg.values(): + for instr in basic_block: + instr.adjust(offset, size, instr in keep_refs) + + def _handle_size_changes(self): + """ + After insertions have been made it could be that the argument + of some instructions crossed certain boundaries so that more + EXTENDED_ARGs are required to build the oparg. + This function identifies all of those instructions whose size increased + with the latest insertion and adjusts all other instruction to the + new size. + """ + listing = self._get_linear_instruction_listing() + + while True: + found_invalid = False + i = 0 + + while i < len(listing) - 1: + next_offset = listing[i].offset + listing[i].get_size() + + if next_offset < listing[i + 1].offset: + raise Exception(f"Something weird happened with the offsets at offset {listing[i].offset}") + + elif next_offset > listing[i + 1].offset: + delta = next_offset - listing[i + 1].offset + self._adjust(listing[i].offset + 0.5, delta) + found_invalid = True + + i += 1 + + if not found_invalid: + break + + def _get_linear_instruction_listing(self): + listing = [] + for basic_block in self._cfg.values(): + for instr in basic_block: + listing.append(instr) + return listing + + def to_code(self): + self._check_state() + listing = self._get_linear_instruction_listing() + lnotab = [] + code = bytes() + stacksize = 0 + + if self._code.co_firstlineno != listing[0].lineno: + lnotab.append(0) + lnotab.append(listing[0].lineno - self._code.co_firstlineno) + + i = 0 + + while i < len(listing): + current_lineno = listing[i].lineno + new_code = bytes() + + while i < len(listing) and listing[i].lineno == current_lineno: + stack_effect = listing[i].get_stack_effect() + stacksize = max(stacksize, stacksize + stack_effect) + + new_code += listing[i].to_bytes() + i += 1 + + if i < len(listing): + term_zero = False + delta_bc = len(new_code) + delta_lineno = listing[i].lineno - current_lineno + + if delta_lineno <= -128 or delta_lineno >= 127 or delta_bc >= 255: + term_zero = True + + while True: + lnotab.append(min(delta_bc, 255)) + + if delta_lineno < 0: + lnotab.append((max(delta_lineno, -128)) % 256) + else: + lnotab.append(min(delta_lineno, 127)) + + if term_zero: + if delta_bc == 0 and delta_lineno == 0: + break + else: + if delta_bc < 255 and delta_lineno in range(-127, 128): + break + + if delta_bc >= 255: + delta_bc -= 255 + else: + delta_bc = 0 + + if delta_lineno <= -128: + delta_lineno += 128 + elif delta_lineno >= 127: + delta_lineno -= 127 + else: + delta_lineno = 0 + + code += new_code + + assert(self._code.co_stacksize <= stacksize) + + return get_code_object( + self._code, + stacksize, + code, + tuple(self.consts), + tuple(self._names), + bytes(lnotab) + ) + + def _generate_loc_invocation(self, lineno, offset): + """ + Builds the bytecode that calls atheris._loc() + """ + to_insert = [] + start_offset = offset + name_module = self._get_name(TARGET_MODULE) + name_cov = self._get_name(COVERAGE_FUNCTION) + + to_insert.append(Instruction(lineno, offset, dis.opmap["LOAD_GLOBAL"], name_module)) + offset += to_insert[-1].get_size() + to_insert.append(Instruction(lineno, offset, dis.opmap["LOAD_ATTR"], name_cov)) + offset += to_insert[-1].get_size() + to_insert.append(Instruction(lineno, offset, dis.opmap["LOAD_CONST"], self._get_counter())) + offset += to_insert[-1].get_size() + to_insert.append(Instruction(lineno, offset, dis.opmap["CALL_FUNCTION"], 1)) + offset += to_insert[-1].get_size() + to_insert.append(Instruction(lineno, offset, dis.opmap["POP_TOP"])) + offset += to_insert[-1].get_size() + + return offset - start_offset, to_insert + + def _generate_cmp_invocation(self, op, lineno, offset): + """ + Builds the bytecode that calls atheris._cmp(). + Only call this if the two objects being compared are non-constants. + """ + to_insert = [] + start_offset = offset + name_module = self._get_name(TARGET_MODULE) + name_cmp = self._get_name(COMPARE_FUNCTION) + const_op = self._get_const(op) + const_pc = self._get_pc() + const_False = self._get_const(False) + + to_insert.append(Instruction(lineno, offset, dis.opmap["LOAD_GLOBAL"], name_module)) + offset += to_insert[-1].get_size() + to_insert.append(Instruction(lineno, offset, dis.opmap["LOAD_ATTR"], name_cmp)) + offset += to_insert[-1].get_size() + to_insert.append(Instruction(lineno, offset, dis.opmap["ROT_THREE"])) + offset += to_insert[-1].get_size() + to_insert.append(Instruction(lineno, offset, dis.opmap["LOAD_CONST"], const_op)) + offset += to_insert[-1].get_size() + to_insert.append(Instruction(lineno, offset, dis.opmap["LOAD_CONST"], const_pc)) + offset += to_insert[-1].get_size() + to_insert.append(Instruction(lineno, offset, dis.opmap["LOAD_CONST"], const_False)) + offset += to_insert[-1].get_size() + to_insert.append(Instruction(lineno, offset, dis.opmap["CALL_FUNCTION"], 5)) + offset += to_insert[-1].get_size() + + return offset - start_offset, to_insert + + def _generate_const_cmp_invocation(self, op, lineno, offset, switch): + """ + Builds the bytecode that calls atheris._cmp(). + Only call this if one of the objects being compared is a constant + coming from co_consts. + If `switch` is true the constant is the second argument and needs + to be switched with the first argument. + """ + to_insert = [] + start_offset = offset + name_module = self._get_name(TARGET_MODULE) + name_cmp = self._get_name(COMPARE_FUNCTION) + const_pc = self._get_pc() + const_True = self._get_const(True) + const_op = None + + if switch: + const_op = self._get_const(REVERSE_CMP_OP[op]) + else: + const_op = self._get_const(op) + + to_insert.append(Instruction(lineno, offset, dis.opmap["LOAD_GLOBAL"], name_module)) + offset += to_insert[-1].get_size() + to_insert.append(Instruction(lineno, offset, dis.opmap["LOAD_ATTR"], name_cmp)) + offset += to_insert[-1].get_size() + to_insert.append(Instruction(lineno, offset, dis.opmap["ROT_THREE"])) + offset += to_insert[-1].get_size() + + if switch: + to_insert.append(Instruction(lineno, offset, dis.opmap["ROT_TWO"])) + offset += to_insert[-1].get_size() + + to_insert.append(Instruction(lineno, offset, dis.opmap["LOAD_CONST"], const_op)) + offset += to_insert[-1].get_size() + to_insert.append(Instruction(lineno, offset, dis.opmap["LOAD_CONST"], const_pc)) + offset += to_insert[-1].get_size() + to_insert.append(Instruction(lineno, offset, dis.opmap["LOAD_CONST"], const_True)) + offset += to_insert[-1].get_size() + to_insert.append(Instruction(lineno, offset, dis.opmap["CALL_FUNCTION"], 5)) + offset += to_insert[-1].get_size() + + return offset - start_offset, to_insert + + def trace_control_flow(self): + """ + Insert a call to atheris._loc() in every basic block that + is a target of a branch. The argument of _loc() is an id for + the branch. + + The following bytecode gets inserted: + LOAD_GLOBAL atheris + LOAD_ATTR _loc + LOAD_CONST + CALL_FUNCTION 1 + POP_TOP ; _loc() returns None, remove the return value + """ + already_instrumented = set() + + offset = self._cfg[0].instructions[0].offset + total_size, to_insert = self._generate_loc_invocation(self._cfg[0].instructions[0].lineno, offset) + self._adjust(offset, total_size) + self._cfg[0].instructions = to_insert + self._cfg[0].instructions + + for basic_block in self._cfg.values(): + if len(basic_block.edges) == 2: + for edge in basic_block.edges: + bb = self._cfg[edge] + + if bb.id not in already_instrumented: + already_instrumented.add(bb.id) + source_instr = [] + offset = bb.instructions[0].offset + + for source_bb in self._cfg.values(): + if bb.id in source_bb.edges and source_bb.instructions[-1].reference == offset: + source_instr.append(source_bb.instructions[-1]) + + total_size, to_insert = self._generate_loc_invocation(bb.instructions[0].lineno, offset) + + self._adjust(offset, total_size, *source_instr) + + bb.instructions = to_insert + bb.instructions + + self._handle_size_changes() + + def insert_registration(self, num_counters): + """ + This function inserts an import of atheris and a call to + atheris._reg() that tells atheris how many branches were instrumented. + This function should only be called once for the root code object + of a module after every nested code object has been instrumented. + + The bytecode that imports atheris looks like this: + LOAD_CONST 0 ; absolute import + LOAD_CONST None ; no fromlist + IMPORT_NAME atheris + STORE_GLOBAL atheris + + The bytecode that calls _reg() looks like this: + LOAD_GLOBAL atheris + LOAD_ATTR _reg + LOAD_CONST + CALL_FUNCTION 1 + POP_TOP ; discard return value of _reg() + """ + const_0 = self._get_const(0) + const_None = self._get_const(None) + name_module = self._get_name(TARGET_MODULE) + name_reg = self._get_name(REGISTER_FUNCTION) + const_num_counters = self._get_const(num_counters) + + to_insert = [] + start_offset = self._cfg[0].instructions[0].offset + offset = start_offset + lineno = self._cfg[0].instructions[0].lineno + + # Insert code to import the target module + to_insert.append( Instruction(lineno, offset, dis.opmap["LOAD_CONST"], const_0) ) + offset += to_insert[-1].get_size() + to_insert.append( Instruction(lineno, offset, dis.opmap["LOAD_CONST"], const_None) ) + offset += to_insert[-1].get_size() + to_insert.append( Instruction(lineno, offset, dis.opmap["IMPORT_NAME"], name_module) ) + offset += to_insert[-1].get_size() + to_insert.append( Instruction(lineno, offset, dis.opmap["STORE_GLOBAL"], name_module) ) + offset += to_insert[-1].get_size() + + # Insert a call to the registration function + to_insert.append( Instruction(lineno, offset, dis.opmap["LOAD_GLOBAL"], name_module) ) + offset += to_insert[-1].get_size() + to_insert.append( Instruction(lineno, offset, dis.opmap["LOAD_ATTR"], name_reg) ) + offset += to_insert[-1].get_size() + to_insert.append( Instruction(lineno, offset, dis.opmap["LOAD_CONST"], const_num_counters) ) + offset += to_insert[-1].get_size() + to_insert.append( Instruction(lineno, offset, dis.opmap["CALL_FUNCTION"], 1) ) + offset += to_insert[-1].get_size() + to_insert.append( Instruction(lineno, offset, dis.opmap["POP_TOP"]) ) + offset += to_insert[-1].get_size() + + total_size = offset - start_offset + + self._adjust(start_offset, total_size) + + self._cfg[0].instructions = to_insert + self._cfg[0].instructions + self._handle_size_changes() + + def trace_data_flow(self): + """ + This function instruments bytecode for data-flow tracing. + This works by replacing the instruction COMPARE_OP with + a call to atheris._cmp(). + The arguments for _cmp() are as follows: + - obj1 and obj2: The two values to compare + - opid: argument to COMPARE_OP + - pc: a counter for how many COMPARE_OPs have been replaced + - is_const: whether obj1 is a constant in co_consts. + To detect if any of the values being compared is a constant, all push and pop operations + have to be analyzed. If a constant appears in a comparison it must + always be given as obj1 to _cmp(). + + The bytecode that gets inserted looks like this: + LOAD_GLOBAL atheris + LOAD_ATTR _cmp + ROT_THREE ; move atheris._cmp below the two objects + LOAD_CONST + LOAD_CONST + LOAD_CONST + CALL_FUNCTION 5 + """ + stack_size = 0 + seen_consts = [] + + for basic_block in self._cfg.values(): + for c, instr in enumerate(basic_block.instructions): + if instr.mnemonic == "LOAD_CONST": + seen_consts.append(stack_size) + elif instr.mnemonic == "COMPARE_OP" and instr.arg <= 5: + # Determine the two values on the top of the stack when COMPARE_OP happens + consts_on_stack = list(filter(lambda x: stack_size - 2 <= x < stack_size, seen_consts)) + tos_is_constant = stack_size - 1 in consts_on_stack + tos1_is_constant = stack_size - 2 in consts_on_stack + + if not (tos_is_constant and tos1_is_constant): + offset = instr.offset + total_size = None + to_insert = None + + # Both items are non-constants + if (not tos_is_constant) and (not tos1_is_constant): + total_size, to_insert = self._generate_cmp_invocation(instr.arg, instr.lineno, offset) + + # One item is constant, one is non-constant + else: + total_size, to_insert = self._generate_const_cmp_invocation(instr.arg, instr.lineno, offset, tos_is_constant) + + self._adjust(offset, total_size) + + for i, new_instr in enumerate(to_insert): + basic_block.instructions.insert(c + i, new_instr) + + instr.make_nop() + + stack_size += instr.get_stack_effect() + seen_consts = list(filter(lambda x: x < stack_size, seen_consts)) + + self._handle_size_changes() + + def _dis(self): + print(f"Disassembly of {self._code.co_filename}:{self._code.co_name}") + for basic_block in self._cfg.values(): + print(" -bb-") + for instr in basic_block: + print(f" L.{instr.lineno} [{instr.offset}] {instr.mnemonic} ", end="") + + if instr.has_argument(): + print(f"{instr.arg} ", end="") + + if instr._is_relative: + print(f"(to {instr.reference})", end="") + + print() + +def patch_code(code, trace_dataflow, nested=False): + """ + This function takes an uninstrumented code object + of a module and instruments it including all nested + code objects. + """ + global current_index, current_pc + + old_index = current_index + + inst = Instrumentor(code, current_index, current_pc) + inst.trace_control_flow() + + if trace_dataflow: + inst.trace_data_flow() + + current_index += inst.num_counters + current_pc += inst.num_pcs + + # Repeat this for all nested code objects + for i in range(len(inst.consts)): + if isinstance(inst.consts[i], types.CodeType): + if (inst.consts[i].co_name in ["", "" if not nested else None] + or inst.consts[i].co_name[0] != "<" + or inst.consts[i].co_name[-1] != ">"): + inst.consts[i] = patch_code(inst.consts[i], trace_dataflow, nested=True) + + if not nested: + inst.insert_registration(current_index - old_index) + + return inst.to_code() diff --git a/atheris/version_dependent.py b/atheris/version_dependent.py new file mode 100644 index 00000000..ba2119be --- /dev/null +++ b/atheris/version_dependent.py @@ -0,0 +1,137 @@ +""" +This module manages the different aspects of bytecode instrumentation that +depend on specific python versions: + - Instructions + - Shape of a code object + +Currently supported python versions are: + - 3.6 + - 3.7 + - 3.8 + - 3.9 +""" + +import sys +import types + +PYTHON_VERSION = sys.version_info[:2] + +if PYTHON_VERSION < (3,6) or PYTHON_VERSION > (3,9): + raise RuntimeError(f"You are fuzzing on an unsupported python version: {PYTHON_VERSION[0]}.{PYTHON_VERSION[1]}. Only 3.6 - 3.9 are supported.") + +### Instruction categories ### + +CONDITIONAL_JUMPS = [ + # common + "FOR_ITER", + "JUMP_IF_FALSE_OR_POP", + "JUMP_IF_TRUE_OR_POP", + "POP_JUMP_IF_FALSE", + "POP_JUMP_IF_TRUE", + + # 3.9 + "JUMP_IF_NOT_EXC_MATCH", +] + +UNCONDITIONAL_JUMPS = [ + # common + "JUMP_FORWARD", + "JUMP_ABSOLUTE", + + # 3.6 / 3.7 + "CONTINUE_LOOP", + + # 3.8 + "CALL_FINALLY", +] + +ENDS_FUNCTION = [ + # common + "RAISE_VARARGS", + "RETURN_VALUE", + + # 3.9 + "RERAISE", +] + +HAVE_REL_REFERENCE = [ + # common + "SETUP_WITH", + "JUMP_FORWARD", + "FOR_ITER", + "SETUP_FINALLY", + "CALL_FINALLY", + + # 3.6 / 3.7 + "SETUP_LOOP", + "SETUP_EXCEPT", +] + +HAVE_ABS_REFERENCE = [ + # common + "POP_JUMP_IF_TRUE", + "POP_JUMP_IF_FALSE", + "JUMP_IF_TRUE_OR_POP", + "JUMP_IF_FALSE_OR_POP", + "JUMP_ABSOLUTE", + + # 3.6 / 3.7 + "CONTINUE_LOOP", + + # 3.9 + "JUMP_IF_NOT_EXC_MATCH", +] + +### Compare ops ### + +REVERSE_CMP_OP = [ + 4, + 5, + 2, + 3, + 0, + 1 +] + +### CodeTypes ### + +if (3,6) <= PYTHON_VERSION <= (3,7): + def get_code_object(code_obj, stacksize, bytecode, consts, names, lnotab): + return types.CodeType( + code_obj.co_argcount, + code_obj.co_kwonlyargcount, + code_obj.co_nlocals, + stacksize, + code_obj.co_flags, + bytecode, + consts, + names, + code_obj.co_varnames, + code_obj.co_filename, + code_obj.co_name, + code_obj.co_firstlineno, + lnotab, + code_obj.co_freevars, + code_obj.co_cellvars + ) + +else: + def get_code_object(code_obj, stacksize, bytecode, consts, names, lnotab): + return types.CodeType( + code_obj.co_argcount, + code_obj.co_posonlyargcount, + code_obj.co_kwonlyargcount, + code_obj.co_nlocals, + stacksize, + code_obj.co_flags, + bytecode, + consts, + names, + code_obj.co_varnames, + code_obj.co_filename, + code_obj.co_name, + code_obj.co_firstlineno, + lnotab, + code_obj.co_freevars, + code_obj.co_cellvars + ) diff --git a/atheris_no_libfuzzer b/atheris_no_libfuzzer new file mode 120000 index 00000000..7e292336 --- /dev/null +++ b/atheris_no_libfuzzer @@ -0,0 +1 @@ +./atheris \ No newline at end of file diff --git a/libfuzzer.cc b/libfuzzer.cc index 59b88813..4381f4e8 100644 --- a/libfuzzer.cc +++ b/libfuzzer.cc @@ -22,17 +22,15 @@ #include "atheris.h" #include "macros.h" -#include "tracer.h" #include "util.h" +#include "tracer.h" -using UserCb = int (*)(const uint8_t* Data, size_t Size); - -extern "C" int LLVMFuzzerRunDriver(int* argc, char*** argv, - int (*UserCb)(const uint8_t* Data, - size_t Size)); - -extern "C" void __sanitizer_cov_pcs_init(const uintptr_t* pcs_beg, - const uintptr_t* pcs_end); +using UserCb = int (*)(const uint8_t* Data, size_t Size); + +extern "C" { + int LLVMFuzzerRunDriver(int* argc, char*** argv, int (*UserCb)(const uint8_t* Data, size_t Size)); + void __sanitizer_cov_8bit_counters_init(uint8_t* start, uint8_t* stop); +} NO_SANITIZE std::string GetLibFuzzerSymbolsLocation() { @@ -46,7 +44,7 @@ std::string GetLibFuzzerSymbolsLocation() { NO_SANITIZE std::string GetCoverageSymbolsLocation() { Dl_info dl_info; - if (!dladdr((void*)&__sanitizer_cov_pcs_init, &dl_info)) { + if (!dladdr((void*)&__sanitizer_cov_8bit_counters_init, &dl_info)) { return ""; } return (dl_info.dli_fname); @@ -68,8 +66,28 @@ std::vector& args_global = *new std::vector(); bool setup_called = false; +unsigned long long num_counters = 0; +unsigned char* counters = NULL; + } // namespace +NO_SANITIZE +void _loc(unsigned long long idx) { + if (counters && idx < num_counters) { + counters[idx]++; + } +} + +NO_SANITIZE +void _reg(unsigned long long num) { + num_counters += num; +} + +NO_SANITIZE +py::handle _cmp (py::handle left, py::handle right, int opid, unsigned long long idx, bool left_is_const) { + return TraceCompareOp(counters + idx, left.ptr(), right.ptr(), opid, left_is_const); +} + NO_SANITIZE void Init() { if (!&LLVMFuzzerRunDriver) { @@ -113,27 +131,7 @@ std::vector Setup( } ret.push_back(arg); } - - bool enable_python_coverage = true; - if (kwargs.contains("enable_python_coverage")) { - enable_python_coverage = kwargs["enable_python_coverage"].cast(); - } - -#ifdef HAS_OPCODE_TRACE - bool enable_python_opcode_coverage = true; -#else - bool enable_python_opcode_coverage = false; -#endif - - if (kwargs.contains("enable_python_opcode_coverage")) { - enable_python_opcode_coverage = - kwargs["enable_python_opcode_coverage"].cast(); - } - - if (enable_python_coverage) { - SetupTracer(print_funcs, enable_python_opcode_coverage); - } - + if (GetCoverageSymbolsLocation() != GetLibFuzzerSymbolsLocation()) { std::cerr << Colorize(STDERR_FILENO, "WARNING: Coverage symbols are being provided by a library other than libFuzzer. This will result in broken Python code coverage and severely impacted native extension code coverage. Symbols are coming from this library: " + GetCoverageSymbolsLocation() + "\nYou can likely resolve this issue by linking libFuzzer into Python directly, and using `atheris_no_libfuzzer` instead of `atheris`. See using_sanitizers.md for details."); } @@ -143,7 +141,6 @@ std::vector Setup( NO_SANITIZE int TestOneInput(const uint8_t* data, size_t size) { - TracerStartInput(); try { test_one_input_global(py::bytes(reinterpret_cast(data), size)); return 0; @@ -172,6 +169,13 @@ void Fuzz() { << std::endl; exit(1); } + + if (!num_counters) { + std::cerr << Colorize(STDERR_FILENO, + "Nothing has been instrumented. Did you use atheris.instrument()?") + << std::endl; + exit(1); + } std::vector args; args.reserve(args_global.size() + 1); @@ -182,6 +186,12 @@ void Fuzz() { char** args_ptr = &args[0]; int args_size = args_global.size(); + counters = new unsigned char[num_counters]; + + memset(counters, 0, num_counters); + + __sanitizer_cov_8bit_counters_init(counters, counters + num_counters); + exit(LLVMFuzzerRunDriver(&args_size, &args_ptr, &TestOneInput)); } diff --git a/libfuzzer.h b/libfuzzer.h new file mode 100644 index 00000000..e69de29b diff --git a/setup.py b/setup.py index 533ef36a..dcb2e65e 100644 --- a/setup.py +++ b/setup.py @@ -113,7 +113,7 @@ def get_libfuzzer_lib(): ext_modules = [ Extension( - "atheris", + "atheris.atheris", sorted([ "atheris.cc", "libfuzzer.cc", @@ -127,7 +127,7 @@ def get_libfuzzer_lib(): ], language="c++"), Extension( - "atheris_no_libfuzzer", + "atheris_no_libfuzzer.atheris", sorted([ "atheris.cc", "libfuzzer.cc", @@ -226,9 +226,9 @@ def build_extensions(self): for ext in self.extensions: ext.define_macros = [("VERSION_INFO", "'{}'".format(self.distribution.get_version())), - ("ATHERIS_MODULE_NAME", ext.name)] + ("ATHERIS_MODULE_NAME", "atheris")] ext.extra_compile_args = c_opts - if ext.name == "atheris_no_libfuzzer": + if ext.name == "atheris_no_libfuzzer.atheris": ext.extra_link_args = l_opts else: ext.extra_link_args = l_opts + [libfuzzer] @@ -239,7 +239,6 @@ def build_extensions(self): except Exception as e: sys.stderr.write(str(e)) sys.stderr.write("\n") - pass # Deploy versions of ASan and UBSan that have been merged with libFuzzer asan_name = orig_libfuzzer.replace(".fuzzer_no_main-", ".asan-") @@ -289,7 +288,6 @@ def merge_deploy_libfuzzer_sanitizer(self, libfuzzer, lib_name, except Exception as e: sys.stderr.write(str(e)) sys.stderr.write("\n") - pass setup( @@ -301,6 +299,7 @@ def merge_deploy_libfuzzer_sanitizer(self, libfuzzer, lib_name, description="A coverage-guided fuzzer for Python and Python extensions.", long_description=open("README.md", "r").read(), long_description_content_type="text/markdown", + packages=["atheris", "atheris_no_libfuzzer"], ext_modules=ext_modules, setup_requires=["pybind11>=2.5.0"], cmdclass={"build_ext": BuildExt}, diff --git a/tracer.cc b/tracer.cc index bae0718c..491684e3 100644 --- a/tracer.cc +++ b/tracer.cc @@ -30,132 +30,13 @@ #include "pybind11/stl.h" #include "util.h" -namespace atheris { - -struct PcTableEntry { - uintptr_t pc; - uintptr_t flags; -}; - -// Represents a "module" in the libFuzzer sense. The buffers in a module are -// dynamically allocated and never deleted. -struct Module { - // The length of the counters, fake_instruction_buffer, and pcs arrays. - size_t capacity; - - // The fraction of size that has been used - size_t size; - - // A collection of 8-bit counters, one for each Python trace key. - uint8_t* counters; - // A range of unused memory. We will generate fake "program counters" to point - // into this range. By allocating this memory, we can guarantee that they will - // never conflict with legitimate program counter values. - uint32_t* fake_instruction_buffer; - // Those program counters and metadata. - PcTableEntry* pcs; - - // Whether the PCs in this module are marked as being function entires or not. - bool is_function_entry; -}; - extern "C" { -void __sanitizer_cov_pcs_init(const uintptr_t* pcs_beg, - const uintptr_t* pcs_end); - -void __sanitizer_cov_8bit_counters_init(uint8_t* start, uint8_t* stop); - -void __sanitizer_cov_trace_pc_indir(uintptr_t callee); - -void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2); - -void __sanitizer_weak_hook_memcmp(void* caller_pc, const void* s1, - const void* s2, size_t n, int result); -} - -NO_SANITIZE -Module MakeModule(int capacity, bool is_function_entry) { - Module module; - module.size = 0; - module.capacity = capacity; - - module.counters = new uint8_t[capacity]; - module.fake_instruction_buffer = new uint32_t[capacity]; - memset(module.fake_instruction_buffer, 0, sizeof(uint32_t) * capacity); - module.pcs = new PcTableEntry[capacity]; - module.is_function_entry = is_function_entry; - - for (int i = 0; i < capacity; ++i) { - module.pcs[i].pc = - reinterpret_cast(module.fake_instruction_buffer + i); - } - - for (int i = 0; i < capacity; ++i) { - module.pcs[i].flags = is_function_entry; - } - - __sanitizer_cov_8bit_counters_init(module.counters, - module.counters + capacity); - __sanitizer_cov_pcs_init((uintptr_t*)(module.pcs), - (uintptr_t*)(module.pcs + capacity)); - - return module; -} - -using TraceKey = size_t; - -struct ModuleEntry { - Module* module = nullptr; - size_t idx = 0; -}; - -auto& reg_modules = *new std::deque{}; -auto& func_modules = *new std::deque{}; - -auto& key_to_reg_module = *new std::unordered_map(); -auto& key_to_func_module = *new std::unordered_map(); - -bool tracer_setup = false; - -NO_SANITIZE -std::pair FindOrAddModuleData( - TraceKey key, bool is_func_entry) { - PyGILState_Ensure(); - - auto& map = (is_func_entry ? key_to_func_module : key_to_reg_module); - auto& ret = map[key]; - - if (ret.module) return {&ret, false}; - - auto& deq = (is_func_entry ? func_modules : reg_modules); - ret.module = &deq.back(); - ret.idx = ret.module->size++; - if (ret.module->capacity == ret.module->size) { - deq.push_back(MakeModule(ret.module->capacity * 2, is_func_entry)); - } - - return {&ret, true}; + void __sanitizer_cov_trace_const_cmp8(uint64_t arg1, uint64_t arg2); + void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2); + void __sanitizer_weak_hook_memcmp(void* caller_pc, const void* s1, const void* s2, size_t n, int result); } -NO_SANITIZE -void MarkEntryVisited(const ModuleEntry& entry) { - unsigned char& ctr = entry.module->counters[entry.idx]; - ++ctr; - if (ctr == 0) --ctr; -} - -int printed_funcs = 0; -int max_printed_funcs = 1; - -void PrintFunc(PyFrameObject* frame) { - std::cerr << "\tNEW_PY_FUNC[" << printed_funcs << "/" << max_printed_funcs - << "]: " << py::handle(frame->f_code->co_name).cast() - << "() " - << py::handle(frame->f_code->co_filename).cast() << ":" - << frame->f_lineno << std::endl; -} - -#ifdef HAS_OPCODE_TRACE +namespace atheris { NO_SANITIZE bool As64(int64_t* out, PyObject* integer) { @@ -191,10 +72,7 @@ int NoSanitizeMemcmp(const void* left, const void* right, size_t n) { // converts the strings to utf-8 before comparison when possible, which produces // significantly better results even though there's an encoding step every time. NO_SANITIZE -void TraceCompareUnicode(PyObject* left, PyObject* right, - const ModuleEntry& entry, PyFrameObject* frame) { - void* pc = entry.module->fake_instruction_buffer + entry.idx; - +void TraceCompareUnicode(PyObject* left, PyObject* right, void* pc) { PyUnicode_READY(left); PyUnicode_READY(right); @@ -214,165 +92,39 @@ void TraceCompareUnicode(PyObject* left, PyObject* right, } NO_SANITIZE -void TraceCompareOp(const ModuleEntry& entry, PyFrameObject* frame) { - void* pc = entry.module->fake_instruction_buffer + entry.idx; - - PyObject* left = frame->f_stacktop[-2]; - PyObject* right = frame->f_stacktop[-1]; - if (frame->f_stacktop - frame->f_valuestack < 2) { - std::cerr << Colorize( - STDERR_FILENO, - "Attempt to trace COMPARE_OP with <2 items on the stack."); - exit(1); - } - - if (PyLong_Check(left)) { - if (PyLong_Check(right)) { +PyObject* TraceCompareOp(void* pc, PyObject* left, PyObject* right, int opid, bool left_is_const) { + if (PyLong_Check(left) && PyLong_Check(right)) { // Integer-integer comparison. If both integers fit into 64 bits, report // an integer comparison. int64_t left_int; int64_t right_int; if (As64(&left_int, left) && As64(&right_int, right)) { - __sanitizer_cov_trace_cmp8(left_int, right_int); - return; + if (left_is_const) { + __sanitizer_cov_trace_const_cmp8(left_int, right_int); + } else { + __sanitizer_cov_trace_cmp8(left_int, right_int); + } } - } } - - // If comparing bytes, report a memcmp. Report that we're comparing the size, - // and then if that passes, compare the contents ourselves and report the - // results. - if (PyBytes_Check(left)) { - if (PyBytes_Check(right)) { - uint64_t left_size = PyBytes_Size(left); - uint64_t right_size = PyBytes_Size(right); - __sanitizer_cov_trace_cmp8(left_size, right_size); - if (left_size == right_size) { - const void* left_bytes = PyBytes_AsString(left); - const void* right_bytes = PyBytes_AsString(right); - int differ = NoSanitizeMemcmp(left_bytes, right_bytes, left_size); - __sanitizer_weak_hook_memcmp(pc, left_bytes, right_bytes, left_size, - differ); - } - return; + else if (PyBytes_Check(left) && PyBytes_Check(right)) { + // If comparing bytes, report a memcmp. Report that we're comparing the size, + // and then if that passes, compare the contents ourselves and report the + // results. + uint64_t left_size = PyBytes_Size(left); + uint64_t right_size = PyBytes_Size(right); + __sanitizer_cov_trace_cmp8(left_size, right_size); + if (left_size == right_size) { + const void* left_bytes = PyBytes_AsString(left); + const void* right_bytes = PyBytes_AsString(right); + int differ = NoSanitizeMemcmp(left_bytes, right_bytes, left_size); + __sanitizer_weak_hook_memcmp(pc, left_bytes, right_bytes, left_size, differ); } } - - if (PyUnicode_Check(left)) { - if (PyUnicode_Check(right)) { - TraceCompareUnicode(left, right, entry, frame); - return; - } + else if (PyUnicode_Check(left) && PyUnicode_Check(right)) { + TraceCompareUnicode(left, right, pc); } + + return PyObject_RichCompare(left, right, opid); } -NO_SANITIZE -int Tracer(void* pyobj, PyFrameObject* frame, int what, PyObject* arg_unused) { - frame->f_trace_opcodes = true; - - if (!tracer_setup) return 0; - - TraceKey key = 0; - if (what == PyTrace_CALL) { - key = CompositeHash(frame->f_lineno, what, frame->f_code); - } - if (what == PyTrace_OPCODE) { - key = CompositeHash(frame->f_lineno, what, frame->f_lasti, frame->f_code); - } - - // With opcode tracing, we only need to track CALL and OPCODE events. - // Anything else (e.g. LINE events) is redundant, as we'll also get one or - // more OPCODE events for those lines. - if (what == PyTrace_CALL || what == PyTrace_OPCODE) { - auto entry_data = FindOrAddModuleData(key, what == PyTrace_CALL); - MarkEntryVisited(*entry_data.first); - - if (what == PyTrace_OPCODE) { - unsigned int opcode = - PyBytes_AsString(frame->f_code->co_code)[frame->f_lasti]; - if (opcode == COMPARE_OP) { - TraceCompareOp(*entry_data.first, frame); - } - } - - if (what == PyTrace_CALL && entry_data.second && - printed_funcs < max_printed_funcs) { - ++printed_funcs; - PrintFunc(frame); - } - } - - return 0; -} - -#endif // HAS_OPCODE_TRACE - -NO_SANITIZE -int TracerNoOpcodes(void* pyobj, PyFrameObject* frame, int what, - PyObject* arg_unused) { - if (!tracer_setup) return 0; - - // When not using OPCODE tracing, trace every kind of event we can. - auto key = CompositeHash(frame->f_lineno, what, frame->f_code); - auto entry_data = FindOrAddModuleData(key, what == PyTrace_CALL); - MarkEntryVisited(*entry_data.first); - - if (what == PyTrace_CALL && entry_data.second && - printed_funcs < max_printed_funcs) { - ++printed_funcs; - PrintFunc(frame); - } - - return 0; -} - -NO_SANITIZE -void SetupTracer(int max_print_funcs, bool enable_opcode_tracing) { - reg_modules.push_back(MakeModule(512, false)); - func_modules.push_back(MakeModule(512, true)); - max_printed_funcs = max_print_funcs; - - TraceThisThread(enable_opcode_tracing); - -#ifdef HAS_OPCODE_TRACE - - if (enable_opcode_tracing) { - std::cerr << "INFO: Configured for Python tracing with opcodes." - << std::endl; - } else { - std::cerr << "INFO: Configured for Python tracing without opcodes." - << std::endl; - } - -#else - - if (enable_opcode_tracing) { - std::cerr << Colorize(STDERR_FILENO, - "Opcode tracing requested, but this feature is only " - "supported on Python 3.8+. Option will be ignored.") - << std::endl; - } - std::cerr << "INFO: Configured for Python tracing." << std::endl; - -#endif - - tracer_setup = true; -} - -void TraceThisThread(bool enable_opcode_tracing) { -#ifdef HAS_OPCODE_TRACE - if (enable_opcode_tracing) { - PyEval_SetTrace((Py_tracefunc)Tracer, (PyObject*)nullptr); - } else { - PyEval_SetTrace((Py_tracefunc)TracerNoOpcodes, (PyObject*)nullptr); - } -#else - PyEval_SetTrace((Py_tracefunc)TracerNoOpcodes, (PyObject*)nullptr); -#endif -} - -// Called before every TestOneInput. -NO_SANITIZE -void TracerStartInput() { printed_funcs = 0; } - } // namespace atheris diff --git a/tracer.h b/tracer.h index 6614d5fe..c77d6a1a 100644 --- a/tracer.h +++ b/tracer.h @@ -19,18 +19,9 @@ #include -#if PY_MAJOR_VERSION >= 3 -#if PY_MINOR_VERSION >= 7 -#define HAS_OPCODE_TRACE -#endif -#endif - namespace atheris { -void SetupTracer(int max_print_funcs, bool enable_opcode_tracing); -void TraceThisThread(bool enable_opcode_tracing); - -void TracerStartInput(); +PyObject* TraceCompareOp(void* pc, PyObject* left, PyObject* right, int opid, bool left_is_const); } // namespace atheris From 954571e95687a2edf858a23a37b321e1419b7d5b Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Fri, 25 Jun 2021 19:13:22 +0200 Subject: [PATCH 03/36] Added atheris.Instrument() to get better control of what gets instrumented --- atheris/__init__.py | 2 +- atheris/import_hook.py | 96 ++++++++++++++++++++++++------------------ libfuzzer.cc | 2 +- 3 files changed, 57 insertions(+), 43 deletions(-) diff --git a/atheris/__init__.py b/atheris/__init__.py index 4b0e454f..51c05313 100644 --- a/atheris/__init__.py +++ b/atheris/__init__.py @@ -1,3 +1,3 @@ from .atheris import * from .atheris import _loc, _reg, _cmp -from .import_hook import set_target_module as SetTarget, unregister_import_hook as UnregisterImportHook +from .import_hook import instrument as Instrument diff --git a/atheris/import_hook.py b/atheris/import_hook.py index 1ef6a88f..863e3b29 100644 --- a/atheris/import_hook.py +++ b/atheris/import_hook.py @@ -1,10 +1,8 @@ """ -Atheris instruments all modules that get imported after atheris. -It does so by installing an import hook into sys.meta_path. -A filter can be set by calling SetTarget() with the name of the -target module. -The hook can be unregistered by calling UnregisterImportHook() -and be manually re-registered by calling RegisterImportHook(). +atheris instruments modules at import-time. +The Instrument() function temporarily installs an import hook (AtherisMetaPathFinder) +in sys.meta_path that employs a custom loader +(AtherisSourceFileLoader, AtherisSourcelessFileLoader). """ import sys @@ -15,13 +13,13 @@ from .instrument_bytecode import patch_code -TARGET_PACKAGES = None +TARGET_PACKAGES = set() class AtherisMetaPathFinder(MetaPathFinder): def find_spec(self, fullname, path, target=None): package_name = fullname.split(".")[0] - if TARGET_PACKAGES is None or package_name in TARGET_PACKAGES: + if not TARGET_PACKAGES or package_name in TARGET_PACKAGES: spec = PathFinder.find_spec(fullname, path, target) if spec is None or spec.loader is None: @@ -36,7 +34,6 @@ def find_spec(self, fullname, path, target=None): spec.loader_state = None - #TODO: better output ? print(f"Instrumenting {fullname}") return spec @@ -49,44 +46,61 @@ def invalidate_caches(self): class AtherisSourceFileLoader(SourceFileLoader): def get_code(self, fullname): - return patch_code(super().get_code(fullname), True) + code = super().get_code(fullname) + + if code is None: + return None + else: + return patch_code(code, True) class AtherisSourcelessFileLoader(SourcelessFileLoader): def get_code(self, fullname): - return patch_code(super().get_code(fullname), True) - -def set_target_module(module_name): - global TARGET_PACKAGES - - if TARGET_PACKAGES is None: - TARGET_PACKAGES = set() - - if "." in module_name: - module_name = module_name.split(".")[0] - - TARGET_PACKAGES.add(module_name) - -def unregister_import_hook(): - i = 0 - while i < len(sys.meta_path): - if isinstance(sys.meta_path[i], AtherisMetaPathFinder): - sys.meta_path.pop(i) + code = super().get_code(fullname) + + if code is None: + return None else: + return patch_code(code, True) + +class HookManager: + def __enter__(self): + i = 0 + while i < len(sys.meta_path): + if isinstance(sys.meta_path[i], AtherisMetaPathFinder): + return self i += 1 + + i = 0 + while i < len(sys.meta_path) and sys.meta_path[i] in [BuiltinImporter, FrozenImporter]: + i += 1 + + sys.meta_path.insert(i, AtherisMetaPathFinder()) + + return self + + def __exit__(self, *args): + i = 0 + while i < len(sys.meta_path): + if isinstance(sys.meta_path[i], AtherisMetaPathFinder): + sys.meta_path.pop(i) + else: + i += 1 + + TARGET_PACKAGES.clear() -def register_import_hook(): - # Don't register twice - i = 0 - while i < len(sys.meta_path): - if isinstance(sys.meta_path[i], AtherisMetaPathFinder): - return - i += 1 +def instrument(*modules): + """ + This function temporarily installs an import hook which instruments + all imported modules. + The arguments to this function are names of modules or packages. + If it is a fully qualified module name, the name of its package will be used. + """ + global TARGET_PACKAGES - i = 0 - while i < len(sys.meta_path) and sys.meta_path[i] in [BuiltinImporter, FrozenImporter]: - i += 1 + for module_name in modules: + if "." in module_name: + module_name = module_name.split(".")[0] - sys.meta_path.insert(i, AtherisMetaPathFinder()) + TARGET_PACKAGES.add(module_name) -# Automatically register import hook -register_import_hook() + return HookManager() diff --git a/libfuzzer.cc b/libfuzzer.cc index 4381f4e8..12d142b1 100644 --- a/libfuzzer.cc +++ b/libfuzzer.cc @@ -172,7 +172,7 @@ void Fuzz() { if (!num_counters) { std::cerr << Colorize(STDERR_FILENO, - "Nothing has been instrumented. Did you use atheris.instrument()?") + "Nothing has been instrumented. Did you use atheris.Instrument()?") << std::endl; exit(1); } From 03d95008c710286df437b067503c539c1ef20b34 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Fri, 25 Jun 2021 19:24:51 +0200 Subject: [PATCH 04/36] Bug fix: Force constant selection of co_consts to be of same type --- atheris/instrument_bytecode.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/atheris/instrument_bytecode.py b/atheris/instrument_bytecode.py index f82918a0..18cb5b71 100644 --- a/atheris/instrument_bytecode.py +++ b/atheris/instrument_bytecode.py @@ -317,11 +317,12 @@ def _get_const(self, constant): Get an offset into the co_consts list or create a new entry if `const` is not found. """ - try: - return self.consts.index(constant) - except ValueError: - self.consts.append(constant) - return len(self.consts) - 1 + for i in range(len(self.consts)): + if type(self.consts[i]) == type(constant) and self.consts[i] == constant: + return i + + self.consts.append(constant) + return len(self.consts) - 1 def _get_counter(self): counter = self._start_idx + self.num_counters From 06d21c6f4ab026df7da11d3af8a628017dcd4082 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Fri, 25 Jun 2021 19:39:23 +0200 Subject: [PATCH 05/36] Changed from global TARGET_PACKAGES in import_hook.py to a local variable --- atheris/import_hook.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/atheris/import_hook.py b/atheris/import_hook.py index 863e3b29..480fbbe9 100644 --- a/atheris/import_hook.py +++ b/atheris/import_hook.py @@ -13,13 +13,15 @@ from .instrument_bytecode import patch_code -TARGET_PACKAGES = set() - class AtherisMetaPathFinder(MetaPathFinder): + def __init__(self, packages): + super().__init__() + self._target_packages = packages + def find_spec(self, fullname, path, target=None): package_name = fullname.split(".")[0] - if not TARGET_PACKAGES or package_name in TARGET_PACKAGES: + if not self._target_packages or package_name in self._target_packages: spec = PathFinder.find_spec(fullname, path, target) if spec is None or spec.loader is None: @@ -63,6 +65,9 @@ def get_code(self, fullname): return patch_code(code, True) class HookManager: + def __init__(self, packages): + self._target_packages = packages + def __enter__(self): i = 0 while i < len(sys.meta_path): @@ -74,7 +79,7 @@ def __enter__(self): while i < len(sys.meta_path) and sys.meta_path[i] in [BuiltinImporter, FrozenImporter]: i += 1 - sys.meta_path.insert(i, AtherisMetaPathFinder()) + sys.meta_path.insert(i, AtherisMetaPathFinder(self._target_packages)) return self @@ -85,8 +90,6 @@ def __exit__(self, *args): sys.meta_path.pop(i) else: i += 1 - - TARGET_PACKAGES.clear() def instrument(*modules): """ @@ -95,12 +98,19 @@ def instrument(*modules): The arguments to this function are names of modules or packages. If it is a fully qualified module name, the name of its package will be used. """ - global TARGET_PACKAGES + target_packages = set() for module_name in modules: + if not isinstance(module_name, str): + raise RuntimeError("atheris.Instrument() expects names of modules of type ") + elif not module_name: + raise RuntimeError(f"atheris.Instrument(): Invalid module name: {module_name}") + elif module_name[0] == ".": + raise RuntimeError("atheris.Instrument(): Please specify fully qualified module names (absolute not relative)") + if "." in module_name: module_name = module_name.split(".")[0] - TARGET_PACKAGES.add(module_name) + target_packages.add(module_name) - return HookManager() + return HookManager(target_packages) From 89423dc5ed2c6d6c4ed05149074263a8d8fa4f04 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Fri, 25 Jun 2021 20:46:23 +0200 Subject: [PATCH 06/36] Updated .md files with instrumentation information --- CONTRIBUTING => CONTRIBUTING.md | 0 README.md | 65 +++++++++------------------------ using_sanitizers.md | 2 +- 3 files changed, 18 insertions(+), 49 deletions(-) rename CONTRIBUTING => CONTRIBUTING.md (100%) diff --git a/CONTRIBUTING b/CONTRIBUTING.md similarity index 100% rename from CONTRIBUTING rename to CONTRIBUTING.md diff --git a/README.md b/README.md index 2dbfc497..9253c8a6 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,8 @@ Atheris is a coverage-guided Python fuzzing engine. It supports fuzzing of Pytho ## Installation Instructions -Atheris supports Linux (32- and 64-bit) and Mac OS X. +Atheris supports Linux (32- and 64-bit) and Mac OS X. +Only python versions 3.6 - 3.9 are supported. ### Linux @@ -39,29 +40,22 @@ CLANG_BIN="$(pwd)/bin/clang" pip3 install atheris ### Example: ```python -import atheris import sys +import atheris + +with atheris.Instrument(): + import some_library def TestOneInput(data): - if data == b"bad": - raise RuntimeError("Badness!") + some_library.parse(data) atheris.Setup(sys.argv, TestOneInput) atheris.Fuzz() ``` Atheris supports fuzzing Python code, and uses Python code coverage information for this purpose. - -### Fuzzing Python Code - -While Atheris supports Python 2.7 and Python 3.3+, its Python code coverage support is *significantly better* when used with Python 3.8+, as it supports opcode-by-opcode coverage. If fuzzing Python code, we strongly recommend using Python 3.8+ where possible. - When fuzzing Python, Atheris will report a failure if the Python code under test throws an uncaught exception. -Be sure to pass `enable_python_coverage=True` as an argument to `Setup()`. You can additionally pass `enable_python_opcode_coverage=[True/False]` to turn on and off opcode coverage. Opcode coverage is typically beneficial, but may provide more performance impact than benefit on large Python projects. This option defaults to `True` on Python 3.8+, or `False` otherwise. - -Opcode coverage must be enabled to support features like intelligent string comparison fuzzing for Python code. - ### Fuzzing Native Extensions In order for native fuzzing to be effective, such native extensions must be built with Clang, using the argument `-fsanitize=fuzzer-no-link`. They should be built with the same `clang` as was used when building Atheris. @@ -82,30 +76,22 @@ Atheris is fully supported by [OSS-Fuzz](https://github.com/google/oss-fuzz), Go ## API -### Main Interface - -The `atheris` module provides two key functions: `Setup()` and `Fuzz()`. +The `atheris` module provides three key functions: `Instrument()`, `Setup()` and `Fuzz()`. -In your source file, define a fuzzer entry point function, and pass it to `atheris.Setup()`, along with the fuzzer's arguments (typically `sys.argv`). Finally, call `atheris.Fuzz()` to start fuzzing. Here's an example: +In your source file, when you import your target library make sure that this happens inside a `with atheris.Instrument():`-block. +Define a fuzzer entry point function and pass it to `atheris.Setup()` along with the fuzzer's arguments (typically `sys.argv`). Finally, call `atheris.Fuzz()` to start fuzzing. You must call `atheris.Setup()` before `atheris.Fuzz()`. -```python -def Setup(args, callback, enable_python_coverage=True, enable_python_opcode_coverage=True): -``` +### `Instrument(*modules)` +- `modules`: A list of module names that filters which modules shall be instrumented. If no names are specified every module gets instrumented. -Configure the Atheris Python Fuzzer. You must call `atheris.Setup()` before `atheris.Fuzz()`. +This has to be used together with a `with`-Statement. All modules that get imported in the `with`-block get instrumented for coverage collection. -Args: +### `Setup(args, test_one_input)` - `args`: A list of strings: the process arguments to pass to the fuzzer, typically `sys.argv`. This argument list may be modified in-place, to remove arguments consumed by the fuzzer. See [the LibFuzzer docs](https://llvm.org/docs/LibFuzzer.html#options) for a list of such options. - - `test_one_input`: your fuzzer's entry point. Must take a single `bytes` argument (`str` in Python 2). This will be repeatedly invoked with a single bytes container. + - `test_one_input`: your fuzzer's entry point. Must take a single `bytes` argument. This will be repeatedly invoked with a single bytes container. -Optional Args: - - `enable_python_coverage`: boolean. Controls whether to collect coverage information on Python code. Defaults to `True`. If fuzzing a native extension with minimal Python code, set to `False` for a performance increase. - - `enable_python_opcode_coverage`: boolean. Controls whether to collect Python opcode trace events. You typically want this enabled. Defaults to `True` on Python 3.8+, and `False` otherwise. Ignored if `enable_python_coverage=False`, or if using a version of Python prior to 3.8. - -```python -def Fuzz(): -``` +### `Fuzz()` This starts the fuzzer. You must have called `Setup()` before calling this function. This function does not return. @@ -113,24 +99,7 @@ In many cases `Setup()` and `Fuzz()` could be combined into a single function, b separated because you may want the fuzzer to consume the command-line arguments it handles before passing any remaining arguments to another setup function. -```python -def TraceThisThread(enable_python_opcode_coverage=True): -``` - -While we don't recommend using threads during fuzzing if you can avoid it, -Atheris does support it. - -This function enables the collection of coverage information for the current -thread. Python coverage collection must be enabled in `Setup()` or this has no -effect. (Thread coverage still works if this function is called before -`Setup()`, and `Setup()` is subsequently called with -`enable_python_coverage=True`). - -Optional Args: - - `enable_python_opcode_coverage`: boolean. Controls whether to collect Python opcode trace events for this thread. You typically want this enabled. Defaults to `True` ; ignored and unsupported if using a version of Python prior to 3.8. - - -### FuzzedDataProvider +### `FuzzedDataProvider` Often, a `bytes` object is not convenient input to your code being fuzzed. Similar to libFuzzer, we provide a FuzzedDataProvider to translate these bytes into other input forms. Alternatively, you can use [Hypothesis](https://hypothesis.readthedocs.io/) as described below. diff --git a/using_sanitizers.md b/using_sanitizers.md index 737b3c40..7febff9c 100644 --- a/using_sanitizers.md +++ b/using_sanitizers.md @@ -45,7 +45,7 @@ These files will be called: If these files are present, it means Atheris succesfully generated the files at installation time, and you can use this option. Simply `LD_PRELOAD` the right `.so` file, and you're good to go. Here's a complete example: ``` -LD_PRELOAD="$(python -c "import atheris; import os; print(os.path.dirname(atheris.path()))")/asan_with_fuzzer.so" python ./my_fuzzer.py +LD_PRELOAD="$(python -c "import atheris; import os; print(os.path.dirname(atheris.path()))")/../asan_with_fuzzer.so" python ./my_fuzzer.py ``` ### Option 2: Linking libFuzzer into Python From 0a8ad1cf94e1374cf22af7cc90df41dc6a46cdf5 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Fri, 25 Jun 2021 20:54:15 +0200 Subject: [PATCH 07/36] Apparently co_stacksize sometimes is too large. Take the largest value possible --- atheris/instrument_bytecode.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/atheris/instrument_bytecode.py b/atheris/instrument_bytecode.py index 18cb5b71..5ae72aca 100644 --- a/atheris/instrument_bytecode.py +++ b/atheris/instrument_bytecode.py @@ -445,12 +445,10 @@ def to_code(self): delta_lineno = 0 code += new_code - - assert(self._code.co_stacksize <= stacksize) return get_code_object( self._code, - stacksize, + max(self._code.co_stacksize, stacksize), code, tuple(self.consts), tuple(self._names), From bf169fa7a59d6556600f6324e07277446c94b062 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Fri, 25 Jun 2021 21:01:50 +0200 Subject: [PATCH 08/36] Allow num_counters to be 0 --- libfuzzer.cc | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/libfuzzer.cc b/libfuzzer.cc index 12d142b1..20a03955 100644 --- a/libfuzzer.cc +++ b/libfuzzer.cc @@ -169,13 +169,6 @@ void Fuzz() { << std::endl; exit(1); } - - if (!num_counters) { - std::cerr << Colorize(STDERR_FILENO, - "Nothing has been instrumented. Did you use atheris.Instrument()?") - << std::endl; - exit(1); - } std::vector args; args.reserve(args_global.size() + 1); @@ -185,12 +178,12 @@ void Fuzz() { args.push_back(nullptr); char** args_ptr = &args[0]; int args_size = args_global.size(); - - counters = new unsigned char[num_counters]; - memset(counters, 0, num_counters); - - __sanitizer_cov_8bit_counters_init(counters, counters + num_counters); + if (num_counters) { + counters = new unsigned char[num_counters]; + memset(counters, 0, num_counters); + __sanitizer_cov_8bit_counters_init(counters, counters + num_counters); + } exit(LLVMFuzzerRunDriver(&args_size, &args_ptr, &TestOneInput)); } From a39d5be683cd2df642f7cdedebddedb54da935a9 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Fri, 25 Jun 2021 21:06:53 +0200 Subject: [PATCH 09/36] Don't allow atheris to instrument itself --- atheris/import_hook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atheris/import_hook.py b/atheris/import_hook.py index 480fbbe9..b751b435 100644 --- a/atheris/import_hook.py +++ b/atheris/import_hook.py @@ -21,7 +21,7 @@ def __init__(self, packages): def find_spec(self, fullname, path, target=None): package_name = fullname.split(".")[0] - if not self._target_packages or package_name in self._target_packages: + if (not self._target_packages or package_name in self._target_packages) and package_name != "atheris": spec = PathFinder.find_spec(fullname, path, target) if spec is None or spec.loader is None: From 3022c6e7d86f9aa33334cbc179b2075ad50800de Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Fri, 25 Jun 2021 21:15:44 +0200 Subject: [PATCH 10/36] Updated example_fuzzers to include atheris.Instrument() --- example_fuzzers/example_library.py | 5 +++++ example_fuzzers/fuzzing_example.py | 7 ++----- example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py | 5 ++++- example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py | 6 +++++- example_fuzzers/json_fuzzer/ujson_fuzzer.py | 8 +++++--- example_fuzzers/yaml_fuzzer/yaml_fuzzer.py | 4 +++- 6 files changed, 24 insertions(+), 11 deletions(-) create mode 100644 example_fuzzers/example_library.py diff --git a/example_fuzzers/example_library.py b/example_fuzzers/example_library.py new file mode 100644 index 00000000..c8aff5b3 --- /dev/null +++ b/example_fuzzers/example_library.py @@ -0,0 +1,5 @@ +def CodeBeingFuzzed(number): + """Raises an exception if number is 17.""" + if number == 17: + raise RuntimeError('Number was seventeen!') + diff --git a/example_fuzzers/fuzzing_example.py b/example_fuzzers/fuzzing_example.py index cc4e3316..505c213c 100644 --- a/example_fuzzers/fuzzing_example.py +++ b/example_fuzzers/fuzzing_example.py @@ -20,11 +20,8 @@ import atheris - -def CodeBeingFuzzed(number): - """Raises an exception if number is 17.""" - if number == 17: - raise RuntimeError('Number was seventeen!') +with atheris.Instrument("example_library"): + from example_library import CodeBeingFuzzed def TestOneInput(data): diff --git a/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py b/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py index 64cbd76c..eebcbb7a 100644 --- a/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py +++ b/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py @@ -43,7 +43,10 @@ import unicodedata import atheris -import idna + +with atheris.Instrument("idna"): + import idna + import libidn2 diff --git a/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py b/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py index 6aa4b2a3..8561e9a1 100755 --- a/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py +++ b/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py @@ -40,10 +40,14 @@ Unicode 9 and therefore produces incorrect metadata about Unicode 11 characters. """ import atheris -import idna import sys import unicodedata +with atheris.Instrument("idna"): + import idna + +# libidn2 is just an extension. +# Only python code can be instrumented. import libidn2 diff --git a/example_fuzzers/json_fuzzer/ujson_fuzzer.py b/example_fuzzers/json_fuzzer/ujson_fuzzer.py index 0f0e8177..dcef6c6d 100755 --- a/example_fuzzers/json_fuzzer/ujson_fuzzer.py +++ b/example_fuzzers/json_fuzzer/ujson_fuzzer.py @@ -28,6 +28,10 @@ import sys import atheris_no_libfuzzer as atheris + +# Here atheris.Instrument() is not necessary +# because ujson is just an extension. +# Only python code can be instrumented. import ujson @@ -48,9 +52,7 @@ def TestOneInput(input_bytes): def main(): - # Since everything interesting in this fuzzer is in native code, we can - # disable Python coverage to improve performance and reduce coverage noise. - atheris.Setup(sys.argv, TestOneInput, enable_python_coverage=False) + atheris.Setup(sys.argv, TestOneInput) atheris.Fuzz() if __name__ == "__main__": diff --git a/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py b/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py index 234c19b5..e78ff17c 100644 --- a/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py +++ b/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py @@ -28,7 +28,9 @@ import warnings import atheris -from ruamel import yaml as ruamel_yaml + +with atheris.Instrument("ruamel.yaml"): + from ruamel import yaml as ruamel_yaml # Suppress all warnings. warnings.simplefilter("ignore") From 04998482866df0088360fe4f5bfc456f20b110c8 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Fri, 25 Jun 2021 22:03:46 +0200 Subject: [PATCH 11/36] Added -ldl flag to asan_with_fuzzer.so --- setup_utils/merge_libfuzzer_sanitizer.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup_utils/merge_libfuzzer_sanitizer.sh b/setup_utils/merge_libfuzzer_sanitizer.sh index 4081f100..81be4c7f 100755 --- a/setup_utils/merge_libfuzzer_sanitizer.sh +++ b/setup_utils/merge_libfuzzer_sanitizer.sh @@ -35,7 +35,7 @@ cp "$sanitizer" "$tmp_sanitizer" ar d "$tmp_sanitizer" $strip_preinit # Intentionally not quoted -"$CXX" -Wl,--whole-archive "$libfuzzer" "$tmp_sanitizer" -Wl,--no-whole-archive -lpthread -shared -o "$tmp_merged" +"$CXX" -Wl,--whole-archive "$libfuzzer" "$tmp_sanitizer" -Wl,--no-whole-archive -lpthread -ldl -shared -o "$tmp_merged" echo "$tmp_merged" exit 0 From 95fd68a18b496e2517d2f192f98bf9d545073968 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Mon, 28 Jun 2021 15:06:22 +0200 Subject: [PATCH 12/36] Separate loop for calculation of stack size in Instrumentor.to_code --- atheris/instrument_bytecode.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/atheris/instrument_bytecode.py b/atheris/instrument_bytecode.py index 5ae72aca..9650979f 100644 --- a/atheris/instrument_bytecode.py +++ b/atheris/instrument_bytecode.py @@ -390,7 +390,6 @@ def to_code(self): listing = self._get_linear_instruction_listing() lnotab = [] code = bytes() - stacksize = 0 if self._code.co_firstlineno != listing[0].lineno: lnotab.append(0) @@ -403,9 +402,6 @@ def to_code(self): new_code = bytes() while i < len(listing) and listing[i].lineno == current_lineno: - stack_effect = listing[i].get_stack_effect() - stacksize = max(stacksize, stacksize + stack_effect) - new_code += listing[i].to_bytes() i += 1 @@ -445,10 +441,16 @@ def to_code(self): delta_lineno = 0 code += new_code + + stacksize = 0 + for instr in listing: + stacksize = max(stacksize, stacksize + instr.get_stack_effect()) + + assert(self._code.co_stacksize <= stacksize) return get_code_object( self._code, - max(self._code.co_stacksize, stacksize), + stacksize, code, tuple(self.consts), tuple(self._names), From c7632f2c4ad4f25562ecf78279d794533bfc2f09 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Mon, 28 Jun 2021 17:53:28 +0200 Subject: [PATCH 13/36] Corrected code that parses lnotab Removed assertion of stacksize --- atheris/instrument_bytecode.py | 63 +++------------------------------- atheris/version_dependent.py | 55 +++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 58 deletions(-) diff --git a/atheris/instrument_bytecode.py b/atheris/instrument_bytecode.py index 9650979f..9ac448fc 100644 --- a/atheris/instrument_bytecode.py +++ b/atheris/instrument_bytecode.py @@ -12,7 +12,7 @@ import dis from collections import OrderedDict -from .version_dependent import get_code_object, CONDITIONAL_JUMPS, UNCONDITIONAL_JUMPS, ENDS_FUNCTION, HAVE_REL_REFERENCE, HAVE_ABS_REFERENCE, REVERSE_CMP_OP +from .version_dependent import get_code_object, get_lnotab, CONDITIONAL_JUMPS, UNCONDITIONAL_JUMPS, ENDS_FUNCTION, HAVE_REL_REFERENCE, HAVE_ABS_REFERENCE, REVERSE_CMP_OP current_index = 0 current_pc = 0 @@ -388,65 +388,12 @@ def _get_linear_instruction_listing(self): def to_code(self): self._check_state() listing = self._get_linear_instruction_listing() - lnotab = [] - code = bytes() - - if self._code.co_firstlineno != listing[0].lineno: - lnotab.append(0) - lnotab.append(listing[0].lineno - self._code.co_firstlineno) - - i = 0 - - while i < len(listing): - current_lineno = listing[i].lineno - new_code = bytes() - - while i < len(listing) and listing[i].lineno == current_lineno: - new_code += listing[i].to_bytes() - i += 1 - - if i < len(listing): - term_zero = False - delta_bc = len(new_code) - delta_lineno = listing[i].lineno - current_lineno - - if delta_lineno <= -128 or delta_lineno >= 127 or delta_bc >= 255: - term_zero = True - - while True: - lnotab.append(min(delta_bc, 255)) - - if delta_lineno < 0: - lnotab.append((max(delta_lineno, -128)) % 256) - else: - lnotab.append(min(delta_lineno, 127)) - - if term_zero: - if delta_bc == 0 and delta_lineno == 0: - break - else: - if delta_bc < 255 and delta_lineno in range(-127, 128): - break - - if delta_bc >= 255: - delta_bc -= 255 - else: - delta_bc = 0 - - if delta_lineno <= -128: - delta_lineno += 128 - elif delta_lineno >= 127: - delta_lineno -= 127 - else: - delta_lineno = 0 - - code += new_code - + code = bytes() stacksize = 0 + for instr in listing: + code += instr.to_bytes() stacksize = max(stacksize, stacksize + instr.get_stack_effect()) - - assert(self._code.co_stacksize <= stacksize) return get_code_object( self._code, @@ -454,7 +401,7 @@ def to_code(self): code, tuple(self.consts), tuple(self._names), - bytes(lnotab) + get_lnotab(self._code, listing) ) def _generate_loc_invocation(self, lineno, offset): diff --git a/atheris/version_dependent.py b/atheris/version_dependent.py index ba2119be..eef22e74 100644 --- a/atheris/version_dependent.py +++ b/atheris/version_dependent.py @@ -135,3 +135,58 @@ def get_code_object(code_obj, stacksize, bytecode, consts, names, lnotab): code_obj.co_freevars, code_obj.co_cellvars ) + +### Lnotab handling ### + +if (3,6) <= PYTHON_VERSION <= (3,9): + def get_lnotab(code, listing): + lnotab = [] + current_lineno = listing[0].lineno + i = 0 + + assert(listing[0].lineno >= code.co_firstlineno) + + if listing[0].lineno > code.co_firstlineno: + delta_lineno = listing[0].lineno - code.co_firstlineno + + while delta_lineno > 127: + lnotab.extend([0, 127]) + delta_lineno -= 127 + + lnotab.extend([0, delta_lineno]) + + while True: + delta_bc = 0 + + while i < len(listing) and listing[i].lineno == current_lineno: + delta_bc += listing[i].get_size() + i += 1 + + if i >= len(listing): + break + + assert(delta_bc > 0) + + delta_lineno = listing[i].lineno - current_lineno + + while delta_bc > 255: + lnotab.extend([255, 0]) + delta_bc -= 255 + + if delta_lineno < 0: + while delta_lineno < -128: + lnotab.extend([delta_bc, 0x80]) + delta_bc = 0 + delta_lineno += 128 + + delta_lineno %= 256 + else: + while delta_lineno > 127: + lnotab.extend([delta_bc, 127]) + delta_bc = 0 + delta_lineno -= 127 + + lnotab.extend([delta_bc, delta_lineno]) + current_lineno = listing[i].lineno + + return bytes(lnotab) From b41e3fb1d5c8c5c1153efd0a958a2203c6f805d2 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Tue, 29 Jun 2021 13:48:58 +0200 Subject: [PATCH 14/36] Added trace_dataflow argument to atheris.Instrument() --- atheris/import_hook.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/atheris/import_hook.py b/atheris/import_hook.py index b751b435..b67b2e6f 100644 --- a/atheris/import_hook.py +++ b/atheris/import_hook.py @@ -14,9 +14,10 @@ from .instrument_bytecode import patch_code class AtherisMetaPathFinder(MetaPathFinder): - def __init__(self, packages): + def __init__(self, packages, trace_dataflow): super().__init__() self._target_packages = packages + self._trace_dataflow = trace_dataflow def find_spec(self, fullname, path, target=None): package_name = fullname.split(".")[0] @@ -28,15 +29,15 @@ def find_spec(self, fullname, path, target=None): return None if isinstance(spec.loader, SourceFileLoader): - spec.loader = AtherisSourceFileLoader(spec.loader.name, spec.loader.path) + spec.loader = AtherisSourceFileLoader(spec.loader.name, spec.loader.path, self._trace_dataflow) elif isinstance(spec.loader, SourcelessFileLoader): - spec.loader = AtherisSourcelessFileLoader(spec.loader.name, spec.loader.path) + spec.loader = AtherisSourcelessFileLoader(spec.loader.name, spec.loader.path, self._trace_dataflow) else: return None spec.loader_state = None - print(f"Instrumenting {fullname}") + print(f"INFO: Instrumenting {fullname}") return spec @@ -47,26 +48,35 @@ def invalidate_caches(self): return PathFinder.invalidate_caches() class AtherisSourceFileLoader(SourceFileLoader): + def __init__(self, name, path, trace_dataflow): + super().__init__(name, path) + self._trace_dataflow = trace_dataflow + def get_code(self, fullname): code = super().get_code(fullname) if code is None: return None else: - return patch_code(code, True) + return patch_code(code, self._trace_dataflow) class AtherisSourcelessFileLoader(SourcelessFileLoader): + def __init__(self, name, path, trace_dataflow): + super().__init__(name, path) + self._trace_dataflow = trace_dataflow + def get_code(self, fullname): code = super().get_code(fullname) if code is None: return None else: - return patch_code(code, True) + return patch_code(code, self._trace_dataflow) class HookManager: - def __init__(self, packages): + def __init__(self, packages, trace_dataflow): self._target_packages = packages + self._trace_dataflow = trace_dataflow def __enter__(self): i = 0 @@ -79,7 +89,7 @@ def __enter__(self): while i < len(sys.meta_path) and sys.meta_path[i] in [BuiltinImporter, FrozenImporter]: i += 1 - sys.meta_path.insert(i, AtherisMetaPathFinder(self._target_packages)) + sys.meta_path.insert(i, AtherisMetaPathFinder(self._target_packages, self._trace_dataflow)) return self @@ -91,7 +101,7 @@ def __exit__(self, *args): else: i += 1 -def instrument(*modules): +def instrument(*modules, trace_dataflow=True): """ This function temporarily installs an import hook which instruments all imported modules. @@ -113,4 +123,4 @@ def instrument(*modules): target_packages.add(module_name) - return HookManager(target_packages) + return HookManager(target_packages, trace_dataflow) From e5833b7febf5dc7151adb8c64d18454c96b25493 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Tue, 29 Jun 2021 15:53:23 +0200 Subject: [PATCH 15/36] Added copyright notice for Fraunhofer FKIE and updated notices to the current year --- atheris.cc | 3 ++- atheris.h | 3 ++- atheris/__init__.py | 14 ++++++++++++++ atheris/import_hook.py | 14 ++++++++++++++ atheris/instrument_bytecode.py | 16 +++++++++++++++- atheris/version_dependent.py | 15 +++++++++++++++ deployment/Dockerfile | 2 +- deployment/build_wheels.sh | 2 +- deployment/build_wheels_mac.sh | 2 +- deployment/deploy_pypi.sh | 2 +- example_fuzzers/example_library.py | 14 ++++++++++++++ example_fuzzers/fuzzing_example.py | 3 ++- .../idna_fuzzer/idna_acceptance_fuzzer.py | 3 ++- example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py | 3 ++- .../idna_fuzzer/libidn2_wrapper/libidn2.cc | 2 +- .../idna_fuzzer/libidn2_wrapper/setup.py | 2 +- .../json_fuzzer/build_install_ujson.sh | 2 +- .../json_fuzzer/hypothesis_structured_fuzzer.py | 2 +- .../json_fuzzer/json_differential_fuzzer.py | 2 +- example_fuzzers/json_fuzzer/ujson_fuzzer.py | 2 +- example_fuzzers/yaml_fuzzer/yaml_fuzzer.py | 3 ++- fuzzed_data_provider.cc | 2 +- fuzzed_data_provider.h | 2 +- fuzzed_data_provider_test.py | 2 +- libfuzzer.cc | 3 ++- macros.h | 2 +- setup.py | 3 ++- setup_utils/check_libfuzzer_version.sh | 2 +- setup_utils/find_libfuzzer.sh | 2 +- setup_utils/fuzzer_run_driver_wrapper.cc | 2 +- setup_utils/merge_libfuzzer_sanitizer.sh | 2 +- setup_utils/upgrade_libfuzzer.sh | 2 +- third_party/build_modified_libfuzzer.sh | 2 +- tracer.cc | 3 ++- tracer.h | 3 ++- util.cc | 2 +- util.h | 2 +- util_test.cc | 2 +- 38 files changed, 115 insertions(+), 34 deletions(-) diff --git a/atheris.cc b/atheris.cc index 0b1b29c2..22d351e9 100644 --- a/atheris.cc +++ b/atheris.cc @@ -1,4 +1,5 @@ -// Copyright 2020 Google LLC +// Copyright 2021 Google LLC +// Copyright 2021 Fraunhofer FKIE // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/atheris.h b/atheris.h index e86bfc10..8df0f5fc 100644 --- a/atheris.h +++ b/atheris.h @@ -1,5 +1,6 @@ /* - * Copyright 2020 Google LLC + * Copyright 2021 Google LLC + * Copyright 2021 Fraunhofer FKIE * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/atheris/__init__.py b/atheris/__init__.py index 51c05313..0b11d026 100644 --- a/atheris/__init__.py +++ b/atheris/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2021 Fraunhofer FKIE +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .atheris import * from .atheris import _loc, _reg, _cmp from .import_hook import instrument as Instrument diff --git a/atheris/import_hook.py b/atheris/import_hook.py index b67b2e6f..1d8f8976 100644 --- a/atheris/import_hook.py +++ b/atheris/import_hook.py @@ -1,3 +1,17 @@ +# Copyright 2021 Fraunhofer FKIE +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ atheris instruments modules at import-time. The Instrument() function temporarily installs an import hook (AtherisMetaPathFinder) diff --git a/atheris/instrument_bytecode.py b/atheris/instrument_bytecode.py index 9ac448fc..6cd34c8a 100644 --- a/atheris/instrument_bytecode.py +++ b/atheris/instrument_bytecode.py @@ -1,3 +1,17 @@ +# Copyright 2021 Fraunhofer FKIE +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ This module provides the instrumentation functionality for atheris. Mainly the function patch_code(), which can instrument a code object @@ -208,7 +222,7 @@ class Instrumentor: 6. Call _handle_size_changes() Take a look at trace_control_flow() and trace_data_flow() for examples. - Note that this calls only supports insertions, not deletions. + Note that Instrumentor only supports insertions, not deletions. """ def __init__(self, code, start_idx, start_pc): self._start_idx = start_idx diff --git a/atheris/version_dependent.py b/atheris/version_dependent.py index eef22e74..dfbfb3cc 100644 --- a/atheris/version_dependent.py +++ b/atheris/version_dependent.py @@ -1,8 +1,23 @@ +# Copyright 2021 Fraunhofer FKIE +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ This module manages the different aspects of bytecode instrumentation that depend on specific python versions: - Instructions - Shape of a code object + - Construction of the lnotab Currently supported python versions are: - 3.6 diff --git a/deployment/Dockerfile b/deployment/Dockerfile index 3ebf4dfa..686ac320 100644 --- a/deployment/Dockerfile +++ b/deployment/Dockerfile @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/deployment/build_wheels.sh b/deployment/build_wheels.sh index 6ac53fa0..b4b69007 100755 --- a/deployment/build_wheels.sh +++ b/deployment/build_wheels.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/deployment/build_wheels_mac.sh b/deployment/build_wheels_mac.sh index 8ab32e7f..90a7f996 100644 --- a/deployment/build_wheels_mac.sh +++ b/deployment/build_wheels_mac.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/deployment/deploy_pypi.sh b/deployment/deploy_pypi.sh index d1ad724b..81848839 100755 --- a/deployment/deploy_pypi.sh +++ b/deployment/deploy_pypi.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/example_fuzzers/example_library.py b/example_fuzzers/example_library.py index c8aff5b3..c9497456 100644 --- a/example_fuzzers/example_library.py +++ b/example_fuzzers/example_library.py @@ -1,3 +1,17 @@ +# Copyright 2021 Fraunhofer FKIE +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + def CodeBeingFuzzed(number): """Raises an exception if number is 17.""" if number == 17: diff --git a/example_fuzzers/fuzzing_example.py b/example_fuzzers/fuzzing_example.py index 505c213c..76b571b9 100644 --- a/example_fuzzers/fuzzing_example.py +++ b/example_fuzzers/fuzzing_example.py @@ -1,6 +1,7 @@ #!/usr/bin/python3 -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC +# Copyright 2021 Fraunhofer FKIE # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py b/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py index eebcbb7a..ff09b888 100644 --- a/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py +++ b/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py @@ -1,7 +1,8 @@ #!/usr/bin/python3 # coding=utf-8 -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC +# Copyright 2021 Fraunhofer FKIE # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py b/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py index 8561e9a1..97920a44 100755 --- a/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py +++ b/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py @@ -1,7 +1,8 @@ #!/usr/bin/python3 # coding=utf-8 -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC +# Copyright 2021 Fraunhofer FKIE # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/example_fuzzers/idna_fuzzer/libidn2_wrapper/libidn2.cc b/example_fuzzers/idna_fuzzer/libidn2_wrapper/libidn2.cc index a9b8ca39..081da5b1 100644 --- a/example_fuzzers/idna_fuzzer/libidn2_wrapper/libidn2.cc +++ b/example_fuzzers/idna_fuzzer/libidn2_wrapper/libidn2.cc @@ -1,4 +1,4 @@ -// Copyright 2020 Google LLC +// Copyright 2021 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/example_fuzzers/idna_fuzzer/libidn2_wrapper/setup.py b/example_fuzzers/idna_fuzzer/libidn2_wrapper/setup.py index 9b64423e..73ada8ac 100644 --- a/example_fuzzers/idna_fuzzer/libidn2_wrapper/setup.py +++ b/example_fuzzers/idna_fuzzer/libidn2_wrapper/setup.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/example_fuzzers/json_fuzzer/build_install_ujson.sh b/example_fuzzers/json_fuzzer/build_install_ujson.sh index 866b66ed..af72acba 100644 --- a/example_fuzzers/json_fuzzer/build_install_ujson.sh +++ b/example_fuzzers/json_fuzzer/build_install_ujson.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/example_fuzzers/json_fuzzer/hypothesis_structured_fuzzer.py b/example_fuzzers/json_fuzzer/hypothesis_structured_fuzzer.py index b8baea56..1e0d7845 100644 --- a/example_fuzzers/json_fuzzer/hypothesis_structured_fuzzer.py +++ b/example_fuzzers/json_fuzzer/hypothesis_structured_fuzzer.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -# Copyright 2020 Zac Hatfield-Dodds +# Copyright 2021 Zac Hatfield-Dodds # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/example_fuzzers/json_fuzzer/json_differential_fuzzer.py b/example_fuzzers/json_fuzzer/json_differential_fuzzer.py index f94ea440..59904783 100755 --- a/example_fuzzers/json_fuzzer/json_differential_fuzzer.py +++ b/example_fuzzers/json_fuzzer/json_differential_fuzzer.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/example_fuzzers/json_fuzzer/ujson_fuzzer.py b/example_fuzzers/json_fuzzer/ujson_fuzzer.py index dcef6c6d..9877f06e 100755 --- a/example_fuzzers/json_fuzzer/ujson_fuzzer.py +++ b/example_fuzzers/json_fuzzer/ujson_fuzzer.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py b/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py index e78ff17c..b60eba42 100644 --- a/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py +++ b/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py @@ -1,6 +1,7 @@ #!/usr/bin/python3 -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC +# Copyright 2021 Fraunhofer FKIE # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/fuzzed_data_provider.cc b/fuzzed_data_provider.cc index b9337277..86c778e1 100644 --- a/fuzzed_data_provider.cc +++ b/fuzzed_data_provider.cc @@ -1,4 +1,4 @@ -// Copyright 2020 Google LLC +// Copyright 2021 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/fuzzed_data_provider.h b/fuzzed_data_provider.h index 8f3cd768..9263c4ea 100644 --- a/fuzzed_data_provider.h +++ b/fuzzed_data_provider.h @@ -1,5 +1,5 @@ /* - * Copyright 2020 Google LLC + * Copyright 2021 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/fuzzed_data_provider_test.py b/fuzzed_data_provider_test.py index a44dd462..f053cee8 100644 --- a/fuzzed_data_provider_test.py +++ b/fuzzed_data_provider_test.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/libfuzzer.cc b/libfuzzer.cc index 20a03955..42166d05 100644 --- a/libfuzzer.cc +++ b/libfuzzer.cc @@ -1,4 +1,5 @@ -// Copyright 2020 Google LLC +// Copyright 2021 Google LLC +// Copyright 2021 Fraunhofer FKIE // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/macros.h b/macros.h index 5eb24901..09ae1b2d 100644 --- a/macros.h +++ b/macros.h @@ -1,5 +1,5 @@ /* - * Copyright 2020 Google LLC + * Copyright 2021 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/setup.py b/setup.py index dcb2e65e..d552f6f6 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,5 @@ -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC +# Copyright 2021 Fraunhofer FKIE # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/setup_utils/check_libfuzzer_version.sh b/setup_utils/check_libfuzzer_version.sh index 1defc47a..48fc58e6 100755 --- a/setup_utils/check_libfuzzer_version.sh +++ b/setup_utils/check_libfuzzer_version.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/setup_utils/find_libfuzzer.sh b/setup_utils/find_libfuzzer.sh index 1691b30a..6f03a0db 100755 --- a/setup_utils/find_libfuzzer.sh +++ b/setup_utils/find_libfuzzer.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/setup_utils/fuzzer_run_driver_wrapper.cc b/setup_utils/fuzzer_run_driver_wrapper.cc index e312c141..bcfe3ee0 100644 --- a/setup_utils/fuzzer_run_driver_wrapper.cc +++ b/setup_utils/fuzzer_run_driver_wrapper.cc @@ -1,4 +1,4 @@ -// Copyright 2020 Google LLC +// Copyright 2021 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/setup_utils/merge_libfuzzer_sanitizer.sh b/setup_utils/merge_libfuzzer_sanitizer.sh index 81be4c7f..826e2237 100755 --- a/setup_utils/merge_libfuzzer_sanitizer.sh +++ b/setup_utils/merge_libfuzzer_sanitizer.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/setup_utils/upgrade_libfuzzer.sh b/setup_utils/upgrade_libfuzzer.sh index 1d9055da..cc0577eb 100755 --- a/setup_utils/upgrade_libfuzzer.sh +++ b/setup_utils/upgrade_libfuzzer.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/third_party/build_modified_libfuzzer.sh b/third_party/build_modified_libfuzzer.sh index a3533b76..86b2c4e4 100755 --- a/third_party/build_modified_libfuzzer.sh +++ b/third_party/build_modified_libfuzzer.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tracer.cc b/tracer.cc index 491684e3..1f0b96e8 100644 --- a/tracer.cc +++ b/tracer.cc @@ -1,4 +1,5 @@ -// Copyright 2020 Google LLC +// Copyright 2021 Google LLC +// Copyright 2021 Fraunhofer FKIE // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/tracer.h b/tracer.h index c77d6a1a..c46af71d 100644 --- a/tracer.h +++ b/tracer.h @@ -1,5 +1,6 @@ /* - * Copyright 2020 Google LLC + * Copyright 2021 Google LLC + * Copyright 2021 Fraunhofer FKIE * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/util.cc b/util.cc index 881b9116..efcbb553 100644 --- a/util.cc +++ b/util.cc @@ -1,4 +1,4 @@ -// Copyright 2020 Google LLC +// Copyright 2021 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/util.h b/util.h index daaa3503..fcb1967e 100644 --- a/util.h +++ b/util.h @@ -1,5 +1,5 @@ /* - * Copyright 2020 Google LLC + * Copyright 2021 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/util_test.cc b/util_test.cc index 9a6b7530..85e67ac8 100644 --- a/util_test.cc +++ b/util_test.cc @@ -1,4 +1,4 @@ -// Copyright 2020 Google LLC +// Copyright 2021 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. From f551ad227cf99c37ff2a8a0ed710f29bde2b54be Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Wed, 30 Jun 2021 13:12:45 +0200 Subject: [PATCH 16/36] Changed atheris.Instrument() to atheris.instrument() --- README.md | 22 +++++++++++++------ atheris/__init__.py | 2 +- atheris/import_hook.py | 8 +++---- example_fuzzers/fuzzing_example.py | 2 +- .../idna_fuzzer/idna_acceptance_fuzzer.py | 2 +- .../idna_fuzzer/idna_uts46_fuzzer.py | 2 +- example_fuzzers/json_fuzzer/ujson_fuzzer.py | 2 +- example_fuzzers/yaml_fuzzer/yaml_fuzzer.py | 2 +- 8 files changed, 25 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 9253c8a6..c234364b 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ CLANG_BIN="$(pwd)/bin/clang" pip3 install atheris import sys import atheris -with atheris.Instrument(): +with atheris.instrument(): import some_library def TestOneInput(data): @@ -76,22 +76,30 @@ Atheris is fully supported by [OSS-Fuzz](https://github.com/google/oss-fuzz), Go ## API -The `atheris` module provides three key functions: `Instrument()`, `Setup()` and `Fuzz()`. +The `atheris` module provides three key functions: `instrument()`, `Setup()` and `Fuzz()`. -In your source file, when you import your target library make sure that this happens inside a `with atheris.Instrument():`-block. +In your source file, import all libraries you wish to fuzz inside a `with atheris.instrument():`-block, like this: +```py +# library_a will not get instrumented +import library_a + +with atheris.instrument(): + # library_b will get instrumented + import library_b +``` Define a fuzzer entry point function and pass it to `atheris.Setup()` along with the fuzzer's arguments (typically `sys.argv`). Finally, call `atheris.Fuzz()` to start fuzzing. You must call `atheris.Setup()` before `atheris.Fuzz()`. -### `Instrument(*modules)` +#### `instrument(*modules)` - `modules`: A list of module names that filters which modules shall be instrumented. If no names are specified every module gets instrumented. This has to be used together with a `with`-Statement. All modules that get imported in the `with`-block get instrumented for coverage collection. -### `Setup(args, test_one_input)` +#### `Setup(args, test_one_input)` - `args`: A list of strings: the process arguments to pass to the fuzzer, typically `sys.argv`. This argument list may be modified in-place, to remove arguments consumed by the fuzzer. See [the LibFuzzer docs](https://llvm.org/docs/LibFuzzer.html#options) for a list of such options. - `test_one_input`: your fuzzer's entry point. Must take a single `bytes` argument. This will be repeatedly invoked with a single bytes container. -### `Fuzz()` +#### `Fuzz()` This starts the fuzzer. You must have called `Setup()` before calling this function. This function does not return. @@ -99,7 +107,7 @@ In many cases `Setup()` and `Fuzz()` could be combined into a single function, b separated because you may want the fuzzer to consume the command-line arguments it handles before passing any remaining arguments to another setup function. -### `FuzzedDataProvider` +#### `FuzzedDataProvider` Often, a `bytes` object is not convenient input to your code being fuzzed. Similar to libFuzzer, we provide a FuzzedDataProvider to translate these bytes into other input forms. Alternatively, you can use [Hypothesis](https://hypothesis.readthedocs.io/) as described below. diff --git a/atheris/__init__.py b/atheris/__init__.py index 0b11d026..134b3093 100644 --- a/atheris/__init__.py +++ b/atheris/__init__.py @@ -14,4 +14,4 @@ from .atheris import * from .atheris import _loc, _reg, _cmp -from .import_hook import instrument as Instrument +from .import_hook import instrument diff --git a/atheris/import_hook.py b/atheris/import_hook.py index 1d8f8976..ddbb9b5d 100644 --- a/atheris/import_hook.py +++ b/atheris/import_hook.py @@ -14,7 +14,7 @@ """ atheris instruments modules at import-time. -The Instrument() function temporarily installs an import hook (AtherisMetaPathFinder) +The instrument() function temporarily installs an import hook (AtherisMetaPathFinder) in sys.meta_path that employs a custom loader (AtherisSourceFileLoader, AtherisSourcelessFileLoader). """ @@ -126,11 +126,11 @@ def instrument(*modules, trace_dataflow=True): for module_name in modules: if not isinstance(module_name, str): - raise RuntimeError("atheris.Instrument() expects names of modules of type ") + raise RuntimeError("atheris.instrument() expects names of modules of type ") elif not module_name: - raise RuntimeError(f"atheris.Instrument(): Invalid module name: {module_name}") + raise RuntimeError(f"atheris.instrument(): Invalid module name: {module_name}") elif module_name[0] == ".": - raise RuntimeError("atheris.Instrument(): Please specify fully qualified module names (absolute not relative)") + raise RuntimeError("atheris.instrument(): Please specify fully qualified module names (absolute not relative)") if "." in module_name: module_name = module_name.split(".")[0] diff --git a/example_fuzzers/fuzzing_example.py b/example_fuzzers/fuzzing_example.py index 76b571b9..a0d32a58 100644 --- a/example_fuzzers/fuzzing_example.py +++ b/example_fuzzers/fuzzing_example.py @@ -21,7 +21,7 @@ import atheris -with atheris.Instrument("example_library"): +with atheris.instrument("example_library"): from example_library import CodeBeingFuzzed diff --git a/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py b/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py index ff09b888..610c7304 100644 --- a/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py +++ b/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py @@ -45,7 +45,7 @@ import atheris -with atheris.Instrument("idna"): +with atheris.instrument("idna"): import idna import libidn2 diff --git a/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py b/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py index 97920a44..8d268e9a 100755 --- a/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py +++ b/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py @@ -44,7 +44,7 @@ import sys import unicodedata -with atheris.Instrument("idna"): +with atheris.instrument("idna"): import idna # libidn2 is just an extension. diff --git a/example_fuzzers/json_fuzzer/ujson_fuzzer.py b/example_fuzzers/json_fuzzer/ujson_fuzzer.py index 9877f06e..c92fcb8c 100755 --- a/example_fuzzers/json_fuzzer/ujson_fuzzer.py +++ b/example_fuzzers/json_fuzzer/ujson_fuzzer.py @@ -29,7 +29,7 @@ import sys import atheris_no_libfuzzer as atheris -# Here atheris.Instrument() is not necessary +# Here atheris.instrument() is not necessary # because ujson is just an extension. # Only python code can be instrumented. import ujson diff --git a/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py b/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py index b60eba42..08515bf1 100644 --- a/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py +++ b/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py @@ -30,7 +30,7 @@ import atheris -with atheris.Instrument("ruamel.yaml"): +with atheris.instrument("ruamel.yaml"): from ruamel import yaml as ruamel_yaml # Suppress all warnings. From 2b785101de26a290bab9d2fc5d7909ed0341ecc3 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Wed, 30 Jun 2021 13:57:42 +0200 Subject: [PATCH 17/36] Changed atheris.instrument()'s arguments to `include` and `exclude` --- README.md | 7 ++-- atheris/import_hook.py | 41 +++++++++++-------- example_fuzzers/fuzzing_example.py | 2 +- .../idna_fuzzer/idna_acceptance_fuzzer.py | 2 +- .../idna_fuzzer/idna_uts46_fuzzer.py | 2 +- example_fuzzers/yaml_fuzzer/yaml_fuzzer.py | 2 +- 6 files changed, 32 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index c234364b..1b7b222e 100644 --- a/README.md +++ b/README.md @@ -89,10 +89,11 @@ with atheris.instrument(): ``` Define a fuzzer entry point function and pass it to `atheris.Setup()` along with the fuzzer's arguments (typically `sys.argv`). Finally, call `atheris.Fuzz()` to start fuzzing. You must call `atheris.Setup()` before `atheris.Fuzz()`. -#### `instrument(*modules)` -- `modules`: A list of module names that filters which modules shall be instrumented. If no names are specified every module gets instrumented. +#### `instrument(include=[], exclude=[])` +- `include`: A list of fully-qualified module names that shall be instrumented. If this is not specified every module will get instrumented. +- `exclude`: A list of fully-qualified module names that shall NOT be instrumented. -This has to be used together with a `with`-Statement. All modules that get imported in the `with`-block get instrumented for coverage collection. +This has to be used together with a `with`-Statement. #### `Setup(args, test_one_input)` - `args`: A list of strings: the process arguments to pass to the fuzzer, typically `sys.argv`. This argument list may be modified in-place, to remove arguments consumed by the fuzzer. diff --git a/atheris/import_hook.py b/atheris/import_hook.py index ddbb9b5d..42904fba 100644 --- a/atheris/import_hook.py +++ b/atheris/import_hook.py @@ -28,15 +28,19 @@ from .instrument_bytecode import patch_code class AtherisMetaPathFinder(MetaPathFinder): - def __init__(self, packages, trace_dataflow): + def __init__(self, include_packages, exclude_modules, trace_dataflow): super().__init__() - self._target_packages = packages + self._include_packages = include_packages + self._exclude_modules = exclude_modules self._trace_dataflow = trace_dataflow def find_spec(self, fullname, path, target=None): + if fullname in self._exclude_modules: + return None + package_name = fullname.split(".")[0] - if (not self._target_packages or package_name in self._target_packages) and package_name != "atheris": + if (not self._include_packages or package_name in self._include_packages) and package_name != "atheris": spec = PathFinder.find_spec(fullname, path, target) if spec is None or spec.loader is None: @@ -88,8 +92,9 @@ def get_code(self, fullname): return patch_code(code, self._trace_dataflow) class HookManager: - def __init__(self, packages, trace_dataflow): - self._target_packages = packages + def __init__(self, include_packages, exclude_modules, trace_dataflow): + self._include_packages = include_packages + self._exclude_modules = exclude_modules self._trace_dataflow = trace_dataflow def __enter__(self): @@ -103,7 +108,7 @@ def __enter__(self): while i < len(sys.meta_path) and sys.meta_path[i] in [BuiltinImporter, FrozenImporter]: i += 1 - sys.meta_path.insert(i, AtherisMetaPathFinder(self._target_packages, self._trace_dataflow)) + sys.meta_path.insert(i, AtherisMetaPathFinder(self._include_packages, self._exclude_modules, self._trace_dataflow)) return self @@ -115,26 +120,28 @@ def __exit__(self, *args): else: i += 1 -def instrument(*modules, trace_dataflow=True): +def instrument(include=[], exclude=[]): """ - This function temporarily installs an import hook which instruments - all imported modules. - The arguments to this function are names of modules or packages. - If it is a fully qualified module name, the name of its package will be used. + This function temporarily installs an import hook which instruments the imported modules. + `include` is a list of module names that shall be instrumented. + `exclude` is a list of module names that shall not be instrumented. + Note that for every module name in `include` the whole package will + get instrumented. """ - target_packages = set() + include_packages = set() - for module_name in modules: + for module_name in include + exclude: if not isinstance(module_name, str): raise RuntimeError("atheris.instrument() expects names of modules of type ") elif not module_name: - raise RuntimeError(f"atheris.instrument(): Invalid module name: {module_name}") + raise RuntimeError(f"atheris.instrument(): You supplied an empty module name") elif module_name[0] == ".": raise RuntimeError("atheris.instrument(): Please specify fully qualified module names (absolute not relative)") - + + for module_name in include: if "." in module_name: module_name = module_name.split(".")[0] - target_packages.add(module_name) + include_packages.add(module_name) - return HookManager(target_packages, trace_dataflow) + return HookManager(include_packages, set(exclude), trace_dataflow=True) diff --git a/example_fuzzers/fuzzing_example.py b/example_fuzzers/fuzzing_example.py index a0d32a58..ff0bb9e0 100644 --- a/example_fuzzers/fuzzing_example.py +++ b/example_fuzzers/fuzzing_example.py @@ -21,7 +21,7 @@ import atheris -with atheris.instrument("example_library"): +with atheris.instrument(include=["example_library"]): from example_library import CodeBeingFuzzed diff --git a/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py b/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py index 610c7304..def3fafb 100644 --- a/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py +++ b/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py @@ -45,7 +45,7 @@ import atheris -with atheris.instrument("idna"): +with atheris.instrument(include=["idna"]): import idna import libidn2 diff --git a/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py b/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py index 8d268e9a..fa40b9f2 100755 --- a/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py +++ b/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py @@ -44,7 +44,7 @@ import sys import unicodedata -with atheris.instrument("idna"): +with atheris.instrument(include=["idna"]): import idna # libidn2 is just an extension. diff --git a/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py b/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py index 08515bf1..26ff3342 100644 --- a/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py +++ b/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py @@ -30,7 +30,7 @@ import atheris -with atheris.instrument("ruamel.yaml"): +with atheris.instrument(include=["ruamel.yaml"]): from ruamel import yaml as ruamel_yaml # Suppress all warnings. From d378dfee23b31f9e6b686d5430765d9734e68d15 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Wed, 30 Jun 2021 14:00:07 +0200 Subject: [PATCH 18/36] Added notice about atheris 1.0 when python version is too old --- atheris/version_dependent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atheris/version_dependent.py b/atheris/version_dependent.py index dfbfb3cc..491132f8 100644 --- a/atheris/version_dependent.py +++ b/atheris/version_dependent.py @@ -32,7 +32,7 @@ PYTHON_VERSION = sys.version_info[:2] if PYTHON_VERSION < (3,6) or PYTHON_VERSION > (3,9): - raise RuntimeError(f"You are fuzzing on an unsupported python version: {PYTHON_VERSION[0]}.{PYTHON_VERSION[1]}. Only 3.6 - 3.9 are supported.") + raise RuntimeError(f"You are fuzzing on an unsupported python version: {PYTHON_VERSION[0]}.{PYTHON_VERSION[1]}. Only 3.6 - 3.9 are supported by atheris 2.0. Use atheris 1.0 for older python versions.") ### Instruction categories ### From 07e2c569d328f70308044b07aaef2ae0217cdc14 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Wed, 30 Jun 2021 14:04:48 +0200 Subject: [PATCH 19/36] Updated comment of ujson_fuzzer --- example_fuzzers/json_fuzzer/ujson_fuzzer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/example_fuzzers/json_fuzzer/ujson_fuzzer.py b/example_fuzzers/json_fuzzer/ujson_fuzzer.py index c92fcb8c..9fb42325 100755 --- a/example_fuzzers/json_fuzzer/ujson_fuzzer.py +++ b/example_fuzzers/json_fuzzer/ujson_fuzzer.py @@ -31,7 +31,8 @@ # Here atheris.instrument() is not necessary # because ujson is just an extension. -# Only python code can be instrumented. +# Only python code is instrumented with atheris.instrument(); +# extensions are instrumented at compile-time. import ujson From 7c805f0a4598f126666d1580b9bab681e77b8a88 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Wed, 30 Jun 2021 14:24:34 +0200 Subject: [PATCH 20/36] Renamed _loc to _trace_branch and _reg to _reserve_counters --- atheris.cc | 4 ++-- atheris.h | 4 ++-- atheris/__init__.py | 2 +- atheris/instrument_bytecode.py | 28 ++++++++++++++-------------- libfuzzer.cc | 6 +++--- libfuzzer.h | 0 6 files changed, 22 insertions(+), 22 deletions(-) delete mode 100644 libfuzzer.h diff --git a/atheris.cc b/atheris.cc index 22d351e9..3a7e165e 100644 --- a/atheris.cc +++ b/atheris.cc @@ -35,8 +35,8 @@ PYBIND11_MODULE(ATHERIS_MODULE_NAME, m) { m.def("Setup", &Setup); m.def("Fuzz", &Fuzz); - m.def("_loc", &_loc); - m.def("_reg", &_reg); + m.def("_trace_branch", &_trace_branch); + m.def("_reserve_counters", &_reserve_counters); m.def("_cmp", &_cmp); py::class_(m, "FuzzedDataProvider") diff --git a/atheris.h b/atheris.h index 8df0f5fc..caf18c56 100644 --- a/atheris.h +++ b/atheris.h @@ -44,8 +44,8 @@ std::vector Setup( void Fuzz(); py::handle _cmp (py::handle left, py::handle right, int opid, unsigned long long idx, bool left_is_const); -void _reg(unsigned long long num); -void _loc(unsigned long long idx); +void _reserve_counters(unsigned long long num); +void _trace_branch(unsigned long long idx); } // namespace atheris diff --git a/atheris/__init__.py b/atheris/__init__.py index 134b3093..178a9829 100644 --- a/atheris/__init__.py +++ b/atheris/__init__.py @@ -13,5 +13,5 @@ # limitations under the License. from .atheris import * -from .atheris import _loc, _reg, _cmp +from .atheris import _trace_branch, _reserve_counters, _cmp from .import_hook import instrument diff --git a/atheris/instrument_bytecode.py b/atheris/instrument_bytecode.py index 6cd34c8a..82d4dbb1 100644 --- a/atheris/instrument_bytecode.py +++ b/atheris/instrument_bytecode.py @@ -32,8 +32,8 @@ current_pc = 0 TARGET_MODULE = "atheris" -REGISTER_FUNCTION = "_reg" -COVERAGE_FUNCTION = "_loc" +REGISTER_FUNCTION = "_reserve_counters" +COVERAGE_FUNCTION = "_trace_branch" COMPARE_FUNCTION = "_cmp" class Instruction: @@ -418,9 +418,9 @@ def to_code(self): get_lnotab(self._code, listing) ) - def _generate_loc_invocation(self, lineno, offset): + def _generate_trace_branch_invocation(self, lineno, offset): """ - Builds the bytecode that calls atheris._loc() + Builds the bytecode that calls atheris._trace_branch() """ to_insert = [] start_offset = offset @@ -515,21 +515,21 @@ def _generate_const_cmp_invocation(self, op, lineno, offset, switch): def trace_control_flow(self): """ - Insert a call to atheris._loc() in every basic block that - is a target of a branch. The argument of _loc() is an id for + Insert a call to atheris._trace_branch() in every basic block that + is a target of a branch. The argument of _trace_branch() is an id for the branch. The following bytecode gets inserted: LOAD_GLOBAL atheris - LOAD_ATTR _loc + LOAD_ATTR _trace_branch LOAD_CONST CALL_FUNCTION 1 - POP_TOP ; _loc() returns None, remove the return value + POP_TOP ; _trace_branch() returns None, remove the return value """ already_instrumented = set() offset = self._cfg[0].instructions[0].offset - total_size, to_insert = self._generate_loc_invocation(self._cfg[0].instructions[0].lineno, offset) + total_size, to_insert = self._generate_trace_branch_invocation(self._cfg[0].instructions[0].lineno, offset) self._adjust(offset, total_size) self._cfg[0].instructions = to_insert + self._cfg[0].instructions @@ -547,7 +547,7 @@ def trace_control_flow(self): if bb.id in source_bb.edges and source_bb.instructions[-1].reference == offset: source_instr.append(source_bb.instructions[-1]) - total_size, to_insert = self._generate_loc_invocation(bb.instructions[0].lineno, offset) + total_size, to_insert = self._generate_trace_branch_invocation(bb.instructions[0].lineno, offset) self._adjust(offset, total_size, *source_instr) @@ -558,7 +558,7 @@ def trace_control_flow(self): def insert_registration(self, num_counters): """ This function inserts an import of atheris and a call to - atheris._reg() that tells atheris how many branches were instrumented. + atheris._reserve_counters() that tells atheris how many branches were instrumented. This function should only be called once for the root code object of a module after every nested code object has been instrumented. @@ -568,12 +568,12 @@ def insert_registration(self, num_counters): IMPORT_NAME atheris STORE_GLOBAL atheris - The bytecode that calls _reg() looks like this: + The bytecode that calls _reserve_counters() looks like this: LOAD_GLOBAL atheris - LOAD_ATTR _reg + LOAD_ATTR _reserve_counters LOAD_CONST CALL_FUNCTION 1 - POP_TOP ; discard return value of _reg() + POP_TOP ; discard return value of _reserve_counters() """ const_0 = self._get_const(0) const_None = self._get_const(None) diff --git a/libfuzzer.cc b/libfuzzer.cc index 42166d05..0ad23014 100644 --- a/libfuzzer.cc +++ b/libfuzzer.cc @@ -68,19 +68,19 @@ std::vector& args_global = *new std::vector(); bool setup_called = false; unsigned long long num_counters = 0; -unsigned char* counters = NULL; +unsigned char* counters = nullptr; } // namespace NO_SANITIZE -void _loc(unsigned long long idx) { +void _trace_branch(unsigned long long idx) { if (counters && idx < num_counters) { counters[idx]++; } } NO_SANITIZE -void _reg(unsigned long long num) { +void _reserve_counters(unsigned long long num) { num_counters += num; } diff --git a/libfuzzer.h b/libfuzzer.h deleted file mode 100644 index e69de29b..00000000 From 644f7f07de067255aa517b75c7683fed14865c73 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Wed, 30 Jun 2021 15:24:25 +0200 Subject: [PATCH 21/36] Added support for instrumenting modules after atheris.Fuzz() has been called --- libfuzzer.cc | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/libfuzzer.cc b/libfuzzer.cc index 0ad23014..2495220a 100644 --- a/libfuzzer.cc +++ b/libfuzzer.cc @@ -64,29 +64,35 @@ std::function& test_one_input_global = }); std::vector& args_global = *new std::vector(); - +std::vector& counters = *new std::vector(); bool setup_called = false; - -unsigned long long num_counters = 0; -unsigned char* counters = nullptr; +bool fuzz_called = false; } // namespace NO_SANITIZE void _trace_branch(unsigned long long idx) { - if (counters && idx < num_counters) { + if (idx < counters.size()) { counters[idx]++; } } NO_SANITIZE void _reserve_counters(unsigned long long num) { - num_counters += num; + if (num > 0) { + unsigned int old_size = counters.size(); + + counters.resize(old_size + num, 0); + + if (fuzz_called) { + __sanitizer_cov_8bit_counters_init(&counters[old_size], &counters[old_size] + num); + } + } } NO_SANITIZE py::handle _cmp (py::handle left, py::handle right, int opid, unsigned long long idx, bool left_is_const) { - return TraceCompareOp(counters + idx, left.ptr(), right.ptr(), opid, left_is_const); + return TraceCompareOp(&counters[0] + idx, left.ptr(), right.ptr(), opid, left_is_const); } NO_SANITIZE @@ -170,6 +176,8 @@ void Fuzz() { << std::endl; exit(1); } + + fuzz_called = true; std::vector args; args.reserve(args_global.size() + 1); @@ -180,10 +188,8 @@ void Fuzz() { char** args_ptr = &args[0]; int args_size = args_global.size(); - if (num_counters) { - counters = new unsigned char[num_counters]; - memset(counters, 0, num_counters); - __sanitizer_cov_8bit_counters_init(counters, counters + num_counters); + if (counters.size()) { + __sanitizer_cov_8bit_counters_init(&counters[0], &counters[0] + counters.size()); } exit(LLVMFuzzerRunDriver(&args_size, &args_ptr, &TestOneInput)); From abdf2016c08487b21da10d134c60a5e8c652f3df Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Wed, 30 Jun 2021 15:41:43 +0200 Subject: [PATCH 22/36] Documented TraceCompareOp --- tracer.cc | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tracer.cc b/tracer.cc index 1f0b96e8..54e021c9 100644 --- a/tracer.cc +++ b/tracer.cc @@ -92,6 +92,16 @@ void TraceCompareUnicode(PyObject* left, PyObject* right, void* pc) { } } +// This function hooks COMPARE_OP, inserts calls for dataflow tracing +// and performs an actual comparison at the end. +// pc is a pointer belonging exclusively to the current comparison. +// left and right are the objects to compare. +// opid is one of Py_LT, Py_LE, Py_EQ, Py_NE, Py_GT, or Py_GE. +// left_is_const states whether the left argument is a constant. +// When two values are compared, only one constant can be involved +// otherwise this function wouldn't get called. And if a constant +// is involved it is always brought to the left because __sanitizer_cov_trace_const_cmp8 +// expects the first argument to be the constant. NO_SANITIZE PyObject* TraceCompareOp(void* pc, PyObject* left, PyObject* right, int opid, bool left_is_const) { if (PyLong_Check(left) && PyLong_Check(right)) { @@ -106,8 +116,7 @@ PyObject* TraceCompareOp(void* pc, PyObject* left, PyObject* right, int opid, bo __sanitizer_cov_trace_cmp8(left_int, right_int); } } - } - else if (PyBytes_Check(left) && PyBytes_Check(right)) { + } else if (PyBytes_Check(left) && PyBytes_Check(right)) { // If comparing bytes, report a memcmp. Report that we're comparing the size, // and then if that passes, compare the contents ourselves and report the // results. @@ -120,8 +129,7 @@ PyObject* TraceCompareOp(void* pc, PyObject* left, PyObject* right, int opid, bo int differ = NoSanitizeMemcmp(left_bytes, right_bytes, left_size); __sanitizer_weak_hook_memcmp(pc, left_bytes, right_bytes, left_size, differ); } - } - else if (PyUnicode_Check(left) && PyUnicode_Check(right)) { + } else if (PyUnicode_Check(left) && PyUnicode_Check(right)) { TraceCompareUnicode(left, right, pc); } From cc0dd60be790739f29ade4ae12919438d66387c8 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Wed, 30 Jun 2021 15:47:25 +0200 Subject: [PATCH 23/36] Got rid of floating point equality --- atheris/instrument_bytecode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atheris/instrument_bytecode.py b/atheris/instrument_bytecode.py index 82d4dbb1..d4c3dcb6 100644 --- a/atheris/instrument_bytecode.py +++ b/atheris/instrument_bytecode.py @@ -136,7 +136,7 @@ def adjust(self, changed_offset, size, keep_ref): old_reference = self.reference old_size = self.get_size() - if changed_offset == old_offset + 0.5: + if old_offset < changed_offset < old_offset + 1: if old_reference is not None: if self._is_relative: self.reference += size From fa07027d67ee5227efb3868badf36310d5a98c43 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Wed, 30 Jun 2021 20:29:19 +0200 Subject: [PATCH 24/36] Got rid of floating point equality --- atheris/instrument_bytecode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atheris/instrument_bytecode.py b/atheris/instrument_bytecode.py index d4c3dcb6..dd0ac6f2 100644 --- a/atheris/instrument_bytecode.py +++ b/atheris/instrument_bytecode.py @@ -136,7 +136,7 @@ def adjust(self, changed_offset, size, keep_ref): old_reference = self.reference old_size = self.get_size() - if old_offset < changed_offset < old_offset + 1: + if old_offset < changed_offset < (old_offset + 1): if old_reference is not None: if self._is_relative: self.reference += size From f6c2a25294a153f5b74aac0b3192803e96f0087a Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Thu, 1 Jul 2021 11:56:55 +0200 Subject: [PATCH 25/36] Added exception handling to _cmp and cleaned up libfuzzer.cc --- atheris.cc | 2 +- atheris.h | 3 +-- libfuzzer.cc | 41 +++++++++++++++++++++-------------------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/atheris.cc b/atheris.cc index 3a7e165e..cc82b4a7 100644 --- a/atheris.cc +++ b/atheris.cc @@ -37,7 +37,7 @@ PYBIND11_MODULE(ATHERIS_MODULE_NAME, m) { m.def("Fuzz", &Fuzz); m.def("_trace_branch", &_trace_branch); m.def("_reserve_counters", &_reserve_counters); - m.def("_cmp", &_cmp); + m.def("_cmp", &_cmp, py::return_value_policy::move); py::class_(m, "FuzzedDataProvider") .def(py::init()) diff --git a/atheris.h b/atheris.h index caf18c56..0a2f9f5e 100644 --- a/atheris.h +++ b/atheris.h @@ -38,8 +38,7 @@ void Init(); std::vector Setup( const std::vector& args, - const std::function& test_one_input, - pybind11::kwargs kwargs); + const std::function& test_one_input); void Fuzz(); diff --git a/libfuzzer.cc b/libfuzzer.cc index 2495220a..fb07a538 100644 --- a/libfuzzer.cc +++ b/libfuzzer.cc @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -79,20 +80,30 @@ void _trace_branch(unsigned long long idx) { NO_SANITIZE void _reserve_counters(unsigned long long num) { + if (fuzz_called) { + std::cerr << Colorize(STDERR_FILENO, + "Tried to reserve counters after fuzzing has been started.") + << std::endl + << Colorize(STDERR_FILENO, + "This is not supported. Instrument _all_ modules before calling atheris.Fuzz().") + << std::endl; + _exit(-1); + } + if (num > 0) { - unsigned int old_size = counters.size(); - - counters.resize(old_size + num, 0); - - if (fuzz_called) { - __sanitizer_cov_8bit_counters_init(&counters[old_size], &counters[old_size] + num); - } + counters.resize(counters.size() + num, 0); } } NO_SANITIZE -py::handle _cmp (py::handle left, py::handle right, int opid, unsigned long long idx, bool left_is_const) { - return TraceCompareOp(&counters[0] + idx, left.ptr(), right.ptr(), opid, left_is_const); +py::handle _cmp(py::handle left, py::handle right, int opid, unsigned long long idx, bool left_is_const) { + PyObject* ret = TraceCompareOp(&counters[0] + idx, left.ptr(), right.ptr(), opid, left_is_const); + + if (ret == nullptr) { + throw py::error_already_set(); + } else { + return ret; + } } NO_SANITIZE @@ -108,8 +119,7 @@ void Init() { NO_SANITIZE std::vector Setup( const std::vector& args, - const std::function& test_one_input, - py::kwargs kwargs) { + const std::function& test_one_input) { if (setup_called) { std::cerr << Colorize(STDERR_FILENO, "Setup() must not be called more than once.") @@ -121,15 +131,6 @@ std::vector Setup( args_global = args; test_one_input_global = test_one_input; - int print_funcs = 2; - - // Parse out any libFuzzer flags we also care about. - for (const std::string& arg : args) { - if (arg.substr(0, 13) == "-print_funcs=") { - print_funcs = std::stoul(arg.substr(13, std::string::npos)); - } - } - // Strip libFuzzer arguments (single dash). std::vector ret; for (const std::string& arg : args) { From 67b49f36c4ba8a1b2b7afa566aa46c807964284c Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Thu, 8 Jul 2021 15:03:51 +0200 Subject: [PATCH 26/36] Added PCTable creation --- libfuzzer.cc | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/libfuzzer.cc b/libfuzzer.cc index fb07a538..cebfc28f 100644 --- a/libfuzzer.cc +++ b/libfuzzer.cc @@ -27,11 +27,17 @@ #include "util.h" #include "tracer.h" +struct PCTableEntry { + void* pc; + long flags; +}; + using UserCb = int (*)(const uint8_t* Data, size_t Size); extern "C" { int LLVMFuzzerRunDriver(int* argc, char*** argv, int (*UserCb)(const uint8_t* Data, size_t Size)); void __sanitizer_cov_8bit_counters_init(uint8_t* start, uint8_t* stop); + void __sanitizer_cov_pcs_init(uint8_t *pcs_beg, uint8_t *pcs_end); } NO_SANITIZE @@ -66,6 +72,7 @@ std::function& test_one_input_global = std::vector& args_global = *new std::vector(); std::vector& counters = *new std::vector(); +std::vector& pctable = *new std::vector(); bool setup_called = false; bool fuzz_called = false; @@ -92,6 +99,14 @@ void _reserve_counters(unsigned long long num) { if (num > 0) { counters.resize(counters.size() + num, 0); + + int old_pctable_size = pctable.size(); + pctable.resize(old_pctable_size + num); + + for (int i = old_pctable_size; i < pctable.size(); ++i) { + pctable[i].pc = reinterpret_cast(i + 1); + pctable[i].flags = 0; + } } } @@ -189,8 +204,9 @@ void Fuzz() { char** args_ptr = &args[0]; int args_size = args_global.size(); - if (counters.size()) { + if (!counters.empty()) { __sanitizer_cov_8bit_counters_init(&counters[0], &counters[0] + counters.size()); + __sanitizer_cov_pcs_init(reinterpret_cast(&pctable[0]), reinterpret_cast(&pctable[0] + pctable.size())); } exit(LLVMFuzzerRunDriver(&args_size, &args_ptr, &TestOneInput)); From 5b731f42d4dd2956f4c094b71fdc24deb878c002 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Thu, 8 Jul 2021 15:29:22 +0200 Subject: [PATCH 27/36] Renamed _cmp to _trace_cmp --- atheris.cc | 2 +- atheris.h | 2 +- atheris/__init__.py | 2 +- atheris/instrument_bytecode.py | 16 ++++++++-------- libfuzzer.cc | 4 ++-- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/atheris.cc b/atheris.cc index cc82b4a7..d4dedb73 100644 --- a/atheris.cc +++ b/atheris.cc @@ -37,7 +37,7 @@ PYBIND11_MODULE(ATHERIS_MODULE_NAME, m) { m.def("Fuzz", &Fuzz); m.def("_trace_branch", &_trace_branch); m.def("_reserve_counters", &_reserve_counters); - m.def("_cmp", &_cmp, py::return_value_policy::move); + m.def("_trace_cmp", &_trace_cmp, py::return_value_policy::move); py::class_(m, "FuzzedDataProvider") .def(py::init()) diff --git a/atheris.h b/atheris.h index 0a2f9f5e..134523bf 100644 --- a/atheris.h +++ b/atheris.h @@ -42,7 +42,7 @@ std::vector Setup( void Fuzz(); -py::handle _cmp (py::handle left, py::handle right, int opid, unsigned long long idx, bool left_is_const); +py::handle _trace_cmp (py::handle left, py::handle right, int opid, unsigned long long idx, bool left_is_const); void _reserve_counters(unsigned long long num); void _trace_branch(unsigned long long idx); diff --git a/atheris/__init__.py b/atheris/__init__.py index 178a9829..5049d3c4 100644 --- a/atheris/__init__.py +++ b/atheris/__init__.py @@ -13,5 +13,5 @@ # limitations under the License. from .atheris import * -from .atheris import _trace_branch, _reserve_counters, _cmp +from .atheris import _trace_branch, _reserve_counters, _trace_cmp from .import_hook import instrument diff --git a/atheris/instrument_bytecode.py b/atheris/instrument_bytecode.py index dd0ac6f2..ac5f811d 100644 --- a/atheris/instrument_bytecode.py +++ b/atheris/instrument_bytecode.py @@ -34,7 +34,7 @@ TARGET_MODULE = "atheris" REGISTER_FUNCTION = "_reserve_counters" COVERAGE_FUNCTION = "_trace_branch" -COMPARE_FUNCTION = "_cmp" +COMPARE_FUNCTION = "_trace_cmp" class Instruction: """ @@ -442,7 +442,7 @@ def _generate_trace_branch_invocation(self, lineno, offset): def _generate_cmp_invocation(self, op, lineno, offset): """ - Builds the bytecode that calls atheris._cmp(). + Builds the bytecode that calls atheris._trace_cmp(). Only call this if the two objects being compared are non-constants. """ to_insert = [] @@ -472,7 +472,7 @@ def _generate_cmp_invocation(self, op, lineno, offset): def _generate_const_cmp_invocation(self, op, lineno, offset, switch): """ - Builds the bytecode that calls atheris._cmp(). + Builds the bytecode that calls atheris._trace_cmp(). Only call this if one of the objects being compared is a constant coming from co_consts. If `switch` is true the constant is the second argument and needs @@ -619,20 +619,20 @@ def trace_data_flow(self): """ This function instruments bytecode for data-flow tracing. This works by replacing the instruction COMPARE_OP with - a call to atheris._cmp(). - The arguments for _cmp() are as follows: + a call to atheris._trace_cmp(). + The arguments for _trace_cmp() are as follows: - obj1 and obj2: The two values to compare - opid: argument to COMPARE_OP - pc: a counter for how many COMPARE_OPs have been replaced - is_const: whether obj1 is a constant in co_consts. To detect if any of the values being compared is a constant, all push and pop operations have to be analyzed. If a constant appears in a comparison it must - always be given as obj1 to _cmp(). + always be given as obj1 to _trace_cmp(). The bytecode that gets inserted looks like this: LOAD_GLOBAL atheris - LOAD_ATTR _cmp - ROT_THREE ; move atheris._cmp below the two objects + LOAD_ATTR _trace_cmp + ROT_THREE ; move atheris._trace_cmp below the two objects LOAD_CONST LOAD_CONST LOAD_CONST diff --git a/libfuzzer.cc b/libfuzzer.cc index cebfc28f..9fd5db03 100644 --- a/libfuzzer.cc +++ b/libfuzzer.cc @@ -92,7 +92,7 @@ void _reserve_counters(unsigned long long num) { "Tried to reserve counters after fuzzing has been started.") << std::endl << Colorize(STDERR_FILENO, - "This is not supported. Instrument _all_ modules before calling atheris.Fuzz().") + "This is not supported. Instrument the modules before calling atheris.Fuzz().") << std::endl; _exit(-1); } @@ -111,7 +111,7 @@ void _reserve_counters(unsigned long long num) { } NO_SANITIZE -py::handle _cmp(py::handle left, py::handle right, int opid, unsigned long long idx, bool left_is_const) { +py::handle _trace_cmp(py::handle left, py::handle right, int opid, unsigned long long idx, bool left_is_const) { PyObject* ret = TraceCompareOp(&counters[0] + idx, left.ptr(), right.ptr(), opid, left_is_const); if (ret == nullptr) { From 582820348e03c05d1ed71efe9f6b11cd3f62e9df Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Thu, 8 Jul 2021 17:17:39 +0200 Subject: [PATCH 28/36] Restructured the atheris package and added `internal_libfuzzer` argument to atheris.Setup() to get rid of atheris_no_libfuzzer as a separate package --- atheris.cc | 110 +++++++++++++++++++++++++++-- atheris.h | 5 +- atheris/__init__.py | 3 +- atheris_no_libfuzzer | 1 - libfuzzer.cc => core.cc | 150 ++++++++++++++++------------------------ setup.py | 30 +++++--- 6 files changed, 188 insertions(+), 111 deletions(-) delete mode 120000 atheris_no_libfuzzer rename libfuzzer.cc => core.cc (71%) diff --git a/atheris.cc b/atheris.cc index d4dedb73..93ee0b6b 100644 --- a/atheris.cc +++ b/atheris.cc @@ -15,6 +15,14 @@ #include "atheris.h" +#include +#include +#include + +#include +#include +#include +#include #include #include "fuzzed_data_provider.h" @@ -23,16 +31,110 @@ #include "pybind11/pybind11.h" #include "pybind11/stl.h" #include "util.h" +#include "atheris.h" namespace atheris { + +namespace py = pybind11; + +namespace { + +std::function& test_one_input_global = + *new std::function([](py::bytes data) -> void { + std::cerr << "You must call Setup() before Fuzz()." << std::endl; + _exit(-1); + }); + +std::vector& args_global = *new std::vector(); +unsigned long long num_counters = 0; +bool internal_libfuzzer = true; +bool setup_called = false; + +} // namespace + +NO_SANITIZE +void _trace_branch(unsigned long long idx) { + +} + +NO_SANITIZE +void _reserve_counters(unsigned long long num) { + num_counters += num; +} + +NO_SANITIZE +py::handle _trace_cmp(py::handle left, py::handle right, int opid, unsigned long long idx, bool left_is_const) { + PyObject* ret = PyObject_RichCompare(left.ptr(), right.ptr(), opid); + + if (ret == nullptr) { + throw py::error_already_set(); + } else { + return ret; + } +} + +NO_SANITIZE +std::vector Setup( + const std::vector& args, + const std::function& test_one_input, + py::kwargs kwargs) { + if (setup_called) { + std::cerr << Colorize(STDERR_FILENO, + "Setup() must not be called more than once.") + << std::endl; + exit(1); + } + setup_called = true; + + args_global = args; + test_one_input_global = test_one_input; + + // Strip libFuzzer arguments (single dash). + std::vector ret; + for (const std::string& arg : args) { + if (arg.size() > 1 && arg[0] == '-' && arg[1] != '-') { + continue; + } + ret.push_back(arg); + } + + if (kwargs.contains("internal_libfuzzer")) { + internal_libfuzzer = kwargs["internal_libfuzzer"].cast(); + } + + return ret; +} + +NO_SANITIZE +void Fuzz() { + if (!setup_called) { + std::cerr << Colorize(STDERR_FILENO, + "Setup() must be called before Fuzz() can be called.") + << std::endl; + exit(1); + } + + py::module_ atheris = py::module_::import("sys").attr("modules")["atheris"]; + py::module_ core; + + if (internal_libfuzzer) { + core = py::module_::import("atheris.core_with_libfuzzer"); + } else { + core = py::module_::import("atheris.core_without_libfuzzer"); + } + + atheris.attr("_trace_cmp") = core.attr("_trace_cmp"); + atheris.attr("_reserve_counters") = core.attr("_reserve_counters"); + atheris.attr("_trace_branch") = core.attr("_trace_branch"); + + core.attr("start_fuzzing")(args_global, test_one_input_global, num_counters); +} #ifndef ATHERIS_MODULE_NAME -#define ATHERIS_MODULE_NAME atheris +#error Need ATHERIS_MODULE_NAME #endif // ATHERIS_MODULE_NAME PYBIND11_MODULE(ATHERIS_MODULE_NAME, m) { - Init(); - m.def("Setup", &Setup); m.def("Fuzz", &Fuzz); m.def("_trace_branch", &_trace_branch); @@ -68,8 +170,6 @@ PYBIND11_MODULE(ATHERIS_MODULE_NAME, m) { .def("remaining_bytes", &FuzzedDataProvider::remaining_bytes) .def("buffer", &FuzzedDataProvider::buffer); m.attr("ALL_REMAINING") = std::numeric_limits::max(); - - m.def("path", &GetDynamicLocation); } } // namespace atheris diff --git a/atheris.h b/atheris.h index 134523bf..a6177f0b 100644 --- a/atheris.h +++ b/atheris.h @@ -34,11 +34,10 @@ namespace atheris { namespace py = pybind11; -void Init(); - std::vector Setup( const std::vector& args, - const std::function& test_one_input); + const std::function& test_one_input, + py::kwargs kwargs); void Fuzz(); diff --git a/atheris/__init__.py b/atheris/__init__.py index 5049d3c4..28207a4e 100644 --- a/atheris/__init__.py +++ b/atheris/__init__.py @@ -12,6 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .atheris import * -from .atheris import _trace_branch, _reserve_counters, _trace_cmp +from .atheris import Setup, Fuzz, FuzzedDataProvider, _trace_branch, _reserve_counters, _trace_cmp from .import_hook import instrument diff --git a/atheris_no_libfuzzer b/atheris_no_libfuzzer deleted file mode 120000 index 7e292336..00000000 --- a/atheris_no_libfuzzer +++ /dev/null @@ -1 +0,0 @@ -./atheris \ No newline at end of file diff --git a/libfuzzer.cc b/core.cc similarity index 71% rename from libfuzzer.cc rename to core.cc index 9fd5db03..d06d54b7 100644 --- a/libfuzzer.cc +++ b/core.cc @@ -21,11 +21,14 @@ #include #include #include +#include -#include "atheris.h" #include "macros.h" #include "util.h" #include "tracer.h" +#include "pybind11/functional.h" +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" struct PCTableEntry { void* pc; @@ -33,7 +36,7 @@ struct PCTableEntry { }; using UserCb = int (*)(const uint8_t* Data, size_t Size); - + extern "C" { int LLVMFuzzerRunDriver(int* argc, char*** argv, int (*UserCb)(const uint8_t* Data, size_t Size)); void __sanitizer_cov_8bit_counters_init(uint8_t* start, uint8_t* stop); @@ -62,21 +65,27 @@ namespace atheris { namespace py = pybind11; -namespace { - std::function& test_one_input_global = *new std::function([](py::bytes data) -> void { std::cerr << "You must call Setup() before Fuzz()." << std::endl; _exit(-1); }); - -std::vector& args_global = *new std::vector(); std::vector& counters = *new std::vector(); std::vector& pctable = *new std::vector(); -bool setup_called = false; -bool fuzz_called = false; -} // namespace +NO_SANITIZE +void Init() { + if (!&LLVMFuzzerRunDriver) { + throw std::runtime_error( + "LLVMFuzzerRunDriver symbol not found. This means " + "you had an old version of Clang installed when " + "you built Atheris."); + } + + if (GetCoverageSymbolsLocation() != GetLibFuzzerSymbolsLocation()) { + std::cerr << Colorize(STDERR_FILENO, "WARNING: Coverage symbols are being provided by a library other than libFuzzer. This will result in broken Python code coverage and severely impacted native extension code coverage. Symbols are coming from this library: " + GetCoverageSymbolsLocation() + "\nYou can likely resolve this issue by linking libFuzzer into Python directly, and using `atheris_no_libfuzzer` instead of `atheris`. See using_sanitizers.md for details."); + } +} NO_SANITIZE void _trace_branch(unsigned long long idx) { @@ -87,27 +96,13 @@ void _trace_branch(unsigned long long idx) { NO_SANITIZE void _reserve_counters(unsigned long long num) { - if (fuzz_called) { - std::cerr << Colorize(STDERR_FILENO, - "Tried to reserve counters after fuzzing has been started.") - << std::endl - << Colorize(STDERR_FILENO, - "This is not supported. Instrument the modules before calling atheris.Fuzz().") - << std::endl; - _exit(-1); - } - - if (num > 0) { - counters.resize(counters.size() + num, 0); - - int old_pctable_size = pctable.size(); - pctable.resize(old_pctable_size + num); - - for (int i = old_pctable_size; i < pctable.size(); ++i) { - pctable[i].pc = reinterpret_cast(i + 1); - pctable[i].flags = 0; - } - } + std::cerr << Colorize(STDERR_FILENO, + "Tried to reserve counters after fuzzing has been started.") + << std::endl + << Colorize(STDERR_FILENO, + "This is not supported. Instrument the modules before calling atheris.Fuzz().") + << std::endl; + _exit(-1); } NO_SANITIZE @@ -121,47 +116,6 @@ py::handle _trace_cmp(py::handle left, py::handle right, int opid, unsigned long } } -NO_SANITIZE -void Init() { - if (!&LLVMFuzzerRunDriver) { - throw std::runtime_error( - "LLVMFuzzerRunDriver symbol not found. This means " - "you had an old version of Clang installed when " - "you built Atheris."); - } -} - -NO_SANITIZE -std::vector Setup( - const std::vector& args, - const std::function& test_one_input) { - if (setup_called) { - std::cerr << Colorize(STDERR_FILENO, - "Setup() must not be called more than once.") - << std::endl; - exit(1); - } - setup_called = true; - - args_global = args; - test_one_input_global = test_one_input; - - // Strip libFuzzer arguments (single dash). - std::vector ret; - for (const std::string& arg : args) { - if (arg.size() > 1 && arg[0] == '-' && arg[1] != '-') { - continue; - } - ret.push_back(arg); - } - - if (GetCoverageSymbolsLocation() != GetLibFuzzerSymbolsLocation()) { - std::cerr << Colorize(STDERR_FILENO, "WARNING: Coverage symbols are being provided by a library other than libFuzzer. This will result in broken Python code coverage and severely impacted native extension code coverage. Symbols are coming from this library: " + GetCoverageSymbolsLocation() + "\nYou can likely resolve this issue by linking libFuzzer into Python directly, and using `atheris_no_libfuzzer` instead of `atheris`. See using_sanitizers.md for details."); - } - - return ret; -} - NO_SANITIZE int TestOneInput(const uint8_t* data, size_t size) { try { @@ -185,31 +139,49 @@ int TestOneInput(const uint8_t* data, size_t size) { } NO_SANITIZE -void Fuzz() { - if (!setup_called) { - std::cerr << Colorize(STDERR_FILENO, - "Setup() must be called before Fuzz() can be called.") - << std::endl; - exit(1); - } - - fuzz_called = true; - - std::vector args; - args.reserve(args_global.size() + 1); - for (const std::string& arg : args_global) { - args.push_back(const_cast(arg.c_str())); +void start_fuzzing(const std::vector& args, + const std::function& test_one_input, + unsigned long long num_counters +) { + test_one_input_global = test_one_input; + + std::vector arg_array; + arg_array.reserve(args.size() + 1); + for (const std::string& arg : args) { + arg_array.push_back(const_cast(arg.c_str())); } - args.push_back(nullptr); - char** args_ptr = &args[0]; - int args_size = args_global.size(); + arg_array.push_back(nullptr); + char** args_ptr = &arg_array[0]; + int args_size = args.size(); - if (!counters.empty()) { + if (num_counters) { + counters.resize(num_counters, 0); __sanitizer_cov_8bit_counters_init(&counters[0], &counters[0] + counters.size()); + + pctable.resize(num_counters); + + for (int i = 0; i < pctable.size(); ++i) { + pctable[i].pc = reinterpret_cast(i + 1); + pctable[i].flags = 0; + } + __sanitizer_cov_pcs_init(reinterpret_cast(&pctable[0]), reinterpret_cast(&pctable[0] + pctable.size())); } exit(LLVMFuzzerRunDriver(&args_size, &args_ptr, &TestOneInput)); } +#ifndef ATHERIS_MODULE_NAME +#error Need ATHERIS_MODULE_NAME +#endif // ATHERIS_MODULE_NAME + +PYBIND11_MODULE(ATHERIS_MODULE_NAME, m) { + Init(); + + m.def("start_fuzzing", &start_fuzzing); + m.def("_trace_branch", &_trace_branch); + m.def("_reserve_counters", &_reserve_counters); + m.def("_trace_cmp", &_trace_cmp, py::return_value_policy::move); +} + } // namespace atheris diff --git a/setup.py b/setup.py index d552f6f6..505e3abe 100644 --- a/setup.py +++ b/setup.py @@ -117,8 +117,6 @@ def get_libfuzzer_lib(): "atheris.atheris", sorted([ "atheris.cc", - "libfuzzer.cc", - "tracer.cc", "util.cc", "fuzzed_data_provider.cc", ]), @@ -128,13 +126,23 @@ def get_libfuzzer_lib(): ], language="c++"), Extension( - "atheris_no_libfuzzer.atheris", + "atheris.core_with_libfuzzer", sorted([ - "atheris.cc", - "libfuzzer.cc", + "core.cc", + "tracer.cc", + "util.cc", + ]), + include_dirs=[ + # Path to pybind11 headers + PybindIncludeGetter(), + ], + language="c++"), + Extension( + "atheris.core_without_libfuzzer", + sorted([ + "core.cc", "tracer.cc", "util.cc", - "fuzzed_data_provider.cc", ]), include_dirs=[ # Path to pybind11 headers @@ -227,12 +235,12 @@ def build_extensions(self): for ext in self.extensions: ext.define_macros = [("VERSION_INFO", "'{}'".format(self.distribution.get_version())), - ("ATHERIS_MODULE_NAME", "atheris")] + ("ATHERIS_MODULE_NAME", ext.name.split(".")[1])] ext.extra_compile_args = c_opts - if ext.name == "atheris_no_libfuzzer.atheris": - ext.extra_link_args = l_opts - else: + if ext.name == "atheris.core_with_libfuzzer": ext.extra_link_args = l_opts + [libfuzzer] + else: + ext.extra_link_args = l_opts build_ext.build_extensions(self) try: @@ -300,7 +308,7 @@ def merge_deploy_libfuzzer_sanitizer(self, libfuzzer, lib_name, description="A coverage-guided fuzzer for Python and Python extensions.", long_description=open("README.md", "r").read(), long_description_content_type="text/markdown", - packages=["atheris", "atheris_no_libfuzzer"], + packages=["atheris"], ext_modules=ext_modules, setup_requires=["pybind11>=2.5.0"], cmdclass={"build_ext": BuildExt}, From 1f4941df8f5e7351bdf0f9c957c14ead1be8d5ab Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Thu, 8 Jul 2021 18:09:01 +0200 Subject: [PATCH 29/36] Added atheris.path() --- atheris/__init__.py | 1 + atheris/utils.py | 7 +++++++ 2 files changed, 8 insertions(+) create mode 100644 atheris/utils.py diff --git a/atheris/__init__.py b/atheris/__init__.py index 28207a4e..5939ab83 100644 --- a/atheris/__init__.py +++ b/atheris/__init__.py @@ -14,3 +14,4 @@ from .atheris import Setup, Fuzz, FuzzedDataProvider, _trace_branch, _reserve_counters, _trace_cmp from .import_hook import instrument +from .utils import path diff --git a/atheris/utils.py b/atheris/utils.py new file mode 100644 index 00000000..507b7da7 --- /dev/null +++ b/atheris/utils.py @@ -0,0 +1,7 @@ +import sys +import os + +def path(): + dir, _ = os.path.split(sys.modules["atheris"].__file__) + dir, _ = os.path.split(dir) + return dir From 25942411a3e9729df3c8ef7abc3f9109aaecceaf Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Thu, 8 Jul 2021 18:09:31 +0200 Subject: [PATCH 30/36] Updated documentation to reflect the new changes --- README.md | 3 ++- using_sanitizers.md | 35 ++++++++++------------------------- 2 files changed, 12 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 1b7b222e..387ff6e1 100644 --- a/README.md +++ b/README.md @@ -95,10 +95,11 @@ Define a fuzzer entry point function and pass it to `atheris.Setup()` along with This has to be used together with a `with`-Statement. -#### `Setup(args, test_one_input)` +#### `Setup(args, test_one_input, internal_libfuzzer=True)` - `args`: A list of strings: the process arguments to pass to the fuzzer, typically `sys.argv`. This argument list may be modified in-place, to remove arguments consumed by the fuzzer. See [the LibFuzzer docs](https://llvm.org/docs/LibFuzzer.html#options) for a list of such options. - `test_one_input`: your fuzzer's entry point. Must take a single `bytes` argument. This will be repeatedly invoked with a single bytes container. + - `internal_libfuzzer`: Indicates whether libfuzzer shall be provided by atheris or an external library (see [using_sanitizers.md](./using_sanitizers.md)). #### `Fuzz()` diff --git a/using_sanitizers.md b/using_sanitizers.md index 7febff9c..49aabb79 100644 --- a/using_sanitizers.md +++ b/using_sanitizers.md @@ -15,16 +15,16 @@ If your extension is too complex and this doesn't work, you may have to make spe ## Step 2: Use an external libFuzzer -For technical reasons detailed below, libFuzzer must not be linked into Atheris if sanitizers are being used. First, replace this: +For technical reasons detailed below, libFuzzer must not be linked into Atheris if sanitizers are being used. +Set `internal_libfuzzer=False` in the call to `atheris.Setup` like this: -``` +```py import atheris -``` -With this: +... -``` -import atheris_no_libfuzzer as atheris +atheris.Setup(..., internal_libfuzzer=False) +atheris.Fuzz() ``` Now that libFuzzer is no longer being provided by Atheris, it needs to be provided elsewhere. There are two options: @@ -34,7 +34,7 @@ Now that libFuzzer is no longer being provided by Atheris, it needs to be provid If you can use this option, we recommend it; it is significantly easier than option #2. (However, this option is not yet supported on Mac). When Atheris is installed, it attempts to generate custom ASan and UBSan shared libraries that have libFuzzer linked in. You can find these libraries in the directory returned by this command: ``` -python -c "import atheris; import os; print(os.path.dirname(atheris.path()))" +python -c "import atheris; print(atheris.path())" ``` These files will be called: @@ -45,7 +45,7 @@ These files will be called: If these files are present, it means Atheris succesfully generated the files at installation time, and you can use this option. Simply `LD_PRELOAD` the right `.so` file, and you're good to go. Here's a complete example: ``` -LD_PRELOAD="$(python -c "import atheris; import os; print(os.path.dirname(atheris.path()))")/../asan_with_fuzzer.so" python ./my_fuzzer.py +LD_PRELOAD="$(python -c "import atheris; print(atheris.path())")/asan_with_fuzzer.so" python ./my_fuzzer.py ``` ### Option 2: Linking libFuzzer into Python @@ -82,7 +82,7 @@ needed) is written to the `site-packages` directory adjacent to where Atheris is installed. You can find it in the directory returned by this command: ``` -python3 -c "import atheris; import os; print(os.path.dirname(atheris.path()))" +python3 -c "import atheris; print(atheris.path())" ``` The `build_modified_libfuzzer.sh` script uses the libFuzzer found there by @@ -107,19 +107,4 @@ than the weak symbols from ASan/UBSan. ## What if I'm not using a Sanitizer? -While we recommend that you use a sanitizer when fuzzing native code, it's not mandatory. If you'd like to use Atheris to fuzz native code without a sanitizer, you should still build your extension with `-fsanitize=fuzzer-no-link`, and then `LD_PRELOAD` *the atheris shared library* itself. - -``` -LD_PRELOAD="path/to/atheris.so" python ./your_fuzzer.py -``` - -If you want to make a Python fuzzer that runs both with or without a sanitizer, you can use this code pattern: - -``` -try: - import atheris_no_libfuzzer as atheris -except ImportError: - import atheris -``` - -Loading `atheris_no_libfuzzer` will fail if libFuzzer hasn't been linked into CPython and hasn't been preloaded. +While we recommend that you use a sanitizer when fuzzing native code, it's not mandatory. If you'd like to use Atheris to fuzz native code without a sanitizer, you should still build your extension with `-fsanitize=fuzzer-no-link`, and still `LD_PRELOAD` `asan_with_fuzzer.so`. From b70b55a998b3fb99ee1aa86b88a46b1b5121cf1a Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Thu, 8 Jul 2021 18:14:52 +0200 Subject: [PATCH 31/36] Updated example_fuzzers --- example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py | 5 +++++ example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py | 5 ++++- .../json_fuzzer/hypothesis_structured_fuzzer.py | 2 +- example_fuzzers/json_fuzzer/json_differential_fuzzer.py | 7 ++----- example_fuzzers/json_fuzzer/ujson_fuzzer.py | 4 ++-- 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py b/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py index def3fafb..4088cb86 100644 --- a/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py +++ b/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py @@ -48,6 +48,11 @@ with atheris.instrument(include=["idna"]): import idna +# libidn2 is just an extension. +# Only python code is instrumented with atheris.instrument(); +# extensions are instrumented at compile-time +# so a call to atheris.instrument() is not +# necessary here. import libidn2 diff --git a/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py b/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py index fa40b9f2..01c0cc13 100755 --- a/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py +++ b/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py @@ -48,7 +48,10 @@ import idna # libidn2 is just an extension. -# Only python code can be instrumented. +# Only python code is instrumented with atheris.instrument(); +# extensions are instrumented at compile-time +# so a call to atheris.instrument() is not +# necessary here. import libidn2 diff --git a/example_fuzzers/json_fuzzer/hypothesis_structured_fuzzer.py b/example_fuzzers/json_fuzzer/hypothesis_structured_fuzzer.py index 1e0d7845..4116c205 100644 --- a/example_fuzzers/json_fuzzer/hypothesis_structured_fuzzer.py +++ b/example_fuzzers/json_fuzzer/hypothesis_structured_fuzzer.py @@ -67,5 +67,5 @@ def test_ujson_roundtrip(obj, kwargs): test_ujson_roundtrip() # If that passed, we use Atheris to provide the inputs to our test: - atheris.Setup(sys.argv, test_ujson_roundtrip.hypothesis.fuzz_one_input) + atheris.Setup(sys.argv, test_ujson_roundtrip.hypothesis.fuzz_one_input, internal_libfuzzer=False) atheris.Fuzz() diff --git a/example_fuzzers/json_fuzzer/json_differential_fuzzer.py b/example_fuzzers/json_fuzzer/json_differential_fuzzer.py index 59904783..d5e50c00 100755 --- a/example_fuzzers/json_fuzzer/json_differential_fuzzer.py +++ b/example_fuzzers/json_fuzzer/json_differential_fuzzer.py @@ -37,10 +37,7 @@ # See using_sanitizers.md for what this is about. -try: - import atheris_no_libfuzzer as atheris -except ImportError: - import atheris +import atheris import json import ujson @@ -90,7 +87,7 @@ def TestOneInput(input_bytes): def main(): - atheris.Setup(sys.argv, TestOneInput) + atheris.Setup(sys.argv, TestOneInput, internal_libfuzzer=False) atheris.Fuzz() if __name__ == "__main__": diff --git a/example_fuzzers/json_fuzzer/ujson_fuzzer.py b/example_fuzzers/json_fuzzer/ujson_fuzzer.py index 9fb42325..e8c32f04 100755 --- a/example_fuzzers/json_fuzzer/ujson_fuzzer.py +++ b/example_fuzzers/json_fuzzer/ujson_fuzzer.py @@ -27,7 +27,7 @@ """ import sys -import atheris_no_libfuzzer as atheris +import atheris # Here atheris.instrument() is not necessary # because ujson is just an extension. @@ -53,7 +53,7 @@ def TestOneInput(input_bytes): def main(): - atheris.Setup(sys.argv, TestOneInput) + atheris.Setup(sys.argv, TestOneInput, internal_libfuzzer=False) atheris.Fuzz() if __name__ == "__main__": From 4a6d37cff0f990432e1f23cbfc7a09d1df4b343a Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Thu, 8 Jul 2021 18:18:21 +0200 Subject: [PATCH 32/36] Fixed indentation --- core.cc | 4 ++-- tracer.cc | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/core.cc b/core.cc index d06d54b7..2595e6bd 100644 --- a/core.cc +++ b/core.cc @@ -31,8 +31,8 @@ #include "pybind11/stl.h" struct PCTableEntry { - void* pc; - long flags; + void* pc; + long flags; }; using UserCb = int (*)(const uint8_t* Data, size_t Size); diff --git a/tracer.cc b/tracer.cc index 54e021c9..6e6079a1 100644 --- a/tracer.cc +++ b/tracer.cc @@ -32,9 +32,9 @@ #include "util.h" extern "C" { - void __sanitizer_cov_trace_const_cmp8(uint64_t arg1, uint64_t arg2); - void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2); - void __sanitizer_weak_hook_memcmp(void* caller_pc, const void* s1, const void* s2, size_t n, int result); + void __sanitizer_cov_trace_const_cmp8(uint64_t arg1, uint64_t arg2); + void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2); + void __sanitizer_weak_hook_memcmp(void* caller_pc, const void* s1, const void* s2, size_t n, int result); } namespace atheris { @@ -105,17 +105,17 @@ void TraceCompareUnicode(PyObject* left, PyObject* right, void* pc) { NO_SANITIZE PyObject* TraceCompareOp(void* pc, PyObject* left, PyObject* right, int opid, bool left_is_const) { if (PyLong_Check(left) && PyLong_Check(right)) { - // Integer-integer comparison. If both integers fit into 64 bits, report - // an integer comparison. - int64_t left_int; - int64_t right_int; - if (As64(&left_int, left) && As64(&right_int, right)) { - if (left_is_const) { - __sanitizer_cov_trace_const_cmp8(left_int, right_int); - } else { - __sanitizer_cov_trace_cmp8(left_int, right_int); - } + // Integer-integer comparison. If both integers fit into 64 bits, report + // an integer comparison. + int64_t left_int; + int64_t right_int; + if (As64(&left_int, left) && As64(&right_int, right)) { + if (left_is_const) { + __sanitizer_cov_trace_const_cmp8(left_int, right_int); + } else { + __sanitizer_cov_trace_cmp8(left_int, right_int); } + } } else if (PyBytes_Check(left) && PyBytes_Check(right)) { // If comparing bytes, report a memcmp. Report that we're comparing the size, // and then if that passes, compare the contents ourselves and report the From 6ada953027b349ac3b3c1c6e32f544944eeba6df Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Thu, 8 Jul 2021 18:25:31 +0200 Subject: [PATCH 33/36] Updated copyright info --- atheris.cc | 2 +- atheris.h | 2 +- atheris/utils.py | 14 ++++++++++++++ core.cc | 2 +- deployment/Dockerfile | 2 +- deployment/build_wheels.sh | 2 +- deployment/build_wheels_mac.sh | 2 +- deployment/deploy_pypi.sh | 2 +- example_fuzzers/fuzzing_example.py | 2 +- .../idna_fuzzer/idna_acceptance_fuzzer.py | 2 +- example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py | 2 +- .../idna_fuzzer/libidn2_wrapper/libidn2.cc | 2 +- .../idna_fuzzer/libidn2_wrapper/setup.py | 2 +- example_fuzzers/json_fuzzer/build_install_ujson.sh | 2 +- .../json_fuzzer/hypothesis_structured_fuzzer.py | 2 +- .../json_fuzzer/json_differential_fuzzer.py | 2 +- example_fuzzers/json_fuzzer/ujson_fuzzer.py | 2 +- example_fuzzers/yaml_fuzzer/yaml_fuzzer.py | 2 +- fuzzed_data_provider.cc | 2 +- fuzzed_data_provider.h | 2 +- fuzzed_data_provider_test.py | 2 +- macros.h | 2 +- setup.py | 2 +- setup_utils/check_libfuzzer_version.sh | 2 +- setup_utils/find_libfuzzer.sh | 2 +- setup_utils/fuzzer_run_driver_wrapper.cc | 2 +- setup_utils/merge_libfuzzer_sanitizer.sh | 2 +- setup_utils/upgrade_libfuzzer.sh | 2 +- third_party/build_modified_libfuzzer.sh | 2 +- tracer.cc | 2 +- tracer.h | 2 +- util.cc | 2 +- util.h | 2 +- util_test.cc | 2 +- 34 files changed, 47 insertions(+), 33 deletions(-) diff --git a/atheris.cc b/atheris.cc index 93ee0b6b..274e32c9 100644 --- a/atheris.cc +++ b/atheris.cc @@ -1,4 +1,4 @@ -// Copyright 2021 Google LLC +// Copyright 2020 Google LLC // Copyright 2021 Fraunhofer FKIE // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/atheris.h b/atheris.h index a6177f0b..3c1aa056 100644 --- a/atheris.h +++ b/atheris.h @@ -1,5 +1,5 @@ /* - * Copyright 2021 Google LLC + * Copyright 2020 Google LLC * Copyright 2021 Fraunhofer FKIE * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/atheris/utils.py b/atheris/utils.py index 507b7da7..740b03f8 100644 --- a/atheris/utils.py +++ b/atheris/utils.py @@ -1,3 +1,17 @@ +# Copyright 2021 Fraunhofer FKIE +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys import os diff --git a/core.cc b/core.cc index 2595e6bd..44bd6313 100644 --- a/core.cc +++ b/core.cc @@ -1,4 +1,4 @@ -// Copyright 2021 Google LLC +// Copyright 2020 Google LLC // Copyright 2021 Fraunhofer FKIE // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/deployment/Dockerfile b/deployment/Dockerfile index 686ac320..3ebf4dfa 100644 --- a/deployment/Dockerfile +++ b/deployment/Dockerfile @@ -1,4 +1,4 @@ -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/deployment/build_wheels.sh b/deployment/build_wheels.sh index b4b69007..6ac53fa0 100755 --- a/deployment/build_wheels.sh +++ b/deployment/build_wheels.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/deployment/build_wheels_mac.sh b/deployment/build_wheels_mac.sh index 90a7f996..8ab32e7f 100644 --- a/deployment/build_wheels_mac.sh +++ b/deployment/build_wheels_mac.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/deployment/deploy_pypi.sh b/deployment/deploy_pypi.sh index 81848839..d1ad724b 100755 --- a/deployment/deploy_pypi.sh +++ b/deployment/deploy_pypi.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/example_fuzzers/fuzzing_example.py b/example_fuzzers/fuzzing_example.py index ff0bb9e0..38e4ee7b 100644 --- a/example_fuzzers/fuzzing_example.py +++ b/example_fuzzers/fuzzing_example.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # Copyright 2021 Fraunhofer FKIE # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py b/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py index 4088cb86..8e1a2e39 100644 --- a/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py +++ b/example_fuzzers/idna_fuzzer/idna_acceptance_fuzzer.py @@ -1,7 +1,7 @@ #!/usr/bin/python3 # coding=utf-8 -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # Copyright 2021 Fraunhofer FKIE # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py b/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py index 01c0cc13..85bf2610 100755 --- a/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py +++ b/example_fuzzers/idna_fuzzer/idna_uts46_fuzzer.py @@ -1,7 +1,7 @@ #!/usr/bin/python3 # coding=utf-8 -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # Copyright 2021 Fraunhofer FKIE # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/example_fuzzers/idna_fuzzer/libidn2_wrapper/libidn2.cc b/example_fuzzers/idna_fuzzer/libidn2_wrapper/libidn2.cc index 081da5b1..a9b8ca39 100644 --- a/example_fuzzers/idna_fuzzer/libidn2_wrapper/libidn2.cc +++ b/example_fuzzers/idna_fuzzer/libidn2_wrapper/libidn2.cc @@ -1,4 +1,4 @@ -// Copyright 2021 Google LLC +// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/example_fuzzers/idna_fuzzer/libidn2_wrapper/setup.py b/example_fuzzers/idna_fuzzer/libidn2_wrapper/setup.py index 73ada8ac..9b64423e 100644 --- a/example_fuzzers/idna_fuzzer/libidn2_wrapper/setup.py +++ b/example_fuzzers/idna_fuzzer/libidn2_wrapper/setup.py @@ -1,4 +1,4 @@ -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/example_fuzzers/json_fuzzer/build_install_ujson.sh b/example_fuzzers/json_fuzzer/build_install_ujson.sh index af72acba..866b66ed 100644 --- a/example_fuzzers/json_fuzzer/build_install_ujson.sh +++ b/example_fuzzers/json_fuzzer/build_install_ujson.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/example_fuzzers/json_fuzzer/hypothesis_structured_fuzzer.py b/example_fuzzers/json_fuzzer/hypothesis_structured_fuzzer.py index 4116c205..48af9c2a 100644 --- a/example_fuzzers/json_fuzzer/hypothesis_structured_fuzzer.py +++ b/example_fuzzers/json_fuzzer/hypothesis_structured_fuzzer.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -# Copyright 2021 Zac Hatfield-Dodds +# Copyright 2020 Zac Hatfield-Dodds # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/example_fuzzers/json_fuzzer/json_differential_fuzzer.py b/example_fuzzers/json_fuzzer/json_differential_fuzzer.py index d5e50c00..a62d30b4 100755 --- a/example_fuzzers/json_fuzzer/json_differential_fuzzer.py +++ b/example_fuzzers/json_fuzzer/json_differential_fuzzer.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/example_fuzzers/json_fuzzer/ujson_fuzzer.py b/example_fuzzers/json_fuzzer/ujson_fuzzer.py index e8c32f04..77eddc00 100755 --- a/example_fuzzers/json_fuzzer/ujson_fuzzer.py +++ b/example_fuzzers/json_fuzzer/ujson_fuzzer.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py b/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py index 26ff3342..a6d48567 100644 --- a/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py +++ b/example_fuzzers/yaml_fuzzer/yaml_fuzzer.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # Copyright 2021 Fraunhofer FKIE # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/fuzzed_data_provider.cc b/fuzzed_data_provider.cc index 86c778e1..b9337277 100644 --- a/fuzzed_data_provider.cc +++ b/fuzzed_data_provider.cc @@ -1,4 +1,4 @@ -// Copyright 2021 Google LLC +// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/fuzzed_data_provider.h b/fuzzed_data_provider.h index 9263c4ea..8f3cd768 100644 --- a/fuzzed_data_provider.h +++ b/fuzzed_data_provider.h @@ -1,5 +1,5 @@ /* - * Copyright 2021 Google LLC + * Copyright 2020 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/fuzzed_data_provider_test.py b/fuzzed_data_provider_test.py index f053cee8..a44dd462 100644 --- a/fuzzed_data_provider_test.py +++ b/fuzzed_data_provider_test.py @@ -1,4 +1,4 @@ -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/macros.h b/macros.h index 09ae1b2d..5eb24901 100644 --- a/macros.h +++ b/macros.h @@ -1,5 +1,5 @@ /* - * Copyright 2021 Google LLC + * Copyright 2020 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/setup.py b/setup.py index 505e3abe..349c3366 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # Copyright 2021 Fraunhofer FKIE # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/setup_utils/check_libfuzzer_version.sh b/setup_utils/check_libfuzzer_version.sh index 48fc58e6..1defc47a 100755 --- a/setup_utils/check_libfuzzer_version.sh +++ b/setup_utils/check_libfuzzer_version.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/setup_utils/find_libfuzzer.sh b/setup_utils/find_libfuzzer.sh index 6f03a0db..1691b30a 100755 --- a/setup_utils/find_libfuzzer.sh +++ b/setup_utils/find_libfuzzer.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/setup_utils/fuzzer_run_driver_wrapper.cc b/setup_utils/fuzzer_run_driver_wrapper.cc index bcfe3ee0..e312c141 100644 --- a/setup_utils/fuzzer_run_driver_wrapper.cc +++ b/setup_utils/fuzzer_run_driver_wrapper.cc @@ -1,4 +1,4 @@ -// Copyright 2021 Google LLC +// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/setup_utils/merge_libfuzzer_sanitizer.sh b/setup_utils/merge_libfuzzer_sanitizer.sh index 826e2237..81be4c7f 100755 --- a/setup_utils/merge_libfuzzer_sanitizer.sh +++ b/setup_utils/merge_libfuzzer_sanitizer.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/setup_utils/upgrade_libfuzzer.sh b/setup_utils/upgrade_libfuzzer.sh index cc0577eb..1d9055da 100755 --- a/setup_utils/upgrade_libfuzzer.sh +++ b/setup_utils/upgrade_libfuzzer.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/third_party/build_modified_libfuzzer.sh b/third_party/build_modified_libfuzzer.sh index 86b2c4e4..a3533b76 100755 --- a/third_party/build_modified_libfuzzer.sh +++ b/third_party/build_modified_libfuzzer.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2021 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tracer.cc b/tracer.cc index 6e6079a1..7554c0f7 100644 --- a/tracer.cc +++ b/tracer.cc @@ -1,4 +1,4 @@ -// Copyright 2021 Google LLC +// Copyright 2020 Google LLC // Copyright 2021 Fraunhofer FKIE // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tracer.h b/tracer.h index c46af71d..c5485113 100644 --- a/tracer.h +++ b/tracer.h @@ -1,5 +1,5 @@ /* - * Copyright 2021 Google LLC + * Copyright 2020 Google LLC * Copyright 2021 Fraunhofer FKIE * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/util.cc b/util.cc index efcbb553..881b9116 100644 --- a/util.cc +++ b/util.cc @@ -1,4 +1,4 @@ -// Copyright 2021 Google LLC +// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/util.h b/util.h index fcb1967e..daaa3503 100644 --- a/util.h +++ b/util.h @@ -1,5 +1,5 @@ /* - * Copyright 2021 Google LLC + * Copyright 2020 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/util_test.cc b/util_test.cc index 85e67ac8..9a6b7530 100644 --- a/util_test.cc +++ b/util_test.cc @@ -1,4 +1,4 @@ -// Copyright 2021 Google LLC +// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. From 3b296fa3365d9899712fdb84da317fbac736f556 Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Thu, 8 Jul 2021 20:14:55 +0200 Subject: [PATCH 34/36] Sending instrumentation output to stderr --- atheris/import_hook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atheris/import_hook.py b/atheris/import_hook.py index 42904fba..712a6cfa 100644 --- a/atheris/import_hook.py +++ b/atheris/import_hook.py @@ -55,7 +55,7 @@ def find_spec(self, fullname, path, target=None): spec.loader_state = None - print(f"INFO: Instrumenting {fullname}") + print(f"INFO: Instrumenting {fullname}", file=sys.stderr) return spec From 0ad5eecacf36294d779ce512cc58f19e01ae22ae Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Thu, 8 Jul 2021 20:15:23 +0200 Subject: [PATCH 35/36] Bug fix: Use `pybind11::module` instead of `pybind11::module_` --- atheris.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/atheris.cc b/atheris.cc index 274e32c9..eeecd664 100644 --- a/atheris.cc +++ b/atheris.cc @@ -114,13 +114,13 @@ void Fuzz() { exit(1); } - py::module_ atheris = py::module_::import("sys").attr("modules")["atheris"]; - py::module_ core; + py::module atheris = py::module::import("sys").attr("modules")["atheris"]; + py::module core; if (internal_libfuzzer) { - core = py::module_::import("atheris.core_with_libfuzzer"); + core = py::module::import("atheris.core_with_libfuzzer"); } else { - core = py::module_::import("atheris.core_without_libfuzzer"); + core = py::module::import("atheris.core_without_libfuzzer"); } atheris.attr("_trace_cmp") = core.attr("_trace_cmp"); From d58e219808da339593fe5bd27003a6820a5d662a Mon Sep 17 00:00:00 2001 From: pd-fkie <77979557+pd-fkie@users.noreply.github.com> Date: Thu, 8 Jul 2021 21:10:12 +0200 Subject: [PATCH 36/36] Bug fix ? Cast `pybind11::detail::item_accessor` to `pybind11::module` --- atheris.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atheris.cc b/atheris.cc index eeecd664..5b7b8ff1 100644 --- a/atheris.cc +++ b/atheris.cc @@ -114,7 +114,7 @@ void Fuzz() { exit(1); } - py::module atheris = py::module::import("sys").attr("modules")["atheris"]; + py::module atheris = (py::module) py::module::import("sys").attr("modules")["atheris"]; py::module core; if (internal_libfuzzer) {