Skip to content

Commit

Permalink
Merge pull request #122 from S2E/forkprofiler
Browse files Browse the repository at this point in the history
Forkprofiler
  • Loading branch information
vitalych authored Apr 28, 2018
2 parents b92e366 + 4ef41b5 commit 9434ada
Show file tree
Hide file tree
Showing 38 changed files with 1,655 additions and 450 deletions.
10 changes: 8 additions & 2 deletions s2e_env/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def run_from_argv(self, argv):
args = cmd_options.pop('args', ())

try:
if os.getuid() == 0:
if not os.getuid():
raise CommandError('Please do not run s2e as root')

output = self.execute(*args, **cmd_options)
Expand Down Expand Up @@ -196,7 +196,8 @@ def handle(self, *args, **options):
raise NotImplementedError('subclasses of BaseCommand must provide a '
'handle() method')


# pylint: disable=abstract-method
# We don't want to implement handle() in this class
class EnvCommand(BaseCommand):
"""
The base command for all commands that follow the ``init`` command.
Expand Down Expand Up @@ -298,6 +299,8 @@ def image_path(self, *p):
return self.env_path('images', *p)


# pylint: disable=abstract-method
# We don't want to implement handle() in this class
class ProjectCommand(EnvCommand):
"""
The base command for all commands that work on existing projects.
Expand Down Expand Up @@ -342,3 +345,6 @@ def project_path(self, *p):
Create a path relative to this project directory.
"""
return os.path.join(self._project_dir, *p)

def symbol_search_path(self):
    """
    Return the list of locations that make up this project's search path.

    The list contains, in order, the project directory itself followed by
    its ``guestfs`` and ``guest-tools`` entries, each built with
    ``project_path``.
    """
    project_root = self.project_path()
    guestfs_dir = self.project_path('guestfs')
    guest_tools_dir = self.project_path('guest-tools')
    return [project_root, guestfs_dir, guest_tools_dir]
3 changes: 1 addition & 2 deletions s2e_env/commands/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,7 @@ def _rebuild_components(self, components):
continue

# Check if the user has specified a valid component prefix
# XXX This will delete both the debug and release stamps (if they
# exist)
# TODO: This will delete both the debug and release stamps (if they exist)
if component in stamp_prefixes:
stamps_to_delete.extend(glob.glob(self.env_path('build', 's2e', 'stamps', '%s-*' % component)))
continue
Expand Down
80 changes: 59 additions & 21 deletions s2e_env/commands/code_coverage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,26 +67,64 @@ def get_tb_state(tb_coverage_file):
return None


def parse_tb_file(path, module):
def _touint64(num):
"""
Parse a translation block coverage file generated by S2E's
``TranslationBlockCoverage`` plugin.
This is required to convert signed json integers to unsigned.
"""
with open(path, 'r') as f:
try:
tb_coverage_data = json.load(f)
except Exception:
logger.warning('Failed to parse translation block JSON file %s',
path)
return None

if not tb_coverage_data:
logger.warning('Translation block JSON file %s is empty', path)
return None

if module not in tb_coverage_data:
logger.warning('Target %s not found in translation block JSON file %s',
module, path)
return None

return tb_coverage_data[module]
return num & 0xffffffffffffffff


def _tb_to_uint64(tb):
    """
    Normalise a translation block record.

    ``tb`` is indexed as ``(start_addr, end_addr, size)``. The two addresses
    are passed through ``_touint64``; the size is returned untouched.

    Returns a ``(start_addr, end_addr, size)`` tuple.
    """
    return _touint64(tb[0]), _touint64(tb[1]), tb[2]

def aggregate_tb_files_per_state(tb_files):
    """
    Aggregate translation block coverage information from all files and
    put them in a single dict.

    Every file in ``tb_files`` is loaded as JSON and iterated as a mapping
    from module path to a list of translation blocks. Files that cannot be
    parsed, or whose content is empty, are skipped with a warning.

    Returns a dict of the form ``{module_path: {state_id: set_of_tbs}}``.
    ``state_id`` is whatever ``get_tb_state`` returns for the file, and each
    translation block is the tuple produced by ``_tb_to_uint64``.
    """
    ret = {}
    for f in tb_files:
        with open(f, 'r') as fp:
            try:
                data = json.load(fp)
            except Exception:
                # Best effort: one unreadable file must not abort the
                # aggregation of the remaining ones.
                logger.warning('Failed to parse translation block JSON file %s', f)
                continue

        if not data:
            logger.warning('Translation block JSON file %s is empty', f)
            continue

        state_id = get_tb_state(f)

        # ``items()`` works on both Python 2 and 3 (``iteritems()`` does not)
        for module_path, coverage in data.items():
            # Get-or-create the per-module and per-state containers with a
            # direct dict lookup instead of scanning ``ret.keys()``.
            tbs = ret.setdefault(module_path, {}).setdefault(state_id, set())
            tbs.update(_tb_to_uint64(tb) for tb in coverage)
    return ret


def aggregate_tb_files(tb_files):
    """
    Aggregate translation block coverage across all states.

    Builds on ``aggregate_tb_files_per_state`` and merges the per-state sets
    of every module into a single set.

    Returns a dict of the form ``{module_path: set_of_tbs}``.
    """
    per_state = aggregate_tb_files_per_state(tb_files)

    ret = {}
    for module_path, states in per_state.items():
        merged = set()
        for coverage in states.values():
            # The per-state aggregation already stores normalised
            # (start, end, size) tuples, so they can be merged as-is.
            merged.update(coverage)
        ret[module_path] = merged
    return ret
189 changes: 104 additions & 85 deletions s2e_env/commands/code_coverage/basic_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@
import struct

from s2e_env.command import ProjectCommand, CommandError
from . import get_tb_files, get_tb_state, parse_tb_file
from s2e_env.symbols.paths import guess_target_path
from . import get_tb_files, aggregate_tb_files_per_state


logger = logging.getLogger('basicblock')
Expand Down Expand Up @@ -87,13 +88,40 @@ def __init__(self, *args, **kwargs):
*args, **kwargs)

# pylint: disable=method-hidden
# pylint: disable=no-self-use
def object_hook(self, d):
    """
    Turn a decoded JSON object into a ``BasicBlock`` when possible.

    Dicts that have a ``start_addr`` key are converted using their
    ``start_addr``, ``end_addr`` and ``function`` entries; every other dict
    is returned unchanged.
    """
    if 'start_addr' not in d:
        return d

    return BasicBlock(d['start_addr'], d['end_addr'], d['function'])


def _get_basic_block_coverage(tb_coverage, bbs):
"""
Calculate the basic block coverage.
This information is derived from the static basic block list (generated
by the chosen disassembler) and the translation block (TB) list
(extracted from the JSON file(s) generated by S2E's
``TranslationBlockCoverage`` plugin).
"""

covered_bbs = defaultdict(set)

for state, coverage in tb_coverage.iteritems():
logger.info('Calculating basic block coverage for state %d', state)

for tb_start_addr, tb_end_addr, _ in coverage:
for bb in bbs:
# Check if the translation block falls within a basic block
# OR a basic block falls within a translation block
if (bb.end_addr >= tb_start_addr >= bb.start_addr or
bb.start_addr <= tb_end_addr <= bb.end_addr):
covered_bbs[state].add(bb)

return covered_bbs


class BasicBlockCoverage(ProjectCommand):
"""
Generate a basic block coverage report.
Expand All @@ -120,65 +148,78 @@ class BasicBlockCoverage(ProjectCommand):
'Total basic blocks: {num_bbs}\n' \
'Covered basic blocks: {num_covered_bbs} ({percent:.1%})'

def _get_disas_info(self, module, actual_module_path):
# Check if a cached version of the disassembly information exists.
# If it does, then we don't have to disassemble the binary (which
# may take a long time for large binaries)
disas_info = self._get_cached_disassembly_info(actual_module_path)

# If no cached .disas file exists, generate a new one using the
# given disassembler and cache the results
if not disas_info:
disas_info = self._get_disassembly_info(actual_module_path)
if not disas_info:
raise CommandError('No disassembly information found')

# TODO: store the cached file along side the original file (e.g., in guestfs)
self._save_disassembly_info(module, disas_info)

return disas_info

# pylint: disable=too-many-arguments
# TODO: reduce number of args
def _save_coverage(self, options, actual_module_path, module, disas_info, tb_coverage):
    """
    Compute the basic block coverage of one module, write it to disk and
    log a summary.

    ``options`` is the command's option dict (only ``drcov`` is read here).
    ``disas_info`` supplies the static basic block list (``bbs``) and, for
    drcov output, ``base_addr`` and ``end_addr``. ``tb_coverage`` is the
    per-state translation block coverage recorded by S2E.

    Raises ``CommandError`` when no basic block was covered.
    """
    # Calculate basic block coverage information (based on the
    # translation block coverage recorded by S2E)
    bbs = disas_info.get('bbs', [])
    bb_coverage = _get_basic_block_coverage(tb_coverage, bbs)
    if not bb_coverage:
        raise CommandError('No basic block coverage information found')

    # Calculate some statistics (across all states)
    total_bbs = len(bbs)
    num_covered_bbs = len(set(itertools.chain.from_iterable(bb_coverage.values())))

    # Write the basic block coverage information to disk.
    #
    # If we are using drcov format, each state's basic block coverage
    # is written to a separate drcov file.
    #
    # Otherwise combine all the basic block coverage information
    # (across all states) into a single JSON file.
    if options['drcov']:
        bb_coverage_loc = self._save_drcov(actual_module_path,
                                           disas_info['base_addr'],
                                           disas_info['end_addr'],
                                           bb_coverage)
    else:
        bb_coverage_loc = self._save_basic_block_coverage(module,
                                                          bb_coverage,
                                                          total_bbs,
                                                          num_covered_bbs)

    # Force true division so the ratio is not truncated to 0 on Python 2
    # when ``from __future__ import division`` is not in effect.
    # ``total_bbs`` cannot be 0 here: an empty basic block list yields no
    # coverage and is rejected above.
    percent = float(num_covered_bbs) / total_bbs

    logger.success(self.RESULTS.format(bb_loc=bb_coverage_loc,
                                       num_bbs=total_bbs,
                                       num_covered_bbs=num_covered_bbs,
                                       percent=percent))

def handle(self, *args, **options):
# Initialize the backend disassembler
self._initialize_disassembler()

target_path = self._project_desc['target_path']
target_dir = os.path.dirname(target_path)
modules = self._project_desc['modules']
tb_files = get_tb_files(self.project_path('s2e-last'))
tb_coverage_files = aggregate_tb_files_per_state(tb_files)

# Get translation block coverage information for each module
for module, _ in modules:
module_path = os.path.join(target_dir, module)

# Check if a cached version of the disassembly information exists.
# If it does, then we don't have to disassemble the binary (which
# may take a long time for large binaries)
disas_info = self._get_cached_disassembly_info(module)
for module_path, tb_coverage in tb_coverage_files.iteritems():
try:
actual_module_path = guess_target_path(self.symbol_search_path(), module_path)
except Exception as e:
logger.error(e)
continue

# If no cached .disas file exists, generate a new one using the
# given disassembler and cache the results
if not disas_info:
disas_info = self._get_disassembly_info(module_path)
if not disas_info:
raise CommandError('No disassembly information found')

self._save_disassembly_info(module, disas_info)

# Calculate basic block coverage information (based on the
# translation block coverage recorded by S2E)
bbs = disas_info.get('bbs', [])
bb_coverage = self._get_basic_block_coverage(module, bbs)
if not bb_coverage:
raise CommandError('No basic block coverage information found')

# Calculate some statistics (across all states)
total_bbs = len(bbs)
num_covered_bbs = len(set(itertools.chain(*bb_coverage.values())))

# Write the basic block coverage information to disk.
#
# If we are using drcov format, each state's basic block coverage
# is written to a separate drcov file.
#
# Otherwise combine all the basic block coverage information
# (across all states) into a single JSON file.
if options['drcov']:
bb_coverage_loc = self._save_drcov(module_path,
disas_info['base_addr'],
disas_info['end_addr'],
bb_coverage)
else:
bb_coverage_loc = self._save_basic_block_coverage(module,
bb_coverage,
total_bbs,
num_covered_bbs)

logger.success(self.RESULTS.format(bb_loc=bb_coverage_loc,
num_bbs=total_bbs,
num_covered_bbs=num_covered_bbs,
percent=num_covered_bbs / total_bbs))
module = os.path.basename(actual_module_path)
disas_info = self._get_disas_info(module, actual_module_path)
self._save_coverage(options, actual_module_path, module, disas_info, tb_coverage)

def _initialize_disassembler(self):
"""
Expand Down Expand Up @@ -251,37 +292,6 @@ def _save_disassembly_info(self, module, disas_info):
with open(disas_path, 'w') as disas_file:
json.dump(disas_info, disas_file, cls=BasicBlockEncoder)

def _get_basic_block_coverage(self, module, bbs):
    """
    Calculate the basic block coverage.

    This information is derived from the static basic block list (generated
    by the chosen disassembler) and the translation block (TB) list
    (extracted from the JSON file(s) generated by S2E's
    ``TranslationBlockCoverage`` plugin).

    The translation block files are taken from the project's ``s2e-last``
    directory; files with no data for ``module`` are skipped.

    Returns a ``defaultdict`` mapping each state to the set of basic blocks
    it covered.
    """
    tb_coverage_files = get_tb_files(self.project_path('s2e-last'))
    covered_bbs = defaultdict(set)

    for tb_coverage_file in tb_coverage_files:
        tb_coverage_data = parse_tb_file(tb_coverage_file, module)
        if not tb_coverage_data:
            continue

        state = get_tb_state(tb_coverage_file)

        logger.info('Calculating basic block coverage for state %d', state)

        for tb_start_addr, tb_end_addr, _ in tb_coverage_data:
            for bb in bbs:
                # The translation block starts or ends inside the basic
                # block...
                tb_start_in_bb = bb.start_addr <= tb_start_addr <= bb.end_addr
                tb_end_in_bb = bb.start_addr <= tb_end_addr <= bb.end_addr
                # ...OR the basic block lies entirely within the translation
                # block. Neither endpoint test above catches this case.
                bb_in_tb = (tb_start_addr <= bb.start_addr and
                            bb.end_addr <= tb_end_addr)

                if tb_start_in_bb or tb_end_in_bb or bb_in_tb:
                    covered_bbs[state].add(bb)

    return covered_bbs

def _save_basic_block_coverage(self, module, basic_blocks, total_bbs, num_covered_bbs):
"""
Write the basic block coverage information to a single JSON file. This
Expand Down Expand Up @@ -319,6 +329,15 @@ def _save_basic_block_coverage(self, module, basic_blocks, total_bbs, num_covere

return bb_coverage_file

@staticmethod
def _make_disassembly_info(bbs, base_addr, end_addr):
return {
'bbs': bbs,
'base_addr': base_addr,
'end_addr': end_addr,
}


####################
# drcov generation #
####################
Expand Down
6 changes: 1 addition & 5 deletions s2e_env/commands/code_coverage/binaryninja_basic_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,7 @@ def _get_disassembly_info(self, module_path):
# Get the module's end address
end_addr = self._bv.end

return {
'bbs': bbs,
'base_addr': base_addr,
'end_addr': end_addr,
}
return BasicBlockCoverage._make_disassembly_info(bbs, base_addr, end_addr)

def _split_basic_block(self, func_name, basic_block):
"""
Expand Down
1 change: 1 addition & 0 deletions s2e_env/commands/code_coverage/ida_basic_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def _get_disassembly_info(self, module_path):
raise CommandError('Failed to generate disas file for '
'%s' % module_name)

logger.info('Disassembly successful')
# Parse the basic block list file
with open(disas_file, 'r') as f:
return json.load(f, cls=BasicBlockDecoder)
Expand Down
Loading

0 comments on commit 9434ada

Please sign in to comment.