Skip to content

Commit

Permalink
[fuzzing] wasm2c integration (#2772)
Browse files Browse the repository at this point in the history
This adds support for fuzzing with wabt's wasm2c that @binji wrote.
Basically we compile the wasm to C, then compile the C to a native
executable with a custom main() that wraps around it. The executable
should then print exactly the same output as that wasm does when run in
either the binaryen interpreter or a JS VM with our wrapper JS for that
wasm. In other words, compiling the wasm to C is another way to
run that wasm.

The main reasons I want this are to fuzz wasm2c itself, and to
have another option for fuzzing emcc. For the latter, we do fuzz
wasm-opt quite a lot, but that doesn't fuzz the non-wasm-opt
parts of emcc. And using wasm2c for that is nice since the
starting point is always a wasm file, which means we
can use tools like wasm-reduce and so forth, which can be
integrated with this fuzzer.

This also:

Refactors the fuzzer harness a little to make it easier to
add more "VMs" to run wasms in.

Disables autoreduction when re-running a specific testcase, a problem I
hit while developing this.
  • Loading branch information
kripken authored Apr 22, 2020
1 parent d8b414d commit 35a36b1
Show file tree
Hide file tree
Showing 4 changed files with 323 additions and 63 deletions.
182 changes: 121 additions & 61 deletions scripts/fuzz_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,27 +292,78 @@ def count_runs(self):
# Run VMs and compare results

class VM:
def __init__(self, name, run, deterministic_nans, requires_legalization):
def __init__(self, name, run, can_run, can_compare_to_self, can_compare_to_others):
self.name = name
self.run = run
self.deterministic_nans = deterministic_nans
self.requires_legalization = requires_legalization
self.can_run = can_run
self.can_compare_to_self = can_compare_to_self
self.can_compare_to_others = can_compare_to_others


class CompareVMs(TestCaseHandler):
def __init__(self):
super(CompareVMs, self).__init__()

def run_binaryen_interpreter(wasm):
def byn_run(wasm):
return run_bynterp(wasm, ['--fuzz-exec-before'])

def run_v8(wasm):
def v8_run(wasm):
run([in_bin('wasm-opt'), wasm, '--emit-js-wrapper=' + wasm + '.js'] + FEATURE_OPTS)
return run_vm([shared.V8, wasm + '.js'] + shared.V8_OPTS + ['--', wasm])

# Predicate that is unconditionally true. It is passed as a VM's can_run /
# can_compare_to_self / can_compare_to_others callback when that capability
# has no restrictions.
def yes():
return True

# Predicate: true only when the LEGALIZE setting is on and the NANS setting
# is off. Used as d8's can_compare_to_others callback: without legalization
# the JS fails immediately, and with NaNs VM differences can confuse the
# comparison.
def if_legal_and_no_nans():
return LEGALIZE and not NANS

# Predicate: true when the NANS setting is off. Used as d8's
# can_compare_to_self callback, since with NaNs only very simple VMs can be
# compared to themselves after optimizations.
def if_no_nans():
return not NANS

# A "VM" that runs a wasm by translating it to C with wabt's wasm2c,
# compiling that C (plus a generated main.c wrapper and the wasm2c runtime)
# with clang, and running the resulting native executable.
#
# Unlike the plain VM instances, this subclass does not call VM.__init__;
# name is a class attribute, and run / can_run / can_compare_to_self /
# can_compare_to_others are defined as methods here rather than passed in
# as callbacks.
class Wasm2C(VM):
name = 'wasm2c'

def __init__(self):
# look for wabt in the path. if it's not here, don't run wasm2c
try:
wabt_bin = shared.which('wasm2c')
# the wabt root is taken to be two directory levels above the wasm2c
# binary, and the runtime sources (wasm-rt-impl.c and its headers) are
# expected in <wabt_root>/wasm2c
# NOTE(review): presumably shared.which returns None when nothing is
# found, which would make dirname raise and land in the except below -
# confirm against shared.which
wabt_root = os.path.dirname(os.path.dirname(wabt_bin))
self.wasm2c_dir = os.path.join(wabt_root, 'wasm2c')
except Exception as e:
print('warning: no wabt found:', e)
# None marks wasm2c as unavailable; can_run() checks for it
self.wasm2c_dir = None

# Returns whether this VM can be used under the current fuzz settings.
def can_run(self):
if self.wasm2c_dir is None:
return False
# if we legalize for JS, the ABI is not what C wants
if LEGALIZE:
return False
# wasm2c doesn't support most features
# (so every one of these features must be explicitly disabled in FEATURE_OPTS)
return all([x in FEATURE_OPTS for x in ['--disable-exception-handling', '--disable-simd', '--disable-threads', '--disable-bulk-memory', '--disable-nontrapping-float-to-int', '--disable-tail-call', '--disable-sign-ext', '--disable-reference-types', '--disable-multivalue']])

# Builds a native executable from the given wasm file and returns the
# result of run_vm on it. Writes main.c, wasm.c and (since clang is given
# no -o flag) a.out in the current working directory.
def run(self, wasm):
# emit main.c, a C wrapper that can run the wasm once it is compiled with wasm2c
run([in_bin('wasm-opt'), wasm, '--emit-wasm2c-wrapper=main.c'] + FEATURE_OPTS)
# translate the wasm itself to C; 'wasm2c' is invoked by name here
run(['wasm2c', wasm, '-o', 'wasm.c'])
# compile the wrapper, the translated wasm and the wasm2c runtime together;
# -Werror makes any compiler warning fail the build
compile_cmd = ['clang', 'main.c', 'wasm.c', os.path.join(self.wasm2c_dir, 'wasm-rt-impl.c'), '-I' + self.wasm2c_dir, '-lm', '-Werror']
run(compile_cmd)
return run_vm(['./a.out'])

# Returns whether before/after-optimization outputs of this VM may be compared.
def can_compare_to_self(self):
# The binaryen optimizer changes NaNs in the ways that wasm
# expects, but that's not quite what C has
return not NANS

# Returns whether this VM's output may be compared against other VMs' outputs.
def can_compare_to_others(self):
# C won't trap on OOB, and NaNs can differ from wasm VMs
return not OOB and not NANS

self.vms = [
VM('binaryen interpreter', run_binaryen_interpreter, deterministic_nans=True, requires_legalization=False),
VM('d8', run_v8, deterministic_nans=False, requires_legalization=True),
VM('binaryen interpreter', byn_run, can_run=yes, can_compare_to_self=yes, can_compare_to_others=yes),
# with nans, VM differences can confuse us, so only very simple VMs can compare to themselves after opts in that case.
# if not legalized, the JS will fail immediately, so no point to compare to others
VM('d8', v8_run, can_run=yes, can_compare_to_self=if_no_nans, can_compare_to_others=if_legal_and_no_nans),
Wasm2C()
]

def handle_pair(self, input, before_wasm, after_wasm, opts):
Expand All @@ -321,32 +372,38 @@ def handle_pair(self, input, before_wasm, after_wasm, opts):
self.compare_before_and_after(before, after)

def run_vms(self, wasm):
results = []
# vm_results will contain pairs of (vm, result)
vm_results = []
for vm in self.vms:
results.append(fix_output(vm.run(wasm)))
if vm.can_run():
vm_results.append((vm, fix_output(vm.run(wasm))))

# compare between the vms on this specific input

# NaNs are a source of nondeterminism between VMs; don't compare them.
if not NANS:
first = None
for i in range(len(results)):
# No legalization for JS means we can't compare JS to others, as any
# illegal export will fail immediately.
if LEGALIZE or not vm.requires_legalization:
if first is None:
first = i
else:
compare_between_vms(results[first], results[i], 'CompareVMs between VMs: ' + self.vms[first].name + ' and ' + self.vms[i].name)

return results
first_vm = None
first_result = None
for vm, result in vm_results:
if vm.can_compare_to_others():
if first_vm is None:
first_vm = vm
first_result = result
else:
compare_between_vms(first_result, result, 'CompareVMs between VMs: ' + first_vm.name + ' and ' + vm.name)

return vm_results

def compare_before_and_after(self, before, after):
# we received lists of (vm, result). the lists must be of the same size,
# and with the same vms
assert len(before) == len(after)
num = len(before)
for i in range(num):
assert before[i][0] == after[i][0]

# compare each VM to itself on the before and after inputs
for i in range(len(before)):
vm = self.vms[i]
if vm.deterministic_nans:
compare(before[i], after[i], 'CompareVMs between before and after: ' + vm.name)
for i in range(num):
if before[i][0].can_compare_to_self():
compare(before[i][1], after[i][1], 'CompareVMs between before and after: ' + before[i][0].name)

def can_run_on_feature_opts(self, feature_opts):
return all([x in feature_opts for x in ['--disable-simd', '--disable-reference-types', '--disable-exception-handling', '--disable-multivalue']])
Expand Down Expand Up @@ -487,7 +544,7 @@ def can_run_on_feature_opts(self, feature_opts):


# Do one test, given an input file for -ttf and some optimizations to run
def test_one(random_input, opts):
def test_one(random_input, opts, allow_autoreduce):
randomize_pass_debug()
randomize_feature_opts()
randomize_fuzz_settings()
Expand Down Expand Up @@ -535,40 +592,41 @@ def write_commands_and_test(opts):
try:
write_commands_and_test(opts)
except subprocess.CalledProcessError:
print('')
print('====================')
print('Found a problem! See "t.sh" for the commands, and "input.wasm" for the input. Auto-reducing to "reduced.wasm" and "tt.sh"...')
print('====================')
print('')
# first, reduce the fuzz opts: keep removing until we can't
while 1:
reduced = False
for i in range(len(opts)):
# some opts can't be removed, like --flatten --dfo requires flatten
if opts[i] == '--flatten':
if i != len(opts) - 1 and opts[i + 1] in ('--dfo', '--local-cse', '--rereloop'):
continue
shorter = opts[:i] + opts[i + 1:]
try:
write_commands_and_test(shorter)
except subprocess.CalledProcessError:
# great, the shorter one is good as well
opts = shorter
print('reduced opts to ' + ' '.join(opts))
reduced = True
if allow_autoreduce:
print('')
print('====================')
print('Found a problem! See "t.sh" for the commands, and "input.wasm" for the input. Auto-reducing to "reduced.wasm" and "tt.sh"...')
print('====================')
print('')
# first, reduce the fuzz opts: keep removing until we can't
while 1:
reduced = False
for i in range(len(opts)):
# some opts can't be removed, like --flatten --dfo requires flatten
if opts[i] == '--flatten':
if i != len(opts) - 1 and opts[i + 1] in ('--dfo', '--local-cse', '--rereloop'):
continue
shorter = opts[:i] + opts[i + 1:]
try:
write_commands_and_test(shorter)
except subprocess.CalledProcessError:
# great, the shorter one is good as well
opts = shorter
print('reduced opts to ' + ' '.join(opts))
reduced = True
break
if not reduced:
break
if not reduced:
break
# second, reduce the wasm
# copy a.wasm to a safe place as the reducer will use the commands on new inputs, and the commands work on a.wasm
shutil.copyfile('a.wasm', 'input.wasm')
# add a command to verify the input. this lets the reducer see that it is indeed working on the input correctly
commands = [in_bin('wasm-opt') + ' -all a.wasm'] + get_commands(opts)
write_commands(commands, 'tt.sh')
# reduce the input to something smaller with the same behavior on the script
subprocess.check_call([in_bin('wasm-reduce'), 'input.wasm', '--command=bash tt.sh', '-t', 'a.wasm', '-w', 'reduced.wasm'])
print('Finished reduction. See "tt.sh" and "reduced.wasm".')
raise Exception('halting after autoreduction')
# second, reduce the wasm
# copy a.wasm to a safe place as the reducer will use the commands on new inputs, and the commands work on a.wasm
shutil.copyfile('a.wasm', 'input.wasm')
# add a command to verify the input. this lets the reducer see that it is indeed working on the input correctly
commands = [in_bin('wasm-opt') + ' -all a.wasm'] + get_commands(opts)
write_commands(commands, 'tt.sh')
# reduce the input to something smaller with the same behavior on the script
subprocess.check_call([in_bin('wasm-reduce'), 'input.wasm', '--command=bash tt.sh', '-t', 'a.wasm', '-w', 'reduced.wasm'])
print('Finished reduction. See "tt.sh" and "reduced.wasm".')
raise Exception('halting after autoreduction')
print('')

# create a second wasm for handlers that want to look at pairs.
Expand Down Expand Up @@ -736,7 +794,9 @@ def randomize_opt_flags():
opts = randomize_opt_flags()
print('randomized opts:', ' '.join(opts))
try:
total_wasm_size += test_one(raw_input_data, opts)
# don't autoreduce if we are given a specific case to test, as this
# is a reproduction of the test case, not the first finding of it
total_wasm_size += test_one(raw_input_data, opts, allow_autoreduce=given_seed is None)
except KeyboardInterrupt:
print('(stopping by user request)')
break
Expand Down
1 change: 0 additions & 1 deletion src/tools/execution-results.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
// Shared execution result checking code
//

#include "ir/import-utils.h"
#include "shell-interface.h"
#include "wasm.h"

Expand Down
17 changes: 16 additions & 1 deletion src/tools/wasm-opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "wasm-printing.h"
#include "wasm-s-parser.h"
#include "wasm-validator.h"
#include "wasm2c-wrapper.h"

#define DEBUG_TYPE "opt"

Expand Down Expand Up @@ -87,6 +88,7 @@ int main(int argc, const char* argv[]) {
bool fuzzOOB = true;
std::string emitJSWrapper;
std::string emitSpecWrapper;
std::string emitWasm2CWrapper;
std::string inputSourceMapFilename;
std::string outputSourceMapFilename;
std::string outputSourceMapUrl;
Expand Down Expand Up @@ -185,6 +187,14 @@ int main(int argc, const char* argv[]) {
[&](Options* o, const std::string& arguments) {
emitSpecWrapper = arguments;
})
.add("--emit-wasm2c-wrapper",
"-esw",
"Emit a C wrapper file that can run the wasm after it is compiled "
"with wasm2c, useful for fuzzing",
Options::Arguments::One,
[&](Options* o, const std::string& arguments) {
emitWasm2CWrapper = arguments;
})
.add("--input-source-map",
"-ism",
"Consume source map from the specified file",
Expand Down Expand Up @@ -293,13 +303,18 @@ int main(int argc, const char* argv[]) {
outfile << generateJSWrapper(wasm);
outfile.close();
}

if (emitSpecWrapper.size() > 0) {
std::ofstream outfile;
outfile.open(emitSpecWrapper, std::ofstream::out);
outfile << generateSpecWrapper(wasm);
outfile.close();
}
if (emitWasm2CWrapper.size() > 0) {
std::ofstream outfile;
outfile.open(emitWasm2CWrapper, std::ofstream::out);
outfile << generateWasm2CWrapper(wasm);
outfile.close();
}

std::string firstOutput;

Expand Down
Loading

0 comments on commit 35a36b1

Please sign in to comment.