From 674874690e117abdf22e1c631eae11853dcc8773 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sun, 11 Jun 2023 05:47:42 +0200 Subject: [PATCH 01/14] init fuzzers --- fuzz/.gitignore | 3 + fuzz/Cargo.toml | 69 ++++++++++++++++++++ fuzz/README.md | 73 ++++++++++++++++++++++ fuzz/corpus/rome_parse_d_ts | 1 + fuzz/corpus/rome_parse_jsx | 1 + fuzz/corpus/rome_parse_module | 1 + fuzz/corpus/rome_parse_script | 1 + fuzz/corpus/rome_parse_tsx | 1 + fuzz/corpus/rome_parse_typescript | 1 + fuzz/fuzz_targets/rome_common.rs | 20 ++++++ fuzz/fuzz_targets/rome_parse_all.rs | 35 +++++++++++ fuzz/fuzz_targets/rome_parse_d_ts.rs | 15 +++++ fuzz/fuzz_targets/rome_parse_jsx.rs | 15 +++++ fuzz/fuzz_targets/rome_parse_module.rs | 15 +++++ fuzz/fuzz_targets/rome_parse_script.rs | 15 +++++ fuzz/fuzz_targets/rome_parse_tsx.rs | 15 +++++ fuzz/fuzz_targets/rome_parse_typescript.rs | 15 +++++ fuzz/init-fuzzer.sh | 28 +++++++++ fuzz/reinit-fuzzer.sh | 14 +++++ 19 files changed, 338 insertions(+) create mode 100644 fuzz/.gitignore create mode 100644 fuzz/Cargo.toml create mode 100644 fuzz/README.md create mode 120000 fuzz/corpus/rome_parse_d_ts create mode 120000 fuzz/corpus/rome_parse_jsx create mode 120000 fuzz/corpus/rome_parse_module create mode 120000 fuzz/corpus/rome_parse_script create mode 120000 fuzz/corpus/rome_parse_tsx create mode 120000 fuzz/corpus/rome_parse_typescript create mode 100644 fuzz/fuzz_targets/rome_common.rs create mode 100644 fuzz/fuzz_targets/rome_parse_all.rs create mode 100644 fuzz/fuzz_targets/rome_parse_d_ts.rs create mode 100644 fuzz/fuzz_targets/rome_parse_jsx.rs create mode 100644 fuzz/fuzz_targets/rome_parse_module.rs create mode 100644 fuzz/fuzz_targets/rome_parse_script.rs create mode 100644 fuzz/fuzz_targets/rome_parse_tsx.rs create mode 100644 fuzz/fuzz_targets/rome_parse_typescript.rs create mode 100644 fuzz/init-fuzzer.sh create mode 100644 fuzz/reinit-fuzzer.sh diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 
00000000000..b3e053b0c73 --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,3 @@ +artifacts/ +corpus/rome_parse_all +Cargo.lock diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 00000000000..303ef94dc37 --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,69 @@ +[package] +name = "rome-fuzz" +version = "0.0.0" +authors = [ + "Addison Crump ", +] +publish = false +edition = "2021" + +[features] +default = ["libfuzzer"] +full-idempotency = [] +libfuzzer = ["libfuzzer-sys/link_libfuzzer"] +rome_parse_all = [] + +[package.metadata] +cargo-fuzz = true + +[dependencies] +arbitrary = { version = "1.3.0", features = ["derive"] } +libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer", default-features = false } +rome_js_parser = { path = "../crates/rome_js_parser" } +rome_js_syntax = { path = "../crates/rome_js_syntax" } +similar = { version = "2.2.1" } + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "rome_parse_all" +path = "fuzz_targets/rome_parse_all.rs" +required-features = ["rome_parse_all"] + +[[bin]] +name = "rome_parse_d_ts" +path = "fuzz_targets/rome_parse_d_ts.rs" + +[[bin]] +name = "rome_parse_module" +path = "fuzz_targets/rome_parse_module.rs" + +[[bin]] +name = "rome_parse_script" +path = "fuzz_targets/rome_parse_script.rs" + +[[bin]] +name = "rome_parse_jsx" +path = "fuzz_targets/rome_parse_jsx.rs" + +[[bin]] +name = "rome_parse_tsx" +path = "fuzz_targets/rome_parse_tsx.rs" + +[[bin]] +name = "rome_parse_typescript" +path = "fuzz_targets/rome_parse_typescript.rs" + +[profile.release] +opt-level = 3 +debug = true + +[profile.dev] +opt-level = 3 +debug = true + +[profile.test] +opt-level = 3 +debug = true diff --git a/fuzz/README.md b/fuzz/README.md new file mode 100644 index 00000000000..70a225c6aec --- /dev/null +++ b/fuzz/README.md @@ -0,0 +1,73 @@ +# rome-fuzz + +Fuzzers and associated utilities for automatic testing of Rome. 
+ +## Usage + +To use the fuzzers provided in this directory, start by invoking: + +```bash +./fuzz/init-fuzzers.sh +``` + +This will install [`cargo-fuzz`](https://github.com/rust-fuzz/cargo-fuzz) and optionally download a +[dataset](https://zenodo.org/record/3628784) which improves the efficacy of the testing. +**This step is necessary for initialising the corpus directory, as all fuzzers share a common +corpus.** +The dataset may take several hours to download and clean, so if you're just looking to try out the +fuzzers, skip the dataset download, though be warned that some features simply cannot be tested +without it (very unlikely for the fuzzer to generate valid JavaScript code from "thin air"). + +Once you have initialised the fuzzers, you can then execute any fuzzer with: + +```bash +cargo fuzz run -s none name_of_fuzzer -- -timeout=1 +``` + +**Users using Apple M1 devices must use a nightly compiler and omit the `-s none` portion of this +command, as this architecture does not support fuzzing without a sanitizer.** +You can view the names of the available fuzzers with `cargo fuzz list`. +For specific details about how each fuzzer works, please read this document in its entirety. + +**IMPORTANT: You should run `./reinit-fuzzer.sh` after adding more file-based testcases.** This will +allow the testing of new features that you've added unit tests for. + +### Debugging a crash + +Once you've found a crash, you'll need to debug it. +The easiest first step in this process is to minimise the input such that the crash is still +triggered with a smaller input. +`cargo-fuzz` supports this out of the box with: + +```bash +cargo fuzz tmin -s none name_of_fuzzer artifacts/name_of_fuzzer/crash-... +``` + +From here, you will need to analyse the input and potentially the behaviour of the program. +The debugging process from here is unfortunately less well-defined, so you will need to apply some +expertise here. +Happy hunting! 
+ +## A brief introduction to fuzzers + +Fuzzing, or fuzz testing, is the process of providing generated data to a program under test. +The most common variety of fuzzers are mutational fuzzers; given a set of existing inputs (a +"corpus"), it will attempt to slightly change (or "mutate") these inputs into new inputs that cover +parts of the code that haven't yet been observed. +Using this strategy, we can quite efficiently generate testcases which cover significant portions of +the program, both with expected and unexpected data. +[This is really quite effective for finding bugs.](https://github.com/rust-fuzz/trophy-case) + +The fuzzers here use [`cargo-fuzz`](https://github.com/rust-fuzz/cargo-fuzz), a utility which allows +Rust to integrate with [libFuzzer](https://llvm.org/docs/LibFuzzer.html), the fuzzer library built +into LLVM. +Each source file present in [`fuzz_targets`](fuzz_targets) is a harness, which is, in effect, a unit +test which can handle different inputs. +When an input is provided to a harness, the harness processes this data and libFuzzer observes the +code coverage and any special values used in comparisons over the course of the run. +Special values are preserved for future mutations and inputs which cover new regions of code are +added to the corpus. 
+ +## Each fuzzer harness in detail + +TODO diff --git a/fuzz/corpus/rome_parse_d_ts b/fuzz/corpus/rome_parse_d_ts new file mode 120000 index 00000000000..116f3e7db5d --- /dev/null +++ b/fuzz/corpus/rome_parse_d_ts @@ -0,0 +1 @@ +rome_parse_all \ No newline at end of file diff --git a/fuzz/corpus/rome_parse_jsx b/fuzz/corpus/rome_parse_jsx new file mode 120000 index 00000000000..116f3e7db5d --- /dev/null +++ b/fuzz/corpus/rome_parse_jsx @@ -0,0 +1 @@ +rome_parse_all \ No newline at end of file diff --git a/fuzz/corpus/rome_parse_module b/fuzz/corpus/rome_parse_module new file mode 120000 index 00000000000..116f3e7db5d --- /dev/null +++ b/fuzz/corpus/rome_parse_module @@ -0,0 +1 @@ +rome_parse_all \ No newline at end of file diff --git a/fuzz/corpus/rome_parse_script b/fuzz/corpus/rome_parse_script new file mode 120000 index 00000000000..116f3e7db5d --- /dev/null +++ b/fuzz/corpus/rome_parse_script @@ -0,0 +1 @@ +rome_parse_all \ No newline at end of file diff --git a/fuzz/corpus/rome_parse_tsx b/fuzz/corpus/rome_parse_tsx new file mode 120000 index 00000000000..116f3e7db5d --- /dev/null +++ b/fuzz/corpus/rome_parse_tsx @@ -0,0 +1 @@ +rome_parse_all \ No newline at end of file diff --git a/fuzz/corpus/rome_parse_typescript b/fuzz/corpus/rome_parse_typescript new file mode 120000 index 00000000000..116f3e7db5d --- /dev/null +++ b/fuzz/corpus/rome_parse_typescript @@ -0,0 +1 @@ +rome_parse_all \ No newline at end of file diff --git a/fuzz/fuzz_targets/rome_common.rs b/fuzz/fuzz_targets/rome_common.rs new file mode 100644 index 00000000000..4771bf0ff64 --- /dev/null +++ b/fuzz/fuzz_targets/rome_common.rs @@ -0,0 +1,20 @@ +use libfuzzer_sys::Corpus; +use rome_js_parser::parse; +use rome_js_syntax::JsFileSource; + +pub fn fuzz_source_type(data: &[u8], source: JsFileSource) -> Corpus { + let Ok(code1) = std::str::from_utf8(data) else { return Corpus::Reject; }; + + let parse1 = parse(code1, source); + if !parse1.has_errors() { + let code2 = parse1.tree().to_string(); + 
let parse2 = parse(&code2, source); + assert!( + !parse2.has_errors(), + "unparsing introduced a formatting error" + ); + assert_eq!(code2, parse2.tree().to_string()); + } + + Corpus::Keep +} diff --git a/fuzz/fuzz_targets/rome_parse_all.rs b/fuzz/fuzz_targets/rome_parse_all.rs new file mode 100644 index 00000000000..51293d253e8 --- /dev/null +++ b/fuzz/fuzz_targets/rome_parse_all.rs @@ -0,0 +1,35 @@ +#![no_main] + +mod rome_parse_d_ts; +mod rome_parse_jsx; +mod rome_parse_module; +mod rome_parse_script; +mod rome_parse_tsx; +mod rome_parse_typescript; + +use libfuzzer_sys::{fuzz_target, Corpus}; + +fn do_fuzz(data: &[u8]) -> Corpus { + let mut keep = Corpus::Reject; + if let Corpus::Keep = rome_parse_d_ts::do_fuzz(data) { + keep = Corpus::Keep; + } + if let Corpus::Keep = rome_parse_jsx::do_fuzz(data) { + keep = Corpus::Keep; + } + if let Corpus::Keep = rome_parse_module::do_fuzz(data) { + keep = Corpus::Keep; + } + if let Corpus::Keep = rome_parse_script::do_fuzz(data) { + keep = Corpus::Keep; + } + if let Corpus::Keep = rome_parse_tsx::do_fuzz(data) { + keep = Corpus::Keep; + } + if let Corpus::Keep = rome_parse_typescript::do_fuzz(data) { + keep = Corpus::Keep; + } + keep +} + +fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_parse_d_ts.rs b/fuzz/fuzz_targets/rome_parse_d_ts.rs new file mode 100644 index 00000000000..fd7cee1a4a2 --- /dev/null +++ b/fuzz/fuzz_targets/rome_parse_d_ts.rs @@ -0,0 +1,15 @@ +#![cfg_attr(not(feature = "rome_parse_all"), no_main)] + +#[path = "rome_common.rs"] +mod rome_common; + +use libfuzzer_sys::Corpus; +use rome_js_syntax::JsFileSource; + +pub fn do_fuzz(case: &[u8]) -> Corpus { + let parse_type = JsFileSource::d_ts(); + rome_common::fuzz_source_type(case, parse_type) +} + +#[cfg(not(feature = "rome_parse_all"))] +libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_parse_jsx.rs b/fuzz/fuzz_targets/rome_parse_jsx.rs new file mode 
100644 index 00000000000..2e62cc1f25c --- /dev/null +++ b/fuzz/fuzz_targets/rome_parse_jsx.rs @@ -0,0 +1,15 @@ +#![cfg_attr(not(feature = "rome_parse_all"), no_main)] + +#[path = "rome_common.rs"] +mod rome_common; + +use libfuzzer_sys::Corpus; +use rome_js_syntax::JsFileSource; + +pub fn do_fuzz(case: &[u8]) -> Corpus { + let parse_type = JsFileSource::jsx(); + rome_common::fuzz_source_type(case, parse_type) +} + +#[cfg(not(feature = "rome_parse_all"))] +libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_parse_module.rs b/fuzz/fuzz_targets/rome_parse_module.rs new file mode 100644 index 00000000000..db73e350c5a --- /dev/null +++ b/fuzz/fuzz_targets/rome_parse_module.rs @@ -0,0 +1,15 @@ +#![cfg_attr(not(feature = "rome_parse_all"), no_main)] + +#[path = "rome_common.rs"] +mod rome_common; + +use libfuzzer_sys::Corpus; +use rome_js_syntax::JsFileSource; + +pub fn do_fuzz(case: &[u8]) -> Corpus { + let parse_type = JsFileSource::js_module(); + rome_common::fuzz_source_type(case, parse_type) +} + +#[cfg(not(feature = "rome_parse_all"))] +libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_parse_script.rs b/fuzz/fuzz_targets/rome_parse_script.rs new file mode 100644 index 00000000000..411f44793f0 --- /dev/null +++ b/fuzz/fuzz_targets/rome_parse_script.rs @@ -0,0 +1,15 @@ +#![cfg_attr(not(feature = "rome_parse_all"), no_main)] + +#[path = "rome_common.rs"] +mod rome_common; + +use libfuzzer_sys::Corpus; +use rome_js_syntax::JsFileSource; + +pub fn do_fuzz(case: &[u8]) -> Corpus { + let parse_type = JsFileSource::js_script(); + rome_common::fuzz_source_type(case, parse_type) +} + +#[cfg(not(feature = "rome_parse_all"))] +libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_parse_tsx.rs b/fuzz/fuzz_targets/rome_parse_tsx.rs new file mode 100644 index 00000000000..13dcd9e2eff --- /dev/null +++ 
b/fuzz/fuzz_targets/rome_parse_tsx.rs @@ -0,0 +1,15 @@ +#![cfg_attr(not(feature = "rome_parse_all"), no_main)] + +#[path = "rome_common.rs"] +mod rome_common; + +use libfuzzer_sys::Corpus; +use rome_js_syntax::JsFileSource; + +pub fn do_fuzz(case: &[u8]) -> Corpus { + let parse_type = JsFileSource::tsx(); + rome_common::fuzz_source_type(case, parse_type) +} + +#[cfg(not(feature = "rome_parse_all"))] +libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_parse_typescript.rs b/fuzz/fuzz_targets/rome_parse_typescript.rs new file mode 100644 index 00000000000..59355de1c4e --- /dev/null +++ b/fuzz/fuzz_targets/rome_parse_typescript.rs @@ -0,0 +1,15 @@ +#![cfg_attr(not(feature = "rome_parse_all"), no_main)] + +#[path = "rome_common.rs"] +mod rome_common; + +use libfuzzer_sys::Corpus; +use rome_js_syntax::JsFileSource; + +pub fn do_fuzz(case: &[u8]) -> Corpus { + let parse_type = JsFileSource::ts(); + rome_common::fuzz_source_type(case, parse_type) +} + +#[cfg(not(feature = "rome_parse_all"))] +libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/init-fuzzer.sh b/fuzz/init-fuzzer.sh new file mode 100644 index 00000000000..5345e6cece4 --- /dev/null +++ b/fuzz/init-fuzzer.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +# https://stackoverflow.com/a/246128/3549270 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +cd "$SCRIPT_DIR" + +if ! cargo fuzz --help >&/dev/null; then + cargo install --git https://github.com/rust-fuzz/cargo +stable-fuzz.git +fi + +if [ ! -d corpus/rome_parse_all ]; then + mkdir -p corpus/rome_parse_all + read -p "Would you like to build a corpus from a javascript source code dataset? (this will take a long time!) [Y/n] " -n 1 -r + echo + cd corpus/rome_parse_all + if [[ $REPLY =~ ^[Yy]$ ]]; then + curl -L http://files.srl.inf.ethz.ch/data/js_dataset.tar.gz | tar xzO data.tar.gz | tar xz + find . -type d -exec chmod 755 {} \; + find . 
-type f -exec chmod 644 {} \; + fi + cp -r "../../../crates/rome_js_parser/test_data" . + find . -name \*.rast -delete + cd - + cargo fuzz cmin --features rome_parse_all -s none rome_parse_all +fi + +echo "Done! You are ready to fuzz." diff --git a/fuzz/reinit-fuzzer.sh b/fuzz/reinit-fuzzer.sh new file mode 100644 index 00000000000..abc8f436cde --- /dev/null +++ b/fuzz/reinit-fuzzer.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# https://stackoverflow.com/a/246128/3549270 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +cd "$SCRIPT_DIR" + +cd corpus/rome_parse_all +cp -r "../../../crates/rome_js_parser/test_data" . +find . -name \*.rast -delete +cd - +cargo fuzz cmin --features rome_parse_all -s none rome_parse_all + +echo "Done! You are ready to fuzz." From 1a2ed62522acce029e5150eeb5c2f3f9fb9962af Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sun, 11 Jun 2023 06:06:20 +0200 Subject: [PATCH 02/14] correct corpus link --- fuzz/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzz/README.md b/fuzz/README.md index 70a225c6aec..8d8bce9ae40 100644 --- a/fuzz/README.md +++ b/fuzz/README.md @@ -11,7 +11,7 @@ To use the fuzzers provided in this directory, start by invoking: ``` This will install [`cargo-fuzz`](https://github.com/rust-fuzz/cargo-fuzz) and optionally download a -[dataset](https://zenodo.org/record/3628784) which improves the efficacy of the testing. +[dataset](https://www.sri.inf.ethz.ch/js150) which improves the efficacy of the testing. 
**This step is necessary for initialising the corpus directory, as all fuzzers share a common corpus.** The dataset may take several hours to download and clean, so if you're just looking to try out the From 06760305db75a6107814e9ff02f7e558ccc7be9f Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sun, 11 Jun 2023 19:10:36 +0200 Subject: [PATCH 03/14] add more fuzzers --- fuzz/.gitignore | 2 ++ fuzz/Cargo.toml | 6 ++++ fuzz/README.md | 40 ++++++++++++++++++++-- fuzz/fuzz_targets/rome_common.rs | 28 ++++++++++----- fuzz/fuzz_targets/rome_parse_d_ts.rs | 2 +- fuzz/fuzz_targets/rome_parse_json.rs | 13 +++++++ fuzz/fuzz_targets/rome_parse_jsx.rs | 2 +- fuzz/fuzz_targets/rome_parse_module.rs | 2 +- fuzz/fuzz_targets/rome_parse_script.rs | 2 +- fuzz/fuzz_targets/rome_parse_tsx.rs | 2 +- fuzz/fuzz_targets/rome_parse_typescript.rs | 2 +- 11 files changed, 84 insertions(+), 17 deletions(-) create mode 100644 fuzz/fuzz_targets/rome_parse_json.rs diff --git a/fuzz/.gitignore b/fuzz/.gitignore index b3e053b0c73..5e00ab090db 100644 --- a/fuzz/.gitignore +++ b/fuzz/.gitignore @@ -1,3 +1,5 @@ artifacts/ corpus/rome_parse_all +corpus/rome_parse_json +corpus/rome_parse_css Cargo.lock diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 303ef94dc37..44ae841f77a 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -21,6 +21,8 @@ arbitrary = { version = "1.3.0", features = ["derive"] } libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer", default-features = false } rome_js_parser = { path = "../crates/rome_js_parser" } rome_js_syntax = { path = "../crates/rome_js_syntax" } +rome_json_parser = { path = "../crates/rome_json_parser" } +rome_json_syntax = { path = "../crates/rome_json_syntax" } similar = { version = "2.2.1" } # Prevent this from interfering with workspaces @@ -36,6 +38,10 @@ required-features = ["rome_parse_all"] name = "rome_parse_d_ts" path = "fuzz_targets/rome_parse_d_ts.rs" +[[bin]] +name = "rome_parse_json" +path = "fuzz_targets/rome_parse_json.rs" + 
[[bin]] name = "rome_parse_module" path = "fuzz_targets/rome_parse_module.rs" diff --git a/fuzz/README.md b/fuzz/README.md index 8d8bce9ae40..35b5998005a 100644 --- a/fuzz/README.md +++ b/fuzz/README.md @@ -10,8 +10,8 @@ To use the fuzzers provided in this directory, start by invoking: ./fuzz/init-fuzzers.sh ``` -This will install [`cargo-fuzz`](https://github.com/rust-fuzz/cargo-fuzz) and optionally download a -[dataset](https://www.sri.inf.ethz.ch/js150) which improves the efficacy of the testing. +This will install [`cargo-fuzz`](https://github.com/rust-fuzz/cargo-fuzz) and optionally download +datasets which improve the efficacy of the testing. **This step is necessary for initialising the corpus directory, as all fuzzers share a common corpus.** The dataset may take several hours to download and clean, so if you're just looking to try out the @@ -70,4 +70,38 @@ added to the corpus. ## Each fuzzer harness in detail -TODO +Each fuzzer harness is designed to test different aspects of Rome. +Since Rome's primary function is parsing, formatting, and linting, we can use fuzzing not only to +detect crashes or panics, but also to detect violations of guarantees of the crate. +This concept is used extensively throughout the fuzzers. + +### `rome_parse_*` + +Each of the `rome_parse_*` fuzz harnesses utilise the [round-trip +property](https://blog.ssanj.net/posts/2016-06-26-property-based-testing-patterns.html) of parsing +and unparsing; that is, given a particular input, if we parse some code successfully, we expect the +unparsed code to have the same content as the original code. +If they do not match, then some details of the original input were not captured on the first parse. +The corpus for the JS-like parsers is based on unit tests and [a JS dataset for machine learning +training](https://www.sri.inf.ethz.ch/js150). + +Errata for specific fuzzers can be seen below. 
+ +#### `rome_parse_json` + +Since JSON formats are distinct from JS source code and are a relatively simple format, it is not +strictly necessary to use the shared corpus. +[Fuzzbench](https://google.github.io/fuzzbench/) results consistently show that JSON parsers tend to +max out their coverage with minimal or no corpora. + +At time of writing (June 11, 2023), JSONC does not seem to be supported, so it is not fuzzed. + +#### `rome_parse_css` + +TODO (this is potentially hard -- not a lot of corpora for CSS in the wild) + +#### `rome_parse_all` + +This fuzz harness merely merges all the JS parsers together to create a shared corpus. +It can be used in place of the parsers for d_ts, jsx, module, script, tsx, and typescript in +continuous integration. \ No newline at end of file diff --git a/fuzz/fuzz_targets/rome_common.rs b/fuzz/fuzz_targets/rome_common.rs index 4771bf0ff64..dce766924f9 100644 --- a/fuzz/fuzz_targets/rome_common.rs +++ b/fuzz/fuzz_targets/rome_common.rs @@ -1,19 +1,31 @@ +#![allow(dead_code)] + use libfuzzer_sys::Corpus; use rome_js_parser::parse; use rome_js_syntax::JsFileSource; +use rome_json_parser::parse_json; -pub fn fuzz_source_type(data: &[u8], source: JsFileSource) -> Corpus { +pub fn fuzz_js_parser_with_source_type(data: &[u8], source: JsFileSource) -> Corpus { let Ok(code1) = std::str::from_utf8(data) else { return Corpus::Reject; }; let parse1 = parse(code1, source); if !parse1.has_errors() { - let code2 = parse1.tree().to_string(); - let parse2 = parse(&code2, source); - assert!( - !parse2.has_errors(), - "unparsing introduced a formatting error" - ); - assert_eq!(code2, parse2.tree().to_string()); + let syntax1 = parse1.syntax(); + let code2 = syntax1.to_string(); + assert_eq!(code1, code2, "unparse output differed"); + } + + Corpus::Keep +} + +pub fn fuzz_json_parser(data: &[u8]) -> Corpus { + let Ok(code1) = std::str::from_utf8(data) else { return Corpus::Reject; }; + + let parse1 = parse_json(code1); + if !parse1.has_errors() { + 
let syntax1 = parse1.syntax(); + let code2 = syntax1.to_string(); + assert_eq!(code1, code2, "unparse output differed"); } Corpus::Keep diff --git a/fuzz/fuzz_targets/rome_parse_d_ts.rs b/fuzz/fuzz_targets/rome_parse_d_ts.rs index fd7cee1a4a2..f10264e8b7a 100644 --- a/fuzz/fuzz_targets/rome_parse_d_ts.rs +++ b/fuzz/fuzz_targets/rome_parse_d_ts.rs @@ -8,7 +8,7 @@ use rome_js_syntax::JsFileSource; pub fn do_fuzz(case: &[u8]) -> Corpus { let parse_type = JsFileSource::d_ts(); - rome_common::fuzz_source_type(case, parse_type) + rome_common::fuzz_js_parser_with_source_type(case, parse_type) } #[cfg(not(feature = "rome_parse_all"))] diff --git a/fuzz/fuzz_targets/rome_parse_json.rs b/fuzz/fuzz_targets/rome_parse_json.rs new file mode 100644 index 00000000000..ffab45157f8 --- /dev/null +++ b/fuzz/fuzz_targets/rome_parse_json.rs @@ -0,0 +1,13 @@ +#![cfg_attr(not(feature = "rome_parse_all"), no_main)] + +#[path = "rome_common.rs"] +mod rome_common; + +use libfuzzer_sys::Corpus; + +pub fn do_fuzz(case: &[u8]) -> Corpus { + rome_common::fuzz_json_parser(case) +} + +#[cfg(not(feature = "rome_parse_all"))] +libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_parse_jsx.rs b/fuzz/fuzz_targets/rome_parse_jsx.rs index 2e62cc1f25c..6e99f138e36 100644 --- a/fuzz/fuzz_targets/rome_parse_jsx.rs +++ b/fuzz/fuzz_targets/rome_parse_jsx.rs @@ -8,7 +8,7 @@ use rome_js_syntax::JsFileSource; pub fn do_fuzz(case: &[u8]) -> Corpus { let parse_type = JsFileSource::jsx(); - rome_common::fuzz_source_type(case, parse_type) + rome_common::fuzz_js_parser_with_source_type(case, parse_type) } #[cfg(not(feature = "rome_parse_all"))] diff --git a/fuzz/fuzz_targets/rome_parse_module.rs b/fuzz/fuzz_targets/rome_parse_module.rs index db73e350c5a..5045c4d4ba9 100644 --- a/fuzz/fuzz_targets/rome_parse_module.rs +++ b/fuzz/fuzz_targets/rome_parse_module.rs @@ -8,7 +8,7 @@ use rome_js_syntax::JsFileSource; pub fn do_fuzz(case: &[u8]) -> Corpus { let 
parse_type = JsFileSource::js_module(); - rome_common::fuzz_source_type(case, parse_type) + rome_common::fuzz_js_parser_with_source_type(case, parse_type) } #[cfg(not(feature = "rome_parse_all"))] diff --git a/fuzz/fuzz_targets/rome_parse_script.rs b/fuzz/fuzz_targets/rome_parse_script.rs index 411f44793f0..de430349ad3 100644 --- a/fuzz/fuzz_targets/rome_parse_script.rs +++ b/fuzz/fuzz_targets/rome_parse_script.rs @@ -8,7 +8,7 @@ use rome_js_syntax::JsFileSource; pub fn do_fuzz(case: &[u8]) -> Corpus { let parse_type = JsFileSource::js_script(); - rome_common::fuzz_source_type(case, parse_type) + rome_common::fuzz_js_parser_with_source_type(case, parse_type) } #[cfg(not(feature = "rome_parse_all"))] diff --git a/fuzz/fuzz_targets/rome_parse_tsx.rs b/fuzz/fuzz_targets/rome_parse_tsx.rs index 13dcd9e2eff..b70d84dd21d 100644 --- a/fuzz/fuzz_targets/rome_parse_tsx.rs +++ b/fuzz/fuzz_targets/rome_parse_tsx.rs @@ -8,7 +8,7 @@ use rome_js_syntax::JsFileSource; pub fn do_fuzz(case: &[u8]) -> Corpus { let parse_type = JsFileSource::tsx(); - rome_common::fuzz_source_type(case, parse_type) + rome_common::fuzz_js_parser_with_source_type(case, parse_type) } #[cfg(not(feature = "rome_parse_all"))] diff --git a/fuzz/fuzz_targets/rome_parse_typescript.rs b/fuzz/fuzz_targets/rome_parse_typescript.rs index 59355de1c4e..d948fa2bbc0 100644 --- a/fuzz/fuzz_targets/rome_parse_typescript.rs +++ b/fuzz/fuzz_targets/rome_parse_typescript.rs @@ -8,7 +8,7 @@ use rome_js_syntax::JsFileSource; pub fn do_fuzz(case: &[u8]) -> Corpus { let parse_type = JsFileSource::ts(); - rome_common::fuzz_source_type(case, parse_type) + rome_common::fuzz_js_parser_with_source_type(case, parse_type) } #[cfg(not(feature = "rome_parse_all"))] From 008af4e20a49aa7e44beac278aabf07964ac192a Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sun, 11 Jun 2023 20:10:40 +0200 Subject: [PATCH 04/14] add formatter fuzzers --- fuzz/.gitignore | 6 +-- fuzz/Cargo.toml | 40 ++++++++++++++- fuzz/corpus/rome_parse_all | 1 + 
fuzz/corpus/rome_parse_json | 1 + fuzz/fuzz_targets/rome_common.rs | 57 +++++++++++++++++++++ fuzz/fuzz_targets/rome_format_all.rs | 35 +++++++++++++ fuzz/fuzz_targets/rome_format_d_ts.rs | 15 ++++++ fuzz/fuzz_targets/rome_format_json.rs | 13 +++++ fuzz/fuzz_targets/rome_format_jsx.rs | 15 ++++++ fuzz/fuzz_targets/rome_format_module.rs | 15 ++++++ fuzz/fuzz_targets/rome_format_script.rs | 15 ++++++ fuzz/fuzz_targets/rome_format_tsx.rs | 15 ++++++ fuzz/fuzz_targets/rome_format_typescript.rs | 15 ++++++ fuzz/fuzz_targets/rome_parse_d_ts.rs | 4 +- fuzz/fuzz_targets/rome_parse_json.rs | 4 +- fuzz/fuzz_targets/rome_parse_jsx.rs | 4 +- fuzz/fuzz_targets/rome_parse_module.rs | 4 +- fuzz/fuzz_targets/rome_parse_script.rs | 4 +- fuzz/fuzz_targets/rome_parse_tsx.rs | 4 +- fuzz/fuzz_targets/rome_parse_typescript.rs | 4 +- fuzz/init-fuzzer.sh | 17 ++++-- fuzz/reinit-fuzzer.sh | 12 +++-- 22 files changed, 274 insertions(+), 26 deletions(-) create mode 120000 fuzz/corpus/rome_parse_all create mode 120000 fuzz/corpus/rome_parse_json create mode 100644 fuzz/fuzz_targets/rome_format_all.rs create mode 100644 fuzz/fuzz_targets/rome_format_d_ts.rs create mode 100644 fuzz/fuzz_targets/rome_format_json.rs create mode 100644 fuzz/fuzz_targets/rome_format_jsx.rs create mode 100644 fuzz/fuzz_targets/rome_format_module.rs create mode 100644 fuzz/fuzz_targets/rome_format_script.rs create mode 100644 fuzz/fuzz_targets/rome_format_tsx.rs create mode 100644 fuzz/fuzz_targets/rome_format_typescript.rs diff --git a/fuzz/.gitignore b/fuzz/.gitignore index 5e00ab090db..83c6aae36aa 100644 --- a/fuzz/.gitignore +++ b/fuzz/.gitignore @@ -1,5 +1,5 @@ artifacts/ -corpus/rome_parse_all -corpus/rome_parse_json -corpus/rome_parse_css +corpus/rome_format_all +corpus/rome_format_json +corpus/rome_format_css Cargo.lock diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 44ae841f77a..ad09cd13612 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -11,7 +11,7 @@ edition = "2021" default = ["libfuzzer"] 
full-idempotency = [] libfuzzer = ["libfuzzer-sys/link_libfuzzer"] -rome_parse_all = [] +rome_all = [] [package.metadata] cargo-fuzz = true @@ -19,8 +19,11 @@ cargo-fuzz = true [dependencies] arbitrary = { version = "1.3.0", features = ["derive"] } libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer", default-features = false } +rome_formatter = { path = "../crates/rome_formatter" } +rome_js_formatter = { path = "../crates/rome_js_formatter" } rome_js_parser = { path = "../crates/rome_js_parser" } rome_js_syntax = { path = "../crates/rome_js_syntax" } +rome_json_formatter = { path = "../crates/rome_json_formatter" } rome_json_parser = { path = "../crates/rome_json_parser" } rome_json_syntax = { path = "../crates/rome_json_syntax" } similar = { version = "2.2.1" } @@ -32,7 +35,7 @@ members = ["."] [[bin]] name = "rome_parse_all" path = "fuzz_targets/rome_parse_all.rs" -required-features = ["rome_parse_all"] +required-features = ["rome_all"] [[bin]] name = "rome_parse_d_ts" @@ -62,6 +65,39 @@ path = "fuzz_targets/rome_parse_tsx.rs" name = "rome_parse_typescript" path = "fuzz_targets/rome_parse_typescript.rs" +[[bin]] +name = "rome_format_all" +path = "fuzz_targets/rome_format_all.rs" +required-features = ["rome_all"] + +[[bin]] +name = "rome_format_d_ts" +path = "fuzz_targets/rome_format_d_ts.rs" + +[[bin]] +name = "rome_format_json" +path = "fuzz_targets/rome_format_json.rs" + +[[bin]] +name = "rome_format_module" +path = "fuzz_targets/rome_format_module.rs" + +[[bin]] +name = "rome_format_script" +path = "fuzz_targets/rome_format_script.rs" + +[[bin]] +name = "rome_format_jsx" +path = "fuzz_targets/rome_format_jsx.rs" + +[[bin]] +name = "rome_format_tsx" +path = "fuzz_targets/rome_format_tsx.rs" + +[[bin]] +name = "rome_format_typescript" +path = "fuzz_targets/rome_format_typescript.rs" + [profile.release] opt-level = 3 debug = true diff --git a/fuzz/corpus/rome_parse_all b/fuzz/corpus/rome_parse_all new file mode 120000 index 00000000000..27c9131818d 
--- /dev/null +++ b/fuzz/corpus/rome_parse_all @@ -0,0 +1 @@ +rome_format_all \ No newline at end of file diff --git a/fuzz/corpus/rome_parse_json b/fuzz/corpus/rome_parse_json new file mode 120000 index 00000000000..893f57d8280 --- /dev/null +++ b/fuzz/corpus/rome_parse_json @@ -0,0 +1 @@ +rome_format_json \ No newline at end of file diff --git a/fuzz/fuzz_targets/rome_common.rs b/fuzz/fuzz_targets/rome_common.rs index dce766924f9..991a7abd80e 100644 --- a/fuzz/fuzz_targets/rome_common.rs +++ b/fuzz/fuzz_targets/rome_common.rs @@ -1,8 +1,13 @@ #![allow(dead_code)] use libfuzzer_sys::Corpus; +use rome_formatter::format_node; +use rome_js_formatter::context::JsFormatOptions; +use rome_js_formatter::JsFormatLanguage; use rome_js_parser::parse; use rome_js_syntax::JsFileSource; +use rome_json_formatter::context::JsonFormatOptions; +use rome_json_formatter::JsonFormatLanguage; use rome_json_parser::parse_json; pub fn fuzz_js_parser_with_source_type(data: &[u8], source: JsFileSource) -> Corpus { @@ -18,6 +23,32 @@ pub fn fuzz_js_parser_with_source_type(data: &[u8], source: JsFileSource) -> Cor Corpus::Keep } +pub fn fuzz_js_formatter_with_source_type(data: &[u8], source: JsFileSource) -> Corpus { + let Ok(code1) = std::str::from_utf8(data) else { return Corpus::Reject; }; + + let parse1 = parse(code1, source); + if !parse1.has_errors() { + let language = JsFormatLanguage::new(JsFormatOptions::new(source)); + let syntax1 = parse1.syntax(); + if let Ok(formatted1) = format_node(&syntax1, language.clone()) { + if let Ok(printed1) = formatted1.print() { + let code2 = printed1.as_code(); + let parse2 = parse(code2, source); + assert!(!parse2.has_errors(), "formatter introduced errors"); + let syntax2 = parse2.syntax(); + let formatted2 = format_node(&syntax2, language) + .expect("formatted code could not be reformatted"); + let printed2 = formatted2 + .print() + .expect("reformatted code could not be printed"); + assert_eq!(code2, printed2.as_code(), "format results differ") 
+ } + } + } + + Corpus::Keep +} + pub fn fuzz_json_parser(data: &[u8]) -> Corpus { let Ok(code1) = std::str::from_utf8(data) else { return Corpus::Reject; }; @@ -30,3 +61,29 @@ pub fn fuzz_json_parser(data: &[u8]) -> Corpus { Corpus::Keep } + +pub fn fuzz_json_formatter(data: &[u8]) -> Corpus { + let Ok(code1) = std::str::from_utf8(data) else { return Corpus::Reject; }; + + let parse1 = parse_json(code1); + if !parse1.has_errors() { + let language = JsonFormatLanguage::new(JsonFormatOptions::default()); + let syntax1 = parse1.syntax(); + if let Ok(formatted1) = format_node(&syntax1, language.clone()) { + if let Ok(printed1) = formatted1.print() { + let code2 = printed1.as_code(); + let parse2 = parse_json(code2); + assert!(!parse2.has_errors(), "formatter introduced errors"); + let syntax2 = parse2.syntax(); + let formatted2 = format_node(&syntax2, language) + .expect("formatted code could not be reformatted"); + let printed2 = formatted2 + .print() + .expect("reformatted code could not be printed"); + assert_eq!(code2, printed2.as_code(), "format results differ") + } + } + } + + Corpus::Keep +} diff --git a/fuzz/fuzz_targets/rome_format_all.rs b/fuzz/fuzz_targets/rome_format_all.rs new file mode 100644 index 00000000000..6cb50f82f17 --- /dev/null +++ b/fuzz/fuzz_targets/rome_format_all.rs @@ -0,0 +1,35 @@ +#![no_main] + +mod rome_format_d_ts; +mod rome_format_jsx; +mod rome_format_module; +mod rome_format_script; +mod rome_format_tsx; +mod rome_format_typescript; + +use libfuzzer_sys::{fuzz_target, Corpus}; + +fn do_fuzz(data: &[u8]) -> Corpus { + let mut keep = Corpus::Reject; + if let Corpus::Keep = rome_format_d_ts::do_fuzz(data) { + keep = Corpus::Keep; + } + if let Corpus::Keep = rome_format_jsx::do_fuzz(data) { + keep = Corpus::Keep; + } + if let Corpus::Keep = rome_format_module::do_fuzz(data) { + keep = Corpus::Keep; + } + if let Corpus::Keep = rome_format_script::do_fuzz(data) { + keep = Corpus::Keep; + } + if let Corpus::Keep = 
rome_format_tsx::do_fuzz(data) { + keep = Corpus::Keep; + } + if let Corpus::Keep = rome_format_typescript::do_fuzz(data) { + keep = Corpus::Keep; + } + keep +} + +fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_format_d_ts.rs b/fuzz/fuzz_targets/rome_format_d_ts.rs new file mode 100644 index 00000000000..721965fe2ff --- /dev/null +++ b/fuzz/fuzz_targets/rome_format_d_ts.rs @@ -0,0 +1,15 @@ +#![cfg_attr(not(feature = "rome_all"), no_main)] + +#[path = "rome_common.rs"] +mod rome_common; + +use libfuzzer_sys::Corpus; +use rome_js_syntax::JsFileSource; + +pub fn do_fuzz(case: &[u8]) -> Corpus { + let parse_type = JsFileSource::d_ts(); + rome_common::fuzz_js_formatter_with_source_type(case, parse_type) +} + +#[cfg(not(feature = "rome_all"))] +libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_format_json.rs b/fuzz/fuzz_targets/rome_format_json.rs new file mode 100644 index 00000000000..3810d5949e9 --- /dev/null +++ b/fuzz/fuzz_targets/rome_format_json.rs @@ -0,0 +1,13 @@ +#![cfg_attr(not(feature = "rome_all"), no_main)] + +#[path = "rome_common.rs"] +mod rome_common; + +use libfuzzer_sys::Corpus; + +pub fn do_fuzz(case: &[u8]) -> Corpus { + rome_common::fuzz_json_formatter(case) +} + +#[cfg(not(feature = "rome_all"))] +libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_format_jsx.rs b/fuzz/fuzz_targets/rome_format_jsx.rs new file mode 100644 index 00000000000..e1547aa49c6 --- /dev/null +++ b/fuzz/fuzz_targets/rome_format_jsx.rs @@ -0,0 +1,15 @@ +#![cfg_attr(not(feature = "rome_all"), no_main)] + +#[path = "rome_common.rs"] +mod rome_common; + +use libfuzzer_sys::Corpus; +use rome_js_syntax::JsFileSource; + +pub fn do_fuzz(case: &[u8]) -> Corpus { + let parse_type = JsFileSource::jsx(); + rome_common::fuzz_js_formatter_with_source_type(case, parse_type) +} + +#[cfg(not(feature = "rome_all"))] 
+libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_format_module.rs b/fuzz/fuzz_targets/rome_format_module.rs new file mode 100644 index 00000000000..13a253bfea7 --- /dev/null +++ b/fuzz/fuzz_targets/rome_format_module.rs @@ -0,0 +1,15 @@ +#![cfg_attr(not(feature = "rome_all"), no_main)] + +#[path = "rome_common.rs"] +mod rome_common; + +use libfuzzer_sys::Corpus; +use rome_js_syntax::JsFileSource; + +pub fn do_fuzz(case: &[u8]) -> Corpus { + let parse_type = JsFileSource::js_module(); + rome_common::fuzz_js_formatter_with_source_type(case, parse_type) +} + +#[cfg(not(feature = "rome_all"))] +libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_format_script.rs b/fuzz/fuzz_targets/rome_format_script.rs new file mode 100644 index 00000000000..738fb5c81e7 --- /dev/null +++ b/fuzz/fuzz_targets/rome_format_script.rs @@ -0,0 +1,15 @@ +#![cfg_attr(not(feature = "rome_all"), no_main)] + +#[path = "rome_common.rs"] +mod rome_common; + +use libfuzzer_sys::Corpus; +use rome_js_syntax::JsFileSource; + +pub fn do_fuzz(case: &[u8]) -> Corpus { + let parse_type = JsFileSource::js_script(); + rome_common::fuzz_js_formatter_with_source_type(case, parse_type) +} + +#[cfg(not(feature = "rome_all"))] +libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_format_tsx.rs b/fuzz/fuzz_targets/rome_format_tsx.rs new file mode 100644 index 00000000000..e5ca74dd8a3 --- /dev/null +++ b/fuzz/fuzz_targets/rome_format_tsx.rs @@ -0,0 +1,15 @@ +#![cfg_attr(not(feature = "rome_all"), no_main)] + +#[path = "rome_common.rs"] +mod rome_common; + +use libfuzzer_sys::Corpus; +use rome_js_syntax::JsFileSource; + +pub fn do_fuzz(case: &[u8]) -> Corpus { + let parse_type = JsFileSource::tsx(); + rome_common::fuzz_js_formatter_with_source_type(case, parse_type) +} + +#[cfg(not(feature = "rome_all"))] +libfuzzer_sys::fuzz_target!(|case: &[u8]| 
-> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_format_typescript.rs b/fuzz/fuzz_targets/rome_format_typescript.rs new file mode 100644 index 00000000000..c3c352b404a --- /dev/null +++ b/fuzz/fuzz_targets/rome_format_typescript.rs @@ -0,0 +1,15 @@ +#![cfg_attr(not(feature = "rome_all"), no_main)] + +#[path = "rome_common.rs"] +mod rome_common; + +use libfuzzer_sys::Corpus; +use rome_js_syntax::JsFileSource; + +pub fn do_fuzz(case: &[u8]) -> Corpus { + let parse_type = JsFileSource::ts(); + rome_common::fuzz_js_formatter_with_source_type(case, parse_type) +} + +#[cfg(not(feature = "rome_all"))] +libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_parse_d_ts.rs b/fuzz/fuzz_targets/rome_parse_d_ts.rs index f10264e8b7a..fc9f1e4704c 100644 --- a/fuzz/fuzz_targets/rome_parse_d_ts.rs +++ b/fuzz/fuzz_targets/rome_parse_d_ts.rs @@ -1,4 +1,4 @@ -#![cfg_attr(not(feature = "rome_parse_all"), no_main)] +#![cfg_attr(not(feature = "rome_all"), no_main)] #[path = "rome_common.rs"] mod rome_common; @@ -11,5 +11,5 @@ pub fn do_fuzz(case: &[u8]) -> Corpus { rome_common::fuzz_js_parser_with_source_type(case, parse_type) } -#[cfg(not(feature = "rome_parse_all"))] +#[cfg(not(feature = "rome_all"))] libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_parse_json.rs b/fuzz/fuzz_targets/rome_parse_json.rs index ffab45157f8..de6ce143fe6 100644 --- a/fuzz/fuzz_targets/rome_parse_json.rs +++ b/fuzz/fuzz_targets/rome_parse_json.rs @@ -1,4 +1,4 @@ -#![cfg_attr(not(feature = "rome_parse_all"), no_main)] +#![cfg_attr(not(feature = "rome_all"), no_main)] #[path = "rome_common.rs"] mod rome_common; @@ -9,5 +9,5 @@ pub fn do_fuzz(case: &[u8]) -> Corpus { rome_common::fuzz_json_parser(case) } -#[cfg(not(feature = "rome_parse_all"))] +#[cfg(not(feature = "rome_all"))] libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git 
a/fuzz/fuzz_targets/rome_parse_jsx.rs b/fuzz/fuzz_targets/rome_parse_jsx.rs index 6e99f138e36..4413536869f 100644 --- a/fuzz/fuzz_targets/rome_parse_jsx.rs +++ b/fuzz/fuzz_targets/rome_parse_jsx.rs @@ -1,4 +1,4 @@ -#![cfg_attr(not(feature = "rome_parse_all"), no_main)] +#![cfg_attr(not(feature = "rome_all"), no_main)] #[path = "rome_common.rs"] mod rome_common; @@ -11,5 +11,5 @@ pub fn do_fuzz(case: &[u8]) -> Corpus { rome_common::fuzz_js_parser_with_source_type(case, parse_type) } -#[cfg(not(feature = "rome_parse_all"))] +#[cfg(not(feature = "rome_all"))] libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_parse_module.rs b/fuzz/fuzz_targets/rome_parse_module.rs index 5045c4d4ba9..e935489b2e6 100644 --- a/fuzz/fuzz_targets/rome_parse_module.rs +++ b/fuzz/fuzz_targets/rome_parse_module.rs @@ -1,4 +1,4 @@ -#![cfg_attr(not(feature = "rome_parse_all"), no_main)] +#![cfg_attr(not(feature = "rome_all"), no_main)] #[path = "rome_common.rs"] mod rome_common; @@ -11,5 +11,5 @@ pub fn do_fuzz(case: &[u8]) -> Corpus { rome_common::fuzz_js_parser_with_source_type(case, parse_type) } -#[cfg(not(feature = "rome_parse_all"))] +#[cfg(not(feature = "rome_all"))] libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_parse_script.rs b/fuzz/fuzz_targets/rome_parse_script.rs index de430349ad3..36fed6dd8d1 100644 --- a/fuzz/fuzz_targets/rome_parse_script.rs +++ b/fuzz/fuzz_targets/rome_parse_script.rs @@ -1,4 +1,4 @@ -#![cfg_attr(not(feature = "rome_parse_all"), no_main)] +#![cfg_attr(not(feature = "rome_all"), no_main)] #[path = "rome_common.rs"] mod rome_common; @@ -11,5 +11,5 @@ pub fn do_fuzz(case: &[u8]) -> Corpus { rome_common::fuzz_js_parser_with_source_type(case, parse_type) } -#[cfg(not(feature = "rome_parse_all"))] +#[cfg(not(feature = "rome_all"))] libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_parse_tsx.rs 
b/fuzz/fuzz_targets/rome_parse_tsx.rs index b70d84dd21d..3dc732c14a0 100644 --- a/fuzz/fuzz_targets/rome_parse_tsx.rs +++ b/fuzz/fuzz_targets/rome_parse_tsx.rs @@ -1,4 +1,4 @@ -#![cfg_attr(not(feature = "rome_parse_all"), no_main)] +#![cfg_attr(not(feature = "rome_all"), no_main)] #[path = "rome_common.rs"] mod rome_common; @@ -11,5 +11,5 @@ pub fn do_fuzz(case: &[u8]) -> Corpus { rome_common::fuzz_js_parser_with_source_type(case, parse_type) } -#[cfg(not(feature = "rome_parse_all"))] +#[cfg(not(feature = "rome_all"))] libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/rome_parse_typescript.rs b/fuzz/fuzz_targets/rome_parse_typescript.rs index d948fa2bbc0..5a4d68c3340 100644 --- a/fuzz/fuzz_targets/rome_parse_typescript.rs +++ b/fuzz/fuzz_targets/rome_parse_typescript.rs @@ -1,4 +1,4 @@ -#![cfg_attr(not(feature = "rome_parse_all"), no_main)] +#![cfg_attr(not(feature = "rome_all"), no_main)] #[path = "rome_common.rs"] mod rome_common; @@ -11,5 +11,5 @@ pub fn do_fuzz(case: &[u8]) -> Corpus { rome_common::fuzz_js_parser_with_source_type(case, parse_type) } -#[cfg(not(feature = "rome_parse_all"))] +#[cfg(not(feature = "rome_all"))] libfuzzer_sys::fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/init-fuzzer.sh b/fuzz/init-fuzzer.sh index 5345e6cece4..69d8f2fe413 100644 --- a/fuzz/init-fuzzer.sh +++ b/fuzz/init-fuzzer.sh @@ -9,11 +9,11 @@ if ! cargo fuzz --help >&/dev/null; then cargo install --git https://github.com/rust-fuzz/cargo +stable-fuzz.git fi -if [ ! -d corpus/rome_parse_all ]; then - mkdir -p corpus/rome_parse_all +if [ ! -d corpus/rome_format_all ]; then + mkdir -p corpus/rome_format_all read -p "Would you like to build a corpus from a javascript source code dataset? (this will take a long time!) 
[Y/n] " -n 1 -r echo - cd corpus/rome_parse_all + cd corpus/rome_format_all if [[ $REPLY =~ ^[Yy]$ ]]; then curl -L http://files.srl.inf.ethz.ch/data/js_dataset.tar.gz | tar xzO data.tar.gz | tar xz find . -type d -exec chmod 755 {} \; @@ -22,7 +22,16 @@ if [ ! -d corpus/rome_parse_all ]; then cp -r "../../../crates/rome_js_parser/test_data" . find . -name \*.rast -delete cd - - cargo fuzz cmin --features rome_parse_all -s none rome_parse_all + cargo fuzz cmin --features rome_all -s none rome_format_all +fi + +if [ ! -d corpus/rome_format_json ]; then + mkdir -p corpus/rome_format_json + cd corpus/rome_format_json + cp -r "../../../crates/rome_json_parser/tests/json_test_suite" . + find . -name \*.rast -delete + cd - + cargo fuzz cmin -s none rome_format_json fi echo "Done! You are ready to fuzz." diff --git a/fuzz/reinit-fuzzer.sh b/fuzz/reinit-fuzzer.sh index abc8f436cde..6eb133dbdcd 100644 --- a/fuzz/reinit-fuzzer.sh +++ b/fuzz/reinit-fuzzer.sh @@ -5,10 +5,16 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) cd "$SCRIPT_DIR" -cd corpus/rome_parse_all -cp -r "../../../crates/rome_js_parser/test_data" . +cd corpus/rome_format_all +cp -r "../../../crates/rome_js_formatter/test_data" . find . -name \*.rast -delete cd - -cargo fuzz cmin --features rome_parse_all -s none rome_parse_all +cargo fuzz cmin --features rome_format_all -s none rome_format_all + +cd corpus/rome_format_json +cp -r "../../../crates/rome_json_parser/tests/json_test_suite" . +find . -name \*.rast -delete +cd - +cargo fuzz cmin -s none rome_format_json echo "Done! You are ready to fuzz." 
From 7bbfebbe376810ccc93e293b74c565bd8a43a0d2 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sun, 11 Jun 2023 20:18:30 +0200 Subject: [PATCH 05/14] document formatter strategy --- fuzz/README.md | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/fuzz/README.md b/fuzz/README.md index 35b5998005a..25ded63c66e 100644 --- a/fuzz/README.md +++ b/fuzz/README.md @@ -96,12 +96,24 @@ max out their coverage with minimal or no corpora. At time of writing (June 11, 2023), JSONC does not seem to be supported, so it is not fuzzed. -#### `rome_parse_css` - -TODO (this is potentially hard -- not a lot of corpora for CSS in the wild) - #### `rome_parse_all` This fuzz harness merely merges all the JS parsers together to create a shared corpus. It can be used in place of the parsers for d_ts, jsx, module, script, tsx, and typescript in -continuous integration. \ No newline at end of file +continuous integration. + +### `rome_format_*` + +These fuzzers use the same corpora as the fuzzers previously mentioned, but check the correctness of +the formatters as well. +We assume the following qualities of formatters: + - Formatters will not introduce syntax errors into the program + - Formatting code twice will have the same result as formatting code once + +In this way, we verify the [idempotency](https://en.wikipedia.org/wiki/Idempotence) and syntax +preservation property of formatting. + +Of particular note: these fuzzers may have false negative results if e.g. two tokens are turned into +one token and the reformatting result is the same. +Unfortunately, we can't necessarily control for this because the formatter may reorganise the +sequence of tokens. 
\ No newline at end of file From 906bee6dd0867f11907feeebc4e95fdd0f163fd9 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sun, 11 Jun 2023 20:26:37 +0200 Subject: [PATCH 06/14] add fuzzer build to CI --- .github/workflows/pull_request.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 04a64fbd1cb..8d33cfa5c39 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -83,6 +83,20 @@ jobs: - name: Run doctests run: cargo test --doc + fuzz-all: + name: Build common fuzzers + steps: + - name: Checkout repository + uses: actions/checkout@v3 + - name: Install toolchain + uses: moonrepo/setup-rust@v0 + with: + bins: cargo-fuzz + - name: Build rome_parse_all + run: cargo fuzz build --features rome_all -s none rome_parse_all + - name: Build rome_format_all + run: cargo fuzz build --features rome_all -s none rome_format_all + test-node-api: name: Test node.js API runs-on: ubuntu-latest From 83dd602f10b09813c02670162019d9770fe38238 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sun, 11 Jun 2023 20:29:30 +0200 Subject: [PATCH 07/14] better github workflow --- .github/workflows/pull_request.yml | 9 ++++----- fuzz/fuzz_targets/rome_common.rs | 3 +++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 8d33cfa5c39..10b4bd4e513 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -6,6 +6,7 @@ on: - main paths: # Only run when changes are made to rust code or root Cargo - 'crates/**' + - 'fuzz/**' - 'xtask/**' - 'Cargo.toml' - 'Cargo.lock' @@ -84,7 +85,7 @@ jobs: run: cargo test --doc fuzz-all: - name: Build common fuzzers + name: Build and init fuzzers steps: - name: Checkout repository uses: actions/checkout@v3 @@ -92,10 +93,8 @@ jobs: uses: moonrepo/setup-rust@v0 with: bins: cargo-fuzz - - name: Build rome_parse_all - 
run: cargo fuzz build --features rome_all -s none rome_parse_all - - name: Build rome_format_all - run: cargo fuzz build --features rome_all -s none rome_format_all + - name: Run init-fuzzer + run: yes n | sh fuzz/init-fuzzer.sh test-node-api: name: Test node.js API diff --git a/fuzz/fuzz_targets/rome_common.rs b/fuzz/fuzz_targets/rome_common.rs index 991a7abd80e..7bf8685489c 100644 --- a/fuzz/fuzz_targets/rome_common.rs +++ b/fuzz/fuzz_targets/rome_common.rs @@ -1,3 +1,6 @@ +//! Common functionality between different fuzzers. Look here if you need to inspect implementation +//! details for the fuzzer harnesses! + #![allow(dead_code)] use libfuzzer_sys::Corpus; From ed1fc0eb0ec5e463b990272c4d94d025daea186d Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sun, 11 Jun 2023 20:42:02 +0200 Subject: [PATCH 08/14] whoops, need to specify where it runs --- .github/workflows/pull_request.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 10b4bd4e513..aa09e0531f0 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -86,6 +86,8 @@ jobs: fuzz-all: name: Build and init fuzzers + runs-on: ubuntu-latest + steps: - name: Checkout repository uses: actions/checkout@v3 From 5539b4b493a408ede07d207fcd4e2f28baac78cb Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Mon, 12 Jun 2023 19:41:15 +0200 Subject: [PATCH 09/14] fix CI --- .github/workflows/pull_request.yml | 2 +- fuzz/init-fuzzer.sh | 14 ++++++++------ fuzz/reinit-fuzzer.sh | 6 ++++-- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index aa09e0531f0..a8925497275 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -96,7 +96,7 @@ jobs: with: bins: cargo-fuzz - name: Run init-fuzzer - run: yes n | sh fuzz/init-fuzzer.sh + run: sh fuzz/init-fuzzer.sh test-node-api: name: Test node.js 
API diff --git a/fuzz/init-fuzzer.sh b/fuzz/init-fuzzer.sh index 69d8f2fe413..984a5924fe2 100644 --- a/fuzz/init-fuzzer.sh +++ b/fuzz/init-fuzzer.sh @@ -11,13 +11,15 @@ fi if [ ! -d corpus/rome_format_all ]; then mkdir -p corpus/rome_format_all - read -p "Would you like to build a corpus from a javascript source code dataset? (this will take a long time!) [Y/n] " -n 1 -r - echo cd corpus/rome_format_all - if [[ $REPLY =~ ^[Yy]$ ]]; then - curl -L http://files.srl.inf.ethz.ch/data/js_dataset.tar.gz | tar xzO data.tar.gz | tar xz - find . -type d -exec chmod 755 {} \; - find . -type f -exec chmod 644 {} \; + if [ -z ${CI+x} ]; then + read -p "Would you like to build a corpus from a javascript source code dataset? (this will take a long time!) [Y/n] " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + curl -L http://files.srl.inf.ethz.ch/data/js_dataset.tar.gz | tar xzO data.tar.gz | tar xz + find . -type d -exec chmod 755 {} \; + find . -type f -exec chmod 644 {} \; + fi fi cp -r "../../../crates/rome_js_parser/test_data" . find . -name \*.rast -delete diff --git a/fuzz/reinit-fuzzer.sh b/fuzz/reinit-fuzzer.sh index 6eb133dbdcd..a9215365afd 100644 --- a/fuzz/reinit-fuzzer.sh +++ b/fuzz/reinit-fuzzer.sh @@ -5,12 +5,14 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) cd "$SCRIPT_DIR" +mkdir -p corpus/rome_format_all cd corpus/rome_format_all -cp -r "../../../crates/rome_js_formatter/test_data" . +cp -r "../../../crates/rome_js_parser/test_data" . find . -name \*.rast -delete cd - -cargo fuzz cmin --features rome_format_all -s none rome_format_all +cargo fuzz cmin --features rome_all -s none rome_format_all +mkdir -p corpus/rome_format_json cd corpus/rome_format_json cp -r "../../../crates/rome_json_parser/tests/json_test_suite" . find . 
-name \*.rast -delete From ce7c4708e8104f101ce2f930a83d7bf7847874e0 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Mon, 12 Jun 2023 19:42:00 +0200 Subject: [PATCH 10/14] address naming nit --- fuzz/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index ad09cd13612..9c51a7f91fe 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "rome-fuzz" +name = "rome_fuzz" version = "0.0.0" authors = [ "Addison Crump ", From 3db6138b398ec19845ff440a5442376aa80f5ff9 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Mon, 12 Jun 2023 20:07:01 +0200 Subject: [PATCH 11/14] add text diff to formatter --- fuzz/fuzz_targets/rome_common.rs | 35 ++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/fuzz/fuzz_targets/rome_common.rs b/fuzz/fuzz_targets/rome_common.rs index 7bf8685489c..67956676e44 100644 --- a/fuzz/fuzz_targets/rome_common.rs +++ b/fuzz/fuzz_targets/rome_common.rs @@ -12,6 +12,7 @@ use rome_js_syntax::JsFileSource; use rome_json_formatter::context::JsonFormatOptions; use rome_json_formatter::JsonFormatLanguage; use rome_json_parser::parse_json; +use similar::TextDiff; pub fn fuzz_js_parser_with_source_type(data: &[u8], source: JsFileSource) -> Corpus { let Ok(code1) = std::str::from_utf8(data) else { return Corpus::Reject; }; @@ -37,14 +38,27 @@ pub fn fuzz_js_formatter_with_source_type(data: &[u8], source: JsFileSource) -> if let Ok(printed1) = formatted1.print() { let code2 = printed1.as_code(); let parse2 = parse(code2, source); - assert!(!parse2.has_errors(), "formatter introduced errors"); + assert!( + !parse2.has_errors(), + "formatter introduced errors:\n{}", + TextDiff::from_lines(code1, code2) + .unified_diff() + .header("original code", "formatted") + ); let syntax2 = parse2.syntax(); let formatted2 = format_node(&syntax2, language) .expect("formatted code could not be reformatted"); let printed2 = formatted2 .print() 
.expect("reformatted code could not be printed"); - assert_eq!(code2, printed2.as_code(), "format results differ") + assert_eq!( + code2, + printed2.as_code(), + "format results differ:\n{}", + TextDiff::from_lines(code1, code2) + .unified_diff() + .header("formatted", "reformatted") + ) } } } @@ -76,14 +90,27 @@ pub fn fuzz_json_formatter(data: &[u8]) -> Corpus { if let Ok(printed1) = formatted1.print() { let code2 = printed1.as_code(); let parse2 = parse_json(code2); - assert!(!parse2.has_errors(), "formatter introduced errors"); + assert!( + !parse2.has_errors(), + "formatter introduced errors:\n{}", + TextDiff::from_lines(code1, code2) + .unified_diff() + .header("original code", "formatted") + ); let syntax2 = parse2.syntax(); let formatted2 = format_node(&syntax2, language) .expect("formatted code could not be reformatted"); let printed2 = formatted2 .print() .expect("reformatted code could not be printed"); - assert_eq!(code2, printed2.as_code(), "format results differ") + assert_eq!( + code2, + printed2.as_code(), + "format results differ:\n{}", + TextDiff::from_lines(code1, code2) + .unified_diff() + .header("formatted", "reformatted") + ) } } } From eaef5dec3e8ad65a4ff33b7807a45578ba4e12fc Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Mon, 12 Jun 2023 22:44:21 +0200 Subject: [PATCH 12/14] add linter checks to formatter output --- .github/workflows/pull_request.yml | 2 +- fuzz/Cargo.toml | 10 +++- fuzz/fuzz_targets/rome_common.rs | 91 ++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index a8925497275..ff9ece5e0c5 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -96,7 +96,7 @@ jobs: with: bins: cargo-fuzz - name: Run init-fuzzer - run: sh fuzz/init-fuzzer.sh + run: bash fuzz/init-fuzzer.sh test-node-api: name: Test node.js API diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 
9c51a7f91fe..84b9f4e03be 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -19,13 +19,17 @@ cargo-fuzz = true [dependencies] arbitrary = { version = "1.3.0", features = ["derive"] } libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer", default-features = false } +rome_analyze = { path = "../crates/rome_analyze" } +rome_diagnostics = { path = "../crates/rome_diagnostics" } rome_formatter = { path = "../crates/rome_formatter" } +rome_js_analyze = { path = "../crates/rome_js_analyze" } rome_js_formatter = { path = "../crates/rome_js_formatter" } rome_js_parser = { path = "../crates/rome_js_parser" } rome_js_syntax = { path = "../crates/rome_js_syntax" } rome_json_formatter = { path = "../crates/rome_json_formatter" } rome_json_parser = { path = "../crates/rome_json_parser" } rome_json_syntax = { path = "../crates/rome_json_syntax" } +rome_service = { path = "../crates/rome_service" } similar = { version = "2.2.1" } # Prevent this from interfering with workspaces @@ -99,13 +103,13 @@ name = "rome_format_typescript" path = "fuzz_targets/rome_format_typescript.rs" [profile.release] -opt-level = 3 +opt-level = 2 debug = true [profile.dev] -opt-level = 3 +opt-level = 2 debug = true [profile.test] -opt-level = 3 +opt-level = 2 debug = true diff --git a/fuzz/fuzz_targets/rome_common.rs b/fuzz/fuzz_targets/rome_common.rs index 67956676e44..16d421ba365 100644 --- a/fuzz/fuzz_targets/rome_common.rs +++ b/fuzz/fuzz_targets/rome_common.rs @@ -4,7 +4,10 @@ #![allow(dead_code)] use libfuzzer_sys::Corpus; +use rome_analyze::{AnalysisFilter, AnalyzerOptions, ControlFlow, RuleFilter}; +use rome_diagnostics::Diagnostic; use rome_formatter::format_node; +use rome_js_analyze::analyze; use rome_js_formatter::context::JsFormatOptions; use rome_js_formatter::JsFormatLanguage; use rome_js_parser::parse; @@ -12,7 +15,9 @@ use rome_js_syntax::JsFileSource; use rome_json_formatter::context::JsonFormatOptions; use rome_json_formatter::JsonFormatLanguage; use 
rome_json_parser::parse_json; +use rome_service::Rules; use similar::TextDiff; +use std::fmt::{Display, Formatter}; pub fn fuzz_js_parser_with_source_type(data: &[u8], source: JsFileSource) -> Corpus { let Ok(code1) = std::str::from_utf8(data) else { return Corpus::Reject; }; @@ -27,12 +32,69 @@ pub fn fuzz_js_parser_with_source_type(data: &[u8], source: JsFileSource) -> Cor Corpus::Keep } +static mut ANALYSIS_RULES: Option = None; +static mut ANALYSIS_RULE_FILTERS: Option> = None; +static mut ANALYSIS_OPTIONS: Option = None; + +struct DiagnosticDescriptionExtractor<'a, D> { + diagnostic: &'a D, +} + +impl<'a, D> DiagnosticDescriptionExtractor<'a, D> { + pub fn new(diagnostic: &'a D) -> Self { + Self { diagnostic } + } +} + +impl<'a, D> Display for DiagnosticDescriptionExtractor<'a, D> +where + D: Diagnostic, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.diagnostic.description(f) + } +} + pub fn fuzz_js_formatter_with_source_type(data: &[u8], source: JsFileSource) -> Corpus { let Ok(code1) = std::str::from_utf8(data) else { return Corpus::Reject; }; + // TODO: replace with OnceLock when upgrading to 1.70 + let rule_filters = if let Some(rules) = unsafe { ANALYSIS_RULE_FILTERS.as_ref() } { + rules + } else { + let rules = unsafe { + ANALYSIS_RULES.get_or_insert_with(|| Rules { + all: Some(true), + ..Default::default() + }) + }; + let rules = rules.as_enabled_rules().into_iter().collect::>(); + unsafe { + ANALYSIS_RULE_FILTERS = Some(rules); + ANALYSIS_RULE_FILTERS.as_ref().unwrap_unchecked() + } + }; + let options = unsafe { ANALYSIS_OPTIONS.get_or_insert_with(AnalyzerOptions::default) }; + let parse1 = parse(code1, source); if !parse1.has_errors() { let language = JsFormatLanguage::new(JsFormatOptions::new(source)); + let tree1 = parse1.tree(); + let mut linter_errors = Vec::new(); + let _ = analyze( + &tree1, + AnalysisFilter::from_enabled_rules(Some(rule_filters)), + options, + source, + |e| -> ControlFlow<()> { + if let Some(diagnostic) 
= e.diagnostic() { + linter_errors + .push(DiagnosticDescriptionExtractor::new(&diagnostic).to_string()); + } + + ControlFlow::Continue(()) + }, + ); let syntax1 = parse1.syntax(); if let Ok(formatted1) = format_node(&syntax1, language.clone()) { if let Ok(printed1) = formatted1.print() { @@ -45,6 +107,35 @@ pub fn fuzz_js_formatter_with_source_type(data: &[u8], source: JsFileSource) -> .unified_diff() .header("original code", "formatted") ); + let tree2 = parse2.tree(); + let (maybe_diagnostic, _) = analyze( + &tree2, + AnalysisFilter::from_enabled_rules(Some(rule_filters)), + options, + source, + |e| { + if let Some(diagnostic) = e.diagnostic() { + let new_error = + DiagnosticDescriptionExtractor::new(&diagnostic).to_string(); + if let Some(idx) = linter_errors.iter().position(|e| *e == new_error) { + linter_errors.remove(idx); + } else { + return ControlFlow::Break(new_error); + } + } + + ControlFlow::Continue(()) + }, + ); + if let Some(diagnostic) = maybe_diagnostic { + panic!( + "formatter introduced linter failure: {}\n{}", + diagnostic, + TextDiff::from_lines(code1, code2) + .unified_diff() + .header("original code", "formatted") + ); + } let syntax2 = parse2.syntax(); let formatted2 = format_node(&syntax2, language) .expect("formatted code could not be reformatted"); From e4d87d2b2d677d013e55c017c693e75e1167a011 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Tue, 13 Jun 2023 00:12:52 +0200 Subject: [PATCH 13/14] correct diff args --- fuzz/fuzz_targets/rome_common.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fuzz/fuzz_targets/rome_common.rs b/fuzz/fuzz_targets/rome_common.rs index 16d421ba365..158045afc7a 100644 --- a/fuzz/fuzz_targets/rome_common.rs +++ b/fuzz/fuzz_targets/rome_common.rs @@ -129,8 +129,9 @@ pub fn fuzz_js_formatter_with_source_type(data: &[u8], source: JsFileSource) -> ); if let Some(diagnostic) = maybe_diagnostic { panic!( - "formatter introduced linter failure: {}\n{}", + "formatter introduced linter 
failure: {} (expected one of: {})\n{}", diagnostic, + linter_errors.join(", "), TextDiff::from_lines(code1, code2) .unified_diff() .header("original code", "formatted") @@ -142,11 +143,12 @@ pub fn fuzz_js_formatter_with_source_type(data: &[u8], source: JsFileSource) -> let printed2 = formatted2 .print() .expect("reformatted code could not be printed"); + let code3 = printed2.as_code(); assert_eq!( code2, - printed2.as_code(), + code3, "format results differ:\n{}", - TextDiff::from_lines(code1, code2) + TextDiff::from_lines(code2, code3) .unified_diff() .header("formatted", "reformatted") ) From 08047c00aa9ce71c6e743295ef49508ea53c84fd Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Wed, 14 Jun 2023 11:49:33 +0200 Subject: [PATCH 14/14] use strip dead code (ew) to resolve the memory usage issue --- fuzz/Cargo.toml | 16 ++++++++++------ fuzz/README.md | 13 ++++++++++--- fuzz/init-fuzzer.sh | 4 ++-- fuzz/reinit-fuzzer.sh | 4 ++-- 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 84b9f4e03be..5cd9028c067 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -102,14 +102,18 @@ path = "fuzz_targets/rome_format_tsx.rs" name = "rome_format_typescript" path = "fuzz_targets/rome_format_typescript.rs" +# enabling debug seems to cause a massive use of RAM (>12GB) [profile.release] -opt-level = 2 -debug = true +opt-level = 3 +#debug = true +debug = false [profile.dev] -opt-level = 2 -debug = true +opt-level = 3 +#debug = true +debug = false [profile.test] -opt-level = 2 -debug = true +opt-level = 3 +#debug = true +debug = false diff --git a/fuzz/README.md b/fuzz/README.md index 25ded63c66e..58efc1bb211 100644 --- a/fuzz/README.md +++ b/fuzz/README.md @@ -21,7 +21,7 @@ without it (very unlikely for the fuzzer to generate valid python code from "thi Once you have initialised the fuzzers, you can then execute any fuzzer with: ```bash -cargo fuzz run -s none name_of_fuzzer -- -timeout=1 +cargo fuzz run --strip-dead-code -s 
none name_of_fuzzer -- -timeout=1 ``` **Users using Apple M1 devices must use a nightly compiler and omit the `-s none` portion of this @@ -40,7 +40,7 @@ triggered with a smaller input. `cargo-fuzz` supports this out of the box with: ```bash -cargo fuzz tmin -s none name_of_fuzzer artifacts/name_of_fuzzer/crash-... +cargo fuzz tmin --strip-dead-code -s none name_of_fuzzer artifacts/name_of_fuzzer/crash-... ``` From here, you will need to analyse the input and potentially the behaviour of the program. @@ -116,4 +116,11 @@ preservation property of formatting. Of particular note: these fuzzers may have false negative results if e.g. two tokens are turned into one token and the reformatting result is the same. Unfortunately, we can't necessarily control for this because the formatter may reorganise the -sequence of tokens. \ No newline at end of file +sequence of tokens. + +## Errata + +Unfortunately, `--strip-dead-code` is necessary to build the target with a suitable amount of +memory. +This seems to be caused by some issue in LLVM, but I haven't been able to spend the time to +investigate this fully yet. diff --git a/fuzz/init-fuzzer.sh b/fuzz/init-fuzzer.sh index 984a5924fe2..e1e208c2777 100644 --- a/fuzz/init-fuzzer.sh +++ b/fuzz/init-fuzzer.sh @@ -24,7 +24,7 @@ if [ ! -d corpus/rome_format_all ]; then cp -r "../../../crates/rome_js_parser/test_data" . find . -name \*.rast -delete cd - - cargo fuzz cmin --features rome_all -s none rome_format_all + cargo fuzz cmin --strip-dead-code --features rome_all -s none rome_format_all fi if [ ! -d corpus/rome_format_json ]; then @@ -33,7 +33,7 @@ if [ ! -d corpus/rome_format_json ]; then cp -r "../../../crates/rome_json_parser/tests/json_test_suite" . find . -name \*.rast -delete cd - - cargo fuzz cmin -s none rome_format_json + cargo fuzz cmin --strip-dead-code -s none rome_format_json fi echo "Done! You are ready to fuzz." 
diff --git a/fuzz/reinit-fuzzer.sh b/fuzz/reinit-fuzzer.sh index a9215365afd..378ea0f0026 100644 --- a/fuzz/reinit-fuzzer.sh +++ b/fuzz/reinit-fuzzer.sh @@ -10,13 +10,13 @@ cd corpus/rome_format_all cp -r "../../../crates/rome_js_parser/test_data" . find . -name \*.rast -delete cd - -cargo fuzz cmin --features rome_all -s none rome_format_all +cargo fuzz cmin --strip-dead-code --features rome_all -s none rome_format_all mkdir -p corpus/rome_format_json cd corpus/rome_format_json cp -r "../../../crates/rome_json_parser/tests/json_test_suite" . find . -name \*.rast -delete cd - -cargo fuzz cmin -s none rome_format_json +cargo fuzz cmin --strip-dead-code -s none rome_format_json echo "Done! You are ready to fuzz."