Skip to content

Commit 1685d95

Browse files
authored
[red-knot] Add fuzzer to catch panics for invalid syntax (#14678)
## Summary This PR adds a fuzzer harness for red knot that runs the type checker on source code that contains invalid syntax. Additionally, this PR updates the `init-fuzzer.sh` script to increase the corpus size to: * Include various crates that include Python source code * Use the 3.13 CPython source code And, remove any non-Python files from the final corpus so that when the fuzzer tries to minify the corpus, it doesn't produce files that only contain documentation content as that's just noise. ## Test Plan Run `./fuzz/init-fuzzer.sh`, say no to the large dataset. Run the fuzzer with `cargo +nightly fuzz run red_knot_check_invalid_syntax -- -timeout=5`
1 parent 575deb5 commit 1685d95

File tree

6 files changed

+195
-10
lines changed

6 files changed

+195
-10
lines changed

.github/workflows/ci.yaml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ jobs:
3232
# Flag that is raised when any code is changed
3333
# This is a superset of the linter and formatter
3434
code: ${{ steps.changed.outputs.code_any_changed }}
35+
# Flag that is raised when any code that affects the fuzzer is changed
36+
fuzz: ${{ steps.changed.outputs.fuzz_any_changed }}
3537
steps:
3638
- uses: actions/checkout@v4
3739
with:
@@ -79,6 +81,11 @@ jobs:
7981
- python/**
8082
- .github/workflows/ci.yaml
8183
84+
fuzz:
85+
- fuzz/Cargo.toml
86+
- fuzz/Cargo.lock
87+
- fuzz/fuzz_targets/**
88+
8289
code:
8390
- "**/*"
8491
- "!**/*.md"
@@ -288,7 +295,7 @@ jobs:
288295
name: "cargo fuzz build"
289296
runs-on: ubuntu-latest
290297
needs: determine_changes
291-
if: ${{ github.ref == 'refs/heads/main' }}
298+
if: ${{ github.ref == 'refs/heads/main' || needs.determine_changes.outputs.fuzz == 'true' }}
292299
timeout-minutes: 10
293300
steps:
294301
- uses: actions/checkout@v4

fuzz/Cargo.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ libfuzzer = ["libfuzzer-sys/link_libfuzzer"]
1717
cargo-fuzz = true
1818

1919
[dependencies]
20+
red_knot_python_semantic = { path = "../crates/red_knot_python_semantic" }
21+
red_knot_vendored = { path = "../crates/red_knot_vendored" }
22+
ruff_db = { path = "../crates/ruff_db" }
2023
ruff_linter = { path = "../crates/ruff_linter" }
2124
ruff_python_ast = { path = "../crates/ruff_python_ast" }
2225
ruff_python_codegen = { path = "../crates/ruff_python_codegen" }
@@ -26,12 +29,18 @@ ruff_python_formatter = { path = "../crates/ruff_python_formatter"}
2629
ruff_text_size = { path = "../crates/ruff_text_size" }
2730

2831
libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer", default-features = false }
32+
salsa = { git = "https://github.com/salsa-rs/salsa.git", rev = "254c749b02cde2fd29852a7463a33e800b771758" }
2933
similar = { version = "2.5.0" }
34+
tracing = { version = "0.1.40" }
3035

3136
# Prevent this from interfering with workspaces
3237
[workspace]
3338
members = ["."]
3439

40+
[[bin]]
41+
name = "red_knot_check_invalid_syntax"
42+
path = "fuzz_targets/red_knot_check_invalid_syntax.rs"
43+
3544
[[bin]]
3645
name = "ruff_parse_simple"
3746
path = "fuzz_targets/ruff_parse_simple.rs"

fuzz/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,15 @@ Each fuzzer harness in [`fuzz_targets`](fuzz_targets) targets a different aspect
7474
them in different ways. While there is implementation-specific documentation in the source code
7575
itself, each harness is briefly described below.
7676

77+
### `red_knot_check_invalid_syntax`
78+
79+
This fuzz harness checks that the type checker (Red Knot) does not panic when checking a source
80+
file with invalid syntax. This rejects any corpus entry that is already valid Python code.
81+
Currently, this is limited to syntax errors that are produced by Ruff's Python parser, which means
82+
that it does not cover all possible syntax errors (<https://github.com/astral-sh/ruff/issues/11934>).
83+
A possible workaround for now would be to bypass the parser and run the type checker on all inputs
84+
regardless of syntax errors.
85+
7786
### `ruff_parse_simple`
7887

7988
This fuzz harness does not perform any "smart" testing of Ruff; it merely checks that the parsing
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ruff_fix_validity
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
//! Fuzzer harness that runs the type checker to catch panics for source code containing
2+
//! syntax errors.
3+
4+
#![no_main]
5+
6+
use std::sync::{Mutex, OnceLock};
7+
8+
use libfuzzer_sys::{fuzz_target, Corpus};
9+
10+
use red_knot_python_semantic::types::check_types;
11+
use red_knot_python_semantic::{
12+
Db as SemanticDb, Program, ProgramSettings, PythonVersion, SearchPathSettings,
13+
};
14+
use ruff_db::files::{system_path_to_file, File, Files};
15+
use ruff_db::system::{DbWithTestSystem, System, SystemPathBuf, TestSystem};
16+
use ruff_db::vendored::VendoredFileSystem;
17+
use ruff_db::{Db as SourceDb, Upcast};
18+
use ruff_python_parser::{parse_unchecked, Mode};
19+
20+
/// Database that can be used for testing.
21+
///
22+
/// Uses an in memory filesystem and it stubs out the vendored files by default.
23+
#[salsa::db]
24+
struct TestDb {
25+
storage: salsa::Storage<Self>,
26+
files: Files,
27+
system: TestSystem,
28+
vendored: VendoredFileSystem,
29+
events: std::sync::Arc<std::sync::Mutex<Vec<salsa::Event>>>,
30+
}
31+
32+
impl TestDb {
33+
fn new() -> Self {
34+
Self {
35+
storage: salsa::Storage::default(),
36+
system: TestSystem::default(),
37+
vendored: red_knot_vendored::file_system().clone(),
38+
events: std::sync::Arc::default(),
39+
files: Files::default(),
40+
}
41+
}
42+
}
43+
44+
#[salsa::db]
45+
impl SourceDb for TestDb {
46+
fn vendored(&self) -> &VendoredFileSystem {
47+
&self.vendored
48+
}
49+
50+
fn system(&self) -> &dyn System {
51+
&self.system
52+
}
53+
54+
fn files(&self) -> &Files {
55+
&self.files
56+
}
57+
}
58+
59+
impl DbWithTestSystem for TestDb {
60+
fn test_system(&self) -> &TestSystem {
61+
&self.system
62+
}
63+
64+
fn test_system_mut(&mut self) -> &mut TestSystem {
65+
&mut self.system
66+
}
67+
}
68+
69+
impl Upcast<dyn SourceDb> for TestDb {
    /// Views this database as a shared `dyn SourceDb`.
    fn upcast(&self) -> &(dyn SourceDb + 'static) {
        self
    }

    /// Views this database as an exclusive `dyn SourceDb`.
    fn upcast_mut(&mut self) -> &mut (dyn SourceDb + 'static) {
        self
    }
}
77+
78+
#[salsa::db]
79+
impl SemanticDb for TestDb {
80+
fn is_file_open(&self, file: File) -> bool {
81+
!file.path(self).is_vendored_path()
82+
}
83+
}
84+
85+
#[salsa::db]
86+
impl salsa::Database for TestDb {
87+
fn salsa_event(&self, event: &dyn Fn() -> salsa::Event) {
88+
let event = event();
89+
tracing::trace!("event: {:?}", event);
90+
let mut events = self.events.lock().unwrap();
91+
events.push(event);
92+
}
93+
}
94+
95+
fn setup_db() -> TestDb {
96+
let db = TestDb::new();
97+
98+
let src_root = SystemPathBuf::from("/src");
99+
db.memory_file_system()
100+
.create_directory_all(&src_root)
101+
.unwrap();
102+
103+
Program::from_settings(
104+
&db,
105+
&ProgramSettings {
106+
target_version: PythonVersion::default(),
107+
search_paths: SearchPathSettings::new(src_root),
108+
},
109+
)
110+
.expect("Valid search path settings");
111+
112+
db
113+
}
114+
115+
/// Mutex-guarded database, created on first use by `do_fuzz` and reused afterwards.
static TEST_DB: OnceLock<Mutex<TestDb>> = OnceLock::new();
116+
117+
fn do_fuzz(case: &[u8]) -> Corpus {
118+
let Ok(code) = std::str::from_utf8(case) else {
119+
return Corpus::Reject;
120+
};
121+
122+
let parsed = parse_unchecked(code, Mode::Module);
123+
if parsed.is_valid() {
124+
return Corpus::Reject;
125+
}
126+
127+
let mut db = TEST_DB
128+
.get_or_init(|| Mutex::new(setup_db()))
129+
.lock()
130+
.unwrap();
131+
132+
for path in &["/src/a.py", "/src/a.pyi"] {
133+
db.write_file(path, code).unwrap();
134+
let file = system_path_to_file(&*db, path).unwrap();
135+
check_types(&*db, file);
136+
db.memory_file_system().remove_file(path).unwrap();
137+
file.sync(&mut *db);
138+
}
139+
140+
Corpus::Keep
141+
}
142+
143+
// Fuzz entry point: forwards each raw input to `do_fuzz` and returns its `Corpus` verdict.
fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) });

fuzz/init-fuzzer.sh

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,32 @@ fi
1111

1212
if [ ! -d corpus/ruff_fix_validity ]; then
1313
mkdir -p corpus/ruff_fix_validity
14-
read -p "Would you like to build a corpus from a python source code dataset? (this will take a long time!) [Y/n] " -n 1 -r
15-
echo
16-
cd corpus/ruff_fix_validity
17-
if [[ $REPLY =~ ^[Yy]$ ]]; then
18-
curl -L 'https://zenodo.org/record/3628784/files/python-corpus.tar.gz?download=1' | tar xz
14+
15+
(
16+
cd corpus/ruff_fix_validity
17+
18+
read -p "Would you like to build a corpus from a python source code dataset? (this will take a long time!) [Y/n] " -n 1 -r
19+
echo
20+
if [[ $REPLY =~ ^[Yy]$ ]]; then
21+
curl -L 'https://zenodo.org/record/3628784/files/python-corpus.tar.gz?download=1' | tar xz
22+
fi
23+
24+
# Build a smaller corpus in addition to the (optional) larger corpus
25+
curl -L 'https://github.com/python/cpython/archive/refs/tags/v3.13.0.tar.gz' | tar xz
26+
cp -r "../../../crates/red_knot_workspace/resources/test/corpus" "red_knot_workspace"
27+
cp -r "../../../crates/ruff_linter/resources/test/fixtures" "ruff_linter"
28+
cp -r "../../../crates/ruff_python_formatter/resources/test/fixtures" "ruff_python_formatter"
29+
cp -r "../../../crates/ruff_python_parser/resources" "ruff_python_parser"
30+
31+
# Delete all non-Python files
32+
find . -type f -not -name "*.py" -delete
33+
)
34+
35+
if [[ "$OSTYPE" == "darwin"* ]]; then
36+
cargo +nightly fuzz cmin ruff_fix_validity -- -timeout=5
37+
else
38+
cargo fuzz cmin -s none ruff_fix_validity -- -timeout=5
1939
fi
20-
curl -L 'https://github.com/python/cpython/archive/refs/tags/v3.12.0b2.tar.gz' | tar xz
21-
cp -r "../../../crates/ruff_linter/resources/test" .
22-
cd -
23-
cargo fuzz cmin -s none ruff_fix_validity -- -timeout=5
2440
fi
2541

2642
echo "Done! You are ready to fuzz."

0 commit comments

Comments
 (0)