Skip to content

Commit 18a1145

Browse files
committed
test(linter): add debug assertions for skipping rules (#13724)
1 parent 3af1e5d commit 18a1145

File tree

2 files changed

+131
-80
lines changed

2 files changed

+131
-80
lines changed

crates/oxc_linter/src/context/host.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,16 @@ impl<'a> ContextHost<'a> {
342342
std::mem::take(&mut *messages)
343343
}
344344

345+
#[cfg(debug_assertions)]
346+
pub fn get_diagnostics(&self, cb: impl FnOnce(&mut Vec<Message<'a>>)) {
347+
cb(self.diagnostics.borrow_mut().as_mut());
348+
}
349+
350+
#[cfg(debug_assertions)]
351+
pub fn diagnostic_count(&self) -> usize {
352+
self.diagnostics.borrow().len()
353+
}
354+
345355
/// Creates a new [`LintContext`] for a specific rule.
346356
pub fn spawn(self: Rc<Self>, rule: &RuleEnum, severity: AllowWarnDeny) -> LintContext<'a> {
347357
let rule_name = rule.name();

crates/oxc_linter/src/lib.rs

Lines changed: 121 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ impl Linter {
130130
self.external_linter.is_some()
131131
}
132132

133+
/// # Panics
134+
/// Panics when running in debug mode if the number of diagnostics produced with optimizations does not match the number produced without optimizations.
133135
pub fn run<'a>(
134136
&self,
135137
path: &Path,
@@ -140,116 +142,150 @@ impl Linter {
140142

141143
let mut ctx_host = Rc::new(ContextHost::new(path, context_sub_hosts, self.options, config));
142144

145+
#[cfg(debug_assertions)]
146+
let mut current_diagnostic_index = 0;
147+
143148
loop {
144149
let rules = rules
145150
.iter()
146151
.filter(|(rule, _)| rule.should_run(&ctx_host) && !rule.is_tsgolint_rule())
147-
.map(|(rule, severity)| (rule, Rc::clone(&ctx_host).spawn(rule, *severity)));
152+
.map(|(rule, severity)| (rule, Rc::clone(&ctx_host).spawn(rule, *severity)))
153+
.collect::<Vec<_>>();
148154

149155
let semantic = ctx_host.semantic();
150156

151157
let should_run_on_jest_node =
152158
ctx_host.plugins().has_test() && ctx_host.frameworks().is_test();
153159

154-
// IMPORTANT: We have two branches here for performance reasons:
155-
//
156-
// 1) Branch where we iterate over each node, then each rule
157-
// 2) Branch where we iterate over each rule, then each node
158-
//
159-
// When the number of nodes is relatively small, most of them can fit
160-
// in the cache and we can save iterating over the rules multiple times.
161-
// But for large files, the number of nodes can be so large that it
162-
// starts to not fit into the cache and pushes out other data, like the rules.
163-
// So we end up thrashing the cache with each rule iteration. In this case,
164-
// it's better to put rules in the inner loop, as the rules data is smaller
165-
// and is more likely to fit in the cache.
166-
//
167-
// The threshold here is chosen to balance between performance improvement
168-
// from not iterating over rules multiple times, but also ensuring that we
169-
// don't thrash the cache too much. Feel free to tweak based on benchmarking.
170-
//
171-
// See https://github.com/oxc-project/oxc/pull/6600 for more context.
172-
if semantic.nodes().len() > 200_000 {
173-
// Collect rules into a Vec so that we can iterate over the rules multiple times
174-
let rules = rules.collect::<Vec<_>>();
175-
176-
// TODO: It seems like there is probably a more intelligent way to preallocate space here. This will
177-
// likely incur quite a few unnecessary reallocs currently. We theoretically could compute this at
178-
// compile-time since we know all of the rules and their AST node type information ahead of time.
160+
let execute_rules = |with_ast_kind_filtering: bool| {
161+
// IMPORTANT: We have two branches here for performance reasons:
179162
//
180-
// Use boxed array to help compiler see that indexing into it with an `AstType`
181-
// cannot go out of bounds, and remove bounds checks.
182-
let mut rules_by_ast_type = boxed_array![Vec::new(); AST_TYPE_MAX as usize + 1];
183-
// TODO: Compute needed capacity. This is a slight overestimate as not 100% of rules will need to run on all
184-
// node types, but it at least guarantees we won't need to realloc.
185-
let mut rules_any_ast_type = Vec::with_capacity(rules.len());
186-
187-
for (rule, ctx) in &rules {
188-
let rule = *rule;
189-
// Collect node type information for rules. In large files, benchmarking showed it was worth
190-
// collecting rules into buckets by AST node type to avoid iterating over all rules for each node.
191-
if let Some(ast_types) = rule.types_info() {
192-
for ty in ast_types {
193-
rules_by_ast_type[ty as usize].push((rule, ctx));
194-
}
195-
} else {
196-
rules_any_ast_type.push((rule, ctx));
197-
}
198-
199-
rule.run_once(ctx);
200-
}
163+
// 1) Branch where we iterate over each node, then each rule
164+
// 2) Branch where we iterate over each rule, then each node
165+
//
166+
// When the number of nodes is relatively small, most of them can fit
167+
// in the cache and we can save iterating over the rules multiple times.
168+
// But for large files, the number of nodes can be so large that it
169+
// starts to not fit into the cache and pushes out other data, like the rules.
170+
// So we end up thrashing the cache with each rule iteration. In this case,
171+
// it's better to put rules in the inner loop, as the rules data is smaller
172+
// and is more likely to fit in the cache.
173+
//
174+
// The threshold here is chosen to balance between performance improvement
175+
// from not iterating over rules multiple times, but also ensuring that we
176+
// don't thrash the cache too much. Feel free to tweak based on benchmarking.
177+
//
178+
// See https://github.com/oxc-project/oxc/pull/6600 for more context.
179+
if semantic.nodes().len() > 200_000 {
180+
// TODO: It seems like there is probably a more intelligent way to preallocate space here. This will
181+
// likely incur quite a few unnecessary reallocs currently. We theoretically could compute this at
182+
// compile-time since we know all of the rules and their AST node type information ahead of time.
183+
//
184+
// Use boxed array to help compiler see that indexing into it with an `AstType`
185+
// cannot go out of bounds, and remove bounds checks.
186+
let mut rules_by_ast_type = boxed_array![Vec::new(); AST_TYPE_MAX as usize + 1];
187+
// TODO: Compute needed capacity. This is a slight overestimate as not 100% of rules will need to run on all
188+
// node types, but it at least guarantees we won't need to realloc.
189+
let mut rules_any_ast_type = Vec::with_capacity(rules.len());
201190

202-
for symbol in semantic.scoping().symbol_ids() {
203191
for (rule, ctx) in &rules {
204-
rule.run_on_symbol(symbol, ctx);
205-
}
206-
}
192+
let rule = *rule;
193+
// Collect node type information for rules. In large files, benchmarking showed it was worth
194+
// collecting rules into buckets by AST node type to avoid iterating over all rules for each node.
195+
if with_ast_kind_filtering && let Some(ast_types) = rule.types_info() {
196+
for ty in ast_types {
197+
rules_by_ast_type[ty as usize].push((rule, ctx));
198+
}
199+
} else {
200+
rules_any_ast_type.push((rule, ctx));
201+
}
207202

208-
// Run rules on nodes
209-
for node in semantic.nodes() {
210-
for (rule, ctx) in &rules_by_ast_type[node.kind().ty() as usize] {
211-
rule.run(node, ctx);
212-
}
213-
for (rule, ctx) in &rules_any_ast_type {
214-
rule.run(node, ctx);
203+
rule.run_once(ctx);
215204
}
216-
}
217205

218-
if should_run_on_jest_node {
219-
for jest_node in iter_possible_jest_call_node(semantic) {
206+
for symbol in semantic.scoping().symbol_ids() {
220207
for (rule, ctx) in &rules {
221-
rule.run_on_jest_node(&jest_node, ctx);
208+
rule.run_on_symbol(symbol, ctx);
222209
}
223210
}
224-
}
225-
} else {
226-
for (rule, ref ctx) in rules {
227-
rule.run_once(ctx);
228211

229-
for symbol in semantic.scoping().symbol_ids() {
230-
rule.run_on_symbol(symbol, ctx);
231-
}
232-
233-
// For smaller files, benchmarking showed it was faster to iterate over all rules and just check the
234-
// node types as we go, rather than pre-bucketing rules by AST node type and doing extra allocations.
235-
if let Some(ast_types) = rule.types_info() {
236-
for node in semantic.nodes() {
237-
if ast_types.has(node.kind().ty()) {
238-
rule.run(node, ctx);
239-
}
212+
// Run rules on nodes
213+
for node in semantic.nodes() {
214+
for (rule, ctx) in &rules_by_ast_type[node.kind().ty() as usize] {
215+
rule.run(node, ctx);
240216
}
241-
} else {
242-
for node in semantic.nodes() {
217+
for (rule, ctx) in &rules_any_ast_type {
243218
rule.run(node, ctx);
244219
}
245220
}
246221

247222
if should_run_on_jest_node {
248223
for jest_node in iter_possible_jest_call_node(semantic) {
249-
rule.run_on_jest_node(&jest_node, ctx);
224+
for (rule, ctx) in &rules {
225+
rule.run_on_jest_node(&jest_node, ctx);
226+
}
227+
}
228+
}
229+
} else {
230+
for (rule, ctx) in &rules {
231+
rule.run_once(ctx);
232+
233+
for symbol in semantic.scoping().symbol_ids() {
234+
rule.run_on_symbol(symbol, ctx);
235+
}
236+
237+
// For smaller files, benchmarking showed it was faster to iterate over all rules and just check the
238+
// node types as we go, rather than pre-bucketing rules by AST node type and doing extra allocations.
239+
if with_ast_kind_filtering && let Some(ast_types) = rule.types_info() {
240+
for node in semantic.nodes() {
241+
if ast_types.has(node.kind().ty()) {
242+
rule.run(node, ctx);
243+
}
244+
}
245+
} else {
246+
for node in semantic.nodes() {
247+
rule.run(node, ctx);
248+
}
249+
}
250+
251+
if should_run_on_jest_node {
252+
for jest_node in iter_possible_jest_call_node(semantic) {
253+
rule.run_on_jest_node(&jest_node, ctx);
254+
}
250255
}
251256
}
252257
}
258+
};
259+
260+
execute_rules(true);
261+
262+
#[cfg(debug_assertions)]
263+
{
264+
let diagnostics_after_optimized = ctx_host.diagnostic_count();
265+
execute_rules(false);
266+
let diagnostics_after_unoptimized = ctx_host.diagnostic_count();
267+
ctx_host.get_diagnostics(|diagnostics| {
268+
let optimized_diagnostics = &diagnostics[current_diagnostic_index..diagnostics_after_optimized];
269+
let unoptimized_diagnostics = &diagnostics[diagnostics_after_optimized..diagnostics_after_unoptimized];
270+
271+
// Check that we have the same number of diagnostics
272+
assert_eq!(
273+
optimized_diagnostics.len(),
274+
unoptimized_diagnostics.len(),
275+
"Running with and without optimizations produced different diagnostic counts: {} vs {}",
276+
optimized_diagnostics.len(),
277+
unoptimized_diagnostics.len()
278+
);
279+
for (opt_diag, unopt_diag) in optimized_diagnostics.iter().zip(unoptimized_diagnostics.iter()){
280+
assert_eq!(
281+
opt_diag,
282+
unopt_diag,
283+
"Diagnostic differs between optimized and unoptimized runs",
284+
);
285+
}
286+
287+
diagnostics.truncate(current_diagnostic_index + optimized_diagnostics.len());
288+
});
253289
}
254290

255291
self.run_external_rules(&external_rules, path, &mut ctx_host, allocator);
@@ -264,6 +300,11 @@ impl Linter {
264300
if !ctx_host.next_sub_host() {
265301
break;
266302
}
303+
304+
#[cfg(debug_assertions)]
305+
{
306+
current_diagnostic_index = ctx_host.diagnostic_count();
307+
}
267308
}
268309

269310
ctx_host.take_diagnostics()

0 commit comments

Comments (0)