@@ -130,6 +130,8 @@ impl Linter {
         self.external_linter.is_some()
     }
 
+    /// # Panics
+    /// Panics in debug mode if the number of diagnostics does not match when running with and without optimizations.
     pub fn run<'a>(
         &self,
         path: &Path,
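The new `# Panics` doc refers to a debug-only self-check introduced in the next hunk: the linter executes its optimized path, then, in debug builds only, re-runs an unoptimized reference path and asserts that both produced identical diagnostics. A minimal sketch of that pattern, assuming hypothetical `fast_sum`/`slow_sum` stand-ins rather than oxc's actual APIs:

```rust
/// Optimized path (pretend this takes clever shortcuts).
fn fast_sum(xs: &[u64]) -> u64 {
    xs.iter().copied().sum()
}

/// Straightforward reference path.
fn slow_sum(xs: &[u64]) -> u64 {
    let mut total = 0;
    for &x in xs {
        total += x;
    }
    total
}

/// # Panics
/// Panics in debug builds if the optimized and reference results diverge.
pub fn checked_sum(xs: &[u64]) -> u64 {
    let result = fast_sum(xs);
    #[cfg(debug_assertions)]
    {
        // Debug builds re-run the slow reference implementation and
        // assert that both paths agree, mirroring the linter's check.
        let reference = slow_sum(xs);
        assert_eq!(result, reference, "optimized and reference paths disagree");
    }
    result
}

fn main() {
    assert_eq!(checked_sum(&[1, 2, 3]), 6);
}
```

Since `#[cfg(debug_assertions)]` removes the block at compile time, release builds pay nothing for the reference run.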
@@ -140,116 +142,150 @@ impl Linter {
 
         let mut ctx_host = Rc::new(ContextHost::new(path, context_sub_hosts, self.options, config));
 
+        #[cfg(debug_assertions)]
+        let mut current_diagnostic_index = 0;
+
         loop {
             let rules = rules
                 .iter()
                 .filter(|(rule, _)| rule.should_run(&ctx_host) && !rule.is_tsgolint_rule())
-                .map(|(rule, severity)| (rule, Rc::clone(&ctx_host).spawn(rule, *severity)));
+                .map(|(rule, severity)| (rule, Rc::clone(&ctx_host).spawn(rule, *severity)))
+                .collect::<Vec<_>>();
 
             let semantic = ctx_host.semantic();
 
             let should_run_on_jest_node =
                 ctx_host.plugins().has_test() && ctx_host.frameworks().is_test();
 
-            // IMPORTANT: We have two branches here for performance reasons:
-            //
-            // 1) Branch where we iterate over each node, then each rule
-            // 2) Branch where we iterate over each rule, then each node
-            //
-            // When the number of nodes is relatively small, most of them can fit
-            // in the cache and we can save iterating over the rules multiple times.
-            // But for large files, the number of nodes can be so large that it
-            // starts to not fit into the cache and pushes out other data, like the rules.
-            // So we end up thrashing the cache with each rule iteration. In this case,
-            // it's better to put rules in the inner loop, as the rules data is smaller
-            // and is more likely to fit in the cache.
-            //
-            // The threshold here is chosen to balance between performance improvement
-            // from not iterating over rules multiple times, but also ensuring that we
-            // don't thrash the cache too much. Feel free to tweak based on benchmarking.
-            //
-            // See https://github.com/oxc-project/oxc/pull/6600 for more context.
-            if semantic.nodes().len() > 200_000 {
-                // Collect rules into a Vec so that we can iterate over the rules multiple times
-                let rules = rules.collect::<Vec<_>>();
-
-                // TODO: It seems like there is probably a more intelligent way to preallocate space here. This will
-                // likely incur quite a few unnecessary reallocs currently. We theoretically could compute this at
-                // compile-time since we know all of the rules and their AST node type information ahead of time.
+            let execute_rules = |with_ast_kind_filtering: bool| {
+                // IMPORTANT: We have two branches here for performance reasons:
                 //
-                // Use boxed array to help compiler see that indexing into it with an `AstType`
-                // cannot go out of bounds, and remove bounds checks.
-                let mut rules_by_ast_type = boxed_array![Vec::new(); AST_TYPE_MAX as usize + 1];
-                // TODO: Compute needed capacity. This is a slight overestimate as not 100% of rules will need to run on all
-                // node types, but it at least guarantees we won't need to realloc.
-                let mut rules_any_ast_type = Vec::with_capacity(rules.len());
-
-                for (rule, ctx) in &rules {
-                    let rule = *rule;
-                    // Collect node type information for rules. In large files, benchmarking showed it was worth
-                    // collecting rules into buckets by AST node type to avoid iterating over all rules for each node.
-                    if let Some(ast_types) = rule.types_info() {
-                        for ty in ast_types {
-                            rules_by_ast_type[ty as usize].push((rule, ctx));
-                        }
-                    } else {
-                        rules_any_ast_type.push((rule, ctx));
-                    }
-
-                    rule.run_once(ctx);
-                }
+                // 1) Branch where we iterate over each node, then each rule
+                // 2) Branch where we iterate over each rule, then each node
+                //
+                // When the number of nodes is relatively small, most of them can fit
+                // in the cache and we can save iterating over the rules multiple times.
+                // But for large files, the number of nodes can be so large that it
+                // starts to not fit into the cache and pushes out other data, like the rules.
+                // So we end up thrashing the cache with each rule iteration. In this case,
+                // it's better to put rules in the inner loop, as the rules data is smaller
+                // and is more likely to fit in the cache.
+                //
+                // The threshold here is chosen to balance between performance improvement
+                // from not iterating over rules multiple times, but also ensuring that we
+                // don't thrash the cache too much. Feel free to tweak based on benchmarking.
+                //
+                // See https://github.com/oxc-project/oxc/pull/6600 for more context.
+                if semantic.nodes().len() > 200_000 {
+                    // TODO: It seems like there is probably a more intelligent way to preallocate space here. This will
+                    // likely incur quite a few unnecessary reallocs currently. We theoretically could compute this at
+                    // compile-time since we know all of the rules and their AST node type information ahead of time.
+                    //
+                    // Use boxed array to help compiler see that indexing into it with an `AstType`
+                    // cannot go out of bounds, and remove bounds checks.
+                    let mut rules_by_ast_type = boxed_array![Vec::new(); AST_TYPE_MAX as usize + 1];
+                    // TODO: Compute needed capacity. This is a slight overestimate as not 100% of rules will need to run on all
+                    // node types, but it at least guarantees we won't need to realloc.
+                    let mut rules_any_ast_type = Vec::with_capacity(rules.len());
 
-                for symbol in semantic.scoping().symbol_ids() {
                     for (rule, ctx) in &rules {
-                        rule.run_on_symbol(symbol, ctx);
-                    }
-                }
+                        let rule = *rule;
+                        // Collect node type information for rules. In large files, benchmarking showed it was worth
+                        // collecting rules into buckets by AST node type to avoid iterating over all rules for each node.
+                        if with_ast_kind_filtering && let Some(ast_types) = rule.types_info() {
+                            for ty in ast_types {
+                                rules_by_ast_type[ty as usize].push((rule, ctx));
+                            }
+                        } else {
+                            rules_any_ast_type.push((rule, ctx));
+                        }
 
-                // Run rules on nodes
-                for node in semantic.nodes() {
-                    for (rule, ctx) in &rules_by_ast_type[node.kind().ty() as usize] {
-                        rule.run(node, ctx);
-                    }
-                    for (rule, ctx) in &rules_any_ast_type {
-                        rule.run(node, ctx);
+                        rule.run_once(ctx);
                     }
-                }
 
-                if should_run_on_jest_node {
-                    for jest_node in iter_possible_jest_call_node(semantic) {
+                    for symbol in semantic.scoping().symbol_ids() {
                         for (rule, ctx) in &rules {
-                            rule.run_on_jest_node(&jest_node, ctx);
+                            rule.run_on_symbol(symbol, ctx);
                         }
                     }
-                }
-            } else {
-                for (rule, ref ctx) in rules {
-                    rule.run_once(ctx);
 
-                    for symbol in semantic.scoping().symbol_ids() {
-                        rule.run_on_symbol(symbol, ctx);
-                    }
-
-                    // For smaller files, benchmarking showed it was faster to iterate over all rules and just check the
-                    // node types as we go, rather than pre-bucketing rules by AST node type and doing extra allocations.
-                    if let Some(ast_types) = rule.types_info() {
-                        for node in semantic.nodes() {
-                            if ast_types.has(node.kind().ty()) {
-                                rule.run(node, ctx);
-                            }
+                    // Run rules on nodes
+                    for node in semantic.nodes() {
+                        for (rule, ctx) in &rules_by_ast_type[node.kind().ty() as usize] {
+                            rule.run(node, ctx);
                         }
-                    } else {
-                        for node in semantic.nodes() {
+                        for (rule, ctx) in &rules_any_ast_type {
                             rule.run(node, ctx);
                         }
                     }
 
                     if should_run_on_jest_node {
                         for jest_node in iter_possible_jest_call_node(semantic) {
-                            rule.run_on_jest_node(&jest_node, ctx);
+                            for (rule, ctx) in &rules {
+                                rule.run_on_jest_node(&jest_node, ctx);
+                            }
+                        }
+                    }
+                } else {
+                    for (rule, ctx) in &rules {
+                        rule.run_once(ctx);
+
+                        for symbol in semantic.scoping().symbol_ids() {
+                            rule.run_on_symbol(symbol, ctx);
+                        }
+
+                        // For smaller files, benchmarking showed it was faster to iterate over all rules and just check the
+                        // node types as we go, rather than pre-bucketing rules by AST node type and doing extra allocations.
+                        if with_ast_kind_filtering && let Some(ast_types) = rule.types_info() {
+                            for node in semantic.nodes() {
+                                if ast_types.has(node.kind().ty()) {
+                                    rule.run(node, ctx);
+                                }
+                            }
+                        } else {
+                            for node in semantic.nodes() {
+                                rule.run(node, ctx);
+                            }
+                        }
+
+                        if should_run_on_jest_node {
+                            for jest_node in iter_possible_jest_call_node(semantic) {
+                                rule.run_on_jest_node(&jest_node, ctx);
+                            }
                         }
                     }
                 }
+            };
+
+            execute_rules(true);
+
+            #[cfg(debug_assertions)]
+            {
+                let diagnostics_after_optimized = ctx_host.diagnostic_count();
+                execute_rules(false);
+                let diagnostics_after_unoptimized = ctx_host.diagnostic_count();
+                ctx_host.get_diagnostics(|diagnostics| {
+                    let optimized_diagnostics = &diagnostics[current_diagnostic_index..diagnostics_after_optimized];
+                    let unoptimized_diagnostics = &diagnostics[diagnostics_after_optimized..diagnostics_after_unoptimized];
+
+                    // Check that we have the same number of diagnostics
+                    assert_eq!(
+                        optimized_diagnostics.len(),
+                        unoptimized_diagnostics.len(),
+                        "Running with and without optimizations produced different diagnostic counts: {} vs {}",
+                        optimized_diagnostics.len(),
+                        unoptimized_diagnostics.len()
+                    );
+                    for (opt_diag, unopt_diag) in optimized_diagnostics.iter().zip(unoptimized_diagnostics.iter()) {
+                        assert_eq!(
+                            opt_diag,
+                            unopt_diag,
+                            "Diagnostic differs between optimized and unoptimized runs",
+                        );
+                    }
+
+                    diagnostics.truncate(current_diagnostic_index + optimized_diagnostics.len());
+                });
             }
 
             self.run_external_rules(&external_rules, path, &mut ctx_host, allocator);
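The `IMPORTANT` comment above motivates two traversal orders: nodes-outer with rules pre-bucketed by AST node type for large files, and rules-outer with a per-node type check for small files. A self-contained sketch of both strategies, using plain integers as stand-in AST node types (`Rule`, `NODE_TYPE_MAX`, and the sample data below are illustrative, not oxc's types):

```rust
const NODE_TYPE_MAX: usize = 4;

struct Rule {
    name: &'static str,
    // Node types this rule cares about; `None` means "runs on all types".
    types: Option<Vec<usize>>,
}

fn main() {
    let rules = vec![
        Rule { name: "no-foo", types: Some(vec![0, 2]) },
        Rule { name: "no-bar", types: None },
    ];
    let nodes: Vec<usize> = vec![0, 1, 2, 3, 2, 0]; // node types in visit order

    // Large-file strategy: bucket rules by node type once up front, then put
    // nodes in the outer loop so each node only touches relevant rules.
    let mut by_type: Vec<Vec<&Rule>> = vec![Vec::new(); NODE_TYPE_MAX + 1];
    let mut any_type: Vec<&Rule> = Vec::new();
    for rule in &rules {
        match &rule.types {
            Some(types) => {
                for &ty in types {
                    by_type[ty].push(rule);
                }
            }
            None => any_type.push(rule),
        }
    }
    for &node_ty in &nodes {
        for rule in by_type[node_ty].iter().chain(any_type.iter()) {
            println!("{} runs on node type {node_ty}", rule.name);
        }
    }

    // Small-file strategy: rules in the outer loop, checking node types as
    // we go, which avoids the extra allocations for the buckets.
    for rule in &rules {
        for &node_ty in &nodes {
            if rule.types.as_ref().map_or(true, |t| t.contains(&node_ty)) {
                println!("{} runs on node type {node_ty}", rule.name);
            }
        }
    }
}
```

Both orders visit the same (rule, node) pairs; the choice only affects memory-access patterns. The `with_ast_kind_filtering` flag is what the debug check toggles: with filtering off, every rule sees every node, which should yield identical diagnostics whenever each rule's `types_info()` is accurate.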
@@ -264,6 +300,11 @@ impl Linter {
             if !ctx_host.next_sub_host() {
                 break;
             }
+
+            #[cfg(debug_assertions)]
+            {
+                current_diagnostic_index = ctx_host.diagnostic_count();
+            }
         }
 
         ctx_host.take_diagnostics()
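The `boxed_array!` comment in the second hunk describes a bounds-check trick: with a fixed-length boxed array of exactly `AST_TYPE_MAX as usize + 1` slots, the compiler can see that casting an `AstType` to `usize` always lands in range, so indexing needs no runtime check. A rough illustration of the idea under assumed types (`AstType` here is a three-variant stand-in, plain `Box::new` replaces the `boxed_array!` macro, and whether the check is actually elided is up to codegen):

```rust
#[allow(dead_code)]
#[derive(Clone, Copy)]
#[repr(u8)]
enum AstType {
    Program = 0,
    Function = 1,
    Call = 2,
}
const AST_TYPE_MAX: u8 = AstType::Call as u8;

fn main() {
    // One bucket per AST type. The array length is exactly AST_TYPE_MAX + 1,
    // so `ty as usize` always lands in range; a fixed-size `Box<[T; N]>`
    // (unlike a `Vec<T>`) makes that provable to the optimizer.
    let mut buckets: Box<[Vec<&'static str>; AST_TYPE_MAX as usize + 1]> =
        Box::new([Vec::new(), Vec::new(), Vec::new()]);

    buckets[AstType::Call as usize].push("no-eval");
    buckets[AstType::Function as usize].push("max-params");

    for (ty, rules) in buckets.iter().enumerate() {
        println!("type {ty}: {rules:?}");
    }
}
```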