11use oxc_span:: Atom ;
2- use rustc_hash:: { FxHashMap , FxHashSet } ;
2+ use rustc_hash:: FxHashSet ;
33
44use crate :: parser:: reader:: Reader ;
55
6- /// Currently all of properties are read only from outside of this module.
6+ /// NOTE: Currently all of properties are read- only from outside of this module.
77/// Even inside of this module, it is not changed after initialized.
88#[ derive( Debug ) ]
99pub struct State < ' a > {
@@ -49,12 +49,7 @@ impl<'a> State<'a> {
4949 }
5050}
5151
52- enum SimpleUnit < ' a > {
53- Open ,
54- Close ,
55- Pipe ,
56- GroupName ( Atom < ' a > ) ,
57- }
52+ // ---
5853
5954/// Returns: Result<(num_of_left_parens, capturing_group_names), duplicated_named_capturing_group_offsets>
6055fn parse_capturing_groups < ' a > (
@@ -67,18 +62,12 @@ fn parse_capturing_groups<'a>(
6762 // (?=...), (?!...), (?<=...), (?<!...)
6863 let mut num_of_left_capturing_parens = 0 ;
6964
70- // Collect capturing group names
71- let mut group_names: FxHashMap < Atom < ' a > , ( u32 , u32 ) > = FxHashMap :: default ( ) ;
72- // At the same time, check duplicates
73- // If you want to process this most efficiently:
74- // - define a scope for each Disjunction
75- // - then check for duplicates for each `|` while inheriting the parent-child relationship
76- // ref. https://source.chromium.org/chromium/chromium/src/+/main:v8/src/regexp/regexp-parser.cc;l=1644
77- // However, duplicates are rare in the first place.
78- // And as long as it works simply, this may be enough.
79- let mut may_duplicates: FxHashMap < Atom < ' a > , DuplicatedNamedCapturingGroupOffsets > =
80- FxHashMap :: default ( ) ;
81- let mut simplified: Vec < SimpleUnit < ' a > > = vec ! [ ] ;
65+ // Track all named groups with their depth and alternative path
66+ let mut named_groups: Vec < NamedGroupInfo < ' a > > = Vec :: new ( ) ;
67+ let mut group_names: FxHashSet < Atom < ' a > > = FxHashSet :: default ( ) ;
68+
69+ // Track alternatives and depth
70+ let mut tracker = AlternativeTracker :: new ( ) ;
8271
8372 let mut in_escape = false ;
8473 let mut in_character_class = false ;
@@ -92,22 +81,22 @@ fn parse_capturing_groups<'a>(
9281 } else if cp == ']' as u32 {
9382 in_character_class = false ;
9483 } else if !in_character_class && cp == '|' as u32 {
95- simplified . push ( SimpleUnit :: Pipe ) ;
84+ tracker . mark_alternative ( ) ;
9685 } else if !in_character_class && cp == ')' as u32 {
97- simplified . push ( SimpleUnit :: Close ) ;
86+ tracker . exit_group ( ) ;
9887 } else if !in_character_class && cp == '(' as u32 {
9988 reader. advance ( ) ;
89+ tracker. enter_group ( ) ;
10090
101- simplified. push ( SimpleUnit :: Open ) ;
102-
103- // Skip IgnoreGroup
91+ // Check for non-capturing groups and lookarounds
92+ // Note: these still increase depth but don't count as capturing groups
10493 if reader. eat2 ( '?' , ':' )
105- // Skip LookAroundAssertion
10694 || reader. eat2 ( '?' , '=' )
10795 || reader. eat2 ( '?' , '!' )
10896 || reader. eat3 ( '?' , '<' , '=' )
10997 || reader. eat3 ( '?' , '<' , '!' )
11098 {
99+ // Non-capturing group or lookaround - depth already incremented
111100 continue ;
112101 }
113102
@@ -127,17 +116,29 @@ fn parse_capturing_groups<'a>(
127116
128117 if reader. eat ( '>' ) {
129118 let group_name = reader. atom ( span_start, span_end) ;
130-
131- simplified. push ( SimpleUnit :: GroupName ( group_name) ) ;
132- // Check duplicates later
133- if let Some ( last_span) = group_names. get ( & group_name) {
134- let entry = may_duplicates. entry ( group_name) . or_default ( ) ;
135- entry. push ( * last_span) ;
136- entry. push ( ( span_start, span_end) ) ;
137- } else {
138- group_names. insert ( group_name, ( span_start, span_end) ) ;
119+ let alternative_path = tracker. get_alternative_path ( ) ;
120+
121+ // Check for duplicates with existing groups
122+ for existing in & named_groups {
123+ if existing. name == group_name {
124+ // Check if they can participate together
125+ if !AlternativeTracker :: can_participate (
126+ & existing. alternative_path ,
127+ & alternative_path,
128+ ) {
129+ // True duplicate - return error
130+ return Err ( vec ! [ existing. span, ( span_start, span_end) ] ) ;
131+ }
132+ }
139133 }
140134
135+ named_groups. push ( NamedGroupInfo {
136+ name : group_name,
137+ span : ( span_start, span_end) ,
138+ alternative_path,
139+ } ) ;
140+ group_names. insert ( group_name) ;
141+
141142 continue ;
142143 }
143144 }
@@ -149,63 +150,72 @@ fn parse_capturing_groups<'a>(
149150 reader. advance ( ) ;
150151 }
151152
152- // Check duplicates and emit error if exists
153- if !may_duplicates. is_empty ( ) {
154- // Check must be done for each group name
155- for ( group_name, spans) in may_duplicates {
156- let iter = simplified. iter ( ) . clone ( ) ;
153+ Ok ( ( num_of_left_capturing_parens, group_names) )
154+ }
157155
158- let mut alternative_depth = FxHashSet :: default ( ) ;
159- let mut depth = 0_u32 ;
160- let mut is_first = true ;
156+ /// Tracks which alternatives at each depth level have been seen.
157+ /// Used to determine if duplicate named groups are in different alternatives.
158+ #[ derive( Debug ) ]
159+ struct AlternativeTracker {
160+ /// Current nesting depth
161+ depth : u32 ,
162+ /// Current alternative index at each depth level (stack-based)
163+ /// Each level represents the alternative index at that nesting depth
164+ current_alternative : Vec < u32 > ,
165+ }
161166
162- ' outer: for token in iter {
163- match token {
164- SimpleUnit :: Open => {
165- depth += 1 ;
166- }
167- SimpleUnit :: Close => {
168- // May panic if the pattern has invalid, unbalanced parens
169- depth = depth. saturating_sub ( 1 ) ;
170- }
171- SimpleUnit :: Pipe => {
172- if !is_first {
173- alternative_depth. insert ( depth) ;
174- }
175- }
176- SimpleUnit :: GroupName ( name) => {
177- // Check target group name only
178- if * name != group_name {
179- continue ;
180- }
181- // Skip the first one, because it is not duplicated
182- if is_first {
183- is_first = false ;
184- continue ;
185- }
167+ impl AlternativeTracker {
168+ fn new ( ) -> Self {
169+ Self { depth : 0 , current_alternative : vec ! [ 0 ] }
170+ }
186171
187- // If left outer `|` is found, both can participate
188- // `|(?<n>)`
189- // ^ ^ depth: 1
190- // ^ depth: 0
191- for i in ( 0 ..depth) . rev ( ) {
192- if alternative_depth. contains ( & i) {
193- // Remove it, next duplicates requires another `|`
194- alternative_depth. remove ( & i) ;
195- continue ' outer;
196- }
197- }
172+ fn enter_group ( & mut self ) {
173+ self . depth += 1 ;
174+ while self . current_alternative . len ( ) <= self . depth as usize {
175+ self . current_alternative . push ( 0 ) ;
176+ }
177+ }
198178
199- return Err ( spans) ;
200- }
201- }
179+ fn exit_group ( & mut self ) {
180+ if let Some ( alt) = self . current_alternative . get_mut ( self . depth as usize ) {
181+ * alt = 0 ;
182+ }
183+ self . depth = self . depth . saturating_sub ( 1 ) ;
184+ }
185+
186+ fn mark_alternative ( & mut self ) {
187+ if let Some ( alt) = self . current_alternative . get_mut ( self . depth as usize ) {
188+ * alt += 1 ;
189+ }
190+ }
191+
192+ fn get_alternative_path ( & self ) -> Vec < u32 > {
193+ self . current_alternative . iter ( ) . take ( ( self . depth + 1 ) as usize ) . copied ( ) . collect ( )
194+ }
195+
196+ fn can_participate ( alt1 : & [ u32 ] , alt2 : & [ u32 ] ) -> bool {
197+ let min_len = alt1. len ( ) . min ( alt2. len ( ) ) ;
198+ // Check as prefixes, if they differ at any level,
199+ // it means they are in different alternatives, so they can participate together.
200+ for i in 0 ..min_len {
201+ if alt1[ i] != alt2[ i] {
202+ return true ;
202203 }
203204 }
205+ false
204206 }
207+ }
205208
206- Ok ( ( num_of_left_capturing_parens, group_names. keys ( ) . copied ( ) . collect ( ) ) )
209+ /// Tracks information about a named capturing group
210+ #[ derive( Debug , Clone ) ]
211+ struct NamedGroupInfo < ' a > {
212+ name : Atom < ' a > ,
213+ span : ( u32 , u32 ) ,
214+ alternative_path : Vec < u32 > ,
207215}
208216
217+ // ---
218+
209219#[ cfg( test) ]
210220mod tests {
211221 use super :: * ;
@@ -225,6 +235,12 @@ mod tests {
225235 ( "(foo)(?<n>bar)(?<nn>baz)" , ( 3 , 2 ) ) ,
226236 ( "(?<n>.)(?<m>.)|(?<n>..)|(?<m>.)" , ( 4 , 2 ) ) ,
227237 ( "(?<n>.)(?<m>.)|(?:..)|(?<m>.)" , ( 3 , 2 ) ) ,
238+ // Test exit_group reset behavior: consecutive groups at same depth
239+ ( "((?<a>x))((?<b>y))|(?<c>z)" , ( 5 , 3 ) ) , // 2 outer groups + 2 inner named + 1 named = 5 total
240+ ( "((?<a>x))|((?<a>y))" , ( 4 , 1 ) ) , // 2 outer + 2 inner named = 4 total, 1 unique name
241+ // Nested groups with alternatives
242+ ( "((?<a>x)|((?<a>y)))" , ( 4 , 1 ) ) , // 1 outer + 1 named + 1 inner + 1 named = 4 total
243+ ( "(((?<a>x))|((?<b>y)))|(((?<a>z))|((?<b>w)))" , ( 10 , 2 ) ) , // Complex nesting
228244 ] {
229245 let mut reader = Reader :: initialize ( source_text, true , false ) . unwrap ( ) ;
230246
@@ -238,9 +254,15 @@ mod tests {
238254
239255 #[ test]
240256 fn duplicated_named_capturing_groups ( ) {
241- for source_text in
242- [ "(?<n>.)(?<n>..)" , "(?<n>.(?<n>..))" , "|(?<n>.(?<n>..))" , "(?<m>.)|(?<n>.(?<n>..))" ]
243- {
257+ for source_text in [
258+ "(?<n>.)(?<n>..)" ,
259+ "(?<n>.(?<n>..))" ,
260+ "|(?<n>.(?<n>..))" ,
261+ "(?<m>.)|(?<n>.(?<n>..))" ,
262+ // Test consecutive groups with same name in same alternative (should be error)
263+ "((?<a>x))((?<a>y))((?<a>z))" ,
264+ "(?<n>a)((?<n>b))" ,
265+ ] {
244266 let mut reader = Reader :: initialize ( source_text, true , false ) . unwrap ( ) ;
245267
246268 assert ! ( parse_capturing_groups( & mut reader) . is_err( ) , "{source_text}" ) ;
0 commit comments