Skip to content

Commit

Permalink
Catch \k<0>
Browse files Browse the repository at this point in the history
  • Loading branch information
jaynetics committed Feb 28, 2023
1 parent cee8b82 commit f8884f7
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 28 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* e.g. in `/(a)(?(1)b|c(?#hello)d)e/`, the 2nd conditional branch included "e"
- fixed quantifiers after comment groups being mis-assigned to that group
* e.g. in `/a(?#foo){3}/` (matches 'aaa')
- fixed scanner accepting unmatched closing parentheses ')'
- fixed Scanner accepting two cases of invalid Regexp syntax
* unmatched closing parentheses (`)`) and k-backrefs with number 0 (`\k<0>`)
* these are a `SyntaxError` in Ruby, so they could only be passed as a String
* they now raise a `Regexp::Scanner::ScannerError`
- fixed some scanner errors not inheriting from `Regexp::Scanner::ScannerError`
Expand Down
41 changes: 18 additions & 23 deletions lib/regexp_parser/scanner/scanner.rl
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,9 @@
# try to treat every other group head as options group, like Ruby
group_options = '?' . ( [^!#'():<=>~]+ . ':'? ) ?;

group_ref = [gk];
group_name_id_ab = ([^!0-9\->] | utf8_multibyte) . ([^>] | utf8_multibyte)*;
group_name_id_sq = ([^0-9\-'] | utf8_multibyte) . ([^'] | utf8_multibyte)*;
group_number = '-'? . [1-9] . [0-9]*;
group_number = '-'? . [0-9]+;
group_level = [+\-] . [0-9]+;

group_name = ('<' . group_name_id_ab? . '>') |
Expand All @@ -95,15 +94,11 @@

group_named = ('?' . group_name );

group_name_backref = 'k' . (('<' . group_name_id_ab? . group_level? '>') |
("'" . group_name_id_sq? . group_level? "'"));
group_name_call = 'g' . (('<' . group_name_id_ab? . group_level? '>') |
("'" . group_name_id_sq? . group_level? "'"));
group_ref_body = (('<' . (group_name_id_ab? | group_number) . group_level? '>') |
("'" . (group_name_id_sq? | group_number) . group_level? "'"));

group_number_backref = 'k' . (('<' . group_number . group_level? '>') |
("'" . group_number . group_level? "'"));
group_number_call = 'g' . (('<' . ((group_number . group_level?) | '0') '>') |
("'" . ((group_number . group_level?) | '0') "'"));
group_ref = 'k' . group_ref_body;
group_call = 'g' . group_ref_body;

group_type = group_atomic | group_passive | group_absence | group_named;

Expand Down Expand Up @@ -548,35 +543,35 @@

# Group backreference, named and numbered
# ------------------------------------------------------------------------
backslash . (group_name_backref | group_number_backref) > (backslashed, 4) {
backslash . (group_ref) > (backslashed, 4) {
case text = copy(data, ts, te)
when /^\\k(<>|'')/
raise ValidationError.for(:backref, 'backreference', 'ref ID is empty')
when /^\\k(.)[^\p{digit}\-][^+\-]*\D$/
when /^\\k(.)[^0-9\-][^+\-]*['>]$/
emit(:backref, $1 == '<' ? :name_ref_ab : :name_ref_sq, text)
when /^\\k(.)\d+\D$/
when /^\\k(.)[1-9]\d*['>]$/
emit(:backref, $1 == '<' ? :number_ref_ab : :number_ref_sq, text)
when /^\\k(.)-\d+\D$/
when /^\\k(.)-[1-9]\d*['>]$/
emit(:backref, $1 == '<' ? :number_rel_ref_ab : :number_rel_ref_sq, text)
when /^\\k(.)[^\p{digit}\-].*[+\-]\d+\D$/
when /^\\k(.)[^0-9\-].*[+\-]\d+['>]$/
emit(:backref, $1 == '<' ? :name_recursion_ref_ab : :name_recursion_ref_sq, text)
when /^\\k(.)-?\d+[+\-]\d+\D$/
when /^\\k(.)-?[1-9]\d*[+\-]\d+['>]$/
emit(:backref, $1 == '<' ? :number_recursion_ref_ab : :number_recursion_ref_sq, text)
else
raise ValidationError.for(:backref, 'backreference', 'invalid ref ID')
end
};

# Group call, named and numbered
# ------------------------------------------------------------------------
backslash . (group_name_call | group_number_call) > (backslashed, 4) {
backslash . (group_call) > (backslashed, 4) {
case text = copy(data, ts, te)
when /^\\g(<>|'')/
raise ValidationError.for(:backref, 'subexpression call', 'ref ID is empty')
when /^\\g(.)[^\p{digit}+\->][^+\-]*/
when /^\\g(.)[^0-9+\-].*['>]$/
emit(:backref, $1 == '<' ? :name_call_ab : :name_call_sq, text)
when /^\\g(.)\d+\D$/
when /^\\g(.)\d+['>]$/
emit(:backref, $1 == '<' ? :number_call_ab : :number_call_sq, text)
when /^\\g(.)[+-]\d+/
emit(:backref, $1 == '<' ? :number_rel_call_ab : :number_rel_call_sq, text)
else
raise ValidationError.for(:backref, 'subexpression call', 'invalid ref ID')
end
};

Expand Down
10 changes: 6 additions & 4 deletions spec/scanner/errors_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,12 @@
include_examples 'scan error', RS::InvalidGroupOption, 'invalid neg option', '(?-foo)'
include_examples 'scan error', RS::InvalidGroupOption, 'invalid neg option', '(?-u)'
include_examples 'scan error', RS::InvalidGroupOption, 'invalid neg option', '(?-mixu)'
include_examples 'scan error', RS::InvalidBackrefError, 'empty backref', '\k<>'
include_examples 'scan error', RS::InvalidBackrefError, 'empty backref', '\k\'\''
include_examples 'scan error', RS::InvalidBackrefError, 'empty refcall', '\g<>'
include_examples 'scan error', RS::InvalidBackrefError, 'empty refcall', '\g\'\''
include_examples 'scan error', RS::InvalidBackrefError, 'invalid backref', '\k<>'
include_examples 'scan error', RS::InvalidBackrefError, 'invalid backref', '\k\'\''
include_examples 'scan error', RS::InvalidBackrefError, 'invalid backref', '\k<0>'
include_examples 'scan error', RS::InvalidBackrefError, 'invalid backref', '\k\'0\''
include_examples 'scan error', RS::InvalidBackrefError, 'invalid refcall', '\g<>'
include_examples 'scan error', RS::InvalidBackrefError, 'invalid refcall', '\g\'\''
include_examples 'scan error', RS::UnknownUnicodePropertyError, 'unknown property', '\p{foobar}'
include_examples 'scan error', RS::UnknownPosixClassError, 'unknown POSIX class [::]', '[[::]]'
include_examples 'scan error', RS::UnknownPosixClassError, 'unknown POSIX class [:^:]', '[[:^:]]'
Expand Down

0 comments on commit f8884f7

Please sign in to comment.