Skip to content

Commit

Permalink
[Fix rubocop#11514] Add new Lint/UnescapedBracketInRegexp cop.
Browse files Browse the repository at this point in the history
  • Loading branch information
dvandersluis authored and bbatsov committed Oct 24, 2024
1 parent b42c30b commit 2553bc4
Show file tree
Hide file tree
Showing 5 changed files with 311 additions and 0 deletions.
1 change: 1 addition & 0 deletions changelog/new_add_new_unescaped_bracket_in_regexp_cop.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* [#11514](https://github.com/rubocop/rubocop/issues/11514): Add new `Lint/UnescapedBracketInRegexp` cop. ([@dvandersluis][])
5 changes: 5 additions & 0 deletions config/default.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2457,6 +2457,11 @@ Lint/UnderscorePrefixedVariableName:
VersionAdded: '0.21'
AllowKeywordBlockArguments: false

Lint/UnescapedBracketInRegexp:
Description: 'Checks for unescaped literal `]` in Regexp.'
Enabled: pending
VersionAdded: '<<next>>'

Lint/UnexpectedBlockArity:
Description: 'Looks for blocks that have fewer arguments than the calling method expects.'
Enabled: pending
Expand Down
1 change: 1 addition & 0 deletions lib/rubocop.rb
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,7 @@
require_relative 'rubocop/cop/lint/trailing_comma_in_attribute_declaration'
require_relative 'rubocop/cop/lint/triple_quotes'
require_relative 'rubocop/cop/lint/underscore_prefixed_variable_name'
require_relative 'rubocop/cop/lint/unescaped_bracket_in_regexp'
require_relative 'rubocop/cop/lint/unexpected_block_arity'
require_relative 'rubocop/cop/lint/unified_integer'
require_relative 'rubocop/cop/lint/unmodified_reduce_accumulator'
Expand Down
88 changes: 88 additions & 0 deletions lib/rubocop/cop/lint/unescaped_bracket_in_regexp.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# frozen_string_literal: true

module RuboCop
  module Cop
    module Lint
      # Checks for regular expressions (both literals and those built via
      # `Regexp.new` / `Regexp.compile`) that contain unescaped `]` characters.
      #
      # It emulates the following Ruby warning:
      #
      # [source,ruby]
      # ----
      # $ ruby -e '/abc]123/'
      # -e:1: warning: regular expression has ']' without escape: /abc]123/
      # ----
      #
      # @example
      #   # bad
      #   /abc]123/
      #   %r{abc]123}
      #   Regexp.new('abc]123')
      #   Regexp.compile('abc]123')
      #
      #   # good
      #   /abc\]123/
      #   %r{abc\]123}
      #   Regexp.new('abc\]123')
      #   Regexp.compile('abc\]123')
      #
      class UnescapedBracketInRegexp < Base
        extend AutoCorrector

        MSG = 'Regular expression has `]` without escape.'
        RESTRICT_ON_SEND = %i[new compile].freeze

        # Matches `Regexp.new(str, ...)` / `Regexp.compile(str, ...)` (with or
        # without a leading `::`) on the node itself and captures the `str`
        # argument. A matcher (rather than a search) is used so that a nested
        # constructor call is only handled by its own `on_send` invocation.
        #
        # @!method regexp_constructor(node)
        def_node_matcher :regexp_constructor, <<~PATTERN
          (send
            (const {nil? cbase} :Regexp) {:new :compile}
            $str
            ...
          )
        PATTERN

        # Inspects every expression of a regexp literal's parsed tree.
        # `parsed_tree` may be nil (hence `&.`), in which case nothing is checked.
        def on_regexp(node)
          RuboCop::Util.silence_warnings do
            node.parsed_tree&.each_expression do |expr|
              detect_offenses(node, expr)
            end
          end
        end

        # Inspects the string argument of `Regexp.new` / `Regexp.compile`.
        def on_send(node)
          # Ignore nodes that contain interpolation
          return if node.each_descendant(:dstr).any?

          regexp_constructor(node) do |text|
            parse_pattern_source(text.value)&.each_expression do |expr|
              detect_offenses(text, expr)
            end
          end
        end

        private

        # Parses +source+ as a regular expression.
        #
        # @param source [String] the pattern text taken from a string literal
        # @return [Object, nil] the parsed tree, or nil when parsing raised
        def parse_pattern_source(source)
          Regexp::Parser.parse(source)
        rescue StandardError
          # The string handed to `Regexp.new` is arbitrary and may not be a
          # valid pattern. An unparsable pattern is not this cop's concern, so
          # skip it instead of letting the error escape from the cop.
          nil
        end

        # Registers (and autocorrects) an offense for each `]` in a literal
        # expression that is not directly preceded by a backslash.
        #
        # @param node [RuboCop::AST::Node] the regexp or str node being checked
        # @param expr [Object] one expression yielded by `each_expression`
        def detect_offenses(node, expr)
          return unless expr.type?(:literal)

          expr.text.scan(/(?<!\\)\]/) do
            pos = Regexp.last_match.begin(0)
            next if pos.zero? # if the unescaped bracket is the first character, Ruby does not warn

            location = range_at_index(node, expr.ts, pos)

            add_offense(location) do |corrector|
              corrector.replace(location, '\]')
            end
          end
        end

        # Builds a one-character source range located `index + offset`
        # characters after the end of the node's opening delimiter.
        #
        # @param node [RuboCop::AST::Node] node whose `loc.begin` is the opening delimiter
        # @param index [Integer] start of the expression (callers pass `expr.ts`)
        # @param offset [Integer] position of the bracket within the expression text
        def range_at_index(node, index, offset)
          adjustment = index + offset
          node.loc.begin.end.adjust(begin_pos: adjustment, end_pos: adjustment + 1)
        end
      end
    end
  end
end
216 changes: 216 additions & 0 deletions spec/rubocop/cop/lint/unescaped_bracket_in_regexp_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
# frozen_string_literal: true

# Specs for Lint/UnescapedBracketInRegexp. The same scenarios (leading `]`,
# single and multiple unescaped `]`, regexp options, already-escaped `]`, and a
# plain character class) are exercised for `/.../` literals, `%r{}` literals and
# the string argument of `Regexp.new` / `Regexp.compile`.
RSpec.describe RuboCop::Cop::Lint::UnescapedBracketInRegexp, :config do
# Every example runs inside `silence_warnings`
# (presumably because the fixtures themselves trigger Ruby's
# "regular expression has ']' without escape" warning -- confirm).
around { |example| RuboCop::Util.silence_warnings(&example) }

# Fixtures containing a backslash use the non-interpolating `<<~'RUBY'` heredoc
# in the two literal sections so that `\]` is passed through verbatim.
context 'literal Regexp' do
context 'when unescaped bracket is the first character' do
it 'does not register an offense' do
# this does not register a Ruby warning
expect_no_offenses(<<~RUBY)
/]/
RUBY
end
end

context 'unescaped bracket in regexp' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
/abc]123/
^ Regular expression has `]` without escape.
RUBY

expect_correction(<<~'RUBY')
/abc\]123/
RUBY
end
end

context 'unescaped bracket in regexp with regexp options' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
/abc]123/i
^ Regular expression has `]` without escape.
RUBY

expect_correction(<<~'RUBY')
/abc\]123/i
RUBY
end
end

context 'multiple unescaped brackets in regexp' do
it 'registers an offense for each bracket' do
expect_offense(<<~RUBY)
/abc]123]/
^ Regular expression has `]` without escape.
^ Regular expression has `]` without escape.
RUBY

expect_correction(<<~'RUBY')
/abc\]123\]/
RUBY
end
end

context 'escaped bracket in regexp' do
it 'does not register an offense' do
expect_no_offenses(<<~'RUBY')
/abc\]123/
RUBY
end
end

context 'character class' do
it 'does not register an offense' do
expect_no_offenses(<<~RUBY)
/[abc]/
RUBY
end
end
end

context '%r{} Regexp' do
context 'when unescaped bracket is the first character' do
it 'does not register an offense' do
# this does not register a Ruby warning
expect_no_offenses(<<~RUBY)
%r{]}
RUBY
end
end

context 'unescaped bracket in regexp' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
%r{abc]123}
^ Regular expression has `]` without escape.
RUBY

expect_correction(<<~'RUBY')
%r{abc\]123}
RUBY
end
end

context 'unescaped bracket in regexp with regexp options' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
%r{abc]123}i
^ Regular expression has `]` without escape.
RUBY

expect_correction(<<~'RUBY')
%r{abc\]123}i
RUBY
end
end

context 'multiple unescaped brackets in regexp' do
it 'registers an offense for each bracket' do
expect_offense(<<~RUBY)
%r{abc]123]}
^ Regular expression has `]` without escape.
^ Regular expression has `]` without escape.
RUBY

expect_correction(<<~'RUBY')
%r{abc\]123\]}
RUBY
end
end

context 'escaped bracket in regexp' do
it 'does not register an offense' do
expect_no_offenses(<<~'RUBY')
%r{abc\]123}
RUBY
end
end

context 'character class' do
it 'does not register an offense' do
expect_no_offenses(<<~RUBY)
%r{[abc]}
RUBY
end
end
end

# Generates an identical group of examples for each constructor method.
# These heredocs are interpolating (`<<~RUBY`) so that `#{method}` expands;
# consequently a literal backslash in the fixture is written as `\\`, and
# `\#{...}` is used where the fixture itself must contain an interpolation.
# The `_{method}` token in annotation lines pairs with the `method: method`
# keyword given to `expect_offense`
# (presumably a width placeholder for the interpolated name -- confirm).
%i[new compile].each do |method|
context "Regexp.#{method}" do
context 'when unescaped bracket is the first character' do
it 'does not register an offense' do
# this does not register a Ruby warning
expect_no_offenses(<<~RUBY)
Regexp.#{method}(']')
RUBY
end
end

context 'unescaped bracket in regexp' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY, method: method)
Regexp.#{method}('abc]123')
_{method} ^ Regular expression has `]` without escape.
RUBY

expect_correction(<<~RUBY)
Regexp.#{method}('abc\\]123')
RUBY
end
end

context 'unescaped bracket in regexp with regexp options' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY, method: method)
Regexp.#{method}('abc]123', 'i')
_{method} ^ Regular expression has `]` without escape.
RUBY

expect_correction(<<~RUBY)
Regexp.#{method}('abc\\]123', 'i')
RUBY
end
end

context 'multiple unescaped brackets in regexp' do
it 'registers an offense for each bracket' do
expect_offense(<<~RUBY, method: method)
Regexp.#{method}('abc]123]')
_{method} ^ Regular expression has `]` without escape.
_{method} ^ Regular expression has `]` without escape.
RUBY

expect_correction(<<~RUBY)
Regexp.#{method}('abc\\]123\\]')
RUBY
end
end

context 'escaped bracket in regexp' do
it 'does not register an offense' do
expect_no_offenses(<<~RUBY)
Regexp.#{method}('abc\\]123')
RUBY
end
end

context 'character class' do
it 'does not register an offense' do
expect_no_offenses(<<~RUBY)
Regexp.#{method}('[abc]')
RUBY
end
end

# The argument here is an interpolated string, which the cop skips entirely.
context 'containing `dstr` node' do
it 'does not register an offense' do
expect_no_offenses(<<~RUBY)
Regexp.#{method}("(?:\#{arr[1]}:\\s*)")
RUBY
end
end
end
end
end

0 comments on commit 2553bc4

Please sign in to comment.