[Fix rubocop#11514] Add new Lint/UnescapedBracketInRegexp cop.

dvandersluis · Oct 24, 2024 · 2553bc4 · 2553bc4
1 parent b42c30b
commit 2553bc4
Show file tree

Hide file tree

Showing 5 changed files with 311 additions and 0 deletions.
diff --git a/changelog/new_add_new_unescaped_bracket_in_regexp_cop.md b/changelog/new_add_new_unescaped_bracket_in_regexp_cop.md
@@ -0,0 +1 @@
+* [#11514](https://github.com/rubocop/rubocop/issues/11514): Add new `Lint/UnescapedBracketInRegexp` cop. ([@dvandersluis][])
diff --git a/config/default.yml b/config/default.yml
@@ -2457,6 +2457,11 @@ Lint/UnderscorePrefixedVariableName:
   VersionAdded: '0.21'
   AllowKeywordBlockArguments: false
 
+Lint/UnescapedBracketInRegexp:
+  Description: 'Checks for unescaped literal `]` in Regexp.'
+  Enabled: pending
+  VersionAdded: '<<next>>'
+
 Lint/UnexpectedBlockArity:
   Description: 'Looks for blocks that have fewer arguments that the calling method expects.'
   Enabled: pending

diff --git a/lib/rubocop.rb b/lib/rubocop.rb
@@ -405,6 +405,7 @@
 require_relative 'rubocop/cop/lint/trailing_comma_in_attribute_declaration'
 require_relative 'rubocop/cop/lint/triple_quotes'
 require_relative 'rubocop/cop/lint/underscore_prefixed_variable_name'
+require_relative 'rubocop/cop/lint/unescaped_bracket_in_regexp'
 require_relative 'rubocop/cop/lint/unexpected_block_arity'
 require_relative 'rubocop/cop/lint/unified_integer'
 require_relative 'rubocop/cop/lint/unmodified_reduce_accumulator'

diff --git a/lib/rubocop/cop/lint/unescaped_bracket_in_regexp.rb b/lib/rubocop/cop/lint/unescaped_bracket_in_regexp.rb
@@ -0,0 +1,88 @@
+# frozen_string_literal: true
+
+module RuboCop
+  module Cop
+    module Lint
+      # Checks for regular expressions (both literals and via `Regexp.new` / `Regexp.compile`)
+      # that contain unescaped `]` characters.
+      #
+      # It emulates the following Ruby warning:
+      #
+      # [source,ruby]
+      # ----
+      # $ ruby -e '/abc]123/'
+      # -e:1: warning: regular expression has ']' without escape: /abc]123/
+      # ----
+      #
+      # @example
+      #   # bad
+      #   /abc]123/
+      #   %r{abc]123}
+      #   Regexp.new('abc]123')
+      #   Regexp.compile('abc]123')
+      #
+      #   # good
+      #   /abc\]123/
+      #   %r{abc\]123}
+      #   Regexp.new('abc\]123')
+      #   Regexp.compile('abc\]123')
+      #
+      class UnescapedBracketInRegexp < Base
+        extend AutoCorrector
+
+        MSG = 'Regular expression has `]` without escape.'
+        RESTRICT_ON_SEND = %i[new compile].freeze
+
+        # @!method regexp_constructor(node)
+        def_node_search :regexp_constructor, <<~PATTERN
+          (send
+            (const {nil? cbase} :Regexp) {:new :compile}
+            $str
+            ...
+          )
+        PATTERN
+
+        def on_regexp(node)
+          RuboCop::Util.silence_warnings do
+            node.parsed_tree&.each_expression do |expr|
+              detect_offenses(node, expr)
+            end
+          end
+        end
+
+        def on_send(node)
+          # Ignore nodes that contain interpolation
+          return if node.each_descendant(:dstr).any?
+
+          regexp_constructor(node) do |text|
+            Regexp::Parser.parse(text.value)&.each_expression do |expr|
+              detect_offenses(text, expr)
+            end
+          end
+        end
+
+        private
+
+        def detect_offenses(node, expr)
+          return unless expr.type?(:literal)
+
+          expr.text.scan(/(?<!\\)\]/) do
+            pos = Regexp.last_match.begin(0)
+            next if pos.zero? # if the unescaped bracket is the first character, Ruby does not warn
+
+            location = range_at_index(node, expr.ts, pos)
+
+            add_offense(location) do |corrector|
+              corrector.replace(location, '\]')
+            end
+          end
+        end
+
+        def range_at_index(node, index, offset)
+          adjustment = index + offset
+          node.loc.begin.end.adjust(begin_pos: adjustment, end_pos: adjustment + 1)
+        end
+      end
+    end
+  end
+end
diff --git a/spec/rubocop/cop/lint/unescaped_bracket_in_regexp_spec.rb b/spec/rubocop/cop/lint/unescaped_bracket_in_regexp_spec.rb
@@ -0,0 +1,216 @@
+# frozen_string_literal: true
+
+# Specs for `Lint/UnescapedBracketInRegexp`. The same scenarios are exercised for `/.../`
+# literals, `%r{...}` literals and the `Regexp.new` / `Regexp.compile` constructors.
+RSpec.describe RuboCop::Cop::Lint::UnescapedBracketInRegexp, :config do
+  # Every example runs with Ruby warnings silenced.
+  around { |example| RuboCop::Util.silence_warnings(&example) }
+
+  context 'literal Regexp' do
+    context 'when unescaped bracket is the first character' do
+      it 'does not register an offense' do
+        # this does not register a Ruby warning
+        expect_no_offenses(<<~RUBY)
+          /]/
+        RUBY
+      end
+    end
+
+    context 'unescaped bracket in regexp' do
+      it 'registers an offense and corrects' do
+        expect_offense(<<~RUBY)
+          /abc]123/
+              ^ Regular expression has `]` without escape.
+        RUBY
+
+        # Single-quoted heredoc: the backslash is kept verbatim in the expected source.
+        expect_correction(<<~'RUBY')
+          /abc\]123/
+        RUBY
+      end
+    end
+
+    context 'unescaped bracket in regexp with regexp options' do
+      it 'registers an offense and corrects' do
+        expect_offense(<<~RUBY)
+          /abc]123/i
+              ^ Regular expression has `]` without escape.
+        RUBY
+
+        expect_correction(<<~'RUBY')
+          /abc\]123/i
+        RUBY
+      end
+    end
+
+    context 'multiple unescaped brackets in regexp' do
+      it 'registers an offense for each bracket' do
+        expect_offense(<<~RUBY)
+          /abc]123]/
+              ^ Regular expression has `]` without escape.
+                  ^ Regular expression has `]` without escape.
+        RUBY
+
+        expect_correction(<<~'RUBY')
+          /abc\]123\]/
+        RUBY
+      end
+    end
+
+    context 'escaped bracket in regexp' do
+      it 'does not register an offense' do
+        expect_no_offenses(<<~'RUBY')
+          /abc\]123/
+        RUBY
+      end
+    end
+
+    context 'character class' do
+      it 'does not register an offense' do
+        # The `]` here closes a character class.
+        expect_no_offenses(<<~RUBY)
+          /[abc]/
+        RUBY
+      end
+    end
+  end
+
+  context '%r{} Regexp' do
+    context 'when unescaped bracket is the first character' do
+      it 'does not register an offense' do
+        # this does not register a Ruby warning
+        expect_no_offenses(<<~RUBY)
+          %r{]}
+        RUBY
+      end
+    end
+
+    context 'unescaped bracket in regexp' do
+      it 'registers an offense and corrects' do
+        expect_offense(<<~RUBY)
+          %r{abc]123}
+                ^ Regular expression has `]` without escape.
+        RUBY
+
+        expect_correction(<<~'RUBY')
+          %r{abc\]123}
+        RUBY
+      end
+    end
+
+    context 'unescaped bracket in regexp with regexp options' do
+      it 'registers an offense and corrects' do
+        expect_offense(<<~RUBY)
+          %r{abc]123}i
+                ^ Regular expression has `]` without escape.
+        RUBY
+
+        expect_correction(<<~'RUBY')
+          %r{abc\]123}i
+        RUBY
+      end
+    end
+
+    context 'multiple unescaped brackets in regexp' do
+      it 'registers an offense for each bracket' do
+        expect_offense(<<~RUBY)
+          %r{abc]123]}
+                ^ Regular expression has `]` without escape.
+                    ^ Regular expression has `]` without escape.
+        RUBY
+
+        expect_correction(<<~'RUBY')
+          %r{abc\]123\]}
+        RUBY
+      end
+    end
+
+    context 'escaped bracket in regexp' do
+      it 'does not register an offense' do
+        expect_no_offenses(<<~'RUBY')
+          %r{abc\]123}
+        RUBY
+      end
+    end
+
+    context 'character class' do
+      it 'does not register an offense' do
+        expect_no_offenses(<<~RUBY)
+          %r{[abc]}
+        RUBY
+      end
+    end
+  end
+
+  # `Regexp.new` and `Regexp.compile` share the same examples, generated once per method name.
+  %i[new compile].each do |method|
+    context "Regexp.#{method}" do
+      context 'when unescaped bracket is the first character' do
+        it 'does not register an offense' do
+          # this does not register a Ruby warning
+          expect_no_offenses(<<~RUBY)
+            Regexp.#{method}(']')
+          RUBY
+        end
+      end
+
+      context 'unescaped bracket in regexp' do
+        it 'registers an offense and corrects' do
+          # `_{method}` is filled in from the `method:` keyword so the caret stays aligned
+          # for both method names.
+          # NOTE(review): relies on the placeholder support of RuboCop's `expect_offense`
+          # helper - confirm against the spec support code.
+          expect_offense(<<~RUBY, method: method)
+            Regexp.#{method}('abc]123')
+                   _{method}     ^ Regular expression has `]` without escape.
+          RUBY
+
+          # This heredoc interpolates, so `\\]` denotes a single backslash followed by `]`.
+          expect_correction(<<~RUBY)
+            Regexp.#{method}('abc\\]123')
+          RUBY
+        end
+      end
+
+      context 'unescaped bracket in regexp with regexp options' do
+        it 'registers an offense and corrects' do
+          expect_offense(<<~RUBY, method: method)
+            Regexp.#{method}('abc]123', 'i')
+                   _{method}     ^ Regular expression has `]` without escape.
+          RUBY
+
+          expect_correction(<<~RUBY)
+            Regexp.#{method}('abc\\]123', 'i')
+          RUBY
+        end
+      end
+
+      context 'multiple unescaped brackets in regexp' do
+        it 'registers an offense for each bracket' do
+          expect_offense(<<~RUBY, method: method)
+            Regexp.#{method}('abc]123]')
+                   _{method}     ^ Regular expression has `]` without escape.
+                   _{method}         ^ Regular expression has `]` without escape.
+          RUBY
+
+          expect_correction(<<~RUBY)
+            Regexp.#{method}('abc\\]123\\]')
+          RUBY
+        end
+      end
+
+      context 'escaped bracket in regexp' do
+        it 'does not register an offense' do
+          expect_no_offenses(<<~RUBY)
+            Regexp.#{method}('abc\\]123')
+          RUBY
+        end
+      end
+
+      context 'character class' do
+        it 'does not register an offense' do
+          expect_no_offenses(<<~RUBY)
+            Regexp.#{method}('[abc]')
+          RUBY
+        end
+      end
+
+      context 'containing `dstr` node' do
+        it 'does not register an offense' do
+          # `\#` stops the heredoc from interpolating, so the inspected code contains a
+          # real `#{...}` inside the double-quoted argument.
+          expect_no_offenses(<<~RUBY)
+            Regexp.#{method}("(?:\#{arr[1]}:\\s*)")
+          RUBY
+        end
+      end
+    end
+  end
+end
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		* [#11514](https://github.com/rubocop/rubocop/issues/11514): Add new `Lint/UnescapedBracketInRegexp` cop. ([@dvandersluis][])