# frozen_string_literal: true

module RuboCop
  # Common methods and behaviors for dealing with tokens.
  #
  # This file is loaded from lib/rubocop.rb right before 'rubocop/cop/util',
  # and the module is included into Cop::Util. It takes over `tokens` from
  # Cop::Util and `index_of_first_token` / `index_of_last_token` from the
  # SurroundingSpace mixin (which previously used a line/column lookup table).
  #
  # Every method reads `processed_source.tokens`, so the including object
  # must respond to `processed_source`.
  module TokensUtil
    module_function

    # Returns the tokens located entirely inside the source range of `node`.
    #
    # The tokens list is always sorted by token position, except for cases
    # when a heredoc is passed as a method argument. In this case the tokens
    # are interleaved with the tokens of the heredoc contents.
    # We try a fast (binary search based) lookup first, assuming the mentioned
    # cases are rare, and fall back to a linear scan if it fails.
    #
    # Results are memoized per `node.object_id` in `@tokens`.
    #
    # @param node [#source_range] node whose range responds to
    #   `begin_pos` and `end_pos`
    # @return [Array] the tokens within the node, in token-list order
    def tokens(node)
      @tokens ||= {}
      @tokens[node.object_id] ||=
        fast_tokens(node) || tokens_within(node.source_range)
    end

    # Returns the index (in `processed_source.tokens`) of the token that
    # begins exactly where `node` begins, or nil when there is no such token.
    def index_of_first_token(node)
      fast_index_of_first_token(node) || begin
        begin_pos = node.source_range.begin_pos
        processed_source.tokens.index { |token| token.begin_pos == begin_pos }
      end
    end

    # Returns the index (in `processed_source.tokens`) of the token that
    # ends exactly where `node` ends, or nil when there is no such token.
    def index_of_last_token(node)
      fast_index_of_last_token(node) || begin
        end_pos = node.source_range.end_pos
        processed_source.tokens.index { |token| token.end_pos == end_pos }
      end
    end

    # NOTE(review): a bare `private` ends `module_function` mode, so the
    # helpers below are instance methods only; the methods above are meant
    # to be used through `include`.
    private

    # Binary search for the token starting at the node's begin position.
    # The hit is verified afterwards, so a list that is not fully sorted
    # yields nil instead of a wrong index.
    def fast_index_of_first_token(node)
      begin_pos = node.source_range.begin_pos
      tokens = processed_source.tokens

      index = tokens.bsearch_index { |token| token.begin_pos >= begin_pos }
      index if index && tokens[index].begin_pos == begin_pos
    end

    # Binary search for the token ending at the node's end position.
    # The hit is verified afterwards, same as for the first token.
    def fast_index_of_last_token(node)
      end_pos = node.source_range.end_pos
      tokens = processed_source.tokens

      index = tokens.bsearch_index { |token| token.end_pos >= end_pos }
      index if index && tokens[index].end_pos == end_pos
    end

    # Slices the token list between the node's first and last token.
    #
    # Returns nil (so that the caller falls back to the linear scan) when
    # either boundary token cannot be found or when the slice is not sorted
    # by position, which means foreign (heredoc body) tokens are interleaved.
    def fast_tokens(node)
      begin_index = index_of_first_token(node)
      end_index = index_of_last_token(node)
      # A nil bound would make the range beginless/endless and silently
      # select tokens outside of the node.
      return unless begin_index && end_index

      tokens = processed_source.tokens[begin_index..end_index]
      tokens if sorted_tokens?(tokens)
    end

    # Linear scan: every token lying entirely inside `range`.
    def tokens_within(range)
      begin_pos = range.begin_pos
      end_pos = range.end_pos

      processed_source.tokens.select do |token|
        token.end_pos <= end_pos && token.begin_pos >= begin_pos
      end
    end

    # True when the begin positions of `tokens` never decrease.
    def sorted_tokens?(tokens)
      tokens.each_cons(2).none? do |previous, current|
        current.begin_pos < previous.begin_pos
      end
    end
  end
end