Skip to content

Commit 1b17101

Browse files
authored
Rewrite RubyLex to fix some bugs and make it possible to add new features easily (#500)
* Add nesting level parser for multiple use (indent, prompt, termination check) * Rewrite RubyLex using NestingParser * Add nesting parser tests, fix some existing tests * Add description comment, rename method to NestingParser * Add comments and tweak code to RubyLex * Update NestingParser test * Extract list of ltype tokens to constants
1 parent 359cb28 commit 1b17101

File tree

4 files changed

+661
-508
lines changed

4 files changed

+661
-508
lines changed

lib/irb/nesting_parser.rb

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
# frozen_string_literal: true
2+
module IRB
  # Walks a stream of Ripper lexer tokens and tracks which syntactic constructs
  # (keywords, brackets, string literals, heredocs, ...) are still open at each
  # token. The result is shared by indentation, prompt and termination checks.
  #
  # Tokens are expected to respond to +event+, +tok+ and +state+ in the way
  # Ripper::Lexer::Elem does.
  module NestingParser
    # Token events that never advance the small state machines below
    # (whitespace, comments and embedded documents).
    IGNORE_TOKENS = %i[on_sp on_ignored_nl on_comment on_embdoc_beg on_embdoc on_embdoc_end].freeze

    # Scans each token while maintaining a stack of open constructs.
    #
    # Each stack entry is <tt>[open_token, state, args]</tt>. +state+ is nil for
    # an ordinary open construct, or one of:
    # * +:in_unquoted_symbol+ - just saw a bare <tt>:</tt>, waiting for the symbol name
    # * +:in_lambda_head+ - between <tt>-></tt> and its <tt>{</tt> / +do+
    # * +:in_method_head+ - between +def+ and the start of the method body
    # * +:in_for_while_until_condition+ - inside a loop condition, where +do+ does not open a block
    # * +:in_alias_undef+ - inside +alias+ / +undef+, where keywords are method names
    #
    # If a block is given it is called with <tt>(token, opens)</tt> after every
    # token has been processed; +opens+ is the live stack described above.
    #
    # Returns the array of tokens that are still open after the last token,
    # including heredocs whose body has not started yet.
    def self.scan_opens(tokens)
      opens = []
      pending_heredocs = []
      first_token_on_line = true
      tokens.each do |t|
        skip = false
        last_tok, state, args = opens.last
        case state
        when :in_alias_undef
          # alias do do | undef if : keywords here are method names, not openers
          skip = t.event == :on_kw
        when :in_unquoted_symbol
          unless IGNORE_TOKENS.include?(t.event)
            # :if | :end | :+ -- the token right after ':' is the symbol name
            opens.pop
            skip = true
          end
        when :in_lambda_head
          opens.pop if t.event == :on_tlambeg || (t.event == :on_kw && t.tok == 'do')
        when :in_method_head
          unless IGNORE_TOKENS.include?(t.event)
            next_args, body, skip = method_head_step(t, args)
            if body == :oneliner
              # def f() = expr needs no matching `end`
              opens.pop
            elsif body
              opens[-1] = [last_tok, nil]
            else
              opens[-1] = [last_tok, :in_method_head, next_args]
            end
          end
        when :in_for_while_until_condition
          if t.event == :on_semicolon || t.event == :on_nl || (t.event == :on_kw && t.tok == 'do')
            # The `do` that ends a loop condition must not open a block of its own
            skip = true if t.event == :on_kw && t.tok == 'do'
            opens[-1] = [last_tok, nil]
          end
        end

        unless skip
          case t.event
          when :on_kw
            case t.tok
            when 'begin', 'class', 'module', 'do', 'case'
              opens << [t, nil]
            when 'end'
              opens.pop
            when 'def'
              opens << [t, :in_method_head, [:receiver, :name]]
            when 'if', 'unless'
              # Tokens whose state has EXPR_LABEL set are not treated as openers here
              unless t.state.allbits?(Ripper::EXPR_LABEL)
                opens << [t, nil]
              end
            when 'while', 'until'
              unless t.state.allbits?(Ripper::EXPR_LABEL)
                opens << [t, :in_for_while_until_condition]
              end
            when 'ensure', 'rescue'
              unless t.state.allbits?(Ripper::EXPR_LABEL)
                opens.pop
                opens << [t, nil]
              end
            when 'alias'
              # The third element counts tokens still to consume: keyword + 2 names
              opens << [t, :in_alias_undef, 2]
            when 'undef'
              # keyword + 1 name
              opens << [t, :in_alias_undef, 1]
            when 'elsif', 'else', 'when'
              opens.pop
              opens << [t, nil]
            when 'for'
              opens << [t, :in_for_while_until_condition]
            when 'in'
              # Only an `in` that starts a line directly under `case` / `in` is a pattern branch
              if last_tok&.event == :on_kw && %w[case in].include?(last_tok.tok) && first_token_on_line
                opens.pop
                opens << [t, nil]
              end
            end
          when :on_tlambda
            opens << [t, :in_lambda_head]
          when :on_lparen, :on_lbracket, :on_lbrace, :on_tlambeg, :on_embexpr_beg, :on_embdoc_beg
            opens << [t, nil]
          when :on_rparen, :on_rbracket, :on_rbrace, :on_embexpr_end, :on_embdoc_end
            opens.pop
          when :on_heredoc_beg
            # The heredoc body only starts after the current line ends
            pending_heredocs << t
          when :on_heredoc_end
            opens.pop
          when :on_backtick
            opens << [t, nil] if t.state.allbits?(Ripper::EXPR_BEG)
          when :on_tstring_beg, :on_words_beg, :on_qwords_beg, :on_symbols_beg, :on_qsymbols_beg, :on_regexp_beg
            opens << [t, nil]
          when :on_tstring_end, :on_regexp_end, :on_label_end
            opens.pop
          when :on_symbeg
            if t.tok == ':'
              opens << [t, :in_unquoted_symbol]
            else
              opens << [t, nil]
            end
          end
        end

        if t.event == :on_nl || t.event == :on_semicolon
          first_token_on_line = true
        elsif t.event != :on_sp
          first_token_on_line = false
        end

        if pending_heredocs.any? && t.tok.include?("\n")
          # The line has ended: heredocs announced on it become open, last one innermost-first
          pending_heredocs.reverse_each { |heredoc| opens << [heredoc, nil] }
          pending_heredocs = []
        end

        if opens.last && opens.last[1] == :in_alias_undef && !IGNORE_TOKENS.include?(t.event) && t.event != :on_heredoc_end
          # One significant token of the alias/undef statement has been consumed.
          # Drop the entry once the keyword and all of its names have been seen.
          alias_tok, alias_state, remaining = opens.pop
          opens << [alias_tok, alias_state, remaining - 1] if remaining >= 1
        end

        yield t, opens if block_given?
      end
      opens.map(&:first) + pending_heredocs.reverse
    end

    # Advances the `def` head state machine by one significant token.
    #
    # +args+ is the list of things that may legally appear at this position
    # (+:receiver+, +:dot+, +:name+, +:arg+, +:eq+, +:arg_without_paren+).
    #
    # Returns <tt>[next_args, body, skip]</tt>:
    # * +next_args+ - what may appear at the next position
    # * +body+ - +nil+ while still in the head, +:normal+ when a regular body
    #   starts, +:oneliner+ for <tt>def f = expr</tt>
    # * +skip+ - true when the token is a method name and must not be
    #   interpreted as an opener/closer (e.g. <tt>def if</tt>, <tt>def `</tt>)
    def self.method_head_step(t, args)
      next_args = []
      body = nil
      skip = false
      if args.include?(:receiver)
        case t.event
        when :on_lparen, :on_ivar, :on_gvar, :on_cvar
          # def (receiver). | def @ivar. | def $gvar. | def @@cvar.
          next_args << :dot
        when :on_kw
          case t.tok
          when 'self', 'true', 'false', 'nil'
            # def self(arg) | def self.
            next_args.push(:arg, :dot)
          else
            # def if(arg)
            skip = true
            next_args << :arg
          end
        when :on_op, :on_backtick
          # def +(arg)
          skip = true
          next_args << :arg
        when :on_ident, :on_const
          # def a(arg) | def a.
          next_args.push(:arg, :dot)
        end
      end
      if args.include?(:dot)
        # def receiver.name
        next_args << :name if t.event == :on_period || (t.event == :on_op && t.tok == '::')
      end
      if args.include?(:name)
        if %i[on_ident on_const on_op on_kw on_backtick].include?(t.event)
          # def name(arg) | def receiver.name(arg)
          next_args << :arg
          skip = true
        end
      end
      if args.include?(:arg)
        case t.event
        when :on_nl, :on_semicolon
          # def receiver.f;
          body = :normal
        when :on_lparen
          # def receiver.f()
          next_args << :eq
        else
          if t.event == :on_op && t.tok == '='
            # def receiver.f =
            body = :oneliner
          else
            # def receiver.f arg
            next_args << :arg_without_paren
          end
        end
      end
      if args.include?(:eq)
        # After `def f(...)`: a following `=` makes it a one-liner, anything else starts the body
        if t.event == :on_op && t.tok == '='
          body = :oneliner
        else
          body = :normal
        end
      end
      if args.include?(:arg_without_paren)
        if %i[on_semicolon on_nl].include?(t.event)
          # def f a;
          body = :normal
        else
          # def f a, b
          next_args << :arg_without_paren
        end
      end
      [next_args, body, skip]
    end
    private_class_method :method_head_step

    # Returns the list of tokens that are still open at the end of +tokens+.
    def self.open_tokens(tokens)
      # scan_opens without a block returns the open tokens at the last token position
      scan_opens(tokens)
    end

    # Calculates token information [line_tokens, prev_opens, next_opens, min_depth] for each line.
    #
    # * +line_tokens+ - pairs of <tt>[token, text_of_token_on_this_line]</tt>;
    #   a token spanning several lines appears once per line with its slice
    # * +prev_opens+ - open tokens at the beginning of the line
    # * +next_opens+ - open tokens at the end of the line
    # * +min_depth+ - the smallest nesting depth reached within the line
    #
    # Example code
    #   ["hello
    #   world"+(
    # First line
    #   line_tokens: [[lbracket, '['], [tstring_beg, '"'], [tstring_content("hello\nworld"), "hello\n"]]
    #   prev_opens:  []
    #   next_opens:  [lbracket, tstring_beg]
    #   min_depth:   0 (minimum at beginning of line)
    # Second line
    #   line_tokens: [[tstring_content("hello\nworld"), "world"], [tstring_end, '"'], [op, '+'], [lparen, '(']]
    #   prev_opens:  [lbracket, tstring_beg]
    #   next_opens:  [lbracket, lparen]
    #   min_depth:   1 (minimum just after tstring_end)
    def self.parse_by_line(tokens)
      line_tokens = []
      prev_opens = []
      min_depth = 0
      output = []
      last_opens = scan_opens(tokens) do |t, opens|
        # A token that has just opened a construct is itself at the outer depth
        depth = t == opens.last&.first ? opens.size - 1 : opens.size
        min_depth = depth if depth < min_depth
        if t.tok.include?("\n")
          t.tok.each_line do |line|
            line_tokens << [t, line]
            # The trailing slice without a newline belongs to a line that is not finished yet
            next if line[-1] != "\n"
            next_opens = opens.map(&:first)
            output << [line_tokens, prev_opens, next_opens, min_depth]
            prev_opens = next_opens
            min_depth = prev_opens.size
            line_tokens = []
          end
        else
          line_tokens << [t, t.tok]
        end
      end
      output << [line_tokens, prev_opens, last_opens, min_depth] if line_tokens.any?
      output
    end
  end
end

0 commit comments

Comments
 (0)